[PATCH v4 09/19] coresight: etm4x: Move ETM to prohibited region for disable

2021-02-25 Thread Suzuki K Poulose
If the CPU implements Arm v8.4 Trace filter controls (FEAT_TRF),
move the ETM to trace prohibited region using TRFCR, while disabling.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Anshuman Khandual 
Signed-off-by: Suzuki K Poulose 
---
New patch
---
 .../coresight/coresight-etm4x-core.c  | 21 +--
 drivers/hwtracing/coresight/coresight-etm4x.h |  2 ++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 15016f757828..00297906669c 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -654,6 +655,7 @@ static int etm4_enable(struct coresight_device *csdev,
 static void etm4_disable_hw(void *info)
 {
u32 control;
+   u64 trfcr;
struct etmv4_drvdata *drvdata = info;
struct etmv4_config *config = >config;
struct coresight_device *csdev = drvdata->csdev;
@@ -676,6 +678,16 @@ static void etm4_disable_hw(void *info)
/* EN, bit[0] Trace unit enable bit */
control &= ~0x1;
 
+   /*
+* If the CPU supports v8.4 Trace filter Control,
+* set the ETM to trace prohibited region.
+*/
+   if (drvdata->trfc) {
+   trfcr = read_sysreg_s(SYS_TRFCR_EL1);
+   write_sysreg_s(trfcr & ~(TRFCR_ELx_ExTRE | TRFCR_ELx_E0TRE),
+  SYS_TRFCR_EL1);
+   isb();
+   }
/*
 * Make sure everything completes before disabling, as recommended
 * by section 7.3.77 ("TRCVICTLR, ViewInst Main Control Register,
@@ -683,12 +695,16 @@ static void etm4_disable_hw(void *info)
 */
dsb(sy);
isb();
+   /* Trace synchronization barrier, is a nop if not supported */
+   tsb_csync();
etm4x_relaxed_write32(csa, control, TRCPRGCTLR);
 
/* wait for TRCSTATR.PMSTABLE to go to '1' */
if (coresight_timeout(csa, TRCSTATR, TRCSTATR_PMSTABLE_BIT, 1))
dev_err(etm_dev,
"timeout while waiting for PM stable Trace Status\n");
+   if (drvdata->trfc)
+   write_sysreg_s(trfcr, SYS_TRFCR_EL1);
 
/* read the status of the single shot comparators */
for (i = 0; i < drvdata->nr_ss_cmp; i++) {
@@ -873,7 +889,7 @@ static bool etm4_init_csdev_access(struct etmv4_drvdata 
*drvdata,
return false;
 }
 
-static void cpu_enable_tracing(void)
+static void cpu_enable_tracing(struct etmv4_drvdata *drvdata)
 {
u64 dfr0 = read_sysreg(id_aa64dfr0_el1);
u64 trfcr;
@@ -881,6 +897,7 @@ static void cpu_enable_tracing(void)
if (!cpuid_feature_extract_unsigned_field(dfr0, 
ID_AA64DFR0_TRACE_FILT_SHIFT))
return;
 
+   drvdata->trfc = true;
/*
 * If the CPU supports v8.4 SelfHosted Tracing, enable
 * tracing at the kernel EL and EL0, forcing to use the
@@ -1082,7 +1099,7 @@ static void etm4_init_arch_data(void *info)
/* NUMCNTR, bits[30:28] number of counters available for tracing */
drvdata->nr_cntr = BMVAL(etmidr5, 28, 30);
etm4_cs_lock(drvdata, csa);
-   cpu_enable_tracing();
+   cpu_enable_tracing(drvdata);
 }
 
 static inline u32 etm4_get_victlr_access_type(struct etmv4_config *config)
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h 
b/drivers/hwtracing/coresight/coresight-etm4x.h
index 0af60571aa23..f6478ef642bf 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -862,6 +862,7 @@ struct etmv4_save_state {
  * @nooverflow:Indicate if overflow prevention is supported.
  * @atbtrig:   If the implementation can support ATB triggers
  * @lpoverride:If the implementation can support low-power state over.
+ * @trfc:  If the implementation supports Arm v8.4 trace filter controls.
  * @config:structure holding configuration parameters.
  * @save_state:State to be preserved across power loss
  * @state_needs_restore: True when there is context to restore after PM exit
@@ -912,6 +913,7 @@ struct etmv4_drvdata {
boolnooverflow;
boolatbtrig;
boollpoverride;
+   booltrfc;
struct etmv4_config config;
struct etmv4_save_state *save_state;
boolstate_needs_restore;
-- 
2.24.1



[PATCH v4 12/19] coresight: etm4x: Add support for PE OS lock

2021-02-25 Thread Suzuki K Poulose
ETE may not implement the OS lock and instead could rely on
the PE OS Lock for the trace unit access. This is indicated
by the TRCOSLSR.OSLM == 0b100. Add support for handling the
PE OS lock.

Cc: Mike Leach 
Reviewed-by: mike.leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
 .../coresight/coresight-etm4x-core.c  | 50 +++
 drivers/hwtracing/coresight/coresight-etm4x.h | 15 ++
 2 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 00297906669c..35802caca32a 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -115,30 +115,59 @@ void etm4x_sysreg_write(u64 val, u32 offset, bool 
_relaxed, bool _64bit)
}
 }
 
-static void etm4_os_unlock_csa(struct etmv4_drvdata *drvdata, struct 
csdev_access *csa)
+static void etm_detect_os_lock(struct etmv4_drvdata *drvdata,
+  struct csdev_access *csa)
 {
-   /* Writing 0 to TRCOSLAR unlocks the trace registers */
-   etm4x_relaxed_write32(csa, 0x0, TRCOSLAR);
-   drvdata->os_unlock = true;
+   u32 oslsr = etm4x_relaxed_read32(csa, TRCOSLSR);
+
+   drvdata->os_lock_model = ETM_OSLSR_OSLM(oslsr);
+}
+
+static void etm_write_os_lock(struct etmv4_drvdata *drvdata,
+ struct csdev_access *csa, u32 val)
+{
+   val = !!val;
+
+   switch (drvdata->os_lock_model) {
+   case ETM_OSLOCK_PRESENT:
+   etm4x_relaxed_write32(csa, val, TRCOSLAR);
+   break;
+   case ETM_OSLOCK_PE:
+   write_sysreg_s(val, SYS_OSLAR_EL1);
+   break;
+   default:
+   pr_warn_once("CPU%d: Unsupported Trace OSLock model: %x\n",
+smp_processor_id(), drvdata->os_lock_model);
+   fallthrough;
+   case ETM_OSLOCK_NI:
+   return;
+   }
isb();
 }
 
+static inline void etm4_os_unlock_csa(struct etmv4_drvdata *drvdata,
+ struct csdev_access *csa)
+{
+   WARN_ON(drvdata->cpu != smp_processor_id());
+
+   /* Writing 0 to OS Lock unlocks the trace unit registers */
+   etm_write_os_lock(drvdata, csa, 0x0);
+   drvdata->os_unlock = true;
+}
+
 static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
 {
if (!WARN_ON(!drvdata->csdev))
etm4_os_unlock_csa(drvdata, >csdev->access);
-
 }
 
 static void etm4_os_lock(struct etmv4_drvdata *drvdata)
 {
if (WARN_ON(!drvdata->csdev))
return;
-
-   /* Writing 0x1 to TRCOSLAR locks the trace registers */
-   etm4x_relaxed_write32(>csdev->access, 0x1, TRCOSLAR);
+   /* Writing 0x1 to OS Lock locks the trace registers */
+   etm_write_os_lock(drvdata, >csdev->access, 0x1);
drvdata->os_unlock = false;
-   isb();
 }
 
 static void etm4_cs_lock(struct etmv4_drvdata *drvdata,
@@ -937,6 +966,9 @@ static void etm4_init_arch_data(void *info)
if (!etm4_init_csdev_access(drvdata, csa))
return;
 
+   /* Detect the support for OS Lock before we actually use it */
+   etm_detect_os_lock(drvdata, csa);
+
/* Make sure all registers are accessible */
etm4_os_unlock_csa(drvdata, csa);
etm4_cs_unlock(drvdata, csa);
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h 
b/drivers/hwtracing/coresight/coresight-etm4x.h
index f6478ef642bf..5b961c5b78d1 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -505,6 +505,20 @@
 ETM_MODE_EXCL_KERN | \
 ETM_MODE_EXCL_USER)
 
+/*
+ * TRCOSLSR.OSLM advertises the OS Lock model.
+ * OSLM[2:0] = TRCOSLSR[4:3,0]
+ *
+ * 0b000 - Trace OS Lock is not implemented.
+ * 0b010 - Trace OS Lock is implemented.
+ * 0b100 - Trace OS Lock is not implemented, unit is controlled by PE OS 
Lock.
+ */
+#define ETM_OSLOCK_NI  0b000
+#define ETM_OSLOCK_PRESENT 0b010
+#define ETM_OSLOCK_PE  0b100
+
+#define ETM_OSLSR_OSLM(oslsr)  oslsr) & GENMASK(4, 3)) >> 2) | (oslsr & 
0x1))
+
 /*
  * TRCDEVARCH Bit field definitions
  * Bits[31:21] - ARCHITECT = Always Arm Ltd.
@@ -898,6 +912,7 @@ struct etmv4_drvdata {
u8  s_ex_level;
u8  ns_ex_level;
u8  q_support;
+   u8  os_lock_model;
boolsticky_enable;
boolboot_enable;
boolos_unlock;
-- 
2.24.1



[PATCH v4 04/19] kvm: arm64: nvhe: Save the SPE context early

2021-02-25 Thread Suzuki K Poulose
The nvhe hyp saves the SPE context, flushing any unwritten
data before we switch to the guest. But this operation is
performed way too late, because :
  - The ownership of the SPE is transferred to EL2. i.e,
using EL2 translations. (MDCR_EL2_E2PB == 0)
  - The guest Stage1 is loaded.

Thus the flush could use the host EL1 virtual address,
but use the EL2 translations instead. Fix this by
moving the SPE context save early.
i.e, Save the context before we load the guest stage1
and before we change the ownership to EL2.

The restore path is doing the right thing.

Fixes: 014c4c77aad7 ("KVM: arm64: Improve debug register save/restore flow")
Cc: sta...@vger.kernel.org
Cc: Christoffer Dall 
Cc: Marc Zyngier 
Cc: Will Deacon 
Cc: Catalin Marinas 
Cc: Mark Rutland 
Cc: Alexandru Elisei 
Signed-off-by: Suzuki K Poulose 
---
New patch.
---
 arch/arm64/include/asm/kvm_hyp.h   |  5 +
 arch/arm64/kvm/hyp/nvhe/debug-sr.c | 12 ++--
 arch/arm64/kvm/hyp/nvhe/switch.c   | 12 +++-
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index c0450828378b..385bd7dd3d39 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -83,6 +83,11 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context 
*ctxt);
 void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
 void __debug_switch_to_host(struct kvm_vcpu *vcpu);
 
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+#endif
+
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
 
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c 
b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 91a711aa8382..f401724f12ef 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -58,16 +58,24 @@ static void __debug_restore_spe(u64 pmscr_el1)
write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
 }
 
-void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
/* Disable and flush SPE data generation */
__debug_save_spe(>arch.host_debug_state.pmscr_el1);
+}
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
__debug_switch_to_guest_common(vcpu);
 }
 
-void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
__debug_switch_to_host_common(vcpu);
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index f3d0e9eca56c..10eed66136a0 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -192,6 +192,15 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
 
__sysreg_save_state_nvhe(host_ctxt);
+   /*
+* For nVHE, we must save and disable any SPE
+* buffers, as the translation regime is going
+* to be loaded with that of the guest. And we must
+* save host context for SPE, before we change the
+* ownership to EL2 (via MDCR_EL2_E2PB == 0)  and before
+* we load guest Stage1.
+*/
+   __debug_save_host_buffers_nvhe(vcpu);
 
__adjust_pc(vcpu);
 
@@ -234,11 +243,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
__fpsimd_save_fpexc32(vcpu);
 
+   __debug_switch_to_host(vcpu);
/*
 * This must come after restoring the host sysregs, since a non-VHE
 * system may enable SPE here and make use of the TTBRs.
 */
-   __debug_switch_to_host(vcpu);
+   __debug_restore_host_buffers_nvhe(vcpu);
 
if (pmu_switch_needed)
__pmu_switch_to_host(host_ctxt);
-- 
2.24.1



[PATCH v4 01/19] perf: aux: Add flags for the buffer format

2021-02-25 Thread Suzuki K Poulose
Allocate a byte for advertising the PMU specific format type
of the given AUX record. A PMU could end up providing hardware
trace data in multiple formats in a single session.

e.g, The format of hardware buffer produced by CoreSight ETM
PMU depends on the type of the "sink" device used for collection
for an event (Traditional TMC-ETR/Bs with formatting or
TRBEs without any formatting).

 # Boring story of why this is needed. Goto The_End_of_Story for skipping.

CoreSight ETM trace allows instruction level tracing of Arm CPUs.
The ETM generates the CPU execution trace and pumps it into CoreSight
AMBA Trace Bus and is collected by a different CoreSight component
(traditionally CoreSight TMC-ETR /ETB/ETF), called "sink".
Important to note that there is no guarantee that every CPU has
a dedicated sink.  Thus multiple ETMs could pump the trace data
into the same "sink" and thus they apply additional formatting
of the trace data for the user to decode it properly and attribute
the trace data to the corresponding ETM.

However, with the introduction of Arm Trace buffer Extensions (TRBE),
we now have a dedicated per-CPU architected sink for collecting the
trace. Since the TRBE is always per-CPU, it doesn't apply any formatting
of the trace. The support for this driver is under review [1].

Now a system could have a per-cpu TRBE and one or more shared
TMC-ETRs on the system. A user could choose a "specific" sink
for a perf session (e.g, a TMC-ETR) or the driver could automatically
select the nearest sink for a given ETM. It is possible that
some ETMs could end up using TMC-ETR (e.g, if the TRBE is not
usable on the CPU) while the others using TRBE in a single
perf session. Thus we now have "formatted" trace collected
from TMC-ETR and "unformatted" trace collected from TRBE.
However, we don't get into a situation where a single event
could end up using TMC-ETR & TRBE. i.e, any AUX buffer is
guaranteed to be either RAW or FORMATTED, but not a mix
of both.

As for perf decoding, we need to know the type of the data
in the individual AUX buffers, so that it can set up the
"OpenCSD" (library for decoding CoreSight trace) decoder
instance appropriately. Thus the perf.data file must contain
the hints for the tool to decode the data correctly.

Since this is a runtime variable, and perf tool doesn't have
a control on what sink gets used (in case of automatic sink
selection), we need this information made available from
the PMU driver for each AUX record.

 # The_End_of_Story

Cc: Peter Zijlstra 
Cc: alexander.shish...@linux.intel.com
Cc: mi...@redhat.com
Cc: w...@kernel.org
Cc: mark.rutl...@arm.com
Cc: mike.le...@linaro.org
Cc: a...@kernel.org
Cc: jo...@redhat.com
Cc: Mathieu Poirier 
Reviewed-by: Mike Leach 
Acked-by: Peter Zijlstra 
Signed-off-by: Suzuki K Poulose 
---
 include/uapi/linux/perf_event.h | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index ad15e40d7f5d..f006eeab6f0e 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1156,10 +1156,11 @@ enum perf_callchain_context {
 /**
  * PERF_RECORD_AUX::flags bits
  */
-#define PERF_AUX_FLAG_TRUNCATED0x01/* record was truncated 
to fit */
-#define PERF_AUX_FLAG_OVERWRITE0x02/* snapshot from 
overwrite mode */
-#define PERF_AUX_FLAG_PARTIAL  0x04/* record contains gaps */
-#define PERF_AUX_FLAG_COLLISION0x08/* sample collided with 
another */
+#define PERF_AUX_FLAG_TRUNCATED0x01/* record was 
truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE0x02/* snapshot 
from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL  0x04/* record contains gaps 
*/
+#define PERF_AUX_FLAG_COLLISION0x08/* sample 
collided with another */
+#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00  /* PMU specific trace 
format type */
 
 #define PERF_FLAG_FD_NO_GROUP  (1UL << 0)
 #define PERF_FLAG_FD_OUTPUT(1UL << 1)
-- 
2.24.1



[PATCH v4 00/19] arm64: coresight: Add support for ETE and TRBE

2021-02-25 Thread Suzuki K Poulose


This series enables future IP trace features Embedded Trace Extension (ETE)
and Trace Buffer Extension (TRBE). This series applies on linux-next/master
(from tag next-20210222), and is also available here [0].

Patches 1 & 2:  UABI updates for perf AUX flag format. We reserve
a byte for advertising the format of the buffer when the PMU could
support different formats. The CoreSight PMUs could use Frame formatted
data and Raw format of the trace source.

Patches 3 - 5: Fixes for arm64 KVM hypervisor to align with the architecture.
Patches 6 - 7: Adds the architecture definitions for trace and TRBE
Patch 8  : Adds the necessary changes for enabling TRBE access to host
from the early initialisation (VHE and nVHE). Also support for nVHE hyp
to save/restore the TRBE context of the host during a trip to the guest. 

Patches 9 - 19: CoreSight driver specific changes and DT bindings for 
ETE and TRBE support


ETE is the PE (CPU) trace unit for CPUs, implementing future architecture
extensions. ETE overlaps with the ETMv4 architecture, with additions to
support the newer architecture features and some restrictions on the
supported features w.r.t ETMv4. The ETE support is added by extending the
ETMv4 driver to recognise the ETE and handle the features as exposed by the
TRCIDRx registers. ETE only supports system instructions access from the
host CPU. The ETE could be integrated with a TRBE (see below), or with the
legacy CoreSight trace bus (e.g, ETRs). Thus the ETE follows same firmware
description as the ETMs and requires a node per instance. 

Trace Buffer Extensions (TRBE) implements a per CPU trace buffer, which is
accessible via the system registers and can be combined with the ETE to
provide a 1x1 configuration of source & sink. TRBE is being represented
here as a CoreSight sink. Primary reason is that the ETE source could work
with other traditional CoreSight sink devices. As TRBE captures the trace
data which is produced by ETE, it cannot work alone.

TRBE representation here has some distinct deviations from a traditional
CoreSight sink device. The CoreSight path between ETE and TRBE is not built
during boot by looking at the respective DT or ACPI entries.

Unlike traditional sinks, TRBE can generate interrupts to signal, among
other things, that the buffer has filled. The interrupt is a PPI and should be
communicated from the platform. DT or ACPI entry representing TRBE should
have the PPI number for a given platform. During perf session, the TRBE IRQ
handler should capture trace for perf auxiliary buffer before restarting it
back. System registers being used here to configure ETE and TRBE could be
referred in the link below.

https://developer.arm.com/docs/ddi0601/g/aarch64-system-registers.

[0] https://gitlab.arm.com/linux-arm/linux-skp/-/tree/coresight/ete/v4/next

Changes in V4:

 - ETE and TRBE changes have been captured in the respective patches
 - Better support for nVHE
 - Re-ordered and split the patches to keep the changes separate
   for the generic/arm64 tree from CoreSight driver specific changes.
 - Fixes for KVM handling of Trace/SPE


Changes in V3:

https://lore.kernel.org/linux-arm-kernel/1611737738-1493-1-git-send-email-anshuman.khand...@arm.com/

- Rebased on coresight/next
- Changed DT bindings for ETE
- Included additional patches for arm64 nvhe, perf aux buffer flags etc
- TRBE changes have been captured in the respective patches

Changes in V2:

https://lore.kernel.org/linux-arm-kernel/1610511498-4058-1-git-send-email-anshuman.khand...@arm.com/

- Converted both ETE and TRBE DT bindings into Yaml
- TRBE changes have been captured in the respective patches
 
Changes in V1:

https://lore.kernel.org/linux-arm-kernel/1608717823-18387-1-git-send-email-anshuman.khand...@arm.com/

- There are not many ETE changes from Suzuki apart from splitting of the ETE 
DTS patch
- TRBE changes have been captured in the respective patches

Changes in RFC:

https://lore.kernel.org/linux-arm-kernel/1605012309-24812-1-git-send-email-anshuman.khand...@arm.com/

Cc: Will Deacon 
Cc: Marc Zyngier 
Cc: Peter Zijlstra 
Cc: Mathieu Poirier 
Cc: Suzuki K Poulose 
Cc: Mike Leach 
Cc: Linu Cherian 
Cc: coresi...@lists.linaro.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-kernel@vger.kernel.org


Anshuman Khandual (3):
  arm64: Add TRBE definitions
  coresight: core: Add support for dedicated percpu sinks
  coresight: sink: Add TRBE driver

Suzuki K Poulose (16):
  perf: aux: Add flags for the buffer format
  perf: aux: Add CoreSight PMU buffer formats
  kvm: arm64: Hide system instruction access to Trace registers
  kvm: arm64: nvhe: Save the SPE context early
  kvm: arm64: Disable guest access to trace filter controls
  arm64: Add support for trace synchronization barrier
  arm64: kvm: Enable access to TRBE support for host
  coresight: etm4x: Move ETM to prohibited region for disable
  coresight: etm-perf: Allow an event to use different sinks
  coresight: Do not scan for graph if none is

[PATCH v4 03/19] kvm: arm64: Hide system instruction access to Trace registers

2021-02-25 Thread Suzuki K Poulose
Currently we advertise the ID_AA64DFR0_EL1.TRACEVER for the guest,
when the trace register accesses are trapped (CPTR_EL2.TTA == 1).
So, the guest will get an undefined instruction, if it trusts the
ID registers and accesses one of the trace registers.
Let's be nice to the guest and hide the feature to avoid
unexpected behavior.

Even though this can be done at KVM sysreg emulation layer,
we do this by removing the TRACEVER from the sanitised feature
register field. This is fine as long as the ETM drivers
can handle the individual trace units separately, even
when there are differences among the CPUs.

Cc: Marc Zyngier 
Cc: Will Deacon 
Cc: Catalin Marinas 
Cc: Mark Rutland 
Signed-off-by: Suzuki K Poulose 
---
New patch
---
 arch/arm64/kernel/cpufeature.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 066030717a4c..a4698f09bf32 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -383,7 +383,6 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
 * of support.
 */
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, 
ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
-   ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 
ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 
ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
ARM64_FTR_END,
 };
-- 
2.24.1



[PATCH v4 02/19] perf: aux: Add CoreSight PMU buffer formats

2021-02-25 Thread Suzuki K Poulose
CoreSight PMU supports aux-buffer for the ETM tracing. The trace
generated by the ETM (associated with individual CPUs, like Intel PT)
is captured by a separate IP (CoreSight TMC-ETR/ETF until now).

The TMC-ETR applies formatting of the raw ETM trace data, as it
can collect traces from multiple ETMs, with the TraceID to indicate
the source of a given trace packet.

Arm Trace Buffer Extension is a new "sink" IP, attached to individual
CPUs and thus does not provide additional formatting, like TMC-ETR.

Additionally, a system could have both TRBE *and* TMC-ETR for
the trace collection. e.g, TMC-ETR could be used as a single
trace buffer to collect data from multiple ETMs to correlate
the traces from different CPUs. It is possible to have a
perf session where some events end up collecting the trace
in TMC-ETR while the others in TRBE. Thus we need a way
to identify the type of the trace for each AUX record.

Define the trace formats exported by the CoreSight PMU.
We don't define the flags following the "ETM" as this
information is available to the user when issuing
the session. What is missing is the additional
formatting applied by the "sink" which is decided
at the runtime and the user may not have a control on.

So we define :
 - CORESIGHT format (indicates the Frame format)
 - RAW format (indicates the format of the source)

The default value is CORESIGHT format for all the records
(i.e., == 0). Add the RAW format for others that use
raw format.

Cc: Peter Zijlstra 
Cc: Mike Leach 
Cc: Mathieu Poirier 
Cc: Leo Yan 
Cc: Anshuman Khandual 
Reviewed-by: Mike Leach 
Signed-off-by: Suzuki K Poulose 
---
Changes from previous:
 - Split from the coresight driver specific code
   for ease of merging
---
 include/uapi/linux/perf_event.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index f006eeab6f0e..63971eaef127 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1162,6 +1162,10 @@ enum perf_callchain_context {
 #define PERF_AUX_FLAG_COLLISION0x08/* sample 
collided with another */
 #define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00  /* PMU specific trace 
format type */
 
+/* CoreSight PMU AUX buffer formats */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT   0x /* Default for 
backward compatibility */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of 
the source */
+
 #define PERF_FLAG_FD_NO_GROUP  (1UL << 0)
 #define PERF_FLAG_FD_OUTPUT(1UL << 1)
 #define PERF_FLAG_PID_CGROUP   (1UL << 2) /* pid=cgroup id, per-cpu 
mode only */
-- 
2.24.1



Re: [PATCH] coresight: etm4x: work around clang-12+ build failure

2021-02-25 Thread Suzuki K Poulose

On 2/25/21 9:42 AM, Arnd Bergmann wrote:

From: Arnd Bergmann 

clang-12 fails to build the etm4x driver with -fsanitize=array-bounds:

:1:7: error: expected constant expression in '.inst' directive
.inst (0xd520|2) << 19) | ((1) << 16) | (((0x160 + (i * 4) >> 2))) >> 7) & 0x7)) << 12) | 
((0x160 + (i * 4) >> 2))) & 0xf)) << 8) | (((0x160 + (i * 4) >> 2))) >> 4) & 0x7)) << 
5)))|(.L__reg_num_x8))
   ^
drivers/hwtracing/coresight/coresight-etm4x-core.c:702:4: note: while in macro 
instantiation
 etm4x_relaxed_read32(csa, TRCCNTVRn(i));
 ^
drivers/hwtracing/coresight/coresight-etm4x.h:403:4: note: expanded from macro 
'etm4x_relaxed_read32'
  read_etm4x_sysreg_offset((offset), false)))
  ^
drivers/hwtracing/coresight/coresight-etm4x.h:383:12: note: expanded from macro 
'read_etm4x_sysreg_offset'
 __val = read_etm4x_sysreg_const_offset((offset));  
 \
 ^
drivers/hwtracing/coresight/coresight-etm4x.h:149:2: note: expanded from macro 
'read_etm4x_sysreg_const_offset'
 READ_ETM4x_REG(ETM4x_OFFSET_TO_REG(offset))
 ^
drivers/hwtracing/coresight/coresight-etm4x.h:144:2: note: expanded from macro 
'READ_ETM4x_REG'
 read_sysreg_s(ETM4x_REG_NUM_TO_SYSREG((reg)))
 ^
arch/arm64/include/asm/sysreg.h:1108:15: note: expanded from macro 
'read_sysreg_s'
 asm volatile(__mrs_s("%0", r) : "=r" (__val));  \
  ^
arch/arm64/include/asm/sysreg.h:1074:2: note: expanded from macro '__mrs_s'
"   mrs_s " v ", " __stringify(r) "\n"  \
  ^

It appears that the __builtin_constant_p() check in
read_etm4x_sysreg_offset() falsely returns 'true' here because clang
decides finds that an out-of-bounds access to config->cntr_val[] cannot


s/decides finds/decides/ ?


happen, and then it unrolls the loop with constant register numbers. Then
when actually emitting the output, it fails to figure out the value again.

While this is incorrect behavior in clang, it is easy to work around
by avoiding the out-of-bounds array access. Do this by limiting the
loop counter to the actual dimension of the array.

Link: https://github.com/ClangBuiltLinux/linux/issues/1310
Signed-off-by: Arnd Bergmann 
---
  drivers/hwtracing/coresight/coresight-etm4x-core.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 15016f757828..4cccf874a602 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -691,13 +691,13 @@ static void etm4_disable_hw(void *info)
"timeout while waiting for PM stable Trace Status\n");
  
  	/* read the status of the single shot comparators */

-   for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+   for (i = 0; i < min_t(u32, drvdata->nr_ss_cmp, ETM_MAX_SS_CMP); i++) {
config->ss_status[i] =
etm4x_relaxed_read32(csa, TRCSSCSRn(i));
}
  


There are more places where we do this. So I believe you need the similar
change elsewhere too in the driver. So, that becomes cumbersome for 
circumventing
the compiler issue.

Suzuki


Re: [PATCH V3 06/14] dts: bindings: Document device tree bindings for ETE

2021-02-18 Thread Suzuki K Poulose

On 2/18/21 6:33 PM, Rob Herring wrote:

On Wed, Feb 10, 2021 at 12:33:44PM +, Suzuki K Poulose wrote:

Hi Rob

On 2/9/21 7:00 PM, Rob Herring wrote:

On Wed, Jan 27, 2021 at 02:25:30PM +0530, Anshuman Khandual wrote:

From: Suzuki K Poulose 

Document the device tree bindings for Embedded Trace Extensions.
ETE can be connected to legacy coresight components and thus
could optionally contain a connection graph as described by
the CoreSight bindings.

Cc: devicet...@vger.kernel.org
Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Rob Herring 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
Changes in V3:

- Fixed all DT yaml semantics problems

   Documentation/devicetree/bindings/arm/ete.yaml | 74 
++
   1 file changed, 74 insertions(+)
   create mode 100644 Documentation/devicetree/bindings/arm/ete.yaml

diff --git a/Documentation/devicetree/bindings/arm/ete.yaml 
b/Documentation/devicetree/bindings/arm/ete.yaml
new file mode 100644
index 000..edc1fe2
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ete.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
+# Copyright 2021, Arm Ltd
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/ete.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: ARM Embedded Trace Extensions
+
+maintainers:
+  - Suzuki K Poulose 
+  - Mathieu Poirier 
+
+description: |
+  Arm Embedded Trace Extension(ETE) is a per CPU trace component that
+  allows tracing the CPU execution. It overlaps with the CoreSight ETMv4
+  architecture and has extended support for future architecture changes.
+  The trace generated by the ETE could be stored via legacy CoreSight
+  components (e.g, TMC-ETR) or other means (e.g, using a per CPU buffer
+  Arm Trace Buffer Extension (TRBE)). Since the ETE can be connected to
+  legacy CoreSight components, a node must be listed per instance, along
+  with any optional connection graph as per the coresight bindings.
+  See bindings/arm/coresight.txt.
+
+properties:
+  $nodename:
+pattern: "^ete([0-9a-f]+)$"
+  compatible:
+items:
+  - const: arm,embedded-trace-extension
+
+  cpu:


We've already established 'cpus' for this purpose.



Please see : 
https://lkml.kernel.org/r/9417218b-6eda-373b-a2cb-869089ffc...@arm.com
for my response in the previous version to this and the one with out-ports.


Okay, fair enough.




+description: |
+  Handle to the cpu this ETE is bound to.
+$ref: /schemas/types.yaml#/definitions/phandle
+
+  out-ports:
+type: object


Replace with: $ref: /schemas/graph.yaml#/properties/ports


So, just to confirm again :
The CoreSight graph bindings expect the input ports and output ports
grouped under in-ports{} and out-ports{} respectively to avoid having
to specify the direction of the ports in the individual "port" nodes.
i.e

in-ports {

property: ports
  OR
property: port

required:
OneOf:
ports
port


No, 'ports' as a child of in-ports is not correct. There should only be
'port(@[0-9a-f]+)?' nodes. That's why you need the above $ref added. The
$ref doesn't define the node name is 'ports', but what a 'ports' or
'foo-ports' contains.


Sorry, it is my bad. We don't expect ports{} under in-ports. So your
suggestion is the accurate one. I will respin.






}

out-ports {

# same as above
}

So thats why I added out-ports as a new object, where the ports/port
could be a child node.

Ideally the definition of out-ports /in-ports should go to a common schema
for CoreSight bindings, when we move to Yaml for the existing bindings,
which will follow in a separate series, later.


Yes, maybe, but I'm not sure something common is going to help here.
You'll still have to describe what each 'port' node does in each device
specific binding.


For CoreSight components the end-point of the ports are system specific.
i.e, it could be anything on the other end. There is no fixed end-point
connection.

e.g, ETM could be connected to a Replicator or a Funnel. Same as here
above for ETE. Thus the driver must parse the endpoints and make
the connection path from devices to other devices.

Anyways, will come to that in a different series.

I will fix the in-ports/out-ports for the next version.

Thanks for your guidance.

Cheers
Suzuki





Rob





Re: [PATCH V3 11/14] coresight: sink: Add TRBE driver

2021-02-18 Thread Suzuki K Poulose

On 2/18/21 2:30 PM, Mike Leach wrote:

HI Suzuki,

On Thu, 18 Feb 2021 at 07:50, Suzuki K Poulose  wrote:


Hi Mike

On 2/16/21 9:00 AM, Mike Leach wrote:

Hi Anshuman,

There have been plenty of detailed comments so I will restrict mine to
a few general issues:-

1) Currently there appears to be no sysfs support (I cannot see the
MODE_SYSFS constants running alongside the MODE_PERF ones present in
the other sink drivers). This is present on all other coresight
devices, and must be provided for this device. It is useful for
testing, and there are users out there who will have scripts to use
it. It is not essential it makes it into this set, but should be a
follow up set.


This is mentioned in the cover-letter and as you rightly said
we could add this in a later series.



Yes - I see that it was mentioned at the end as an open question - so
I guess this is my answer!



2) Using FILL mode for TRBE means that the trace will by definition be
lossy. Fill mode will halt collection without cleanly stopping and
flushing the source. This will result in the sink missing the last of
the data from the source as it stops. Even if taking the exception
moves into a prohibited region there is still the possibility the last
trace operations will not be seen. Further it is possible that the


Correct.


last few bytes of trace will be an incomplete packet, and indeed the
start of the next buffer could contain incomplete packets too.


Yes, this is possible.



This operation differs from the other sinks which will only halt after
the sources have stopped and the path has been flushed. This ensures
that the latest trace is complete. The weakness with the older sinks
is the lack of interrupt meaning buffers were frequently wrapped so
that only the latest trace is available.


This is true, when there was no overflow. i.e, we follow the normal
source-stop-flush, sink-stop.



By using TRBE WRAP mode, with a watermark as described in the TRBE
spec, using the interrupts it is possible to approach lossless trace
in a way that is not possible with earlier ETR/ETB. This is something


It may be possible to do lossless trace, but not without double buffering
in perf mode. In perf mode, with a single buffer, we have to honor the
boundaries set by the aux_buffer head and tail, otherwise we could be
corrupting the trace being consumed by the userland.

Please remember that the "water mark" is considered as the END of the
buffer by TRBE (unlike the SoC-600 ETR). So the LIMIT pointer could be
one of :

* Tail pointer ( of the handle space, <=  End_of_the_Buffer)
* Wake up pointer ( when the userspace would like to be woken up ,<= 
End_of_the_Buffer)

So, if we use WRAP mode for perf, the TRBE would overwrite from
the Base, after we hit the LIMIT, where we should have started
writing *after* the LIMIT (when LIMIT < End_of_the_Buffer). Moreover
restarting from the Base is going to be even more troublesome
as it is most likely the data, perf is still collecting.



I agree that the TRBE must write in between head and tail / wakeup.
However, there is no reason that I can see why the trbe_base register
has to remain constant @ the start of the vmapped aux buffer.
A valid trbe write buffer could be set by:
trbe_base >= head (rounded up to page boundary)
trbe_limit <= min(tail, wakeup) (rounded down to page boundary)
trbe_write is then trbe_base + "watermark" offset. - as suggested in
the TRBE spec.


The problem is we are dealing with separate entities. The producer
and the consumer are separate entities playing with a single,
infinite running ring buffer. Had this been a double buffering scheme,
this would work really nice. In fact, I had some plans to do this for
SoC-600 ETR.

Coming back, with the proposed approach :

head write (watermark)  end-of-real buffer
  /  |
 ^---v---^--v---|
/\
base(aligned)limit (wakeup/tail)

In this case, assuming single shared buffer, the TRBE would start
writing from "write" watermark and WRAP at limit, going back to base.

The issues here are :
  - If there were no overflow, we cant update the AUX handle
unless we pad from head to write (which might be significant).

 If there was overflow :
  - You have wrongly ordered data. i.e, the older trace is at "write"
and newer trace is at "base". Unless we copy the data written from
base to the end of "limit", the userspace can't consume it. Or else
it thinks the data at base is older and this gets the trace decoding
gone for a toss.

As you can see, two players dealing with a single buffer doesn't allow
for the kind of flow possible with WRAP.

On the other hand this works perfectly with double buffering. We could
copy the data from the "write" to limit, followed by from "base" to the
current write ptr. This could well be used for sysfs, but since we
have 

Re: [PATCH v7 28/28] coresight: Add support for v8.4 SelfHosted tracing

2021-02-18 Thread Suzuki K Poulose

Hi Mike

On 2/12/21 5:30 PM, Mike Leach wrote:

Hi Suzuki,

On Fri, 12 Feb 2021 at 15:36, Suzuki K Poulose  wrote:


Hi Mike

On 2/12/21 10:34 AM, Mike Leach wrote:

Hi Mathieu, Suzuki,

Sorry for the really late response on this patch, but I noticed a
problem while doing a review of the ETE / TRBE set. (TRBE specs
mention TRFCR_ELx, so I was confirming a couple of things).

On Sun, 10 Jan 2021 at 22:49, Suzuki K Poulose  wrote:


From: Jonathan Zhou 

v8.4 tracing extensions added support for trace filtering controlled
by TRFCR_ELx. This must be programmed to allow tracing at EL1/EL2 and
EL0. The timestamp used is the virtual time. Also enable CONTEXIDR_EL2
tracing if we are running the kernel at EL2.

Cc: Catalin Marinas 
Cc: Mike Leach 
Cc: Will Deacon 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Jonathan Zhou 
[ Move the trace filtering setup etm_init_arch_data() and
   clean ups]
Signed-off-by: Suzuki K Poulose 
---
   .../coresight/coresight-etm4x-core.c  | 25 +++
   1 file changed, 25 insertions(+)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 3d3165dd09d4..18c1a80abab8 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -859,6 +859,30 @@ static bool etm4_init_csdev_access(struct etmv4_drvdata 
*drvdata,
  return false;
   }

+static void cpu_enable_tracing(void)
+{
+   u64 dfr0 = read_sysreg(id_aa64dfr0_el1);
+   u64 trfcr;
+
+   if (!cpuid_feature_extract_unsigned_field(dfr0, 
ID_AA64DFR0_TRACE_FILT_SHIFT))
+   return;
+
+   /*
+* If the CPU supports v8.4 SelfHosted Tracing, enable
+* tracing at the kernel EL and EL0, forcing to use the
+* virtual time as the timestamp.
+*/
+   trfcr = (TRFCR_ELx_TS_VIRTUAL |
+TRFCR_ELx_ExTRE |
+TRFCR_ELx_E0TRE);
+
+   /* If we are running at EL2, allow tracing the CONTEXTIDR_EL2. */
+   if (is_kernel_in_hyp_mode())
+   trfcr |= TRFCR_EL2_CX;
+


This is wrong - CX bit is present on TRFCR_EL2, not TRFCR_EL1.


Why is this wrong ? We do this only when we are in EL2.



Sorry  - must have been looking at an older version of the ARMARM when
I looked for EL1 registers that are aliased to EL2.  So this does
indeed work!


Moreover, TRFCR_EL2 has a separate enables for tracing at EL0 and EL2.



True, that is for EL0&2 translation regimes. i.e, tracing EL0 with
the kernel running at EL2. But bits TRFCR_EL2.E2TRE == TRFCR_EL1.E1TRE
If you notice, we name the bit TRFCR_ELx_ExTRE. And E0TRE == E0HTRE.

So we do the following :

1) When kernel running at EL2:
  Enable tracing at EL2 and EL0 and context tracking
2) When kernel running at EL1:
  Enable tracing at EL1 and EL0.



Secondly - is this correct in principle?  Should the driver not be
reading the access it is permitted by the kernel, rather than giving
itself unfettered access to trace where it wants to.


I don't follow the "access permitted by the kernel" here. What are we referring
to ?



By that I mean that as I suggest below this should be controlled by
what we could call the hypervisor, rather than a driver.


Surely TRFCR_ELx  levels should be chosen in KConfig  and then should
be set up in kernel initialisation?


I disagree with yet another Kconfig. This is a basic requirement for
enabling the trace collection. It is not something that we can optionally
use from the architecture. So we should transparently do the right
thing for making sure that we set up the system for something that
didn't require any other steps. Or in other words, if we add a Kconfig
option for TRFCR programming, if someone forgets to select it
when they upgraded the kernel they are in for a surprisingly long
debugging session to find why the trace doesn't work.

As for the TRFCR programming, we have two choices. etm4x driver
or generic boot up for the CPU. I preferred to do this in the
driver as we can enable it only if trace drivers are available.



The point is that TRFCR are not part of the controlling registers for
the ETE or any trace source device. The architecture manual seems to
regard them as being controlled by the hypervisor, rather than the PE
trace device. This implies that the control feature is designed to be
independent from the trace generation features.

I thought they were there to allow virtualisation code to determine
what gets traced and what is prohibited, and what view the trace sees
of the clock. If you simple switch everything on from the driver and


Exactly.


control the ELs traced from the ETE / ETM registers then what are they
there for?


Yes. The hyp (KVM in this case) traps access to the ETE/ETM registers
by default and thus the EL1 can't trace itself. When we get to Virtualization
support, indeed we would need to use TRFCR in the Hyp to exclude the
host from being traced.



This 

Re: [PATCH V3 11/14] coresight: sink: Add TRBE driver

2021-02-18 Thread Suzuki K Poulose
 trace buffer could be prohibited by a higher exception level
(EL3 or EL2), indicated by TRBIDR_EL1.P. The TRBE can also generate a CPU
private interrupt (PPI) on address translation errors and when the buffer
is full. Overall implementation here is inspired from the Arm SPE driver.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
Changes in V3:

- Added new DT bindings document TRBE.yaml
- Changed TRBLIMITR_TRIG_MODE_SHIFT from 2 to 3
- Dropped isb() from trbe_reset_local()
- Dropped gap between (void *) and buf->trbe_base
- Changed 'int' to 'unsigned int' in is_trbe_available()
- Dropped unused function set_trbe_running(), set_trbe_virtual_mode(),
   set_trbe_enabled() and set_trbe_limit_pointer()
- Changed get_trbe_flag_update(), is_trbe_programmable() and
   get_trbe_address_align() to accept TRBIDR value
- Changed is_trbe_running(), is_trbe_abort(), is_trbe_wrap(), is_trbe_trg(),
   is_trbe_irq(), get_trbe_bsc() and get_trbe_ec() to accept TRBSR value
- Dropped snapshot mode condition in arm_trbe_alloc_buffer()
- Exit arm_trbe_init() when arm64_kernel_unmapped_at_el0() is enabled
- Compute trbe_limit before trbe_write to get the updated handle
- Added trbe_stop_and_truncate_event()
- Dropped trbe_handle_fatal()

  Documentation/trace/coresight/coresight-trbe.rst |   39 +
  arch/arm64/include/asm/sysreg.h  |1 +
  drivers/hwtracing/coresight/Kconfig  |   11 +
  drivers/hwtracing/coresight/Makefile |1 +
  drivers/hwtracing/coresight/coresight-trbe.c | 1023 ++
  drivers/hwtracing/coresight/coresight-trbe.h |  160 
  6 files changed, 1235 insertions(+)
  create mode 100644 Documentation/trace/coresight/coresight-trbe.rst
  create mode 100644 drivers/hwtracing/coresight/coresight-trbe.c
  create mode 100644 drivers/hwtracing/coresight/coresight-trbe.h

diff --git a/Documentation/trace/coresight/coresight-trbe.rst 
b/Documentation/trace/coresight/coresight-trbe.rst
new file mode 100644
index 000..1cbb819
--- /dev/null
+++ b/Documentation/trace/coresight/coresight-trbe.rst
@@ -0,0 +1,39 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==
+Trace Buffer Extension (TRBE).
+==
+
+:Author:   Anshuman Khandual 
+:Date: November 2020
+
+Hardware Description
+
+
+Trace Buffer Extension (TRBE) is a percpu hardware which captures in system
+memory, CPU traces generated from a corresponding percpu tracing unit. This
+gets plugged in as a coresight sink device because the corresponding trace
+generators (ETE), are plugged in as source device.
+
+The TRBE is not compliant to CoreSight architecture specifications, but is
+driven via the CoreSight driver framework to support the ETE (which is
+CoreSight compliant) integration.
+
+Sysfs files and directories
+---
+
+The TRBE devices appear on the existing coresight bus alongside the other
+coresight devices::
+
+>$ ls /sys/bus/coresight/devices
+trbe0  trbe1  trbe2 trbe3
+
+The ``trbe`` named TRBEs are associated with a CPU.::
+
+>$ ls /sys/bus/coresight/devices/trbe0/
+align dbm
+
+*Key file items are:-*
+   * ``align``: TRBE write pointer alignment
+   * ``dbm``: TRBE updates memory with access and dirty flags
+
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 85ae4db..9e2e9b7 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -97,6 +97,7 @@
  #define SET_PSTATE_UAO(x)   __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) 
<< PSTATE_Imm_shift))
  #define SET_PSTATE_SSBS(x)  __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) 
<< PSTATE_Imm_shift))
  #define SET_PSTATE_TCO(x)   __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) 
<< PSTATE_Imm_shift))
+#define TSB_CSYNC   __emit_inst(0xd503225f)

  #define set_pstate_pan(x)   asm volatile(SET_PSTATE_PAN(x))
  #define set_pstate_uao(x)   asm volatile(SET_PSTATE_UAO(x))
diff --git a/drivers/hwtracing/coresight/Kconfig 
b/drivers/hwtracing/coresight/Kconfig
index f154ae7..aa657ab 100644
--- a/drivers/hwtracing/coresight/Kconfig
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -164,6 +164,17 @@ config CORESIGHT_CTI
To compile this driver as a module, choose M here: the
module will be called coresight-cti.

+config CORESIGHT_TRBE
+bool "Trace Buffer Extension (TRBE) driver"
+depends on ARM64
+help
+  This driver provides support for percpu Trace Buffer Extension (TRBE).
+  TRBE always needs to be used along with its corresponding percpu ETE
+  component. ETE generates trace data which is then captured with TRBE.
+  Unlike traditional sink devices, TRBE is a CPU feature accessible via
+  system registers. But its explicit dependency with trace unit (ETE)
+  requires it

Re: [PATCH v7 28/28] coresight: Add support for v8.4 SelfHosted tracing

2021-02-12 Thread Suzuki K Poulose

Hi Mike

On 2/12/21 10:34 AM, Mike Leach wrote:

Hi Mathieu, Suzuki,

Sorry for the really late response on this patch, but I noticed a
problem while doing a review of the ETE / TRBE set. (TRBE specs
mention TRFCR_ELx, so I was confirming a couple of things).

On Sun, 10 Jan 2021 at 22:49, Suzuki K Poulose  wrote:


From: Jonathan Zhou 

v8.4 tracing extensions added support for trace filtering controlled
by TRFCR_ELx. This must be programmed to allow tracing at EL1/EL2 and
EL0. The timestamp used is the virtual time. Also enable CONTEXIDR_EL2
tracing if we are running the kernel at EL2.

Cc: Catalin Marinas 
Cc: Mike Leach 
Cc: Will Deacon 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Jonathan Zhou 
[ Move the trace filtering setup etm_init_arch_data() and
  clean ups]
Signed-off-by: Suzuki K Poulose 
---
  .../coresight/coresight-etm4x-core.c  | 25 +++
  1 file changed, 25 insertions(+)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 3d3165dd09d4..18c1a80abab8 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -859,6 +859,30 @@ static bool etm4_init_csdev_access(struct etmv4_drvdata 
*drvdata,
 return false;
  }

+static void cpu_enable_tracing(void)
+{
+   u64 dfr0 = read_sysreg(id_aa64dfr0_el1);
+   u64 trfcr;
+
+   if (!cpuid_feature_extract_unsigned_field(dfr0, 
ID_AA64DFR0_TRACE_FILT_SHIFT))
+   return;
+
+   /*
+* If the CPU supports v8.4 SelfHosted Tracing, enable
+* tracing at the kernel EL and EL0, forcing to use the
+* virtual time as the timestamp.
+*/
+   trfcr = (TRFCR_ELx_TS_VIRTUAL |
+TRFCR_ELx_ExTRE |
+TRFCR_ELx_E0TRE);
+
+   /* If we are running at EL2, allow tracing the CONTEXTIDR_EL2. */
+   if (is_kernel_in_hyp_mode())
+   trfcr |= TRFCR_EL2_CX;
+


This is wrong - CX bit is present on TRFCR_EL2, not TRFCR_EL1.


Why is this wrong ? We do this only when we are in EL2.


Moreover, TRFCR_EL2 has a separate enables for tracing at EL0 and EL2.



True, that is for EL0&2 translation regimes. i.e, tracing EL0 with
the kernel running at EL2. But bits TRFCR_EL2.E2TRE == TRFCR_EL1.E1TRE
If you notice, we name the bit TRFCR_ELx_ExTRE. And E0TRE == E0HTRE.

So we do the following :

  1) When kernel running at EL2:
Enable tracing at EL2 and EL0 and context tracking
  2) When kernel running at EL1:
Enable tracing at EL1 and EL0.



Secondly - is this correct in principle?  Should the driver not be
reading the access it is permitted by the kernel, rather than giving
itself unfettered access to trace where it wants to.


I don't follow the "access permitted by the kernel" here. What are we referring
to ?


Surely TRFCR_ELx  levels should be chosen in KConfig  and then should
be set up in kernel initialisation?


I disagree with yet another Kconfig. This is a basic requirement for
enabling the trace collection. It is not something that we can optionally
use from the architecture. So we should transparently do the right
thing for making sure that we set up the system for something that
didn't require any other steps. Or in other words, if we add a Kconfig
option for TRFCR programming, if someone forgets to select it
when they upgraded the kernel they are in for a surprisingly long
debugging session to find why the trace doesn't work.

As for the TRFCR programming, we have two choices. etm4x driver
or generic boot up for the CPU. I preferred to do this in the
driver as we can enable it only if trace drivers are available.

Cheers
Suzuki



Regards

Mike




+   write_sysreg_s(trfcr, SYS_TRFCR_EL1);
+}
+
  static void etm4_init_arch_data(void *info)
  {
 u32 etmidr0;
@@ -1044,6 +1068,7 @@ static void etm4_init_arch_data(void *info)
 /* NUMCNTR, bits[30:28] number of counters available for tracing */
 drvdata->nr_cntr = BMVAL(etmidr5, 28, 30);
 etm4_cs_lock(drvdata, csa);
+   cpu_enable_tracing();
  }

  static inline u32 etm4_get_victlr_access_type(struct etmv4_config *config)
--
2.24.1




--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK





Re: [PATCH v3 8/8] Documentation: coresight: Add PID tracing description

2021-02-11 Thread Suzuki K Poulose

On 2/6/21 3:08 PM, Leo Yan wrote:

After support the PID tracing for the kernel in EL1 or EL2, the usage
gets more complicated.

This patch gives description for the PMU formats of contextID configs,
this can help users to understand how to control the knobs for PID
tracing when the kernel is in different ELs.

Signed-off-by: Leo Yan 
---
  Documentation/trace/coresight/coresight.rst | 32 +
  1 file changed, 32 insertions(+)

diff --git a/Documentation/trace/coresight/coresight.rst 
b/Documentation/trace/coresight/coresight.rst
index 0b73acb44efa..169749efd8d1 100644
--- a/Documentation/trace/coresight/coresight.rst
+++ b/Documentation/trace/coresight/coresight.rst
@@ -512,6 +512,38 @@ The --itrace option controls the type and frequency of 
synthesized events
  Note that only 64-bit programs are currently supported - further work is
  required to support instruction decode of 32-bit Arm programs.
  
+2.2) Tracing PID

+
+The kernel can be built to write the PID value into the PE ContextID registers.
+For a kernel running at EL1, the PID is stored in CONTEXTIDR_EL1.  A PE may
+implement Arm Virtualization Host Extensions (VHE), which the kernel can
+run at EL2 as a virtualisation host; in this case, the PID value is stored in
+CONTEXTIDR_EL2.
+
+perf provides PMU formats that program the ETM to insert these values into the
+trace data; the PMU formats are defined as below:
+
+  "contextid1": Available on both EL1 kernel and EL2 kernel.  When the
+kernel is running at EL1, "contextid1" enables the PID
+tracing; when the kernel is running at EL2, this enables
+tracing the PID of guest applications.
+
+  "contextid2": Only usable when the kernel is running at EL2.  When
+selected, enables PID tracing on EL2 kernel.
+
+  "contextid":  Will be an alias for the option that enables PID
+tracing.  I.e,
+contextid == contextid1, on EL1 kernel.
+contextid == contextid2, on EL2 kernel.
+
+perf will always enable PID tracing at the relevant EL, this is accomplished by
+automatically enable the "contextid" config - but for EL2 it is possible to 
make
+specific adjustments using configs "contextid1" and "contextid2", E.g. if a 
user
+wants to trace PIDs for both host and guest, the two configs "contextid1" and
+"contextid2" can be set at the same time:
+
+  perf record -e cs_etm/contextid1,contextid2/u -- vm
+
  



Reviewed-by: Suzuki K Poulose 


Re: [PATCH v3 6/8] perf cs-etm: Add helper cs_etm__get_pid_fmt()

2021-02-11 Thread Suzuki K Poulose

On 2/6/21 3:08 PM, Leo Yan wrote:

This patch adds helper function cs_etm__get_pid_fmt(), by passing
parameter "traceID", it returns the PID format.

Signed-off-by: Leo Yan 


Reviewed-by: Suzuki K Poulose 


Re: [PATCH V3 06/14] dts: bindings: Document device tree bindings for ETE

2021-02-10 Thread Suzuki K Poulose

Hi Rob

On 2/9/21 7:00 PM, Rob Herring wrote:

On Wed, Jan 27, 2021 at 02:25:30PM +0530, Anshuman Khandual wrote:

From: Suzuki K Poulose 

Document the device tree bindings for Embedded Trace Extensions.
ETE can be connected to legacy coresight components and thus
could optionally contain a connection graph as described by
the CoreSight bindings.

Cc: devicet...@vger.kernel.org
Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Rob Herring 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
Changes in V3:

- Fixed all DT yaml semantics problems

  Documentation/devicetree/bindings/arm/ete.yaml | 74 ++
  1 file changed, 74 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/arm/ete.yaml

diff --git a/Documentation/devicetree/bindings/arm/ete.yaml 
b/Documentation/devicetree/bindings/arm/ete.yaml
new file mode 100644
index 000..edc1fe2
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ete.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
+# Copyright 2021, Arm Ltd
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/ete.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: ARM Embedded Trace Extensions
+
+maintainers:
+  - Suzuki K Poulose 
+  - Mathieu Poirier 
+
+description: |
+  Arm Embedded Trace Extension(ETE) is a per CPU trace component that
+  allows tracing the CPU execution. It overlaps with the CoreSight ETMv4
+  architecture and has extended support for future architecture changes.
+  The trace generated by the ETE could be stored via legacy CoreSight
+  components (e.g, TMC-ETR) or other means (e.g, using a per CPU buffer
+  Arm Trace Buffer Extension (TRBE)). Since the ETE can be connected to
+  legacy CoreSight components, a node must be listed per instance, along
+  with any optional connection graph as per the coresight bindings.
+  See bindings/arm/coresight.txt.
+
+properties:
+  $nodename:
+pattern: "^ete([0-9a-f]+)$"
+  compatible:
+items:
+  - const: arm,embedded-trace-extension
+
+  cpu:


We've already established 'cpus' for this purpose.



Please see : 
https://lkml.kernel.org/r/9417218b-6eda-373b-a2cb-869089ffc...@arm.com
for my response in the previous version to this and the one with out-ports.


+description: |
+  Handle to the cpu this ETE is bound to.
+$ref: /schemas/types.yaml#/definitions/phandle
+
+  out-ports:
+type: object


Replace with: $ref: /schemas/graph.yaml#/properties/ports


So, just to confirm again :
The CoreSight graph bindings expect the input ports and output ports
grouped under in-ports{} and out-ports{} respectively to avoid having
to specify the direction of the ports in the individual "port" nodes.
i.e

in-ports {

property: ports
  OR
property: port

required:
OneOf:
ports
port
}

out-ports {

# same as above
}

So thats why I added out-ports as a new object, where the ports/port
could be a child node.

Ideally the definition of out-ports /in-ports should go to a common schema
for CoreSight bindings, when we move to Yaml for the existing bindings,
which will follow in a separate series, later.




+description: |
+  Output connections from the ETE to legacy CoreSight trace bus.
+properties:
+  port:
+$ref: /schemas/graph.yaml#/properties/port


Actually, if only 1 port ever, you can drop 'out-ports' and just have
'port'. Not sure though if the coresight stuff depends on 'out-ports'.



Cheers
Suzuki


Re: [PATCH v2 7/7] Documentation: coresight: Add PID tracing description

2021-02-04 Thread Suzuki K Poulose

On 2/4/21 4:09 AM, Leo Yan wrote:

Hi Mike,

On Wed, Feb 03, 2021 at 05:39:54PM +, Mike Leach wrote:

[...]


+2.2) Tracing PID
+
+When the kernel is running at EL2 with Virtualization Host Extensions (VHE),
+perf records CONTEXTIDR_EL2 in the trace data and can be used as PID when
+decoding; and if the kernel is running at EL1 with nVHE, CONTEXTIDR_EL1 is
+traced for PID.
+


Would this introductory paragraph be better if is explained where the
kernel stores the PID for the different levels, then we logically move
on to how to trace this in perf.

e.g:-

"The kernel can be built to write the PID value into the PE ContextID registers.
For a kernel running at EL1, the PID is stored in CONTEXTIDR_EL1.
A PE may implement ARM Virtualisation Host Extensions (VHE), where the
kernel can run at EL2 as a virtualisation host.
In this case the PID value is stored in CONTEXTIDR_EL2.
perf provides PMU options which program the ETM to insert these values
into the trace data."


Will in next spin; thanks a lot for writing up!


+To support tracing PID for the kernel runs at different exception levels,
+the PMU formats are defined as follow:
+
+  "contextid1": Available on both EL1 kernel and EL2 kernel.  When the
+kernel is running at EL1, "contextid1" enables the PID
+tracing; when the kernel is running at EL2, this enables
+tracing the PID of guest applications.
+
+  "contextid2": Only usable when the kernel is running at EL2.  When
+selected, enables PID tracing on EL2 kernel.
+
+  "contextid":  Will be an alias for the option that enables PID
+tracing.  I.e,
+contextid == contextid1, on EL1 kernel.
+contextid == contextid2, on EL2 kernel.
+
+The perf tool automatically sets corresponding bit for the "contextid" config,
+therefore, the user doesn't have to bother which EL the kernel is running.
+
+  i.e, perf record -e cs_etm/contextid/u -- uname
+or perf record -e cs_etm//u -- uname
+
+will always do the "PID" tracing, independent of the kernel EL.
+


This is telling me that both cs_etm// and cs_etm/contextid/ have the
same effect - trace PID. Is this correct?




Just to make this clear, this is not a side effect of the patch. The perf
tool driver automatically adds the "contextid" tracing and timestamp for
"system wide" and process bound events, as the traces get mixed into
the single sink. So these options are added implicitly by the perf tool
to make the decoding easier.


Correct.


If so, then contextid, contextid1 and contextid2 have no effect except
in specific EL2 circumstances.


These are required when perf tool may not automatically request them.
With this series the EL2 is on par with the EL1, where we get the PID
automatically in the trace.

And as you rightly said, contextid1, contextid2 are for EL2 specific
usage.

Cheers
Suzuki



Yes, exactly.

Thanks,
Leo


+When the kernel is running at EL2 with VHE, if user wants to trace both the
+PIDs for both host and guest, the two configs "contextid1" and "contextid2"
+can be set at the same time:
+
+  perf record -e cs_etm/contextid1,contextid2/u -- uname
+




Regards

Mike



  Generating coverage files for Feedback Directed Optimization: AutoFDO
  -
--
2.25.1




--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK




Re: [PATCH v2 6/7] perf cs-etm: Detect pid in VMID for kernel running at EL2

2021-02-04 Thread Suzuki K Poulose

On 2/4/21 4:00 AM, Leo Yan wrote:

On Tue, Feb 02, 2021 at 11:29:47PM +, Suzuki Kuruppassery Poulose wrote:

On 2/2/21 4:38 PM, Leo Yan wrote:

From: Suzuki K Poulose 

The PID of the task could be traced as VMID when the kernel is running
at EL2.  Teach the decoder to look for VMID when the CONTEXTIDR (Arm32)
or CONTEXTIDR_EL1 (Arm64) is invalid but we have a valid VMID.

Cc: Mike Leach 
Cc: Mathieu Poirier 
Cc: Al Grant 
Co-developed-by: Leo Yan 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Leo Yan 
---
   .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 32 ---
   1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c 
b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 3f4bc4050477..fb2a163ff74e 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -6,6 +6,7 @@
* Author: Mathieu Poirier 
*/
+#include 
   #include 
   #include 
   #include 
@@ -491,13 +492,36 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
const ocsd_generic_trace_elem *elem,
const uint8_t trace_chan_id)
   {
-   pid_t tid;
+   pid_t tid = -1;
+   u64 pid_fmt;
+   int ret;
-   /* Ignore PE_CONTEXT packets that don't have a valid contextID */
-   if (!elem->context.ctxt_id_valid)
+   ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt);
+   if (ret)


Is this something we can cache in this function ? e.g,
static u64 pid_fmt;

if (!pid_fmt)
ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt);

As all the ETMs will be running at the same exception level.


Sorry for making you repeat your comments.

To be honest, I considered this after reading your comment on the previous
series, but I thought it was possible for multiple CPUs to have different
PID formats, especially on big.LITTLE systems.  After reading your suggestion
again, I think my concern is not valid: even on big.LITTLE, all CPUs
should run at the same kernel exception level.

So I will follow your suggestion and cache "pid_fmt".


No problem.






+   return OCSD_RESP_FATAL_SYS_ERR;
+
+   /*
+* Process the PE_CONTEXT packets if we have a valid contextID or VMID.
+* If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2
+* as VMID, Bit ETM_OPT_CTXTID2 is set in this case.
+*/
+   switch (pid_fmt) {
+   case BIT(ETM_OPT_CTXTID):
+   if (elem->context.ctxt_id_valid)
+   tid = elem->context.context_id;
+   break;
+   case BIT(ETM_OPT_CTXTID2) | BIT(ETM_OPT_CTXTID):


I would rather fix the cs_etm__get_pid_fmt() to return either of these
as commented. i.e, ETM_OPT_CTXTID or ETM_OPT_CTXTID2. Thus we don't
need this case.


I explained why I set both bits for ETM_OPT_CTXTID and ETM_OPT_CTXTID2
in the patch 05/07.  Could you take a look for it?


I have responded to the comment in the patch.




With the above two addressed:

Reviewed-by: Suzuki K Poulose 





Thanks
Suzuki


Re: [PATCH v2 5/7] perf cs-etm: Add helper cs_etm__get_pid_fmt()

2021-02-04 Thread Suzuki K Poulose

On 2/4/21 3:47 AM, Leo Yan wrote:

On Tue, Feb 02, 2021 at 11:19:22PM +, Suzuki Kuruppassery Poulose wrote:

On 2/2/21 4:38 PM, Leo Yan wrote:

This patch adds helper function cs_etm__get_pid_fmt(), by passing
parameter "traceID", it returns the PID format.

Signed-off-by: Leo Yan 
---
   tools/perf/util/cs-etm.c | 43 
   tools/perf/util/cs-etm.h |  1 +
   2 files changed, 44 insertions(+)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index a2a369e2fbb6..8194ddbd01e5 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -7,6 +7,7 @@
*/
   #include 
+#include 
   #include 
   #include 
   #include 
@@ -156,6 +157,48 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
return 0;
   }
+/*
+ * The returned PID format is presented by two bits:
+ *
+ *   Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced;
+ *   Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced.
+ *
+ * It's possible that these two bits are set together, this means the tracing
+ * contains PIDs for both CONTEXTIDR_EL1 and CONTEXTIDR_EL2.


This is a bit confusing. If both the bits are set, the session
was run on an EL2 kernel. Thus, the PID is always in CONTEXTIDR_EL2.


Sorry for confusion.  I'd like to rephrase as:

It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 are
enabled at the same time when the session runs on an EL2 kernel.  This
means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be recorded in
the trace data, the tool will selectively use CONTEXTIDR_EL2 as PID.


+ */
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt)
+{
+   struct int_node *inode;
+   u64 *metadata, val;
+
+   inode = intlist__find(traceid_list, trace_chan_id);
+   if (!inode)
+   return -EINVAL;
+
+   metadata = inode->priv;
+
+   if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
+   val = metadata[CS_ETM_ETMCR];
+   /* CONTEXTIDR is traced */
+   if (val & BIT(ETM_OPT_CTXTID))
+   *pid_fmt = BIT(ETM_OPT_CTXTID);
+   } else {
+   val = metadata[CS_ETMV4_TRCCONFIGR];
+
+   *pid_fmt = 0;
+
+   /* CONTEXTIDR_EL2 is traced */
+   if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
+   *pid_fmt = BIT(ETM_OPT_CTXTID2);
+
+   /* CONTEXTIDR_EL1 is traced */
+   if (val & BIT(ETM4_CFG_BIT_CTXTID))


I haven't looked at how this gets used. But, Shouldn't this be :

else if (val & BIT(ETM4_CFG_BIT_CTXTID)) ?


Actually it's deliberately to set both bits ETM_OPT_CTXTID2 and
ETM_OPT_CTXTID if user has enable configs "contextid1" and
"contextid2".  So this is exactly the reversed flow in the
function cs_etmv4_get_config().


The point is, we don't care if the user selected both options. What we
care is, where can we find the PID. CONTEXTIDR_EL1 or CONTEXTIDR_EL2.
As such, get_pid_fmt simply should make that decision and pass it on.
So, if the CONTEXTIDR_EL2 is selected (which can only be done successfully
on an EL2 kernel), that's our PID.

So we should return the format for the PID here. i.e
 ETM_OPT_CTXTID2 OR ETM_OPT_CTXTID. But not both.

Cheers
Suzuki


Re: [PATCH V3 07/14] coresight: etm-perf: Handle stale output handles

2021-02-03 Thread Suzuki K Poulose

On 2/3/21 7:05 PM, Mathieu Poirier wrote:

On Wed, Jan 27, 2021 at 02:25:31PM +0530, Anshuman Khandual wrote:

From: Suzuki K Poulose 

The context associated with an ETM for a given perf event
includes :
   - handle -> the perf output handle for the AUX buffer.
   - the path for the trace components
   - the buffer config for the sink.

The path and the buffer config are part of the "aux_priv" data
(etm_event_data) setup by the setup_aux() callback, and made available
via perf_get_aux(handle).

Now with a sink supporting IRQ, the sink could "end" an output
handle when the buffer reaches the programmed limit and would try
to restart a handle. This could fail if there is not enough
space left in the AUX buffer (e.g, the userspace has not consumed
the data). This leaves the "handle" disconnected from the "event"
and also the "perf_get_aux()" cleared. This all happens within
the sink driver, without the etm_perf driver being aware.
Now when the event is actually stopped, etm_event_stop()
will need to access the "event_data". But since the handle
is not valid anymore, we lose the information to stop the
"trace" path. So, we need a reliable way to access the etm_event_data
even when the handle may not be active.

This patch replaces the per_cpu handle array with a per_cpu context
for the ETM, which tracks the "handle" as well as the "etm_event_data".
The context notes the etm_event_data at etm_event_start() and clears
it at etm_event_stop(). This makes sure that we don't access a
stale "etm_event_data" as we are guaranteed that it is not
freed by free_aux() as long as the event is active and tracing,
also provides us with access to the critical information
needed to wind up a session even in the absence of an active
output_handle.

This is not an issue for the legacy sinks as none of them supports
an IRQ and is centrally handled by the etm-perf.

Cc: Mathieu Poirier 
Cc: Anshuman Khandual 
Cc: Leo Yan 
Cc: Mike Leach 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
  drivers/hwtracing/coresight/coresight-etm-perf.c | 45 +---
  1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c 
b/drivers/hwtracing/coresight/coresight-etm-perf.c
index eb9e7e9..a3977b0 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -24,7 +24,26 @@
  static struct pmu etm_pmu;
  static bool etm_perf_up;
  
-static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle);

+/*
+ * An ETM context for a running event includes the perf aux handle
+ * and aux_data. For ETM, the aux_data (etm_event_data), consists of
+ * the trace path and the sink configuration. The event data is accessible
+ * via perf_get_aux(handle). However, a sink could "end" a perf output
+ * handle via the IRQ handler. And if the "sink" encounters a failure
+ * to "begin" another session (e.g due to lack of space in the buffer),
+ * the handle will be cleared. Thus, the event_data may not be accessible
+ * from the handle when we get to the etm_event_stop(), which is required
+ * for stopping the trace path. The event_data is guaranteed to stay alive
+ * until "free_aux()", which cannot happen as long as the event is active on
+ * the ETM. Thus the event_data for the session must be part of the ETM context
+ * to make sure we can disable the trace path.
+ */
+struct etm_ctxt {
+   struct perf_output_handle handle;
+   struct etm_event_data *event_data;
+};
+
+static DEFINE_PER_CPU(struct etm_ctxt, etm_ctxt);
  static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
  
  /* ETMv3.5/PTM's ETMCR is 'config' */

@@ -332,7 +351,8 @@ static void etm_event_start(struct perf_event *event, int 
flags)
  {
int cpu = smp_processor_id();
struct etm_event_data *event_data;
-   struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle);
+   struct etm_ctxt *ctxt = this_cpu_ptr(&etm_ctxt);
+   struct perf_output_handle *handle = &ctxt->handle;
struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
struct list_head *path;


 if (!csdev)
 goto fail;

 /*
  * Something went wrong if an event data is already associated
  * with a context.
  */
 if (WARN_ONE(ctxt->event_data))
 goto fail;
  

@@ -374,6 +394,8 @@ static void etm_event_start(struct perf_event *event, int 
flags)
if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
goto fail_disable_path;
  
+	/* Save the event_data for this ETM */

+   ctxt->event_data = event_data;
  out:
return;
  
@@ -392,13 +414,20 @@ static void etm_event_stop(struct perf_event *event, int mode)

int cpu = smp_processor_id();
unsigned long size;

Re: [PATCH V3 04/14] coresight: ete: Add support for ETE sysreg access

2021-02-03 Thread Suzuki K Poulose

On 2/2/21 5:52 PM, Mathieu Poirier wrote:

On Wed, Jan 27, 2021 at 02:25:28PM +0530, Anshuman Khandual wrote:

From: Suzuki K Poulose 

Add support for handling the system registers for Embedded Trace
Extensions (ETE). ETE shares most of the registers with ETMv4 except
for some and also adds some new registers. Re-arrange the ETMv4x list
to share the common definitions and add the ETE sysreg support.

Cc: Mike Leach 
Cc: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
  drivers/hwtracing/coresight/coresight-etm4x-core.c | 32 +
  drivers/hwtracing/coresight/coresight-etm4x.h  | 52 ++
  2 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 9edf8be..9e92d2a 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -114,6 +114,38 @@ void etm4x_sysreg_write(u64 val, u32 offset, bool 
_relaxed, bool _64bit)
}
  }
  
+u64 ete_sysreg_read(u32 offset, bool _relaxed, bool _64bit)

+{
+   u64 res = 0;
+
+   switch (offset) {
+   ETE_READ_CASES(res)
+   default :
+   WARN_ONCE(1, "ete: trying to read unsupported register @%x\n",
+offset);


Alignment


+   }
+
+   if (!_relaxed)
+   __iormb(res);   /* Imitate the !relaxed I/O helpers */
+
+   return res;
+}
+
+void ete_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit)
+{
+   if (!_relaxed)
+   __iowmb();  /* Imitate the !relaxed I/O helpers */
+   if (!_64bit)
+   val &= GENMASK(31, 0);
+
+   switch (offset) {
+   ETE_WRITE_CASES(val)
+   default :
+   WARN_ONCE(1, "ete: trying to write to unsupported register 
@%x\n",
+   offset);


Alignment


+   }
+}


The etm4x_sysreg_xyz() equivalent of these use a pr_warn_ratelimited() rather
than a WARN_ONCE().

With that:

Reviewed-by: Mathieu Poirier 


Converted both instances to pr_warn_ratelimited() and fixed the alignment.

Cheers
Suzuki



Re: [PATCH v2 6/7] perf cs-etm: Detect pid in VMID for kernel running at EL2

2021-02-02 Thread Suzuki K Poulose

On 2/2/21 4:38 PM, Leo Yan wrote:

From: Suzuki K Poulose 

The PID of the task could be traced as VMID when the kernel is running
at EL2.  Teach the decoder to look for VMID when the CONTEXTIDR (Arm32)
or CONTEXTIDR_EL1 (Arm64) is invalid but we have a valid VMID.

Cc: Mike Leach 
Cc: Mathieu Poirier 
Cc: Al Grant 
Co-developed-by: Leo Yan 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Leo Yan 
---
  .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 32 ---
  1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c 
b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 3f4bc4050477..fb2a163ff74e 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -6,6 +6,7 @@
   * Author: Mathieu Poirier 
   */
  
+#include 

  #include 
  #include 
  #include 
@@ -491,13 +492,36 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
const ocsd_generic_trace_elem *elem,
const uint8_t trace_chan_id)
  {
-   pid_t tid;
+   pid_t tid = -1;
+   u64 pid_fmt;
+   int ret;
  
-	/* Ignore PE_CONTEXT packets that don't have a valid contextID */

-   if (!elem->context.ctxt_id_valid)
+   ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt);
+   if (ret)


Is this something we can cache in this function ? e.g,
static u64 pid_fmt;

if (!pid_fmt)
ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt);

As all the ETMs will be running at the same exception level.


+   return OCSD_RESP_FATAL_SYS_ERR;
+
+   /*
+* Process the PE_CONTEXT packets if we have a valid contextID or VMID.
+* If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2
+* as VMID, Bit ETM_OPT_CTXTID2 is set in this case.
+*/
+   switch (pid_fmt) {
+   case BIT(ETM_OPT_CTXTID):
+   if (elem->context.ctxt_id_valid)
+   tid = elem->context.context_id;
+   break;
+   case BIT(ETM_OPT_CTXTID2) | BIT(ETM_OPT_CTXTID):


I would rather fix the cs_etm__get_pid_fmt() to return either of these
as commented. i.e, ETM_OPT_CTXTID or ETM_OPT_CTXTID2. Thus we don't
need this case.


With the above two addressed:

Reviewed-by: Suzuki K Poulose 


Re: [PATCH v2 7/7] Documentation: coresight: Add PID tracing description

2021-02-02 Thread Suzuki K Poulose

On 2/2/21 4:38 PM, Leo Yan wrote:

After adding support for PID tracing with the kernel at EL1 or EL2, the
usage gets more complicated.

This patch describes the PMU formats of the contextID configs, which
helps users understand how to control the knobs for PID tracing when
the kernel is at different ELs.

Signed-off-by: Leo Yan 
---
  Documentation/trace/coresight/coresight.rst | 37 +
  1 file changed, 37 insertions(+)

diff --git a/Documentation/trace/coresight/coresight.rst 
b/Documentation/trace/coresight/coresight.rst
index 0b73acb44efa..771558f22938 100644
--- a/Documentation/trace/coresight/coresight.rst
+++ b/Documentation/trace/coresight/coresight.rst
@@ -512,6 +512,43 @@ The --itrace option controls the type and frequency of 
synthesized events
  Note that only 64-bit programs are currently supported - further work is
  required to support instruction decode of 32-bit Arm programs.
  
+2.2) Tracing PID

+
+When the kernel is running at EL2 with Virtualization Host Extensions (VHE),
+perf records CONTEXTIDR_EL2 in the trace data and can be used as PID when
+decoding; and if the kernel is running at EL1 with nVHE, CONTEXTIDR_EL1 is
+traced for PID.
+
+To support tracing the PID for a kernel running at different exception levels,
+the PMU formats are defined as follows:
+
+  "contextid1": Available on both EL1 kernel and EL2 kernel.  When the
+kernel is running at EL1, "contextid1" enables the PID
+tracing; when the kernel is running at EL2, this enables
+tracing the PID of guest applications.
+
+  "contextid2": Only usable when the kernel is running at EL2.  When
+selected, enables PID tracing on EL2 kernel.
+
+  "contextid":  Will be an alias for the option that enables PID
+tracing.  I.e,
+contextid == contextid1, on EL1 kernel.
+contextid == contextid2, on EL2 kernel.
+
+The perf tool automatically sets corresponding bit for the "contextid" config,
+therefore, the user doesn't have to bother which EL the kernel is running.
+
+  i.e, perf record -e cs_etm/contextid/u -- uname
+or perf record -e cs_etm//u -- uname
+
+will always do the "PID" tracing, independent of the kernel EL.
+
+When the kernel is running at EL2 with VHE, if user wants to trace both the
+PIDs for both host and guest, the two configs "contextid1" and "contextid2"
+can be set at the same time:
+
+  perf record -e cs_etm/contextid1,contextid2/u -- uname
+


To make this case clear, we could change the command from uname to
something like:

perf record -e cs_etm/contextid1,contextid2/u -- vm

Otherwise looks good to me.

With the above fixed,

Reviewed-by: Suzuki K Poulose 


Re: [PATCH v2 5/7] perf cs-etm: Add helper cs_etm__get_pid_fmt()

2021-02-02 Thread Suzuki K Poulose

On 2/2/21 4:38 PM, Leo Yan wrote:

This patch adds helper function cs_etm__get_pid_fmt(), by passing
parameter "traceID", it returns the PID format.

Signed-off-by: Leo Yan 
---
  tools/perf/util/cs-etm.c | 43 
  tools/perf/util/cs-etm.h |  1 +
  2 files changed, 44 insertions(+)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index a2a369e2fbb6..8194ddbd01e5 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -7,6 +7,7 @@
   */
  
  #include 

+#include 
  #include 
  #include 
  #include 
@@ -156,6 +157,48 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
return 0;
  }
  
+/*

+ * The returned PID format is presented by two bits:
+ *
+ *   Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced;
+ *   Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced.
+ *
+ * It's possible that these two bits are set together, this means the tracing
+ * contains PIDs for both CONTEXTIDR_EL1 and CONTEXTIDR_EL2.


This is a bit confusing. If both the bits are set, the session
was run on an EL2 kernel. Thus, the PID is always in CONTEXTIDR_EL2.


+ */
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt)
+{
+   struct int_node *inode;
+   u64 *metadata, val;
+
+   inode = intlist__find(traceid_list, trace_chan_id);
+   if (!inode)
+   return -EINVAL;
+
+   metadata = inode->priv;
+
+   if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
+   val = metadata[CS_ETM_ETMCR];
+   /* CONTEXTIDR is traced */
+   if (val & BIT(ETM_OPT_CTXTID))
+   *pid_fmt = BIT(ETM_OPT_CTXTID);
+   } else {
+   val = metadata[CS_ETMV4_TRCCONFIGR];
+
+   *pid_fmt = 0;
+
+   /* CONTEXTIDR_EL2 is traced */
+   if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
+   *pid_fmt = BIT(ETM_OPT_CTXTID2);
+
+   /* CONTEXTIDR_EL1 is traced */
+   if (val & BIT(ETM4_CFG_BIT_CTXTID))


I haven't looked at how this gets used. But, Shouldn't this be :

else if (val & BIT(ETM4_CFG_BIT_CTXTID)) ?

Suzuki


Re: [PATCH v2 2/7] coresight: etm-perf: Support PID tracing for kernel at EL2

2021-02-02 Thread Suzuki K Poulose

On 2/2/21 4:38 PM, Leo Yan wrote:

From: Suzuki K Poulose 

When the kernel is running at EL2, the PID is stored in CONTEXTIDR_EL2.
So, tracing CONTEXTIDR_EL1 doesn't give us the pid of the process.
Thus we should trace the VMID with VMIDOPT set to trace CONTEXTIDR_EL2
instead of CONTEXTIDR_EL1.  Given that we have an existing config
option "contextid" and this will be useful for tracing virtual machines
(when we get to support virtualization).

So instead, this patch extends option CTXTID with an extra bit
ETM_OPT_CTXTID2 (bit 15), thus on an EL2 kernel, we will have another
bit available for the perf tool: ETM_OPT_CTXTID is for kernel running in
EL1, ETM_OPT_CTXTID2 is used when kernel runs in EL2 with VHE enabled.

The tool must be backward compatible for users, i.e, "contextid" today
traces PID and that should remain the same; for this purpose, the perf
tool is updated to automatically set corresponding bit for the
"contextid" config, therefore, the user doesn't have to bother which EL
the kernel is running.

   i.e, perf record -e cs_etm/contextid/u --

will always do the "pid" tracing, independent of the kernel EL.

The driver parses the format "contextid", which traces CONTEXTIDR_EL1
for ETM_OPT_CTXTID (on EL1 kernel) and traces CONTEXTIDR_EL2 for
ETM_OPT_CTXTID2 (on EL2 kernel).

Besides the enhancement for the "contextid" format, two extra formats are
introduced: "contextid1" and "contextid2".  These allow tracing both
CONTEXTIDR_EL1 and CONTEXTIDR_EL2 when the kernel is running at EL2.
Finally, the PMU formats are defined as follows:

   "contextid1": Available on both EL1 kernel and EL2 kernel.  When the
 kernel is running at EL1, "contextid1" enables the PID
tracing; when the kernel is running at EL2, this enables
tracing the PID of guest applications.

   "contextid2": Only usable when the kernel is running at EL2.  When
 selected, enables PID tracing on EL2 kernel.

   "contextid":  Will be an alias for the option that enables PID
 tracing.  I.e,
 contextid == contextid1, on EL1 kernel.
 contextid == contextid2, on EL2 kernel.

Cc: Mathieu Poirier 
Cc: Al Grant 
Cc: Mike Leach 
Cc: Leo Yan 
Signed-off-by: Suzuki K Poulose 


You may add the following line here :

[ Added two config formats: contextid1, contextid2 ]


Signed-off-by: Leo Yan 


The patch as such looks good to me.

Suzuki


Re: [PATCH v2 1/7] coresight: etm-perf: Clarify comment on perf options

2021-02-02 Thread Suzuki K Poulose

Hi Leo

On 2/2/21 4:38 PM, Leo Yan wrote:

In theory, the options should be arbitrary values and are neutral for
any ETM version; so far perf tool uses ETMv3.5/PTM ETMCR config bits
except for register's bit definitions, also uses as options.

This can introduce confusion, especially if we want to add a new option
but the new option is not supported by ETMv3.5/PTM ETMCR.  But on the
other hand, we cannot change options since these options are generic
CoreSight PMU ABI.

For easier maintenance and to avoid confusion, this patch refines the
comment to clarify the perf options, and gives the background that
these bits come from ETMv3.5/PTM.  Afterwards, we should treat these
options as general knobs, and if any of them conflict with ETMv3.5/PTM,
we should consider defining separate macros for the ETMv3.5/PTM
ETMCR config bits.

Suggested-by: Suzuki K Poulose 
Signed-off-by: Leo Yan 


The patch looks good to me.  The only concern I have is, whether
we should split this patch to kernel vs tools ? As the kernel
changes go via coresight tree and the tools patch may go via
perf tree ?

Either way, for the patch:

Reviewed-by: Suzuki K Poulose 



---
  .../hwtracing/coresight/coresight-etm-perf.c|  5 -
  include/linux/coresight-pmu.h   | 17 -
  tools/include/linux/coresight-pmu.h | 17 -
  3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c 
b/drivers/hwtracing/coresight/coresight-etm-perf.c
index bdc34ca449f7..465ef1aa8c82 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -27,7 +27,10 @@ static bool etm_perf_up;
  static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle);
  static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
  
-/* ETMv3.5/PTM's ETMCR is 'config' */

+/*
+ * The PMU formats were originally for ETMv3.5/PTM's ETMCR 'config';
+ * now take them as general formats and apply on all ETMs.
+ */
  PMU_FORMAT_ATTR(cycacc,   "config:" __stringify(ETM_OPT_CYCACC));
  PMU_FORMAT_ATTR(contextid,"config:" __stringify(ETM_OPT_CTXTID));
  PMU_FORMAT_ATTR(timestamp,"config:" __stringify(ETM_OPT_TS));
diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h
index b0e35eec6499..5dc47cfdcf07 100644
--- a/include/linux/coresight-pmu.h
+++ b/include/linux/coresight-pmu.h
@@ -10,11 +10,18 @@
  #define CORESIGHT_ETM_PMU_NAME "cs_etm"
  #define CORESIGHT_ETM_PMU_SEED  0x10
  
-/* ETMv3.5/PTM's ETMCR config bit */

-#define ETM_OPT_CYCACC  12
-#define ETM_OPT_CTXTID 14
-#define ETM_OPT_TS  28
-#define ETM_OPT_RETSTK 29
+/*
+ * Below are the definition of bit offsets for perf option, and works as
+ * arbitrary values for all ETM versions.
+ *
+ * Most of them are originally from ETMv3.5/PTM's ETMCR config, therefore,
+ * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and
+ * directly use below macros as config bits.
+ */
+#define ETM_OPT_CYCACC 12
+#define ETM_OPT_CTXTID 14
+#define ETM_OPT_TS 28
+#define ETM_OPT_RETSTK 29
  
  /* ETMv4 CONFIGR programming bits for the ETM OPTs */

  #define ETM4_CFG_BIT_CYCACC   4
diff --git a/tools/include/linux/coresight-pmu.h 
b/tools/include/linux/coresight-pmu.h
index b0e35eec6499..5dc47cfdcf07 100644
--- a/tools/include/linux/coresight-pmu.h
+++ b/tools/include/linux/coresight-pmu.h
@@ -10,11 +10,18 @@
  #define CORESIGHT_ETM_PMU_NAME "cs_etm"
  #define CORESIGHT_ETM_PMU_SEED  0x10
  
-/* ETMv3.5/PTM's ETMCR config bit */

-#define ETM_OPT_CYCACC  12
-#define ETM_OPT_CTXTID 14
-#define ETM_OPT_TS  28
-#define ETM_OPT_RETSTK 29
+/*
+ * Below are the definition of bit offsets for perf option, and works as
+ * arbitrary values for all ETM versions.
+ *
+ * Most of them are originally from ETMv3.5/PTM's ETMCR config, therefore,
+ * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and
+ * directly use below macros as config bits.
+ */
+#define ETM_OPT_CYCACC 12
+#define ETM_OPT_CTXTID 14
+#define ETM_OPT_TS 28
+#define ETM_OPT_RETSTK 29
  
  /* ETMv4 CONFIGR programming bits for the ETM OPTs */

  #define ETM4_CFG_BIT_CYCACC   4





Re: [PATCH V3 05/14] coresight: ete: Add support for ETE tracing

2021-02-02 Thread Suzuki K Poulose

On 2/2/21 6:56 PM, Mathieu Poirier wrote:

On Wed, Jan 27, 2021 at 02:25:29PM +0530, Anshuman Khandual wrote:

From: Suzuki K Poulose 

Add ETE as one of the supported device types we support
with ETM4x driver. The devices are named following the
existing convention as ete.

ETE mandates that the trace resource status register is programmed
before the tracing is turned on. For the moment simply write to
it indicating TraceActive.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---


...


@@ -1834,10 +1854,6 @@ static int etm4_probe(struct device *dev, void __iomem 
*base, u32 etm_pid)
if (drvdata->cpu < 0)
return drvdata->cpu;
  
-	desc.name = devm_kasprintf(dev, GFP_KERNEL, "etm%d", drvdata->cpu);

-   if (!desc.name)
-   return -ENOMEM;
-
init_arg.drvdata = drvdata;
init_arg.csa = 
init_arg.pid = etm_pid;
@@ -1853,6 +1869,20 @@ static int etm4_probe(struct device *dev, void __iomem 
*base, u32 etm_pid)
if (!desc.access.io_mem ||
fwnode_property_present(dev_fwnode(dev), "qcom,skip-power-up"))
drvdata->skip_power_up = true;


Add a space here...


+   major = ETM_ARCH_MAJOR_VERSION(drvdata->arch);
+   minor = ETM_ARCH_MINOR_VERSION(drvdata->arch);


And here too.  Otherwise it makes a big blob in the middle of the function.


+   if (etm4x_is_ete(drvdata)) {
+   type_name = "ete";
+   /* ETE v1 has major version == 5. Adjust this for logging.*/
+   major -= 4;


I don't have the documentation for the ETE but I would not adjust @major.  I
would simply leave it to what the HW gives us since regardless of the name, the
major revision of the IP block is 5.



At the moment only register definitions are public and can be found here :

https://developer.arm.com/docs/ddi0601/g/aarch64-system-registers/trcdevarch

The ETE is a natural extension of the ETM architecture to support future
architecture changes and is designed in a way that the same software
can drive both ETM and ETE without many changes.


diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h 
b/drivers/hwtracing/coresight/coresight-etm4x.h
index ca24ac5..8b90de5 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -128,6 +128,8 @@
  #define TRCCIDR2  0xFF8
  #define TRCCIDR3  0xFFC
  
+#define TRCRSR_TA			BIT(12)

+
  /*
   * System instructions to access ETM registers.
   * See ETMv4.4 spec ARM IHI0064F section 4.3.6 System instructions
@@ -390,6 +392,9 @@
  #define ETM_COMMON_SYSREG_LIST_CASES  \
ETM_COMMON_SYSREG_LIST(NOP, __unused)
  
+#define ETM4x_ONLY_SYSREG_LIST_CASES		\

+   ETM4x_ONLY_SYSREG_LIST(NOP, __unused)
+
  #define ETM4x_SYSREG_LIST_CASES   \
ETM_COMMON_SYSREG_LIST_CASES\
ETM4x_ONLY_SYSREG_LIST(NOP, __unused)
@@ -406,7 +411,6 @@
ETE_ONLY_SYSREG_LIST(WRITE, (val))
  
  #define ETE_ONLY_SYSREG_LIST_CASES		\

-   ETM_COMMON_SYSREG_LIST_CASES\


This goes in patch 04.



Sure, will move it.


With the above:

Reviewed-by: Mathieu Poirier 


Thanks for the review

Suzuki


Re: [PATCH V3 01/14] coresight: etm-perf: Allow an event to use different sinks

2021-02-02 Thread Suzuki K Poulose

On 2/2/21 4:33 PM, Mike Leach wrote:

Hi,

On Tue, 2 Feb 2021 at 09:42, Suzuki K Poulose  wrote:


On 2/1/21 11:17 PM, Mathieu Poirier wrote:

Hi Anshuman,

I have started reviewing this set.  As it is quite voluminous, comments will
come over several days.  I will let you know when I am done.

On Wed, Jan 27, 2021 at 02:25:25PM +0530, Anshuman Khandual wrote:

From: Suzuki K Poulose 

When there are multiple sinks on the system, in the absence
of a specified sink, it is quite possible that a default sink
for an ETM could be different from that of another ETM. However
we do not support having multiple sinks for an event yet. This
patch allows the event to use the default sinks on the ETMs
where they are scheduled as long as the sinks are of the same
type.

e.g, if we have 1x1 topology with per-CPU ETRs, the event can
use the per-CPU ETR for the session. However, if the sinks
are of different type, e.g TMC-ETR on one and a custom sink
on another, the event will only trace on the first detected
sink.



I found the above changelog very confusing - I read it several times and still
couldn't get all of it.  In the end this patch prevents sinks of different types
from being used for session, and this is what the text should reflect.


Sorry about that. Your inference is correct, but it is only a side effect
of the primary motive. How about the following :

"When a sink is not specified by the user, the etm perf driver
finds a suitable sink automatically based on the first ETM, where
this event could be scheduled. Then we allocate the sink buffer based
on the selected sink. This is fine for a CPU bound event as the "sink"
is always guaranteed to be reachable from the ETM (as this is the only
ETM where the event is going to be scheduled). However, if we have a task
bound event, the event could be scheduled on any of the ETMs on the
system. In this case, currently we automatically select a sink and exclude
any ETMs that are not reachable from the selected sink. This is
problematic for 1x1 configurations as we end up in tracing the event
only on the "first" ETM, as the default sink is local to the first
ETM and unreachable from the rest.
However, we could allow the other ETMs to trace if they all have a
sink that is compatible with the "selected" sink and can use the
sink buffer. This can be easily done by verifying that they are
all driven by the same driver and match the same subtype."




Not sure that the logic here makes total sense - I can't see _why_
multiple sinks need to be of the same type.


Because we have a single "sink_config" (read, single sink specific
buffer) for an event. i.e, we do the sink_ops->alloc_buffer() only once
and rightly so. This allocates any buffers that are used by a given sink.
e.g, for ETR it allocates an etr_perf_buffer. Now if we wanted the same
event to run on an ETM with TRBE, the TRBE doesn't have any buffer set up to
collect the trace and can't make any sense of etr_perf_buffer.
However, if there is another ETM with a different ETR, the second
ETR can make sense of the sink_config (etr_perf_buffer) and trace the event.
Please remember that this only applies to task bound events where
the event can be scheduled on different ETMs.



1) This patch is designed to allow multiple sinks to be used in a 1:1
topology system - but there is no specific restriction here - and N:M
should work on the same basis


Yes, this should work in any topology.


2) This implies that multiple sinks will work within the coresight
infrastructure.


I am afraid I don't understand the context here.


3)  The sink interface -> struct coresight_ops_sink allows sinks to be
abstracted - therefore whichever sink is chosen the coresight
infrastructure calls the operations for the given sink.


Correct. The patch is trying to ensure that a private data
setup by one driver is not interpreted by another driver as
its own private data. (the private data being sink_config)


4) Each individual sink, will have its own hardware buffer - copied
into the perf buffers at some appropriate point.


Correct. Supporting multiple types of sinks for a single event
is complex and not worth the benefit of the extra complexity.
Moreover we don't expect sane systems to have such a
configuration.



Thus if the users specifies a selected sink - we need to eliminate any
source that cannot reach it.


Yes, we do that now.


If not we need to find the relevant default sink for the source, which
might be a shared ETR, or per CPU TRBE / ETR, and the abstraction
logic ought to handle getting the captured data to the correct place.


The abstraction logic works fine, but the per-event private data is
something that makes this complex.


If it doesn't then we are on shaky ground with any multiple sink
solution.

On the face of it - type is irrelevant. If I am missing something -
this patch needs a better explanation.


I hope the explanation above makes it clear. Please let

Re: [PATCH V3 02/14] coresight: Do not scan for graph if none is present

2021-02-02 Thread Suzuki K Poulose

Hi Mike

On 2/2/21 11:10 AM, Mike Leach wrote:

Hi Anshuman,

On Wed, 27 Jan 2021 at 08:55, Anshuman Khandual
 wrote:


From: Suzuki K Poulose 

If a graph node is not found for a given node, of_get_next_endpoint()
will emit the following error message :

  OF: graph: no port node found in /

If the given component doesn't have any explicit connections (e.g,
ETE) we could simply ignore the graph parsing.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
  drivers/hwtracing/coresight/coresight-platform.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/hwtracing/coresight/coresight-platform.c 
b/drivers/hwtracing/coresight/coresight-platform.c
index 3629b78..c594f45 100644
--- a/drivers/hwtracing/coresight/coresight-platform.c
+++ b/drivers/hwtracing/coresight/coresight-platform.c
@@ -90,6 +90,12 @@ static void of_coresight_get_ports_legacy(const struct 
device_node *node,
 struct of_endpoint endpoint;
 int in = 0, out = 0;

+   /*
+* Avoid warnings in of_graph_get_next_endpoint()
+* if the device doesn't have any graph connections
+*/
+   if (!of_graph_is_present(node))
+ return;


The problem here is that you are masking genuine errors.


If the graph is not described for a component, where it is
mandatory, it won't be usable by the driver and as such using
the devices will fail.

e.g, if an ETM misses the bindings, tracing will fail. (in either
mode).


The solution is to either call this only if the device type is one
that ports are not required - i.e. ETE, or upgrade the .dts bindings


The proposed change is too invasive and is not worth the benefit
that it brings.

The side effect of this patch is, if someone makes a mistake in the
bindings they don't see the "warning" in the dmesg. But will definitely
hit the issue when trying to use the system.

i.e, Functionally there is no change.

On the other hand issuing a warning message for ETE is confusing for
a well behaved user.


for the rest of the ETM devices to yaml so that the ports requirement
is checked and validated there.


This is a step that we must take, but in a separate series. And I
don't think this will solve handling non-compliant DTs *immediately*,
as there could be :
 a) DTS that are not upstream (Quite common for CoreSight)
 b) People are getting used to the schema and running schema checks.

So, personally I vote for :

1) Merge this patch in as is
2) Convert the bindings to Yaml in a separate series.

Suzuki



Re: [PATCH V3 01/14] coresight: etm-perf: Allow an event to use different sinks

2021-02-02 Thread Suzuki K Poulose

On 2/1/21 11:17 PM, Mathieu Poirier wrote:

Hi Anshuman,

I have started reviewing this set.  As it is quite voluminous comments will
come over several days.  I will let you know when I am done.

On Wed, Jan 27, 2021 at 02:25:25PM +0530, Anshuman Khandual wrote:

From: Suzuki K Poulose 

When there are multiple sinks on the system, in the absence
of a specified sink, it is quite possible that a default sink
for an ETM could be different from that of another ETM. However
we do not support having multiple sinks for an event yet. This
patch allows the event to use the default sinks on the ETMs
where they are scheduled as long as the sinks are of the same
type.

e.g, if we have 1x1 topology with per-CPU ETRs, the event can
use the per-CPU ETR for the session. However, if the sinks
are of different type, e.g TMC-ETR on one and a custom sink
on another, the event will only trace on the first detected
sink.



I found the above changelog very confusing - I read it several times and still
couldn't get all of it.  In the end this patch prevents sinks of different types
from being used for session, and this is what the text should reflect.


Sorry about that. Your inference is correct, but it is only a side effect
of the primary motive. How about the following :

"When a sink is not specified by the user, the etm perf driver
finds a suitable sink automatically based on the first ETM, where
this event could be scheduled. Then we allocate the sink buffer based
on the selected sink. This is fine for a CPU bound event as the "sink"
is always guaranteed to be reachable from the ETM (as this is the only
ETM where the event is going to be scheduled). However, if we have a task
bound event, the event could be scheduled on any of the ETMs on the
system. In this case, currently we automatically select a sink and exclude
any ETMs that are not reachable from the selected sink. This is
problematic for 1x1 configurations as we end up in tracing the event
only on the "first" ETM, as the default sink is local to the first
ETM and unreachable from the rest.
However, we could allow the other ETMs to trace if they all have a
sink that is compatible with the "selected" sink and can use the
sink buffer. This can be easily done by verifying that they are
all driven by the same driver and match the same subtype."


  

Cc: Mathieu Poirier 
Cc: Mike Leach 
Tested-by: Linu Cherian 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
  drivers/hwtracing/coresight/coresight-etm-perf.c | 48 +++-
  1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c 
b/drivers/hwtracing/coresight/coresight-etm-perf.c
index bdc34ca..eb9e7e9 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -204,6 +204,13 @@ static void etm_free_aux(void *data)
schedule_work(&event_data->work);
  }
  
+static bool sinks_match(struct coresight_device *a, struct coresight_device *b)

+{
+   if (!a || !b)
+   return false;
+   return (sink_ops(a) == sink_ops(b));


Yes


I think we can tighten this by verifying the dev->sub_type matches too.




+}
+
  static void *etm_setup_aux(struct perf_event *event, void **pages,
   int nr_pages, bool overwrite)
  {
@@ -212,6 +219,7 @@ static void *etm_setup_aux(struct perf_event *event, void 
**pages,
cpumask_t *mask;
struct coresight_device *sink = NULL;


 struct coresight_device *user_sink = NULL;


struct etm_event_data *event_data = NULL;
+   bool sink_forced = false;
  
  	event_data = alloc_event_data(cpu);

if (!event_data)
@@ -222,6 +230,7 @@ static void *etm_setup_aux(struct perf_event *event, void 
**pages,
if (event->attr.config2) {
id = (u32)event->attr.config2;
sink = coresight_get_sink_by_id(id);


 user_sink = coresight_get_sink_by_id(id);


+   sink_forced = true;
}
  
  	mask = &event_data->mask;

@@ -235,7 +244,7 @@ static void *etm_setup_aux(struct perf_event *event, void 
**pages,
 */
for_each_cpu(cpu, mask) {
struct list_head *path;
-   struct coresight_device *csdev;


 struct coresight_device *last_sink = NULL;


+   struct coresight_device *csdev, *new_sink;
  
  		csdev = per_cpu(csdev_src, cpu);

/*
@@ -249,21 +258,35 @@ static void *etm_setup_aux(struct perf_event *event, void 
**pages,
}
  
  		/*

-* No sink provided - look for a default sink for one of the
-* devices. At present we only support topology where all CPUs
-* use the same sink [N:1], so only need to find one sink. The
-* coresight_build_path later will remove any CPU that does not
- 

Re: [PATCH V3 10/14] arm64: nvhe: Allow TRBE access at EL1

2021-01-28 Thread Suzuki K Poulose

On 1/28/21 9:46 AM, Marc Zyngier wrote:

On 2021-01-28 09:34, Suzuki K Poulose wrote:

On 1/27/21 9:58 AM, Marc Zyngier wrote:

On 2021-01-27 08:55, Anshuman Khandual wrote:

From: Suzuki K Poulose 

When the kernel is booted at EL2 in a nvhe configuration,
enable the TRBE access to the EL1. The EL1 still can't trace
EL2, unless EL2 permits explicitly via TRFCR_EL2.E2TRE.

Cc: Will Deacon 
Cc: Catalin Marinas 
Cc: Marc Zyngier 
Cc: Mark Rutland 
cc: Anshuman Khandual 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 


Acked-by: Marc Zyngier 

One comment below, though:


---
 arch/arm64/include/asm/el2_setup.h | 19 +++
 arch/arm64/include/asm/kvm_arm.h   |  2 ++
 2 files changed, 21 insertions(+)

diff --git a/arch/arm64/include/asm/el2_setup.h
b/arch/arm64/include/asm/el2_setup.h
index a7f5a1b..05ecce9 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -72,6 +72,25 @@
 .endif

 3:
+
+.ifeqs    "\mode", "nvhe"
+    /*
+ * If the Trace Buffer is available, allow
+ * the EL1 to own it. Note that EL1 cannot
+ * trace the EL2, as it is prevented by
+ * TRFCR_EL2.E2TRE == 0.
+ */
+    ubfx    x0, x1, #ID_AA64DFR0_TRBE_SHIFT, #4
+    cbz    x0, 1f
+
+    mrs_s    x0, SYS_TRBIDR_EL1
+    and    x0, x0, TRBIDR_PROG
+    cbnz    x0, 1f
+    mov    x0, #(MDCR_EL2_E2TB_EL1_OWN << MDCR_EL2_E2TB_SHIFT)
+    orr    x2, x2, x0
+.endif
+
+1:


Note that this will (badly) conflict with the late-VHE patches[1],
where this code path has been reworked.


Thanks for the heads up. We will need to see how things get merged.
Ideally this patch and the previous one (TRBE definitions could go
via the arm64 tree / kvm tree), in which case we could rebase these
two patches on the respective tree.


I think the current plan of action is to go via the arm64 tree,
given that there is nothing really KVM specific there. I'll respin
the series one last (hopefully!) time on Monday. Let me know if
you need a hand with the rebasing.


Sounds good, will rebase it on top of that then.

Cheers
Suzuki


Re: [PATCH V3 10/14] arm64: nvhe: Allow TRBE access at EL1

2021-01-28 Thread Suzuki K Poulose

On 1/27/21 9:58 AM, Marc Zyngier wrote:

On 2021-01-27 08:55, Anshuman Khandual wrote:

From: Suzuki K Poulose 

When the kernel is booted at EL2 in a nvhe configuration,
enable the TRBE access to the EL1. The EL1 still can't trace
EL2, unless EL2 permits explicitly via TRFCR_EL2.E2TRE.

Cc: Will Deacon 
Cc: Catalin Marinas 
Cc: Marc Zyngier 
Cc: Mark Rutland 
cc: Anshuman Khandual 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 


Acked-by: Marc Zyngier 

One comment below, though:


---
 arch/arm64/include/asm/el2_setup.h | 19 +++
 arch/arm64/include/asm/kvm_arm.h   |  2 ++
 2 files changed, 21 insertions(+)

diff --git a/arch/arm64/include/asm/el2_setup.h
b/arch/arm64/include/asm/el2_setup.h
index a7f5a1b..05ecce9 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -72,6 +72,25 @@
 .endif

 3:
+
+.ifeqs    "\mode", "nvhe"
+    /*
+ * If the Trace Buffer is available, allow
+ * the EL1 to own it. Note that EL1 cannot
+ * trace the EL2, as it is prevented by
+ * TRFCR_EL2.E2TRE == 0.
+ */
+    ubfx    x0, x1, #ID_AA64DFR0_TRBE_SHIFT, #4
+    cbz    x0, 1f
+
+    mrs_s    x0, SYS_TRBIDR_EL1
+    and    x0, x0, TRBIDR_PROG
+    cbnz    x0, 1f
+    mov    x0, #(MDCR_EL2_E2TB_EL1_OWN << MDCR_EL2_E2TB_SHIFT)
+    orr    x2, x2, x0
+.endif
+
+1:


Note that this will (badly) conflict with the late-VHE patches[1],
where this code path has been reworked.


Thanks for the heads up. We will need to see how things get merged.
Ideally this patch and the previous one (TRBE definitions could go
via the arm64 tree / kvm tree), in which case we could rebase these
two patches on the respective tree.

Cheers
Suzuki


Re: [PATCH V3 09/14] arm64: Add TRBE definitions

2021-01-28 Thread Suzuki K Poulose

On 1/27/21 8:55 AM, Anshuman Khandual wrote:

This adds TRBE related registers and corresponding feature macros.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Suzuki K Poulose 


Cc: Catalin Marinas 
Cc: Mark Rutland 
Cc: Will Deacon 


Signed-off-by: Anshuman Khandual 


Reviewed-by: Suzuki K Poulose 


---
Changes in V3:

- ID_AA64DFR0_TRBE_SHIFT has been moved here from the TRBE driver
- Changed TRBLIMITR_TRIG_MODE_SHIFT as 3

  arch/arm64/include/asm/sysreg.h | 50 +
  1 file changed, 50 insertions(+)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 4acff97..85ae4db 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -329,6 +329,55 @@
  
  /*** End of Statistical Profiling Extension ***/
  
+/*

+ * TRBE Registers
+ */
+#define SYS_TRBLIMITR_EL1  sys_reg(3, 0, 9, 11, 0)
+#define SYS_TRBPTR_EL1 sys_reg(3, 0, 9, 11, 1)
+#define SYS_TRBBASER_EL1   sys_reg(3, 0, 9, 11, 2)
+#define SYS_TRBSR_EL1  sys_reg(3, 0, 9, 11, 3)
+#define SYS_TRBMAR_EL1 sys_reg(3, 0, 9, 11, 4)
+#define SYS_TRBTRG_EL1 sys_reg(3, 0, 9, 11, 6)
+#define SYS_TRBIDR_EL1 sys_reg(3, 0, 9, 11, 7)
+
+#define TRBLIMITR_LIMIT_MASK   GENMASK_ULL(51, 0)
+#define TRBLIMITR_LIMIT_SHIFT  12
+#define TRBLIMITR_NVM  BIT(5)
+#define TRBLIMITR_TRIG_MODE_MASK   GENMASK(1, 0)
+#define TRBLIMITR_TRIG_MODE_SHIFT  3
+#define TRBLIMITR_FILL_MODE_MASK   GENMASK(1, 0)
+#define TRBLIMITR_FILL_MODE_SHIFT  1
+#define TRBLIMITR_ENABLE   BIT(0)
+#define TRBPTR_PTR_MASKGENMASK_ULL(63, 0)
+#define TRBPTR_PTR_SHIFT   0
+#define TRBBASER_BASE_MASK GENMASK_ULL(51, 0)
+#define TRBBASER_BASE_SHIFT12
+#define TRBSR_EC_MASK  GENMASK(5, 0)
+#define TRBSR_EC_SHIFT 26
+#define TRBSR_IRQ  BIT(22)
+#define TRBSR_TRG  BIT(21)
+#define TRBSR_WRAP BIT(20)
+#define TRBSR_ABORTBIT(18)
+#define TRBSR_STOP BIT(17)
+#define TRBSR_MSS_MASK GENMASK(15, 0)
+#define TRBSR_MSS_SHIFT0
+#define TRBSR_BSC_MASK GENMASK(5, 0)
+#define TRBSR_BSC_SHIFT0
+#define TRBSR_FSC_MASK GENMASK(5, 0)
+#define TRBSR_FSC_SHIFT0
+#define TRBMAR_SHARE_MASK  GENMASK(1, 0)
+#define TRBMAR_SHARE_SHIFT 8
+#define TRBMAR_OUTER_MASK  GENMASK(3, 0)
+#define TRBMAR_OUTER_SHIFT 4
+#define TRBMAR_INNER_MASK  GENMASK(3, 0)
+#define TRBMAR_INNER_SHIFT 0
+#define TRBTRG_TRG_MASKGENMASK(31, 0)
+#define TRBTRG_TRG_SHIFT   0
+#define TRBIDR_FLAGBIT(5)
+#define TRBIDR_PROGBIT(4)
+#define TRBIDR_ALIGN_MASK  GENMASK(3, 0)
+#define TRBIDR_ALIGN_SHIFT 0
+
  #define SYS_PMINTENSET_EL1sys_reg(3, 0, 9, 14, 1)
  #define SYS_PMINTENCLR_EL1sys_reg(3, 0, 9, 14, 2)
  
@@ -831,6 +880,7 @@

  #define ID_AA64MMFR2_CNP_SHIFT0
  
  /* id_aa64dfr0 */

+#define ID_AA64DFR0_TRBE_SHIFT 44
  #define ID_AA64DFR0_TRACE_FILT_SHIFT  40
  #define ID_AA64DFR0_DOUBLELOCK_SHIFT  36
  #define ID_AA64DFR0_PMSVER_SHIFT  32





Re: [PATCH V3 08/14] coresight: core: Add support for dedicated percpu sinks

2021-01-28 Thread Suzuki K Poulose

On 1/27/21 8:55 AM, Anshuman Khandual wrote:

Add support for dedicated sinks that are bound to individual CPUs. (e.g,
TRBE). To allow quicker access to the sink for a given CPU bound source,
keep a percpu array of the sink devices. Also, add support for building
a path to the CPU local sink from the ETM.

This adds a new percpu sink type CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM.
This new sink type is exclusively available and can only work with percpu
source type device CORESIGHT_DEV_SUBTYPE_SOURCE_PERCPU_PROC.

This defines a percpu structure that accommodates a single coresight_device
which can be used to store an initialized instance from a sink driver. As
these sinks are exclusively linked and dependent on corresponding percpu
sources devices, they should also be the default sink device during a perf
session.

Outwards device connections are scanned while establishing paths between a
source and a sink device. But such connections are not present for certain
percpu source and sink devices which are exclusively linked and dependent.
Build the path directly and skip connection scanning for such devices.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
Changes in V3:

- Updated coresight_find_default_sink()

  drivers/hwtracing/coresight/coresight-core.c | 16 ++--
  include/linux/coresight.h| 12 
  2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-core.c 
b/drivers/hwtracing/coresight/coresight-core.c
index 0062c89..4795e28 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -23,6 +23,7 @@
  #include "coresight-priv.h"
  
  static DEFINE_MUTEX(coresight_mutex);

+DEFINE_PER_CPU(struct coresight_device *, csdev_sink);
  
  /**

   * struct coresight_node - elements of a path, from source to sink
@@ -784,6 +785,13 @@ static int _coresight_build_path(struct coresight_device 
*csdev,
if (csdev == sink)
goto out;
  
+	if (coresight_is_percpu_source(csdev) && coresight_is_percpu_sink(sink) &&

+   sink == per_cpu(csdev_sink, source_ops(csdev)->cpu_id(csdev))) {
+   _coresight_build_path(sink, sink, path);
+   found = true;
+   goto out;
+   }
+
/* Not a sink - recursively explore each port found on this element */
for (i = 0; i < csdev->pdata->nr_outport; i++) {
struct coresight_device *child_dev;
@@ -999,8 +1007,12 @@ coresight_find_default_sink(struct coresight_device 
*csdev)
int depth = 0;
  
  	/* look for a default sink if we have not found for this device */

-   if (!csdev->def_sink)
-   csdev->def_sink = coresight_find_sink(csdev, &depth);
+   if (!csdev->def_sink) {
+   if (coresight_is_percpu_source(csdev))
+   csdev->def_sink = per_cpu(csdev_sink, 
source_ops(csdev)->cpu_id(csdev));
+   if (!csdev->def_sink)
+   csdev->def_sink = coresight_find_sink(csdev, &depth);
+   }
return csdev->def_sink;
  }
  
diff --git a/include/linux/coresight.h b/include/linux/coresight.h

index 976ec26..bc3a5ca 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -50,6 +50,7 @@ enum coresight_dev_subtype_sink {
CORESIGHT_DEV_SUBTYPE_SINK_PORT,
CORESIGHT_DEV_SUBTYPE_SINK_BUFFER,
CORESIGHT_DEV_SUBTYPE_SINK_SYSMEM,
+   CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM,
  };
  
  enum coresight_dev_subtype_link {

@@ -428,6 +429,17 @@ static inline void csdev_access_write64(struct 
csdev_access *csa, u64 val, u32 o
csa->write(val, offset, false, true);
  }
  
+static inline bool coresight_is_percpu_source(struct coresight_device *csdev)

+{
+   return csdev && (csdev->type == CORESIGHT_DEV_TYPE_SOURCE) &&
+  csdev->subtype.source_subtype == 
CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;


Please add () around the last line. Same below.


+}
+
+static inline bool coresight_is_percpu_sink(struct coresight_device *csdev)
+{
+   return csdev && (csdev->type == CORESIGHT_DEV_TYPE_SINK) &&
+  csdev->subtype.sink_subtype == 
CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM;
+}
  #else /* !CONFIG_64BIT */
  
  static inline u64 csdev_access_relaxed_read64(struct csdev_access *csa,




With the above :

Tested-by: Suzuki K Poulose 
Reviewed-by: Suzuki K Poulose 


[PATCH v3] coresight: etm4x: Handle accesses to TRCSTALLCTLR

2021-01-27 Thread Suzuki K Poulose
TRCSTALLCTLR register is only implemented if

   TRCIDR3.STALLCTL == 0b1

Make sure the driver touches the register only if it is implemented.

Cc: sta...@vger.kernel.org
Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Leo Yan 
Signed-off-by: Suzuki K Poulose 
---
Changes since v2:
 - Ignore STALLCTL for sysfs mode
---
 drivers/hwtracing/coresight/coresight-etm4x-core.c  | 9 ++---
 drivers/hwtracing/coresight/coresight-etm4x-sysfs.c | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 473ab7480a36..5017d33ba4f5 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -306,7 +306,8 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
etm4x_relaxed_write32(csa, 0x0, TRCAUXCTLR);
etm4x_relaxed_write32(csa, config->eventctrl0, TRCEVENTCTL0R);
etm4x_relaxed_write32(csa, config->eventctrl1, TRCEVENTCTL1R);
-   etm4x_relaxed_write32(csa, config->stall_ctrl, TRCSTALLCTLR);
+   if (drvdata->stallctl)
+   etm4x_relaxed_write32(csa, config->stall_ctrl, TRCSTALLCTLR);
etm4x_relaxed_write32(csa, config->ts_ctrl, TRCTSCTLR);
etm4x_relaxed_write32(csa, config->syncfreq, TRCSYNCPR);
etm4x_relaxed_write32(csa, config->ccctlr, TRCCCCTLR);
@@ -1463,7 +1464,8 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
state->trcauxctlr = etm4x_read32(csa, TRCAUXCTLR);
state->trceventctl0r = etm4x_read32(csa, TRCEVENTCTL0R);
state->trceventctl1r = etm4x_read32(csa, TRCEVENTCTL1R);
-   state->trcstallctlr = etm4x_read32(csa, TRCSTALLCTLR);
+   if (drvdata->stallctl)
+   state->trcstallctlr = etm4x_read32(csa, TRCSTALLCTLR);
state->trctsctlr = etm4x_read32(csa, TRCTSCTLR);
state->trcsyncpr = etm4x_read32(csa, TRCSYNCPR);
state->trcccctlr = etm4x_read32(csa, TRCCCCTLR);
@@ -1575,7 +1577,8 @@ static void etm4_cpu_restore(struct etmv4_drvdata 
*drvdata)
etm4x_relaxed_write32(csa, state->trcauxctlr, TRCAUXCTLR);
etm4x_relaxed_write32(csa, state->trceventctl0r, TRCEVENTCTL0R);
etm4x_relaxed_write32(csa, state->trceventctl1r, TRCEVENTCTL1R);
-   etm4x_relaxed_write32(csa, state->trcstallctlr, TRCSTALLCTLR);
+   if (drvdata->stallctl)
+   etm4x_relaxed_write32(csa, state->trcstallctlr, TRCSTALLCTLR);
etm4x_relaxed_write32(csa, state->trctsctlr, TRCTSCTLR);
etm4x_relaxed_write32(csa, state->trcsyncpr, TRCSYNCPR);
etm4x_relaxed_write32(csa, state->trcccctlr, TRCCCCTLR);
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c 
b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index b646d53a3133..0995a10790f4 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -389,7 +389,7 @@ static ssize_t mode_store(struct device *dev,
config->eventctrl1 &= ~BIT(12);
 
/* bit[8], Instruction stall bit */
-   if (config->mode & ETM_MODE_ISTALL_EN)
+   if ((config->mode & ETM_MODE_ISTALL_EN) && (drvdata->stallctl == true))
config->stall_ctrl |= BIT(8);
else
config->stall_ctrl &= ~BIT(8);
-- 
2.24.1



Re: [PATCH v2] coresight: etm4x: Handle accesses to TRCSTALLCTLR

2021-01-27 Thread Suzuki K Poulose

On 1/27/21 5:43 PM, Mathieu Poirier wrote:

Good day,

On Wed, Jan 27, 2021 at 12:00:32PM +0000, Suzuki K Poulose wrote:

TRCSTALLCTLR register is only implemented if

TRCIDR3.STALLCTL == 0b1

Make sure the driver touches the register only if it is implemented.

Cc: sta...@vger.kernel.org
Cc: Mathieu Poirier 
Cc: Leo Yan 
Cc: Mike Leach 
Signed-off-by: Suzuki K Poulose 
---
Changes since v1:
   - No change to the patch, fixed the stable email address and
 added usual reviewers.
---
  drivers/hwtracing/coresight/coresight-etm4x-core.c  | 9 ++---
  drivers/hwtracing/coresight/coresight-etm4x-sysfs.c | 3 +++
  2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index b40e3c2bf818..814b49dae0c7 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -367,7 +367,8 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
etm4x_relaxed_write32(csa, 0x0, TRCAUXCTLR);
etm4x_relaxed_write32(csa, config->eventctrl0, TRCEVENTCTL0R);
etm4x_relaxed_write32(csa, config->eventctrl1, TRCEVENTCTL1R);
-   etm4x_relaxed_write32(csa, config->stall_ctrl, TRCSTALLCTLR);
+   if (drvdata->stallctl)
+   etm4x_relaxed_write32(csa, config->stall_ctrl, TRCSTALLCTLR);
etm4x_relaxed_write32(csa, config->ts_ctrl, TRCTSCTLR);
etm4x_relaxed_write32(csa, config->syncfreq, TRCSYNCPR);
etm4x_relaxed_write32(csa, config->ccctlr, TRCCCCTLR);
@@ -1545,7 +1546,8 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
state->trcauxctlr = etm4x_read32(csa, TRCAUXCTLR);
state->trceventctl0r = etm4x_read32(csa, TRCEVENTCTL0R);
state->trceventctl1r = etm4x_read32(csa, TRCEVENTCTL1R);
-   state->trcstallctlr = etm4x_read32(csa, TRCSTALLCTLR);
+   if (drvdata->stallctl)
+   state->trcstallctlr = etm4x_read32(csa, TRCSTALLCTLR);
state->trctsctlr = etm4x_read32(csa, TRCTSCTLR);
state->trcsyncpr = etm4x_read32(csa, TRCSYNCPR);
state->trcccctlr = etm4x_read32(csa, TRCCCCTLR);
@@ -1657,7 +1659,8 @@ static void etm4_cpu_restore(struct etmv4_drvdata 
*drvdata)
etm4x_relaxed_write32(csa, state->trcauxctlr, TRCAUXCTLR);
etm4x_relaxed_write32(csa, state->trceventctl0r, TRCEVENTCTL0R);
etm4x_relaxed_write32(csa, state->trceventctl1r, TRCEVENTCTL1R);
-   etm4x_relaxed_write32(csa, state->trcstallctlr, TRCSTALLCTLR);
+   if (drvdata->stallctl)
+   etm4x_relaxed_write32(csa, state->trcstallctlr, TRCSTALLCTLR);
etm4x_relaxed_write32(csa, state->trctsctlr, TRCTSCTLR);
etm4x_relaxed_write32(csa, state->trcsyncpr, TRCSYNCPR);
etm4x_relaxed_write32(csa, state->trcccctlr, TRCCCCTLR);
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c 
b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index 1c490bcef3ad..cd9249fbf913 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -296,6 +296,9 @@ static ssize_t mode_store(struct device *dev,
if (kstrtoul(buf, 16, &val))
return -EINVAL;
  
+	if ((val & ETM_MODE_ISTALL_EN) && !drvdata->stallctl)

+   return -EINVAL;
+


We have two choices here:

1) Follow what is already done in this function for implementation defined
options like ETM_MODE_BB, ETMv4_MODE_CTXID, ETM_MODE_RETURNSTACK and others.  In
that case we would have:

 /* bit[8], Instruction stall bit */
 if ((config->mode & ETM_MODE_ISTALL_EN) && drvdata->stallctl == true))
 config->stall_ctrl |= BIT(8);
 else
 config->stall_ctrl &= ~BIT(8);

2) Return -EINVAL when something is not supported, like you have above.  In that
case we'd have to enact the same behavior for all the options, which has the
potential of breaking user space.


I did think about this, but now I agree 1 is better for now. I will respin.


Cheers
Suzuki


Re: [PATCH V3 14/14] coresight: etm-perf: Add support for trace buffer format

2021-01-27 Thread Suzuki K Poulose

Hi Peter

On 1/27/21 12:54 PM, Peter Zijlstra wrote:

On Wed, Jan 27, 2021 at 02:25:38PM +0530, Anshuman Khandual wrote:


It is possible to have a
perf session where some events end up collecting the trace
in TMC-ETR while the others in TRBE. Thus we need a way
to identify the type of the trace for each AUX record.



diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9a5ca45..169e6b3 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -,6 +,10 @@ enum perf_callchain_context {
  #define PERF_AUX_FLAG_COLLISION   0x08/* sample 
collided with another */
  #define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK0xff00  /* PMU specific trace 
format type */
  
+/* CoreSight PMU AUX buffer formats */

+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT   0x0000 /* Default for 
backward compatibility */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of 
the source */


Would CORESIGHT_FORMAT_ETR / CORESIGHT_FORMAT_TRBE be better names?



Not really. The format used by the ETR is CORESIGHT frame formatting. And
there are other sinks that use the formatting. e.g, TMC-ETB (a sink with
internal memory), TMC-ETF (trace fifo with internal memory). So it is really
not tied to ETR.

As for TRBE, it simply pumps the data thrown at it to the memory. As such
calling it TRBE format would be confusing as the format of the buffer is
really Raw trace thrown at it. This can be inferred from the ETM/ETE
looking at the ID registers, which the userspace perf already captures
in the perf.data. So the decoder perf can look at the perf.data and the
AUX records to interpret the buffer correctly.

Suzuki


[PATCH v2] coresight: etm4x: Handle accesses to TRCSTALLCTLR

2021-01-27 Thread Suzuki K Poulose
TRCSTALLCTLR register is only implemented if

   TRCIDR3.STALLCTL == 0b1

Make sure the driver touches the register only if it is implemented.

Cc: sta...@vger.kernel.org
Cc: Mathieu Poirier 
Cc: Leo Yan 
Cc: Mike Leach 
Signed-off-by: Suzuki K Poulose 
---
Changes since v1:
  - No change to the patch, fixed the stable email address and
added usual reviewers.
---
 drivers/hwtracing/coresight/coresight-etm4x-core.c  | 9 ++---
 drivers/hwtracing/coresight/coresight-etm4x-sysfs.c | 3 +++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index b40e3c2bf818..814b49dae0c7 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -367,7 +367,8 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
etm4x_relaxed_write32(csa, 0x0, TRCAUXCTLR);
etm4x_relaxed_write32(csa, config->eventctrl0, TRCEVENTCTL0R);
etm4x_relaxed_write32(csa, config->eventctrl1, TRCEVENTCTL1R);
-   etm4x_relaxed_write32(csa, config->stall_ctrl, TRCSTALLCTLR);
+   if (drvdata->stallctl)
+   etm4x_relaxed_write32(csa, config->stall_ctrl, TRCSTALLCTLR);
etm4x_relaxed_write32(csa, config->ts_ctrl, TRCTSCTLR);
etm4x_relaxed_write32(csa, config->syncfreq, TRCSYNCPR);
etm4x_relaxed_write32(csa, config->ccctlr, TRTLR);
@@ -1545,7 +1546,8 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
state->trcauxctlr = etm4x_read32(csa, TRCAUXCTLR);
state->trceventctl0r = etm4x_read32(csa, TRCEVENTCTL0R);
state->trceventctl1r = etm4x_read32(csa, TRCEVENTCTL1R);
-   state->trcstallctlr = etm4x_read32(csa, TRCSTALLCTLR);
+   if (drvdata->stallctl)
+   state->trcstallctlr = etm4x_read32(csa, TRCSTALLCTLR);
state->trctsctlr = etm4x_read32(csa, TRCTSCTLR);
state->trcsyncpr = etm4x_read32(csa, TRCSYNCPR);
state->trtlr = etm4x_read32(csa, TRTLR);
@@ -1657,7 +1659,8 @@ static void etm4_cpu_restore(struct etmv4_drvdata 
*drvdata)
etm4x_relaxed_write32(csa, state->trcauxctlr, TRCAUXCTLR);
etm4x_relaxed_write32(csa, state->trceventctl0r, TRCEVENTCTL0R);
etm4x_relaxed_write32(csa, state->trceventctl1r, TRCEVENTCTL1R);
-   etm4x_relaxed_write32(csa, state->trcstallctlr, TRCSTALLCTLR);
+   if (drvdata->stallctl)
+   etm4x_relaxed_write32(csa, state->trcstallctlr, TRCSTALLCTLR);
etm4x_relaxed_write32(csa, state->trctsctlr, TRCTSCTLR);
etm4x_relaxed_write32(csa, state->trcsyncpr, TRCSYNCPR);
etm4x_relaxed_write32(csa, state->trtlr, TRTLR);
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c 
b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index 1c490bcef3ad..cd9249fbf913 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -296,6 +296,9 @@ static ssize_t mode_store(struct device *dev,
if (kstrtoul(buf, 16, ))
return -EINVAL;
 
+   if ((val & ETM_MODE_ISTALL_EN) && !drvdata->stallctl)
+   return -EINVAL;
+
spin_lock(>spinlock);
config->mode = val & ETMv4_MODE_ALL;
 
-- 
2.24.1



Re: [PATCH v3 4/5] amba: Make the remove callback return void

2021-01-26 Thread Suzuki K Poulose

Hi

On 1/26/21 4:58 PM, Uwe Kleine-König wrote:

All amba drivers return 0 in their remove callback. Together with the
driver core ignoring the return value anyhow, it doesn't make sense to
return a value here.

Change the remove prototype to return void, which makes it explicit that
returning an error value doesn't work as expected. This simplifies changing
the core remove callback to return void, too.

Reviewed-by: Ulf Hansson 
Reviewed-by: Arnd Bergmann 
Acked-by: Alexandre Belloni 
Acked-by: Dmitry Torokhov 
Acked-by: Krzysztof Kozlowski  # for drivers/memory
Acked-by: Mark Brown 

 > Acked-by: Dmitry Torokhov 

Acked-by: Linus Walleij 
Signed-off-by: Uwe Kleine-König 




  drivers/hwtracing/coresight/coresight-etm4x-core.c | 4 +---


You are most likely to have a conflict for the above file, with what is
in coresight/next. It should be easy to resolve.

Otherwise, the changes look good for the drivers/hwtracing/coresight/*

Acked-by: Suzuki K Poulose 


Re: [RFC PATCH] perf: Handle multiple formatted AUX records

2021-01-25 Thread Suzuki K Poulose

Hi Peter

On 1/25/21 10:25 AM, Peter Zijlstra wrote:

On Fri, Jan 22, 2021 at 03:18:29PM +, Suzuki K Poulose wrote:

CoreSight PMU supports aux-buffer for the ETM tracing. The trace
generated by the ETM (associated with individual CPUs, like Intel PT)
is captured by a separate IP (CoreSight TMC-ETR/ETF until now).

The TMC-ETR applies formatting of the raw ETM trace data, as it
can collect traces from multiple ETMs, with the TraceID to indicate
the source of a given trace packet.

Arm Trace Buffer Extension is a new "sink" IP, attached to individual
CPUs and thus does not provide additional formatting, like TMC-ETR.

Additionally, a system could have both TRBE *and* TMC-ETR for
the trace collection. e.g, TMC-ETR could be used as a single
trace buffer to collect data from multiple ETMs to correlate
the traces from different CPUs. It is possible to have a
perf session where some events end up collecting the trace
in TMC-ETR while the others in TRBE. Thus we need a way
to identify the type of the trace for each AUX record.

This patch adds a new flag to indicate the trace format
for the given record. Also, includes the changes that
demonstrate how this can be used in the CoreSight PMU
to solve the problem.

Signed-off-by: Suzuki K Poulose 
---



diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b15e3447cd9f..ea7dcc7b30f0 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1109,6 +1109,7 @@ enum perf_callchain_context {
  #define PERF_AUX_FLAG_OVERWRITE   0x02/* snapshot from 
overwrite mode */
  #define PERF_AUX_FLAG_PARTIAL 0x04/* record contains gaps */
  #define PERF_AUX_FLAG_COLLISION   0x08/* sample collided with 
another */
+#define PERF_AUX_FLAG_ALT_FMT  0x10/* this record is in alternate 
trace format */


Since we have a whole u64, do we want to reserve a whole nibble (or
maybe even a byte) for a format type? Because with a single bit like
this, we'll kick ourselves when we end up with the need for a 3rd format
type.



Sure, makes sense. We could do:

#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK  0xff00

Additionally, the values could be allocated by individual PMUs and
interpreted by the corresponding counterpart. That way we don't
have to worry about centralized allocation of the "TYPE" fields.

e,g:

#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT    0x0000
#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW          0x0100

#define PERF_AUX_FLAG_RANDOM_PMU_FORMAT_FMT1        0x0000
#define PERF_AUX_FLAG_RANDOM_PMU_FORMAT_FMT2        0x0100


What do you think ?

Cheers
Suzuki







Re: [PATCH V2 06/11] dts: bindings: Document device tree bindings for ETE

2021-01-25 Thread Suzuki K Poulose

On 1/25/21 10:20 PM, Suzuki K Poulose wrote:

Hi Rob

On 1/25/21 7:22 PM, Rob Herring wrote:

On Wed, Jan 13, 2021 at 09:48:13AM +0530, Anshuman Khandual wrote:

From: Suzuki K Poulose 

Document the device tree bindings for Embedded Trace Extensions.
ETE can be connected to legacy coresight components and thus
could optionally contain a connection graph as described by
the CoreSight bindings.

Cc: devicet...@vger.kernel.org
Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Rob Herring 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
  Documentation/devicetree/bindings/arm/ete.yaml | 71 ++
  1 file changed, 71 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/arm/ete.yaml

diff --git a/Documentation/devicetree/bindings/arm/ete.yaml 
b/Documentation/devicetree/bindings/arm/ete.yaml

new file mode 100644
index 000..00e6a77
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ete.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
+# Copyright 2021, Arm Ltd
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/ete.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: ARM Embedded Trace Extensions
+
+maintainers:
+  - Suzuki K Poulose 
+  - Mathieu Poirier 
+
+description: |
+  Arm Embedded Trace Extension(ETE) is a per CPU trace component that
+  allows tracing the CPU execution. It overlaps with the CoreSight ETMv4
+  architecture and has extended support for future architecture changes.
+  The trace generated by the ETE could be stored via legacy CoreSight
+  components (e.g, TMC-ETR) or other means (e.g, using a per CPU buffer
+  Arm Trace Buffer Extension (TRBE)). Since the ETE can be connected to
+  legacy CoreSight components, a node must be listed per instance, along
+  with any optional connection graph as per the coresight bindings.
+  See bindings/arm/coresight.txt.
+
+properties:
+  $nodename:
+    pattern: "^ete([0-9a-f]+)$"
+  compatible:
+    items:
+  - const: arm,embedded-trace-extension
+
+  cpu:


We use 'cpus' in a couple of other places, let's do that here for
consistency.


This is following the existing CoreSight bindings for ETM. The same driver
probes both. Also there can only ever be a single CPU for ete/etm. So, we
would prefer to keep it aligned with the existing bindings to avoid causing
confusion.




+    description: |
+  Handle to the cpu this ETE is bound to.
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+  out-ports:
+    description: |
+  Out put connections from the ETE to legacy CoreSight trace bus.


Output


Will fix.




+    $ref: /schemas/graph.yaml#/properties/ports


You have to define what each 'port' is if there can be more than 1. If
there's only ever 1 then you just need 'port' though maybe all the
coresight bindings require 'out-ports'. And the port nodes need a $ref
to '/schemas/graph.yaml#/properties/port'.


All CoreSight components require an out-ports and/or in-ports. The ETM/ETE
always has one port, but must be under out-ports in line with the CoreSight
bindings.

Does this look more apt:

    out-ports:
  description: |
    Output connection from the ETE to legacy CoreSight trace bus.
  poperties:
     port:
   $ref: /schemas/graph.yaml#/properties/port



Correction, the above should be :

+  out-ports:
+type: object
+description: |
+  Output connections from the ETE to legacy CoreSight trace bus.
+properties:
+  port:
+$ref: /schemas/graph.yaml#/properties/port


That works fine for me. Does that look fine ?  Some day, we should convert the
coresight dt bindings to yaml and import the out-ports/in-ports from the scheme 
:-)

Cheers
Suzuki



Re: [PATCH V2 06/11] dts: bindings: Document device tree bindings for ETE

2021-01-25 Thread Suzuki K Poulose

Hi Rob

On 1/25/21 7:22 PM, Rob Herring wrote:

On Wed, Jan 13, 2021 at 09:48:13AM +0530, Anshuman Khandual wrote:

From: Suzuki K Poulose 

Document the device tree bindings for Embedded Trace Extensions.
ETE can be connected to legacy coresight components and thus
could optionally contain a connection graph as described by
the CoreSight bindings.

Cc: devicet...@vger.kernel.org
Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Rob Herring 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
  Documentation/devicetree/bindings/arm/ete.yaml | 71 ++
  1 file changed, 71 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/arm/ete.yaml

diff --git a/Documentation/devicetree/bindings/arm/ete.yaml 
b/Documentation/devicetree/bindings/arm/ete.yaml
new file mode 100644
index 000..00e6a77
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ete.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
+# Copyright 2021, Arm Ltd
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/ete.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: ARM Embedded Trace Extensions
+
+maintainers:
+  - Suzuki K Poulose 
+  - Mathieu Poirier 
+
+description: |
+  Arm Embedded Trace Extension(ETE) is a per CPU trace component that
+  allows tracing the CPU execution. It overlaps with the CoreSight ETMv4
+  architecture and has extended support for future architecture changes.
+  The trace generated by the ETE could be stored via legacy CoreSight
+  components (e.g, TMC-ETR) or other means (e.g, using a per CPU buffer
+  Arm Trace Buffer Extension (TRBE)). Since the ETE can be connected to
+  legacy CoreSight components, a node must be listed per instance, along
+  with any optional connection graph as per the coresight bindings.
+  See bindings/arm/coresight.txt.
+
+properties:
+  $nodename:
+pattern: "^ete([0-9a-f]+)$"
+  compatible:
+items:
+  - const: arm,embedded-trace-extension
+
+  cpu:


We use 'cpus' in a couple of other places, let's do that here for
consistency.


This is following the existing CoreSight bindings for ETM. The same driver
probes both. Also there can only ever be a single CPU for ete/etm. So, we
would prefer to keep it aligned with the existing bindings to avoid causing
confusion.




+description: |
+  Handle to the cpu this ETE is bound to.
+$ref: /schemas/types.yaml#/definitions/phandle
+
+  out-ports:
+description: |
+  Out put connections from the ETE to legacy CoreSight trace bus.


Output


Will fix.




+$ref: /schemas/graph.yaml#/properties/ports


You have to define what each 'port' is if there can be more than 1. If
there's only ever 1 then you just need 'port' though maybe all the
coresight bindings require 'out-ports'. And the port nodes need a $ref
to '/schemas/graph.yaml#/properties/port'.


All CoreSight components require an out-ports and/or in-ports. The ETM/ETE
always has one port, but must be under out-ports in line with the CoreSight
bindings.

Does this look more apt:

   out-ports:
 description: |
   Output connection from the ETE to legacy CoreSight trace bus.
 poperties:
port:
  $ref: /schemas/graph.yaml#/properties/port

Suzuki


Re: [PATCH v7 00/28] coresight: etm4x: Support for system instructions

2021-01-25 Thread Suzuki K Poulose

On 1/25/21 6:49 PM, Mathieu Poirier wrote:

On Sun, Jan 10, 2021 at 10:48:22PM +, Suzuki K Poulose wrote:

CoreSight ETMv4.4 obsoletes memory mapped access to ETM and
mandates the system instructions for registers.
This also implies that they may not be on the amba bus.
Right now all the CoreSight components are accessed via memory
map. Also, we have some common routines in coresight generic
code driver (e.g, CS_LOCK, claim/disclaim), which assume the
mmio. In order to preserve the generic algorithms at a single
place and to allow dynamic switch for ETMs, this series introduces
an abstraction layer for accessing a coresight device. It is
designed such that the mmio access are fast tracked (i.e, without
an indirect function call).

This will also help us to get rid of the driver+attribute specific
sysfs show/store routines and replace them with a single routine
to access a given register offset (which can be embedded in the
dev_ext_attribute). This is not currently implemented in the series,
but can be achieved.

Further we switch the generic routines to work with the abstraction.
With this in place, we refactor the etm4x code a bit to allow for
supporting the system instructions with very little new code.

We use TRCDEVARCH for the detection of the ETM component, which
is a standard register as per CoreSight architecture, rather than
the etm specific id register TRCIDR1. This is for making sure
that we are able to detect the ETM via system instructions accurately,
when the trace unit could be anything (etm or a custom trace unit).
To keep the backward compatibility for any existing broken
implementation which may not implement TRCDEVARCH, we fall back to TRCIDR1.
Also this covers us for the changes in the future architecture [0].

Also, v8.4 self-hosted tracing extensions (coupled with ETMv4.4) adds
new filtering registers for trace by exception level. So on a v8.4
system, with Trace Filtering support, without the appropriate
programming of the Trace filter registers (TRFCR_ELx), tracing
will not be enabled. This series also includes the TraceFiltering
support to cover the ETM-v4.4 support.

The series has been mildly tested on a model for system instructions.
I would really appreciate any testing on real hardware.

Applies on coresight/next. A tree is available here [1].


I have applied this set.


Thanks Mathieu, appreciate it.

Cheers
Suzuki


Re: [PATCH v5 13/21] arm64: Allow ID_AA64MMFR1_EL1.VH to be overridden from the command line

2021-01-25 Thread Suzuki K Poulose

On 1/25/21 10:50 AM, Marc Zyngier wrote:

As we want to be able to disable VHE at runtime, let's match
"id_aa64mmfr1.vh=" from the command line as an override.
This doesn't have much effect yet as our boot code doesn't look
at the cpufeature, but only at the HW registers.

Signed-off-by: Marc Zyngier 
Acked-by: David Brazdil 
---
  arch/arm64/include/asm/cpufeature.h |  2 ++
  arch/arm64/kernel/cpufeature.c  |  5 -
  arch/arm64/kernel/idreg-override.c  | 11 +++
  3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cpufeature.h 
b/arch/arm64/include/asm/cpufeature.h
index 4179cfc8ed57..b0ed37da4067 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -818,6 +818,8 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
return 8;
  }
  
+extern struct arm64_ftr_override id_aa64mmfr1_override;

+
  u32 get_kvm_ipa_limit(void);
  void dump_cpu_features(void);
  
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c

index 4b84a1e1dc51..c1d6712c4249 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -557,6 +557,8 @@ static const struct arm64_ftr_bits ftr_raz[] = {
  
  #define ARM64_FTR_REG(id, table) ARM64_FTR_REG_OVERRIDE(id, table, _override)
  
+struct arm64_ftr_override id_aa64mmfr1_override;


Does this need to be ro_after_init ?

Otherwise, looks good to me:

Acked-by: Suzuki K Poulose 


Re: [PATCH v4 10/21] arm64: cpufeature: Use IDreg override in __read_sysreg_by_encoding()

2021-01-23 Thread Suzuki K Poulose

On 1/22/21 6:53 PM, Catalin Marinas wrote:

On Mon, Jan 18, 2021 at 09:45:22AM +, Marc Zyngier wrote:

__read_sysreg_by_encoding() is used by a bunch of cpufeature helpers,
which should take the feature override into account. Let's do that.

For good measure (and because we are likely to need it further
down the line), make this helper available to the rest of the
non-modular kernel.

Code that needs to know the *real* features of a CPU can still
use read_sysreg_s(), and find the bare, ugly truth.

Signed-off-by: Marc Zyngier 



diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index aaa075c6f029..48a011935d8c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1149,14 +1149,17 @@ u64 read_sanitised_ftr_reg(u32 id)
  EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg);
  
  #define read_sysreg_case(r)	\

-   case r: return read_sysreg_s(r)
+   case r: val = read_sysreg_s(r); break;
  
  /*

   * __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is 
populated.
   * Read the system register on the current CPU
   */
-static u64 __read_sysreg_by_encoding(u32 sys_id)
+u64 __read_sysreg_by_encoding(u32 sys_id)
  {
+   struct arm64_ftr_reg *regp;
+   u64 val;
+
switch (sys_id) {
read_sysreg_case(SYS_ID_PFR0_EL1);
read_sysreg_case(SYS_ID_PFR1_EL1);
@@ -1199,6 +1202,14 @@ static u64 __read_sysreg_by_encoding(u32 sys_id)
BUG();
return 0;
}
+
+   regp  = get_arm64_ftr_reg(sys_id);
+   if (regp && regp->override_mask && regp->override_val) {
+   val &= ~*regp->override_mask;
+   val |= (*regp->override_val & *regp->override_mask);
+   }
+
+   return val;


Ah, now the previous patch makes more sense. I don't particularly like
this but I can't tell how to work around it. I was hoping that the
overriding feature behaves more like a secondary CPU that limits all the
overridden features. However, this approach would fail for FTR_EXACT
cases (like PAC, though I wonder whether it fails already with your
previous patch since the boot CPU value won't match the override, hence
dropping to the safe one).



Correct! For FTR_EXACT, we don't want to override a value that is not safe, e.g 
PAC.
This is handled correctly in the previous patch and thus we are covered.

Reviewed-by: Suzuki K Poulose 



Re: [PATCH v4 09/21] arm64: cpufeature: Add global feature override facility

2021-01-23 Thread Suzuki K Poulose

On 1/18/21 9:45 AM, Marc Zyngier wrote:

Add a facility to globally override a feature, no matter what
the HW says. Yes, this sounds dangerous, but we do respect the
"safe" value for a given feature. This doesn't mean the user
doesn't need to know what they are doing.

Nothing uses this yet, so we are pretty safe. For now.

Signed-off-by: Marc Zyngier 


Reviewed-by: Suzuki K Poulose 


[RFC PATCH 0/1] perf: Handle multiple formatted AUX records

2021-01-22 Thread Suzuki K Poulose
This is an RFC patch to explore the solution to a problem we have
in the CoreSight ETM/ETE PMU.

CoreSight ETM trace allows instruction level tracing of Arm CPUs.
The ETM generates the CPU execution trace and pumps it into CoreSight
AMBA Trace Bus and is collected by a different CoreSight component
(traditionally CoreSight TMC-ETR /ETB/ETF), called "sink".
Important to note that there is no guarantee that every CPU has
a dedicated sink.  Thus multiple ETMs could pump the trace data
into the same "sink" and thus they apply additional formatting
of the trace data for the user to decode it properly and attribute
the trace data to the corresponding ETM.

However, with the introduction of Arm Trace buffer Extensions (TRBE),
we now have a dedicated per-CPU architected sink for collecting the
trace. Since the TRBE is always per-CPU, it doesn't apply any formatting
of the trace. The support for this driver is under review [1].

Now a system could have a per-cpu TRBE and one or more shared
TMC-ETRs on the system. A user could choose a "specific" sink
for a perf session (e.g, a TMC-ETR) or the driver could automatically
select the nearest sink for a given ETM. It is possible that
some ETMs could end up using TMC-ETR (e.g, if the TRBE is not
usable on the CPU) while the others using TRBE in a single
perf session. Thus we now have "formatted" trace collected
from TMC-ETR and "unformatted" trace collected from TRBE.
However, we don't get into a situation where a single event
could end up using TMC-ETR & TRBE. i.e, any AUX buffer is
guaranteed to be either RAW or FORMATTED, but not a mix
of both.

As for perf decoding, we need to know the type of the data
in the individual AUX buffers, so that it can set up the
"OpenCSD" (library for decoding CoreSight trace) decoder
instance appropriately. Thus the perf.data file must contain
the hints for the tool to decode the data correctly.

Since this is a runtime variable, and perf tool doesn't have
a control on what sink gets used (in case of automatic sink
selection), we need this information made available from
the PMU driver for each AUX record.

This patch is an attempt to solve the problem by adding an
AUX flag for each AUX record to indicate the type of the
trace in them. It can be defined as a PMU specific flag,
which each PMU could interpret in its own way (e.g,
PERF_AUX_FLAG_PMU_FLAG_1), or could be a dedicated
flag for the CoreSight in a "generic" form,
PERF_AUX_FLAG_ALT_FMT (Thanks Mike Leach for the name).

We are looking for suggestions on how best to solve this
problem and happy to explore other options if there is
a preferred way of solving this.

[1] 
https://lkml.kernel.org/r/1610511498-4058-1-git-send-email-anshuman.khand...@arm.com

Suzuki K Poulose (1):
  perf: Handle multiple formatted AUX records

 drivers/hwtracing/coresight/coresight-etm-perf.c | 2 ++
 include/linux/coresight.h| 1 +
 include/uapi/linux/perf_event.h  | 1 +
 3 files changed, 4 insertions(+)

-- 
2.24.1



[RFC PATCH] perf: Handle multiple formatted AUX records

2021-01-22 Thread Suzuki K Poulose
CoreSight PMU supports aux-buffer for the ETM tracing. The trace
generated by the ETM (associated with individual CPUs, like Intel PT)
is captured by a separate IP (CoreSight TMC-ETR/ETF until now).

The TMC-ETR applies formatting of the raw ETM trace data, as it
can collect traces from multiple ETMs, with the TraceID to indicate
the source of a given trace packet.

Arm Trace Buffer Extension is a new "sink" IP, attached to individual
CPUs and thus does not provide additional formatting, like TMC-ETR.

Additionally, a system could have both TRBE *and* TMC-ETR for
the trace collection. e.g, TMC-ETR could be used as a single
trace buffer to collect data from multiple ETMs to correlate
the traces from different CPUs. It is possible to have a
perf session where some events end up collecting the trace
in TMC-ETR while the others in TRBE. Thus we need a way
to identify the type of the trace for each AUX record.

This patch adds a new flag to indicate the trace format
for the given record. Also, includes the changes that
demonstrate how this can be used in the CoreSight PMU
to solve the problem.

Signed-off-by: Suzuki K Poulose 
---
 drivers/hwtracing/coresight/coresight-etm-perf.c | 2 ++
 include/linux/coresight.h| 1 +
 include/uapi/linux/perf_event.h  | 1 +
 3 files changed, 4 insertions(+)

diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c 
b/drivers/hwtracing/coresight/coresight-etm-perf.c
index e776a07b0852..81602bd8da59 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -429,6 +429,8 @@ static void etm_event_stop(struct perf_event *event, int 
mode)
 
size = sink_ops(sink)->update_buffer(sink, handle,
  event_data->snk_config);
+   if (!sink->formatted_trace)
+   perf_aux_output_flag(handle, PERF_AUX_FLAG_ALT_FMT);
perf_aux_output_end(handle, size);
}
 
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index e019182521a1..45c173c391a4 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -241,6 +241,7 @@ struct coresight_device {
int nr_links;
bool has_conns_grp;
bool ect_enabled; /* true only if associated ect device is enabled */
+   bool formatted_trace; /* Trace is CoreSight formatted ? */
 };
 
 /*
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b15e3447cd9f..ea7dcc7b30f0 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1109,6 +1109,7 @@ enum perf_callchain_context {
 #define PERF_AUX_FLAG_OVERWRITE0x02/* snapshot from 
overwrite mode */
 #define PERF_AUX_FLAG_PARTIAL  0x04/* record contains gaps */
 #define PERF_AUX_FLAG_COLLISION0x08/* sample collided with 
another */
+#define PERF_AUX_FLAG_ALT_FMT  0x10/* this record is in alternate 
trace format */
 
 #define PERF_FLAG_FD_NO_GROUP  (1UL << 0)
 #define PERF_FLAG_FD_OUTPUT(1UL << 1)
-- 
2.24.1



Re: [PATCH] coresight: etm4x: Add config to exclude kernel mode tracing

2021-01-19 Thread Suzuki K Poulose

On 1/19/21 9:51 AM, Sai Prakash Ranjan wrote:

Hi Al,

On 2021-01-19 14:06, Al Grant wrote:

Hi Sai,


From: saiprakash.ranjan=codeaurora@mg.codeaurora.org
Hi Mathieu,

On 2021-01-19 01:53, Mathieu Poirier wrote:
> On Fri, Jan 15, 2021 at 11:16:24AM +0530, Sai Prakash Ranjan wrote:
>> Hello Mathieu, Suzuki
>>
>> On 2020-10-15 21:32, Mathieu Poirier wrote:
>> > On Thu, Oct 15, 2020 at 06:15:22PM +0530, Sai Prakash Ranjan wrote:
>> > > On production systems with ETMs enabled, it is preferred to
>> > > exclude kernel mode(NS EL1) tracing for security concerns and
>> > > support only userspace(NS EL0) tracing. So provide an option via
>> > > kconfig to exclude kernel mode tracing if it is required.
>> > > This config is disabled by default and would not affect the
>> > > current configuration which has both kernel and userspace tracing
>> > > enabled by default.
>> > >
>> >
>> > One requires root access (or be part of a special trace group) to
>> > be able to use the cs_etm PMU.  With this kind of elevated access
>> > restricting tracing at EL1 provides little in terms of security.
>> >
>>
>> Apart from the VM usecase discussed, I am told there are other
>> security concerns here regarding need to exclude kernel mode tracing
>> even for the privileged users/root. One such case being the ability
>> to analyze cryptographic code execution since ETMs can record all
>> branch instructions including timestamps in the kernel and there may
>> be other cases as well which I may not be aware of and hence have
>> added Denis and Mattias. Please let us know if you have any questions
>> further regarding this not being a security concern.
>
> Even if we were to apply this patch there are many ways to compromise
> a system or get the kernel to reveal important information using the
> perf subsystem.  I would prefer to tackle the problem at that level
> rather than concentrating on coresight.
>

Sorry but I did not understand your point. We are talking about the capabilities
of coresight etm tracing which has the instruction level tracing and a lot more.
Perf subsystem is just the framework used for it.
In other words, it's not the perf subsystem which does instruction level tracing,
it's the coresight etm. Why should the perf subsystem be modified to lock down
kernel mode? If we were to let perf handle all the trace filtering for different
exception levels, then why do we need the register settings in coresight etm
driver to filter out NS EL* tracing? And more importantly, how do you suppose
we handle sysfs mode of coresight tracing with perf subsystem?


You both have good points. Mathieu is right that this is not a CoreSight
issue specifically, it is a matter of kernel security policy, and other hardware
tracing mechanisms ought to be within its scope. There should be a general
"anti kernel exfiltration" config that applies to all mechanisms within
its scope, and we'd definitely expect that to include Intel PT as well as ETM.



I agree with this part where there should be a generic config for all
hardware tracing families (at least for Intel PT and ARM Coresight),
Suzuki suggested that as well. I am under the impression that Mathieu
didn't like adding such a config and wanted perf subsystem to handle
it since initial discussion was around whether root compromise meant
everything is lost already and such a kconfig would not help, but
Mattias already gave some good examples where that is not true.


A kernel config that forced exclude_kernel on all perf events would deal with
ETM and PT in one place, but miss the sysfs interface to ETM.

On the other hand, doing it in the ETM drivers would cover the perf and sysfs
interfaces to ETM, but would miss Intel PT.

So I think what is needed is a general config option that is both implemented
in perf (excluding all kernel tracing events) and by any drivers that provide
an alternative interface to hardware tracing events.



I am good with this approach, once Mathieu confirms, I can add a kernel
wide kconfig as Suzuki suggested earlier and make ETM{3,4}x as the
initial users. Someone more familiar with Intel PTs can then make use
of this kconfig.


Instead of adding the support for individual drivers, you could handle this
in the generic perf layer. e.g, Fail perf_event create with an attribute
which allows kernel tracing ?

if (!attr.exclude_kernel)
return -EINVAL;

Or even exclude the kernel silently always.

This could also be limited to PMUs with PERF_PMU_CAP_ITRACE, if you
want to limit this to PMUs that do instruction level tracing.

Suzuki


Re: [PATCH v1 1/7] coresight: etm-perf: Add support for PID tracing for kernel at EL2

2021-01-18 Thread Suzuki K Poulose

Hi Mathieu

On 1/15/21 10:30 PM, Mathieu Poirier wrote:

Hey guys,

On Sat, Jan 09, 2021 at 03:44:29PM +0800, Leo Yan wrote:

From: Suzuki K Poulose 

When the kernel is running at EL2, the PID is stored in CONTEXTIDR_EL2.
So, tracing CONTEXTIDR_EL1 doesn't give us the pid of the process.
Thus we should trace the VMID with VMIDOPT set to trace
CONTEXTIDR_EL2 instead of CONTEXTIDR_EL1. Given that we have an existing
config option "contextid" and this will be useful for tracing
virtual machines (when we get to support virtualization). So instead,
this patch adds a new option, contextid_in_vmid as a separate config.
Thus on an EL2 kernel, we will have two options available for
the perf tool. However, to make it easier for the user to
do pid tracing, we add a new format which will default to
"contextid" (on EL1 kernel) or "contextid_in_vmid" (on EL2
kernel). So that the user doesn't have to bother which EL the
kernel is running.

  i.e, perf record -e cs_etm/pid/u --

will always do the "pid" tracing, independent of the kernel EL.

Also, the perf tool will be updated to automatically select
"pid" config instead of the "contextid" for system wide/CPU wide
mode.

Cc: Mathieu Poirier 
Cc: Al Grant 
Cc: Mike Leach 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Leo Yan 
---
  drivers/hwtracing/coresight/coresight-etm-perf.c   | 14 ++
  drivers/hwtracing/coresight/coresight-etm4x-core.c |  9 +
  include/linux/coresight-pmu.h  | 11 +++
  3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c 
b/drivers/hwtracing/coresight/coresight-etm-perf.c
index bdc34ca449f7..f763def145e4 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -30,14 +30,28 @@ static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
  /* ETMv3.5/PTM's ETMCR is 'config' */
  PMU_FORMAT_ATTR(cycacc,   "config:" __stringify(ETM_OPT_CYCACC));
  PMU_FORMAT_ATTR(contextid,"config:" __stringify(ETM_OPT_CTXTID));
+PMU_FORMAT_ATTR(contextid_in_vmid, "config:" 
__stringify(ETM_OPT_CTXTID_IN_VMID));


I am not convinced adding this new contextid_in_vmid is the best way forward.


  PMU_FORMAT_ATTR(timestamp,"config:" __stringify(ETM_OPT_TS));
  PMU_FORMAT_ATTR(retstack, "config:" __stringify(ETM_OPT_RETSTK));
  /* Sink ID - same for all ETMs */
  PMU_FORMAT_ATTR(sinkid,   "config2:0-31");
  
+static ssize_t format_attr_pid_show(struct device *dev,

+   struct device_attribute *attr,
+   char *page)
+{
+   int pid_fmt = is_kernel_in_hyp_mode() ? ETM_OPT_CTXTID_IN_VMID : 
ETM_OPT_CTXTID;
+
+   return sprintf(page, "config:%d\n", pid_fmt);
+}
+
+struct device_attribute format_attr_pid = __ATTR(pid, 0444, 
format_attr_pid_show, NULL);


The same applies here.  PMU format bits are options the PMU supports rather than
a representation of the hardware, making bit numbering arbitrary.  As such we
don't explicitly need a contextid_in_vmid option.  Making the current contextid
variable, the same way it was done for 'pid', should be sufficient.  Based on the
value carried by contextid, i.e. 14 or 15, the perf tools will know where to get
the contextID.

With regards to backward functionality, users who hard-code 'config' on the perf
command line won't get the results they want when the kernel is at EL2 anyway.

The kernel, with function is_kernel_in_hyp_mode(), is not an issue.


I did think about that. The reason behind using a new alias is vaguely mentioned
in the description. If a host perf session wants to trace a VM with the 
contextid_el1,
there is no option for that with "contextid" flipped to trace "contextid_el2".
This is precisely why I preferred keeping both the hardware configurations
and let the kernel choose the right one for the EL, by having an alias.
i.e,

perf record -e cs_etm/contextid,contextid_in_vmid/ vm

could still trace the VM vcpu threads and the CID changes within
the VM. (This is triggered from the host, so VM support is not necessary).

If we decide not to do this, or change the meaning of contextid now
to mean "pid" and change it in the future back to what it really
means for supporting such scenarios above, then we are going to
be back where we are with the proposal.


Suzuki


Re: [PATCH V2 08/11] coresight: core: Add support for dedicated percpu sinks

2021-01-15 Thread Suzuki K Poulose

On 1/15/21 2:36 AM, Anshuman Khandual wrote:



On 1/13/21 3:13 PM, Suzuki K Poulose wrote:

On 1/13/21 4:18 AM, Anshuman Khandual wrote:

Add support for dedicated sinks that are bound to individual CPUs. (e.g,
TRBE). To allow quicker access to the sink for a given CPU bound source,
keep a percpu array of the sink devices. Also, add support for building
a path to the CPU local sink from the ETM.

This adds a new percpu sink type CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM.
This new sink type is exclusively available and can only work with percpu
source type device CORESIGHT_DEV_SUBTYPE_SOURCE_PERCPU_PROC.

This defines a percpu structure that accommodates a single coresight_device
which can be used to store an initialized instance from a sink driver. As
these sinks are exclusively linked and dependent on corresponding percpu
sources devices, they should also be the default sink device during a perf
session.

Outwards device connections are scanned while establishing paths between a
source and a sink device. But such connections are not present for certain
percpu source and sink devices which are exclusively linked and dependent.
Build the path directly and skip connection scanning for such devices.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
   drivers/hwtracing/coresight/coresight-core.c | 14 ++
   include/linux/coresight.h    | 12 
   2 files changed, 26 insertions(+)

diff --git a/drivers/hwtracing/coresight/coresight-core.c 
b/drivers/hwtracing/coresight/coresight-core.c
index 0062c89..b300606 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -23,6 +23,7 @@
   #include "coresight-priv.h"
     static DEFINE_MUTEX(coresight_mutex);
+DEFINE_PER_CPU(struct coresight_device *, csdev_sink);
     /**
    * struct coresight_node - elements of a path, from source to sink
@@ -784,6 +785,13 @@ static int _coresight_build_path(struct coresight_device 
*csdev,
   if (csdev == sink)
   goto out;
   +    if (coresight_is_percpu_source(csdev) && coresight_is_percpu_sink(sink) 
&&
+    sink == per_cpu(csdev_sink, source_ops(csdev)->cpu_id(csdev))) {
+    _coresight_build_path(sink, sink, path);
+    found = true;
+    goto out;
+    }
+
   /* Not a sink - recursively explore each port found on this element */
   for (i = 0; i < csdev->pdata->nr_outport; i++) {
   struct coresight_device *child_dev;
@@ -998,6 +1006,12 @@ coresight_find_default_sink(struct coresight_device 
*csdev)
   {
   int depth = 0;
   +    if (coresight_is_percpu_source(csdev)) {


On a system without per_cpu sink, this would reset the default sink for the 
source device
every single time and fallback to searching every single time.


Right.


So I think it would be better if we did check if the def_sink was not set.
We could fold this into the case below, maybe. i.e,


 if (!csdev->def_sink) {
     if (coresight_is_percpu_source(csdev))
         csdev->def_sink = per_cpu(csdev_sink, source_ops(csdev)->cpu_id(csdev));
     if (!csdev->def_sink)
         csdev->def_sink = coresight_find_sink(csdev, &depth);
 }

Otherwise looks good to me.


struct coresight_device *
coresight_find_default_sink(struct coresight_device *csdev)
{
 int depth = 0;

 /* look for a default sink if we have not found for this device */
 if (!csdev->def_sink) {
 if (coresight_is_percpu_source(csdev))
 csdev->def_sink = per_cpu(csdev_sink, 
source_ops(csdev)->cpu_id(csdev));
 if (!csdev->def_sink)
 csdev->def_sink = coresight_find_sink(csdev, &depth);
 }
 return csdev->def_sink;
}

Would this be better instead ? coresight_find_sink() is invoked both when the
source is not percpu (traditional coresight sources) and also as a fallback in
case a percpu sink is not found for the percpu source device.

Yes, this is exactly what I proposed above.

Cheers
Suzuki


Re: [PATCH V2 10/11] coresight: sink: Add TRBE driver

2021-01-15 Thread Suzuki K Poulose

On 1/15/21 5:29 AM, Anshuman Khandual wrote:



On 1/13/21 8:58 PM, Suzuki K Poulose wrote:

Hi Anshuman,

The driver looks overall good to me. Please find some minor comments below

On 1/13/21 4:18 AM, Anshuman Khandual wrote:

Trace Buffer Extension (TRBE) implements a trace buffer per CPU which is
accessible via the system registers. The TRBE supports different addressing
modes including CPU virtual address and buffer modes including the circular
buffer mode. The TRBE buffer is addressed by a base pointer (TRBBASER_EL1),
a write pointer (TRBPTR_EL1) and a limit pointer (TRBLIMITR_EL1). But the
access to the trace buffer could be prohibited by a higher exception level
(EL3 or EL2), indicated by TRBIDR_EL1.P. The TRBE can also generate a CPU
private interrupt (PPI) on address translation errors and when the buffer
is full. Overall implementation here is inspired from the Arm SPE driver.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 


...


+
+/*
+ * TRBE Buffer Management
+ *
+ * The TRBE buffer spans from the base pointer till the limit pointer. When 
enabled,
+ * it starts writing trace data from the write pointer onward till the limit 
pointer.




+ * When the write pointer reaches the address just before the limit pointer, 
it gets
+ * wrapped around again to the base pointer. This is called a TRBE wrap event, 
which
+ * generates a maintenance interrupt when operated in WRAP or STOP mode.


According to the TRM, it is FILL mode, instead of STOP. So please change the 
above to:

"operated in WRAP or FILL mode".


Updated.





     The write
+ * pointer again starts writing trace data from the base pointer until just 
before
+ * the limit pointer before getting wrapped again with an IRQ and this process 
just
+ * goes on as long as the TRBE is enabled.


This could be dropped as it applies to WRAP/CIRCULAR buffer mode, which we 
don't use.


Probably this could be changed a bit to match the FILL mode. Because it is 
essential
to describe the continuous nature of the buffer operation, even in the FILL 
mode.

  * After TRBE
  * IRQ gets handled and enabled again, write pointer again starts writing 
trace data
  * from the base pointer until just before the limit pointer before getting 
wrapped
  * again with an IRQ and this process just goes on as long as the TRBE is 
enabled.



The above doesn't parse well and kind of repeats the operation of TRBE which is
already explained above. How about :

>>> + * When the write pointer reaches the address just before the limit 
pointer, it gets
>>> + * wrapped around again to the base pointer. This is called a TRBE wrap 
event, which
>>> + * generates a maintenance interrupt when operated in WRAP or STOP mode.

This driver uses FILL mode, where the TRBE stops the trace collection at wrap 
event.
The IRQ handler updates the AUX buffer and re-enables the TRBE with updated 
WRITE and
LIMIT pointers.



+
+static void *arm_trbe_alloc_buffer(struct coresight_device *csdev,
+   struct perf_event *event, void **pages,
+   int nr_pages, bool snapshot)
+{
+    struct trbe_buf *buf;
+    struct page **pglist;
+    int i;
+
+    if ((nr_pages < 2) || (snapshot && (nr_pages & 1)))


This restriction on snapshot could be removed now, since we use the
full buffer.


Dropped only the second condition here i.e (snapshot && (nr_pages & 1).
Just wondering if the aux buffer could work with a single page so that
the first condition can also be dropped.


I think it is good to keep the restriction of 2 pages, as the WRITE_PTR
and the LIMIT_PTR must be page aligned. With a single page, you can't do
much, with writing into a partially filled buffer. This may be added
as a comment to explain the restriction.


+
+static enum trbe_fault_action trbe_get_fault_act(struct perf_output_handle 
*handle)
+{
+    int ec = get_trbe_ec();
+    int bsc = get_trbe_bsc();
+
+    WARN_ON(is_trbe_running());
+    if (is_trbe_trg() || is_trbe_abort())


We seem to be reading the TRBSR every single time in these helpers. Could we
optimise them by passing the register value in?


The same goes for get_trbe_ec() and get_trbe_bsc() as well. Probably all
TRBSR field probing helpers should be modified to accept a TRBSR register
value instead.



i.e
 u64 trbsr = get_trbe_status();

 WARN_ON(is_trbe_running(trbsr));
 if (is_trbe_trg(trbsr) || is_trbe_abort(trbsr))

For is_trbe_wrap() too


Yes.








We should skip the driver init, if the kernel is unmapped at EL0,
as the TRBE can't safely write to the kernel virtual addressed
buffer when the CPU is running at EL0. This is unlikely, but we
should cover that case.


This should be sufficient or it needs a pr_err() as well ?



Please add a pr_err() message to indicate why this failed. Otherwise
the user could be left with no clue.

Cheers
Suzuki


Re: [PATCH V2 11/11] dts: bindings: Document device tree bindings for Arm TRBE

2021-01-14 Thread Suzuki K Poulose

On 1/14/21 2:07 PM, Rob Herring wrote:

On Wed, Jan 13, 2021 at 09:48:18AM +0530, Anshuman Khandual wrote:

From: Suzuki K Poulose 

Document the device tree bindings for Trace Buffer Extension (TRBE).

Cc: Anshuman Khandual 
Cc: Mathieu Poirier 
Cc: Rob Herring 
Cc: devicet...@vger.kernel.org
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
  Documentation/devicetree/bindings/arm/trbe.yaml | 46 +
  1 file changed, 46 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/arm/trbe.yaml

diff --git a/Documentation/devicetree/bindings/arm/trbe.yaml 
b/Documentation/devicetree/bindings/arm/trbe.yaml
new file mode 100644
index 000..2258595
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/trbe.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
+# Copyright 2021, Arm Ltd
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/trbe.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: ARM Trace Buffer Extensions
+
+maintainers:
+  - Anshuman Khandual 
+
+description: |
+  Description of TRBE hw


Huh?



Doh ! That was due to a miscommunication between us.
This should be :

description: |
  Arm Trace Buffer Extension (TRBE) is a per CPU component
  for storing trace generated on the CPU to memory. It is
  accessed via CPU system registers. The software can verify
  if it is permitted to use the component by checking the
  TRBIDR register.


+
+properties:
+  $nodename:
+pattern: "trbe"


const: trbe


+  compatible:
+items:
+  - const: arm,trace-buffer-extension


Any versioning to this? Or is that discoverable?



It must be discoverable via ID_AA64DFR0_EL1.TraceBuffer.
The IP is entirely accessed by the CPU system registers. So, any
further changes can be interpreted from the system registers
(including if the access is blocked by a higher exception level).


+
+  interrupts:
+description: |
+   Exactly 1 PPI must be listed. For heterogeneous systems where
+   TRBE is only supported on a subset of the CPUs, please consult
+   the arm,gic-v3 binding for details on describing a PPI partition.
+maxItems: 1
+
+required:
+  - compatible
+  - interrupts
+
+additionalProperties: false
+
+


Extra blank line.


Removed.

Cheers

Suzuki


Re: [PATCH V2 11/11] dts: bindings: Document device tree bindings for Arm TRBE

2021-01-14 Thread Suzuki K Poulose

Hi Rob

On 1/13/21 3:45 PM, Rob Herring wrote:

On Wed, 13 Jan 2021 09:48:18 +0530, Anshuman Khandual wrote:

From: Suzuki K Poulose 

Document the device tree bindings for Trace Buffer Extension (TRBE).

Cc: Anshuman Khandual 
Cc: Mathieu Poirier 
Cc: Rob Herring 
Cc: devicet...@vger.kernel.org
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
  Documentation/devicetree/bindings/arm/trbe.yaml | 46 +
  1 file changed, 46 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/arm/trbe.yaml



My bot found errors running 'make dt_binding_check' on your patch:

yamllint warnings/errors:
./Documentation/devicetree/bindings/arm/trbe.yaml:39:2: [warning] wrong 
indentation: expected 2 but found 1 (indentation)

dtschema/dtc warnings/errors:


Thanks for that. I guess Anshuman can fix this up, with the following patch:

diff --git a/Documentation/devicetree/bindings/arm/trbe.yaml 
b/Documentation/devicetree/bindings/arm/trbe.yaml

index 2258595c40dd..24951e02fa58 100644
--- a/Documentation/devicetree/bindings/arm/trbe.yaml
+++ b/Documentation/devicetree/bindings/arm/trbe.yaml
@@ -36,7 +36,7 @@ additionalProperties: false

 examples:

- - |
+  - |
#include 

trbe {



See https://patchwork.ozlabs.org/patch/1425605

This check can fail if there are any dependencies. The base for a patch
series is generally the most recent rc1.

If you already ran 'make dt_binding_check' and didn't see the above
error(s), then make sure 'yamllint' is installed and dt-schema is up to
date:


I did see the warning, but I thought I fixed it. Sorry about that.

Cheers
Suzuki


Re: [PATCH V2 10/11] coresight: sink: Add TRBE driver

2021-01-13 Thread Suzuki K Poulose

Hi Anshuman,

The driver looks overall good to me. Please find some minor comments below

On 1/13/21 4:18 AM, Anshuman Khandual wrote:

Trace Buffer Extension (TRBE) implements a trace buffer per CPU which is
accessible via the system registers. The TRBE supports different addressing
modes including CPU virtual address and buffer modes including the circular
buffer mode. The TRBE buffer is addressed by a base pointer (TRBBASER_EL1),
a write pointer (TRBPTR_EL1) and a limit pointer (TRBLIMITR_EL1). But the
access to the trace buffer could be prohibited by a higher exception level
(EL3 or EL2), indicated by TRBIDR_EL1.P. The TRBE can also generate a CPU
private interrupt (PPI) on address translation errors and when the buffer
is full. Overall implementation here is inspired from the Arm SPE driver.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
Changes in V2:

- Dropped irq from coresight sysfs documentation
- Renamed get_trbe_limit() as compute_trbe_buffer_limit()
- Dropped SYSTEM_RUNNING check for system_state
- Dropped .data value from arm_trbe_of_match[]
- Dropped [set|get]_trbe_[trig|fill]_mode() helpers
- Dropped clearing TRBSR_FSC_MASK from TRBE status register
- Added a comment in arm_trbe_update_buffer()
- Updated comment for ETE_IGNORE_PACKET
- Updated comment for basic TRBE operation
- Updated TRBE buffer and trigger mode macros
- Restructured trbe_enable_hw()
- Updated trbe_snapshot_offset() to use the entire buffer
- Changed dsb(ish) as dsb(nsh) during the buffer flush
- Renamed set_trbe_flush() as trbe_drain_buffer()
- Renamed trbe_disable_and_drain_local() as trbe_drain_and_disable_local()
- Reworked sync in trbe_enable_hw(), trbe_update_buffer() and 
arm_trbe_irq_handler()

  Documentation/trace/coresight/coresight-trbe.rst |  39 +
  arch/arm64/include/asm/sysreg.h  |   2 +
  drivers/hwtracing/coresight/Kconfig  |  11 +
  drivers/hwtracing/coresight/Makefile |   1 +
  drivers/hwtracing/coresight/coresight-trbe.c | 966 +++
  drivers/hwtracing/coresight/coresight-trbe.h | 216 +
  6 files changed, 1235 insertions(+)
  create mode 100644 Documentation/trace/coresight/coresight-trbe.rst
  create mode 100644 drivers/hwtracing/coresight/coresight-trbe.c
  create mode 100644 drivers/hwtracing/coresight/coresight-trbe.h

diff --git a/Documentation/trace/coresight/coresight-trbe.rst 
b/Documentation/trace/coresight/coresight-trbe.rst
new file mode 100644
index 000..1cbb819
--- /dev/null
+++ b/Documentation/trace/coresight/coresight-trbe.rst
@@ -0,0 +1,39 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==
+Trace Buffer Extension (TRBE).
+==
+
+:Author:   Anshuman Khandual 
+:Date: November 2020
+
+Hardware Description
+
+
+Trace Buffer Extension (TRBE) is a percpu hardware which captures in system
+memory, CPU traces generated from a corresponding percpu tracing unit. This
+gets plugged in as a coresight sink device because the corresponding trace
+generators (ETE), are plugged in as source device.
+
+The TRBE is not compliant to CoreSight architecture specifications, but is
+driven via the CoreSight driver framework to support the ETE (which is
+CoreSight compliant) integration.
+
+Sysfs files and directories
+---
+
+The TRBE devices appear on the existing coresight bus alongside the other
+coresight devices::
+
+   >$ ls /sys/bus/coresight/devices
+   trbe0  trbe1  trbe2 trbe3
+
+The ``trbe`` named TRBEs are associated with a CPU.::
+
+   >$ ls /sys/bus/coresight/devices/trbe0/
+align dbm
+
+*Key file items are:-*
+   * ``align``: TRBE write pointer alignment
+   * ``dbm``: TRBE updates memory with access and dirty flags
+
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d60750e7..d7e65f0 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -97,6 +97,7 @@
  #define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) 
<< PSTATE_Imm_shift))
  #define SET_PSTATE_SSBS(x)__emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) 
<< PSTATE_Imm_shift))
  #define SET_PSTATE_TCO(x) __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) 
<< PSTATE_Imm_shift))
+#define TSB_CSYNC  __emit_inst(0xd503225f)
  
  #define set_pstate_pan(x)		asm volatile(SET_PSTATE_PAN(x))

  #define set_pstate_uao(x) asm volatile(SET_PSTATE_UAO(x))
@@ -880,6 +881,7 @@
  #define ID_AA64MMFR2_CNP_SHIFT0
  
  /* id_aa64dfr0 */

+#define ID_AA64DFR0_TRBE_SHIFT 44
  #define ID_AA64DFR0_TRACE_FILT_SHIFT  40
  #define ID_AA64DFR0_DOUBLELOCK_SHIFT  36
  #define ID_AA64DFR0_PMSVER_SHIFT  32
diff --git a/drivers/hwtracing/coresight/Kconfig 
b/drivers/hwtracing/coresight/Kconfig
index f154ae7..aa657ab 100644
--- a/dri

Re: [PATCH V2 09/11] coresight: etm-perf: Truncate the perf record if handle has no space

2021-01-13 Thread Suzuki K Poulose

On 1/13/21 4:18 AM, Anshuman Khandual wrote:

While starting off the etm event, just abort and truncate the perf record
if the perf handle has no space left. This avoids configuring both source
and sink devices in case the data cannot be consumed in perf.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 


Reviewed-by: Suzuki K Poulose 


Re: [PATCH V2 08/11] coresight: core: Add support for dedicated percpu sinks

2021-01-13 Thread Suzuki K Poulose

On 1/13/21 4:18 AM, Anshuman Khandual wrote:

Add support for dedicated sinks that are bound to individual CPUs. (e.g,
TRBE). To allow quicker access to the sink for a given CPU bound source,
keep a percpu array of the sink devices. Also, add support for building
a path to the CPU local sink from the ETM.

This adds a new percpu sink type CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM.
This new sink type is exclusively available and can only work with percpu
source type device CORESIGHT_DEV_SUBTYPE_SOURCE_PERCPU_PROC.

This defines a percpu structure that accommodates a single coresight_device
which can be used to store an initialized instance from a sink driver. As
these sinks are exclusively linked and dependent on corresponding percpu
sources devices, they should also be the default sink device during a perf
session.

Outwards device connections are scanned while establishing paths between a
source and a sink device. But such connections are not present for certain
percpu source and sink devices which are exclusively linked and dependent.
Build the path directly and skip connection scanning for such devices.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
  drivers/hwtracing/coresight/coresight-core.c | 14 ++
  include/linux/coresight.h| 12 
  2 files changed, 26 insertions(+)

diff --git a/drivers/hwtracing/coresight/coresight-core.c 
b/drivers/hwtracing/coresight/coresight-core.c
index 0062c89..b300606 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -23,6 +23,7 @@
  #include "coresight-priv.h"
  
  static DEFINE_MUTEX(coresight_mutex);

+DEFINE_PER_CPU(struct coresight_device *, csdev_sink);
  
  /**

   * struct coresight_node - elements of a path, from source to sink
@@ -784,6 +785,13 @@ static int _coresight_build_path(struct coresight_device 
*csdev,
if (csdev == sink)
goto out;
  
+	if (coresight_is_percpu_source(csdev) && coresight_is_percpu_sink(sink) &&

+   sink == per_cpu(csdev_sink, source_ops(csdev)->cpu_id(csdev))) {
+   _coresight_build_path(sink, sink, path);
+   found = true;
+   goto out;
+   }
+
/* Not a sink - recursively explore each port found on this element */
for (i = 0; i < csdev->pdata->nr_outport; i++) {
struct coresight_device *child_dev;
@@ -998,6 +1006,12 @@ coresight_find_default_sink(struct coresight_device 
*csdev)
  {
int depth = 0;
  
+	if (coresight_is_percpu_source(csdev)) {


On a system without per_cpu sink, this would reset the default sink for the 
source device
every single time and fallback to searching every single time.
So I think it would be better if we did check if the def_sink was not set.
We could fold this into the case below, maybe. i.e,


if (!csdev->def_sink) {
    if (coresight_is_percpu_source(csdev))
        csdev->def_sink = per_cpu(csdev_sink, source_ops(csdev)->cpu_id(csdev));
    if (!csdev->def_sink)
        csdev->def_sink = coresight_find_sink(csdev, &depth);
}

Otherwise looks good to me.

Suzuki


Re: [PATCH V2 07/11] arm64: Add TRBE definitions

2021-01-13 Thread Suzuki K Poulose

On 1/13/21 4:18 AM, Anshuman Khandual wrote:

This adds TRBE related registers and corresponding feature macros.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
  arch/arm64/include/asm/sysreg.h | 49 +
  1 file changed, 49 insertions(+)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 4acff97..d60750e7 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -329,6 +329,55 @@
  
  /*** End of Statistical Profiling Extension ***/
  
+/*

+ * TRBE Registers
+ */
+#define SYS_TRBLIMITR_EL1  sys_reg(3, 0, 9, 11, 0)
+#define SYS_TRBPTR_EL1 sys_reg(3, 0, 9, 11, 1)
+#define SYS_TRBBASER_EL1   sys_reg(3, 0, 9, 11, 2)
+#define SYS_TRBSR_EL1  sys_reg(3, 0, 9, 11, 3)
+#define SYS_TRBMAR_EL1 sys_reg(3, 0, 9, 11, 4)
+#define SYS_TRBTRG_EL1 sys_reg(3, 0, 9, 11, 6)
+#define SYS_TRBIDR_EL1 sys_reg(3, 0, 9, 11, 7)
+
+#define TRBLIMITR_LIMIT_MASK   GENMASK_ULL(51, 0)
+#define TRBLIMITR_LIMIT_SHIFT  12
+#define TRBLIMITR_NVM  BIT(5)
+#define TRBLIMITR_TRIG_MODE_MASK   GENMASK(1, 0)
+#define TRBLIMITR_TRIG_MODE_SHIFT  2


This must be 3.

Rest looks fine to me

Suzuki


Re: [PATCH v3 09/21] arm64: cpufeature: Add global feature override facility

2021-01-12 Thread Suzuki K Poulose

On 1/12/21 11:51 AM, Marc Zyngier wrote:

On 2021-01-12 11:50, Marc Zyngier wrote:

Hi Suzuki,

On 2021-01-12 09:17, Suzuki K Poulose wrote:

Hi Marc,

On 1/11/21 7:48 PM, Marc Zyngier wrote:


[...]


diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 894af60b9669..00d99e593b65 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -774,6 +774,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
  u64 strict_mask = ~0x0ULL;
  u64 user_mask = 0;
  u64 valid_mask = 0;
+    u64 override_val = 0, override_mask = 0;

  const struct arm64_ftr_bits *ftrp;
  struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
@@ -781,9 +782,35 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
  if (!reg)
  return;

+    if (reg->override_mask && reg->override_val) {
+    override_mask = *reg->override_mask;
+    override_val = *reg->override_val;
+    }
+
  for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
  u64 ftr_mask = arm64_ftr_mask(ftrp);
  s64 ftr_new = arm64_ftr_value(ftrp, new);
+    s64 ftr_ovr = arm64_ftr_value(ftrp, override_val);
+
+    if ((ftr_mask & override_mask) == ftr_mask) {
+    if (ftr_ovr < ftr_new) {


Here we assume that all the features are FTR_LOWER_SAFE. We could
probably use arm64_ftr_safe_value(ftrp, ftr_new, ftr_ovr) here ?
That would cover us for both HIGHER_SAFE and LOWER_SAFE features.
However, that may be restrictive for FTR_EXACT, as the safe
value would be set to "ftr->safe_val". I guess that may be better
than forcing to use an unsafe value for the boot CPU, which could
anyway conflict with the other CPUs and eventually trigger the
ftr value to be safe_val.


I like the idea of using the helper, as it cleans up the code a bit.
However, not being able to set a feature to a certain value could be restrictive,
as in general, it means that we can only disable a feature and not adjust
its level of support.

Take PMUVER for example: with the helper, I can't override it from v8.4 to
v8.1. I can only go to v8.0.


Actually, we can only *disable* the PMU altogether. Same question though...


It depends on two things :

1) What is the safe value for an EXACT typed feature ?
Usually, that means either disabled, or the lowest possible value.

2) How is this value consumed ?
  a) i.e, Do we use the per-CPU value
Then none of these changes have any effect
  b) System wide value ?
  Then we get the safe value as "influenced" by the infrastructure.

The safe value we use for EXACT features is exclusively for making sure
that the system uses the safe assumption and thus should be the best
option.

To answer your question, for PMU, it is 0, which implies v8.0. Or we could
update the safe value to -1 (0xf), which is a bit safer,
kind of implying that the PMU is not a standard one.


Cheers
Suzuki





     M.



Is it something we care about?

Thanks,

    M.






Re: [PATCH v3 09/21] arm64: cpufeature: Add global feature override facility

2021-01-12 Thread Suzuki K Poulose

On 1/12/21 11:50 AM, Marc Zyngier wrote:

Hi Suzuki,

On 2021-01-12 09:17, Suzuki K Poulose wrote:

Hi Marc,

On 1/11/21 7:48 PM, Marc Zyngier wrote:


[...]


diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 894af60b9669..00d99e593b65 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -774,6 +774,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
  u64 strict_mask = ~0x0ULL;
  u64 user_mask = 0;
  u64 valid_mask = 0;
+    u64 override_val = 0, override_mask = 0;

  const struct arm64_ftr_bits *ftrp;
  struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
@@ -781,9 +782,35 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
  if (!reg)
  return;

+    if (reg->override_mask && reg->override_val) {
+    override_mask = *reg->override_mask;
+    override_val = *reg->override_val;
+    }
+
  for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
  u64 ftr_mask = arm64_ftr_mask(ftrp);
  s64 ftr_new = arm64_ftr_value(ftrp, new);
+    s64 ftr_ovr = arm64_ftr_value(ftrp, override_val);
+
+    if ((ftr_mask & override_mask) == ftr_mask) {
+    if (ftr_ovr < ftr_new) {


Here we assume that all the features are FTR_LOWER_SAFE. We could
probably use arm64_ftr_safe_value(ftrp, ftr_new, ftr_ovr) here ?
That would cover us for both HIGHER_SAFE and LOWER_SAFE features.
However, that may be restrictive for FTR_EXACT, as the safe
value would be set to "ftr->safe_val". I guess that may be better
than forcing to use an unsafe value for the boot CPU, which could
anyway conflict with the other CPUs and eventually trigger the
ftr value to be safe_val.


I like the idea of using the helper, as it cleans up the code a bit.
However, not being able to set a feature to a certain value could be restrictive,
as in general, it means that we can only disable a feature and not adjust
its level of support.

Take PMUVER for example: with the helper, I can't override it from v8.4 to
v8.1. I can only go to v8.0.


My point is, we set this only for the "init" of cpu features. So, even if we
init to a custom, non-(default-safe) value, the secondary CPUs could scream,
and the system wide safe value could fall back to the "safe" value for EXACT
features, no matter what you did to init it.

Also, it will be dangerous for things like PAC, as the lower level value may
not be compatible with the "actual" cpu feature supported.

So simply setting to a lower value for EXACT features is generally not safe,
though I understand some are exceptions.

Suzuki





Is it something we care about?

Thanks,

     M.




Re: [PATCH v1 1/7] coresight: etm-perf: Add support for PID tracing for kernel at EL2

2021-01-12 Thread Suzuki K Poulose

On 1/12/21 8:58 AM, Leo Yan wrote:

Hi Mike,

On Mon, Jan 11, 2021 at 04:22:39PM +, Mike Leach wrote:

[...]


diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h
index b0e35eec6499..927c6285ce5d 100644
--- a/include/linux/coresight-pmu.h
+++ b/include/linux/coresight-pmu.h
@@ -11,16 +11,19 @@
  #define CORESIGHT_ETM_PMU_SEED  0x10

  /* ETMv3.5/PTM's ETMCR config bit */
-#define ETM_OPT_CYCACC  12
-#define ETM_OPT_CTXTID 14
-#define ETM_OPT_TS  28
-#define ETM_OPT_RETSTK 29
+#define ETM_OPT_CYCACC 12
+#define ETM_OPT_CTXTID 14
+#define ETM_OPT_CTXTID_IN_VMID 15


Minor issue here - ETMv3.x / PTM cannot trace CTXTID in VMID so this
may better be named ETM4_OPT_CTXTID_IN_VMID, rather than be grouped
with the ETM3.5 options?


I looked into this suggestion but found it's more complex than I assumed.
These config bits are not only used for ETMv3.x / PTM; they are also used
as a configuration interface between user space in Perf and kernel
drivers.



Exactly. I believe this is problematic. We are stuck with using the ETM3.x/PTM
config bits for the CS_ETM pmu, which is a bit weird, and the allocation of
the config bits is sparse. The problem is that changing them now will break
older perf tools decoding a perf.data from a newer kernel. I believe we are
stuck with this.

I would recommend simply updating the comment to reflect that this is the
generic CS PMU ABI for configuration, which was initially based on ETM3.x.

Suzuki


Re: [PATCH v3 09/21] arm64: cpufeature: Add global feature override facility

2021-01-12 Thread Suzuki K Poulose

Hi Marc,

On 1/11/21 7:48 PM, Marc Zyngier wrote:

Hi Catalin,

On 2021-01-11 18:41, Catalin Marinas wrote:

Hi Marc,

On Mon, Jan 11, 2021 at 01:27:59PM +, Marc Zyngier wrote:

Add a facility to globally override a feature, no matter what
the HW says. Yes, this is dangerous.


Yeah, it's dangerous. We can make it less so if we only allow safe
values (e.g. lower if FTR_UNSIGNED).


My plan was also to allow non-safe values in order to trigger features
that are not advertised by the HW. But I can understand if you are
reluctant to allow such thing! :D


diff --git a/arch/arm64/include/asm/cpufeature.h 
b/arch/arm64/include/asm/cpufeature.h
index 9a555809b89c..465d2cb63bfc 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -75,6 +75,8 @@ struct arm64_ftr_reg {
 u64    sys_val;
 u64    user_val;
 const struct arm64_ftr_bits    *ftr_bits;
+    u64    *override_val;
+    u64    *override_mask;
 };


At the arm64_ftr_reg level, we don't have any information about the safe
values for a feature. Could we instead move this to arm64_ftr_bits? We
probably only need a single field. When populating the feature values,
we can make sure it doesn't go above the hardware one.

I attempted a feature modification for MTE here, though I dropped the
entire series in the meantime as we clarified the ARM ARM:

https://lore.kernel.org/linux-arm-kernel/20200515171612.1020-24-catalin.mari...@arm.com/

Srinivas copied it in his patch (but forgot to give credit ;)):

https://lore.kernel.org/linux-arm-msm/1610152163-16554-3-git-send-email-sram...@codeaurora.org/

The above adds a filter function but, instead, just use your mechanism in
this series for idreg.feature setting via cmdline. The arm64_ftr_value()
function extracts the hardware value and lowers it if a cmdline argument
was passed.


One thing is that it is not always possible to sanitise the value passed
if it is required very early on, as I do with VHE. But in that case
I actually check that we are VHE capable before starting to poke at
VHE-specific state.

I came up with the following patch on top, which preserves the current
global approach (no per arm64_ftr_bits state), but checks (and alters)
the override as it iterates through the various fields.

For example, if I pass "arm64.nopauth kvm-arm.mode=nvhe id_aa64pfr1.bt=5"
to the FVP, I get the following output:

[    0.00] CPU features: SYS_ID_AA64ISAR1_EL1[31:28]: forced from 1 to 0
[    0.00] CPU features: SYS_ID_AA64ISAR1_EL1[11:8]: forced from 1 to 0
[    0.00] CPU features: SYS_ID_AA64MMFR1_EL1[11:8]: forced from 1 to 0
[    0.00] CPU features: SYS_ID_AA64PFR1_EL1[3:0]: not forcing 1 to 5
[    0.00] CPU features: detected: GIC system register CPU interface
[    0.00] CPU features: detected: Hardware dirty bit management
[    0.00] CPU features: detected: Spectre-v4
[    0.00] CPU features: detected: Branch Target Identification

showing that the PAC features have been downgraded, together with VHE,
but that BTI is still detected as value 5 was obviously bogus.

Thoughts?

     M.

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 894af60b9669..00d99e593b65 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -774,6 +774,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
  u64 strict_mask = ~0x0ULL;
  u64 user_mask = 0;
  u64 valid_mask = 0;
+    u64 override_val = 0, override_mask = 0;

  const struct arm64_ftr_bits *ftrp;
  struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
@@ -781,9 +782,35 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
  if (!reg)
  return;

+    if (reg->override_mask && reg->override_val) {
+    override_mask = *reg->override_mask;
+    override_val = *reg->override_val;
+    }
+
  for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
  u64 ftr_mask = arm64_ftr_mask(ftrp);
  s64 ftr_new = arm64_ftr_value(ftrp, new);
+    s64 ftr_ovr = arm64_ftr_value(ftrp, override_val);
+
+    if ((ftr_mask & override_mask) == ftr_mask) {
+    if (ftr_ovr < ftr_new) {


Here we assume that all the features are FTR_LOWER_SAFE. We could
probably use arm64_ftr_safe_value(ftrp, ftr_new, ftr_ovr) here ?
That would cover us for both HIGHER_SAFE and LOWER_SAFE features.
However that may be restrictive for FTR_EXACT, as the safe
value would be set to "ftr->safe_val". I guess that may be better
than forcing to use an unsafe value for the boot CPU, which could
anyway conflict with the other CPUs and eventually trigger the
ftr value to be safe_val.

i.e,
ftr_val = arm64_ftr_safe_value(ftrp, ftr_ovr, ftr_new);


Suzuki


Re: [PATCH v1 7/7] perf cs-etm: Detect pid in VMID for kernel running at EL2

2021-01-11 Thread Suzuki K Poulose

Hi Leo

On 1/9/21 7:44 AM, Leo Yan wrote:

From: Suzuki K Poulose 

The pid of the task could be traced as VMID when the kernel is
running at EL2. Teach the decoder to look for vmid when the
context_id is invalid but we have a valid VMID.


Thank you again for cleaning up this ! Please see one comment
below.



Cc: Mike Leach 
Cc: Mathieu Poirier 
Cc: Al Grant 
Co-developed-by: Leo Yan 
Signed-off-by: Suzuki K Poulose 
Signed-off-by: Leo Yan 
---
  .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 32 ---
  1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c 
b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index cd007cc9c283..9e81169dfa76 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -6,6 +6,7 @@
   * Author: Mathieu Poirier 
   */
  
+#include 

  #include 
  #include 
  #include 
@@ -500,13 +501,36 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
const ocsd_generic_trace_elem *elem,
const uint8_t trace_chan_id)
  {
-   pid_t tid;
+   pid_t tid = -1;
+   u64 pid_fmt;
+   int ret;
  
-	/* Ignore PE_CONTEXT packets that don't have a valid contextID */

-   if (!elem->context.ctxt_id_valid)
+   ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt);
+   if (ret)
+   return OCSD_RESP_FATAL_SYS_ERR;


The patch looks fine to me. I am wondering if this can be cached somewhere in
the etmq to avoid doing the search every time we hit a CID? Surely this
wouldn't change for a session, and thus not for the decoder's lifetime.

Cheers
Suzuki


Re: [PATCH v1 6/7] perf cs-etm: Add helper cs_etm__get_pid_fmt()

2021-01-11 Thread Suzuki K Poulose

On 1/9/21 7:44 AM, Leo Yan wrote:

This patch adds helper function cs_etm__get_pid_fmt(), by passing
parameter "traceID", it returns the corresponding PID format.

Signed-off-by: Leo Yan 


Acked-by: Suzuki K Poulose 


---
  tools/perf/util/cs-etm.c | 18 ++
  tools/perf/util/cs-etm.h |  1 +
  2 files changed, 19 insertions(+)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 8c125134a756..6705d39c8cee 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -157,6 +157,24 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
return 0;
  }
  
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt)

+{
+   struct int_node *inode;
+   u64 *metadata;
+
+   inode = intlist__find(traceid_list, trace_chan_id);
+   if (!inode)
+   return -EINVAL;
+
+   metadata = inode->priv;
+   if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic)
+   *pid_fmt = metadata[CS_ETM_PID_FMT];
+   else
+   *pid_fmt = metadata[CS_ETMV4_PID_FMT];
+
+   return 0;
+}
+
  void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
  u8 trace_chan_id)
  {
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 8cbbea6100a1..98801040175f 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -175,6 +175,7 @@ struct cs_etm_packet_queue {
  int cs_etm__process_auxtrace_info(union perf_event *event,
  struct perf_session *session);
  int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt);
  int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
 pid_t tid, u8 trace_chan_id);
  bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq);





Re: [PATCH v1 5/7] perf cs-etm: Fixup PID_FMT when it is zero

2021-01-11 Thread Suzuki K Poulose

On 1/9/21 7:44 AM, Leo Yan wrote:

If the metadata item CS_ETM_PID_FMT/CS_ETMV4_PID_FMT is zero, this means
the perf data file is recorded with old version tool and the tool has
not extended to support the item.

For this case, this patch fixes up PID_FMT entry to set the value as
BIT(ETM_OPT_CTXTID), this info will be delivered to the decoder to
extract PID from packet's field "context_id".

Signed-off-by: Leo Yan 


Acked-by: Suzuki K Poulose 


---
  tools/perf/util/cs-etm.c | 19 +++
  1 file changed, 19 insertions(+)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 763085db29ae..8c125134a756 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -7,6 +7,7 @@
   */
  
  #include 

+#include 
  #include 
  #include 
  #include 
@@ -2577,6 +2578,15 @@ int cs_etm__process_auxtrace_info(union perf_event 
*event,
for (k = 0; k < metadata_cpu_array_size; k++)
metadata[j][k] = ptr[i + k];
  
+			/*

+* If the data in CS_ETM_PID_FMT is zero, means the
+* information isn't stored in the data file, this is
+* because the old perf tool hasn't yet supported
+* CS_ETM_PID_FMT.  Fixup the item to option "CTXTID".
+*/
+   if (!metadata[j][CS_ETM_PID_FMT])
+   metadata[j][CS_ETM_PID_FMT] = 
BIT(ETM_OPT_CTXTID);
+
/* The traceID is our handle */
idx = metadata[j][CS_ETM_ETMTRACEIDR];
i += metadata_cpu_array_size;
@@ -2590,6 +2600,15 @@ int cs_etm__process_auxtrace_info(union perf_event 
*event,
for (k = 0; k < metadata_cpu_array_size; k++)
metadata[j][k] = ptr[i + k];
  
+			/*

+* If the data in CS_ETMV4_PID_FMT is zero, means the
+* information isn't stored in the data file, this is
+* because the old perf tool hasn't yet supported
+* CS_ETMV4_PID_FMT.  Fixup the item to option "CTXTID".
+*/
+   if (!metadata[j][CS_ETMV4_PID_FMT])
+   metadata[j][CS_ETMV4_PID_FMT] = 
BIT(ETM_OPT_CTXTID);
+
/* The traceID is our handle */
idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
i += metadata_cpu_array_size;





Re: [PATCH v1 4/7] perf cs-etm: Add PID format into metadata

2021-01-11 Thread Suzuki K Poulose

Hi Leo,

On 1/9/21 7:44 AM, Leo Yan wrote:

It's possible for CoreSight to trace PID in either CONTEXTIDR_EL1 or
CONTEXTIDR_EL2, the PID format info is used to distinguish the PID
is traced in which register.

This patch saves PID format into the metadata when record.


The patch looks good to me. One minor suggestion below



Signed-off-by: Leo Yan 
---
  tools/perf/arch/arm/util/cs-etm.c | 21 +
  tools/perf/util/cs-etm.c  |  2 ++
  tools/perf/util/cs-etm.h  |  2 ++
  3 files changed, 25 insertions(+)

diff --git a/tools/perf/arch/arm/util/cs-etm.c 
b/tools/perf/arch/arm/util/cs-etm.c
index fad7b6e13ccc..ee78df3b1b07 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -613,6 +613,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset,
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+   u64 pid_fmt;
  
  	/* first see what kind of tracer this cpu is affined to */

if (cs_etm_is_etmv4(itr, cpu)) {
@@ -641,6 +642,16 @@ static void cs_etm_get_metadata(int cpu, u32 *offset,
  metadata_etmv4_ro
  [CS_ETMV4_TRCAUTHSTATUS]);
  
+		/*

+* The PID format will be used when decode the trace data;
+* based on it the decoder will make decision for setting
+* sample's PID as context_id or VMID.
+*/
+   pid_fmt = perf_pmu__format_bits(&cs_etm_pmu->format, "pid");
+   if (!pid_fmt)
+   pid_fmt = 1ULL << ETM_OPT_CTXTID;
+   info->priv[*offset + CS_ETMV4_PID_FMT] = pid_fmt;
+


Given we do this same step twice here in this function and also in patch 2,
I am wondering if this could be made into a small helper function?

static u64 cs_etm_pmu_format_pid(cs_etm_pm)
{
pid_fmt = perf_pmu__format_bits(&cs_etm_pmu->format, "pid");
/*
 * An older kernel doesn't expose this, so fall back to using
 * CTXTID.
 */
if (!pid_fmt)
pid_fmt = 1ULL << ETM_OPT_CTXTID;
return pid_fmt;
}

Suzuki


Re: [PATCH v1 3/7] perf cs-etm: Calculate per CPU metadata array size

2021-01-10 Thread Suzuki K Poulose

On 1/9/21 7:44 AM, Leo Yan wrote:

The metadata array can be extended over time and the tool, if using the
predefined macro (like CS_ETMV4_PRIV_MAX for ETMv4) as metadata array
size to copy data, it can cause compatible issue within different
versions of perf tool.

E.g. we recorded a data file with an old version tool, afterwards if
use the new version perf tool to parse the file, since the metadata
array has been extended and the macro CS_ETMV4_PRIV_MAX has been
altered, if use it to parse the perf data with old format, this will
lead to mismatch.

To maintain backward compatibility, this patch calculates per CPU
metadata array size on the runtime, the calculation is based on the
info stored in the data file so that it's reliable.

Signed-off-by: Leo Yan 


Looks good to me.

Acked-by: Suzuki K Poulose 



[PATCH v7 10/28] coresight: etm4x: Add commentary on the registers

2021-01-10 Thread Suzuki K Poulose
As we are about to define a switch..case table for individual register
access by offset for implementing the system instruction support,
document the possible set of registers for each group to make
it easier to correlate.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Changes since v3
 - Fix commit description spelling mistake (Mathieu)
---
 drivers/hwtracing/coresight/coresight-etm4x.h | 21 ---
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h 
b/drivers/hwtracing/coresight/coresight-etm4x.h
index b6854f6fd666..3c2b49ffabc8 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -45,13 +45,13 @@
 #define TRCVDSACCTLR   0x0A4
 #define TRCVDARCCTLR   0x0A8
 /* Derived resources registers */
-#define TRCSEQEVRn(n)  (0x100 + (n * 4))
+#define TRCSEQEVRn(n)  (0x100 + (n * 4)) /* n = 0-2 */
 #define TRCSEQRSTEVR   0x118
 #define TRCSEQSTR  0x11C
 #define TRCEXTINSELR   0x120
-#define TRCCNTRLDVRn(n)(0x140 + (n * 4))
-#define TRCCNTCTLRn(n) (0x150 + (n * 4))
-#define TRCCNTVRn(n)   (0x160 + (n * 4))
+#define TRCCNTRLDVRn(n)(0x140 + (n * 4)) /* n = 0-3 */
+#define TRCCNTCTLRn(n) (0x150 + (n * 4)) /* n = 0-3 */
+#define TRCCNTVRn(n)   (0x160 + (n * 4)) /* n = 0-3 */
 /* ID registers */
 #define TRCIDR80x180
 #define TRCIDR90x184
@@ -60,7 +60,7 @@
 #define TRCIDR12   0x190
 #define TRCIDR13   0x194
 #define TRCIMSPEC0 0x1C0
-#define TRCIMSPECn(n)  (0x1C0 + (n * 4))
+#define TRCIMSPECn(n)  (0x1C0 + (n * 4)) /* n = 1-7 */
 #define TRCIDR00x1E0
 #define TRCIDR10x1E4
 #define TRCIDR20x1E8
@@ -69,9 +69,12 @@
 #define TRCIDR50x1F4
 #define TRCIDR60x1F8
 #define TRCIDR70x1FC
-/* Resource selection registers */
+/*
+ * Resource selection registers, n = 2-31.
+ * First pair (regs 0, 1) is always present and is reserved.
+ */
 #define TRCRSCTLRn(n)  (0x200 + (n * 4))
-/* Single-shot comparator registers */
+/* Single-shot comparator registers, n = 0-7 */
 #define TRCSSCCRn(n)   (0x280 + (n * 4))
 #define TRCSSCSRn(n)   (0x2A0 + (n * 4))
 #define TRCSSPCICRn(n) (0x2C0 + (n * 4))
@@ -81,11 +84,13 @@
 #define TRCPDCR0x310
 #define TRCPDSR0x314
 /* Trace registers (0x318-0xEFC) */
-/* Comparator registers */
+/* Address Comparator registers n = 0-15 */
 #define TRCACVRn(n)(0x400 + (n * 8))
 #define TRCACATRn(n)   (0x480 + (n * 8))
+/* Data Value Comparator Value registers, n = 0-7 */
 #define TRCDVCVRn(n)   (0x500 + (n * 16))
 #define TRCDVCMRn(n)   (0x580 + (n * 16))
+/* ContextID/Virtual ContextID comparators, n = 0-7 */
 #define TRCCIDCVRn(n)  (0x600 + (n * 8))
 #define TRCVMIDCVRn(n) (0x640 + (n * 8))
 #define TRCCIDCCTLR0   0x680
-- 
2.24.1



[PATCH v7 06/28] coresight: Convert claim/disclaim operations to use access wrappers

2021-01-10 Thread Suzuki K Poulose
Convert the generic CLAIM tag management APIs to use the
device access layer abstraction.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Changes since V3:
 - Removed WARN_ON(!csdev) check. (Mathieu)
 - Fixed indentation (Mathieu)
---
 drivers/hwtracing/coresight/coresight-catu.c  |  6 +-
 drivers/hwtracing/coresight/coresight-core.c  | 66 +++
 .../hwtracing/coresight/coresight-cti-core.c  | 17 +++--
 drivers/hwtracing/coresight/coresight-etb10.c |  4 +-
 .../coresight/coresight-etm3x-core.c  |  8 ++-
 .../coresight/coresight-etm4x-core.c  |  4 +-
 .../hwtracing/coresight/coresight-funnel.c|  6 +-
 .../coresight/coresight-replicator.c  | 12 ++--
 .../hwtracing/coresight/coresight-tmc-etf.c   | 10 +--
 .../hwtracing/coresight/coresight-tmc-etr.c   |  4 +-
 include/linux/coresight.h | 16 ++---
 11 files changed, 91 insertions(+), 62 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-catu.c 
b/drivers/hwtracing/coresight/coresight-catu.c
index d6097454d399..9a0b9ce4a7da 100644
--- a/drivers/hwtracing/coresight/coresight-catu.c
+++ b/drivers/hwtracing/coresight/coresight-catu.c
@@ -412,6 +412,7 @@ static int catu_enable_hw(struct catu_drvdata *drvdata, 
void *data)
u32 control, mode;
struct etr_buf *etr_buf = data;
struct device *dev = &drvdata->csdev->dev;
+   struct coresight_device *csdev = drvdata->csdev;
 
if (catu_wait_for_ready(drvdata))
dev_warn(dev, "Timeout while waiting for READY\n");
@@ -422,7 +423,7 @@ static int catu_enable_hw(struct catu_drvdata *drvdata, 
void *data)
return -EBUSY;
}
 
-   rc = coresight_claim_device_unlocked(drvdata->base);
+   rc = coresight_claim_device_unlocked(csdev);
if (rc)
return rc;
 
@@ -466,9 +467,10 @@ static int catu_disable_hw(struct catu_drvdata *drvdata)
 {
int rc = 0;
struct device *dev = &drvdata->csdev->dev;
+   struct coresight_device *csdev = drvdata->csdev;
 
catu_write_control(drvdata, 0);
-   coresight_disclaim_device_unlocked(drvdata->base);
+   coresight_disclaim_device_unlocked(csdev);
if (catu_wait_for_ready(drvdata)) {
dev_info(dev, "Timeout while waiting for READY\n");
rc = -EAGAIN;
diff --git a/drivers/hwtracing/coresight/coresight-core.c 
b/drivers/hwtracing/coresight/coresight-core.c
index 74985068f325..0062c8935653 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -145,30 +145,32 @@ static int coresight_find_link_outport(struct 
coresight_device *csdev,
return -ENODEV;
 }
 
-static inline u32 coresight_read_claim_tags(void __iomem *base)
+static inline u32 coresight_read_claim_tags(struct coresight_device *csdev)
 {
-   return readl_relaxed(base + CORESIGHT_CLAIMCLR);
+   return csdev_access_relaxed_read32(&csdev->access, CORESIGHT_CLAIMCLR);
 }
 
-static inline bool coresight_is_claimed_self_hosted(void __iomem *base)
+static inline bool coresight_is_claimed_self_hosted(struct coresight_device 
*csdev)
 {
-   return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED;
+   return coresight_read_claim_tags(csdev) == CORESIGHT_CLAIM_SELF_HOSTED;
 }
 
-static inline bool coresight_is_claimed_any(void __iomem *base)
+static inline bool coresight_is_claimed_any(struct coresight_device *csdev)
 {
-   return coresight_read_claim_tags(base) != 0;
+   return coresight_read_claim_tags(csdev) != 0;
 }
 
-static inline void coresight_set_claim_tags(void __iomem *base)
+static inline void coresight_set_claim_tags(struct coresight_device *csdev)
 {
-   writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMSET);
+   csdev_access_relaxed_write32(&csdev->access, 
CORESIGHT_CLAIM_SELF_HOSTED,
+CORESIGHT_CLAIMSET);
isb();
 }
 
-static inline void coresight_clear_claim_tags(void __iomem *base)
+static inline void coresight_clear_claim_tags(struct coresight_device *csdev)
 {
-   writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMCLR);
+   csdev_access_relaxed_write32(&csdev->access, 
CORESIGHT_CLAIM_SELF_HOSTED,
+CORESIGHT_CLAIMCLR);
isb();
 }
 
@@ -182,27 +184,33 @@ static inline void coresight_clear_claim_tags(void 
__iomem *base)
  * Called with CS_UNLOCKed for the component.
  * Returns : 0 on success
  */
-int coresight_claim_device_unlocked(void __iomem *base)
+int coresight_claim_device_unlocked(struct coresight_device *csdev)
 {
-   if (coresight_is_claimed_any(base))
+   if (WARN_ON(!csdev))
+   return -EINVAL;
+
+   if (coresight_is_claimed_any(csdev))
return -EBUSY;
 
-   coresight_set_claim_tags(base);
-   if (coresight_is_claimed_self

[PATCH v7 20/28] coresight: etm4x: Expose trcdevarch via sysfs

2021-01-10 Thread Suzuki K Poulose
Expose the TRCDEVARCH register via the sysfs for component
detection. Given that the TRCIDR1 may not completely identify
the ETM component and instead need to use TRCDEVARCH, expose
this via sysfs for tools to use it for identification.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Changes since v5:
 - Move the trcdevarch to mgmt/ instead of the trcidr (Mike L)
 - Add sysfs documentation for the new register (Mike L)
---
 .../ABI/testing/sysfs-bus-coresight-devices-etm4x | 8 
 drivers/hwtracing/coresight/coresight-etm4x-sysfs.c   | 1 +
 2 files changed, 9 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x 
b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
index 881f0cd99ce4..8e53a32f8150 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
@@ -371,6 +371,14 @@ Contact:   Mathieu Poirier 
 Description:   (Read) Print the content of the Device ID Register
(0xFC8).  The value is taken directly from the HW.
 
+What:  /sys/bus/coresight/devices/etm/mgmt/trcdevarch
+Date:  January 2021
+KernelVersion: 5.12
+Contact:   Mathieu Poirier 
+Description:   (Read) Print the content of the Device Architecture Register
+   (offset 0xFBC).  The value is taken directly read
+   from the HW.
+
 What:  /sys/bus/coresight/devices/etm/mgmt/trcdevtype
 Date:  April 2015
 KernelVersion: 4.01
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c 
b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index 45aeeac2f50e..b646d53a3133 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -2442,6 +2442,7 @@ static struct attribute *coresight_etmv4_mgmt_attrs[] = {
coresight_etm4x_reg(trcoslsr, TRCOSLSR),
coresight_etm4x_reg(trcconfig, TRCCONFIGR),
coresight_etm4x_reg(trctraceid, TRCTRACEIDR),
+   coresight_etm4x_reg(trcdevarch, TRCDEVARCH),
NULL,
 };
 
-- 
2.24.1



[PATCH v7 18/28] coresight: etm4x: Detect access early on the target CPU

2021-01-10 Thread Suzuki K Poulose
In preparation to detect the support for system instruction
support, move the detection of the device access to the target
CPU.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Changes since v3
 - Name constructs etm4_xx instead of etm_** (Mathieu)
---
 .../coresight/coresight-etm4x-core.c  | 45 ---
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 862b424aef26..7adc9bd03eb5 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -59,6 +59,11 @@ static u64 etm4_get_access_type(struct etmv4_config *config);
 
 static enum cpuhp_state hp_online;
 
+struct etm4_init_arg {
+   struct etmv4_drvdata*drvdata;
+   struct csdev_access *csa;
+};
+
 /*
  * Check if TRCSSPCICRn(i) is implemented for a given instance.
  *
@@ -776,6 +781,22 @@ static const struct coresight_ops etm4_cs_ops = {
.source_ops = _source_ops,
 };
 
+static bool etm4_init_iomem_access(struct etmv4_drvdata *drvdata,
+  struct csdev_access *csa)
+{
+   *csa = CSDEV_ACCESS_IOMEM(drvdata->base);
+   return true;
+}
+
+static bool etm4_init_csdev_access(struct etmv4_drvdata *drvdata,
+  struct csdev_access *csa)
+{
+   if (drvdata->base)
+   return etm4_init_iomem_access(drvdata, csa);
+
+   return false;
+}
+
 static void etm4_init_arch_data(void *info)
 {
u32 etmidr0;
@@ -784,11 +805,22 @@ static void etm4_init_arch_data(void *info)
u32 etmidr3;
u32 etmidr4;
u32 etmidr5;
-   struct etmv4_drvdata *drvdata = info;
-   struct csdev_access tmp_csa = CSDEV_ACCESS_IOMEM(drvdata->base);
-   struct csdev_access *csa = &tmp_csa;
+   struct etm4_init_arg *init_arg = info;
+   struct etmv4_drvdata *drvdata;
+   struct csdev_access *csa;
int i;
 
+   drvdata = init_arg->drvdata;
+   csa = init_arg->csa;
+
+   /*
+* If we are unable to detect the access mechanism,
+* or unable to detect the trace unit type, fail
+* early.
+*/
+   if (!etm4_init_csdev_access(drvdata, csa))
+   return;
+
/* Make sure all registers are accessible */
etm4_os_unlock_csa(drvdata, csa);
etm4_cs_unlock(drvdata, csa);
@@ -1634,6 +1666,7 @@ static int etm4_probe(struct amba_device *adev, const 
struct amba_id *id)
struct etmv4_drvdata *drvdata;
struct resource *res = &adev->res;
struct coresight_desc desc = { 0 };
+   struct etm4_init_arg init_arg = { 0 };
 
drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
if (!drvdata)
@@ -1661,7 +1694,6 @@ static int etm4_probe(struct amba_device *adev, const 
struct amba_id *id)
return PTR_ERR(base);
 
drvdata->base = base;
-   desc.access = CSDEV_ACCESS_IOMEM(base);
 
spin_lock_init(&drvdata->spinlock);
 
@@ -1673,8 +1705,11 @@ static int etm4_probe(struct amba_device *adev, const 
struct amba_id *id)
if (!desc.name)
return -ENOMEM;
 
+   init_arg.drvdata = drvdata;
+   init_arg.csa = &desc.access;
+
if (smp_call_function_single(drvdata->cpu,
-   etm4_init_arch_data,  drvdata, 1))
+   etm4_init_arch_data,  &init_arg, 1))
dev_err(dev, "ETM arch init failed\n");
 
if (etm4_arch_supported(drvdata->arch) == false)
-- 
2.24.1



[PATCH v7 25/28] coresight: etm4x: Add support for sysreg only devices

2021-01-10 Thread Suzuki K Poulose
Add support for devices with system instruction access only.
They don't have a memory mapped interface and thus are not
AMBA devices. System register access is not permitted to
TRCPDCR and thus skip access to them.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Changes since v5:
 - Rebased to accommodate check_arch_features().
   Added comments to explain why we don't pass PID for system
   register based devices.
Changes since v4
 - Add "remove" callback for platform_driver.
 - Dropped Reviewed-by tag from Mathieu due to the above

Changes since v3
 - Improve comment over "TRCPDCR" usage with sysreg
 - Rename etm_xx => etm4_xx
 - Update the compatible to "arm,coresight-etm4x-sysreg"
---
 .../coresight/coresight-etm4x-core.c  | 68 +--
 1 file changed, 63 insertions(+), 5 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 63a5e11b5f3f..3d3165dd09d4 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -1736,9 +1737,6 @@ static int etm4_probe(struct device *dev, void __iomem 
*base, u32 etm_pid)
return -ENOMEM;
}
 
-   if (fwnode_property_present(dev_fwnode(dev), "qcom,skip-power-up"))
-   drvdata->skip_power_up = true;
-
drvdata->base = base;
 
spin_lock_init(&drvdata->spinlock);
@@ -1762,6 +1760,11 @@ static int etm4_probe(struct device *dev, void __iomem 
*base, u32 etm_pid)
if (!drvdata->arch)
return -EINVAL;
 
+   /* TRCPDCR is not accessible with system instructions. */
+   if (!desc.access.io_mem ||
+   fwnode_property_present(dev_fwnode(dev), "qcom,skip-power-up"))
+   drvdata->skip_power_up = true;
+
etm4_init_trace_id(drvdata);
etm4_set_default(&drvdata->config);
 
@@ -1820,6 +1823,25 @@ static int etm4_probe_amba(struct amba_device *adev, 
const struct amba_id *id)
return ret;
 }
 
+static int etm4_probe_platform_dev(struct platform_device *pdev)
+{
+   int ret;
+
+   pm_runtime_get_noresume(&pdev->dev);
+   pm_runtime_set_active(&pdev->dev);
+   pm_runtime_enable(&pdev->dev);
+
+   /*
+* System register based devices could match the
+* HW by reading appropriate registers on the HW
+* and thus we could skip the PID.
+*/
+   ret = etm4_probe(&pdev->dev, NULL, 0);
+
+   pm_runtime_put(&pdev->dev);
+   return ret;
+}
+
 static struct amba_cs_uci_id uci_id_etm4[] = {
{
/*  ETMv4 UCI data */
@@ -1869,6 +1891,17 @@ static int __exit etm4_remove_amba(struct amba_device 
*adev)
return 0;
 }
 
+static int __exit etm4_remove_platform_dev(struct platform_device *pdev)
+{
+   int ret = 0;
+   struct etmv4_drvdata *drvdata = dev_get_drvdata(&pdev->dev);
+
+   if (drvdata)
+   ret = etm4_remove_dev(drvdata);
+   pm_runtime_disable(&pdev->dev);
+   return ret;
+}
+
 static const struct amba_id etm4_ids[] = {
CS_AMBA_ID(0x000bb95d), /* Cortex-A53 */
CS_AMBA_ID(0x000bb95e), /* Cortex-A57 */
@@ -1901,6 +1934,21 @@ static struct amba_driver etm4x_amba_driver = {
.id_table   = etm4_ids,
 };
 
+static const struct of_device_id etm4_sysreg_match[] = {
+   { .compatible   = "arm,coresight-etm4x-sysreg" },
+   {}
+};
+
+static struct platform_driver etm4_platform_driver = {
+   .probe  = etm4_probe_platform_dev,
+   .remove = etm4_remove_platform_dev,
+   .driver = {
+   .name   = "coresight-etm4x",
+   .of_match_table = etm4_sysreg_match,
+   .suppress_bind_attrs= true,
+   },
+};
+
 static int __init etm4x_init(void)
 {
int ret;
@@ -1913,16 +1961,26 @@ static int __init etm4x_init(void)
 
ret = amba_driver_register(&etm4x_amba_driver);
if (ret) {
-   pr_err("Error registering etm4x driver\n");
-   etm4_pm_clear();
+   pr_err("Error registering etm4x AMBA driver\n");
+   goto clear_pm;
}
 
+   ret = platform_driver_register(&etm4_platform_driver);
+   if (!ret)
+   return 0;
+
+   pr_err("Error registering etm4x platform driver\n");
+   amba_driver_unregister(&etm4x_amba_driver);
+
+clear_pm:
+   etm4_pm_clear();
return ret;
 }
 
 static void __exit etm4x_exit(void)
 {
amba_driver_unregister(&etm4x_amba_driver);
+   platform_driver_unregister(&etm4_platform_driver);
etm4_pm_clear();
 }
 
-- 
2.24.1



[PATCH v7 27/28] arm64: Add TRFCR_ELx definitions

2021-01-10 Thread Suzuki K Poulose
From: Jonathan Zhou 

Add definitions for the Arm v8.4 SelfHosted trace extensions registers.

Acked-by: Catalin Marinas 
Cc: Will Deacon 
Acked-by: Mathieu Poirier 
Signed-off-by: Jonathan Zhou 
[ split the register definitions to separate patch
  rename some of the symbols ]
Signed-off-by: Suzuki K Poulose 
---
 arch/arm64/include/asm/sysreg.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 8b5e7e5c3cc8..4acff97519b9 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -191,6 +191,7 @@
 #define SYS_GCR_EL1sys_reg(3, 0, 1, 0, 6)
 
 #define SYS_ZCR_EL1sys_reg(3, 0, 1, 2, 0)
+#define SYS_TRFCR_EL1  sys_reg(3, 0, 1, 2, 1)
 
 #define SYS_TTBR0_EL1  sys_reg(3, 0, 2, 0, 0)
 #define SYS_TTBR1_EL1  sys_reg(3, 0, 2, 0, 1)
@@ -471,6 +472,7 @@
 
 #define SYS_SCTLR_EL2  sys_reg(3, 4, 1, 0, 0)
 #define SYS_ZCR_EL2sys_reg(3, 4, 1, 2, 0)
+#define SYS_TRFCR_EL2  sys_reg(3, 4, 1, 2, 1)
 #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0)
 #define SYS_SPSR_EL2   sys_reg(3, 4, 4, 0, 0)
 #define SYS_ELR_EL2sys_reg(3, 4, 4, 0, 1)
@@ -829,6 +831,7 @@
 #define ID_AA64MMFR2_CNP_SHIFT 0
 
 /* id_aa64dfr0 */
+#define ID_AA64DFR0_TRACE_FILT_SHIFT   40
 #define ID_AA64DFR0_DOUBLELOCK_SHIFT   36
 #define ID_AA64DFR0_PMSVER_SHIFT   32
 #define ID_AA64DFR0_CTX_CMPS_SHIFT 28
@@ -1003,6 +1006,14 @@
 /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
 #define SYS_MPIDR_SAFE_VAL (BIT(31))
 
+#define TRFCR_ELx_TS_SHIFT 5
+#define TRFCR_ELx_TS_VIRTUAL   ((0x1UL) << TRFCR_ELx_TS_SHIFT)
+#define TRFCR_ELx_TS_GUEST_PHYSICAL((0x2UL) << TRFCR_ELx_TS_SHIFT)
+#define TRFCR_ELx_TS_PHYSICAL  ((0x3UL) << TRFCR_ELx_TS_SHIFT)
+#define TRFCR_EL2_CX   BIT(3)
+#define TRFCR_ELx_ExTREBIT(1)
+#define TRFCR_ELx_E0TREBIT(0)
+
 #ifdef __ASSEMBLY__
 
.irp
num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
-- 
2.24.1



[PATCH v7 23/28] coresight: etm4x: Refactor probing routine

2021-01-10 Thread Suzuki K Poulose
CoreSight ETM with system register access may not have a
memory mapped i/o access. Refactor the ETM specific probing
into a common routine to allow reusing the code for such ETMs.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Changes since v4:
 - Refactor the "remove" callback for AMBA driver, for reuse by
   platform_driver to follow
---
 .../coresight/coresight-etm4x-core.c  | 62 ---
 1 file changed, 39 insertions(+), 23 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 6cbc6e0c0819..06edb9554c27 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -1708,14 +1708,11 @@ static void etm4_pm_clear(void)
}
 }
 
-static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
+static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid)
 {
int ret;
-   void __iomem *base;
-   struct device *dev = >dev;
struct coresight_platform_data *pdata = NULL;
struct etmv4_drvdata *drvdata;
-   struct resource *res = >res;
struct coresight_desc desc = { 0 };
struct etm4_init_arg init_arg = { 0 };
 
@@ -1739,11 +1736,6 @@ static int etm4_probe(struct amba_device *adev, const 
struct amba_id *id)
if (fwnode_property_present(dev_fwnode(dev), "qcom,skip-power-up"))
drvdata->skip_power_up = true;
 
-   /* Validity for the resource is already checked by the AMBA core */
-   base = devm_ioremap_resource(dev, res);
-   if (IS_ERR(base))
-   return PTR_ERR(base);
-
drvdata->base = base;
 
spin_lock_init(>spinlock);
@@ -1773,7 +1765,7 @@ static int etm4_probe(struct amba_device *adev, const 
struct amba_id *id)
if (IS_ERR(pdata))
return PTR_ERR(pdata);
 
-   adev->dev.platform_data = pdata;
+   dev->platform_data = pdata;
 
desc.type = CORESIGHT_DEV_TYPE_SOURCE;
desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
@@ -1793,7 +1785,6 @@ static int etm4_probe(struct amba_device *adev, const 
struct amba_id *id)
 
etmdrvdata[drvdata->cpu] = drvdata;
 
-   pm_runtime_put(>dev);
dev_info(>csdev->dev, "CPU%d: ETM v%d.%d initialized\n",
 drvdata->cpu, ETM_ARCH_MAJOR_VERSION(drvdata->arch),
 ETM_ARCH_MINOR_VERSION(drvdata->arch));
@@ -1803,11 +1794,30 @@ static int etm4_probe(struct amba_device *adev, const 
struct amba_id *id)
drvdata->boot_enable = true;
}
 
-   etm4_check_arch_features(drvdata, id->id);
+   etm4_check_arch_features(drvdata, etm_pid);
 
return 0;
 }
 
+static int etm4_probe_amba(struct amba_device *adev, const struct amba_id *id)
+{
+   void __iomem *base;
+   struct device *dev = >dev;
+   struct resource *res = >res;
+   int ret;
+
+   /* Validity for the resource is already checked by the AMBA core */
+   base = devm_ioremap_resource(dev, res);
+   if (IS_ERR(base))
+   return PTR_ERR(base);
+
+   ret = etm4_probe(dev, base, id->id);
+   if (!ret)
+   pm_runtime_put(>dev);
+
+   return ret;
+}
+
 static struct amba_cs_uci_id uci_id_etm4[] = {
{
/*  ETMv4 UCI data */
@@ -1824,15 +1834,12 @@ static void clear_etmdrvdata(void *info)
etmdrvdata[cpu] = NULL;
 }
 
-static int etm4_remove(struct amba_device *adev)
+static int __exit etm4_remove_dev(struct etmv4_drvdata *drvdata)
 {
-   struct etmv4_drvdata *drvdata = dev_get_drvdata(>dev);
-
etm_perf_symlink(drvdata->csdev, false);
-
/*
-* Taking hotplug lock here to avoid racing between etm4_remove and
-* CPU hotplug call backs.
+* Taking hotplug lock here to avoid racing between etm4_remove_dev()
+* and CPU hotplug call backs.
 */
cpus_read_lock();
/*
@@ -1851,6 +1858,15 @@ static int etm4_remove(struct amba_device *adev)
return 0;
 }
 
+static int __exit etm4_remove_amba(struct amba_device *adev)
+{
+   struct etmv4_drvdata *drvdata = dev_get_drvdata(>dev);
+
+   if (drvdata)
+   return etm4_remove_dev(drvdata);
+   return 0;
+}
+
 static const struct amba_id etm4_ids[] = {
CS_AMBA_ID(0x000bb95d), /* Cortex-A53 */
CS_AMBA_ID(0x000bb95e), /* Cortex-A57 */
@@ -1872,14 +1888,14 @@ static const struct amba_id etm4_ids[] = {
 
 MODULE_DEVICE_TABLE(amba, etm4_ids);
 
-static struct amba_driver etm4x_driver = {
+static struct amba_driver etm4x_amba_driver = {
.drv = {
.name   = "coresight-etm4x",
.owner  = THIS_MODULE,
.suppress_bind_attrs =

[PATCH v7 16/28] coresight: etm4x: Clean up exception level masks

2021-01-10 Thread Suzuki K Poulose
etm4_get_access_type() calculates the exception level bits
for use in address comparator registers. This is also used
by the TRCVICTLR register by shifting to the required position.

This patch cleans up the logic to make etm4_get_access_type()
calculate a generic mask which can be used by all users by
shifting to their field.

No functional changes intended.

Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Changes since v3:
  - Fix errors in victlr ns_mask setting.
Changes since v2:
  - Fix the duplicate shift. More commentary
---
 .../coresight/coresight-etm4x-core.c  | 47 +--
 .../coresight/coresight-etm4x-sysfs.c | 12 ++---
 drivers/hwtracing/coresight/coresight-etm4x.h | 47 ---
 3 files changed, 60 insertions(+), 46 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 3e3733c650a7..548bd71c2168 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -959,20 +959,16 @@ static void etm4_init_arch_data(void *info)
etm4_cs_lock(drvdata, csa);
 }
 
+static inline u32 etm4_get_victlr_access_type(struct etmv4_config *config)
+{
+   return etm4_get_access_type(config) << TRCVICTLR_EXLEVEL_SHIFT;
+}
+
 /* Set ELx trace filter access in the TRCVICTLR register */
 static void etm4_set_victlr_access(struct etmv4_config *config)
 {
-   u64 access_type;
-
-   config->vinst_ctrl &= ~(ETM_EXLEVEL_S_VICTLR_MASK | 
ETM_EXLEVEL_NS_VICTLR_MASK);
-
-   /*
-* TRCVICTLR::EXLEVEL_NS:EXLEVELS: Set kernel / user filtering
-* bits in vinst_ctrl, same bit pattern as TRCACATRn values returned by
-* etm4_get_access_type() but with a relative shift in this register.
-*/
-   access_type = etm4_get_access_type(config) << 
ETM_EXLEVEL_LSHIFT_TRCVICTLR;
-   config->vinst_ctrl |= (u32)access_type;
+   config->vinst_ctrl &= ~TRCVICTLR_EXLEVEL_MASK;
+   config->vinst_ctrl |= etm4_get_victlr_access_type(config);
 }
 
 static void etm4_set_default_config(struct etmv4_config *config)
@@ -1002,12 +998,9 @@ static u64 etm4_get_ns_access_type(struct etmv4_config 
*config)
u64 access_type = 0;
 
/*
-* EXLEVEL_NS, bits[15:12]
-* The Exception levels are:
-*   Bit[12] Exception level 0 - Application
-*   Bit[13] Exception level 1 - OS
-*   Bit[14] Exception level 2 - Hypervisor
-*   Bit[15] Never implemented
+* EXLEVEL_NS, for NonSecure Exception levels.
+* The mask here is a generic value and must be
+* shifted to the corresponding field for the registers
 */
if (!is_kernel_in_hyp_mode()) {
/* Stay away from hypervisor mode for non-VHE */
@@ -1024,20 +1017,26 @@ static u64 etm4_get_ns_access_type(struct etmv4_config 
*config)
return access_type;
 }
 
+/*
+ * Construct the exception level masks for a given config.
+ * This must be shifted to the corresponding register field
+ * for usage.
+ */
 static u64 etm4_get_access_type(struct etmv4_config *config)
 {
-   u64 access_type = etm4_get_ns_access_type(config);
-
-   /* All supported secure ELs are excluded */
-   access_type |= (u64)config->s_ex_level << TRCACATR_EXLEVEL_SHIFT;
+   /* All Secure exception levels are excluded from the trace */
+   return etm4_get_ns_access_type(config) | (u64)config->s_ex_level;
+}
 
-   return access_type;
+static u64 etm4_get_comparator_access_type(struct etmv4_config *config)
+{
+   return etm4_get_access_type(config) << TRCACATR_EXLEVEL_SHIFT;
 }
 
 static void etm4_set_comparator_filter(struct etmv4_config *config,
   u64 start, u64 stop, int comparator)
 {
-   u64 access_type = etm4_get_access_type(config);
+   u64 access_type = etm4_get_comparator_access_type(config);
 
/* First half of default address comparator */
config->addr_val[comparator] = start;
@@ -1072,7 +1071,7 @@ static void etm4_set_start_stop_filter(struct 
etmv4_config *config,
   enum etm_addr_type type)
 {
int shift;
-   u64 access_type = etm4_get_access_type(config);
+   u64 access_type = etm4_get_comparator_access_type(config);
 
/* Configure the comparator */
config->addr_val[comparator] = address;
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c 
b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index e8fdda45ffca..45aeeac2f50e 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -743,7 +743,7 @@ static ssize_t s_exlevel_vinst_show(struct device *dev,
struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
struct etmv4_config *config = >config;
 

[PATCH v7 24/28] coresight: etm4x: Run arch feature detection on the CPU

2021-01-10 Thread Suzuki K Poulose
As we are about to add support for system register based devices,
we don't get an AMBA pid. So, the detection code could check
the system registers running on the CPU to check for the architecture
specific features. Thus we move the arch feature detection to
run on the CPU. We cannot always read the PID from the HW, as the
PID could be overridden by DT for broken devices. So, use the
PID from AMBA layer if available.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: liuqi...@huawei.com
Signed-off-by: Suzuki K Poulose 
---
Changes since v7:
 - Fixed typo in commit description
---
 drivers/hwtracing/coresight/coresight-etm4x-core.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 06edb9554c27..63a5e11b5f3f 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -60,6 +60,7 @@ static u64 etm4_get_access_type(struct etmv4_config *config);
 static enum cpuhp_state hp_online;
 
 struct etm4_init_arg {
+   unsigned intpid;
struct etmv4_drvdata*drvdata;
struct csdev_access *csa;
 };
@@ -884,6 +885,8 @@ static void etm4_init_arch_data(void *info)
etm4_os_unlock_csa(drvdata, csa);
etm4_cs_unlock(drvdata, csa);
 
+   etm4_check_arch_features(drvdata, init_arg->pid);
+
/* find all capabilities of the tracing unit */
etmidr0 = etm4x_relaxed_read32(csa, TRCIDR0);
 
@@ -1750,6 +1753,7 @@ static int etm4_probe(struct device *dev, void __iomem 
*base, u32 etm_pid)
 
init_arg.drvdata = drvdata;
init_arg.csa = 
+   init_arg.pid = etm_pid;
 
if (smp_call_function_single(drvdata->cpu,
etm4_init_arch_data,  _arg, 1))
@@ -1794,8 +1798,6 @@ static int etm4_probe(struct device *dev, void __iomem 
*base, u32 etm_pid)
drvdata->boot_enable = true;
}
 
-   etm4_check_arch_features(drvdata, etm_pid);
-
return 0;
 }
 
-- 
2.24.1



[PATCH v7 21/28] coresight: etm4x: Add necessary synchronization for sysreg access

2021-01-10 Thread Suzuki K Poulose
As per the specification any update to the TRCPRGCTLR must be synchronized
by a context synchronization event (in our case an explicit ISB) before
the TRCSTATR is checked.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
 drivers/hwtracing/coresight/coresight-etm4x-core.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 5d3041eccc14..3b17feb86d8a 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -284,6 +284,15 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
/* Disable the trace unit before programming trace registers */
etm4x_relaxed_write32(csa, 0, TRCPRGCTLR);
 
+   /*
+* If we use system instructions, we need to synchronize the
+* write to the TRCPRGCTLR, before accessing the TRCSTATR.
+* See ARM IHI0064F, section
+* "4.3.7 Synchronization of register updates"
+*/
+   if (!csa->io_mem)
+   isb();
+
/* wait for TRCSTATR.IDLE to go up */
if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
dev_err(etm_dev,
@@ -362,6 +371,10 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
/* Enable the trace unit */
etm4x_relaxed_write32(csa, 1, TRCPRGCTLR);
 
+   /* Synchronize the register updates for sysreg access */
+   if (!csa->io_mem)
+   isb();
+
/* wait for TRCSTATR.IDLE to go back down to '0' */
if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 0))
dev_err(etm_dev,
-- 
2.24.1



[PATCH v7 05/28] coresight: Convert coresight_timeout to use access abstraction

2021-01-10 Thread Suzuki K Poulose
Convert the generic routines to use the new access abstraction layer
gradually, starting with coresight_timeout.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Changes since v4:
 - Remove stacking of parameters in coresight.h
Changes since v3:
 - Fix style : stacking of parameters (Mathieu)
---
 drivers/hwtracing/coresight/coresight-catu.c  |  5 ++--
 drivers/hwtracing/coresight/coresight-core.c  | 13 
 drivers/hwtracing/coresight/coresight-etb10.c |  5 ++--
 .../coresight/coresight-etm4x-core.c  | 30 ---
 drivers/hwtracing/coresight/coresight-stm.c   |  3 +-
 .../hwtracing/coresight/coresight-tmc-core.c  | 15 ++
 drivers/hwtracing/coresight/coresight-tpiu.c  |  4 +--
 include/linux/coresight.h | 11 +--
 8 files changed, 54 insertions(+), 32 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-catu.c 
b/drivers/hwtracing/coresight/coresight-catu.c
index 867c932c7b26..d6097454d399 100644
--- a/drivers/hwtracing/coresight/coresight-catu.c
+++ b/drivers/hwtracing/coresight/coresight-catu.c
@@ -401,8 +401,9 @@ static const struct attribute_group *catu_groups[] = {
 
 static inline int catu_wait_for_ready(struct catu_drvdata *drvdata)
 {
-   return coresight_timeout(drvdata->base,
-CATU_STATUS, CATU_STATUS_READY, 1);
+   struct csdev_access *csa = >csdev->access;
+
+   return coresight_timeout(csa, CATU_STATUS, CATU_STATUS_READY, 1);
 }
 
 static int catu_enable_hw(struct catu_drvdata *drvdata, void *data)
diff --git a/drivers/hwtracing/coresight/coresight-core.c 
b/drivers/hwtracing/coresight/coresight-core.c
index a38af8f0831b..74985068f325 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -1418,23 +1418,24 @@ static void coresight_remove_conns(struct 
coresight_device *csdev)
 }
 
 /**
- * coresight_timeout - loop until a bit has changed to a specific state.
- * @addr: base address of the area of interest.
- * @offset: address of a register, starting from @addr.
+ * coresight_timeout - loop until a bit has changed to a specific register
+ * state.
+ * @csa: coresight device access for the device
+ * @offset: Offset of the register from the base of the device.
  * @position: the position of the bit of interest.
  * @value: the value the bit should have.
  *
  * Return: 0 as soon as the bit has taken the desired state or -EAGAIN if
  * TIMEOUT_US has elapsed, which ever happens first.
  */
-
-int coresight_timeout(void __iomem *addr, u32 offset, int position, int value)
+int coresight_timeout(struct csdev_access *csa, u32 offset,
+ int position, int value)
 {
int i;
u32 val;
 
for (i = TIMEOUT_US; i > 0; i--) {
-   val = __raw_readl(addr + offset);
+   val = csdev_access_read32(csa, offset);
/* waiting on the bit to go from 0 to 1 */
if (value) {
if (val & BIT(position))
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c 
b/drivers/hwtracing/coresight/coresight-etb10.c
index cc742561a986..0f664aeeda93 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -252,6 +252,7 @@ static void __etb_disable_hw(struct etb_drvdata *drvdata)
 {
u32 ffcr;
struct device *dev = >csdev->dev;
+   struct csdev_access *csa = >csdev->access;
 
CS_UNLOCK(drvdata->base);
 
@@ -263,7 +264,7 @@ static void __etb_disable_hw(struct etb_drvdata *drvdata)
ffcr |= ETB_FFCR_FON_MAN;
writel_relaxed(ffcr, drvdata->base + ETB_FFCR);
 
-   if (coresight_timeout(drvdata->base, ETB_FFCR, ETB_FFCR_BIT, 0)) {
+   if (coresight_timeout(csa, ETB_FFCR, ETB_FFCR_BIT, 0)) {
dev_err(dev,
"timeout while waiting for completion of Manual Flush\n");
}
@@ -271,7 +272,7 @@ static void __etb_disable_hw(struct etb_drvdata *drvdata)
/* disable trace capture */
writel_relaxed(0x0, drvdata->base + ETB_CTL_REG);
 
-   if (coresight_timeout(drvdata->base, ETB_FFSR, ETB_FFSR_BIT, 1)) {
+   if (coresight_timeout(csa, ETB_FFSR, ETB_FFSR_BIT, 1)) {
dev_err(dev,
"timeout while waiting for Formatter to Stop\n");
}
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index ebd41bb7ce1c..4df8d6c04b81 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -217,7 +217,9 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
 {
int i, rc;
struct etmv4_config *config = >config;
-   struct device *etm_dev = >csdev->dev;
+   struct coresight_device *csdev

[PATCH v7 07/28] coresight: etm4x: Always read the registers on the host CPU

2021-01-10 Thread Suzuki K Poulose
As we are about to add support for sysreg access to ETM4.4+ components,
make sure that we read the registers only on the host CPU.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
 .../coresight/coresight-etm4x-sysfs.c | 23 ---
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c 
b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index 989ce7b8ade7..c4781d4e5886 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -2344,23 +2344,20 @@ static u32 etmv4_cross_read(const struct device *dev, 
u32 offset)
return reg.data;
 }
 
-#define coresight_etm4x_reg(name, offset)  \
-   coresight_simple_reg32(struct etmv4_drvdata, name, offset)
-
 #define coresight_etm4x_cross_read(name, offset)   \
coresight_simple_func(struct etmv4_drvdata, etmv4_cross_read,   \
  name, offset)
 
-coresight_etm4x_reg(trcpdcr, TRCPDCR);
-coresight_etm4x_reg(trcpdsr, TRCPDSR);
-coresight_etm4x_reg(trclsr, TRCLSR);
-coresight_etm4x_reg(trcauthstatus, TRCAUTHSTATUS);
-coresight_etm4x_reg(trcdevid, TRCDEVID);
-coresight_etm4x_reg(trcdevtype, TRCDEVTYPE);
-coresight_etm4x_reg(trcpidr0, TRCPIDR0);
-coresight_etm4x_reg(trcpidr1, TRCPIDR1);
-coresight_etm4x_reg(trcpidr2, TRCPIDR2);
-coresight_etm4x_reg(trcpidr3, TRCPIDR3);
+coresight_etm4x_cross_read(trcpdcr, TRCPDCR);
+coresight_etm4x_cross_read(trcpdsr, TRCPDSR);
+coresight_etm4x_cross_read(trclsr, TRCLSR);
+coresight_etm4x_cross_read(trcauthstatus, TRCAUTHSTATUS);
+coresight_etm4x_cross_read(trcdevid, TRCDEVID);
+coresight_etm4x_cross_read(trcdevtype, TRCDEVTYPE);
+coresight_etm4x_cross_read(trcpidr0, TRCPIDR0);
+coresight_etm4x_cross_read(trcpidr1, TRCPIDR1);
+coresight_etm4x_cross_read(trcpidr2, TRCPIDR2);
+coresight_etm4x_cross_read(trcpidr3, TRCPIDR3);
 coresight_etm4x_cross_read(trcoslsr, TRCOSLSR);
 coresight_etm4x_cross_read(trcconfig, TRCCONFIGR);
 coresight_etm4x_cross_read(trctraceid, TRCTRACEIDR);
-- 
2.24.1



[PATCH v7 22/28] coresight: etm4x: Detect system instructions support

2021-01-10 Thread Suzuki K Poulose
ETM v4.4 onwards adds support for system instruction access
to the ETM. Detect the support on an ETM and switch to using the
mode when available.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
 .../coresight/coresight-etm4x-core.c  | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 3b17feb86d8a..6cbc6e0c0819 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -782,6 +782,37 @@ static const struct coresight_ops etm4_cs_ops = {
.source_ops = _source_ops,
 };
 
+static inline bool cpu_supports_sysreg_trace(void)
+{
+   u64 dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
+
+   return ((dfr0 >> ID_AA64DFR0_TRACEVER_SHIFT) & 0xfUL) > 0;
+}
+
+static bool etm4_init_sysreg_access(struct etmv4_drvdata *drvdata,
+   struct csdev_access *csa)
+{
+   u32 devarch;
+
+   if (!cpu_supports_sysreg_trace())
+   return false;
+
+   /*
+* ETMs implementing sysreg access must implement TRCDEVARCH.
+*/
+   devarch = read_etm4x_sysreg_const_offset(TRCDEVARCH);
+   if ((devarch & ETM_DEVARCH_ID_MASK) != ETM_DEVARCH_ETMv4x_ARCH)
+   return false;
+   *csa = (struct csdev_access) {
+   .io_mem = false,
+   .read   = etm4x_sysreg_read,
+   .write  = etm4x_sysreg_write,
+   };
+
+   drvdata->arch = etm_devarch_to_arch(devarch);
+   return true;
+}
+
 static bool etm4_init_iomem_access(struct etmv4_drvdata *drvdata,
   struct csdev_access *csa)
 {
@@ -812,9 +843,17 @@ static bool etm4_init_iomem_access(struct etmv4_drvdata 
*drvdata,
 static bool etm4_init_csdev_access(struct etmv4_drvdata *drvdata,
   struct csdev_access *csa)
 {
+   /*
+* Always choose the memory mapped io, if there is
+* a memory map to prevent sysreg access on broken
+* systems.
+*/
if (drvdata->base)
return etm4_init_iomem_access(drvdata, csa);
 
+   if (etm4_init_sysreg_access(drvdata, csa))
+   return true;
+
return false;
 }
 
-- 
2.24.1



[PATCH v7 19/28] coresight: etm4x: Use TRCDEVARCH for component discovery

2021-01-10 Thread Suzuki K Poulose
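We have been using TRCIDR1 for detecting the ETM version. Switch to
using TRCDEVARCH instead, falling back to TRCIDR1 only when TRCDEVARCH
is not implemented. This is in preparation for the future IP support.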

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
 .../coresight/coresight-etm4x-core.c  | 46 +--
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 7adc9bd03eb5..5d3041eccc14 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -153,18 +153,6 @@ static void etm4_cs_unlock(struct etmv4_drvdata *drvdata,
CS_UNLOCK(csa->base);
 }
 
-static bool etm4_arch_supported(u8 arch)
-{
-   /* Mask out the minor version number */
-   switch (arch & 0xf0) {
-   case ETM_ARCH_V4:
-   break;
-   default:
-   return false;
-   }
-   return true;
-}
-
 static int etm4_cpu_id(struct coresight_device *csdev)
 {
struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -784,6 +772,26 @@ static const struct coresight_ops etm4_cs_ops = {
 static bool etm4_init_iomem_access(struct etmv4_drvdata *drvdata,
   struct csdev_access *csa)
 {
+   u32 devarch = readl_relaxed(drvdata->base + TRCDEVARCH);
+   u32 idr1 = readl_relaxed(drvdata->base + TRCIDR1);
+
+   /*
+* All ETMs must implement TRCDEVARCH to indicate that
+* the component is an ETMv4. To support any broken
+* implementations we fall back to TRCIDR1 check, which
+* is not really reliable.
+*/
+   if ((devarch & ETM_DEVARCH_ID_MASK) == ETM_DEVARCH_ETMv4x_ARCH) {
+   drvdata->arch = etm_devarch_to_arch(devarch);
+   } else {
+   pr_warn("CPU%d: ETM4x incompatible TRCDEVARCH: %x, falling back 
to TRCIDR1\n",
+   smp_processor_id(), devarch);
+
+   if (ETM_TRCIDR1_ARCH_MAJOR(idr1) != ETM_TRCIDR1_ARCH_ETMv4)
+   return false;
+   drvdata->arch = etm_trcidr_to_arch(idr1);
+   }
+
*csa = CSDEV_ACCESS_IOMEM(drvdata->base);
return true;
 }
@@ -800,7 +808,6 @@ static bool etm4_init_csdev_access(struct etmv4_drvdata 
*drvdata,
 static void etm4_init_arch_data(void *info)
 {
u32 etmidr0;
-   u32 etmidr1;
u32 etmidr2;
u32 etmidr3;
u32 etmidr4;
@@ -865,14 +872,6 @@ static void etm4_init_arch_data(void *info)
/* TSSIZE, bits[28:24] Global timestamp size field */
drvdata->ts_size = BMVAL(etmidr0, 24, 28);
 
-   /* base architecture of trace unit */
-   etmidr1 = etm4x_relaxed_read32(csa, TRCIDR1);
-   /*
-* TRCARCHMIN, bits[7:4] architecture the minor version number
-* TRCARCHMAJ, bits[11:8] architecture major versin number
-*/
-   drvdata->arch = BMVAL(etmidr1, 4, 11);
-
/* maximum size of resources */
etmidr2 = etm4x_relaxed_read32(csa, TRCIDR2);
/* CIDSIZE, bits[9:5] Indicates the Context ID size */
@@ -1712,7 +1711,7 @@ static int etm4_probe(struct amba_device *adev, const 
struct amba_id *id)
etm4_init_arch_data,  _arg, 1))
dev_err(dev, "ETM arch init failed\n");
 
-   if (etm4_arch_supported(drvdata->arch) == false)
+   if (!drvdata->arch)
return -EINVAL;
 
etm4_init_trace_id(drvdata);
@@ -1744,7 +1743,8 @@ static int etm4_probe(struct amba_device *adev, const 
struct amba_id *id)
 
pm_runtime_put(>dev);
dev_info(>csdev->dev, "CPU%d: ETM v%d.%d initialized\n",
-drvdata->cpu, drvdata->arch >> 4, drvdata->arch & 0xf);
+drvdata->cpu, ETM_ARCH_MAJOR_VERSION(drvdata->arch),
+ETM_ARCH_MINOR_VERSION(drvdata->arch));
 
if (boot_enable) {
coresight_enable(drvdata->csdev);
-- 
2.24.1



[PATCH v7 15/28] coresight: etm4x: Cleanup secure exception level masks

2021-01-10 Thread Suzuki K Poulose
We rely on the ETM architecture version to decide whether
Secure EL2 is available on the CPU for excluding the level
for address comparators and viewinst main control register.
We must instead use the TRCIDR3.EXLEVEL_S field to detect
the supported levels.

Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
 drivers/hwtracing/coresight/coresight-etm4x-core.c | 13 +++--
 drivers/hwtracing/coresight/coresight-etm4x.h  |  6 --
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index feea52938a7b..3e3733c650a7 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -840,7 +840,6 @@ static void etm4_init_arch_data(void *info)
 * TRCARCHMAJ, bits[11:8] architecture major versin number
 */
drvdata->arch = BMVAL(etmidr1, 4, 11);
-   drvdata->config.arch = drvdata->arch;
 
/* maximum size of resources */
etmidr2 = etm4x_relaxed_read32(csa, TRCIDR2);
@@ -856,6 +855,7 @@ static void etm4_init_arch_data(void *info)
drvdata->ccitmin = BMVAL(etmidr3, 0, 11);
/* EXLEVEL_S, bits[19:16] Secure state instruction tracing */
drvdata->s_ex_level = BMVAL(etmidr3, 16, 19);
+   drvdata->config.s_ex_level = drvdata->s_ex_level;
/* EXLEVEL_NS, bits[23:20] Non-secure state instruction tracing */
drvdata->ns_ex_level = BMVAL(etmidr3, 20, 23);
 
@@ -1027,16 +1027,9 @@ static u64 etm4_get_ns_access_type(struct etmv4_config 
*config)
 static u64 etm4_get_access_type(struct etmv4_config *config)
 {
u64 access_type = etm4_get_ns_access_type(config);
-   u64 s_hyp = (config->arch & 0x0f) >= 0x4 ? ETM_EXLEVEL_S_HYP : 0;
 
-   /*
-* EXLEVEL_S, bits[11:8], don't trace anything happening
-* in secure state.
-*/
-   access_type |= (ETM_EXLEVEL_S_APP   |
-   ETM_EXLEVEL_S_OS|
-   s_hyp   |
-   ETM_EXLEVEL_S_MON);
+   /* All supported secure ELs are excluded */
+   access_type |= (u64)config->s_ex_level << TRCACATR_EXLEVEL_SHIFT;
 
return access_type;
 }
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h 
b/drivers/hwtracing/coresight/coresight-etm4x.h
index fba3c02eea0b..29cd27f53e72 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -557,6 +557,8 @@
 /* PowerDown Control Register bits */
 #define TRCPDCR_PU BIT(3)
 
+#define TRCACATR_EXLEVEL_SHIFT 8
+
 /* secure state access levels - TRCACATRn */
 #define ETM_EXLEVEL_S_APP  BIT(8)
 #define ETM_EXLEVEL_S_OS   BIT(9)
@@ -631,7 +633,7 @@ enum etm_impdef_type {
  * @vmid_mask0:VM ID comparator mask for comparator 0-3.
  * @vmid_mask1:VM ID comparator mask for comparator 4-7.
  * @ext_inp:   External input selection.
- * @arch:  ETM architecture version (for arch dependent config).
+ * @s_ex_level: Secure ELs where tracing is supported.
  */
 struct etmv4_config {
u32 mode;
@@ -675,7 +677,7 @@ struct etmv4_config {
u32 vmid_mask0;
u32 vmid_mask1;
u32 ext_inp;
-   u8  arch;
+   u8  s_ex_level;
 };
 
 /**
-- 
2.24.1



[PATCH v7 17/28] coresight: etm4x: Handle ETM architecture version

2021-01-10 Thread Suzuki K Poulose
We are about to rely on TRCDEVARCH for detecting the ETM
and its architecture version, falling back to TRCIDR1 if
the former is not implemented (in older broken implementations).

Also, we use the architecture version information to
make some decisions. Streamline the architecture version
handling by adding helpers.

Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
 .../coresight/coresight-etm4x-core.c  |  2 +-
 drivers/hwtracing/coresight/coresight-etm4x.h | 60 ++-
 2 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 548bd71c2168..862b424aef26 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -917,7 +917,7 @@ static void etm4_init_arch_data(void *info)
 * Otherwise for values 0x1 and above the number is N + 1 as per v4.2.
 */
drvdata->nr_resource = BMVAL(etmidr4, 16, 19);
-   if ((drvdata->arch < ETM4X_ARCH_4V3) || (drvdata->nr_resource > 0))
+   if ((drvdata->arch < ETM_ARCH_V4_3) || (drvdata->nr_resource > 0))
drvdata->nr_resource += 1;
/*
 * NUMSSCC, bits[23:20] the number of single-shot
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h 
b/drivers/hwtracing/coresight/coresight-etm4x.h
index 91b82002e260..0af60571aa23 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -468,7 +468,6 @@
 #define ETM_MAX_RES_SEL32
 #define ETM_MAX_SS_CMP 8
 
-#define ETM_ARCH_V40x40
 #define ETMv4_SYNC_MASK0x1F
 #define ETM_CYC_THRESHOLD_MASK 0xFFF
 #define ETM_CYC_THRESHOLD_DEFAULT   0x100
@@ -593,8 +592,63 @@
 #define TRCVICTLR_EXLEVEL_S_MASK   (ETM_EXLEVEL_S_MASK << 
TRCVICTLR_EXLEVEL_SHIFT)
 #define TRCVICTLR_EXLEVEL_NS_MASK  (ETM_EXLEVEL_NS_MASK << 
TRCVICTLR_EXLEVEL_SHIFT)
 
+#define ETM_TRCIDR1_ARCH_MAJOR_SHIFT   8
+#define ETM_TRCIDR1_ARCH_MAJOR_MASK(0xfU << ETM_TRCIDR1_ARCH_MAJOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_MAJOR(x)  \
+   (((x) & ETM_TRCIDR1_ARCH_MAJOR_MASK) >> ETM_TRCIDR1_ARCH_MAJOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_MINOR_SHIFT   4
+#define ETM_TRCIDR1_ARCH_MINOR_MASK(0xfU << ETM_TRCIDR1_ARCH_MINOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_MINOR(x)  \
+   (((x) & ETM_TRCIDR1_ARCH_MINOR_MASK) >> ETM_TRCIDR1_ARCH_MINOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_SHIFT ETM_TRCIDR1_ARCH_MINOR_SHIFT
+#define ETM_TRCIDR1_ARCH_MASK  \
+   (ETM_TRCIDR1_ARCH_MAJOR_MASK | ETM_TRCIDR1_ARCH_MINOR_MASK)
+
+#define ETM_TRCIDR1_ARCH_ETMv4 0x4
+
+/*
+ * Driver representation of the ETM architecture.
+ * The version of an ETM component can be detected from
+ *
+ * TRCDEVARCH  - CoreSight architected register
+ *- Bits[15:12] - Major version
+ *- Bits[19:16] - Minor version
+ * TRCIDR1 - ETM architected register
+ *- Bits[11:8] - Major version
+ *- Bits[7:4]  - Minor version
+ * We must rely on TRCDEVARCH for the version information,
+ * however we don't want to break the support for potential
+ * old implementations which might not implement it. Thus
+ * we fall back to TRCIDR1 if TRCDEVARCH is not implemented
+ * for memory mapped components.
+ * Now to make certain decisions easier based on the version
+ * we use an internal representation of the version in the
+ * driver, as follows :
+ *
+ * ETM_ARCH_VERSION[7:0], where :
+ *  Bits[7:4] - Major version
+ *  Bits[3:0] - Minor version
+ */
+#define ETM_ARCH_VERSION(major, minor) \
+   ((((major) & 0xfU) << 4) | (((minor) & 0xfU)))
+#define ETM_ARCH_MAJOR_VERSION(arch)   (((arch) >> 4) & 0xfU)
+#define ETM_ARCH_MINOR_VERSION(arch)   ((arch) & 0xfU)
+
+#define ETM_ARCH_V4ETM_ARCH_VERSION(4, 0)
 /* Interpretation of resource numbers change at ETM v4.3 architecture */
-#define ETM4X_ARCH_4V3 0x43
+#define ETM_ARCH_V4_3  ETM_ARCH_VERSION(4, 3)
+
+static inline u8 etm_devarch_to_arch(u32 devarch)
+{
+   return ETM_ARCH_VERSION(ETM_DEVARCH_ARCHID_ARCH_VER(devarch),
+   ETM_DEVARCH_REVISION(devarch));
+}
+
+static inline u8 etm_trcidr_to_arch(u32 trcidr1)
+{
+   return ETM_ARCH_VERSION(ETM_TRCIDR1_ARCH_MAJOR(trcidr1),
+   ETM_TRCIDR1_ARCH_MINOR(trcidr1));
+}
 
 enum etm_impdef_type {
ETM4_IMPDEF_HISI_CORE_COMMIT,
@@ -761,7 +815,7 @@ struct etmv4_save_state {
  * @spinlock:   Only one at a time pls.
  * @mode:  This tracer's mode, i.e sysFS, Perf or disabled.
  * @cpu:The cpu this component is affined to.
- * @arch:   ETM version number.
+ * @arch:   ETM architecture version.
  * @nr_pe: 

[PATCH v7 26/28] dts: bindings: coresight: ETM system register access only units

2021-01-10 Thread Suzuki K Poulose
Document the bindings for ETMs with system register accesses.

Cc: devicet...@vger.kernel.org
Cc: Mathieu Poirier 
Cc: Mike Leach 
Acked-by: Rob Herring 
Signed-off-by: Suzuki K Poulose 
---
 Documentation/devicetree/bindings/arm/coresight.txt | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/arm/coresight.txt 
b/Documentation/devicetree/bindings/arm/coresight.txt
index d711676b4a51..7f9c1ca87487 100644
--- a/Documentation/devicetree/bindings/arm/coresight.txt
+++ b/Documentation/devicetree/bindings/arm/coresight.txt
@@ -34,9 +34,12 @@ its hardware characteristcs.
Program Flow Trace Macrocell:
"arm,coresight-etm3x", "arm,primecell";
 
-   - Embedded Trace Macrocell (version 4.x):
+   - Embedded Trace Macrocell (version 4.x), with memory mapped 
access.
"arm,coresight-etm4x", "arm,primecell";
 
+   - Embedded Trace Macrocell (version 4.x), with system register 
access only.
+   "arm,coresight-etm4x-sysreg";
+
- Coresight programmable Replicator :
"arm,coresight-dynamic-replicator", "arm,primecell";
 
-- 
2.24.1



[PATCH v7 28/28] coresight: Add support for v8.4 SelfHosted tracing

2021-01-10 Thread Suzuki K Poulose
From: Jonathan Zhou 

v8.4 tracing extensions added support for trace filtering controlled
by TRFCR_ELx. This must be programmed to allow tracing at EL1/EL2 and
EL0. The timestamp used is the virtual time. Also enable CONTEXTIDR_EL2
tracing if we are running the kernel at EL2.

Cc: Catalin Marinas 
Cc: Mike Leach 
Cc: Will Deacon 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Jonathan Zhou 
[ Move the trace filtering setup to etm4_init_arch_data() and
 clean ups]
Signed-off-by: Suzuki K Poulose 
---
 .../coresight/coresight-etm4x-core.c  | 25 +++
 1 file changed, 25 insertions(+)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 3d3165dd09d4..18c1a80abab8 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -859,6 +859,30 @@ static bool etm4_init_csdev_access(struct etmv4_drvdata 
*drvdata,
return false;
 }
 
+static void cpu_enable_tracing(void)
+{
+   u64 dfr0 = read_sysreg(id_aa64dfr0_el1);
+   u64 trfcr;
+
+   if (!cpuid_feature_extract_unsigned_field(dfr0, 
ID_AA64DFR0_TRACE_FILT_SHIFT))
+   return;
+
+   /*
+* If the CPU supports v8.4 SelfHosted Tracing, enable
+* tracing at the kernel EL and EL0, forcing to use the
+* virtual time as the timestamp.
+*/
+   trfcr = (TRFCR_ELx_TS_VIRTUAL |
+TRFCR_ELx_ExTRE |
+TRFCR_ELx_E0TRE);
+
+   /* If we are running at EL2, allow tracing the CONTEXTIDR_EL2. */
+   if (is_kernel_in_hyp_mode())
+   trfcr |= TRFCR_EL2_CX;
+
+   write_sysreg_s(trfcr, SYS_TRFCR_EL1);
+}
+
 static void etm4_init_arch_data(void *info)
 {
u32 etmidr0;
@@ -1044,6 +1068,7 @@ static void etm4_init_arch_data(void *info)
/* NUMCNTR, bits[30:28] number of counters available for tracing */
drvdata->nr_cntr = BMVAL(etmidr5, 28, 30);
etm4_cs_lock(drvdata, csa);
+   cpu_enable_tracing();
 }
 
 static inline u32 etm4_get_victlr_access_type(struct etmv4_config *config)
-- 
2.24.1



[PATCH v7 14/28] coresight: etm4x: Check for Software Lock

2021-01-10 Thread Suzuki K Poulose
The Software lock is not implemented for system instructions
based accesses. So, skip the lock register access in such
cases.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Change since v6:
 - Fixed indentation
---
 .../coresight/coresight-etm4x-core.c  | 40 ---
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 043cd52a6fae..feea52938a7b 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -133,6 +133,21 @@ static void etm4_os_lock(struct etmv4_drvdata *drvdata)
isb();
 }
 
+static void etm4_cs_lock(struct etmv4_drvdata *drvdata,
+struct csdev_access *csa)
+{
+   /* Software Lock is only accessible via memory mapped interface */
+   if (csa->io_mem)
+   CS_LOCK(csa->base);
+}
+
+static void etm4_cs_unlock(struct etmv4_drvdata *drvdata,
+  struct csdev_access *csa)
+{
+   if (csa->io_mem)
+   CS_UNLOCK(csa->base);
+}
+
 static bool etm4_arch_supported(u8 arch)
 {
/* Mask out the minor version number */
@@ -263,7 +278,8 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
struct device *etm_dev = >dev;
struct csdev_access *csa = >access;
 
-   CS_UNLOCK(drvdata->base);
+
+   etm4_cs_unlock(drvdata, csa);
etm4_enable_arch_specific(drvdata);
 
etm4_os_unlock(drvdata);
@@ -366,7 +382,7 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
isb();
 
 done:
-   CS_LOCK(drvdata->base);
+   etm4_cs_lock(drvdata, csa);
 
dev_dbg(etm_dev, "cpu: %d enable smp call done: %d\n",
drvdata->cpu, rc);
@@ -623,7 +639,7 @@ static void etm4_disable_hw(void *info)
struct csdev_access *csa = >access;
int i;
 
-   CS_UNLOCK(drvdata->base);
+   etm4_cs_unlock(drvdata, csa);
etm4_disable_arch_specific(drvdata);
 
if (!drvdata->skip_power_up) {
@@ -665,8 +681,7 @@ static void etm4_disable_hw(void *info)
}
 
coresight_disclaim_device_unlocked(csdev);
-
-   CS_LOCK(drvdata->base);
+   etm4_cs_lock(drvdata, csa);
 
dev_dbg(>csdev->dev,
"cpu: %d disable smp call done\n", drvdata->cpu);
@@ -776,8 +791,7 @@ static void etm4_init_arch_data(void *info)
 
/* Make sure all registers are accessible */
etm4_os_unlock_csa(drvdata, csa);
-
-   CS_UNLOCK(drvdata->base);
+   etm4_cs_unlock(drvdata, csa);
 
/* find all capabilities of the tracing unit */
etmidr0 = etm4x_relaxed_read32(csa, TRCIDR0);
@@ -942,7 +956,7 @@ static void etm4_init_arch_data(void *info)
drvdata->nrseqstate = BMVAL(etmidr5, 25, 27);
/* NUMCNTR, bits[30:28] number of counters available for tracing */
drvdata->nr_cntr = BMVAL(etmidr5, 28, 30);
-   CS_LOCK(drvdata->base);
+   etm4_cs_lock(drvdata, csa);
 }
 
 /* Set ELx trace filter access in the TRCVICTLR register */
@@ -1323,8 +1337,7 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
dsb(sy);
isb();
 
-   CS_UNLOCK(drvdata->base);
-
+   etm4_cs_unlock(drvdata, csa);
/* Lock the OS lock to disable trace and external debugger access */
etm4_os_lock(drvdata);
 
@@ -1437,7 +1450,7 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
etm4x_relaxed_write32(csa, (state->trcpdcr & ~TRCPDCR_PU),
  TRCPDCR);
 out:
-   CS_LOCK(drvdata->base);
+   etm4_cs_lock(drvdata, csa);
return ret;
 }
 
@@ -1448,8 +1461,7 @@ static void etm4_cpu_restore(struct etmv4_drvdata 
*drvdata)
struct csdev_access tmp_csa = CSDEV_ACCESS_IOMEM(drvdata->base);
struct csdev_access *csa = _csa;
 
-   CS_UNLOCK(drvdata->base);
-
+   etm4_cs_unlock(drvdata, csa);
etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET);
 
etm4x_relaxed_write32(csa, state->trcprgctlr, TRCPRGCTLR);
@@ -1534,7 +1546,7 @@ static void etm4_cpu_restore(struct etmv4_drvdata 
*drvdata)
 
/* Unlock the OS lock to re-enable trace and external debug access */
etm4_os_unlock(drvdata);
-   CS_LOCK(drvdata->base);
+   etm4_cs_lock(drvdata, csa);
 }
 
 static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
-- 
2.24.1



[PATCH v7 02/28] coresight: etm4x: Skip accessing TRCPDCR in save/restore

2021-01-10 Thread Suzuki K Poulose
When the ETM is affected by Qualcomm errata, modifying the
TRCPDCR could cause the system to hang. Even though this is
taken care of during enable/disable ETM, the ETM state
save/restore could still access the TRCPDCR. Make sure
we skip the access during the save/restore.

Found by code inspection.

Fixes: 02510a5aa78df45 ("coresight: etm4x: Add support to skip trace unit power 
up")
Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Sai Prakash Ranjan 
Cc: Tingwei Zhang 
Reviewed-by: Sai Prakash Ranjan 
Tested-by: Sai Prakash Ranjan 
Signed-off-by: Suzuki K Poulose 
---
 drivers/hwtracing/coresight/coresight-etm4x-core.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 76526679b998..cce65fc0c9aa 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -1373,7 +1373,8 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
 
state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR);
 
-   state->trcpdcr = readl(drvdata->base + TRCPDCR);
+   if (!drvdata->skip_power_up)
+   state->trcpdcr = readl(drvdata->base + TRCPDCR);
 
/* wait for TRCSTATR.IDLE to go up */
if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) {
@@ -1391,9 +1392,9 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
 * potentially save power on systems that respect the TRCPDCR_PU
 * despite requesting software to save/restore state.
 */
-   writel_relaxed((state->trcpdcr & ~TRCPDCR_PU),
-   drvdata->base + TRCPDCR);
-
+   if (!drvdata->skip_power_up)
+   writel_relaxed((state->trcpdcr & ~TRCPDCR_PU),
+   drvdata->base + TRCPDCR);
 out:
CS_LOCK(drvdata->base);
return ret;
@@ -1488,7 +1489,8 @@ static void etm4_cpu_restore(struct etmv4_drvdata 
*drvdata)
 
writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
 
-   writel_relaxed(state->trcpdcr, drvdata->base + TRCPDCR);
+   if (!drvdata->skip_power_up)
+   writel_relaxed(state->trcpdcr, drvdata->base + TRCPDCR);
 
drvdata->state_needs_restore = false;
 
-- 
2.24.1



[PATCH v7 13/28] coresight: etm4x: Define DEVARCH register fields

2021-01-10 Thread Suzuki K Poulose
Define the fields of the DEVARCH register for identifying
a component as an ETMv4.x unit. Going forward, we use the
DEVARCH register for the component identification, rather
than the TRCIDR3.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
 .../coresight/coresight-etm4x-core.c  |  4 +-
 drivers/hwtracing/coresight/coresight-etm4x.h | 42 +++
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index d8122b6284d9..043cd52a6fae 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -1720,8 +1720,8 @@ static int etm4_probe(struct amba_device *adev, const 
struct amba_id *id)
 static struct amba_cs_uci_id uci_id_etm4[] = {
{
/*  ETMv4 UCI data */
-   .devarch= 0x47704a13,
-   .devarch_mask   = 0xfff0,
+   .devarch= ETM_DEVARCH_ETMv4x_ARCH,
+   .devarch_mask   = ETM_DEVARCH_ID_MASK,
.devtype= 0x0013,
}
 };
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h 
b/drivers/hwtracing/coresight/coresight-etm4x.h
index 193d2819afa7..fba3c02eea0b 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -506,6 +506,48 @@
 ETM_MODE_EXCL_KERN | \
 ETM_MODE_EXCL_USER)
 
+/*
+ * TRCDEVARCH Bit field definitions
+ * Bits[31:21] - ARCHITECT = Always Arm Ltd.
+ ** Bits[31:28] = 0x4
+ ** Bits[27:21] = 0b0111011
+ * Bit[20] - PRESENT,  Indicates the presence of this register.
+ *
+ * Bit[19:16]  - REVISION, Revision of the architecture.
+ *
+ * Bit[15:0]   - ARCHID, Identifies this component as an ETM
+ ** Bits[15:12] - architecture version of ETM
+ ** = 4 for ETMv4
+ ** Bits[11:0] = 0xA13, architecture part number for ETM.
+ */
+#define ETM_DEVARCH_ARCHITECT_MASK GENMASK(31, 21)
+#define ETM_DEVARCH_ARCHITECT_ARM  ((0x4 << 28) | (0b0111011 << 
21))
+#define ETM_DEVARCH_PRESENTBIT(20)
+#define ETM_DEVARCH_REVISION_SHIFT 16
+#define ETM_DEVARCH_REVISION_MASK  GENMASK(19, 16)
+#define ETM_DEVARCH_REVISION(x)\
+   (((x) & ETM_DEVARCH_REVISION_MASK) >> ETM_DEVARCH_REVISION_SHIFT)
+#define ETM_DEVARCH_ARCHID_MASKGENMASK(15, 0)
+#define ETM_DEVARCH_ARCHID_ARCH_VER_SHIFT  12
+#define ETM_DEVARCH_ARCHID_ARCH_VER_MASK   GENMASK(15, 12)
+#define ETM_DEVARCH_ARCHID_ARCH_VER(x) \
+   (((x) & ETM_DEVARCH_ARCHID_ARCH_VER_MASK) >> 
ETM_DEVARCH_ARCHID_ARCH_VER_SHIFT)
+
+#define ETM_DEVARCH_MAKE_ARCHID_ARCH_VER(ver)  \
+   (((ver) << ETM_DEVARCH_ARCHID_ARCH_VER_SHIFT) & 
ETM_DEVARCH_ARCHID_ARCH_VER_MASK)
+
+#define ETM_DEVARCH_ARCHID_ARCH_PART(x)((x) & 0xfffUL)
+
+#define ETM_DEVARCH_MAKE_ARCHID(major) \
+   ((ETM_DEVARCH_MAKE_ARCHID_ARCH_VER(major)) | 
ETM_DEVARCH_ARCHID_ARCH_PART(0xA13))
+
+#define ETM_DEVARCH_ARCHID_ETMv4x  ETM_DEVARCH_MAKE_ARCHID(0x4)
+
+#define ETM_DEVARCH_ID_MASK\
+   (ETM_DEVARCH_ARCHITECT_MASK | ETM_DEVARCH_ARCHID_MASK | 
ETM_DEVARCH_PRESENT)
+#define ETM_DEVARCH_ETMv4x_ARCH
\
+   (ETM_DEVARCH_ARCHITECT_ARM | ETM_DEVARCH_ARCHID_ETMv4x | 
ETM_DEVARCH_PRESENT)
+
 #define TRCSTATR_IDLE_BIT  0
 #define TRCSTATR_PMSTABLE_BIT  1
 #define ETM_DEFAULT_ADDR_COMP  0
-- 
2.24.1



[PATCH v7 11/28] coresight: etm4x: Add sysreg access helpers

2021-01-10 Thread Suzuki K Poulose
The ETM architecture defines system instructions for accessing
its registers. Add basic support for accessing a given
register via system instructions.

We split the list of registers as :
 1) Accessible only from memory mapped interface
 2) Accessible from system register instructions.

All registers are accessible via the memory-mapped interface.
However, some registers are not accessible via the system
instructions. This list is then used to further filter out
the files we expose via sysfs.

Cc: Mike Leach 
Cc: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Changes since v6:
 - Create separate lists of mmap only accessible registers
   and system instructions accessible registers.

Changes since v3:
 - Removed stacking of parameters and other style comments
   from Mathieu.
---
 .../coresight/coresight-etm4x-core.c  |  32 ++
 drivers/hwtracing/coresight/coresight-etm4x.h | 343 --
 2 files changed, 353 insertions(+), 22 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 4497593b58f0..d8122b6284d9 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -75,6 +75,38 @@ static inline bool etm4x_sspcicrn_present(struct 
etmv4_drvdata *drvdata, int n)
   (drvdata->config.ss_status[n] & TRCSSCSRn_PC);
 }
 
+u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit)
+{
+   u64 res = 0;
+
+   switch (offset) {
+   ETM4x_READ_SYSREG_CASES(res)
+   default :
+   pr_warn_ratelimited("etm4x: trying to read unsupported register 
@%x\n",
+offset);
+   }
+
+   if (!_relaxed)
+   __iormb(res);   /* Imitate the !relaxed I/O helpers */
+
+   return res;
+}
+
+void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit)
+{
+   if (!_relaxed)
+   __iowmb();  /* Imitate the !relaxed I/O helpers */
+   if (!_64bit)
+   val &= GENMASK(31, 0);
+
+   switch (offset) {
+   ETM4x_WRITE_SYSREG_CASES(val)
+   default :
+   pr_warn_ratelimited("etm4x: trying to write to unsupported 
register @%x\n",
+   offset);
+   }
+}
+
 static void etm4_os_unlock_csa(struct etmv4_drvdata *drvdata, struct 
csdev_access *csa)
 {
/* Writing 0 to TRCOSLAR unlocks the trace registers */
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h 
b/drivers/hwtracing/coresight/coresight-etm4x.h
index 3c2b49ffabc8..24ba0da5b096 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -126,29 +126,325 @@
 #define TRCCIDR2   0xFF8
 #define TRCCIDR3   0xFFC
 
-#define etm4x_relaxed_read32(csa, offset)  \
-   readl_relaxed((csa)->base + (offset))
-
-#define etm4x_read32(csa, offset)  \
-   readl((csa)->base + (offset))
-
-#define etm4x_relaxed_write32(csa, val, offset)\
-   writel_relaxed((val), (csa)->base + (offset))
-
-#define etm4x_write32(csa, val, offset)\
-   writel((val), (csa)->base + (offset))
-
-#define etm4x_relaxed_read64(csa, offset)  \
-   readq_relaxed((csa)->base + (offset))
-
-#define etm4x_read64(csa, offset)  \
-   readq((csa)->base + (offset))
-
-#define etm4x_relaxed_write64(csa, val, offset)\
-   writeq_relaxed((val), (csa)->base + (offset))
+/*
+ * System instructions to access ETM registers.
+ * See ETMv4.4 spec ARM IHI0064F section 4.3.6 System instructions
+ */
+#define ETM4x_OFFSET_TO_REG(x) ((x) >> 2)
+
+#define ETM4x_CRn(n)   (((n) >> 7) & 0x7)
+#define ETM4x_Op2(n)   (((n) >> 4) & 0x7)
+#define ETM4x_CRm(n)   ((n) & 0xf)
+
+#include 
+#define ETM4x_REG_NUM_TO_SYSREG(n) \
+   sys_reg(2, 1, ETM4x_CRn(n), ETM4x_CRm(n), ETM4x_Op2(n))
+
+#define READ_ETM4x_REG(reg)\
+   read_sysreg_s(ETM4x_REG_NUM_TO_SYSREG((reg)))
+#define WRITE_ETM4x_REG(val, reg)  \
+   write_sysreg_s(val, ETM4x_REG_NUM_TO_SYSREG((reg)))
+
+#define read_etm4x_sysreg_const_offset(offset) \
+   READ_ETM4x_REG(ETM4x_OFFSET_TO_REG(offset))
+
+#define write_etm4x_sysreg_const_offset(val, offset)   \
+   WRITE_ETM4x_REG(val, ETM4x_OFFSET_TO_REG(offset))
+
+#define CASE_READ(res, x)  \
+   case (x): { (res) = read_etm4x_sysreg_const_offset((x)); break; }
+
+#define CASE_WRITE(val, x) \
+   case (x): { write_etm4x_sysreg_const_offset((val), (x)); break; }
+
+/* List of registers accessible via Syst

[PATCH v7 12/28] coresight: etm4x: Hide sysfs attributes for unavailable registers

2021-01-10 Thread Suzuki K Poulose
Some of the management registers in ETMv4.x are not accessible
via system register instructions. Thus we must hide the sysfs
files exposing them to the userspace, to prevent system crashes.

This patch adds an is_visible() routine to control the visibility
at runtime for the registers that may not be accessed.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Signed-off-by: Suzuki K Poulose 
---
New patch in v7
---
 .../coresight/coresight-etm4x-sysfs.c | 51 +++
 drivers/hwtracing/coresight/coresight-etm4x.h |  6 +++
 2 files changed, 57 insertions(+)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c 
b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index ddbfeb24fc3f..e8fdda45ffca 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -2370,6 +2370,56 @@ static ssize_t coresight_etm4x_reg_show(struct device 
*dev,
return scnprintf(buf, PAGE_SIZE, "0x%x\n", val);
 }
 
+static inline bool
+etm4x_register_implemented(struct etmv4_drvdata *drvdata, u32 offset)
+{
+   switch (offset) {
+   ETM4x_SYSREG_LIST_CASES
+   /*
+* Registers accessible via system instructions are always
+* implemented.
+*/
+   return true;
+   ETM4x_MMAP_LIST_CASES
+   /*
+* Registers accessible only via memory-mapped registers
+* must not be accessed via system instructions.
+* We cannot access the drvdata->csdev here, as this
+* function is called during the device creation, via
+* coresight_register() and the csdev is not initialized
+* until that is done. So rely on the drvdata->base to
+* detect if we have a memory mapped access.
+*/
+   return !!drvdata->base;
+   }
+
+   return false;
+}
+
+/*
+ * Hide the ETM4x registers that may not be available on the
+ * hardware.
+ * There are certain management registers unavailable via system
+ * instructions. Make those sysfs attributes hidden on such
+ * systems.
+ */
+static umode_t
+coresight_etm4x_attr_reg_implemented(struct kobject *kobj,
+struct attribute *attr, int unused)
+{
+   struct device *dev = kobj_to_dev(kobj);
+   struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+   struct device_attribute *d_attr;
+   u32 offset;
+
+   d_attr = container_of(attr, struct device_attribute, attr);
+   offset = coresight_etm4x_attr_to_offset(d_attr);
+
+   if (etm4x_register_implemented(drvdata, offset))
+   return attr->mode;
+   return 0;
+}
+
 #define coresight_etm4x_reg(name, offset)  \
&((struct dev_ext_attribute[]) {\
   {\
@@ -2417,6 +2467,7 @@ static const struct attribute_group coresight_etmv4_group 
= {
 };
 
 static const struct attribute_group coresight_etmv4_mgmt_group = {
+   .is_visible = coresight_etm4x_attr_reg_implemented,
.attrs = coresight_etmv4_mgmt_attrs,
.name = "mgmt",
 };
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h 
b/drivers/hwtracing/coresight/coresight-etm4x.h
index 24ba0da5b096..193d2819afa7 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -157,6 +157,9 @@
 #define CASE_WRITE(val, x) \
case (x): { write_etm4x_sysreg_const_offset((val), (x)); break; }
 
+#define CASE_NOP(__unused, x)  \
+   case (x):   /* fall through */
+
 /* List of registers accessible via System instructions */
 #define ETM_SYSREG_LIST(op, val)   \
CASE_##op((val), TRCPRGCTLR)\
@@ -369,6 +372,9 @@
 #define ETM4x_READ_SYSREG_CASES(res)   ETM_SYSREG_LIST(READ, (res))
 #define ETM4x_WRITE_SYSREG_CASES(val)  ETM_SYSREG_LIST(WRITE, (val))
 
+#define ETM4x_SYSREG_LIST_CASESETM_SYSREG_LIST(NOP, __unused)
+#define ETM4x_MMAP_LIST_CASES  ETM_MMAP_LIST(NOP, __unused)
+
 #define read_etm4x_sysreg_offset(offset, _64bit)   
\
({  
\
u64 __val;  
\
-- 
2.24.1



[PATCH v7 08/28] coresight: etm4x: Convert all register accesses

2021-01-10 Thread Suzuki K Poulose
Convert all register accesses from etm4x driver to use a wrapper
to allow switching the access at runtime with little overhead.

co-developed by sed tool ;-), mostly equivalent to :

s/readl\(_relaxed\)\?(drvdata->base + \(.*\))/etm4x_\1_read32(csdev, \2)
s/writel\(_relaxed\)\?(\(.*\), drvdata->base + \(.*\))/etm4x_\1_write32(csdev, 
\2, \3)

We don't want to replace them with the csdev_access_* to
avoid a function call for every register access for system
register access. This is a preparatory step to add system
register access later where the support is available.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
 .../coresight/coresight-etm4x-core.c  | 327 +-
 .../coresight/coresight-etm4x-sysfs.c |   9 +-
 drivers/hwtracing/coresight/coresight-etm4x.h |  24 ++
 3 files changed, 188 insertions(+), 172 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 9d9476128d31..4497593b58f0 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -75,18 +75,28 @@ static inline bool etm4x_sspcicrn_present(struct 
etmv4_drvdata *drvdata, int n)
   (drvdata->config.ss_status[n] & TRCSSCSRn_PC);
 }
 
-static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
+static void etm4_os_unlock_csa(struct etmv4_drvdata *drvdata, struct 
csdev_access *csa)
 {
/* Writing 0 to TRCOSLAR unlocks the trace registers */
-   writel_relaxed(0x0, drvdata->base + TRCOSLAR);
+   etm4x_relaxed_write32(csa, 0x0, TRCOSLAR);
drvdata->os_unlock = true;
isb();
 }
 
+static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
+{
+   if (!WARN_ON(!drvdata->csdev))
+   etm4_os_unlock_csa(drvdata, &drvdata->csdev->access);
+
+}
+
 static void etm4_os_lock(struct etmv4_drvdata *drvdata)
 {
+   if (WARN_ON(!drvdata->csdev))
+   return;
+
/* Writing 0x1 to TRCOSLAR locks the trace registers */
-   writel_relaxed(0x1, drvdata->base + TRCOSLAR);
+   etm4x_relaxed_write32(&drvdata->csdev->access, 0x1, TRCOSLAR);
drvdata->os_unlock = false;
isb();
 }
@@ -231,45 +241,39 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
goto done;
 
/* Disable the trace unit before programming trace registers */
-   writel_relaxed(0, drvdata->base + TRCPRGCTLR);
+   etm4x_relaxed_write32(csa, 0, TRCPRGCTLR);
 
/* wait for TRCSTATR.IDLE to go up */
if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
dev_err(etm_dev,
"timeout while waiting for Idle Trace Status\n");
if (drvdata->nr_pe)
-   writel_relaxed(config->pe_sel, drvdata->base + TRCPROCSELR);
-   writel_relaxed(config->cfg, drvdata->base + TRCCONFIGR);
+   etm4x_relaxed_write32(csa, config->pe_sel, TRCPROCSELR);
+   etm4x_relaxed_write32(csa, config->cfg, TRCCONFIGR);
/* nothing specific implemented */
-   writel_relaxed(0x0, drvdata->base + TRCAUXCTLR);
-   writel_relaxed(config->eventctrl0, drvdata->base + TRCEVENTCTL0R);
-   writel_relaxed(config->eventctrl1, drvdata->base + TRCEVENTCTL1R);
-   writel_relaxed(config->stall_ctrl, drvdata->base + TRCSTALLCTLR);
-   writel_relaxed(config->ts_ctrl, drvdata->base + TRCTSCTLR);
-   writel_relaxed(config->syncfreq, drvdata->base + TRCSYNCPR);
-   writel_relaxed(config->ccctlr, drvdata->base + TRTLR);
-   writel_relaxed(config->bb_ctrl, drvdata->base + TRCBBCTLR);
-   writel_relaxed(drvdata->trcid, drvdata->base + TRCTRACEIDR);
-   writel_relaxed(config->vinst_ctrl, drvdata->base + TRCVICTLR);
-   writel_relaxed(config->viiectlr, drvdata->base + TRCVIIECTLR);
-   writel_relaxed(config->vissctlr,
-  drvdata->base + TRCVISSCTLR);
+   etm4x_relaxed_write32(csa, 0x0, TRCAUXCTLR);
+   etm4x_relaxed_write32(csa, config->eventctrl0, TRCEVENTCTL0R);
+   etm4x_relaxed_write32(csa, config->eventctrl1, TRCEVENTCTL1R);
+   etm4x_relaxed_write32(csa, config->stall_ctrl, TRCSTALLCTLR);
+   etm4x_relaxed_write32(csa, config->ts_ctrl, TRCTSCTLR);
+   etm4x_relaxed_write32(csa, config->syncfreq, TRCSYNCPR);
+   etm4x_relaxed_write32(csa, config->ccctlr, TRTLR);
+   etm4x_relaxed_write32(csa, config->bb_ctrl, TRCBBCTLR);
+   etm4x_relaxed_write32(csa, drvdata->trcid, TRCTRACEIDR);
+   etm4x_relaxed_write32(csa, config->vinst_ctrl, TRCVICTLR);
+   etm4x_relaxed_write32(csa, config->viiectlr, TRCVIIECTLR);
+   etm4x_relaxed_write32(csa, config->vissctlr, TRCVISSCTLR);
if (drvdata->nr_pe_cmp)
-   writel_relaxed(config->

[PATCH v7 09/28] coresight: etm4x: Make offset available for sysfs attributes

2021-01-10 Thread Suzuki K Poulose
Some of the ETM management registers are not accessible via
system instructions. Thus we need to filter accesses to these
registers depending on the access mechanism for the ETM at runtime.
The driver can cope with this for normal operation, by regular
checks. But the driver also exposes them via sysfs, which now
needs to be removed.

So far, we have used the generic coresight sysfs helper macros
to export a given device register, defining a "show" operation
per register. This is not helpful to filter the files at runtime,
based on the access.

In order to do this dynamically, we need to filter the attributes
by offsets, and hard-coded "show" functions don't make this easy.
Thus, switch to extended attributes, storing the offset in the scratch
space. This allows us to implement filtering based on the offset and
also saves us some text size. This will later be used for determining
whether a given attribute must be "visible" via sysfs.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Signed-off-by: Suzuki K Poulose 
---
New patch in v7
---
 .../coresight/coresight-etm4x-sysfs.c | 115 +-
 1 file changed, 57 insertions(+), 58 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c 
b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index fce9df16bfb5..ddbfeb24fc3f 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -2331,9 +2331,8 @@ static void do_smp_cross_read(void *data)
reg->data = etm4x_relaxed_read32(>csdev->access, reg->offset);
 }
 
-static u32 etmv4_cross_read(const struct device *dev, u32 offset)
+static u32 etmv4_cross_read(const struct etmv4_drvdata *drvdata, u32 offset)
 {
-   struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
struct etmv4_reg reg;
 
reg.offset = offset;
@@ -2347,69 +2346,69 @@ static u32 etmv4_cross_read(const struct device *dev, 
u32 offset)
return reg.data;
 }
 
-#define coresight_etm4x_cross_read(name, offset)   \
-   coresight_simple_func(struct etmv4_drvdata, etmv4_cross_read,   \
- name, offset)
-
-coresight_etm4x_cross_read(trcpdcr, TRCPDCR);
-coresight_etm4x_cross_read(trcpdsr, TRCPDSR);
-coresight_etm4x_cross_read(trclsr, TRCLSR);
-coresight_etm4x_cross_read(trcauthstatus, TRCAUTHSTATUS);
-coresight_etm4x_cross_read(trcdevid, TRCDEVID);
-coresight_etm4x_cross_read(trcdevtype, TRCDEVTYPE);
-coresight_etm4x_cross_read(trcpidr0, TRCPIDR0);
-coresight_etm4x_cross_read(trcpidr1, TRCPIDR1);
-coresight_etm4x_cross_read(trcpidr2, TRCPIDR2);
-coresight_etm4x_cross_read(trcpidr3, TRCPIDR3);
-coresight_etm4x_cross_read(trcoslsr, TRCOSLSR);
-coresight_etm4x_cross_read(trcconfig, TRCCONFIGR);
-coresight_etm4x_cross_read(trctraceid, TRCTRACEIDR);
+static inline u32 coresight_etm4x_attr_to_offset(struct device_attribute *attr)
+{
+   struct dev_ext_attribute *eattr;
+
+   eattr = container_of(attr, struct dev_ext_attribute, attr);
+   return (u32)(unsigned long)eattr->var;
+}
+
+static ssize_t coresight_etm4x_reg_show(struct device *dev,
+   struct device_attribute *d_attr,
+   char *buf)
+{
+   u32 val, offset;
+   struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+   offset = coresight_etm4x_attr_to_offset(d_attr);
+
+   pm_runtime_get_sync(dev->parent);
+   val = etmv4_cross_read(drvdata, offset);
+   pm_runtime_put_sync(dev->parent);
+
+   return scnprintf(buf, PAGE_SIZE, "0x%x\n", val);
+}
+
+#define coresight_etm4x_reg(name, offset)  \
+   &((struct dev_ext_attribute[]) {\
+  {\
+   __ATTR(name, 0444, coresight_etm4x_reg_show, NULL), \
+   (void *)(unsigned long)offset   \
+  }\
+   })[0].attr.attr
 
 static struct attribute *coresight_etmv4_mgmt_attrs[] = {
-   _attr_trcoslsr.attr,
-   _attr_trcpdcr.attr,
-   _attr_trcpdsr.attr,
-   _attr_trclsr.attr,
-   _attr_trcconfig.attr,
-   _attr_trctraceid.attr,
-   _attr_trcauthstatus.attr,
-   _attr_trcdevid.attr,
-   _attr_trcdevtype.attr,
-   _attr_trcpidr0.attr,
-   _attr_trcpidr1.attr,
-   _attr_trcpidr2.attr,
-   _attr_trcpidr3.attr,
+   coresight_etm4x_reg(trcpdcr, TRCPDCR),
+   coresight_etm4x_reg(trcpdsr, TRCPDSR),
+   coresight_etm4x_reg(trclsr, TRCLSR),
+   coresight_etm4x_reg(trcauthstatus, TRCAUTHSTATUS),
+   coresight_etm4x_reg(trcdevid, TRCDEVID),
+   coresight_etm4x_reg(trcdevtype, TRCDEVTYPE),
+   coresight_etm4x_reg(trcpidr0, TRCPIDR0),
+   coresight_etm4x_reg(trcpidr1, TRCPIDR1),
+   coresight

[PATCH v7 01/28] coresight: etm4x: Handle access to TRCSSPCICRn

2021-01-10 Thread Suzuki K Poulose
TRCSSPCICR<n> is present only if all of the following are true:
TRCIDR4.NUMSSCC > n.
TRCIDR4.NUMPC > 0b0000.
TRCSSCSR<n>.PC == 0b1

Add a helper function to check all the conditions.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Changes since v3:
 - Check for TRCSSCSRn.PC too.  (Mathieu)
 - Moved into a helper for easy reuse.
---
 .../coresight/coresight-etm4x-core.c  | 29 +++
 drivers/hwtracing/coresight/coresight-etm4x.h |  2 ++
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index b20b6ff17cf6..76526679b998 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -59,6 +59,22 @@ static u64 etm4_get_access_type(struct etmv4_config *config);
 
 static enum cpuhp_state hp_online;
 
+/*
+ * Check if TRCSSPCICRn(i) is implemented for a given instance.
+ *
+ * TRCSSPCICRn is implemented only if :
+ * TRCSSPCICR is present only if all of the following are true:
+ * TRCIDR4.NUMSSCC > n.
+ * TRCIDR4.NUMPC > 0b .
+ * TRCSSCSR.PC == 0b1
+ */
+static inline bool etm4x_sspcicrn_present(struct etmv4_drvdata *drvdata, int n)
+{
+   return (n < drvdata->nr_ss_cmp) &&
+  drvdata->nr_pe &&
+  (drvdata->config.ss_status[n] & TRCSSCSRn_PC);
+}
+
 static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
 {
/* Writing 0 to TRCOSLAR unlocks the trace registers */
@@ -270,8 +286,9 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
   drvdata->base + TRCSSCCRn(i));
writel_relaxed(config->ss_status[i],
   drvdata->base + TRCSSCSRn(i));
-   writel_relaxed(config->ss_pe_cmp[i],
-  drvdata->base + TRCSSPCICRn(i));
+   if (etm4x_sspcicrn_present(drvdata, i))
+   writel_relaxed(config->ss_pe_cmp[i],
+  drvdata->base + TRCSSPCICRn(i));
}
for (i = 0; i < drvdata->nr_addr_cmp; i++) {
writeq_relaxed(config->addr_val[i],
@@ -1324,7 +1341,8 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
for (i = 0; i < drvdata->nr_ss_cmp; i++) {
state->trcssccr[i] = readl(drvdata->base + TRCSSCCRn(i));
state->trcsscsr[i] = readl(drvdata->base + TRCSSCSRn(i));
-   state->trcsspcicr[i] = readl(drvdata->base + TRCSSPCICRn(i));
+   if (etm4x_sspcicrn_present(drvdata, i))
+   state->trcsspcicr[i] = readl(drvdata->base + 
TRCSSPCICRn(i));
}
 
for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
@@ -1440,8 +1458,9 @@ static void etm4_cpu_restore(struct etmv4_drvdata 
*drvdata)
   drvdata->base + TRCSSCCRn(i));
writel_relaxed(state->trcsscsr[i],
   drvdata->base + TRCSSCSRn(i));
-   writel_relaxed(state->trcsspcicr[i],
-  drvdata->base + TRCSSPCICRn(i));
+   if (etm4x_sspcicrn_present(drvdata, i))
+   writel_relaxed(state->trcsspcicr[i],
+  drvdata->base + TRCSSPCICRn(i));
}
 
for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h 
b/drivers/hwtracing/coresight/coresight-etm4x.h
index 3dd3e0633328..80e480c7fe5c 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -179,6 +179,8 @@
 #define TRCSTATR_PMSTABLE_BIT  1
 #define ETM_DEFAULT_ADDR_COMP  0
 
+#define TRCSSCSRn_PC   BIT(3)
+
 /* PowerDown Control Register bits */
 #define TRCPDCR_PU BIT(3)
 
-- 
2.24.1



[PATCH v7 04/28] coresight: tpiu: Prepare for using coresight device access abstraction

2021-01-10 Thread Suzuki K Poulose
Prepare the TPIU driver to make use of the CoreSight device access
abstraction layer. The driver touches the device even before the
coresight device is registered. Thus we could be accessing the
devices without a csdev. As we are about to use the abstraction
layer for accessing the device, pass in the access directly
to avoid having to deal with the un-initialised csdev.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
 drivers/hwtracing/coresight/coresight-tpiu.c | 30 +---
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c 
b/drivers/hwtracing/coresight/coresight-tpiu.c
index 6ca396799883..a12b6ee0a576 100644
--- a/drivers/hwtracing/coresight/coresight-tpiu.c
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -60,49 +60,45 @@ struct tpiu_drvdata {
struct coresight_device *csdev;
 };
 
-static void tpiu_enable_hw(struct tpiu_drvdata *drvdata)
+static void tpiu_enable_hw(struct csdev_access *csa)
 {
-   CS_UNLOCK(drvdata->base);
+   CS_UNLOCK(csa->base);
 
/* TODO: fill this up */
 
-   CS_LOCK(drvdata->base);
+   CS_LOCK(csa->base);
 }
 
 static int tpiu_enable(struct coresight_device *csdev, u32 mode, void 
*__unused)
 {
-   struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
-
-   tpiu_enable_hw(drvdata);
+   tpiu_enable_hw(>access);
atomic_inc(csdev->refcnt);
dev_dbg(>dev, "TPIU enabled\n");
return 0;
 }
 
-static void tpiu_disable_hw(struct tpiu_drvdata *drvdata)
+static void tpiu_disable_hw(struct csdev_access *csa)
 {
-   CS_UNLOCK(drvdata->base);
+   CS_UNLOCK(csa->base);
 
/* Clear formatter and stop on flush */
-   writel_relaxed(FFCR_STOP_FI, drvdata->base + TPIU_FFCR);
+   csdev_access_relaxed_write32(csa, FFCR_STOP_FI, TPIU_FFCR);
/* Generate manual flush */
-   writel_relaxed(FFCR_STOP_FI | FFCR_FON_MAN, drvdata->base + TPIU_FFCR);
+   csdev_access_relaxed_write32(csa, FFCR_STOP_FI | FFCR_FON_MAN, 
TPIU_FFCR);
/* Wait for flush to complete */
-   coresight_timeout(drvdata->base, TPIU_FFCR, FFCR_FON_MAN_BIT, 0);
+   coresight_timeout(csa->base, TPIU_FFCR, FFCR_FON_MAN_BIT, 0);
/* Wait for formatter to stop */
-   coresight_timeout(drvdata->base, TPIU_FFSR, FFSR_FT_STOPPED_BIT, 1);
+   coresight_timeout(csa->base, TPIU_FFSR, FFSR_FT_STOPPED_BIT, 1);
 
-   CS_LOCK(drvdata->base);
+   CS_LOCK(csa->base);
 }
 
 static int tpiu_disable(struct coresight_device *csdev)
 {
-   struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
-
if (atomic_dec_return(csdev->refcnt))
return -EBUSY;
 
-   tpiu_disable_hw(drvdata);
+   tpiu_disable_hw(>access);
 
dev_dbg(>dev, "TPIU disabled\n");
return 0;
@@ -152,7 +148,7 @@ static int tpiu_probe(struct amba_device *adev, const 
struct amba_id *id)
desc.access = CSDEV_ACCESS_IOMEM(base);
 
/* Disable tpiu to support older devices */
-   tpiu_disable_hw(drvdata);
+   tpiu_disable_hw();
 
pdata = coresight_get_platform_data(dev);
if (IS_ERR(pdata))
-- 
2.24.1



[PATCH v7 00/28] coresight: etm4x: Support for system instructions

2021-01-10 Thread Suzuki K Poulose
discovery. This
is for making 
  - Check the availability of OS/Software Locks before using them.

Known issues: 
  Checkpatch failure for "coresight: etm4x: Add sysreg access helpers" :

  ERROR: Macros with complex values should be enclosed in parentheses
  #121: FILE: drivers/hwtracing/coresight/coresight-etm4x.h:153:
  +#define CASE_READ(res, x)  \
  +case (x): { (res) = read_etm4x_sysreg_const_offset((x)); break; }

 I don't know a way to fix the warning without losing the code
 readability, which I believe is crucial for such a construct.



Jonathan Zhou (2):
  arm64: Add TRFCR_ELx definitions
  coresight: Add support for v8.4 SelfHosted tracing

Suzuki K Poulose (26):
  coresight: etm4x: Handle access to TRCSSPCICRn
  coresight: etm4x: Skip accessing TRCPDCR in save/restore
  coresight: Introduce device access abstraction
  coresight: tpiu: Prepare for using coresight device access abstraction
  coresight: Convert coresight_timeout to use access abstraction
  coresight: Convert claim/disclaim operations to use access wrappers
  coresight: etm4x: Always read the registers on the host CPU
  coresight: etm4x: Convert all register accesses
  coresight: etm4x: Make offset available for sysfs attributes
  coresight: etm4x: Add commentary on the registers
  coresight: etm4x: Add sysreg access helpers
  coresight: etm4x: Hide sysfs attributes for unavailable registers
  coresight: etm4x: Define DEVARCH register fields
  coresight: etm4x: Check for Software Lock
  coresight: etm4x: Cleanup secure exception level masks
  coresight: etm4x: Clean up exception level masks
  coresight: etm4x: Handle ETM architecture version
  coresight: etm4x: Detect access early on the target CPU
  coresight: etm4x: Use TRCDEVARCH for component discovery
  coresight: etm4x: Expose trcdevarch via sysfs
  coresight: etm4x: Add necessary synchronization for sysreg access
  coresight: etm4x: Detect system instructions support
  coresight: etm4x: Refactor probing routine
  coresight: etm4x: Run arch feature detection on the CPU
  coresight: etm4x: Add support for sysreg only devices
  dts: bindings: coresight: ETM system register access only units

 .../testing/sysfs-bus-coresight-devices-etm4x |   8 +
 .../devicetree/bindings/arm/coresight.txt |   5 +-
 arch/arm64/include/asm/sysreg.h   |  11 +
 drivers/hwtracing/coresight/coresight-catu.c  |  12 +-
 drivers/hwtracing/coresight/coresight-core.c  | 122 ++-
 .../hwtracing/coresight/coresight-cti-core.c  |  18 +-
 drivers/hwtracing/coresight/coresight-etb10.c |  10 +-
 .../coresight/coresight-etm3x-core.c  |   9 +-
 .../coresight/coresight-etm4x-core.c  | 805 --
 .../coresight/coresight-etm4x-sysfs.c | 187 ++--
 drivers/hwtracing/coresight/coresight-etm4x.h | 505 ++-
 .../hwtracing/coresight/coresight-funnel.c|   7 +-
 .../coresight/coresight-replicator.c  |  13 +-
 drivers/hwtracing/coresight/coresight-stm.c   |   4 +-
 .../hwtracing/coresight/coresight-tmc-core.c  |  16 +-
 .../hwtracing/coresight/coresight-tmc-etf.c   |  10 +-
 .../hwtracing/coresight/coresight-tmc-etr.c   |   4 +-
 drivers/hwtracing/coresight/coresight-tpiu.c  |  31 +-
 include/linux/coresight.h | 220 -
 19 files changed, 1520 insertions(+), 477 deletions(-)

-- 
2.24.1



[PATCH v7 03/28] coresight: Introduce device access abstraction

2021-01-10 Thread Suzuki K Poulose
We are about to introduce support for sysreg access to ETMv4.4+
components. Since there are generic routines that access the
registers (e.g., CS_LOCK/UNLOCK, claim/disclaim operations, timeout),
and in order to keep the logic of these operations in a
single place, we introduce an abstraction layer for the accesses
to a given device.

Cc: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Change since v6:
 - Fix code indentation in coresight.h (Mathieu Poirier)
Change since v3
 - Dropped csa argument to read()/write().
 - Addressed comments on spacing and adding labels for the #if #else #endifs.
   (Mathieu)
---
 drivers/hwtracing/coresight/coresight-catu.c  |   1 +
 drivers/hwtracing/coresight/coresight-core.c  |  43 
 .../hwtracing/coresight/coresight-cti-core.c  |   1 +
 drivers/hwtracing/coresight/coresight-etb10.c |   1 +
 .../coresight/coresight-etm3x-core.c  |   1 +
 .../coresight/coresight-etm4x-core.c  |   1 +
 .../hwtracing/coresight/coresight-funnel.c|   1 +
 .../coresight/coresight-replicator.c  |   1 +
 drivers/hwtracing/coresight/coresight-stm.c   |   1 +
 .../hwtracing/coresight/coresight-tmc-core.c  |   1 +
 drivers/hwtracing/coresight/coresight-tpiu.c  |   1 +
 include/linux/coresight.h | 193 +-
 12 files changed, 243 insertions(+), 3 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-catu.c 
b/drivers/hwtracing/coresight/coresight-catu.c
index a61313f320bd..867c932c7b26 100644
--- a/drivers/hwtracing/coresight/coresight-catu.c
+++ b/drivers/hwtracing/coresight/coresight-catu.c
@@ -551,6 +551,7 @@ static int catu_probe(struct amba_device *adev, const 
struct amba_id *id)
dev->platform_data = pdata;
 
drvdata->base = base;
+   catu_desc.access = CSDEV_ACCESS_IOMEM(base);
catu_desc.pdata = pdata;
catu_desc.dev = dev;
catu_desc.groups = catu_groups;
diff --git a/drivers/hwtracing/coresight/coresight-core.c 
b/drivers/hwtracing/coresight/coresight-core.c
index 4ba801dffcb7..a38af8f0831b 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -1458,6 +1458,48 @@ int coresight_timeout(void __iomem *addr, u32 offset, 
int position, int value)
 }
 EXPORT_SYMBOL_GPL(coresight_timeout);
 
+u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset)
+{
+   return csdev_access_relaxed_read32(>access, offset);
+}
+
+u32 coresight_read32(struct coresight_device *csdev, u32 offset)
+{
+   return csdev_access_read32(>access, offset);
+}
+
+void coresight_relaxed_write32(struct coresight_device *csdev,
+  u32 val, u32 offset)
+{
+   csdev_access_relaxed_write32(>access, val, offset);
+}
+
+void coresight_write32(struct coresight_device *csdev, u32 val, u32 offset)
+{
+   csdev_access_write32(>access, val, offset);
+}
+
+u64 coresight_relaxed_read64(struct coresight_device *csdev, u32 offset)
+{
+   return csdev_access_relaxed_read64(>access, offset);
+}
+
+u64 coresight_read64(struct coresight_device *csdev, u32 offset)
+{
+   return csdev_access_read64(>access, offset);
+}
+
+void coresight_relaxed_write64(struct coresight_device *csdev,
+  u64 val, u32 offset)
+{
+   csdev_access_relaxed_write64(>access, val, offset);
+}
+
+void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset)
+{
+   csdev_access_write64(>access, val, offset);
+}
+
 /*
  * coresight_release_platform_data: Release references to the devices connected
  * to the output port of this device.
@@ -1522,6 +1564,7 @@ struct coresight_device *coresight_register(struct 
coresight_desc *desc)
csdev->type = desc->type;
csdev->subtype = desc->subtype;
csdev->ops = desc->ops;
+   csdev->access = desc->access;
csdev->orphan = false;
 
csdev->dev.type = _dev_type[desc->type];
diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c 
b/drivers/hwtracing/coresight/coresight-cti-core.c
index 61dbc1afd8da..b38a8db5f252 100644
--- a/drivers/hwtracing/coresight/coresight-cti-core.c
+++ b/drivers/hwtracing/coresight/coresight-cti-core.c
@@ -870,6 +870,7 @@ static int cti_probe(struct amba_device *adev, const struct 
amba_id *id)
return PTR_ERR(base);
 
drvdata->base = base;
+   cti_desc.access = CSDEV_ACCESS_IOMEM(base);
 
dev_set_drvdata(dev, drvdata);
 
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c 
b/drivers/hwtracing/coresight/coresight-etb10.c
index 0cf6f0b947b6..cc742561a986 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -757,6 +757,7 @@ static int etb_probe(struct amba_device *adev, const struct 
amba_id *id)
return PTR_ERR(base);
 
drvdata->base = base;
+   desc.

Re: [PATCH v6 00/26] coresight: etm4x: Support for system instructions

2021-01-08 Thread Suzuki K Poulose

Hi Mathieu,

Please hold off on this series; I will update it, fixing the
issues you have spotted and adding some additional patches to prevent accesses
to all the system registers that may not be available via system instructions.

Apologies for the inconvenience

Kind regards
Suzuki

On 1/8/21 9:08 AM, Suzuki K Poulose wrote:

Hi Mathieu

On 1/8/21 1:09 AM, Mathieu Poirier wrote:

Hi Suzuki,

On Thu, Jan 07, 2021 at 12:38:33PM +0000, Suzuki K Poulose wrote:

CoreSight ETMv4.4 obsoletes memory mapped access to ETM and
mandates the system instructions for registers.
This also implies that they may not be on the amba bus.
Right now all the CoreSight components are accessed via memory
map. Also, we have some common routines in the coresight generic
driver code (e.g., CS_LOCK, claim/disclaim), which assume
mmio. In order to preserve the generic algorithms in a single
place and to allow dynamic switching for ETMs, this series introduces
an abstraction layer for accessing a coresight device. It is
designed such that mmio accesses are fast-tracked (i.e., without
an indirect function call).

This will also help us to get rid of the driver+attribute specific
sysfs show/store routines and replace them with a single routine
to access a given register offset (which can be embedded in the
dev_ext_attribute). This is not currently implemented in the series,
but can be achieved.

Further we switch the generic routines to work with the abstraction.
With this in place, we refactor the etm4x code a bit to allow for
supporting the system instructions with very little new code.

We use TRCDEVARCH for the detection of the ETM component, which
is a standard register as per CoreSight architecture, rather than
the etm specific id register TRCIDR1. This is for making sure
that we are able to detect the ETM via system instructions accurately,
when the trace unit could be anything (etm or a custom trace unit).
To keep backward compatibility for any existing broken
implementation which may not implement TRCDEVARCH, we fall back to TRCIDR1.
Also this covers us for the changes in the future architecture [0].

Also, v8.4 self-hosted tracing extensions (coupled with ETMv4.4) adds
new filtering registers for trace by exception level. So on a v8.4
system, with Trace Filtering support, without the appropriate
programming of the Trace filter registers (TRFCR_ELx), tracing
will not be enabled. This series also includes the TraceFiltering
support to cover the ETM-v4.4 support.

The series has been mildly tested on a model for system instructions.
I would really appreciate any testing on real hardware.


I have queued your work in my local tree.  I will have a final pass before
pushing to coresight-next tomorrow or on Monday.



Thanks for the review and fixups. Please let me know if you need a respin.

Cheers
Suzuki




Re: [PATCH v6 00/26] coresight: etm4x: Support for system instructions

2021-01-08 Thread Suzuki K Poulose

Hi Mathieu

On 1/8/21 1:09 AM, Mathieu Poirier wrote:

Hi Suzuki,

On Thu, Jan 07, 2021 at 12:38:33PM +0000, Suzuki K Poulose wrote:

CoreSight ETMv4.4 obsoletes memory mapped access to ETM and
mandates the system instructions for registers.
This also implies that they may not be on the amba bus.
Right now all the CoreSight components are accessed via memory
map. Also, we have some common routines in the coresight generic
driver code (e.g., CS_LOCK, claim/disclaim), which assume
mmio. In order to preserve the generic algorithms in a single
place and to allow dynamic switching for ETMs, this series introduces
an abstraction layer for accessing a coresight device. It is
designed such that mmio accesses are fast-tracked (i.e., without
an indirect function call).

This will also help us to get rid of the driver+attribute specific
sysfs show/store routines and replace them with a single routine
to access a given register offset (which can be embedded in the
dev_ext_attribute). This is not currently implemented in the series,
but can be achieved.

Further we switch the generic routines to work with the abstraction.
With this in place, we refactor the etm4x code a bit to allow for
supporting the system instructions with very little new code.

We use TRCDEVARCH for the detection of the ETM component, which
is a standard register as per CoreSight architecture, rather than
the etm specific id register TRCIDR1. This is for making sure
that we are able to detect the ETM via system instructions accurately,
when the trace unit could be anything (etm or a custom trace unit).
To keep backward compatibility for any existing broken
implementation which may not implement TRCDEVARCH, we fall back to TRCIDR1.
Also this covers us for the changes in the future architecture [0].

Also, v8.4 self-hosted tracing extensions (coupled with ETMv4.4) adds
new filtering registers for trace by exception level. So on a v8.4
system, with Trace Filtering support, without the appropriate
programming of the Trace filter registers (TRFCR_ELx), tracing
will not be enabled. This series also includes the TraceFiltering
support to cover the ETM-v4.4 support.

The series has been mildly tested on a model for system instructions.
I would really appreciate any testing on real hardware.


I have queued your work in my local tree.  I will have a final pass before
pushing to coresight-next tomorrow or on Monday.



Thanks for the review and fixups. Please let me know if you need a respin.

Cheers
Suzuki


Re: [PATCH 11/11] dts: bindings: Document device tree binding for Arm TRBE

2021-01-07 Thread Suzuki K Poulose

On 1/4/21 3:44 AM, Anshuman Khandual wrote:


On 1/3/21 10:35 PM, Rob Herring wrote:

On Wed, Dec 23, 2020 at 03:33:43PM +0530, Anshuman Khandual wrote:

This patch documents the device tree binding in use for Arm TRBE.

Cc: devicet...@vger.kernel.org
Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
---
Changes in V1:

- TRBE DT entry has been renamed as 'arm, trace-buffer-extension'

  Documentation/devicetree/bindings/arm/trbe.txt | 20 
  1 file changed, 20 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/arm/trbe.txt

diff --git a/Documentation/devicetree/bindings/arm/trbe.txt 
b/Documentation/devicetree/bindings/arm/trbe.txt
new file mode 100644
index 000..001945d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/trbe.txt
@@ -0,0 +1,20 @@
+* Trace Buffer Extension (TRBE)
+
+Trace Buffer Extension (TRBE) is used for collecting trace data generated
+from a corresponding trace unit (ETE) using an in memory trace buffer.
+
+** TRBE Required properties:
+
+- compatible : should be one of:
+  "arm,trace-buffer-extension"
+
+- interrupts : Exactly 1 PPI must be listed. For heterogeneous systems where
+  TRBE is only supported on a subset of the CPUs, please consult
+  the arm,gic-v3 binding for details on describing a PPI partition.
+
+** Example:
+
+trbe {
+   compatible = "arm,trace-buffer-extension";
+   interrupts = ;


If only an interrupt, then could just be part of ETE? If not, how is
this hardware block accessed? An interrupt alone is not enough unless
there's some architected way to access.


TRBE hardware block is accessed via respective new system registers but the
PPI number where the IRQ will be triggered for various buffer events, would
depend on the platform as defined in the SBSA.


That is correct. TRBE is accessed via CPU system registers. The IRQ is 
specifically
for the TRBE unit to handle buffer overflow situations and other errors in the
buffer handling. Please include this information in the description section of
the bindings.

Also, it may be worth switching this to yaml format.

Suzuki


Re: [PATCH 10/11] coresight: sink: Add TRBE driver

2021-01-07 Thread Suzuki K Poulose

On 1/6/21 11:50 AM, Anshuman Khandual wrote:



On 1/5/21 5:07 PM, Suzuki K Poulose wrote:

On 1/5/21 9:29 AM, Anshuman Khandual wrote:


...


+{
+    struct trbe_buf *buf = etm_perf_sink_config(handle);
+    unsigned long offset;
+
+    if (buf->snapshot)
+    offset = trbe_snapshot_offset(handle);
+    else
+    offset = trbe_normal_offset(handle);
+    return buf->trbe_base + offset;
+}
+
+static void clear_trbe_state(void)


nit: The name doesn't give much of a clue about what it is doing, especially given
the following "set_trbe_state()", which does something completely different from
this "clear" operation.


I agree that these names could have been better.

s/clear_trbe_state/trbe_reset_perf_state  - Clears TRBE from current perf config
s/set_trbe_state/trbe_prepare_perf_state  - Prepares TRBE for the next perf 
config


Please don't tie them to "perf". This is pure hardware configuration, not perf.


Okay.



Also, I wonder if we need a separate "set_trbe_state". Could we not initialize 
the LIMITR
at one go ?


There are some limitations which could prevent that.



i.e, do something like :

set_trbe_limit_pointer(limit, mode) ?

where it sets all the fields of limit pointer. Also, you may want to document 
the mode we
choose for TRBE. i.e, FILL STOP mode for us to collect the trace.


Sure, will document the TRBE mode being chosen here.








I would rather open code this with a write of 0 to trbsr in the caller.


+{
+    u64 trbsr = read_sysreg_s(SYS_TRBSR_EL1);
+
+    WARN_ON(is_trbe_enabled());
+    trbsr &= ~TRBSR_IRQ;
+    trbsr &= ~TRBSR_TRG;
+    trbsr &= ~TRBSR_WRAP;
+    trbsr &= ~(TRBSR_EC_MASK << TRBSR_EC_SHIFT);
+    trbsr &= ~(TRBSR_BSC_MASK << TRBSR_BSC_SHIFT);
+    trbsr &= ~(TRBSR_FSC_MASK << TRBSR_FSC_SHIFT);


BSC and FSC are the same fields under MSS, with their meanings determined by 
the EC field.


Could just drop the FSC part if required.



Could we simply write 0 to the register ?


I would really like to avoid that. This function clearly enumerates all
individual bit fields being cleared for resetting as well as preparing
the TRBE for the next perf session. Converting this into a 0 write for
SYS_TRBSR_EL1 sounds excessive and the only thing it would save is the
register read.







+    write_sysreg_s(trbsr, SYS_TRBSR_EL1);
+}
+
+static void set_trbe_state(void)
+{
+    u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
+
+    trblimitr &= ~TRBLIMITR_NVM;
+    trblimitr &= ~(TRBLIMITR_FILL_MODE_MASK << TRBLIMITR_FILL_MODE_SHIFT);
+    trblimitr &= ~(TRBLIMITR_TRIG_MODE_MASK << TRBLIMITR_TRIG_MODE_SHIFT);
+    trblimitr |= (TRBE_FILL_STOP & TRBLIMITR_FILL_MODE_MASK) << 
TRBLIMITR_FILL_MODE_SHIFT;
+    trblimitr |= (TRBE_TRIGGER_IGNORE & TRBLIMITR_TRIG_MODE_MASK) << 
TRBLIMITR_TRIG_MODE_SHIFT;
+    write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);


Do we need to read-copy-update here ? Could we simply write 0 ?
Same as above comment, could we not simply opencode it at the caller ?
Clearly the names don't help.


Will change the names as proposed or something better. But lets leave
these functions as is. Besides TRBE_TRIGGER_IGNORE also has a positive
value (i.e 3), writing all 0s into SYS_TRBLIMITR_EL1 will not be ideal.



The point is, we don't need to preserve the values for LIMITR. Also see my 
comment
above, for folding this to set_trbe_limit_pointer(). In any case, I don't think
we should rely on the values of fields we change. So it is safer and cleaner to
set all the bits for LIMITR, including the LIMIT address in one go, without
read-copy-update.


TRBE needs to be disabled (which is also in the LIMIT register) before we can 
update
any other fields in the LIMIT register. So there is already an order dependency 
here.
Looking at the function trbe_enable_hw(), it follows something like

1. Clear and set the TRBE mode  - followed by an isb()
2. Update the TRBE pointers - followed by an isb()
3. Set it rolling   - followed by TSB_CSYNC

static void trbe_enable_hw(struct trbe_buf *buf)
{

[Software checks]
 WARN_ON(buf->trbe_write < buf->trbe_base);
 WARN_ON(buf->trbe_write >= buf->trbe_limit);

[Disable TRBE in the limit register]
 set_trbe_disabled();



We need an isb() here.


[Clears TRBE status register]
 trbe_reset_perf_state();


Please be explicit here. Make the function name reflect the fact that
we are simply clearing the status register and nothing related to perf.



[Configures TRBE mode in the limit register]
 trbe_prepare_perf_state();


This is unnecessarily introducing a dependency not enforced by the HW.
You could program the LIMIT register with all the settings, mode, limit
and *enable TRBE* once we have programmed base and write pointer at one
shot.



 isb();


Drop the ISB



[Update all

<    1   2   3   4   5   6   7   8   9   10   >