[PATCH V8 06/10] powerpc, lib: Add new branch analysis support functions

2015-06-08 Thread Anshuman Khandual
Generic powerpc branch analysis support is added to the code patching
library, which will help the subsequent patch implement SW based
filtering of branch records in perf.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/include/asm/code-patching.h | 15 
 arch/powerpc/lib/code-patching.c | 66 
 2 files changed, 81 insertions(+)

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 840a550..0a6f0d8 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -22,6 +22,16 @@
 #define BRANCH_SET_LINK 0x1
 #define BRANCH_ABSOLUTE 0x2
 
+#define XL_FORM_LR  0x4C000020
+#define XL_FORM_CTR 0x4C000420
+#define XL_FORM_TAR 0x4C000460
+
+#define BO_ALWAYS    0x02800000
+#define BO_CTR       0x02000000
+#define BO_CRBI_OFF  0x00800000
+#define BO_CRBI_ON   0x01800000
+#define BO_CRBI_HINT 0x00400000
+
 unsigned int create_branch(const unsigned int *addr,
   unsigned long target, int flags);
 unsigned int create_cond_branch(const unsigned int *addr,
@@ -99,4 +109,9 @@ static inline unsigned long ppc_global_function_entry(void *func)
 #endif
 }
 
+bool instr_is_return_branch(unsigned int instr);
+bool instr_is_conditional_branch(unsigned int instr);
+bool instr_is_func_call(unsigned int instr);
+bool instr_is_indirect_func_call(unsigned int instr);
+
 #endif /* _ASM_POWERPC_CODE_PATCHING_H */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d5edbeb..15b7b88 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -87,6 +87,72 @@ static int instr_is_branch_bform(unsigned int instr)
return branch_opcode(instr) == 16;
 }
 
+static int instr_is_branch_xlform(unsigned int instr)
+{
+   return branch_opcode(instr) == 19;
+}
+
+static int is_xlform_lr(unsigned int instr)
+{
+   return (instr & XL_FORM_LR) == XL_FORM_LR;
+}
+
+static int is_bo_always(unsigned int instr)
+{
+   return (instr & BO_ALWAYS) == BO_ALWAYS;
+}
+
+static int is_branch_link_set(unsigned int instr)
+{
+   return (instr & BRANCH_SET_LINK) == BRANCH_SET_LINK;
+}
+
+bool instr_is_return_branch(unsigned int instr)
+{
+   /*
+* Conditional and unconditional branch to LR register
+* without setting the link register.
+*/
+   if (is_xlform_lr(instr) && !is_branch_link_set(instr))
+   return true;
+
+   return false;
+}
+
+bool instr_is_conditional_branch(unsigned int instr)
+{
+   /* I-form instruction - excluded */
+   if (instr_is_branch_iform(instr))
+   return false;
+
+   /* B-form or XL-form instruction */
+   if (instr_is_branch_bform(instr) || instr_is_branch_xlform(instr))  {
+
+   /* Not branch always */
+   if (!is_bo_always(instr))
+   return true;
+   }
+   return false;
+}
+
+bool instr_is_func_call(unsigned int instr)
+{
+   /* LR should be set */
+   if (is_branch_link_set(instr))
+   return true;
+
+   return false;
+}
+
+bool instr_is_indirect_func_call(unsigned int instr)
+{
+   /* XL-form instruction with LR set */
+   if (instr_is_branch_xlform(instr) && is_branch_link_set(instr))
+   return true;
+
+   return false;
+}
+
 int instr_is_relative_branch(unsigned int instr)
 {
if (instr & BRANCH_ABSOLUTE)
-- 
2.1.0
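
For reference, a minimal sketch (not part of the patch) of how these helpers
classify a few well-known instruction encodings; it assumes the functions
above are linked in, and uses the standard encodings of blr, bctrl and bl:

	#include <stdio.h>

	int main(void)
	{
		unsigned int blr   = 0x4e800020;	/* XL-form branch to LR, LK = 0 */
		unsigned int bctrl = 0x4e800421;	/* XL-form branch to CTR, LK = 1 */
		unsigned int bl    = 0x48000001;	/* I-form relative branch, LK = 1 */

		printf("%d\n", instr_is_return_branch(blr));		/* 1 */
		printf("%d\n", instr_is_conditional_branch(blr));	/* 0, BO says always */
		printf("%d\n", instr_is_func_call(bctrl));		/* 1 */
		printf("%d\n", instr_is_indirect_func_call(bctrl));	/* 1 */
		printf("%d\n", instr_is_func_call(bl));			/* 1 */
		printf("%d\n", instr_is_indirect_func_call(bl));	/* 0, I-form */
		return 0;
	}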

[PATCH V8 05/10] powerpc, perf: Change the name of HW PMU branch filter tracking variable

2015-06-08 Thread Anshuman Khandual
This patch simply changes the name of the variable from 'bhrb_filter' to
'bhrb_hw_filter' in order to add one more variable which will track SW
filters in the generic powerpc book3s code, implemented in a subsequent
patch. This patch does not change any functionality.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index d10d2c1..080b038 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -52,7 +52,7 @@ struct cpu_hw_events {
int n_txn_start;
 
/* BHRB bits */
-   u64 bhrb_filter;	/* BHRB HW branch filter */
+   u64 bhrb_hw_filter; /* BHRB HW filter */
int bhrb_users;
void*bhrb_context;
struct  perf_branch_stack   bhrb_stack;
@@ -1346,7 +1346,7 @@ static void power_pmu_enable(struct pmu *pmu)
 
mb();
if (cpuhw->bhrb_users)
-   ppmu->config_bhrb(cpuhw->bhrb_filter);
+   ppmu->config_bhrb(cpuhw->bhrb_hw_filter);
 
write_mmcr0(cpuhw, mmcr0);
 
@@ -1454,7 +1454,7 @@ nocheck:
  out:
if (has_branch_stack(event)) {
power_pmu_bhrb_enable(event);
-   cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+   cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);
}
 
@@ -1839,10 +1839,10 @@ static int power_pmu_event_init(struct perf_event *event)
err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
if (has_branch_stack(event)) {
-   cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+   cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);
 
-   if (cpuhw->bhrb_filter == -1) {
+   if (cpuhw->bhrb_hw_filter == -1) {
put_cpu_var(cpu_hw_events);
return -EOPNOTSUPP;
}
-- 
2.1.0

[PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW)

2015-06-08 Thread Anshuman Khandual
From: "khand...@linux.vnet.ibm.com" 

This patch adds a test for verifying that all the branch stack
sampling filters supported on powerpc work correctly. It also
adds some assembly helper functions in this regard. This patch
extends the generic event description to handle kernel mapped
ring buffers.
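
For background (not part of the patch): reading the captured branch samples
requires mapping the event's ring buffer into the test process. A minimal
sketch of the usual sequence, assuming 'fd' came from perf_event_open(2):

	#include <sys/mman.h>
	#include <unistd.h>
	#include <linux/perf_event.h>

	/* 1 metadata page plus 2^n data pages (n == 1 here) */
	size_t len = (1 + 2) * getpagesize();
	void *base = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	struct perf_event_mmap_page *meta = base;
	/* samples start one page in; the kernel advances meta->data_head */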

Signed-off-by: Anshuman Khandual 
---
 tools/testing/selftests/powerpc/pmu/Makefile   |  11 +-
 tools/testing/selftests/powerpc/pmu/bhrb/Makefile  |  13 +
 .../selftests/powerpc/pmu/bhrb/bhrb_filters.c  | 513 +
 .../selftests/powerpc/pmu/bhrb/bhrb_filters.h  |  16 +
 .../selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S  | 260 +++
 tools/testing/selftests/powerpc/pmu/event.h|   5 +
 6 files changed, 816 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/Makefile
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S

diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index a9099d9..2e103fd 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -4,7 +4,7 @@ noarg:
 TEST_PROGS := count_instructions l3_bank_test per_event_excludes
 EXTRA_SOURCES := ../harness.c event.c lib.c
 
-all: $(TEST_PROGS) ebb
+all: $(TEST_PROGS) ebb bhrb
 
 $(TEST_PROGS): $(EXTRA_SOURCES)
 
@@ -18,25 +18,32 @@ DEFAULT_RUN_TESTS := $(RUN_TESTS)
 override define RUN_TESTS
$(DEFAULT_RUN_TESTS)
$(MAKE) -C ebb run_tests
+   $(MAKE) -C bhrb run_tests
 endef
 
 DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
 override define EMIT_TESTS
$(DEFAULT_EMIT_TESTS)
$(MAKE) -s -C ebb emit_tests
+   $(MAKE) -s -C bhrb emit_tests
 endef
 
 DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
 override define INSTALL_RULE
$(DEFAULT_INSTALL_RULE)
$(MAKE) -C ebb install
+   $(MAKE) -C bhrb install
 endef
 
 clean:
rm -f $(TEST_PROGS) loop.o
$(MAKE) -C ebb clean
+   $(MAKE) -C bhrb clean
 
 ebb:
$(MAKE) -k -C $@ all
 
-.PHONY: all run_tests clean ebb
+bhrb:
+   $(MAKE) -k -C $@ all
+
+.PHONY: all run_tests clean ebb bhrb
diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/Makefile b/tools/testing/selftests/powerpc/pmu/bhrb/Makefile
new file mode 100644
index 000..61c032a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/bhrb/Makefile
@@ -0,0 +1,13 @@
+noarg:
+   $(MAKE) -C ../../
+
+TEST_PROGS := bhrb_filters
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../../harness.c ../event.c ../lib.c bhrb_filters_asm.S
+
+include ../../../lib.mk
+
+clean:
+   rm -f $(TEST_PROGS)
diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
new file mode 100644
index 000..13e6b72
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
@@ -0,0 +1,513 @@
+/*
+ * BHRB filter test (HW & SW)
+ *
+ * Copyright 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "bhrb_filters.h"
+#include "utils.h"
+#include "../event.h"
+#include "../lib.h"
+
+/* Fetched address counts */
+#define ALL_MAX   32
+#define CALL_MAX  12
+#define RET_MAX   10
+#define COND_MAX  8
+#define IND_MAX   4
+
+/* Test tunables */
+#define LOOP_COUNT 10
+#define SAMPLE_PERIOD  1
+
+static int branch_sample_type;
+static int branch_test_set[27] = {
+   PERF_SAMPLE_BRANCH_ANY_CALL,/* Single filters */
+   PERF_SAMPLE_BRANCH_ANY_RETURN,
+   PERF_SAMPLE_BRANCH_COND,
+   PERF_SAMPLE_BRANCH_IND_CALL,
+   PERF_SAMPLE_BRANCH_ANY,
+
+   PERF_SAMPLE_BRANCH_ANY_CALL |   /* Double filters */
+   PERF_SAMPLE_BRANCH_ANY_RETURN,
+   PERF_SAMPLE_BRANCH_ANY_CALL |
+   PERF_SAMPLE_BRANCH_COND,
+   PERF_SAMPLE_BRANCH_ANY_CALL |
+   PERF_SAMPLE_BRANCH_IND_CALL,
+   PERF_SAMPLE_BRANCH_ANY_CALL |
+   PERF_SAMPLE_BRANCH_ANY,
+
+   PERF_SAMPLE_BRANCH_ANY_RETURN |
+   PERF_SAMPLE_BRANCH_COND,
+   PERF_SAMPLE_BRANCH_ANY_RETURN |
+   PERF_SAMPLE_BRANCH_IND_CALL,
+   PERF_SAMPLE_BRANCH_ANY_RETURN |
+   PERF_SAMPLE_BRANCH_ANY,
+
+   PERF_SAMPLE_BRA

[PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched

2015-06-08 Thread Anshuman Khandual
BHRB is a rolling buffer. Hence we might end up in a situation where
we have read one target address but when we try to read the next entry
indicating the from address of the targe, the buffer just overflows.
In this case, the captured from address will be zero which indicates
the end of the buffer.

This patch drops the entire branch record which would have otherwise
confused the user space tools.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 12b6384..c246e65 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -452,7 +452,6 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 *In this case we need to read the instruction from
 *memory to determine the target/to address.
 */
-
if (val & BHRB_TARGET) {
/* Target branches use two entries
 * (ie. computed gotos/XL form)
@@ -463,6 +462,8 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 
/* Get from address in next entry */
val = read_bhrb(r_index++);
+   if (!val)
+   break;
addr = val & BHRB_EA;
if (val & BHRB_TARGET) {
/* Shouldn't have two targets in a
-- 
2.1.0
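
To illustrate the case being handled (a sketch, not from the patch), with
BHRB_TARGET marking a 'to' entry and a zero read marking the buffer end:

	/*
	 * Normal two-entry record:         Rolled-over record:
	 *   entry N   : to | BHRB_TARGET     entry N   : to | BHRB_TARGET
	 *   entry N+1 : from                 entry N+1 : 0  (buffer wrapped)
	 *
	 * The new check drops the half record on the right instead of
	 * reporting from == 0 to user space.
	 */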

[PATCH V8 04/10] powerpc, perf: Re organize PMU based branch filter processing in POWER8

2015-06-08 Thread Anshuman Khandual
This patch does some code re-arrangements to make it clear that it ignores
any separate privilege level branch filter request and does not support
any combinations of HW PMU branch filters.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/power8-pmu.c | 22 +++---
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 396351d..a6c6a2c 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -656,8 +656,6 @@ static int power8_generic_events[] = {
 
 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 {
-   u64 pmu_bhrb_filter = 0;
-
/* BHRB and regular PMU events share the same privilege state
 * filter configuration. BHRB is always recorded along with a
 * regular PMU event. As the privilege state filter is handled
@@ -665,21 +663,15 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 * PMU event, we ignore any separate BHRB specific request.
 */
 
-   /* No branch filter requested */
-   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
-   return pmu_bhrb_filter;
-
-   /* Invalid branch filter options - HW does not support */
-   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
-   return -1;
+   /* Ignore user, kernel, hv bits */
+   branch_sample_type &= ~PERF_SAMPLE_BRANCH_PLM_ALL;
 
-   if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
-   return -1;
+   /* No branch filter requested */
+   if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY)
+   return 0;
 
-   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
-   pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
-   return pmu_bhrb_filter;
-   }
+   if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL)
+   return POWER8_MMCRA_IFM1;
 
/* Every thing else is unsupported */
return -1;
-- 
2.1.0
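
Illustrative expectations for the reworked mapping (a sketch, not from the
patch), once the privilege bits have been masked off:

	/*
	 * power8_bhrb_filter_map(PERF_SAMPLE_BRANCH_ANY)      == 0
	 * power8_bhrb_filter_map(PERF_SAMPLE_BRANCH_ANY_CALL) == POWER8_MMCRA_IFM1
	 * power8_bhrb_filter_map(PERF_SAMPLE_BRANCH_ANY |
	 *                        PERF_SAMPLE_BRANCH_ANY_CALL) == -1 (no combinations)
	 * power8_bhrb_filter_map(PERF_SAMPLE_BRANCH_KERNEL |
	 *                        PERF_SAMPLE_BRANCH_ANY)      == 0  (privilege bits ignored)
	 */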

[PATCH V8 07/10] powerpc, perf: Enable SW filtering in branch stack sampling framework

2015-06-08 Thread Anshuman Khandual
This patch enables SW based post processing of BHRB captured branches
to be able to meet more user defined branch filtration criteria in perf
branch stack sampling framework. These changes increase the number of
branch filters and their valid combinations on any powerpc64 server
platform with BHRB support. A summary of the code changes follows.

(1) struct cpu_hw_event

Introduced two new variables to track the various filter values and masks

(a) bhrb_sw_filter  Tracks SW implemented branch filter flags
(b) bhrb_filter Tracks both (SW and HW) branch filter flags

(2) Event creation

Kernel will figure out supported BHRB branch filters through a PMU
call back 'bhrb_filter_map'. This function will find out how many of
the requested branch filters can be supported in the PMU HW. It will
not try to invalidate any branch filter combinations. Event creation
will not error out because of lack of HW based branch filters.
Meanwhile it will track the overall supported branch filters in the
'bhrb_filter' variable.

Once the PMU call back returns, the kernel will process the user branch
filter request against the available SW filters (bhrb_sw_filter_map) while
looking at the 'bhrb_filter'. During this phase, all the branch filters
which are still pending from the user requested list will have to be
supported in SW, failing which the event creation will error out.

(3) SW branch filter

During the BHRB data capture inside the PMU interrupt context, each
of the captured 'perf_branch_entry.from' will be checked for compliance
with applicable SW branch filters. If the entry does not conform to the
filter requirements, it will be discarded from the final perf branch
stack buffer.

(4) Supported SW based branch filters

(a) PERF_SAMPLE_BRANCH_ANY_RETURN
(b) PERF_SAMPLE_BRANCH_IND_CALL
(c) PERF_SAMPLE_BRANCH_ANY_CALL
(d) PERF_SAMPLE_BRANCH_COND

Please refer the patch to understand the classification of instructions
into these branch filter categories.

(5) Multiple branch filter semantics

The Book3S server implementation follows the same OR semantics (as
implemented in x86) while dealing with multiple branch filters at any
point of time. SW branch filter analysis is carried out on the data set
captured in the PMU HW, so the resulting set of data (after applying the
SW filters) will inherently be an AND with the HW captured set. Hence any
combination of HW and SW branch filters would be invalid. HW based branch
filters are more efficient and faster compared to SW implemented branch
filters. So at first the PMU should decide whether it can support all the
requested branch filters itself or not. In case it can support all the
branch filters in an OR manner, we don't apply any SW branch filter on
top of the HW captured set (which is the final set). This preserves the
OR semantic of multiple branch filters as required. But in case the PMU
cannot support all the requested branch filters in an OR manner, it
should not apply any of its filters and leave it all up to the SW. It is
the PMU code's responsibility to uphold this protocol to conform to the
overall OR semantic of the perf branch stack sampling framework.
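
A condensed sketch of this protocol (illustrative only; the names follow
the summary above, not necessarily the exact diff):

	/* event creation: the PMU takes all the requested filters or none */
	/* req = event->attr.branch_sample_type */
	cpuhw->bhrb_filter = 0;
	hw_filter = ppmu->bhrb_filter_map(req, &cpuhw->bhrb_filter);
	cpuhw->bhrb_sw_filter = req & ~cpuhw->bhrb_filter;  /* still pending */
	if (cpuhw->bhrb_sw_filter & ~bhrb_sw_filter_map)    /* not SW capable either */
		return -EOPNOTSUPP;

	/* PMU interrupt: SW filters only see what the HW did not filter */
	if (!keep_branch(entry.from, cpuhw->bhrb_sw_filter))
		/* drop the entry from the final branch stack */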

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/include/asm/perf_event_server.h |   7 +-
 arch/powerpc/perf/core-book3s.c  | 188 ++-
 arch/powerpc/perf/power8-pmu.c   |   2 +-
 3 files changed, 191 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 8146221..cb7ca1a 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -38,7 +38,8 @@ struct power_pmu {
unsigned long *valp);
int (*get_alternatives)(u64 event_id, unsigned int flags,
u64 alt[]);
-   u64 (*bhrb_filter_map)(u64 branch_sample_type);
+   u64 (*bhrb_filter_map)(u64 branch_sample_type,
+   u64 *bhrb_filter);
void(*config_bhrb)(u64 pmu_bhrb_filter);
void(*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
int (*limited_pmc_event)(u64 event_id);
@@ -80,6 +81,10 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long int read_bhrb(int n);
 
+#define for_each_branch_sample_type(x) \
+   for ((x) = PERF_SAMPLE_BRANCH_USER; \
+   (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
 /*
  * Only override the default definitions in include/linux/perf_event.h
  * if we have hardware PMU support.
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 080b038..9a682c9 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c

[PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB

2015-06-08 Thread Anshuman Khandual
From: "khand...@linux.vnet.ibm.com" 

'commit 9de5cb0f6df8 ("powerpc/perf: Add per-event excludes on Power8")'
broke the PMU based BHRB privilege level filter. BHRB depends on the
same MMCR0 bits for privilege level filter which was used to freeze all
the PMCs as a group. Once we moved to individual event based privilege
filters through MMCR2 register on POWER8, event associated privilege
filters are no longer applicable to the BHRB captured branches.

This patch solves the problem by restoring the previous method of
privilege level filtering for the event in case BHRB based branch stack
sampling is requested. This patch also changes 'check_excludes' for
the same reason.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index c246e65..ae61629 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -930,7 +930,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
  * added events.
  */
 static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
- int n_prev, int n_new)
+ int n_prev, int n_new, int bhrb_users)
 {
int eu = 0, ek = 0, eh = 0;
int i, n, first;
@@ -941,7 +941,7 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
 * don't need to do any of this logic. NB. This assumes no PMU has both
 * per event exclude and limited PMCs.
 */
-   if (ppmu->flags & PPMU_ARCH_207S)
+   if ((ppmu->flags & PPMU_ARCH_207S) && !bhrb_users)
return 0;
 
n = n_prev + n_new;
@@ -1259,7 +1259,7 @@ static void power_pmu_enable(struct pmu *pmu)
goto out;
}
 
-   if (!(ppmu->flags & PPMU_ARCH_207S)) {
+   if (!(ppmu->flags & PPMU_ARCH_207S) || cpuhw->bhrb_users) {
/*
 * Add in MMCR0 freeze bits corresponding to the attr.exclude_*
 * bits for the first event. We have already checked that all
@@ -1284,7 +1284,7 @@ static void power_pmu_enable(struct pmu *pmu)
mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
| MMCR0_FC);
-   if (ppmu->flags & PPMU_ARCH_207S)
+   if ((ppmu->flags & PPMU_ARCH_207S) && !cpuhw->bhrb_users)
mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
 
/*
@@ -1436,7 +1436,8 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
if (cpuhw->group_flag & PERF_EVENT_TXN)
goto nocheck;
 
-   if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
+   if (check_excludes(cpuhw->event, cpuhw->flags,
+   n0, 1, cpuhw->bhrb_users))
goto out;
if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
goto out;
@@ -1615,7 +1616,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
return -EAGAIN;
cpuhw = this_cpu_ptr(&cpu_hw_events);
n = cpuhw->n_events;
-   if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
+   if (check_excludes(cpuhw->event, cpuhw->flags, 0, n, cpuhw->bhrb_users))
return -EAGAIN;
i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
if (i < 0)
@@ -1828,10 +1829,12 @@ static int power_pmu_event_init(struct perf_event *event)
events[n] = ev;
ctrs[n] = event;
cflags[n] = flags;
-   if (check_excludes(ctrs, cflags, n, 1))
+   cpuhw = &get_cpu_var(cpu_hw_events);
+   if (check_excludes(ctrs, cflags, n, 1, cpuhw->bhrb_users)) {
+   put_cpu_var(cpu_hw_events);
return -EINVAL;
+   }
 
-   cpuhw = &get_cpu_var(cpu_hw_events);
err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
if (has_branch_stack(event)) {
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW)

2015-06-08 Thread Anshuman Khandual
On 06/08/2015 05:08 PM, Anshuman Khandual wrote:
> From: "khand...@linux.vnet.ibm.com" 

This should be "Anshuman Khandual " and it happened
to a couple of other patches in this series as well. I believe it got
messed up on a test machine; will fix it next time around.

Re: [PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB

2015-06-10 Thread Anshuman Khandual
On 06/10/2015 09:13 AM, Daniel Axtens wrote:
> In the subject line, privilege should only have 1 l, and I think it
> should probably start with "powerpc/perf:" rather than "powerpc, perf:".

Will fix the typo here. Have been using the "powerpc, perf:" format for
some time now :) It seems cleaner compared to the "powerpc/perf:" format.
But again, it's subjective.

 > > On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
>> From: "khand...@linux.vnet.ibm.com" 
>>
>> 'commit 9de5cb0f6df8 ("powerpc/perf: Add per-event excludes on Power8")'
> Does this need a 'Fixes:' tag then?

Not really, it only fixes the BHRB privilege request cases, not the other
scenarios which are impacted by that previous commit.
 
> 
>> broke the PMU based BHRB privilege level filter. BHRB depends on the
>> same MMCR0 bits for privilege level filter which was used to freeze all
>> the PMCs as a group. Once we moved to individual event based privilege
>> filters through MMCR2 register on POWER8, event associated privilege
>> filters are no longer applicable to the BHRB captured branches.
>>
>> This patch solves the problem by restoring the previous method of
>> privilege level filtering for the event in case BHRB based branch stack
>> sampling is requested. This patch also changes 'check_excludes' for
>> the same reason.
>>
>> Signed-off-by: Anshuman Khandual 
>> ---
>>  arch/powerpc/perf/core-book3s.c | 19 +++
>>  1 file changed, 11 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>> index c246e65..ae61629 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -930,7 +930,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
>>   * added events.
>>   */
> Does this comment need to be updated?

Not really. The previous commit did not update it, hence this patch would
skip it as well.

>>  static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
>> -  int n_prev, int n_new)
>> +  int n_prev, int n_new, int bhrb_users)
>>  {
>>  int eu = 0, ek = 0, eh = 0;
>>  int i, n, first;
>> @@ -941,7 +941,7 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
>>   * don't need to do any of this logic. NB. This assumes no PMU has both
>>   * per event exclude and limited PMCs.
>>   */
> Likewise, does this comment need to be updated?

Yeah, will update it.

>> -if (ppmu->flags & PPMU_ARCH_207S)
>> +if ((ppmu->flags & PPMU_ARCH_207S) && !bhrb_users)
>>  return 0;
>>  
>>  n = n_prev + n_new;
>> @@ -1259,7 +1259,7 @@ static void power_pmu_enable(struct pmu *pmu)
>>  goto out;
>>  }
>>  
>> -if (!(ppmu->flags & PPMU_ARCH_207S)) {
>> +if (!(ppmu->flags & PPMU_ARCH_207S) || cpuhw->bhrb_users)

> You're using cpuhw->bhrb_users as a bool here, where it's an int. Could
> you make the test more specific so that it's clear exactly what you're
> expecting bhrb_users to contain?

Using cpuhw->bhrb_users as a bool just verifies whether it contains a
zero or non-zero value, and the test seems to be doing that as expected.
But yes, we can move it into a nested conditional block if that is
better.

>>  {
>>  /*
>>   * Add in MMCR0 freeze bits corresponding to the attr.exclude_*
>>   * bits for the first event. We have already checked that all
>> @@ -1284,7 +1284,7 @@ static void power_pmu_enable(struct pmu *pmu)
>>  mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
>>  mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
>>  | MMCR0_FC);
>> -if (ppmu->flags & PPMU_ARCH_207S)
>> +if ((ppmu->flags & PPMU_ARCH_207S) && !cpuhw->bhrb_users)
>>  mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
>>  
>>  /*
>> @@ -1436,7 +1436,8 @@ static int power_pmu_add(struct perf_event *event, int 
>> ef_flags)
>>  if (cpuhw->group_flag & PERF_EVENT_TXN)
>>  goto nocheck;
>>  
>> -if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
>> +if (check_excludes(cpuhw->event, cpuhw->flags,
>> +n0, 1, cpuhw->bhrb_users))
>>  goto out;
>>  if (power_check_constr

Re: [PATCH V8 04/10] powerpc, perf: Re organize PMU based branch filter processing in POWER8

2015-06-10 Thread Anshuman Khandual
On 06/10/2015 10:37 AM, Daniel Axtens wrote:
> 
>>  /* Every thing else is unsupported */
>>  return -1;
> You're returning -1 as a unsigned 64bit number. Other code that reads
> this value tests for -1 and I think it works everywhere just because it
> wraps around consistently. But I would still rather not do this and I'm
> surprised it doesn't throw a warning.

That's the existing code, which is going away with this patch series.


Re: [PATCH V8 06/10] powerpc, lib: Add new branch analysis support functions

2015-06-10 Thread Anshuman Khandual
On 06/10/2015 11:03 AM, Daniel Axtens wrote:
> 
>> +static int instr_is_branch_xlform(unsigned int instr)
>> +{
>> +return branch_opcode(instr) == 19;
>> +}
> Why do these not return bool? The functions below do.

Yeah they can, will change it.

>> +
>> +bool instr_is_indirect_func_call(unsigned int instr)
>> +{
>> +/* XL-form instruction with LR set */
>> +if (instr_is_branch_xlform(instr) && is_branch_link_set(instr))
>> +return true;
>> +
>> +return false;
>> +}

> Both of these functions could be made into a single 'return' statement,
> right?

Yeah, right.

Re: [PATCH V8 03/10] powerpc, perf: Re organize BHRB processing

2015-06-10 Thread Anshuman Khandual
On 06/10/2015 10:06 AM, Daniel Axtens wrote:
> 
>> +void update_branch_entry(struct cpu_hw_events *cpuhw,
>> +int index, u64 from, u64 to, int pred)
>> +{
>> +cpuhw->bhrb_entries[index].from = from;
>> +cpuhw->bhrb_entries[index].to = to;
>> +cpuhw->bhrb_entries[index].mispred = pred;
>> +cpuhw->bhrb_entries[index].predicted = ~pred;
>> +}
> 
> I realise you're copying existing code, but:
>  - could you please rename pred? If we assign .mispred to pred
> and .predicted to ~pred, we should pick a different name for pred.

Agreed.

>  - I'm really uncomfortable with the bitwise inverting a signed integer.
> Can you explain what is going on here? Looking at
> include/uapi/linux/perf_event.h, this seems to be a single bit flag:
> shouldn't this then be a logical flip rather than a bitwise one?
> (Furthermore, looking at that header, why is pred an int at all? Why not
> a bool?)

Agreed.

> 
>> +
>>  /* Processing BHRB entries */
>>  static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
>>  {
>> -u64 val;
>> -u64 addr;
>> +u64 val, addr, tmp;
> Please don't use 'tmp' here. As far as I can tell, you use this variable
> to compute the 'to' address. The name should reflect that.

Agreed, but then it will be a new preparatory patch at the beginning
of this patch series.

Re: [PATCH V8 08/10] powerpc, perf: Change POWER8 PMU configuration to work with SW filters

2015-06-10 Thread Anshuman Khandual
On 06/10/2015 11:19 AM, Daniel Axtens wrote:
> On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
>> > The kernel now supports SW based branch filters for book3s systems with
>> > some specific requirements while dealing with HW supported branch filters
>> > in order to achieve overall OR semantics prevailing in perf branch stack
>> > sampling framework. This patch adapts the BHRB branch filter configuration
>> > to meet those protocols. POWER8 PMU can only handle one HW based branch
>> > filter request at any point of time. For all other combinations PMU will
>> > pass it on to the SW.
>> > 
>> > Signed-off-by: Anshuman Khandual 
>> > ---
>> >  arch/powerpc/perf/power8-pmu.c | 51 
>> > --
>> >  1 file changed, 44 insertions(+), 7 deletions(-)
>> > 
>> > diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
>> > index 5e17cb5..8fccf6c 100644
>> > --- a/arch/powerpc/perf/power8-pmu.c
>> > +++ b/arch/powerpc/perf/power8-pmu.c
>> > @@ -656,6 +656,16 @@ static int power8_generic_events[] = {
>> >  
> This is, I think, the third time you've modified this function in this
> patch series. I appreciate the fact that you're trying to keep logical
> changes separate, but it seems to me like this change might be able to
> be combined with patch 4, and given a single commit message that clearly
> explains the complete scope of the changes.

Here I have to disagree with you. The changes in this patch, namely that
the PMU should not handle multiple filter requests as it does not support
the OR semantic required by the protocol, and that we need to pass the
entire branch filtering responsibility on to the SW, only come into the
picture after SW branch filtering has been enabled in the previous patch.
So these changes have to follow that up logically and sequentially in
that order.

Re: [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched

2015-06-10 Thread Anshuman Khandual
On 06/10/2015 08:51 AM, Daniel Axtens wrote:
> Hi Anshuman,
> 
> Was there a cover letter for this series that I missed?

This is the continuation (rebased and reworked) of the series
posted at https://lkml.org/lkml/2014/5/5/153 (which is V6). I
remember incrementing the version count for the re-send of the
first four patches of the series to Peter Z for generic review,
which got pulled in last year. These patches here are the
remaining powerpc part of the original series. Will list the
current changes as well next time around along with the new
ones.

> 
> On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
>> BHRB is a rolling buffer. Hence we might end up in a situation where
> Could you spell out what BHRB stands for?

Branch History Rolling Buffer. Would you like to have that in the
commit message as well?

> 
>> we have read one target address but when we try to read the next entry
>>  indicating the from address of the targe, the buffer just overflows.
> target?

Yeah, it's the target address.

> 
>> In this case, the captured from address will be zero which indicates
>> the end of the buffer.
>>
> In what sort of situations would this occur? It seems like something we
> would want to avoid if possible?

It's not avoidable. During the regular flow of branch recording, the HW
would have written both the records correctly, but then the new ones came
in and we just happen to lose one of them, causing this situation.

Re: [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW)

2015-06-12 Thread Anshuman Khandual
On 06/11/2015 07:39 AM, Daniel Axtens wrote:
> Hi,
> 
> On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
>> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
>> new file mode 100644
>> index 000..13e6b72
>> --- /dev/null
>> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
>> @@ -0,0 +1,513 @@
>> +/*
>> + * BHRB filter test (HW & SW)
>> + *
>> + * Copyright 2015 Anshuman Khandual, IBM Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License
>> + * as published by the Free Software Foundation; either version
>> + * 2 of the License, or (at your option) any later version.
>> + */
> 
> This should also be gpl2 only.

Why? Any special reason? I can see similar existing statements in this
file as well: "powerpc/primitives/load_unaligned_zeropad.c"

>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#include "bhrb_filters.h"
>> +#include "utils.h"
>> +#include "../event.h"
>> +#include "../lib.h"
>> +
>> +/* Fetched address counts */
>> +#define ALL_MAX 32
>> +#define CALL_MAX12
>> +#define RET_MAX 10
>> +#define COND_MAX8
>> +#define IND_MAX 4
>> +
>> +/* Test tunables */
>> +#define LOOP_COUNT  10
>> +#define SAMPLE_PERIOD   1
>> +
>> +static int branch_sample_type;
>> +static int branch_test_set[27] = {
> Do you need to explicitly provide the count here?

Not really, will fix it.

>> +PERF_SAMPLE_BRANCH_ANY_CALL,/* Single filters */
>> +PERF_SAMPLE_BRANCH_ANY_RETURN,
>> +PERF_SAMPLE_BRANCH_COND,
>> +PERF_SAMPLE_BRANCH_IND_CALL,
>> +PERF_SAMPLE_BRANCH_ANY,
>> +
> 
>> +PERF_SAMPLE_BRANCH_ANY_CALL |   /* Tripple filters */
> s/Tripple/Triple/

Sure, will fix it.

>> +PERF_SAMPLE_BRANCH_ANY_RETURN |
>> +PERF_SAMPLE_BRANCH_COND,
>> +
> 
> 
>> +
>> +static void *ring_buffer_mask(struct ring_buffer *r, void *p)

> Is this actually returning a mask? It looks more like it's calculating
> an offset, and that seems to be how you use it below.

Yeah, it does calculate an offset. Will change the function name to
ring_buffer_offset instead.

>> +{
>> +unsigned long l = (unsigned long)p;
>> +
>> +return (void *)(r->ring_base + ((l - r->ring_base) & r->mask));
>> +}
> That's a lot of casts, especially when you then load it into a int64_t
> pointer below...

Will it cause any problem? I can return int64_t * here instead of void *
to match the receiving pointer.

>> +
>> +static void dump_sample(struct perf_event_header *hdr, struct ring_buffer *r)
>> +{
>> +unsigned long from, to, flag;
>> +int i, nr;
>> +int64_t *v;
>> +
>> +/* NR Branches */
>> +v = ring_buffer_mask(r, hdr + 1);
> ...here. (and everywhere else I can see that you're using the
> ring_buffer_mask function)
>> +
>> +nr = *v;
> You are dereferencing an int64_t pointer into an int. Should nr be an
> int64_t? Or should v be a different pointer type?

hmm, int64_t sounds good.

> 
>> +
>> +/* Branches */
>> +for (i = 0; i < nr; i++) {
>> +v = ring_buffer_mask(r, v + 1);
>> +from = *v;
> Now you're dereferencing an *int64_t into an unsigned long.

Just wanted to have 64 bits for that field. To achieve some uniformity,
will change all of v, nr, from, to, flags to the int64_t type. Also will
make the ring_buffer_mask function return an int64_t * pointer instead,
and will rename ring_buffer_mask to ring_buffer_offset as well.
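
A sketch of what the agreed rework might look like (illustrative only):

	/* compute a wrapped pointer into the ring as an int64_t pointer */
	static int64_t *ring_buffer_offset(struct ring_buffer *r, void *p)
	{
		unsigned long l = (unsigned long)p;

		return (int64_t *)(r->ring_base + ((l - r->ring_base) & r->mask));
	}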

>> +
>> +v = ring_buffer_mask(r, v + 1);
>> +to = *v;
>> +
>> +v = ring_buffer_mask(r, v + 1);
>> +flag = *v;
>> +
>> +if (!check_branch(from, to)) {
>> +has_failed = true;
>> +printf("[Filter: %d] From: %lx To: %lx Flags: %lx\n",
>> +branch_sample_type, from, to, flag);
>> +}
>> +}
>> +}
>> +
>> +static void read_ring_buffer(struct event *e)

Re: [PATCH V8 09/10] powerpc, perf: Enable privilege mode SW branch filters

2015-06-12 Thread Anshuman Khandual
On 06/11/2015 06:49 AM, Daniel Axtens wrote:
>>  if (sw_filter & PERF_SAMPLE_BRANCH_PLM_ALL) {
>> +flag = false;
> Would it be possible to use a more meaningful name than flag? Perhaps
> indicating what is it flagging?

Sure, will change it to "select_branch".

>> +
>> +if (sw_filter & PERF_SAMPLE_BRANCH_USER) {
>> +if (to_plm == POWER_ADDR_USER)
>> +flag = true;
>> +}
>> +
>> +if (sw_filter & PERF_SAMPLE_BRANCH_KERNEL) {
>> +if (to_plm == POWER_ADDR_KERNEL)
>> +flag = true;
>> +}
>> +
>> +if (sw_filter & PERF_SAMPLE_BRANCH_HV) {
>> +if (cpu_has_feature(CPU_FTR_HVMODE)) {
>> +if (to_plm == POWER_ADDR_KERNEL)
>> +flag = true;
>> +}
>> +}
> 
> Is there any reason these are nested ifs rather than &&s?

No reason as such, will change it.

> 
>> +
>> +if (!flag)
>> +return false;
>> +}
>> +
> 
>> @@ -700,7 +710,6 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
>>  if (branch_sample_type) {
>>  /* Multiple filters will be processed in SW */
>>  pmu_bhrb_filter = 0;
>> -*bhrb_filter = 0;
>>  return pmu_bhrb_filter;
>>  } else {
>>  /* Individual filter will be processed in HW */
> What's the justification for the removal of this line? You added it in
> the previous patch...

Previously the PMU passed the entire branch processing on to the SW by
indicating 0 in the bhrb_filter mask, although it was handling the
privilege level requests received from the normal PMU event. The SW just
ignored privilege level requests while figuring out which other filters
needed to be processed for each captured branch.

Now that we support privilege level SW branch filters, the PMU needs to
explicitly inform the SW about them, so that the SW does not do the
processing itself assuming it is not already taken care of. That is the
reason why we removed the above statement and added this code block here
instead.

if (branch_sample_type & PERF_SAMPLE_BRANCH_USER)
*bhrb_filter |= PERF_SAMPLE_BRANCH_USER;

if (branch_sample_type & PERF_SAMPLE_BRANCH_KERNEL)
*bhrb_filter |= PERF_SAMPLE_BRANCH_KERNEL;

if (branch_sample_type & PERF_SAMPLE_BRANCH_HV)
*bhrb_filter |= PERF_SAMPLE_BRANCH_HV;

Re: [PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB

2015-06-12 Thread Anshuman Khandual
On 06/11/2015 08:58 AM, Daniel Axtens wrote:
> 
  
 -  if (!(ppmu->flags & PPMU_ARCH_207S)) {
 +  if (!(ppmu->flags & PPMU_ARCH_207S) || cpuhw->bhrb_users)
>>
>>> You're using cpuhw->bhrb_users as a bool here, where it's an int. Could
>>> you make the test more specific so that it's clear exactly what you're
>>> expecting bhrb_users to contain?
>>
>> Using cpuhw->bhrb_users as a bool just verifies whether it contains
>> zero or non-zero value in it. The test seems to be doing that as
>> expected. But yes, we can move it as a nested conditional block as
>> well if that is better.
>>
> 
> What I meant was, should this read (cpuhw->bhrb_users != 0)? Because
> bhrb_users in check_excludes() is a signed int, I also wanted to make
> sure it shouldn't be a test for bhrb_users > 0 instead. (Also, if
> bhrb_users is always positive, should it be an unsigned int?)

Will replace both conditional checks with comparisons against 0.
Will change the data type of bhrb_users to unsigned int as well.

> 
> I don't think a nested conditional would be better. 

Okay.

> 
> 
> 
 -  if (check_excludes(ctrs, cflags, n, 1))
 +  cpuhw = &get_cpu_var(cpu_hw_events);
>>> Should this be using a this_cpu_ptr rather than a get_cpu_var? (as with
>>> the power_pmu_commit_txn case?)
 +  if (check_excludes(ctrs, cflags, n, 1, cpuhw->bhrb_users)) {
 +  put_cpu_var(cpu_hw_events);
>>> Likewise with this?
return -EINVAL;
 +  }
  
 -  cpuhw = &get_cpu_var(cpu_hw_events);
>>
>> This patch just moves the existing code couple of lines above without
>> changing it in any manner.
>>
> I see that, but I still think you should take this opportunity to
> improve it.

Will try to change it in a separate patch.

Re: [PATCH V8 03/10] powerpc, perf: Re organize BHRB processing

2015-06-12 Thread Anshuman Khandual
On 06/11/2015 09:02 AM, Daniel Axtens wrote:
 +
  /* Processing BHRB entries */
  static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
  {
 -  u64 val;
 -  u64 addr;
 +  u64 val, addr, tmp;
>>> Please don't use 'tmp' here. As far as I can tell, you use this variable
>>> to compute the 'to' address. The name should reflect that.
>>
>> Agreed but then it will be a new preparatory patch at the beginning
>> of this patch series.
>>
> I don't think I understand what you're saying here. Why do you need a
> new patch? As I understand it, you've introduced 'tmp' in this patch;
> couldn't you just rename it to, for example, to_addr, instead of tmp in
> this patch?

Sorry for the confusion. I meant a separate patch for the other
two changes I had agreed to (i.e. changing the name and type of the
'pred' variable) as suggested in the previous mail, not for this
one. Will change 'tmp' into 'to_addr' in this patch itself.

Re: [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW)

2015-06-12 Thread Anshuman Khandual
On 06/12/2015 12:56 PM, Madhavan Srinivasan wrote:
> 
> On Friday 12 June 2015 12:32 PM, Anshuman Khandual wrote:
>> > On 06/11/2015 07:39 AM, Daniel Axtens wrote:
>>> >> Hi,
>>> >>
>>> >> On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
>>>> >>> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
>>>> >>> new file mode 100644
>>>> >>> index 000..13e6b72
>>>> >>> --- /dev/null
>>>> >>> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
>>>> >>> @@ -0,0 +1,513 @@
>>>> >>> +/*
>>>> >>> + * BHRB filter test (HW & SW)
>>>> >>> + *
>>>> >>> + * Copyright 2015 Anshuman Khandual, IBM Corporation.
>>>> >>> + *
>>>> >>> + * This program is free software; you can redistribute it and/or
>>>> >>> + * modify it under the terms of the GNU General Public License
>>>> >>> + * as published by the Free Software Foundation; either version
>>>> >>> + * 2 of the License, or (at your option) any later version.
>>>> >>> + */
>>> >> This should also be gpl2 only.
>> > Why ? Any special reason ? I can see similar existing statements here
>> > in this file as well "powerpcC/primitives/load_unaligned_zeropad.c"
> For the new files, mpe suggested to use gpl2 only version of the license.
> 
>  This program is free software; you can redistribute it and/or modify it
>  under the terms of the GNU General Public License version 2 as published
>  by the Free Software Foundation.
> 
> Also, preferred format for Copyright line is to have "(C)" next to word
> Copyright

Sure, will accommodate both the proposed changes next time around.

[PATCH V9 12/13] powerpc, perf: Enable privilege mode SW branch filters

2015-06-15 Thread Anshuman Khandual
This patch enables privilege mode SW branch filters. It also modifies
the POWER8 PMU branch filter configuration so that the privilege mode
branch filter implemented as part of the base PMU event configuration
is reflected in the bhrb filter mask. As a result, the SW will skip
and not try to process the privilege mode branch filters itself.
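
A worked example of the resulting semantics (illustrative, not from the
patch):

	/*
	 * sw_filter = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY_RETURN
	 *
	 * keep_branch(from, to, sw_filter) keeps a record only when
	 *   - 'to' is a user address (privilege filter, applied first), AND
	 *   - the instruction at 'from' is a return branch (OR-ed filters).
	 */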

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/include/asm/perf_event_server.h |  3 +++
 arch/powerpc/perf/core-book3s.c  | 37 ++--
 arch/powerpc/perf/power8-pmu.c   | 13 --
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index cb7ca1a..23d68d3 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -85,6 +85,9 @@ extern unsigned long int read_bhrb(int n);
for ((x) = PERF_SAMPLE_BRANCH_USER; \
(x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
 
+#define POWER_ADDR_USER   0
+#define POWER_ADDR_KERNEL 1
+
 /*
  * Only override the default definitions in include/linux/perf_event.h
  * if we have hardware PMU support.
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index a66c53c..69781577 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define BHRB_MAX_ENTRIES   32
 #define BHRB_TARGET 0x0000000000000002
@@ -465,10 +466,10 @@ static bool check_instruction(unsigned int *addr, u64 sw_filter)
  * Access the instruction contained in the address and then check
  * whether it complies with the applicable SW branch filters.
  */
-static bool keep_branch(u64 from, u64 sw_filter)
+static bool keep_branch(u64 from, u64 to, u64 sw_filter)
 {
unsigned int instr;
-   bool ret;
+   bool to_plm, ret, select_branch;
 
/*
 * The "from" branch for every branch record has to go
@@ -478,6 +479,37 @@ static bool keep_branch(u64 from, u64 sw_filter)
if (sw_filter == 0)
return true;
 
+   to_plm = is_kernel_addr(to) ? POWER_ADDR_KERNEL : POWER_ADDR_USER;
+
+   /*
+* XXX: Applying the privilege mode SW branch filters first on
+* the 'TO' address creates an AND semantic with other SW branch
+* filters which are ORed with each other being applied on the
+* 'FROM' address there after.
+*/
+   if (sw_filter & PERF_SAMPLE_BRANCH_PLM_ALL) {
+   select_branch = false;
+
+   if (sw_filter & PERF_SAMPLE_BRANCH_USER) {
+   if (to_plm == POWER_ADDR_USER)
+   select_branch = true;
+   }
+
+   if (sw_filter & PERF_SAMPLE_BRANCH_KERNEL) {
+   if (to_plm == POWER_ADDR_KERNEL)
+   select_branch = true;
+   }
+
+   if (sw_filter & PERF_SAMPLE_BRANCH_HV) {
+   if (cpu_has_feature(CPU_FTR_HVMODE)
+   && (to_plm == POWER_ADDR_KERNEL))
+   select_branch = true;
+   }
+
+   if (!select_branch)
+   return false;
+   }
+
if (is_kernel_addr(from)) {
return check_instruction((unsigned int *) from, sw_filter);
} else {
@@ -568,6 +600,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 
/* Apply SW branch filters and drop the entry if required */
if (!keep_branch(cpuhw->bhrb_entries[u_index].from,
+   cpuhw->bhrb_entries[u_index].to,
cpuhw->bhrb_sw_filter))
u_index--;
u_index++;
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 8fccf6c..b56afc6 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -670,9 +670,19 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
 * filter configuration. BHRB is always recorded along with a
 * regular PMU event. As the privilege state filter is handled
 * in the basic PMC configuration of the accompanying regular
-* PMU event, we ignore any separate BHRB specific request.
+* PMU event, we ignore any separate BHRB specific request. But
+* this needs to be communicated with the branch filter mask.
 */
 
+   if (branch_sample_type & PERF_SAMPLE_BRANCH_USER)
+   *bhrb_filter |= PERF_SAMPLE_BRANCH_USER;
+
+   if (branch_sample_type & PERF_SAMPLE_BRANCH_KERNEL)
+   *bhrb_filter |= PERF_SAMPLE_BRANCH_KERNEL;
+
+   if (branch_sample_typ

[PATCH V9 00/13] powerpc, perf: Enable SW branch filters

2015-06-15 Thread Anshuman Khandual
This is the continuation (rebased and reworked) of the series
posted at https://lkml.org/lkml/2014/5/5/153 (which is V6). I remember
incrementing the version count for the re-send of the first four
patches of the series to Peter Z for generic review, which got pulled
in last year. These patches here are the remaining powerpc part of the
original series.

Changes in V9
=
(1) Changed some of the commit messages and fixed some typos
(2) Variable 'bhrb_users' type changed from int to unsigned int
(3) Replaced the last usage of 'get_cpu_var' with 'this_cpu_ptr'
(4) Conditional checks for 'cpuhw->bhrb_users' changed to test against zero
(5) Updated in-code documentation inside 'check_excludes' function
(6) Changed the name & type of 'pred' variable in 'power_pmu_bhrb_read'
(7) Changed the name of 'tmp' to 'to_addr' inside 'power_pmu_bhrb_read'
(8) Changed return values for branch instruction analysis functions
(9) Changed the name of 'flag' variable to 'select_branch' inside 'keep_branch'
(10) Fixed one nested conditional statement inside 'keep_branch' function
(11) Changed function name from 'update_branch_entry' to 'insert_branch'
(12) Fixed copyright and license statements for new selftest related files
(13) Improved helper assembly functions with some macro definitions
(14) Improved the core test program at various places
(15) Added .gitignore file for the new test case

Changes in V8 (http://patchwork.ozlabs.org/patch/481848/)
=
(1) Fixed BHRB privilege mode branch filter request processing
(2) Dropped branch records where 'from' cannot be fetched
(3) Added in-code documenation at various places in the patch series
(4) Added one comprehensive seltest case to verify all the filters

Changes in V7
=
(1) Incremented the version count while requesting pull for generic changes

Changes in V6 (https://lkml.org/lkml/2014/5/5/153)
=
(1) Rebased the patchset against the master
(2) Added "Reviewed-by: Andi Kleen" in the first four patches in the series 
which changes the
generic or X86 perf code. [https://lkml.org/lkml/2014/4/7/130]

Changes in V5 (https://lkml.org/lkml/2014/3/7/101)
=
(1) Added a precursor patch to clean up the indentation problem in power_pmu_bhrb_read
(2) Added a precursor patch to re-arrange the P8 PMU BHRB filter config, which improved the clarity
(3) Merged the previous 10th patch into the 8th patch
(4) Moved SW based branch analysis code from core perf into the code-patching library as suggested by Michael
(5) Simplified the logic in the branch analysis library
(6) Fixed some ambiguities in documentation at various places
(7) Added some more in-code documentation blocks at various places
(8) Renamed some local variable and function names
(9) Fixed some indentation and white space errors in the code
(10) Implemented almost all the review comments and suggestions made by Michael Ellerman on the V4 patchset
(11) Enabled privilege mode SW branch filter
(12) Simplified and generalized the SW implemented conditional branch filter
(13) PERF_SAMPLE_BRANCH_COND filter is now supported only through SW implementation
(14) Adjusted other patches to deal with the above changes

Changes in V4 (https://lkml.org/lkml/2013/12/4/168)
=
(1) Changed the commit message for patch (01/10)
(2) Changed the patch (02/10) to accommodate review comments from Michael Ellerman
(3) Rebased the patchset against latest Linus's tree

Changes in V3 (https://lkml.org/lkml/2013/10/16/59)
=
(1) Split the SW branch filter enablement into multiple patches
(2) Added PMU neutral SW branch filtering code and PMU specific HW branch filtering code
(3) Added new instruction analysis functionality into the powerpc code-patching library
(4) Changed name for some of the functions
(5) Fixed couple of spelling mistakes
(6) Changed code documentation in multiple places

Changes in V2 (https://lkml.org/lkml/2013/8/30/10)
=
(1) Enabled PPC64 SW branch filtering support
(2) Incorporated changes required for all previous comments

Anshuman Khandual (13):
  powerpc, perf: Drop the branch sample when 'from' cannot be fetched
  powerpc, perf: Change type of the bhrb_users variable
  powerpc, perf: Replace last usage of get_cpu_var with this_cpu_ptr
  powerpc, perf: Restore privilege level filter support for BHRB
  powerpc, perf: Change name & type of 'pred' in power_pmu_bhrb_read
  powerpc, perf: Re organize BHRB processing
  powerpc, perf: Re organize PMU branch filter processing on POWER8
  powerpc, perf: Change the name of HW PMU branch filter tracking variable
  powerpc, lib: Add new branch analysis support functions
  powerpc, perf: Enable SW filtering in branch stack sampling framework
  powerpc, perf: Change POWER8 PMU configurat

[PATCH V9 01/13] powerpc, perf: Drop the branch sample when 'from' cannot be fetched

2015-06-15 Thread Anshuman Khandual
BHRB (Branch History Rolling Buffer) is a rolling buffer. Hence we
might end up in a situation where we have read one target address
but when we try to read the next entry indicating the from address
of the target address, the buffer just overflows. In this case, the
captured from address will be zero which indicates the end of the
buffer.

This patch drops the entire branch record which would have
otherwise confused the user space tools.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 12b6384..c246e65 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -452,7 +452,6 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 *In this case we need to read the instruction from
 *memory to determine the target/to address.
 */
-
if (val & BHRB_TARGET) {
/* Target branches use two entries
 * (ie. computed gotos/XL form)
@@ -463,6 +462,8 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 
/* Get from address in next entry */
val = read_bhrb(r_index++);
+   if (!val)
+   break;
addr = val & BHRB_EA;
if (val & BHRB_TARGET) {
/* Shouldn't have two targets in a
-- 
2.1.0

[PATCH V9 03/13] powerpc, perf: Replace last usage of get_cpu_var with this_cpu_ptr

2015-06-15 Thread Anshuman Khandual
The commit 69111bac42f5ce ("powerpc: Replace __get_cpu_var uses")
replaced all usage of get_cpu_var with this_cpu_ptr inside core
perf event handling on powerpc. But it skipped one of them, which
is being replaced with this patch.

Reported-by: Daniel Axtens 
Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 9798f00..7a03cce 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1831,20 +1831,17 @@ static int power_pmu_event_init(struct perf_event *event)
if (check_excludes(ctrs, cflags, n, 1))
return -EINVAL;
 
-   cpuhw = &get_cpu_var(cpu_hw_events);
+   cpuhw = this_cpu_ptr(&cpu_hw_events);
err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
if (has_branch_stack(event)) {
cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);
 
-   if (cpuhw->bhrb_filter == -1) {
-   put_cpu_var(cpu_hw_events);
+   if (cpuhw->bhrb_filter == -1)
return -EOPNOTSUPP;
-   }
}
 
-   put_cpu_var(cpu_hw_events);
if (err)
return -EINVAL;
 
-- 
2.1.0
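
For context (general kernel API behaviour, not specific to this patch):
get_cpu_var() disables preemption and must be paired with put_cpu_var(),
while this_cpu_ptr() only computes the per-cpu address:

	/* old: preemption stays disabled until the matching put_cpu_var() */
	cpuhw = &get_cpu_var(cpu_hw_events);
	...
	put_cpu_var(cpu_hw_events);

	/* new: no pairing needed; the caller is already non-preemptible here */
	cpuhw = this_cpu_ptr(&cpu_hw_events);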

[PATCH V9 05/13] powerpc, perf: Change name & type of 'pred' in power_pmu_bhrb_read

2015-06-15 Thread Anshuman Khandual
Branch record attributes 'mispred' and 'predicted' are single bit
fields as defined in the perf ABI. Hence the data type of the field
'pred' used during BHRB processing should be changed from integer
to bool. This patch also changes the name of the variable from 'pred'
to 'mispred' making the logical inversion process more meaningful
and readable.

Reported-by: Daniel Axtens 
Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 892340e..a7be394 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -417,7 +417,8 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 {
u64 val;
u64 addr;
-   int r_index, u_index, pred;
+   int r_index, u_index;
+   bool mispred;
 
r_index = 0;
u_index = 0;
@@ -429,7 +430,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
break;
else {
addr = val & BHRB_EA;
-   pred = val & BHRB_PREDICTION;
+   mispred = val & BHRB_PREDICTION;
 
if (!addr)
/* invalid entry */
@@ -457,8 +458,9 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 * (ie. computed gotos/XL form)
 */
cpuhw->bhrb_entries[u_index].to = addr;
-   cpuhw->bhrb_entries[u_index].mispred = pred;
-   cpuhw->bhrb_entries[u_index].predicted = ~pred;
+   cpuhw->bhrb_entries[u_index].mispred = mispred;
+   cpuhw->bhrb_entries[u_index].predicted =
+   ~mispred;
 
/* Get from address in next entry */
val = read_bhrb(r_index++);
@@ -478,8 +480,9 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
cpuhw->bhrb_entries[u_index].from = addr;
cpuhw->bhrb_entries[u_index].to =
power_pmu_bhrb_to(addr);
-   cpuhw->bhrb_entries[u_index].mispred = pred;
-   cpuhw->bhrb_entries[u_index].predicted = ~pred;
+   cpuhw->bhrb_entries[u_index].mispred = mispred;
+   cpuhw->bhrb_entries[u_index].predicted =
+   ~mispred;
}
u_index++;
 
-- 
2.1.0


[PATCH V9 10/13] powerpc, perf: Enable SW filtering in branch stack sampling framework

2015-06-15 Thread Anshuman Khandual
This patch enables SW based post processing of BHRB captured branches
to meet more user defined branch filtration criteria in the perf
branch stack sampling framework. These changes increase the number of
branch filters and their valid combinations on any powerpc64 server
platform with BHRB support. A summary of the code changes follows.

(1) struct cpu_hw_event

Introduced two new variables to track the various filter values and masks:

(a) bhrb_sw_filter  Tracks SW implemented branch filter flags
(b) bhrb_filter Tracks both (SW and HW) branch filter flags

(2) Event creation

The kernel will figure out the supported BHRB branch filters through a
PMU callback, 'bhrb_filter_map'. This function finds out how many of
the requested branch filters can be supported in the PMU HW. It does
not try to invalidate any branch filter combination, so event creation
will not error out because of a lack of HW based branch filters.
Meanwhile it tracks the overall supported branch filters in the
'bhrb_filter' variable.

Once the PMU callback returns, the kernel processes the user branch
filter request against the available SW filters (bhrb_sw_filter_map)
while looking at the 'bhrb_filter'. During this phase, every branch
filter still pending from the user requested list has to be supported
in SW, failing which event creation errors out, as sketched below.
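
A condensed sketch of that flow (the helper and field names follow the
description above and the diff below, which this archive truncates;
treat it as an outline, not the patch itself):

/* Outline of branch filter resolution during event creation */
if (has_branch_stack(event)) {
	u64 req = event->attr.branch_sample_type;

	/* The PMU picks up whatever it can support in HW ... */
	cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(req,
						&cpuhw->bhrb_filter);

	/* ... SW covers the filters the PMU left pending ... */
	cpuhw->bhrb_sw_filter = bhrb_sw_filter_map(req,
						&cpuhw->bhrb_filter);

	/* ... anything still unaccounted for fails event creation */
	if ((req & ~PERF_SAMPLE_BRANCH_PLM_ALL) & ~cpuhw->bhrb_filter)
		return -EOPNOTSUPP;
}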

(3) SW branch filter

During the BHRB data capture inside the PMU interrupt context, each
of the captured 'perf_branch_entry.from' will be checked for compliance
with applicable SW branch filters. If the entry does not conform to the
filter requirements, it will be discarded from the final perf branch
stack buffer.
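
In code, the discard step looks roughly like this (a sketch; the
'keep_branch' compliance check appears under that name in the V10
posting of this series):

/* Inside power_pmu_bhrb_read(), once an entry has been filled in */
if (!keep_branch(cpuhw->bhrb_entries[u_index].from,
		 cpuhw->bhrb_sw_filter))
	u_index--;	/* drop it: the slot is reused by the next branch */
u_index++;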

(4) Supported SW based branch filters

(a) PERF_SAMPLE_BRANCH_ANY_RETURN
(b) PERF_SAMPLE_BRANCH_IND_CALL
(c) PERF_SAMPLE_BRANCH_ANY_CALL
(d) PERF_SAMPLE_BRANCH_COND

Please refer to the patch to understand the classification of
instructions into these branch filter categories; a simplified
sketch follows.
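
As a sketch, the classification maps each filter bit onto one of the
instruction analysis helpers added by the code patching library patch
in this series (the real check lives in a 'check_instruction' helper,
visible in the V10 posting):

static bool check_instruction(unsigned int *addr, u64 sw_filter)
{
	if ((sw_filter & PERF_SAMPLE_BRANCH_ANY_RETURN) &&
			instr_is_return_branch(*addr))
		return true;

	if ((sw_filter & PERF_SAMPLE_BRANCH_IND_CALL) &&
			instr_is_indirect_func_call(*addr))
		return true;

	if ((sw_filter & PERF_SAMPLE_BRANCH_ANY_CALL) &&
			instr_is_func_call(*addr))
		return true;

	if ((sw_filter & PERF_SAMPLE_BRANCH_COND) &&
			instr_is_conditional_branch(*addr))
		return true;

	return false;
}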

(5) Multiple branch filter semantics

The Book3S server implementation follows the same OR semantics (as
implemented in x86) while dealing with multiple branch filters at any
point of time. SW branch filter analysis is carried out on the data
set captured in the PMU HW, so the resulting set of data (after
applying the SW filters) will inherently be an AND with the HW
captured set. Hence any combination of HW and SW branch filters would
be invalid. HW based branch filters are more efficient and faster
compared to SW implemented branch filters. So at first the PMU should
decide whether it can support all the requested branch filters itself
or not. In case it can support all the branch filters in an OR manner,
we don't apply any SW branch filter on top of the HW captured set
(which is then the final set). This preserves the OR semantic of
multiple branch filters as required. But in case the PMU cannot
support all the requested branch filters in an OR manner, it should
not apply any of its filters and should leave it up to the SW to
handle them all. It is the PMU code's responsibility to uphold this
protocol in order to conform to the overall OR semantic of the perf
branch stack sampling framework.
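
From the PMU side, the protocol reduces to roughly the following
sketch ('hw_supports' and 'hw_ifm_for' are hypothetical stand-ins for
the PMU specific checks; POWER8's real version appears later in this
series):

static u64 example_bhrb_filter_map(u64 req, u64 *bhrb_filter)
{
	/* Privilege bits are handled with the base PMU event */
	req &= ~PERF_SAMPLE_BRANCH_PLM_ALL;

	if (hweight64(req) == 1 && hw_supports(req)) {
		/* Exactly one filter, doable in HW: keep it there */
		*bhrb_filter = req;
		return hw_ifm_for(req);
	}

	/*
	 * Multiple filters, or one the HW lacks: capture every
	 * branch and let SW filtering preserve the OR semantic.
	 */
	*bhrb_filter = 0;
	return 0;
}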

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/include/asm/perf_event_server.h |   7 +-
 arch/powerpc/perf/core-book3s.c  | 188 ++-
 arch/powerpc/perf/power8-pmu.c   |   2 +-
 3 files changed, 191 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 8146221..cb7ca1a 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -38,7 +38,8 @@ struct power_pmu {
unsigned long *valp);
int (*get_alternatives)(u64 event_id, unsigned int flags,
u64 alt[]);
-   u64 (*bhrb_filter_map)(u64 branch_sample_type);
+   u64 (*bhrb_filter_map)(u64 branch_sample_type,
+   u64 *bhrb_filter);
 void	(*config_bhrb)(u64 pmu_bhrb_filter);
 void	(*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
int (*limited_pmc_event)(u64 event_id);
@@ -80,6 +81,10 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long int read_bhrb(int n);
 
+#define for_each_branch_sample_type(x) \
+   for ((x) = PERF_SAMPLE_BRANCH_USER; \
+   (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
 /*
  * Only override the default definitions in include/linux/perf_event.h
  * if we have hardware PMU support.
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6f6a7ca..a66c53c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c

[PATCH V9 09/13] powerpc, lib: Add new branch analysis support functions

2015-06-15 Thread Anshuman Khandual
Generic powerpc branch analysis support is added to the code patching
library; it will help the subsequent patch on SW based filtering of
branch records in perf.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/include/asm/code-patching.h | 15 
 arch/powerpc/lib/code-patching.c | 60 
 2 files changed, 75 insertions(+)

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 840a550..0a6f0d8 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -22,6 +22,16 @@
 #define BRANCH_SET_LINK	0x1
 #define BRANCH_ABSOLUTE	0x2
 
+#define XL_FORM_LR   0x4C000020
+#define XL_FORM_CTR  0x4C000420
+#define XL_FORM_TAR  0x4C000460
+
+#define BO_ALWAYS    0x02800000
+#define BO_CTR       0x02000000
+#define BO_CRBI_OFF  0x00800000
+#define BO_CRBI_ON   0x01800000
+#define BO_CRBI_HINT 0x00400000
+
 unsigned int create_branch(const unsigned int *addr,
   unsigned long target, int flags);
 unsigned int create_cond_branch(const unsigned int *addr,
@@ -99,4 +109,9 @@ static inline unsigned long ppc_global_function_entry(void *func)
 #endif
 }
 
+bool instr_is_return_branch(unsigned int instr);
+bool instr_is_conditional_branch(unsigned int instr);
+bool instr_is_func_call(unsigned int instr);
+bool instr_is_indirect_func_call(unsigned int instr);
+
 #endif /* _ASM_POWERPC_CODE_PATCHING_H */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d5edbeb..46fbcca 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -87,6 +87,66 @@ static int instr_is_branch_bform(unsigned int instr)
return branch_opcode(instr) == 16;
 }
 
+static bool instr_is_branch_xlform(unsigned int instr)
+{
+   return branch_opcode(instr) == 19;
+}
+
+static bool is_xlform_lr(unsigned int instr)
+{
+   return (instr & XL_FORM_LR) == XL_FORM_LR;
+}
+
+static bool is_bo_always(unsigned int instr)
+{
+   return (instr & BO_ALWAYS) == BO_ALWAYS;
+}
+
+static bool is_branch_link_set(unsigned int instr)
+{
+   return (instr & BRANCH_SET_LINK) == BRANCH_SET_LINK;
+}
+
+bool instr_is_return_branch(unsigned int instr)
+{
+   /*
+* Conditional and unconditional branch to LR register
+* without setting the link register.
+*/
+   if (is_xlform_lr(instr) && !is_branch_link_set(instr))
+   return true;
+
+   return false;
+}
+
+bool instr_is_conditional_branch(unsigned int instr)
+{
+   /* I-form instruction - excluded */
+   if (instr_is_branch_iform(instr))
+   return false;
+
+   /* B-form or XL-form instruction */
+   if (instr_is_branch_bform(instr) || instr_is_branch_xlform(instr))  {
+
+   /* Not branch always */
+   if (!is_bo_always(instr))
+   return true;
+   }
+   return false;
+}
+
+bool instr_is_func_call(unsigned int instr)
+{
+   /* LR should be set */
+   return is_branch_link_set(instr);
+}
+
+bool instr_is_indirect_func_call(unsigned int instr)
+{
+   /* XL-form instruction with LR set */
+   return (instr_is_branch_xlform(instr) && is_branch_link_set(instr));
+}
+
 int instr_is_relative_branch(unsigned int instr)
 {
if (instr & BRANCH_ABSOLUTE)
-- 
2.1.0
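
For reference, a few standard Power ISA encodings (not part of the
patch) and how the helpers above classify them:

/*
 * 0x4e800020  blr      XL-form via LR, BO=always, LK=0
 *                      -> instr_is_return_branch()
 * 0x4e800021  blrl     same with LK=1
 *                      -> instr_is_func_call() (and indirect call)
 * 0x4e800421  bctrl    XL-form via CTR, LK=1
 *                      -> instr_is_indirect_func_call()
 * 0x41820010  beq +16  B-form, BO=branch-if-true
 *                      -> instr_is_conditional_branch()
 * 0x48000001  bl       I-form, LK=1
 *                      -> instr_is_func_call()
 */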


[PATCH V9 07/13] powerpc, perf: Re organize PMU branch filter processing on POWER8

2015-06-15 Thread Anshuman Khandual
This patch does some code re-arrangement to make it clear that the
kernel ignores any separate privilege level branch filter request and
does not support any combination of HW PMU branch filters.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/power8-pmu.c | 22 +++---
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 396351d..a6c6a2c 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -656,8 +656,6 @@ static int power8_generic_events[] = {
 
 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 {
-   u64 pmu_bhrb_filter = 0;
-
/* BHRB and regular PMU events share the same privilege state
 * filter configuration. BHRB is always recorded along with a
 * regular PMU event. As the privilege state filter is handled
@@ -665,21 +663,15 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 * PMU event, we ignore any separate BHRB specific request.
 */
 
-   /* No branch filter requested */
-   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
-   return pmu_bhrb_filter;
-
-   /* Invalid branch filter options - HW does not support */
-   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
-   return -1;
+   /* Ignore user, kernel, hv bits */
+   branch_sample_type &= ~PERF_SAMPLE_BRANCH_PLM_ALL;
 
-   if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
-   return -1;
+   /* No branch filter requested */
+   if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY)
+   return 0;
 
-   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
-   pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
-   return pmu_bhrb_filter;
-   }
+   if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL)
+   return POWER8_MMCRA_IFM1;
 
/* Every thing else is unsupported */
return -1;
-- 
2.1.0


[PATCH V9 13/13] selftests, powerpc: Add test for BHRB branch filters (HW & SW)

2015-06-15 Thread Anshuman Khandual
This patch adds a test for verifying that all the branch stack
sampling filters supported on powerpc work correctly. It also adds
some assembly helper functions for the test, and extends the generic
event description to handle kernel mapped ring buffers. The rough
shape of the test is sketched below.
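
(Illustrative sketch only: 'run_branch_workload' and
'check_branch_records' are hypothetical stand-ins for the assembly
helper and the ring buffer walk in the real, truncated file below;
'struct event', event_init, event_open and FAIL_IF come from the
selftest harness.)

static int test_one_filter(u64 branch_sample_type, int expected)
{
	struct event e;

	event_init(&e, PERF_COUNT_HW_INSTRUCTIONS);
	e.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
	e.attr.branch_sample_type = branch_sample_type;
	e.attr.sample_period = SAMPLE_PERIOD;

	FAIL_IF(event_open(&e));
	FAIL_IF(ioctl(e.fd, PERF_EVENT_IOC_ENABLE, 0));
	run_branch_workload();	/* fixed, known counts per branch type */
	FAIL_IF(ioctl(e.fd, PERF_EVENT_IOC_DISABLE, 0));

	/* Every sampled branch must match the requested filter and
	 * the per-type totals must add up to 'expected'. */
	return check_branch_records(&e, branch_sample_type, expected);
}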

Signed-off-by: Anshuman Khandual 
---
 tools/testing/selftests/powerpc/pmu/Makefile   |  11 +-
 .../testing/selftests/powerpc/pmu/bhrb/.gitignore  |   1 +
 tools/testing/selftests/powerpc/pmu/bhrb/Makefile  |  13 +
 .../selftests/powerpc/pmu/bhrb/bhrb_filters.c  | 535 +
 .../selftests/powerpc/pmu/bhrb/bhrb_filters.h  |  15 +
 .../selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S  | 263 ++
 tools/testing/selftests/powerpc/pmu/event.h|   5 +
 7 files changed, 841 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/.gitignore
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/Makefile
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S

diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index a9099d9..2e103fd 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -4,7 +4,7 @@ noarg:
 TEST_PROGS := count_instructions l3_bank_test per_event_excludes
 EXTRA_SOURCES := ../harness.c event.c lib.c
 
-all: $(TEST_PROGS) ebb
+all: $(TEST_PROGS) ebb bhrb
 
 $(TEST_PROGS): $(EXTRA_SOURCES)
 
@@ -18,25 +18,32 @@ DEFAULT_RUN_TESTS := $(RUN_TESTS)
 override define RUN_TESTS
$(DEFAULT_RUN_TESTS)
$(MAKE) -C ebb run_tests
+   $(MAKE) -C bhrb run_tests
 endef
 
 DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
 override define EMIT_TESTS
$(DEFAULT_EMIT_TESTS)
$(MAKE) -s -C ebb emit_tests
+   $(MAKE) -s -C bhrb emit_tests
 endef
 
 DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
 override define INSTALL_RULE
$(DEFAULT_INSTALL_RULE)
$(MAKE) -C ebb install
+   $(MAKE) -C bhrb install
 endef
 
 clean:
rm -f $(TEST_PROGS) loop.o
$(MAKE) -C ebb clean
+   $(MAKE) -C bhrb clean
 
 ebb:
$(MAKE) -k -C $@ all
 
-.PHONY: all run_tests clean ebb
+bhrb:
+   $(MAKE) -k -C $@ all
+
+.PHONY: all run_tests clean ebb bhrb
diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/.gitignore b/tools/testing/selftests/powerpc/pmu/bhrb/.gitignore
new file mode 100644
index 000..47c1049
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/bhrb/.gitignore
@@ -0,0 +1 @@
+bhrb_filters
diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/Makefile b/tools/testing/selftests/powerpc/pmu/bhrb/Makefile
new file mode 100644
index 000..61c032a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/bhrb/Makefile
@@ -0,0 +1,13 @@
+noarg:
+   $(MAKE) -C ../../
+
+TEST_PROGS := bhrb_filters
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../../harness.c ../event.c ../lib.c bhrb_filters_asm.S
+
+include ../../../lib.mk
+
+clean:
+   rm -f $(TEST_PROGS)
diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
new file mode 100644
index 000..84e9b9a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
@@ -0,0 +1,535 @@
+/*
+ * BHRB filter test (HW & SW)
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "bhrb_filters.h"
+#include "utils.h"
+#include "../event.h"
+#include "../lib.h"
+
+/* Memory barriers */
+#define smp_mb()	{ asm volatile ("sync" : : : "memory"); }
+
+/* Fetched address counts */
+#define ALL_MAX		32
+#define CALL_MAX	12
+#define RET_MAX		10
+#define COND_MAX	8
+#define IND_MAX		4
+
+/* Test tunables */
+#define LOOP_COUNT 10
+#define SAMPLE_PERIOD  1
+
+static int branch_test_set[] = {
+   PERF_SAMPLE_BRANCH_ANY_CALL,/* Single filters */
+   PERF_SAMPLE_BRANCH_ANY_RETURN,
+   PERF_SAMPLE_BRANCH_COND,
+   PERF_SAMPLE_BRANCH_IND_CALL,
+   PERF_SAMPLE_BRANCH_ANY,
+
+   PERF_SAMPLE_BRANCH_ANY_CALL |   /* Double filters */
+   PERF_SAMPLE_BRANCH_ANY_RETURN,
+   PERF_SAMPLE_BRANCH_ANY_CALL |
+   PERF_SAMPLE_BRANCH_COND,
+   PERF_SAMPLE_BRANCH_ANY_CALL |
+   PERF_SAMPLE_BRANCH_IND_CALL,
+   PERF

[PATCH V9 08/13] powerpc, perf: Change the name of HW PMU branch filter tracking variable

2015-06-15 Thread Anshuman Khandual
This patch simply changes the name of the variable from 'bhrb_filter'
to 'bhrb_hw_filter' in order to make room for one more variable, which
will track SW filters in the generic powerpc book3s code and will be
introduced in a subsequent patch. This patch does not change any
functionality.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index ea77057..6f6a7ca 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -52,7 +52,7 @@ struct cpu_hw_events {
int n_txn_start;
 
/* BHRB bits */
-   u64 bhrb_filter;	/* BHRB HW branch filter */
+   u64 bhrb_hw_filter; /* BHRB HW filter */
 unsigned int	bhrb_users;
 void		*bhrb_context;
struct  perf_branch_stack   bhrb_stack;
@@ -1348,7 +1348,7 @@ static void power_pmu_enable(struct pmu *pmu)
 
mb();
if (cpuhw->bhrb_users)
-   ppmu->config_bhrb(cpuhw->bhrb_filter);
+   ppmu->config_bhrb(cpuhw->bhrb_hw_filter);
 
write_mmcr0(cpuhw, mmcr0);
 
@@ -1456,7 +1456,7 @@ nocheck:
  out:
if (has_branch_stack(event)) {
power_pmu_bhrb_enable(event);
-   cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+   cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);
}
 
@@ -1839,10 +1839,10 @@ static int power_pmu_event_init(struct perf_event *event)
err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
if (has_branch_stack(event)) {
-   cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+   cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);
 
-   if (cpuhw->bhrb_filter == -1)
+   if (cpuhw->bhrb_hw_filter == -1)
return -EOPNOTSUPP;
}
 
-- 
2.1.0


[PATCH V9 11/13] powerpc, perf: Change POWER8 PMU configuration to work with SW filters

2015-06-15 Thread Anshuman Khandual
The kernel now supports SW based branch filters for Book3S systems,
with some specific requirements on how HW supported branch filters
are handled, in order to achieve the overall OR semantics prevailing
in the perf branch stack sampling framework. This patch adapts the
BHRB branch filter configuration to meet those protocols. The POWER8
PMU can handle only one HW based branch filter request at any point
of time; for all other combinations the PMU passes the work on to
the SW. A couple of worked examples follow.
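
/*
 * Worked examples of the resulting mapping (derived from the diff
 * below):
 *
 *   requested filters          MMCRA bits returned   *bhrb_filter
 *   ANY                        0                     ANY  (HW keeps all)
 *   ANY_CALL                   POWER8_MMCRA_IFM1     ANY_CALL
 *   ANY_CALL | ANY_RETURN      0                     0    (all done in SW)
 *   COND                       0                     0    (all done in SW)
 */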

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/power8-pmu.c | 51 --
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 5e17cb5..8fccf6c 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -656,6 +656,16 @@ static int power8_generic_events[] = {
 
 static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
 {
+   u64 x, pmu_bhrb_filter;
+
+   pmu_bhrb_filter = 0;
+   *bhrb_filter = 0;
+
+   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) {
+   *bhrb_filter = PERF_SAMPLE_BRANCH_ANY;
+   return pmu_bhrb_filter;
+   }
+
/* BHRB and regular PMU events share the same privilege state
 * filter configuration. BHRB is always recorded along with a
 * regular PMU event. As the privilege state filter is handled
@@ -666,15 +676,42 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
/* Ignore user, kernel, hv bits */
branch_sample_type &= ~PERF_SAMPLE_BRANCH_PLM_ALL;
 
-   /* No branch filter requested */
-   if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY)
-   return 0;
+   /*
+* POWER8 does not support ORing of PMU HW branch filters. Hence
+* if multiple branch filters are requested which may include filters
+* supported in PMU, still go ahead and clear the PMU based HW branch
+* filter component as in this case all the filters will be processed
+* in SW.
+*/
 
-   if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL)
-   return POWER8_MMCRA_IFM1;
+   for_each_branch_sample_type(x) {
+   /* Ignore privilege branch filters */
+   if ((x == PERF_SAMPLE_BRANCH_USER)
+   || (x == PERF_SAMPLE_BRANCH_KERNEL)
+   || (x == PERF_SAMPLE_BRANCH_HV))
+   continue;
+
+   if (!(branch_sample_type & x))
+   continue;
+
+   /* Supported individual PMU branch filters */
+   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+   branch_sample_type &= ~PERF_SAMPLE_BRANCH_ANY_CALL;
+   if (branch_sample_type) {
+   /* Multiple filters will be processed in SW */
+   pmu_bhrb_filter = 0;
+   *bhrb_filter = 0;
+   return pmu_bhrb_filter;
+   } else {
+   /* Individual filter will be processed in HW */
+   pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
+   *bhrb_filter|= PERF_SAMPLE_BRANCH_ANY_CALL;
+   return pmu_bhrb_filter;
+   }
+   }
+   }
 
-   /* Every thing else is unsupported */
-   return -1;
+   return pmu_bhrb_filter;
 }
 
 static void power8_config_bhrb(u64 pmu_bhrb_filter)
-- 
2.1.0


[PATCH V9 02/13] powerpc, perf: Change type of the bhrb_users variable

2015-06-15 Thread Anshuman Khandual
This patch just changes the data type of the 'bhrb_users' variable
from int to unsigned int because it never contains a negative value.

Reported-by: Daniel Axtens 
Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index c246e65..9798f00 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -53,7 +53,7 @@ struct cpu_hw_events {
 
/* BHRB bits */
 u64 bhrb_filter;	/* BHRB HW branch filter */
-   int bhrb_users;
+   unsigned int	bhrb_users;
 void		*bhrb_context;
struct  perf_branch_stack   bhrb_stack;
struct  perf_branch_entry   bhrb_entries[BHRB_MAX_ENTRIES];
-- 
2.1.0


[PATCH V9 04/13] powerpc, perf: Restore privilege level filter support for BHRB

2015-06-15 Thread Anshuman Khandual
Commit 9de5cb0f6df8 ("powerpc/perf: Add per-event excludes on Power8")
broke the PMU based BHRB privilege level filter. BHRB depends for its
privilege level filter on the same MMCR0 bits that were used to freeze
all the PMCs as a group. Once we moved to individual event based
privilege filters through the MMCR2 register on POWER8, the event
associated privilege filters are no longer applicable to the BHRB
captured branches.

This patch solves the problem by restoring the previous method of
privilege level filtering for the event in case BHRB based branch
stack sampling is requested. This patch also changes 'check_excludes'
for the same reason.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 7a03cce..892340e 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -930,7 +930,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
  * added events.
  */
 static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
- int n_prev, int n_new)
+ int n_prev, int n_new, int bhrb_users)
 {
int eu = 0, ek = 0, eh = 0;
int i, n, first;
@@ -939,9 +939,10 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
/*
 * If the PMU we're on supports per event exclude settings then we
 * don't need to do any of this logic. NB. This assumes no PMU has both
-* per event exclude and limited PMCs.
+* per event exclude and limited PMCs. But if the event has also
+* requested branch stack sampling, then process the logic here.
 */
-   if (ppmu->flags & PPMU_ARCH_207S)
+   if ((ppmu->flags & PPMU_ARCH_207S) && !bhrb_users)
return 0;
 
n = n_prev + n_new;
@@ -1259,7 +1260,7 @@ static void power_pmu_enable(struct pmu *pmu)
goto out;
}
 
-   if (!(ppmu->flags & PPMU_ARCH_207S)) {
+   if (!(ppmu->flags & PPMU_ARCH_207S) || (cpuhw->bhrb_users != 0)) {
/*
 * Add in MMCR0 freeze bits corresponding to the attr.exclude_*
 * bits for the first event. We have already checked that all
@@ -1284,7 +1285,7 @@ static void power_pmu_enable(struct pmu *pmu)
mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
| MMCR0_FC);
-   if (ppmu->flags & PPMU_ARCH_207S)
+   if ((ppmu->flags & PPMU_ARCH_207S) && (cpuhw->bhrb_users == 0))
mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
 
/*
@@ -1436,7 +1437,8 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
if (cpuhw->group_flag & PERF_EVENT_TXN)
goto nocheck;
 
-   if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
+   if (check_excludes(cpuhw->event, cpuhw->flags,
+   n0, 1, cpuhw->bhrb_users))
goto out;
if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
goto out;
@@ -1615,7 +1617,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
return -EAGAIN;
cpuhw = this_cpu_ptr(&cpu_hw_events);
n = cpuhw->n_events;
-   if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
+   if (check_excludes(cpuhw->event, cpuhw->flags, 0, n, cpuhw->bhrb_users))
return -EAGAIN;
i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
if (i < 0)
@@ -1828,10 +1830,10 @@ static int power_pmu_event_init(struct perf_event *event)
events[n] = ev;
ctrs[n] = event;
cflags[n] = flags;
-   if (check_excludes(ctrs, cflags, n, 1))
+   cpuhw = this_cpu_ptr(&cpu_hw_events);
+   if (check_excludes(ctrs, cflags, n, 1, cpuhw->bhrb_users))
return -EINVAL;
 
-   cpuhw = this_cpu_ptr(&cpu_hw_events);
err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
if (has_branch_stack(event)) {
-- 
2.1.0


[PATCH V9 06/13] powerpc, perf: Re organize BHRB processing

2015-06-15 Thread Anshuman Khandual
This patch cleans up some existing indentation problems in the code
and re-organizes the BHRB processing code with a helper function named
'insert_branch', making it more readable. This patch does not change
any functionality.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 109 
 1 file changed, 54 insertions(+), 55 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index a7be394..ea77057 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -412,11 +412,19 @@ static __u64 power_pmu_bhrb_to(u64 addr)
return target - (unsigned long)&instr + addr;
 }
 
+static void insert_branch(struct cpu_hw_events *cpuhw,
+   int index, u64 from, u64 to, int mispred)
+{
+   cpuhw->bhrb_entries[index].from = from;
+   cpuhw->bhrb_entries[index].to = to;
+   cpuhw->bhrb_entries[index].mispred = mispred;
+   cpuhw->bhrb_entries[index].predicted = ~mispred;
+}
+
 /* Processing BHRB entries */
 static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 {
-   u64 val;
-   u64 addr;
+   u64 val, addr, to_addr;
int r_index, u_index;
bool mispred;
 
@@ -428,65 +436,56 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
if (!val)
/* Terminal marker: End of valid BHRB entries */
break;
-   else {
-   addr = val & BHRB_EA;
-   mispred = val & BHRB_PREDICTION;
 
-   if (!addr)
-   /* invalid entry */
-   continue;
+   addr = val & BHRB_EA;
+   mispred = val & BHRB_PREDICTION;
+
+   if (!addr)
+   /* invalid entry */
+   continue;
 
-   /* Branches are read most recent first (ie. mfbhrb 0 is
-* the most recent branch).
-* There are two types of valid entries:
-* 1) a target entry which is the to address of a
-*computed goto like a blr,bctr,btar.  The next
-*entry read from the bhrb will be branch
-*corresponding to this target (ie. the actual
-*blr/bctr/btar instruction).
-* 2) a from address which is an actual branch.  If a
-*target entry proceeds this, then this is the
-*matching branch for that target.  If this is not
-*following a target entry, then this is a branch
-*where the target is given as an immediate field
-*in the instruction (ie. an i or b form branch).
-*In this case we need to read the instruction from
-*memory to determine the target/to address.
+   /* Branches are read most recent first (ie. mfbhrb 0 is
+* the most recent branch).
+* There are two types of valid entries:
+* 1) a target entry which is the to address of a
+*computed goto like a blr,bctr,btar.  The next
+*entry read from the bhrb will be branch
+*corresponding to this target (ie. the actual
+*blr/bctr/btar instruction).
+* 2) a from address which is an actual branch.  If a
+*target entry proceeds this, then this is the
+*matching branch for that target.  If this is not
+*following a target entry, then this is a branch
+*where the target is given as an immediate field
+*in the instruction (ie. an i or b form branch).
+*In this case we need to read the instruction from
+*memory to determine the target/to address.
+*/
+   if (val & BHRB_TARGET) {
+   /* Target branches use two entries
+* (ie. computed gotos/XL form)
 */
+   to_addr = addr;
+
+   /* Get from address in next entry */
+   val = read_bhrb(r_index++);
+   if (!val)
+   break;
+   addr = val & BHRB_EA;
if (val & BHRB_TARGET) {
-   /* Target branches use two entries
-* (ie. computed gotos/XL form)
-*/
-   cpuhw->bhrb_entries[u_index].to = addr;
-

Re: [PATCH V9 00/13] powerpc, perf: Enable SW branch filters

2015-06-25 Thread Anshuman Khandual
On 06/25/2015 11:48 AM, Daniel Axtens wrote:

> Hi Anshuman,
> 
> Thanks for your continued work on this.
> 
> Given that the series is now at version 9 and is 13 patches long, I
> wonder if it might be worth splitting it up.

Splitting it up completely, or just keeping all the generic fixes
and cleanups at the beginning of the series, would be sufficient.
Anyway, I am willing to send them out separately if that helps.

> 
> I'd suggest:
> 
>  - Patch 1 could be sent individually as it's a bug fix.

Not with the generic cleanup group as proposed below ?

> 
>  - Separating out a series of simple cleanups would make the actual
> changes in your patch set easier to understand. Patches 2, 3 and 5 are
> obvious candidates.

Agreed. Just that adding the first patch here will prevent a three way
split of the entire series.

> 
>  - It looks like the changes in patch 6 aren't used by any of the
> following patches. It might be worth separating that out or just
> dropping it entirely.

I guess you are talking about patch 7 "powerpc, perf: Re organize PMU
branch filter processing on POWER8". Patch 6 is getting used later on.

> 
> 
> That would give you a series with just:
> 4   powerpc, perf: Restore privilege level filter support for BHRB
> 7   powerpc, perf: Re organize PMU branch filter processing on POWER8
> 8   powerpc, perf: Change the name of HW PMU branch filter tracking variable
> 9   powerpc, lib: Add new branch analysis support functions
> 10   powerpc, perf: Enable SW filtering in branch stack sampling framework
> 11   powerpc, perf: Change POWER8 PMU configuration to work with SW filters
> 12   powerpc, perf: Enable privilege mode SW branch filters
> 13   selftests, powerpc: Add test for BHRB branch filters (HW & SW)
> 
> That might make it easier for you to start getting the ground work in,
> and make it easier for others to understand what you're trying to do.

Sure, agreed. Here are the two sets of patches after the proposed
split. The patches are in reverse order though. Hope this helps.

Generic cleanups and fixes
---

powerpc/perf: Re organize PMU branch filter processing on POWER8
powerpc/perf: Change name & type of 'pred' in power_pmu_bhrb_read
powerpc/perf: Replace last usage of get_cpu_var with this_cpu_ptr
powerpc/perf: Change type of the bhrb_users variable
powerpc/perf: Drop the branch sample when 'from' cannot be fetched

BHRB SW branch filter
--

selftests/powerpc: Add test for BHRB branch filters (HW & SW)
powerpc/perf: Enable privilege mode SW branch filters
powerpc/perf: Change POWER8 PMU configuration to work with SW filters
powerpc/perf: Enable SW filtering in branch stack sampling framework
powerpc/lib: Add new branch analysis support functions
powerpc/perf: Change the name of HW PMU branch filter tracking variable
powerpc/perf: Re organize BHRB processing
powerpc/perf: Restore privilege level filter support for BHRB

Regards
Anshuman


Re: [PATCH V9 02/13] powerpc, perf: Change type of the bhrb_users variable

2015-06-25 Thread Anshuman Khandual
On 06/25/2015 11:12 AM, Daniel Axtens wrote:
>> -int bhrb_users;
>> +unsigned int	bhrb_users;
> 
> OK, so this is a good start.
> 
> A quick git grep for bhrb_users reveals this:
> perf/core-book3s.c: WARN_ON_ONCE(cpuhw->bhrb_users < 0);
> 
> That occurs in power_pmu_bhrb_disable, immediately following a decrement
> of bhrb_users. Now that the test can never be true, this patch should
> change the function to check if bhrb_users is 0 before decrementing.

Sure. Would replace with 'WARN_ON_ONCE(!cpuhw->bhrb_users)' before
decrementing bhrb_users in the function.


Re: [PATCH V9 04/13] powerpc, perf: Restore privilege level filter support for BHRB

2015-06-25 Thread Anshuman Khandual
On 06/25/2015 10:32 AM, Daniel Axtens wrote:
> 
>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>> index 7a03cce..892340e 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -930,7 +930,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
>>   * added events.
>>   */
>>  static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
>> -  int n_prev, int n_new)
>> +  int n_prev, int n_new, int bhrb_users)
> Shouldn't this be an unsigned int too?

Yeah it should be, will change it.

> 
>> -if (ppmu->flags & PPMU_ARCH_207S)
>> +if ((ppmu->flags & PPMU_ARCH_207S) && !bhrb_users)
> This is now different to the others. Now that bhrb_users is unsigned,
> I'm happy if you want to revert all of them to be like this, I was just
> concerned that if bhrb_users is an int, both 1 and -1 evaluate to true
> and I wasn't sure that was the desired behaviour.

Sure, will change it.


Re: [PATCH V9 05/13] powerpc, perf: Change name & type of 'pred' in power_pmu_bhrb_read

2015-06-25 Thread Anshuman Khandual
On 06/25/2015 10:41 AM, Daniel Axtens wrote:
>>  cpuhw->bhrb_entries[u_index].to = addr;
>> -cpuhw->bhrb_entries[u_index].mispred = pred;
>> -cpuhw->bhrb_entries[u_index].predicted = ~pred;
>> +cpuhw->bhrb_entries[u_index].mispred = mispred;
>> +cpuhw->bhrb_entries[u_index].predicted =
>> +~mispred;
>>  
> 
> This is much better! However, these are still bitwise rather than
> logical inversions. They will work, but would it be easier to understand
> if you used "!mispred"?

Sure, will change it as well.


Re: [PATCH V9 06/13] powerpc, perf: Re organize BHRB processing

2015-06-25 Thread Anshuman Khandual
On 06/25/2015 11:22 AM, Daniel Axtens wrote:
> 
>> +static void insert_branch(struct cpu_hw_events *cpuhw,
>> +int index, u64 from, u64 to, int mispred)
> Given that your previous patch made mispred a bool, this could take a
> bool too. It could probably be an inline function as well.

Sure. will change it.


Re: [RFC] powerpc, tm: Drop tm_orig_msr from thread_struct

2015-06-25 Thread Anshuman Khandual
On 04/24/2015 10:31 AM, Anshuman Khandual wrote:
> On 04/20/2015 01:45 PM, Anshuman Khandual wrote:
>> Currently tm_orig_msr is getting used during process context switch only.
>> Then there is ckpt_regs, which saves the checkpointed userspace context.
>> The MSR slot contained in the ckpt_regs structure can be used during
>> process context switch instead of tm_orig_msr, thus allowing us to drop
>> it from the thread_struct structure. This patch does that change.
>>
>> Signed-off-by: Anshuman Khandual 
>> ---
>> This issue came up in the discussion regarding ptrace interface for TM
>> specific registers https://lkml.org/lkml/2015/4/20/100, so just wanted
>> to give this a try. The basic TM tests still pass after this change.
> 
> Hey Michael/Mikey,
> 
> Whats your thoughts on this ? Can we drop tm_orig_msr ?

Just wanted some inputs/suggestions/thoughts on this idea. I did not
hear from anyone on this. Will it create any problem anywhere if we
drop this variable?


[PATCH 0/5] BHRB fixes, improvements and cleanups

2015-06-30 Thread Anshuman Khandual
The five generic patches from the BHRB SW branch filter enablement
series have been separated out and grouped together in this series.
These patches need to be applied before using the upcoming V10 of the
BHRB SW branch filter patch series.

Anshuman Khandual (5):
  powerpc/perf: Drop the branch sample when 'from' cannot be fetched
  powerpc/perf: Change type of the bhrb_users variable
  powerpc/perf: Replace last usage of get_cpu_var with this_cpu_ptr
  powerpc/perf: Change name & type of 'pred' in power_pmu_bhrb_read
  powerpc/perf: Re organize PMU branch filter processing on POWER8

 arch/powerpc/perf/core-book3s.c | 29 +++--
 arch/powerpc/perf/power8-pmu.c  | 22 +++---
 2 files changed, 22 insertions(+), 29 deletions(-)

-- 
2.1.0


[PATCH 5/5] powerpc/perf: Re organize PMU branch filter processing on POWER8

2015-06-30 Thread Anshuman Khandual
This patch does some code re-arrangement to make it clear that the
kernel ignores any separate privilege level branch filter request and
does not support any combination of HW PMU branch filters.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/power8-pmu.c | 22 +++---
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 396351d..a6c6a2c 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -656,8 +656,6 @@ static int power8_generic_events[] = {
 
 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 {
-   u64 pmu_bhrb_filter = 0;
-
/* BHRB and regular PMU events share the same privilege state
 * filter configuration. BHRB is always recorded along with a
 * regular PMU event. As the privilege state filter is handled
@@ -665,21 +663,15 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 * PMU event, we ignore any separate BHRB specific request.
 */
 
-   /* No branch filter requested */
-   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
-   return pmu_bhrb_filter;
-
-   /* Invalid branch filter options - HW does not support */
-   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
-   return -1;
+   /* Ignore user, kernel, hv bits */
+   branch_sample_type &= ~PERF_SAMPLE_BRANCH_PLM_ALL;
 
-   if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
-   return -1;
+   /* No branch filter requested */
+   if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY)
+   return 0;
 
-   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
-   pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
-   return pmu_bhrb_filter;
-   }
+   if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL)
+   return POWER8_MMCRA_IFM1;
 
/* Every thing else is unsupported */
return -1;
-- 
2.1.0


[PATCH 2/5] powerpc/perf: Change type of the bhrb_users variable

2015-06-30 Thread Anshuman Khandual
This patch just changes the data type of the 'bhrb_users' variable
from int to unsigned int because it never contains a negative value.

Reported-by: Daniel Axtens 
Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b0c2d53..f9ecd93 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -53,7 +53,7 @@ struct cpu_hw_events {
 
/* BHRB bits */
 u64 bhrb_filter;	/* BHRB HW branch filter */
-   int bhrb_users;
+   unsigned int	bhrb_users;
 void		*bhrb_context;
struct  perf_branch_stack   bhrb_stack;
struct  perf_branch_entry   bhrb_entries[BHRB_MAX_ENTRIES];
@@ -369,8 +369,8 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
if (!ppmu->bhrb_nr)
return;
 
+   WARN_ON_ONCE(!cpuhw->bhrb_users);
cpuhw->bhrb_users--;
-   WARN_ON_ONCE(cpuhw->bhrb_users < 0);
perf_sched_cb_dec(event->ctx->pmu);
 
if (!cpuhw->disabled && !cpuhw->bhrb_users) {
-- 
2.1.0


[PATCH 1/5] powerpc/perf: Drop the branch sample when 'from' cannot be fetched

2015-06-30 Thread Anshuman Khandual
BHRB (Branch History Rolling Buffer) is a rolling buffer. Hence we
might end up in a situation where we have read the target address of
a branch, but when we try to read the next entry, which should carry
the corresponding 'from' address, the buffer has already rolled over.
In that case the captured 'from' address will be zero, which normally
indicates the end of the buffer.

This patch drops the entire branch record which would have
otherwise confused the user space tools.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index d90893b..b0c2d53 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -461,7 +461,6 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 *In this case we need to read the instruction from
 *memory to determine the target/to address.
 */
-
if (val & BHRB_TARGET) {
/* Target branches use two entries
 * (ie. computed gotos/XL form)
@@ -472,6 +471,8 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 
/* Get from address in next entry */
val = read_bhrb(r_index++);
+   if (!val)
+   break;
addr = val & BHRB_EA;
if (val & BHRB_TARGET) {
/* Shouldn't have two targets in a
-- 
2.1.0


[PATCH 4/5] powerpc/perf: Change name & type of 'pred' in power_pmu_bhrb_read

2015-06-30 Thread Anshuman Khandual
Branch record attributes 'mispred' and 'predicted' are single-bit
fields as defined in the perf ABI. Hence the data type of the field
'pred' used during BHRB processing should be changed from integer
to bool. This patch also changes the name of the variable from
'pred' to 'mispred', making the logical inversion more meaningful
and readable.

Reported-by: Daniel Axtens 
Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 57f2c78..ddc0424 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -426,7 +426,8 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 {
u64 val;
u64 addr;
-   int r_index, u_index, pred;
+   int r_index, u_index;
+   bool mispred;
 
r_index = 0;
u_index = 0;
@@ -438,7 +439,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
break;
else {
addr = val & BHRB_EA;
-   pred = val & BHRB_PREDICTION;
+   mispred = val & BHRB_PREDICTION;
 
if (!addr)
/* invalid entry */
@@ -466,8 +467,9 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 * (ie. computed gotos/XL form)
 */
cpuhw->bhrb_entries[u_index].to = addr;
-   cpuhw->bhrb_entries[u_index].mispred = pred;
-   cpuhw->bhrb_entries[u_index].predicted = ~pred;
+   cpuhw->bhrb_entries[u_index].mispred = mispred;
+   cpuhw->bhrb_entries[u_index].predicted =
+   ~mispred;
 
/* Get from address in next entry */
val = read_bhrb(r_index++);
@@ -487,8 +489,9 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
cpuhw->bhrb_entries[u_index].from = addr;
cpuhw->bhrb_entries[u_index].to =
power_pmu_bhrb_to(addr);
-   cpuhw->bhrb_entries[u_index].mispred = pred;
-   cpuhw->bhrb_entries[u_index].predicted = ~pred;
+   cpuhw->bhrb_entries[u_index].mispred = mispred;
+   cpuhw->bhrb_entries[u_index].predicted =
+   ~mispred;
}
u_index++;
 
-- 
2.1.0


[PATCH 3/5] powerpc/perf: Replace last usage of get_cpu_var with this_cpu_ptr

2015-06-30 Thread Anshuman Khandual
The commit 69111bac42f5ce ("powerpc: Replace __get_cpu_var uses")
replaced all usages of get_cpu_var with this_cpu_ptr inside the core
perf event handling code on powerpc, but it missed one instance.
This patch replaces it.

Reported-by: Daniel Axtens 
Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index f9ecd93..57f2c78 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1840,20 +1840,17 @@ static int power_pmu_event_init(struct perf_event *event)
if (check_excludes(ctrs, cflags, n, 1))
return -EINVAL;
 
-   cpuhw = &get_cpu_var(cpu_hw_events);
+   cpuhw = this_cpu_ptr(&cpu_hw_events);
err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
if (has_branch_stack(event)) {
cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);
 
-   if (cpuhw->bhrb_filter == -1) {
-   put_cpu_var(cpu_hw_events);
+   if (cpuhw->bhrb_filter == -1)
return -EOPNOTSUPP;
-   }
}
 
-   put_cpu_var(cpu_hw_events);
if (err)
return -EINVAL;
 
-- 
2.1.0


[PATCH V10 7/8] powerpc/perf: Enable privilege mode SW branch filters

2015-06-30 Thread Anshuman Khandual
This patch enables privilege mode SW branch filters. It also modifies
the POWER8 PMU branch filter configuration so that the privilege mode
branch filter, implemented as part of the base PMU event
configuration, is reflected in the bhrb filter mask. As a result, the
SW will skip the privilege mode branch filters rather than try to
process them itself.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/include/asm/perf_event_server.h |  3 +++
 arch/powerpc/perf/core-book3s.c  | 37 ++--
 arch/powerpc/perf/power8-pmu.c   | 13 --
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index cb7ca1a..23d68d3 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -85,6 +85,9 @@ extern unsigned long int read_bhrb(int n);
for ((x) = PERF_SAMPLE_BRANCH_USER; \
(x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
 
+#define POWER_ADDR_USER	0
+#define POWER_ADDR_KERNEL  1
+
 /*
  * Only override the default definitions in include/linux/perf_event.h
  * if we have hardware PMU support.
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 59defc5..4ae2225 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define BHRB_MAX_ENTRIES   32
 #define BHRB_TARGET	0x0000000000000002
@@ -474,10 +475,10 @@ static bool check_instruction(unsigned int *addr, u64 sw_filter)
  * Access the instruction contained in the address and then check
  * whether it complies with the applicable SW branch filters.
  */
-static bool keep_branch(u64 from, u64 sw_filter)
+static bool keep_branch(u64 from, u64 to, u64 sw_filter)
 {
unsigned int instr;
-   bool ret;
+   bool to_plm, ret, select_branch;
 
/*
 * The "from" branch for every branch record has to go
@@ -487,6 +488,37 @@ static bool keep_branch(u64 from, u64 sw_filter)
if (sw_filter == 0)
return true;
 
+   to_plm = is_kernel_addr(to) ? POWER_ADDR_KERNEL : POWER_ADDR_USER;
+
+   /*
+* XXX: Applying the privilege mode SW branch filters first on
+* the 'TO' address creates an AND semantic with other SW branch
+* filters which are ORed with each other being applied on the
+* 'FROM' address there after.
+*/
+   if (sw_filter & PERF_SAMPLE_BRANCH_PLM_ALL) {
+   select_branch = false;
+
+   if (sw_filter & PERF_SAMPLE_BRANCH_USER) {
+   if (to_plm == POWER_ADDR_USER)
+   select_branch = true;
+   }
+
+   if (sw_filter & PERF_SAMPLE_BRANCH_KERNEL) {
+   if (to_plm == POWER_ADDR_KERNEL)
+   select_branch = true;
+   }
+
+   if (sw_filter & PERF_SAMPLE_BRANCH_HV) {
+   if (cpu_has_feature(CPU_FTR_HVMODE)
+   && (to_plm == POWER_ADDR_KERNEL))
+   select_branch = true;
+   }
+
+   if (!select_branch)
+   return false;
+   }
+
if (is_kernel_addr(from)) {
return check_instruction((unsigned int *) from, sw_filter);
} else {
@@ -577,6 +609,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 
/* Apply SW branch filters and drop the entry if required */
if (!keep_branch(cpuhw->bhrb_entries[u_index].from,
+   cpuhw->bhrb_entries[u_index].to,
cpuhw->bhrb_sw_filter))
u_index--;
u_index++;
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 8fccf6c..b56afc6 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -670,9 +670,19 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
 * filter configuration. BHRB is always recorded along with a
 * regular PMU event. As the privilege state filter is handled
 * in the basic PMC configuration of the accompanying regular
-* PMU event, we ignore any separate BHRB specific request.
+* PMU event, we ignore any separate BHRB specific request. But
+* this needs to be communicated with the branch filter mask.
 */
 
+   if (branch_sample_type & PERF_SAMPLE_BRANCH_USER)
+   *bhrb_filter |= PERF_SAMPLE_BRANCH_USER;
+
+   if (branch_sample_type & PERF_SAMPLE_BRANCH_KERNEL)
+   *bhrb_filter |= PERF_SAMPLE_BRANCH_KERNEL;
+
+   if (branch_sample_typ

[PATCH V10 4/8] powerpc/lib: Add new branch analysis support functions

2015-06-30 Thread Anshuman Khandual
Generic powerpc branch analysis support is added to the code patching
library; it will help the subsequent patch on SW based filtering of
branch records in perf.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/include/asm/code-patching.h | 15 
 arch/powerpc/lib/code-patching.c | 60 
 2 files changed, 75 insertions(+)

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 840a550..0a6f0d8 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -22,6 +22,16 @@
 #define BRANCH_SET_LINK	0x1
 #define BRANCH_ABSOLUTE	0x2
 
+#define XL_FORM_LR   0x4C000020
+#define XL_FORM_CTR  0x4C000420
+#define XL_FORM_TAR  0x4C000460
+
+#define BO_ALWAYS    0x02800000
+#define BO_CTR       0x02000000
+#define BO_CRBI_OFF  0x00800000
+#define BO_CRBI_ON   0x01800000
+#define BO_CRBI_HINT 0x00400000
+
 unsigned int create_branch(const unsigned int *addr,
   unsigned long target, int flags);
 unsigned int create_cond_branch(const unsigned int *addr,
@@ -99,4 +109,9 @@ static inline unsigned long ppc_global_function_entry(void *func)
 #endif
 }
 
+bool instr_is_return_branch(unsigned int instr);
+bool instr_is_conditional_branch(unsigned int instr);
+bool instr_is_func_call(unsigned int instr);
+bool instr_is_indirect_func_call(unsigned int instr);
+
 #endif /* _ASM_POWERPC_CODE_PATCHING_H */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d5edbeb..46fbcca 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -87,6 +87,66 @@ static int instr_is_branch_bform(unsigned int instr)
return branch_opcode(instr) == 16;
 }
 
+static bool instr_is_branch_xlform(unsigned int instr)
+{
+   return branch_opcode(instr) == 19;
+}
+
+static bool is_xlform_lr(unsigned int instr)
+{
+   return (instr & XL_FORM_LR) == XL_FORM_LR;
+}
+
+static bool is_bo_always(unsigned int instr)
+{
+   return (instr & BO_ALWAYS) == BO_ALWAYS;
+}
+
+static bool is_branch_link_set(unsigned int instr)
+{
+   return (instr & BRANCH_SET_LINK) == BRANCH_SET_LINK;
+}
+
+bool instr_is_return_branch(unsigned int instr)
+{
+   /*
+* Conditional and unconditional branch to LR register
+* without setting the link register.
+*/
+   if (is_xlform_lr(instr) && !is_branch_link_set(instr))
+   return true;
+
+   return false;
+}
+
+bool instr_is_conditional_branch(unsigned int instr)
+{
+   /* I-form instruction - excluded */
+   if (instr_is_branch_iform(instr))
+   return false;
+
+   /* B-form or XL-form instruction */
+   if (instr_is_branch_bform(instr) || instr_is_branch_xlform(instr))  {
+
+   /* Not branch always */
+   if (!is_bo_always(instr))
+   return true;
+   }
+   return false;
+}
+
+bool instr_is_func_call(unsigned int instr)
+{
+   /* LR should be set */
+   return is_branch_link_set(instr);
+}
+
+bool instr_is_indirect_func_call(unsigned int instr)
+{
+   /* XL-form instruction with LR set */
+   return (instr_is_branch_xlform(instr) && is_branch_link_set(instr));
+}
+
 int instr_is_relative_branch(unsigned int instr)
 {
if (instr & BRANCH_ABSOLUTE)
-- 
2.1.0


[PATCH V10 0/8] powerpc/perf: Enable SW branch filters

2015-06-30 Thread Anshuman Khandual
This is the continuation (rebased and reworked) of the series
posted at https://lkml.org/lkml/2014/5/5/153 (which was V6). I had
incremented the version count when re-sending the first four patches
of the series to Peter Z for generic review; those got pulled in
last year. The patches here are the remaining powerpc part of the
original series.

NOTE: This patch series is logical extension and also dependent on the
series posted at https://patchwork.ozlabs.org/patch/489544/

Changes in V10
==
(1) The patch series is split into two groups now for easier review
(2) Changed the 'check_excludes' function as suggested by Daniel
(3) Using logical inversion of 'mispred' inside 'insert_branch' function 
(4) Made 'insert_branch' function inline and changed its last argument

Changes in V9 (https://patchwork.ozlabs.org/patch/484242/)
=
(1) Changed some of the commit messages and fixed some typos
(2) Variable 'bhrb_users' type changed from int to unsigned int
(3) Replaced the last usage of 'get_cpu_var' with 'this_cpu_ptr'
(4) Conditional checks for 'cpuhw->bhrb_users' changed to test against zero
(5) Updated in-code documentation inside 'check_excludes' function
(6) Changed the name & type of 'pred' variable in 'power_pmu_bhrb_read'
(7) Changed the name of 'tmp' to 'to_addr' inside 'power_pmu_bhrb_read'
(8) Changed return values for branch instruction analysis functions
(9) Changed the name of 'flag' variable to 'select_branch' inside 'keep_branch'
(10) Fixed one nested conditional statement inside 'keep_branch' function
(11) Changed function name from 'update_branch_entry' to 'insert_branch'
(12) Fixed copyright and license statements for new selftest related files
(13) Improved helper assembly functions with some macro definitions
(14) Improved the core test program at various places
(15) Added .gitignore file for the new test case

Changes in V8 (http://patchwork.ozlabs.org/patch/481848/)
=
(1) Fixed BHRB privilege mode branch filter request processing
(2) Dropped branch records where 'from' cannot be fetched
(3) Added in-code documenation at various places in the patch series
(4) Added one comprehensive seltest case to verify all the filters

Changes in V7
=
(1) Incremented the version count while requesting pull for generic changes

Changes in V6 (https://lkml.org/lkml/2014/5/5/153)
=
(1) Rebased the patchset against the master
(2) Added "Reviewed-by: Andi Kleen" in the first four patches in the series which change the generic or X86 perf code. [https://lkml.org/lkml/2014/4/7/130]

Changes in V5 (https://lkml.org/lkml/2014/3/7/101)
=
(1) Added a precursor patch to clean up the indentation problem in 
power_pmu_bhrb_read
(2) Added a precursor patch to re-arrange P8 PMU BHRB filter config which 
improved the clarity
(3) Merged the previous 10th patch into the 8th patch
(4) Moved SW based branch analysis code from core perf into code-patching 
library as suggested by Michael
(5) Simplified the logic in branch analysis library
(6) Fixed some ambiguities in documentation at various places
(7) Added some more in-code documentation blocks at various places
(8) Renamed some local variable and function names
(9) Fixed some indentation and white space errors in the code
(10) Implemented almost all the review comments and suggestions made by Michael 
Ellerman on V4 patchset
(11) Enabled privilege mode SW branch filter
(12) Simplified and generalized the SW implemented conditional branch filter
(13) PERF_SAMPLE_BRANCH_COND filter is now supported only through SW 
implementation
(14) Adjusted other patches to deal with the above changes

Changes in V4 (https://lkml.org/lkml/2013/12/4/168)
=
(1) Changed the commit message for patch (01/10)
(2) Changed the patch (02/10) to accommodate review comments from Michael 
Ellerman
(3) Rebased the patchset against latest Linus's tree

Changes in V3 (https://lkml.org/lkml/2013/10/16/59)
=
(1) Split the SW branch filter enablement into multiple patches
(2) Added PMU neutral SW branch filtering code, PMU specific HW branch 
filtering code
(3) Added new instruction analysis functionality into powerpc code-patching 
library
(4) Changed name for some of the functions
(5) Fixed couple of spelling mistakes
(6) Changed code documentation in multiple places

Changes in V2 (https://lkml.org/lkml/2013/8/30/10)
=
(1) Enabled PPC64 SW branch filtering support
(2) Incorporated changes required for all previous comments


Anshuman Khandual (8):
  powerpc/perf: Restore privilege level filter support for BHRB
  powerpc/perf: Re organize BHRB processing
  powerpc/perf: Change the name of HW PMU branch filter tracking variable
  powerpc/lib: 

[PATCH V10 2/8] powerpc/perf: Re organize BHRB processing

2015-06-30 Thread Anshuman Khandual
This patch cleans up some existing indentation problems in the code and
reorganizes the BHRB processing code with a helper function named
'insert_branch', making it more readable. This patch does not change
any functionality.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 109 
 1 file changed, 54 insertions(+), 55 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b7710b9..6935660 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -421,11 +421,19 @@ static __u64 power_pmu_bhrb_to(u64 addr)
return target - (unsigned long)&instr + addr;
 }
 
+static inline void insert_branch(struct cpu_hw_events *cpuhw,
+   int index, u64 from, u64 to, bool mispred)
+{
+   cpuhw->bhrb_entries[index].from = from;
+   cpuhw->bhrb_entries[index].to = to;
+   cpuhw->bhrb_entries[index].mispred = mispred;
+   cpuhw->bhrb_entries[index].predicted = !mispred;
+}
+
 /* Processing BHRB entries */
 static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 {
-   u64 val;
-   u64 addr;
+   u64 val, addr, to_addr;
int r_index, u_index;
bool mispred;
 
@@ -437,65 +445,56 @@ static void power_pmu_bhrb_read(struct cpu_hw_events 
*cpuhw)
if (!val)
/* Terminal marker: End of valid BHRB entries */
break;
-   else {
-   addr = val & BHRB_EA;
-   mispred = val & BHRB_PREDICTION;
 
-   if (!addr)
-   /* invalid entry */
-   continue;
+   addr = val & BHRB_EA;
+   mispred = val & BHRB_PREDICTION;
+
+   if (!addr)
+   /* invalid entry */
+   continue;
 
-   /* Branches are read most recent first (ie. mfbhrb 0 is
-* the most recent branch).
-* There are two types of valid entries:
-* 1) a target entry which is the to address of a
-*computed goto like a blr,bctr,btar.  The next
-*entry read from the bhrb will be branch
-*corresponding to this target (ie. the actual
-*blr/bctr/btar instruction).
-* 2) a from address which is an actual branch.  If a
-*target entry proceeds this, then this is the
-*matching branch for that target.  If this is not
-*following a target entry, then this is a branch
-*where the target is given as an immediate field
-*in the instruction (ie. an i or b form branch).
-*In this case we need to read the instruction from
-*memory to determine the target/to address.
+   /* Branches are read most recent first (ie. mfbhrb 0 is
+* the most recent branch).
+* There are two types of valid entries:
+* 1) a target entry which is the to address of a
+*computed goto like a blr,bctr,btar.  The next
+*entry read from the bhrb will be branch
+*corresponding to this target (ie. the actual
+*blr/bctr/btar instruction).
+* 2) a from address which is an actual branch.  If a
+*target entry proceeds this, then this is the
+*matching branch for that target.  If this is not
+*following a target entry, then this is a branch
+*where the target is given as an immediate field
+*in the instruction (ie. an i or b form branch).
+*In this case we need to read the instruction from
+*memory to determine the target/to address.
+*/
+   if (val & BHRB_TARGET) {
+   /* Target branches use two entries
+* (ie. computed gotos/XL form)
 */
+   to_addr = addr;
+
+   /* Get from address in next entry */
+   val = read_bhrb(r_index++);
+   if (!val)
+   break;
+   addr = val & BHRB_EA;
if (val & BHRB_TARGET) {
-   /* Target branches use two entries
-* (ie. computed gotos/XL form)
-*/
-   cpuhw->bhrb_entries[u_index].to = addr;
-

[PATCH V10 5/8] powerpc/perf: Enable SW filtering in branch stack sampling framework

2015-06-30 Thread Anshuman Khandual
This patch enables SW based post-processing of BHRB captured branches
to meet more user defined branch filter criteria in the perf branch
stack sampling framework. These changes increase the number of branch
filters and their valid combinations on any powerpc64 server platform
with BHRB support. A summary of the code changes follows.

(1) struct cpu_hw_events

Introduced two new variables to track the various filter values and masks

(a) bhrb_sw_filter  Tracks SW implemented branch filter flags
(b) bhrb_filter Tracks both (SW and HW) branch filter flags

(2) Event creation

The kernel will figure out the supported BHRB branch filters through a PMU
callback 'bhrb_filter_map'. This function will find out how many of
the requested branch filters can be supported in the PMU HW. It will
not try to invalidate any branch filter combinations. Event creation
will not error out because of a lack of HW based branch filters.
Meanwhile it will track the overall supported branch filters in the
'bhrb_filter' variable.

Once the PMU callback returns, the kernel will process the user branch
filter request against the available SW filters (bhrb_sw_filter_map)
while looking at the 'bhrb_filter'. During this phase all the branch
filters which are still pending from the user requested list will have
to be supported in SW, failing which the event creation will error out.
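
In rough pseudo-C, this event creation flow looks something like the
following (a sketch only: the SW mapping step and its -1 error
convention are assumptions based on the description above; the
'bhrb_filter_map' prototype matches the hunk further below):

	cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
			event->attr.branch_sample_type, &cpuhw->bhrb_filter);

	/* whatever the PMU HW did not cover must be honoured in SW */
	cpuhw->bhrb_sw_filter = bhrb_sw_filter_map(
			event->attr.branch_sample_type & ~cpuhw->bhrb_filter);
	if (cpuhw->bhrb_sw_filter == -1)
		return -EOPNOTSUPP;	/* neither HW nor SW can satisfy the request */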

(3) SW branch filter

During BHRB data capture inside the PMU interrupt context, each
captured 'perf_branch_entry.from' address will be checked for compliance
with the applicable SW branch filters. If the entry does not conform to
the filter requirements, it will be discarded from the final perf branch
stack buffer.
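
In pseudo-C, that capture-time check amounts to something like the
following inside the BHRB read loop ('keep_branch' is the predicate
named in the V9 changelog above; the exact call site and instruction
fetch are assumptions):

	if (cpuhw->bhrb_sw_filter) {
		unsigned int instr = *(unsigned int *)from;	/* branch instruction at 'from' */

		if (!keep_branch(cpuhw->bhrb_sw_filter, instr))
			continue;	/* entry dropped from the perf branch stack */
	}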

(4) Supported SW based branch filters

(a) PERF_SAMPLE_BRANCH_ANY_RETURN
(b) PERF_SAMPLE_BRANCH_IND_CALL
(c) PERF_SAMPLE_BRANCH_ANY_CALL
(d) PERF_SAMPLE_BRANCH_COND

Please refer to the patch to understand the classification of instructions
into these branch filter categories.

(5) Multiple branch filter semantics

The Book3S server implementation follows the same OR semantics (as
implemented in x86) while dealing with multiple branch filters at any
point of time. SW branch filter analysis is carried out on the data set
captured by the PMU HW, so the resulting set of data (after applying the
SW filters) will inherently be an AND with the HW captured set. Hence any
combination of HW and SW branch filters would be invalid. HW based branch
filters are more efficient and faster compared to SW implemented branch
filters. So at first the PMU should decide whether it can support all the
requested branch filters itself or not. In case it can support all the
branch filters in an OR manner, we don't apply any SW branch filter on
top of the HW captured set (which is the final set). This preserves the
OR semantics of multiple branch filters as required. But in case the PMU
cannot support all the requested branch filters in an OR manner, it
should not apply any of its filters and should leave it up to the SW to
handle them all. It is the PMU code's responsibility to uphold this
protocol in order to conform to the overall OR semantics of the perf
branch stack sampling framework.
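
For example, if a user requests PERF_SAMPLE_BRANCH_ANY_CALL |
PERF_SAMPLE_BRANCH_ANY_RETURN on a PMU that can program only one of
those filters in HW, programming the HW ANY_CALL filter first would
discard all return branches before the SW ever sees them, silently
turning the OR into an AND. The PMU must therefore capture everything
and let the SW apply both filters.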

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/include/asm/perf_event_server.h |   7 +-
 arch/powerpc/perf/core-book3s.c  | 188 ++-
 arch/powerpc/perf/power8-pmu.c   |   2 +-
 3 files changed, 191 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h 
b/arch/powerpc/include/asm/perf_event_server.h
index 8146221..cb7ca1a 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -38,7 +38,8 @@ struct power_pmu {
unsigned long *valp);
int (*get_alternatives)(u64 event_id, unsigned int flags,
u64 alt[]);
-   u64 (*bhrb_filter_map)(u64 branch_sample_type);
+   u64 (*bhrb_filter_map)(u64 branch_sample_type,
+   u64 *bhrb_filter);
void(*config_bhrb)(u64 pmu_bhrb_filter);
void(*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
int (*limited_pmc_event)(u64 event_id);
@@ -80,6 +81,10 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long int read_bhrb(int n);
 
+#define for_each_branch_sample_type(x) \
+   for ((x) = PERF_SAMPLE_BRANCH_USER; \
+   (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
 /*
  * Only override the default definitions in include/linux/perf_event.h
  * if we have hardware PMU support.
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index ef0ff05..59defc5 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerp

[PATCH V10 8/8] selftests/powerpc: Add test for BHRB branch filters (HW & SW)

2015-06-30 Thread Anshuman Khandual
This patch adds a test for verifying that all the branch stack
sampling filters supported on powerpc work correctly. It also
adds some assembly helper functions in this regard. This patch
extends the generic event description to handle kernel mapped
ring buffers.

Signed-off-by: Anshuman Khandual 
---
 tools/testing/selftests/powerpc/pmu/Makefile   |  11 +-
 .../testing/selftests/powerpc/pmu/bhrb/.gitignore  |   1 +
 tools/testing/selftests/powerpc/pmu/bhrb/Makefile  |  13 +
 .../selftests/powerpc/pmu/bhrb/bhrb_filters.c  | 535 +
 .../selftests/powerpc/pmu/bhrb/bhrb_filters.h  |  15 +
 .../selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S  | 263 ++
 tools/testing/selftests/powerpc/pmu/event.h|   5 +
 7 files changed, 841 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/.gitignore
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/Makefile
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S

diff --git a/tools/testing/selftests/powerpc/pmu/Makefile 
b/tools/testing/selftests/powerpc/pmu/Makefile
index a9099d9..2e103fd 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -4,7 +4,7 @@ noarg:
 TEST_PROGS := count_instructions l3_bank_test per_event_excludes
 EXTRA_SOURCES := ../harness.c event.c lib.c
 
-all: $(TEST_PROGS) ebb
+all: $(TEST_PROGS) ebb bhrb
 
 $(TEST_PROGS): $(EXTRA_SOURCES)
 
@@ -18,25 +18,32 @@ DEFAULT_RUN_TESTS := $(RUN_TESTS)
 override define RUN_TESTS
$(DEFAULT_RUN_TESTS)
$(MAKE) -C ebb run_tests
+   $(MAKE) -C bhrb run_tests
 endef
 
 DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
 override define EMIT_TESTS
$(DEFAULT_EMIT_TESTS)
$(MAKE) -s -C ebb emit_tests
+   $(MAKE) -s -C bhrb emit_tests
 endef
 
 DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
 override define INSTALL_RULE
$(DEFAULT_INSTALL_RULE)
$(MAKE) -C ebb install
+   $(MAKE) -C bhrb install
 endef
 
 clean:
rm -f $(TEST_PROGS) loop.o
$(MAKE) -C ebb clean
+   $(MAKE) -C bhrb clean
 
 ebb:
$(MAKE) -k -C $@ all
 
-.PHONY: all run_tests clean ebb
+bhrb:
+   $(MAKE) -k -C $@ all
+
+.PHONY: all run_tests clean ebb bhrb
diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/.gitignore 
b/tools/testing/selftests/powerpc/pmu/bhrb/.gitignore
new file mode 100644
index 000..47c1049
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/bhrb/.gitignore
@@ -0,0 +1 @@
+bhrb_filters
diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/Makefile 
b/tools/testing/selftests/powerpc/pmu/bhrb/Makefile
new file mode 100644
index 000..61c032a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/bhrb/Makefile
@@ -0,0 +1,13 @@
+noarg:
+   $(MAKE) -C ../../
+
+TEST_PROGS := bhrb_filters
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../../harness.c ../event.c ../lib.c bhrb_filters_asm.S
+
+include ../../../lib.mk
+
+clean:
+   rm -f $(TEST_PROGS)
diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c 
b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
new file mode 100644
index 000..84e9b9a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
@@ -0,0 +1,535 @@
+/*
+ * BHRB filter test (HW & SW)
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "bhrb_filters.h"
+#include "utils.h"
+#include "../event.h"
+#include "../lib.h"
+
+/* Memory barriers */
#define smp_mb()	{ asm volatile ("sync" : : : "memory"); }
+
+/* Fetched address counts */
#define ALL_MAX		32
#define CALL_MAX	12
#define RET_MAX		10
#define COND_MAX	8
#define IND_MAX		4
+
+/* Test tunables */
+#define LOOP_COUNT 10
+#define SAMPLE_PERIOD  1
+
+static int branch_test_set[] = {
+   PERF_SAMPLE_BRANCH_ANY_CALL,/* Single filters */
+   PERF_SAMPLE_BRANCH_ANY_RETURN,
+   PERF_SAMPLE_BRANCH_COND,
+   PERF_SAMPLE_BRANCH_IND_CALL,
+   PERF_SAMPLE_BRANCH_ANY,
+
+   PERF_SAMPLE_BRANCH_ANY_CALL |   /* Double filters */
+   PERF_SAMPLE_BRANCH_ANY_RETURN,
+   PERF_SAMPLE_BRANCH_ANY_CALL |
+   PERF_SAMPLE_BRANCH_COND,
+   PERF_SAMPLE_BRANCH_ANY_CALL |
+   PERF_SAMPLE_BRANCH_IND_CALL,
+   PERF

[PATCH V10 1/8] powerpc/perf: Restore privilege level filter support for BHRB

2015-06-30 Thread Anshuman Khandual
'commit 9de5cb0f6df8 ("powerpc/perf: Add per-event excludes on Power8")'
broke the PMU based BHRB privilege level filter. BHRB depends on the
same MMCR0 bits for privilege level filtering that were used to freeze all
the PMCs as a group. Once we moved to individual event based privilege
filters through the MMCR2 register on POWER8, the event associated privilege
filters are no longer applicable to the BHRB captured branches.

This patch solves the problem by reverting to the previous method of
privilege level filtering for the event in case BHRB based branch stack
sampling is requested. This patch also changes 'check_excludes' for
the same reason.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index ddc0424..b7710b9 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -942,7 +942,7 @@ static int power_check_constraints(struct cpu_hw_events 
*cpuhw,
  * added events.
  */
 static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
- int n_prev, int n_new)
+ int n_prev, int n_new, unsigned int bhrb_users)
 {
int eu = 0, ek = 0, eh = 0;
int i, n, first;
@@ -951,9 +951,10 @@ static int check_excludes(struct perf_event **ctrs, 
unsigned int cflags[],
/*
 * If the PMU we're on supports per event exclude settings then we
 * don't need to do any of this logic. NB. This assumes no PMU has both
-* per event exclude and limited PMCs.
+* per event exclude and limited PMCs. But again if the event has also
+* requested for branch stack sampling, then process the logic here.
 */
-   if (ppmu->flags & PPMU_ARCH_207S)
+   if ((ppmu->flags & PPMU_ARCH_207S) && !bhrb_users)
return 0;
 
n = n_prev + n_new;
@@ -1271,7 +1272,7 @@ static void power_pmu_enable(struct pmu *pmu)
goto out;
}
 
-   if (!(ppmu->flags & PPMU_ARCH_207S)) {
+   if (!(ppmu->flags & PPMU_ARCH_207S) || cpuhw->bhrb_users) {
/*
 * Add in MMCR0 freeze bits corresponding to the attr.exclude_*
 * bits for the first event. We have already checked that all
@@ -1296,7 +1297,7 @@ static void power_pmu_enable(struct pmu *pmu)
mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
| MMCR0_FC);
-   if (ppmu->flags & PPMU_ARCH_207S)
+   if ((ppmu->flags & PPMU_ARCH_207S) && !cpuhw->bhrb_users)
mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
 
/*
@@ -1448,7 +1449,8 @@ static int power_pmu_add(struct perf_event *event, int 
ef_flags)
if (cpuhw->group_flag & PERF_EVENT_TXN)
goto nocheck;
 
-   if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
+   if (check_excludes(cpuhw->event, cpuhw->flags,
+   n0, 1, cpuhw->bhrb_users))
goto out;
if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
goto out;
@@ -1627,7 +1629,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
return -EAGAIN;
cpuhw = this_cpu_ptr(&cpu_hw_events);
n = cpuhw->n_events;
-   if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
+   if (check_excludes(cpuhw->event, cpuhw->flags, 0, n, cpuhw->bhrb_users))
return -EAGAIN;
i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
if (i < 0)
@@ -1840,10 +1842,10 @@ static int power_pmu_event_init(struct perf_event 
*event)
events[n] = ev;
ctrs[n] = event;
cflags[n] = flags;
-   if (check_excludes(ctrs, cflags, n, 1))
+   cpuhw = this_cpu_ptr(&cpu_hw_events);
+   if (check_excludes(ctrs, cflags, n, 1, cpuhw->bhrb_users))
return -EINVAL;
 
-   cpuhw = this_cpu_ptr(&cpu_hw_events);
err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
if (has_branch_stack(event)) {
-- 
2.1.0


[PATCH V10 3/8] powerpc/perf: Change the name of HW PMU branch filter tracking variable

2015-06-30 Thread Anshuman Khandual
This patch simply renames the variable 'bhrb_filter' to
'bhrb_hw_filter' in order to make room for one more variable which will
track SW filters in the generic powerpc book3s code, implemented in a
subsequent patch. This patch does not change any functionality.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6935660..ef0ff05 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -52,7 +52,7 @@ struct cpu_hw_events {
int n_txn_start;
 
/* BHRB bits */
-   u64 bhrb_filter;/* BHRB HW branch 
filter */
+   u64 bhrb_hw_filter; /* BHRB HW filter */
unsigned intbhrb_users;
void*bhrb_context;
struct  perf_branch_stack   bhrb_stack;
@@ -1357,7 +1357,7 @@ static void power_pmu_enable(struct pmu *pmu)
 
mb();
if (cpuhw->bhrb_users)
-   ppmu->config_bhrb(cpuhw->bhrb_filter);
+   ppmu->config_bhrb(cpuhw->bhrb_hw_filter);
 
write_mmcr0(cpuhw, mmcr0);
 
@@ -1465,7 +1465,7 @@ nocheck:
  out:
if (has_branch_stack(event)) {
power_pmu_bhrb_enable(event);
-   cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+   cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);
}
 
@@ -1848,10 +1848,10 @@ static int power_pmu_event_init(struct perf_event 
*event)
err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
if (has_branch_stack(event)) {
-   cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+   cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);
 
-   if (cpuhw->bhrb_filter == -1)
+   if (cpuhw->bhrb_hw_filter == -1)
return -EOPNOTSUPP;
}
 
-- 
2.1.0


[PATCH V10 6/8] powerpc/perf: Change POWER8 PMU configuration to work with SW filters

2015-06-30 Thread Anshuman Khandual
The kernel now supports SW based branch filters for book3s systems, with
some specific requirements on how HW supported branch filters must behave
in order to achieve the overall OR semantics of the perf branch stack
sampling framework. This patch adapts the BHRB branch filter configuration
to meet that protocol. The POWER8 PMU can handle only one HW based branch
filter request at any point of time; for all other combinations the PMU
will pass the work on to the SW.
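
Concretely, given the behaviour described above and the hunk below, the
new 'power8_bhrb_filter_map' resolves requests roughly as follows
(POWER8_MMCRA_IFM1 being the one branch filter POWER8 can program in HW):

	/* request                          return value        *bhrb_filter          */
	/* BRANCH_ANY                       0                   BRANCH_ANY            */
	/* BRANCH_ANY_CALL                  POWER8_MMCRA_IFM1   BRANCH_ANY_CALL       */
	/* BRANCH_ANY_CALL | BRANCH_COND    0                   0 (all left to SW)    */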

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/power8-pmu.c | 51 --
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 5e17cb5..8fccf6c 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -656,6 +656,16 @@ static int power8_generic_events[] = {
 
 static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
 {
+   u64 x, pmu_bhrb_filter;
+
+   pmu_bhrb_filter = 0;
+   *bhrb_filter = 0;
+
+   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) {
+   *bhrb_filter = PERF_SAMPLE_BRANCH_ANY;
+   return pmu_bhrb_filter;
+   }
+
/* BHRB and regular PMU events share the same privilege state
 * filter configuration. BHRB is always recorded along with a
 * regular PMU event. As the privilege state filter is handled
@@ -666,15 +676,42 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type, 
u64 *bhrb_filter)
/* Ignore user, kernel, hv bits */
branch_sample_type &= ~PERF_SAMPLE_BRANCH_PLM_ALL;
 
-   /* No branch filter requested */
-   if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY)
-   return 0;
+   /*
+* POWER8 does not support ORing of PMU HW branch filters. Hence
+* if multiple branch filters are requested which may include filters
+* supported in PMU, still go ahead and clear the PMU based HW branch
+* filter component as in this case all the filters will be processed
+* in SW.
+*/
 
-   if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL)
-   return POWER8_MMCRA_IFM1;
+   for_each_branch_sample_type(x) {
+   /* Ignore privilege branch filters */
+   if ((x == PERF_SAMPLE_BRANCH_USER)
+   || (x == PERF_SAMPLE_BRANCH_KERNEL)
+   || (x == PERF_SAMPLE_BRANCH_HV))
+   continue;
+
+   if (!(branch_sample_type & x))
+   continue;
+
+   /* Supported individual PMU branch filters */
+   if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+   branch_sample_type &= ~PERF_SAMPLE_BRANCH_ANY_CALL;
+   if (branch_sample_type) {
+   /* Multiple filters will be processed in SW */
+   pmu_bhrb_filter = 0;
+   *bhrb_filter = 0;
+   return pmu_bhrb_filter;
+   } else {
+   /* Individual filter will be processed in HW */
+   pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
+   *bhrb_filter|= PERF_SAMPLE_BRANCH_ANY_CALL;
+   return pmu_bhrb_filter;
+   }
+   }
+   }
 
-   /* Every thing else is unsupported */
-   return -1;
+   return pmu_bhrb_filter;
 }
 
 static void power8_config_bhrb(u64 pmu_bhrb_filter)
-- 
2.1.0


[PATCH 1/2] powerpc/signal: Fix confusing header documentation in sigcontext.h

2015-07-06 Thread Anshuman Khandual
As the 'vmx_reserve' array has been expanded to contain 101 double
words, the comment block above it needs to be updated. This patch also
changes the array size declaration to reflect the logic mentioned in the
comment block above. This change helps in explaining how the HW registers
are represented in the array.
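
With ELF_NVRREG defined as 34 on ppc64 (32 vector registers plus VSCR
and VRSAVE), the reordered expression spells out the arithmetic
directly: two doublewords for each of the 34 quadword register slots,
one doubleword of quadword-alignment slack, and 32 doublewords appended
for VSX data:

	ELF_NVRREG + ELF_NVRREG + 1 + 32  =  34 + 34 + 1 + 32  =  101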

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/include/uapi/asm/sigcontext.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/sigcontext.h 
b/arch/powerpc/include/uapi/asm/sigcontext.h
index 9c1f24f..b0c5c46 100644
--- a/arch/powerpc/include/uapi/asm/sigcontext.h
+++ b/arch/powerpc/include/uapi/asm/sigcontext.h
@@ -28,7 +28,7 @@ struct sigcontext {
 /*
  * To maintain compatibility with current implementations the sigcontext is
  * extended by appending a pointer (v_regs) to a quadword type (elf_vrreg_t)
- * followed by an unstructured (vmx_reserve) field of 69 doublewords.  This
+ * followed by an unstructured (vmx_reserve) field of 101 doublewords. This
  * allows the array of vector registers to be quadword aligned independent of
  * the alignment of the containing sigcontext or ucontext. It is the
  * responsibility of the code setting the sigcontext to set this pointer to
@@ -80,7 +80,7 @@ struct sigcontext {
  * registers and vscr/vrsave.
  */
elf_vrreg_t __user *v_regs;
-   longvmx_reserve[ELF_NVRREG+ELF_NVRREG+32+1];
+   longvmx_reserve[ELF_NVRREG+ELF_NVRREG+1+32];
 #endif
 };
 
-- 
2.1.0


[PATCH 2/2] powerpc/signal: Add helper function to fetch quad word aligned pointer

2015-07-06 Thread Anshuman Khandual
This patch adds one helper function, 'vmx_reserve_addr', which computes
a quad word aligned pointer into the 'vmx_reserve' array of the
sigcontext structure, making the code more readable.
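
As an aside, the '(addr + 15) & ~0xful' idiom used by the helper rounds
an address up to the next 16-byte (quadword) boundary, leaving
already-aligned addresses untouched:

	(0x1008 + 15) & ~0xful  ->  0x1010	/* unaligned: rounded up */
	(0x1010 + 15) & ~0xful  ->  0x1010	/* aligned: unchanged */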

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/kernel/signal_64.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index c7c24d2..e8762f5 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -73,6 +73,12 @@ static const char fmt32[] = KERN_INFO \
 static const char fmt64[] = KERN_INFO \
"%s[%d]: bad frame in %s: %016lx nip %016lx lr %016lx\n";
 
+static elf_vrreg_t __user *vmx_reserve_addr(struct sigcontext __user *sc)
+{
+   return (elf_vrreg_t __user *)
+   (((unsigned long)sc->vmx_reserve + 15) & ~0xful);
+}
+
 /*
  * Set up the sigcontext for the signal frame.
  */
@@ -90,7 +96,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, 
struct pt_regs *regs,
 * v_regs pointer or not
 */
 #ifdef CONFIG_ALTIVEC
-   elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *)(((unsigned 
long)sc->vmx_reserve + 15) & ~0xful);
+   elf_vrreg_t __user *v_regs = vmx_reserve_addr(sc);
 #endif
unsigned long msr = regs->msr;
long err = 0;
@@ -181,10 +187,8 @@ static long setup_tm_sigcontexts(struct sigcontext __user 
*sc,
 * v_regs pointer or not.
 */
 #ifdef CONFIG_ALTIVEC
-   elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *)
-   (((unsigned long)sc->vmx_reserve + 15) & ~0xful);
-   elf_vrreg_t __user *tm_v_regs = (elf_vrreg_t __user *)
-   (((unsigned long)tm_sc->vmx_reserve + 15) & ~0xful);
+   elf_vrreg_t __user *v_regs = vmx_reserve_addr(sc);
+   elf_vrreg_t __user *tm_v_regs = vmx_reserve_addr(tm_sc);
 #endif
unsigned long msr = regs->msr;
long err = 0;
-- 
2.1.0


[PATCH] powerpc/tm: Drop tm_orig_msr from thread_struct

2015-07-06 Thread Anshuman Khandual
Currently tm_orig_msr is used only during process context switch. There
is also ckpt_regs, which saves the checkpointed userspace context. The
MSR slot contained in the ckpt_regs structure can be used during process
context switch instead of tm_orig_msr, allowing us to drop the latter
from the thread_struct structure. This patch does that.

Acked-by: Michael Neuling 
Signed-off-by: Anshuman Khandual 
---
RFC: https://patchwork.ozlabs.org/patch/462736/

 arch/powerpc/include/asm/processor.h |  1 -
 arch/powerpc/kernel/process.c| 14 +++---
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 28ded5d..5afea36 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -264,7 +264,6 @@ struct thread_struct {
u64 tm_tfhar;   /* Transaction fail handler addr */
u64 tm_texasr;  /* Transaction exception & summary */
u64 tm_tfiar;   /* Transaction fail instr address reg */
-   unsigned long   tm_orig_msr;/* Thread's MSR on ctx switch */
struct pt_regs  ckpt_regs;  /* Checkpointed registers */
 
unsigned long   tm_tar;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 8005e18..99adcba 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -86,7 +86,7 @@ void giveup_fpu_maybe_transactional(struct task_struct *tsk)
if (tsk == current && tsk->thread.regs &&
MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
!test_thread_flag(TIF_RESTORE_TM)) {
-   tsk->thread.tm_orig_msr = tsk->thread.regs->msr;
+   tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr;
set_thread_flag(TIF_RESTORE_TM);
}
 
@@ -104,7 +104,7 @@ void giveup_altivec_maybe_transactional(struct task_struct 
*tsk)
if (tsk == current && tsk->thread.regs &&
MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
!test_thread_flag(TIF_RESTORE_TM)) {
-   tsk->thread.tm_orig_msr = tsk->thread.regs->msr;
+   tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr;
set_thread_flag(TIF_RESTORE_TM);
}
 
@@ -543,7 +543,7 @@ static void tm_reclaim_thread(struct thread_struct *thr,
 * the thread will no longer be transactional.
 */
if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) {
-   msr_diff = thr->tm_orig_msr & ~thr->regs->msr;
+   msr_diff = thr->ckpt_regs.msr & ~thr->regs->msr;
if (msr_diff & MSR_FP)
memcpy(&thr->transact_fp, &thr->fp_state,
   sizeof(struct thread_fp_state));
@@ -594,10 +594,10 @@ static inline void tm_reclaim_task(struct task_struct 
*tsk)
/* Stash the original thread MSR, as giveup_fpu et al will
 * modify it.  We hold onto it to see whether the task used
 * FP & vector regs.  If the TIF_RESTORE_TM flag is set,
-* tm_orig_msr is already set.
+* ckpt_regs.msr is already set.
 */
if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM))
-   thr->tm_orig_msr = thr->regs->msr;
+   thr->ckpt_regs.msr = thr->regs->msr;
 
TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
 "ccr=%lx, msr=%lx, trap=%lx)\n",
@@ -666,7 +666,7 @@ static inline void tm_recheckpoint_new_task(struct 
task_struct *new)
tm_restore_sprs(&new->thread);
return;
}
-   msr = new->thread.tm_orig_msr;
+   msr = new->thread.ckpt_regs.msr;
/* Recheckpoint to restore original checkpointed register state. */
TM_DEBUG("*** tm_recheckpoint of pid %d "
 "(new->msr 0x%lx, new->origmsr 0x%lx)\n",
@@ -726,7 +726,7 @@ void restore_tm_state(struct pt_regs *regs)
if (!MSR_TM_ACTIVE(regs->msr))
return;
 
-   msr_diff = current->thread.tm_orig_msr & ~regs->msr;
+   msr_diff = current->thread.ckpt_regs.msr & ~regs->msr;
msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
if (msr_diff & MSR_FP) {
fp_enable();
-- 
2.1.0


[PATCH] powerpc/irq: Enable facility unavailable exceptions in /proc/interrupts

2015-07-08 Thread Anshuman Khandual
This patch enables counting of facility unavailable exceptions for the
generic facility, FPU, ALTIVEC and VSX in the /proc/interrupts listing
by incrementing their respective newly added IRQ statistical counters as
and when these exceptions happen. It also adds multiple helper functions
which will be called from within interrupt handler context to update
the statistics.
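
The helper bodies live in traps.c, whose hunk is not shown in full
below; a minimal sketch of one such helper, assuming only the per-CPU
irq_stat fields added in hardirq.h, would be:

	void fpu_unav_exceptions_count(void)
	{
		__this_cpu_inc(irq_stat.fpu_unav_exceptions);
	}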

With this patch being applied, /proc/interrupts looks something
like this after running various workloads which create these exceptions.


   CPU0   CPU1
 16:   4262   6166  XICS   2 Level IPI
 17:  0  0  XICS 4101 Level virtio0
 18:  0  0  XICS 4100 Level ohci_hcd:usb1
 20:  0  0  XICS 4096 Level RAS_EPOW
 21:   5730   1744  XICS 4102 Level ibmvscsi
 22:147  0  XICS 4103 Level hvc_console
 24:  0  0  XICS 4104 Level virtio1-config
 25: 19167  XICS 4105 Level virtio1-input.0
 26:  1  0  XICS 4106 Level virtio1-output.0
LOC:   5278   7996   Local timer interrupts for timer event device
LOC: 49 24   Local timer interrupts for others
SPU:  0  0   Spurious interrupts
PMI:  0  0   Performance monitoring interrupts
MCE:  0  0   Machine check exceptions
DBL:  0  0   Doorbell interrupts
FAC:  0  0   Facility unavailable excpetions
FPU:  12172   2549   FPU unavailable excpetions
ALT:  22454   7226   ALTIVEC unavailable excpetions
VSX: 14 90   VSX unavailable excpetions
---

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/include/asm/hardirq.h   |  4 
 arch/powerpc/kernel/exceptions-64s.S |  2 ++
 arch/powerpc/kernel/irq.c| 23 +++
 arch/powerpc/kernel/traps.c  | 29 +
 4 files changed, 58 insertions(+)

diff --git a/arch/powerpc/include/asm/hardirq.h 
b/arch/powerpc/include/asm/hardirq.h
index 8add8b8..bd31390 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -15,6 +15,10 @@ typedef struct {
 #ifdef CONFIG_PPC_DOORBELL
unsigned int doorbell_irqs;
 #endif
+   unsigned int fac_unav_exceptions;
+   unsigned int fpu_unav_exceptions;
+   unsigned int altivec_unav_exceptions;
+   unsigned int vsx_unav_exceptions;
 } cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 0a0399c2..a86180c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1158,6 +1158,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
bl  load_up_fpu
+   bl  fpu_unav_exceptions_count
b   fast_exception_return
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2: /* User process was in a transaction */
@@ -1184,6 +1185,7 @@ BEGIN_FTR_SECTION
   END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
 #endif
bl  load_up_altivec
+   bl  altivec_unav_exceptions_count
b   fast_exception_return
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2: /* User process was in a transaction */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4509603..fa6559c 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -397,6 +397,25 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "  Doorbell interrupts\n");
}
 #endif
+   seq_printf(p, "%*s: ", prec, "FAC");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(irq_stat, 
j).fac_unav_exceptions);
+   seq_printf(p, "  Facility unavailable excpetions\n");
+
+   seq_printf(p, "%*s: ", prec, "FPU");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(irq_stat, 
j).fpu_unav_exceptions);
+   seq_printf(p, "  FPU unavailable excpetions\n");
+
+   seq_printf(p, "%*s: ", prec, "ALT");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(irq_stat, 
j).altivec_unav_exceptions);
+   seq_printf(p, "  ALTIVEC unavailable excpetions\n");
+
+   seq_printf(p, "%*s: ", prec, "VSX");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(irq_stat, 
j).vsx_unav_exceptions);
+   seq_printf(p, "  VSX unavailable excpetions\n");
 
return 0;
 }
@@ -416,6 +435,10 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 #ifdef CONFIG_PPC_DOORBELL
   

[PATCH V2] powerpc/irq: Enable some more exceptions in /proc/interrupts interface

2015-07-09 Thread Anshuman Khandual
This patch enables counting of facility unavailable exceptions for the
generic facility, FPU, ALTIVEC and VSX in the /proc/interrupts listing
by incrementing their newly added IRQ statistical counters as and when
these exceptions happen. It also adds a couple of helper functions which
will be called from within interrupt handler context to update the
statistics. This patch similarly enables counting of alignment and
program check exceptions.
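
The increments for the two new trap counters sit in the corresponding C
handlers in traps.c (that hunk is truncated below); a sketch of the
alignment case, with the exact placement in the handler assumed:

	void alignment_exception(struct pt_regs *regs)
	{
		__this_cpu_inc(irq_stat.alignment_exceptions);
		/* ... the existing alignment fixup path continues unchanged ... */
	}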

With this patch being applied, /proc/interrupts looks something
like this after running various workloads which create these exceptions.

--
   CPU0   CPU1
 16:  28477  35288  XICS   2 Level IPI
 17:  0  0  XICS 4101 Level virtio0
 18:  0  0  XICS 4100 Level ohci_hcd:usb1
 19: 288146  0  XICS 4099 Level virtio1
 20:  0  0  XICS 4096 Level RAS_EPOW
 21:   6241  17364  XICS 4102 Level ibmvscsi
 22:133  0  XICS 4103 Level hvc_console
LOC:  12617  24509   Local timer interrupts for timer event device
LOC: 98 73   Local timer interrupts for others
SPU:  0  0   Spurious interrupts
PMI:  0  0   Performance monitoring interrupts
MCE:  0  0   Machine check exceptions
DBL:  0  0   Doorbell interrupts
ALN:  0  0   Alignment exceptions
PRG:  0  0   Program check exceptions
FAC:  0  0   Facility unavailable exceptions
FPU:  12736   2458   FPU unavailable exceptions
ALT: 108313  24507   ALTIVEC unavailable exceptions
VSX: 4085904943568   VSX unavailable exceptions
--

Signed-off-by: Anshuman Khandual 
---
Changes in V2:
- Fixed some typos in the final /proc/interrupts output
- Added support for alignment and program check exceptions

 arch/powerpc/include/asm/hardirq.h   |  6 ++
 arch/powerpc/kernel/exceptions-64s.S |  2 ++
 arch/powerpc/kernel/irq.c| 35 +++
 arch/powerpc/kernel/traps.c  | 28 
 4 files changed, 71 insertions(+)

diff --git a/arch/powerpc/include/asm/hardirq.h 
b/arch/powerpc/include/asm/hardirq.h
index 8add8b8..ba51d3e 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -15,6 +15,12 @@ typedef struct {
 #ifdef CONFIG_PPC_DOORBELL
unsigned int doorbell_irqs;
 #endif
+   unsigned int alignment_exceptions;
+   unsigned int program_exceptions;
+   unsigned int fac_unav_exceptions;
+   unsigned int fpu_unav_exceptions;
+   unsigned int altivec_unav_exceptions;
+   unsigned int vsx_unav_exceptions;
 } cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 0a0399c2..a86180c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1158,6 +1158,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
bl  load_up_fpu
+   bl  fpu_unav_exceptions_count
b   fast_exception_return
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2: /* User process was in a transaction */
@@ -1184,6 +1185,7 @@ BEGIN_FTR_SECTION
   END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
 #endif
bl  load_up_altivec
+   bl  altivec_unav_exceptions_count
b   fast_exception_return
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2: /* User process was in a transaction */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4509603..8b4d928 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -397,6 +397,35 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "  Doorbell interrupts\n");
}
 #endif
+   seq_printf(p, "%*s: ", prec, "ALN");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(irq_stat, 
j).alignment_exceptions);
+   seq_printf(p, "  Alignment exceptions\n");
+
+   seq_printf(p, "%*s: ", prec, "PRG");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(irq_stat, j).program_exceptions);
+   seq_printf(p, "  Program check exceptions\n");
+
+   seq_printf(p, "%*s: ", prec, "FAC");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(irq_stat, 
j).fac_unav_exceptions);
+   seq_printf(p, "  Facility unavailable exceptions\n");
+
+   seq_printf(p, "%*s: ", prec, "FPU");
+   for_each_online_cpu(j)
seq_printf(p, "

Re: [PATCH V2] powerpc/irq: Enable some more exceptions in /proc/interrupts interface

2015-07-12 Thread Anshuman Khandual
On 07/10/2015 12:40 PM, Michael Neuling wrote:
> What's the performance impact of this?  If you run this test with --fp,
> --altivec or --vector what is the impact of adding this patch?
> 
> http://ozlabs.org/~anton/junkcode/context_switch2.c
> 
> eg 
>   ./context_switch2 --fp 0 0 

Please find the results here, which look similar with or without
the patch being applied.

(A) Floating point context switches (context_switch2 --fp 0 0)

Without the patch   With the patch
=   ==
320216  323460
324596  318448
321206  316540
321308  316650
318904  316478


(B) AltiVec context switches (context_switch2 --altivec 0 0)

Without the patch   With the patch
=   ==
352012  342028
345894  345156
354604  345534
354020  354714
353936  364814

(C) Vector context switches (context_switch2 --vector 0 0)

Without the patch   With the patch
=   ==
354496  344296
361386  346822
361856  354932
344906  348722
343288  355014


Re: [PATCH V2] powerpc/irq: Enable some more exceptions in /proc/interrupts interface

2015-07-12 Thread Anshuman Khandual
On 07/13/2015 11:11 AM, Michael Ellerman wrote:
> On Mon, 2015-07-13 at 10:54 +0530, Anshuman Khandual wrote:
>> On 07/10/2015 12:40 PM, Michael Neuling wrote:
>>> What's the performance impact of this?  If you run this test with --fp,
>>> --altivec or --vector what is the impact of adding this patch?
>>>
>>> http://ozlabs.org/~anton/junkcode/context_switch2.c
>>>
>>> eg 
>>>   ./context_switch2 --fp 0 0 
>>
>> Please find the results here, which look similar with or without
>> the patch being applied.
> 
> No they don't look similar with or without.
> 
>> (A) Floating point context switches (context_switch2 --fp 0 0)
> 
> If you just sort them you see:
> 
> 316478after
> 316540after
> 316650after
> 318448after
> 318904 before
> 320216 before
> 321206 before
> 321308 before
> 323460after
> 324596 before
> 
> 
> It looks like ~1% degradation. Please run the test more times (maybe 1000) and
> see how the numbers look.

Average of 1000 iterations looks better.

With the patch    : 322599.57  (Average of 1000 results)
Without the patch : 320464.924 (Average of 1000 results)


[PATCH V3] powerpc/irq: Enable some more exceptions in /proc/interrupts interface

2015-07-13 Thread Anshuman Khandual
This patch enables counting of facility unavailable exceptions for the
generic facility, FPU, ALTIVEC and VSX in the /proc/interrupts listing
by incrementing their newly added IRQ statistical counters as and when
these exceptions happen. It also adds a couple of helper functions which
will be called from within interrupt handler context to update the
statistics. This patch similarly enables counting of alignment and
program check exceptions.

With this patch being applied, /proc/interrupts looks something
like this after running various workloads which create these exceptions.

--
   CPU0   CPU1
 16:   5734  24129  XICS   2 Level IPI
 17:  0  0  XICS 4101 Level virtio0
 18:  0  0  XICS 4100 Level ohci_hcd:usb1
 19:  13920  0  XICS 4099 Level virtio1
 20:  0  0  XICS 4096 Level RAS_EPOW
 21:   6160   3241  XICS 4102 Level ibmvscsi
 22:  1  0  XICS 4103 Level hvc_console
LOC:   6825   3556   Local timer interrupts for timer event device
LOC: 22 41   Local timer interrupts for others
SPU:  1  0   Spurious interrupts
PMI:  0  0   Performance monitoring interrupts
MCE:  0  0   Machine check exceptions
DBL:  0  0   Doorbell interrupts
ALN:  0  0   Alignment exceptions
PRG:  0  0   Program check exceptions
FAC:  7 14   Facility unavailable exceptions
FPU:   2928   3162   FPU unavailable exceptions
ALT:  12950  15536   AltiVec unavailable exceptions
VSX:  12930 220183   VSX unavailable exceptions
--

Signed-off-by: Anshuman Khandual 
---
Changes in V3:
- Changed the display string from "ALTIVEC" to "AltiVec"
- Now captured "Facility unavailable exceptions" in the example

Changes in V2:
- Fixed some typos in the final /proc/interrupts output
- Added support for alignment and program check exceptions

 arch/powerpc/include/asm/hardirq.h   |  6 ++
 arch/powerpc/kernel/exceptions-64s.S |  2 ++
 arch/powerpc/kernel/irq.c| 35 +++
 arch/powerpc/kernel/traps.c  | 28 
 4 files changed, 71 insertions(+)

diff --git a/arch/powerpc/include/asm/hardirq.h 
b/arch/powerpc/include/asm/hardirq.h
index 8add8b8..ba51d3e 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -15,6 +15,12 @@ typedef struct {
 #ifdef CONFIG_PPC_DOORBELL
unsigned int doorbell_irqs;
 #endif
+   unsigned int alignment_exceptions;
+   unsigned int program_exceptions;
+   unsigned int fac_unav_exceptions;
+   unsigned int fpu_unav_exceptions;
+   unsigned int altivec_unav_exceptions;
+   unsigned int vsx_unav_exceptions;
 } cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 0a0399c2..a86180c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1158,6 +1158,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
bl  load_up_fpu
+   bl  fpu_unav_exceptions_count
b   fast_exception_return
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2: /* User process was in a transaction */
@@ -1184,6 +1185,7 @@ BEGIN_FTR_SECTION
   END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
 #endif
bl  load_up_altivec
+   bl  altivec_unav_exceptions_count
b   fast_exception_return
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2: /* User process was in a transaction */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4509603..60773b3 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -397,6 +397,35 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "  Doorbell interrupts\n");
}
 #endif
+   seq_printf(p, "%*s: ", prec, "ALN");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(irq_stat, 
j).alignment_exceptions);
+   seq_printf(p, "  Alignment exceptions\n");
+
+   seq_printf(p, "%*s: ", prec, "PRG");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(irq_stat, j).program_exceptions);
+   seq_printf(p, "  Program check exceptions\n");
+
+   seq_printf(p, "%*s: ", prec, "FAC");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(irq_stat, 
j).fac_unav_exceptions);
+   seq_printf(p, "  Facility unavailabl

[PATCH V2] powerpc/signal: Add helper function to fetch quad word aligned pointer

2015-07-19 Thread Anshuman Khandual
This patch adds one helper function 'sigcontext_vmx_regs' which computes
quad word aligned pointer for 'vmx_reserve' array element in sigcontext
structure making the code more readable.

Signed-off-by: Anshuman Khandual 
---
- Fixed some indentation problems
- Changed function name from 'vmx_reserve_addr' to 'sigcontext_vmx_regs'
- Added some documentation before the function

 arch/powerpc/kernel/signal_64.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index c7c24d2..bb9c939 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -74,6 +74,16 @@ static const char fmt64[] = KERN_INFO \
"%s[%d]: bad frame in %s: %016lx nip %016lx lr %016lx\n";
 
 /*
+ * This computes quad word aligned pointer for 'vmx_reserve' array element
+ * which is used primarily in assigning to the preceding pointer 'v_regs'
+ * in sigcontext structure.
+ */
+static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
+{
+   return (elf_vrreg_t __user *) (((unsigned long)sc->vmx_reserve + 15) & 
~0xful);
+}
+
+/*
  * Set up the sigcontext for the signal frame.
  */
 
@@ -90,7 +100,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, 
struct pt_regs *regs,
 * v_regs pointer or not
 */
 #ifdef CONFIG_ALTIVEC
-   elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *)(((unsigned 
long)sc->vmx_reserve + 15) & ~0xful);
+   elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
 #endif
unsigned long msr = regs->msr;
long err = 0;
@@ -181,10 +191,8 @@ static long setup_tm_sigcontexts(struct sigcontext __user 
*sc,
 * v_regs pointer or not.
 */
 #ifdef CONFIG_ALTIVEC
-   elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *)
-   (((unsigned long)sc->vmx_reserve + 15) & ~0xful);
-   elf_vrreg_t __user *tm_v_regs = (elf_vrreg_t __user *)
-   (((unsigned long)tm_sc->vmx_reserve + 15) & ~0xful);
+   elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
+   elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc);
 #endif
unsigned long msr = regs->msr;
long err = 0;
-- 
2.1.0


Re: [V2] powerpc/signal: Add helper function to fetch quad word aligned pointer

2015-07-20 Thread Anshuman Khandual
On 07/21/2015 07:07 AM, Michael Ellerman wrote:
> On Mon, 2015-20-07 at 02:58:43 UTC, Anshuman Khandual wrote:
>> This patch adds one helper function 'sigcontext_vmx_regs' which computes
>> quad word aligned pointer for 'vmx_reserve' array element in sigcontext
>> structure making the code more readable.
>>
>> diff --git a/arch/powerpc/kernel/signal_64.c 
>> b/arch/powerpc/kernel/signal_64.c
>> index c7c24d2..bb9c939 100644
>> --- a/arch/powerpc/kernel/signal_64.c
>> +++ b/arch/powerpc/kernel/signal_64.c
>> @@ -74,6 +74,16 @@ static const char fmt64[] = KERN_INFO \
>>  "%s[%d]: bad frame in %s: %016lx nip %016lx lr %016lx\n";
>>  
>>  /*
>> + * This computes quad word aligned pointer for 'vmx_reserve' array element
>> + * which is used primarily in assigning to the preceding pointer 'v_regs'
>> + * in sigcontext structure.
> 
> How about:
> 
>   * This computes a quad word aligned pointer inside the vmx_reserve array
>   * element. For historical reasons sigcontext might not be quad word aligned,
>   * but the location we write the VMX regs to must be. See the comment in
>   * sigcontext for more detail.

Yeah, it's way better. Thanks!

> 
> 
>> + */
>> +static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
>> +{
>> +return (elf_vrreg_t __user *) (((unsigned long)sc->vmx_reserve + 15) & 
>> ~0xful);
>> +}
> 
> This doesn't build for ppc64e_defconfig:

This time, I did build test for some of the configs but not the entire
comprehensive list. Sorry, missed this one though, will take care next
time around.

> 
>   arch/powerpc/kernel/signal_64.c:82:28: error: 'sigcontext_vmx_regs' defined 
> but not used [-Werror=unused-function]
>static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user 
> *sc)
> ^
> 
> For now I'll just #ifdef it.
> 
> Please do a test build of ppc64e_defconfig in future.


[RFC 7/8] powerpc/xmon: Drop 'valid' from the condition inside 'dump_segments'

2015-07-20 Thread Anshuman Khandual
From: "khand...@linux.vnet.ibm.com" 

The value of 'valid' is zero when 'esid' is zero, and it does not matter
when 'esid' is non-zero. Hence the variable 'valid' can be dropped
from the conditional statement. This patch does that.
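
Since 'valid' is computed as 'esid & SLB_ESID_V', a non-zero 'valid'
implies a non-zero 'esid', which is why the term is redundant:

	valid | esid | vsid  ==  (esid & SLB_ESID_V) | esid | vsid  ==  esid | vsid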

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/xmon/xmon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index e599259..1798e21 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2740,7 +2740,7 @@ void dump_segments(void)
asm volatile("slbmfee  %0,%1" : "=r" (esid) : "r" (i));
asm volatile("slbmfev  %0,%1" : "=r" (vsid) : "r" (i));
valid = (esid & SLB_ESID_V);
-   if (valid | esid | vsid) {
+   if (esid | vsid) {
printf("%02d %016lx %016lx", i, esid, vsid);
if (valid) {
llp = vsid & SLB_VSID_LLP;
-- 
2.1.0


[RFC 1/8] powerpc/slb: Remove a duplicate extern variable

2015-07-21 Thread Anshuman Khandual
From: "khand...@linux.vnet.ibm.com" 

This patch removes a duplicate declaration of the extern variable
'slb_compare_rr_to_size'. This patch does not change
any functionality.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/mm/slb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 6e450ca..62fafb3 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -253,7 +253,6 @@ static inline void patch_slb_encoding(unsigned int 
*insn_addr,
patch_instruction(insn_addr, insn);
 }
 
-extern u32 slb_compare_rr_to_size[];
 extern u32 slb_miss_kernel_load_linear[];
 extern u32 slb_miss_kernel_load_io[];
 extern u32 slb_compare_rr_to_size[];
-- 
2.1.0


[RFC 8/8] powerpc/xmon: Add some more elements to the existing PACA dump list

2015-07-21 Thread Anshuman Khandual
From: "khand...@linux.vnet.ibm.com" 

This patch adds some more elements to the existing PACA dump list
inside an xmon session, listed here:

- hmi_event_available
- dscr_default
- vmalloc_sllp
- slb_cache_ptr
- sprg_vdso
- tm_scratch
- core_idle_state_ptr
- thread_idle_state
- thread_mask

With this patch, a typical xmon PACA dump looks something like this.

 possible = yes
 present  = yes
 online   = yes
 lock_token   = 0x8000  (0x8)
 paca_index   = 0x0 (0xa)
 kernel_toc   = 0xc0e79300  (0x10)
 kernelbase   = 0xc000  (0x18)
 kernel_msr   = 0xb0001032  (0x20)
 emergency_sp = 0xc0003fff  (0x28)
 mc_emergency_sp  = 0xc0003ffec000  (0x2e0)
 in_mce   = 0x0 (0x2e8)
 hmi_event_available  = 0x0 (0x2ea)
 data_offset  = 0xfa9f  (0x30)
 hw_cpu_id= 0x0 (0x38)
 cpu_start= 0x1 (0x3a)
 kexec_state  = 0x0 (0x3b)
 dscr_default = 0x0 (0x58)
 vmalloc_sllp = 0x510   (0x1b8)
 slb_cache_ptr= 0x3 (0x1ba)
 slb_cache[0]:= 0x3f000
 slb_cache[1]:= 0x1
 slb_cache[2]:= 0x1000
 __current= 0xc0009ce96620  (0x290)
 kstack   = 0xc0009cf2be30  (0x298)
 stab_rr  = 0x8 (0x2a0)
 saved_r1 = 0xc0009cf2b360  (0x2a8)
 trap_save= 0x0 (0x2b8)
 soft_enabled = 0x0 (0x2ba)
 irq_happened = 0x1 (0x2bb)
 io_sync  = 0x0 (0x2bc)
 irq_work_pending = 0x0 (0x2bd)
 nap_state_lost   = 0x0 (0x2be)
 sprg_vdso= 0x0 (0x2c0)
 tm_scratch   = 0x80010280f032  (0x2c8)
 core_idle_state_ptr  = (null)  (0x2d0)
 thread_idle_state= 0x0 (0x2d8)
 thread_mask  = 0x0 (0x2d9)
 subcore_sibling_mask = 0x0 (0x2da)

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/xmon/xmon.c | 27 +++
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1798e21..bc42f6e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2073,6 +2073,7 @@ static void xmon_rawdump (unsigned long adrs, long ndump)
 static void dump_one_paca(int cpu)
 {
struct paca_struct *p;
+   int i;
 
if (setjmp(bus_error_jmp) != 0) {
printf("*** Error dumping paca for cpu 0x%x!\n", cpu);
@@ -2086,12 +2087,12 @@ static void dump_one_paca(int cpu)
 
printf("paca for cpu 0x%x @ %p:\n", cpu, p);
 
-   printf(" %-*s = %s\n", 16, "possible", cpu_possible(cpu) ? "yes" : 
"no");
-   printf(" %-*s = %s\n", 16, "present", cpu_present(cpu) ? "yes" : "no");
-   printf(" %-*s = %s\n", 16, "online", cpu_online(cpu) ? "yes" : "no");
+   printf(" %-*s = %s\n", 20, "possible", cpu_possible(cpu) ? "yes" : 
"no");
+   printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? "yes" : "no");
+   printf(" %-*s = %s\n", 20, "online", cpu_online(cpu) ? "yes" : "no");
 
 #define DUMP(paca, name, format) \
-   printf(" %-*s = %#-*"format"\t(0x%lx)\n", 16, #name, 18, paca->name, \
+   printf(" %-*s = %#-*"format"\t(0x%lx)\n", 20, #name, 18, paca->name, \
offsetof(struct paca_struct, name));
 
DUMP(p, lock_token, "x");
@@ -2103,11 +2104,17 @@ static void dump_one_paca(int cpu)
 #ifdef CONFIG_PPC_BOOK3S_64
DUMP(p, mc_emergency_sp, "p");
DUMP(p, in_mce, "x");
+   DUMP(p, hmi_event_available, "x");
 #endif
DUMP(p, data_offset, "lx");
DUMP(p, hw_cpu_id, "x");
DUMP(p, cpu_start, "x");
DUMP(p, kexec_state, "x");
+   DUMP(p, dscr_default, "llx");
+   DUMP(p, vmalloc_sllp, "x");
+   DUMP(p, slb_cache_ptr, "x");
+   for (i = 0; i < p->slb_cache_ptr; i++)
+   printf(" slb_cache[%d]:= 0x%lx\n", i, p->slb_cache[i]);
DUMP(p, __current, "p");
DUMP(p, kstack, "lx");
DUMP(p

[RFC 3/8] powerpc/slb: Define macros for the bolted slots

2015-07-21 Thread Anshuman Khandual
From: "khand...@linux.vnet.ibm.com" 

This patch defines macros for all three bolted SLB slots. It also
renames the 'create_shadowed_slbe' function to 'new_shadowed_slbe'.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/mm/slb.c | 27 +++
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 3842a54..cbeaaa2 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -25,6 +25,9 @@
 #include 
 #include 
 
+#define SLOT_KLINR  0  /* kernel linear map  (0xc) */
+#define SLOT_KVIRT  1  /* kernel virtual map (0xd) */
+#define SLOT_KSTACK 2  /* kernel stack map   (0xf) */
 
 extern void slb_allocate_realmode(unsigned long ea);
 extern void slb_allocate_user(unsigned long ea);
@@ -74,7 +77,7 @@ static inline void slb_shadow_clear(unsigned long slot)
get_slb_shadow()->save_area[slot].esid = 0;
 }
 
-static inline void create_shadowed_slbe(unsigned long ea, int ssize,
+static inline void new_shadowed_slbe(unsigned long ea, int ssize,
unsigned long flags,
unsigned long slot)
 {
@@ -103,16 +106,16 @@ static void __slb_flush_and_rebolt(void)
lflags = SLB_VSID_KERNEL | linear_llp;
vflags = SLB_VSID_KERNEL | vmalloc_llp;
 
-   ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, 2);
+   ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, SLOT_KSTACK);
if ((ksp_esid_data & ~0xfffUL) <= PAGE_OFFSET) {
ksp_esid_data &= ~SLB_ESID_V;
ksp_vsid_data = 0;
-   slb_shadow_clear(2);
+   slb_shadow_clear(SLOT_KSTACK);
} else {
/* Update stack slot; others don't change */
-   slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, 2);
+   slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, SLOT_KSTACK);
ksp_vsid_data =
-   be64_to_cpu(get_slb_shadow()->save_area[2].vsid);
+   be64_to_cpu(get_slb_shadow()->save_area[SLOT_KSTACK].vsid);
}
 
/* We need to do this all in asm, so we're sure we don't touch
@@ -125,7 +128,7 @@ static void __slb_flush_and_rebolt(void)
 "slbmte%2,%3\n"
 "isync"
 :: "r"(mk_vsid_data(VMALLOC_START, mmu_kernel_ssize, 
vflags)),
-   "r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, 1)),
+   "r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, 
SLOT_KVIRT)),
"r"(ksp_vsid_data),
"r"(ksp_esid_data)
 : "memory");
@@ -151,7 +154,7 @@ void slb_vmalloc_update(void)
unsigned long vflags;
 
vflags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp;
-   slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, 1);
+   slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, SLOT_KVIRT);
slb_flush_and_rebolt();
 }
 
@@ -312,19 +315,19 @@ void slb_initialize(void)
asm volatile("isync":::"memory");
asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
asm volatile("isync; slbia; isync":::"memory");
-   create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, 0);
-   create_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, 1);
+   new_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, SLOT_KLINR);
+   new_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, SLOT_KVIRT);
 
/* For the boot cpu, we're running on the stack in init_thread_union,
 * which is in the first segment of the linear mapping, and also
 * get_paca()->kstack hasn't been initialized yet.
 * For secondary cpus, we need to bolt the kernel stack slot now.
 */
-   slb_shadow_clear(2);
+   slb_shadow_clear(SLOT_KSTACK);
if (raw_smp_processor_id() != boot_cpuid &&
(get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
-   create_shadowed_slbe(get_paca()->kstack,
-mmu_kernel_ssize, lflags, 2);
+   new_shadowed_slbe(get_paca()->kstack,
+mmu_kernel_ssize, lflags, SLOT_KSTACK);
 
asm volatile("isync":::"memory");
 }
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC 5/8] powerpc/slb: Add documentation to runtime patching of SLB encoding

2015-07-21 Thread Anshuman Khandual
From: "khand...@linux.vnet.ibm.com" 

This patch adds some documentation to the 'patch_slb_encoding' function
explaining how it clears the existing immediate value in the given
instruction and inserts a new one there.
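
As a rough illustration (not part of the patch), take a hypothetical
"li r3,0" instruction word (0x38600000); the patching amounts to:

    unsigned int insn = 0x38600000;          /* li r3,0              */
    insn = (insn & 0xffff0000) | 0x0510;     /* becomes li r3,0x510  */

The immediate operand lives in the low 16 bits of the instruction word,
so masking them off and OR-ing in the new value rewrites just the
operand.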

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/mm/slb.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index dcba4c2..8083a9e 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -278,7 +278,13 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 static inline void patch_slb_encoding(unsigned int *insn_addr,
  unsigned int immed)
 {
-   int insn = (*insn_addr & 0xffff0000) | immed;
+   /*
+* Currently this patches only "li" and "cmpldi"
+* instructions with an immediate value. Here it
+* just clears the existing immediate value from
+* the instruction and inserts a new one there.
+*/
+   unsigned int insn = (*insn_addr & 0xffff0000) | immed;
patch_instruction(insn_addr, insn);
 }
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC 6/8] powerpc/prom: Simplify the logic while fetching SLB size

2015-07-21 Thread Anshuman Khandual
From: "khand...@linux.vnet.ibm.com" 

This patch just simplifies the existing code logic while fetching
the SLB size property from the device tree.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/kernel/prom.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 8b888b1..f6168e2 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -223,14 +223,12 @@ static void __init check_cpu_slb_size(unsigned long node)
const __be32 *slb_size_ptr;
 
slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL);
-   if (slb_size_ptr != NULL) {
-   mmu_slb_size = be32_to_cpup(slb_size_ptr);
-   return;
-   }
-   slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
-   if (slb_size_ptr != NULL) {
-   mmu_slb_size = be32_to_cpup(slb_size_ptr);
+   if (!slb_size_ptr) {
+   slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
+   if (!slb_size_ptr)
+   return;
}
+   mmu_slb_size = be32_to_cpup(slb_size_ptr);
 }
 #else
 #define check_cpu_slb_size(node) do { } while(0)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC 4/8] powerpc/slb: Add some helper functions to improve modularization

2015-07-21 Thread Anshuman Khandual
From: "khand...@linux.vnet.ibm.com" 

This patch adds the following helper functions to improve modularization
and readability of the code.

(1) slb_invalid_all:        Invalidates the entire SLB
(2) slb_invalid_paca_slots: Invalidates SLB entries present in PACA
(3) kernel_linear_vsid_flags:   VSID flags for kernel linear mapping
(4) kernel_virtual_vsid_flags:  VSID flags for kernel virtual mapping

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/mm/slb.c | 87 ++-
 1 file changed, 59 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index cbeaaa2..dcba4c2 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -94,18 +94,37 @@ static inline void new_shadowed_slbe(unsigned long ea, int ssize,
 : "memory" );
 }
 
+static inline unsigned long kernel_linear_vsid_flags(void)
+{
+   return SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
+}
+
+static inline unsigned long kernel_virtual_vsid_flags(void)
+{
+   return SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp;
+}
+
+static inline unsigned long kernel_io_vsid_flags(void)
+{
+   return SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static inline unsigned long kernel_vmemmap_vsid_flags(void)
+{
+   return SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
+}
+#endif
+
 static void __slb_flush_and_rebolt(void)
 {
/* If you change this make sure you change SLB_NUM_BOLTED
 * and PR KVM appropriately too. */
-   unsigned long linear_llp, vmalloc_llp, lflags, vflags;
+   unsigned long lflags, vflags;
unsigned long ksp_esid_data, ksp_vsid_data;
 
-   linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
-   vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
-   lflags = SLB_VSID_KERNEL | linear_llp;
-   vflags = SLB_VSID_KERNEL | vmalloc_llp;
-
+   lflags = kernel_linear_vsid_flags();
+   vflags = kernel_virtual_vsid_flags();
ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, SLOT_KSTACK);
if ((ksp_esid_data & ~0xfffUL) <= PAGE_OFFSET) {
ksp_esid_data &= ~SLB_ESID_V;
@@ -153,7 +172,7 @@ void slb_vmalloc_update(void)
 {
unsigned long vflags;
 
-   vflags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp;
+   vflags = kernel_virtual_vsid_flags();
slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, SLOT_KVIRT);
slb_flush_and_rebolt();
 }
@@ -187,6 +206,23 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2));
 }
 
+static void slb_invalid_paca_slots(unsigned long offset)
+{
+   unsigned long slbie_data;
+   int i;
+
+   asm volatile("isync" : : : "memory");
+   for (i = 0; i < offset; i++) {
+   slbie_data = (unsigned long)get_paca()->slb_cache[i]
+   << SID_SHIFT; /* EA */
+   slbie_data |= user_segment_size(slbie_data)
+   << SLBIE_SSIZE_SHIFT;
+   slbie_data |= SLBIE_C; /* C set for user addresses */
+   asm volatile("slbie %0" : : "r" (slbie_data));
+   }
+   asm volatile("isync" : : : "memory");
+}
+
 /* Flush all user entries from the segment table of the current processor. */
 void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 {
@@ -206,17 +242,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
offset = get_paca()->slb_cache_ptr;
if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
offset <= SLB_CACHE_ENTRIES) {
-   int i;
-   asm volatile("isync" : : : "memory");
-   for (i = 0; i < offset; i++) {
-   slbie_data = (unsigned long)get_paca()->slb_cache[i]
-   << SID_SHIFT; /* EA */
-   slbie_data |= user_segment_size(slbie_data)
-   << SLBIE_SSIZE_SHIFT;
-   slbie_data |= SLBIE_C; /* C set for user addresses */
-   asm volatile("slbie %0" : : "r" (slbie_data));
-   }
-   asm volatile("isync" : : : "memory");
+   slb_invalid_paca_slots(offset);
} else {
__slb_flush_and_rebolt();
}
@@ -256,6 +282,14 @@ static inline void patch_slb_encoding(unsigned int *insn_addr,
patch_instruction(insn_addr, insn);
 }
 
+/* Invalidate the entire SLB (even slot 0) & all the ERATS */
+static inline void slb_invalid_all(void)
+{
+   asm volatile("isync":::"memory");
+   asm volatile("slbmte  %0,

[RFC 2/8] powerpc/slb: Rename all the 'entry' occurrences to 'slot'

2015-07-21 Thread Anshuman Khandual
From: "khand...@linux.vnet.ibm.com" 

These functions essentially deal with individual SLB slots. Using both
'entry' and 'slot' as synonyms makes the code really confusing at
times. This patch makes the usage uniform across the file by replacing
all those 'entry's with 'slot's.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/mm/slb.c | 29 ++---
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 62fafb3..3842a54 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -55,39 +55,39 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
 
 static inline void slb_shadow_update(unsigned long ea, int ssize,
 unsigned long flags,
-unsigned long entry)
+unsigned long slot)
 {
/*
-* Clear the ESID first so the entry is not valid while we are
+* Clear the ESID first so the slot is not valid while we are
 * updating it.  No write barriers are needed here, provided
 * we only update the current CPU's SLB shadow buffer.
 */
-   get_slb_shadow()->save_area[entry].esid = 0;
-   get_slb_shadow()->save_area[entry].vsid =
+   get_slb_shadow()->save_area[slot].esid = 0;
+   get_slb_shadow()->save_area[slot].vsid =
cpu_to_be64(mk_vsid_data(ea, ssize, flags));
-   get_slb_shadow()->save_area[entry].esid =
-   cpu_to_be64(mk_esid_data(ea, ssize, entry));
+   get_slb_shadow()->save_area[slot].esid =
+   cpu_to_be64(mk_esid_data(ea, ssize, slot));
 }
 
-static inline void slb_shadow_clear(unsigned long entry)
+static inline void slb_shadow_clear(unsigned long slot)
 {
-   get_slb_shadow()->save_area[entry].esid = 0;
+   get_slb_shadow()->save_area[slot].esid = 0;
 }
 
 static inline void create_shadowed_slbe(unsigned long ea, int ssize,
unsigned long flags,
-   unsigned long entry)
+   unsigned long slot)
 {
/*
 * Updating the shadow buffer before writing the SLB ensures
-* we don't get a stale entry here if we get preempted by PHYP
+* we don't get a stale slot here if we get preempted by PHYP
 * between these two statements.
 */
-   slb_shadow_update(ea, ssize, flags, entry);
+   slb_shadow_update(ea, ssize, flags, slot);
 
asm volatile("slbmte  %0,%1" :
 : "r" (mk_vsid_data(ea, ssize, flags)),
-  "r" (mk_esid_data(ea, ssize, entry))
+  "r" (mk_esid_data(ea, ssize, slot))
 : "memory" );
 }
 
@@ -109,7 +109,7 @@ static void __slb_flush_and_rebolt(void)
ksp_vsid_data = 0;
slb_shadow_clear(2);
} else {
-   /* Update stack entry; others don't change */
+   /* Update stack slot; others don't change */
slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, 2);
ksp_vsid_data =
be64_to_cpu(get_slb_shadow()->save_area[2].vsid);
@@ -313,13 +313,12 @@ void slb_initialize(void)
asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
asm volatile("isync; slbia; isync":::"memory");
create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, 0);
-
create_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, 1);
 
/* For the boot cpu, we're running on the stack in init_thread_union,
 * which is in the first segment of the linear mapping, and also
 * get_paca()->kstack hasn't been initialized yet.
-* For secondary cpus, we need to bolt the kernel stack entry now.
+* For secondary cpus, we need to bolt the kernel stack slot now.
 */
slb_shadow_clear(2);
if (raw_smp_processor_id() != boot_cpuid &&
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V3] powerpc/irq: Enable some more exceptions in /proc/interrupts interface

2015-07-21 Thread Anshuman Khandual
On 07/13/2015 01:46 PM, Anshuman Khandual wrote:
> This patch enables facility unavailable exceptions for generic facility,
> FPU, ALTIVEC and VSX in /proc/interrupts listing by incrementing their
> newly added IRQ statistical counters as and when these exceptions happen.
> This also adds couple of helper functions which will be called from within
> the interrupt handler context to update their statistics. Similarly this
> patch also enables alignment and program check exceptions as well.
> 
>   With this patch being applied, /proc/interrupts looks something
> like this after running various workloads which create these exceptions.
> 
> --
>CPU0   CPU1
>  16:   5734  24129  XICS   2 Level IPI
>  17:  0  0  XICS 4101 Level virtio0
>  18:  0  0  XICS 4100 Level ohci_hcd:usb1
>  19:  13920  0  XICS 4099 Level virtio1
>  20:  0  0  XICS 4096 Level RAS_EPOW
>  21:   6160   3241  XICS 4102 Level ibmvscsi
>  22:  1  0  XICS 4103 Level hvc_console
> LOC:   6825   3556   Local timer interrupts for timer event device
> LOC: 22 41   Local timer interrupts for others
> SPU:  1  0   Spurious interrupts
> PMI:  0  0   Performance monitoring interrupts
> MCE:  0  0   Machine check exceptions
> DBL:  0  0   Doorbell interrupts
> ALN:  0  0   Alignment exceptions
> PRG:  0  0   Program check exceptions
> FAC:  7 14   Facility unavailable exceptions
> FPU:   2928   3162   FPU unavailable exceptions
> ALT:  12950  15536   AltiVec unavailable exceptions
> VSX:  12930 220183   VSX unavailable exceptions
> --
> 
> Signed-off-by: Anshuman Khandual 
> ---
> Changes in V3:
> - Changed the display string from "ALTIVEC" to "AltiVec"
> - Now captured "Facility unavailable exceptions" in the example

Michael/Mikey,

I had already posted the performance comparison details with the
patch in the previous version's mail thread. Does this new version
look good enough?

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC,2/8] powerpc/slb: Rename all the 'entry' occurrences to 'slot'

2015-07-21 Thread Anshuman Khandual
On 07/21/2015 03:16 PM, Michael Ellerman wrote:
> On Tue, 2015-21-07 at 06:58:40 UTC, Anshuman Khandual wrote:
>> > From: "khand...@linux.vnet.ibm.com" 
>> > 
>> > These are essentially SLB individual slots what we are dealing with
>> > in these functions. Usage of both 'entry' and 'slot' synonyms makes
>> > it real confusing sometimes. This patch makes it uniform across the
>> > file by replacing all those 'entry's with 'slot's.
> No I think it would be better the other way around.
> 
> Currently we use entry in 14 places and slot in 3.
> 
> Both can be correct in some places, but not always.
> 
> For example:
> 
>> > -   * Clear the ESID first so the entry is not valid while we are
>> > +   * Clear the ESID first so the slot is not valid while we are
> That doesn't make sense with "slot", a slot is not valid, only an entry in a
> slot is valid.
> 
> Looking at the existing uses of slot they will all make sense if you change
> them to entry.

Sure, yeah will do the other way around.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC, 6/8] powerpc/prom: Simplify the logic while fetching SLB size

2015-07-21 Thread Anshuman Khandual
On 07/21/2015 03:51 PM, Michael Ellerman wrote:
> On Tue, 2015-21-07 at 06:58:44 UTC, Anshuman Khandual wrote:
>> > From: "khand...@linux.vnet.ibm.com" 
>> > 
>> > This patch just simplifies the existing code logic while fetching
>> > the SLB size property from the device tree.
>> > 
>> > Signed-off-by: Anshuman Khandual 
>> > ---
>> >  arch/powerpc/kernel/prom.c | 12 +---
>> >  1 file changed, 5 insertions(+), 7 deletions(-)
>> > 
>> > diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
>> > index 8b888b1..f6168e2 100644
>> > --- a/arch/powerpc/kernel/prom.c
>> > +++ b/arch/powerpc/kernel/prom.c
>> > @@ -223,14 +223,12 @@ static void __init check_cpu_slb_size(unsigned long node)
>> >const __be32 *slb_size_ptr;
>> >  
>> >slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL);
>> > -  if (slb_size_ptr != NULL) {
>> > -  mmu_slb_size = be32_to_cpup(slb_size_ptr);
>> > -  return;
>> > -  }
>> > -  slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
>> > -  if (slb_size_ptr != NULL) {
>> > -  mmu_slb_size = be32_to_cpup(slb_size_ptr);
>> > +  if (!slb_size_ptr) {
>> > +  slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
>> > +  if (!slb_size_ptr)
>> > +  return;
>> >}
>> > +  mmu_slb_size = be32_to_cpup(slb_size_ptr);
>> >  }
> It's still ugly. Why not go the whole way:
> 
> 
>   p = of_get_flat_dt_prop(node, "slb-size", NULL) ? :
>   of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
> 
>   if (p)
>   mmu_slb_size = be32_to_cpup(p);

Yeah this is better.
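
(For what it's worth, the "? :" above is the GNU C conditional with an
omitted middle operand:

    p = a ? : b;    /* same as p = a ? a : b, with a evaluated once */

which is why it reads a little unusually.)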

> 
> 
> And while you're at it, rename the function, it doesn't check anything. It
> initialises mmu_slb_size, so call it init_mmu_slb_size()?

Sure, will do.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC, 7/8] powerpc/xmon: Drop 'valid' from the condition inside 'dump_segments'

2015-07-21 Thread Anshuman Khandual
On 07/21/2015 03:30 PM, Michael Ellerman wrote:
> On Tue, 2015-21-07 at 06:58:45 UTC, Anshuman Khandual wrote:
>> > From: "khand...@linux.vnet.ibm.com" 
>> > 
>> > Value of 'valid' is zero when 'esid' is zero and it does not matter
>> > when 'esid' is non-zero. 
> Yes it does. It tells you whether the entry is valid?

Yeah, but it does not change the outcome of the if condition check
here. A non-zero esid will make the condition test pass irrespective
of the value of 'valid'; since valid = (esid & SLB_ESID_V), 'valid'
can be non-zero only when 'esid' already is. Yes, 'valid' will be
checked inside the code block to print the details, but the point was
that the value of 'valid' does not make any difference to the 'if'
condition check in the first place. Unless I am getting tricked here
somehow :)

> 
> In practice maybe you only see invalid entries that are entirely zero, and so
> they get skipped anyway, but that's not guaranteed.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC, 8/8] powerpc/xmon: Add some more elements to the existing PACA dump list

2015-07-21 Thread Anshuman Khandual
On 07/21/2015 03:38 PM, Michael Ellerman wrote:
> On Tue, 2015-21-07 at 06:58:46 UTC, Anshuman Khandual wrote:
>> > From: "khand...@linux.vnet.ibm.com" 
>> > 
>> > This patch adds some more elements to the existing PACA dump list
>> > inside a xmon session which can be listed here.
>> > 
>> >- hmi_event_available
>> >- dscr_default
>> >- vmalloc_sllp
>> >- slb_cache_ptr
>> >- sprg_vdso
>> >- tm_scratch
>> >- core_idle_state_ptr
>> >- thread_idle_state
>> >- thread_mask
> This is probably OK, except you broke the ppc64e build again.

Will fix it up while sending this as a patch series. Thanks for
the quick review of the series.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [1/5] powerpc/perf: Drop the branch sample when 'from' cannot be fetched

2015-07-27 Thread Anshuman Khandual
On 07/27/2015 09:49 AM, Michael Ellerman wrote:
> On Tue, 2015-30-06 at 08:20:27 UTC, Anshuman Khandual wrote:
>> BHRB (Branch History Rolling Buffer) is a rolling buffer. Hence we
>> might end up in a situation where we have read one target address
>> but when we try to read the next entry indicating the from address
>> of the target address, the buffer just overflows. In this case, the
>> captured from address will be zero which indicates the end of the
>> buffer.
> 
> Right. But with SMT8 the size of the buffer is very small, so we will actually
> hit this case somewhat often. When we originally wrote this we decided it was
> better to get some information, ie. the from address, than no information at
> all.

You are right. But practically, as of now, we are not using this kind
of (from, 0) branch entry anywhere as a special case. Moreover, for
certain kinds of workloads which have small code and few branches, the
chances of getting this kind of (from, 0) branch increase a lot,
probably making them one of the highest-percentage entries in the final
perf report. Now with this change of code, the workload session might
have fewer branch entries overall, but in my opinion it represents a
more accurate branch profile of the given workload in percentage terms.
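
For reference, the handling being proposed is roughly of this shape; a
sketch only, not the exact power_pmu_bhrb_read() code:

    val = read_bhrb(r_index++);
    if (!val)
        /*
         * The rolling buffer wrapped under us and the "from"
         * address is lost; drop the half-captured record instead
         * of emitting a partial branch entry.
         */
        break;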

> 
>>  This patch drops the entire branch record which would have
>> otherwise confused the user space tools.
> 
> Does it confuse the tools? Can you show me before/after output from perf?

The word 'confuse' might be a little misleading. But the point, as
explained above, is that the relative branch percentage profile of
certain workloads might be distorted, and that I believe is true.
Also, branch entries like "from -> 0" in the perf report might be
confusing to users who don't expect to see this kind of entry in the
final perf report and will never get into "perf report -D" to figure
out what really happened.

> 
> I'm not opposed to changing this but we need to be 100% sure it's the best
> option.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [3/5] powerpc/perf: Replace last usage of get_cpu_var with this_cpu_ptr

2015-07-27 Thread Anshuman Khandual
On 07/27/2015 10:45 AM, Michael Ellerman wrote:
> On Tue, 2015-30-06 at 08:20:29 UTC, Anshuman Khandual wrote:
>> > The commit 69111bac42f5ce ("powerpc: Replace __get_cpu_var uses")
>> > replaced all usage of get_cpu_var with this_cpu_ptr inside core
>> > perf event handling on powerpc. But it skipped one of them which
>> > is being replaced with this patch.
> No it replaced all uses of __get_cpu_var(), not get_cpu_var(). The difference
> is important.

Hmm, I see. I was not aware of that. Daniel suggested this and I
thought it made sense, hence the proposed change.

> 
> get_cpu_var() disables preemption for you, so it's only safe to switch to
> this_cpu_ptr() if preemption is already disabled. Is it?

We don't disable preemption inside power_pmu_event_init, nor inside
perf_try_init_event from where it gets called, so I guess the answer
is NO. Will drop this patch next time around.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 7/8] powerpc/xmon: Drop the 'valid' variable completely in 'dump_segments'

2015-07-29 Thread Anshuman Khandual
The value of the 'valid' variable is zero when 'esid' is zero, and it
does not matter when 'esid' is non-zero. The variable 'valid' can
therefore be dropped from the function 'dump_segments' by checking the
validity of 'esid' inside the nested code block. This patch makes that
change.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/xmon/xmon.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index e599259..bc1b066a 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2731,7 +2731,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid,
 void dump_segments(void)
 {
int i;
-   unsigned long esid,vsid,valid;
+   unsigned long esid,vsid;
unsigned long llp;
 
printf("SLB contents of cpu 0x%x\n", smp_processor_id());
@@ -2739,10 +2739,9 @@ void dump_segments(void)
for (i = 0; i < mmu_slb_size; i++) {
asm volatile("slbmfee  %0,%1" : "=r" (esid) : "r" (i));
asm volatile("slbmfev  %0,%1" : "=r" (vsid) : "r" (i));
-   valid = (esid & SLB_ESID_V);
-   if (valid | esid | vsid) {
+   if (esid || vsid) {
printf("%02d %016lx %016lx", i, esid, vsid);
-   if (valid) {
+   if (esid & SLB_ESID_V) {
llp = vsid & SLB_VSID_LLP;
if (vsid & SLB_VSID_B_1T) {
printf("  1T  ESID=%9lx  VSID=%13lx 
LLP:%3lx \n",
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 6/8] powerpc/prom: Simplify the logic while fetching SLB size

2015-07-29 Thread Anshuman Khandual
This patch just simplifies the existing code logic while fetching
the SLB size property from the device tree. This also changes the
function name from check_cpu_slb_size to init_mmu_slb_size as
it just initializes the mmu_slb_size value.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/kernel/prom.c | 18 +++---
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 8b888b1..4bb43c0 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -218,22 +218,18 @@ static void __init check_cpu_pa_features(unsigned long node)
 }
 
 #ifdef CONFIG_PPC_STD_MMU_64
-static void __init check_cpu_slb_size(unsigned long node)
+static void __init init_mmu_slb_size(unsigned long node)
 {
const __be32 *slb_size_ptr;
 
-   slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL);
-   if (slb_size_ptr != NULL) {
-   mmu_slb_size = be32_to_cpup(slb_size_ptr);
-   return;
-   }
-   slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
-   if (slb_size_ptr != NULL) {
+   slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL) ? :
+   of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
+
+   if (slb_size_ptr)
mmu_slb_size = be32_to_cpup(slb_size_ptr);
-   }
 }
 #else
-#define check_cpu_slb_size(node) do { } while(0)
+#define init_mmu_slb_size(node) do { } while(0)
 #endif
 
 static struct feature_property {
@@ -380,7 +376,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 
check_cpu_feature_properties(node);
check_cpu_pa_features(node);
-   check_cpu_slb_size(node);
+   init_mmu_slb_size(node);
 
 #ifdef CONFIG_PPC64
if (nthreads > 1)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 5/8] powerpc/slb: Add documentation to runtime patching of SLB encoding

2015-07-29 Thread Anshuman Khandual
This patch adds some documentation to the 'patch_slb_encoding' function
explaining how it clears the existing immediate value in the given
instruction and inserts a new one there.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/mm/slb.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index c87d5de..1962357 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -279,7 +279,18 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 static inline void patch_slb_encoding(unsigned int *insn_addr,
  unsigned int immed)
 {
-   int insn = (*insn_addr & 0xffff0000) | immed;
+
+   /*
+* This function patches either an li or a cmpldi instruction with
+* a new immediate value. This relies on the fact that both li
+* (which is actually addi) and cmpldi both take a 16-bit immediate
+* value, and it is situated in the same location in the instruction,
+* ie. bits 16-31 (Big endian bit order) or the lower 16 bits.
+* To patch the value we read the existing instruction, clear the
+* immediate value, and or in our new value, then write the instruction
+* back.
+*/
+   unsigned int insn = (*insn_addr & 0xffff0000) | immed;
patch_instruction(insn_addr, insn);
 }
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 4/8] powerpc/slb: Add some helper functions to improve modularization

2015-07-29 Thread Anshuman Khandual
This patch adds the following six helper functions to help improve
modularization and readability of the code.

(1) slb_invalidate_all: Invalidates the entire SLB
(2) slb_invalidate: Invalidates SLB entries present in PACA
(3) mmu_linear_vsid_flags:  VSID flags for kernel linear mapping
(4) mmu_virtual_vsid_flags: VSID flags for kernel virtual mapping
(5) mmu_vmemmap_vsid_flags: VSID flags for kernel vmem mapping
(6) mmu_io_vsid_flags:  VSID flags for kernel I/O mapping

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/mm/slb.c | 92 ++-
 1 file changed, 61 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 701a57f..c87d5de 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -96,18 +96,37 @@ static inline void new_shadowed_slbe(unsigned long ea, int ssize,
 : "memory" );
 }
 
+static inline unsigned long mmu_linear_vsid_flags(void)
+{
+   return SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
+}
+
+static inline unsigned long mmu_vmalloc_vsid_flags(void)
+{
+   return SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp;
+}
+
+static inline unsigned long mmu_io_vsid_flags(void)
+{
+   return SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static inline unsigned long mmu_vmemmap_vsid_flags(void)
+{
+   return SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
+}
+#endif
+
 static void __slb_flush_and_rebolt(void)
 {
/* If you change this make sure you change SLB_NUM_BOLTED
 * and PR KVM appropriately too. */
-   unsigned long linear_llp, vmalloc_llp, lflags, vflags;
+   unsigned long lflags, vflags;
unsigned long ksp_esid_data, ksp_vsid_data;
 
-   linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
-   vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
-   lflags = SLB_VSID_KERNEL | linear_llp;
-   vflags = SLB_VSID_KERNEL | vmalloc_llp;
-
+   lflags = mmu_linear_vsid_flags();
+   vflags = mmu_vmalloc_vsid_flags();
ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, KSTACK_SLOT);
if ((ksp_esid_data & ~0xfffUL) <= PAGE_OFFSET) {
ksp_esid_data &= ~SLB_ESID_V;
@@ -155,7 +174,7 @@ void slb_vmalloc_update(void)
 {
unsigned long vflags;
 
-   vflags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp;
+   vflags = mmu_vmalloc_vsid_flags();
slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_SLOT);
slb_flush_and_rebolt();
 }
@@ -189,26 +208,15 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2));
 }
 
-/* Flush all user entries from the segment table of the current processor. */
-void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
+static void slb_invalidate(void)
 {
-   unsigned long offset;
unsigned long slbie_data = 0;
-   unsigned long pc = KSTK_EIP(tsk);
-   unsigned long stack = KSTK_ESP(tsk);
-   unsigned long exec_base;
+   unsigned long offset;
+   int i;
 
-   /*
-* We need interrupts hard-disabled here, not just soft-disabled,
-* so that a PMU interrupt can't occur, which might try to access
-* user memory (to get a stack trace) and possible cause an SLB miss
-* which would update the slb_cache/slb_cache_ptr fields in the PACA.
-*/
-   hard_irq_disable();
offset = get_paca()->slb_cache_ptr;
if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
offset <= SLB_CACHE_ENTRIES) {
-   int i;
asm volatile("isync" : : : "memory");
for (i = 0; i < offset; i++) {
slbie_data = (unsigned long)get_paca()->slb_cache[i]
@@ -226,6 +234,23 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
/* Workaround POWER5 < DD2.1 issue */
if (offset == 1 || offset > SLB_CACHE_ENTRIES)
asm volatile("slbie %0" : : "r" (slbie_data));
+}
+
+/* Flush all user entries from the segment table of the current processor. */
+void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
+{
+   unsigned long pc = KSTK_EIP(tsk);
+   unsigned long stack = KSTK_ESP(tsk);
+   unsigned long exec_base;
+
+   /*
+* We need interrupts hard-disabled here, not just soft-disabled,
+* so that a PMU interrupt can't occur, which might try to access
+* user memory (to get a stack trace) and possible cause an SLB miss
+* which would update the slb_cache/slb_cache_ptr fields in the PACA.
+*/
+   hard_irq_disable();
+   slb_invalidate();
 
get_paca()->slb_cache_ptr = 0;
   

[PATCH 3/8] powerpc/slb: Define macros for the bolted slots

2015-07-29 Thread Anshuman Khandual
This patch defines macros for all three bolted SLB slots. It also
renames the 'create_shadowed_slbe' function to 'new_shadowed_slbe'.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/mm/slb.c | 29 +
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index faf9f0c..701a57f 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -25,6 +25,11 @@
 #include 
 #include 
 
+enum slb_slots {
+   LINEAR_SLOT = 0, /* Kernel linear map  (0xc000) */
+   VMALLOC_SLOT= 1, /* Kernel virtual map (0xd000) */
+   KSTACK_SLOT = 2, /* Kernel stack map */
+};
 
 extern void slb_allocate_realmode(unsigned long ea);
 extern void slb_allocate_user(unsigned long ea);
@@ -74,7 +79,7 @@ static inline void slb_shadow_clear(unsigned long entry)
get_slb_shadow()->save_area[entry].esid = 0;
 }
 
-static inline void create_shadowed_slbe(unsigned long ea, int ssize,
+static inline void new_shadowed_slbe(unsigned long ea, int ssize,
unsigned long flags,
unsigned long entry)
 {
@@ -103,16 +108,16 @@ static void __slb_flush_and_rebolt(void)
lflags = SLB_VSID_KERNEL | linear_llp;
vflags = SLB_VSID_KERNEL | vmalloc_llp;
 
-   ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, 2);
+   ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, KSTACK_SLOT);
if ((ksp_esid_data & ~0xfffUL) <= PAGE_OFFSET) {
ksp_esid_data &= ~SLB_ESID_V;
ksp_vsid_data = 0;
-   slb_shadow_clear(2);
+   slb_shadow_clear(KSTACK_SLOT);
} else {
/* Update stack entry; others don't change */
-   slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, 2);
+   slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, KSTACK_SLOT);
ksp_vsid_data =
-   be64_to_cpu(get_slb_shadow()->save_area[2].vsid);
+   be64_to_cpu(get_slb_shadow()->save_area[KSTACK_SLOT].vsid);
}
 
/* We need to do this all in asm, so we're sure we don't touch
@@ -125,7 +130,7 @@ static void __slb_flush_and_rebolt(void)
 "slbmte%2,%3\n"
 "isync"
 :: "r"(mk_vsid_data(VMALLOC_START, mmu_kernel_ssize, 
vflags)),
-   "r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, 1)),
+   "r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, 
VMALLOC_SLOT)),
"r"(ksp_vsid_data),
"r"(ksp_esid_data)
 : "memory");
@@ -151,7 +156,7 @@ void slb_vmalloc_update(void)
unsigned long vflags;
 
vflags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp;
-   slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, 1);
+   slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_SLOT);
slb_flush_and_rebolt();
 }
 
@@ -312,19 +317,19 @@ void slb_initialize(void)
asm volatile("isync":::"memory");
asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
asm volatile("isync; slbia; isync":::"memory");
-   create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, 0);
-   create_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, 1);
+   new_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_SLOT);
+   new_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_SLOT);
 
/* For the boot cpu, we're running on the stack in init_thread_union,
 * which is in the first segment of the linear mapping, and also
 * get_paca()->kstack hasn't been initialized yet.
 * For secondary cpus, we need to bolt the kernel stack entry now.
 */
-   slb_shadow_clear(2);
+   slb_shadow_clear(KSTACK_SLOT);
if (raw_smp_processor_id() != boot_cpuid &&
(get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
-   create_shadowed_slbe(get_paca()->kstack,
-mmu_kernel_ssize, lflags, 2);
+   new_shadowed_slbe(get_paca()->kstack,
+mmu_kernel_ssize, lflags, KSTACK_SLOT);
 
asm volatile("isync":::"memory");
 }
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/8] powerpc/slb: Remove a duplicate extern variable

2015-07-29 Thread Anshuman Khandual
This patch just removes a redundant declaration of the extern variable
'slb_compare_rr_to_size'. This patch does not change any functionality.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/mm/slb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 6e450ca..62fafb3 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -253,7 +253,6 @@ static inline void patch_slb_encoding(unsigned int *insn_addr,
patch_instruction(insn_addr, insn);
 }
 
-extern u32 slb_compare_rr_to_size[];
 extern u32 slb_miss_kernel_load_linear[];
 extern u32 slb_miss_kernel_load_io[];
 extern u32 slb_compare_rr_to_size[];
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 8/8] powerpc/xmon: Add some more elements to the existing PACA dump list

2015-07-29 Thread Anshuman Khandual
This patch adds a set of new elements, listed below, to the existing
PACA dump list inside an xmon session, improving the overall xmon
debug support.

(1) hmi_event_available
(2) dscr_default
(3) vmalloc_sllp
(4) slb_cache_ptr
(5) sprg_vdso
(6) tm_scratch
(7) core_idle_state_ptr
(8) thread_idle_state
(9) thread_mask
(10) slb_shadow
(11) pgd
(12) kernel_pgd
(13) tcd_ptr
(14) mc_kstack
(15) crit_kstack
(16) dbg_kstack
(17) user_time
(18) system_time
(19) user_time_scaled
(20) starttime
(21) starttime_user
(22) startspurr
(23) utime_sspurr
(24) stolen_time

With this patch, a typical xmon PACA dump looks something like this.

paca for cpu 0x0 @ cfdc:
 possible = yes
 present  = yes
 online   = yes
 lock_token   = 0x8000  (0x8)
 paca_index   = 0x0 (0xa)
 kernel_toc   = 0xc0e79300  (0x10)
 kernelbase   = 0xc000  (0x18)
 kernel_msr   = 0xb0001032  (0x20)
 emergency_sp = 0xc0003fff  (0x28)
 mc_emergency_sp  = 0xc0003ffec000  (0x2e0)
 in_mce   = 0x0 (0x2e8)
 hmi_event_available  = 0x0 (0x2ea)
 data_offset  = 0xfa9f  (0x30)
 hw_cpu_id= 0x0 (0x38)
 cpu_start= 0x1 (0x3a)
 kexec_state  = 0x0 (0x3b)
 slb_shadow[0]:   = 0xc800 0x40016e7779000510
 slb_shadow[1]:   = 0xd801 0x400142add1000510
 dscr_default = 0x0 (0x58)
 vmalloc_sllp = 0x510   (0x1b8)
 slb_cache_ptr= 0x3 (0x1ba)
 slb_cache[0]:= 0x3f000
 slb_cache[1]:= 0x1
 slb_cache[2]:= 0x1000
 __current= 0xc000a7406b70  (0x290)
 kstack   = 0xc000a750fe30  (0x298)
 stab_rr  = 0x11 (0x2a0)
 saved_r1 = 0xc000a750f360  (0x2a8)
 trap_save= 0x0 (0x2b8)
 soft_enabled = 0x0 (0x2ba)
 irq_happened = 0x1 (0x2bb)
 io_sync  = 0x0 (0x2bc)
 irq_work_pending = 0x0 (0x2bd)
 nap_state_lost   = 0x0 (0x2be)
 sprg_vdso= 0x0 (0x2c0)
 tm_scratch   = 0x80010280f032  (0x2c8)
 core_idle_state_ptr  = (null)  (0x2d0)
 thread_idle_state= 0x0 (0x2d8)
 thread_mask  = 0x0 (0x2d9)
 subcore_sibling_mask = 0x0 (0x2da)
 user_time= 0x18895 (0x2f0)
 system_time  = 0x11dc2 (0x2f8)
 user_time_scaled = 0x0 (0x300)
 starttime= 0xe64688b4688a  (0x308)
 starttime_user   = 0xe64688b466d1  (0x310)
 startspurr   = 0x1a79afea8 (0x318)
 utime_sspurr = 0x0 (0x320)
 stolen_time  = 0x0 (0x328)

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/xmon/xmon.c | 57 
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index bc1b066a..1e67c8b 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2073,6 +2073,9 @@ static void xmon_rawdump (unsigned long adrs, long ndump)
 static void dump_one_paca(int cpu)
 {
struct paca_struct *p;
+#ifdef CONFIG_PPC_STD_MMU_64
+   int i = 0;
+#endif
 
if (setjmp(bus_error_jmp) != 0) {
printf("*** Error dumping paca for cpu 0x%x!\n", cpu);
@@ -2086,12 +2089,12 @@ static void dump_one_paca(int cpu)
 
printf("paca for cpu 0x%x @ %p:\n", cpu, p);
 
-   printf(" %-*s = %s\n", 16, "possible", cpu_possible(cpu) ? "yes" : 
"no");
-   printf(" %-*s = %s\n", 16, "present", cpu_present(cpu) ? "yes" : "no");
-   printf(" %-*s = %s\n", 16, "online", cpu_online(cpu) ? "yes" : "no");
+   printf(" %-*s = %s\n", 20, "possible", cpu_possible(cpu) ? "yes" : 
"no");
+   printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? "yes" : "no");
+   printf(" %-*s = %s\n", 20, "online", cpu_online(cpu) ? "yes" : "no");
 
 #define DUMP(paca, name, format) \
-   printf(" %-*s = %#-*"format"\t(0x%lx)\n", 16, #name, 18, paca->name, \
+   printf(" %-*s = %#-*"format"\t(0x%lx)\n", 20, #name, 18, paca->name, \
offsetof(struct paca_stru

[PATCH 2/8] powerpc/slb: Rename all the 'slot' occurrences to 'entry'

2015-07-29 Thread Anshuman Khandual
These functions essentially deal with entries in individual SLB slots.
Using both 'entry' and 'slot' as synonyms makes the code really
confusing at times. This patch makes the usage uniform across the file
by replacing all those 'slot's with 'entry's.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/mm/slb.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 62fafb3..faf9f0c 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -41,9 +41,9 @@ static void slb_allocate(unsigned long ea)
(((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
 
 static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
-unsigned long slot)
+unsigned long entry)
 {
-   return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot;
+   return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | entry;
 }
 
 static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
@@ -308,12 +308,11 @@ void slb_initialize(void)
lflags = SLB_VSID_KERNEL | linear_llp;
vflags = SLB_VSID_KERNEL | vmalloc_llp;
 
-   /* Invalidate the entire SLB (even slot 0) & all the ERATS */
+   /* Invalidate the entire SLB (even entry 0) & all the ERATS */
asm volatile("isync":::"memory");
asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
asm volatile("isync; slbia; isync":::"memory");
create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, 0);
-
create_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, 1);
 
/* For the boot cpu, we're running on the stack in init_thread_union,
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [4/5] powerpc/perf: Change name & type of 'pred' in power_pmu_bhrb_read

2015-07-29 Thread Anshuman Khandual
On 07/29/2015 08:55 AM, Michael Ellerman wrote:
> On Tue, 2015-30-06 at 08:20:30 UTC, Anshuman Khandual wrote:
>> > Branch record attributes 'mispred' and 'predicted' are single bit
>> > fields as defined in the perf ABI. Hence the data type of the field
>> > 'pred' used during BHRB processing should be changed from integer
>> > to bool. This patch also changes the name of the variable from 'pred'
>> > to 'mispred' making the logical inversion process more meaningful
>> > and readable.
> This whole function is a mess.
> 
> There's no good reason why we're doing the assignment to pred/mispred in two
> places to begin with, so if that was eliminated we wouldn't need a local for
> mispred to begin with.

Not sure whether I got this right. We are assigning mispred once with
the value (val & BHRB_PREDICTION) and then assigning mispred and its
inversion to two different fields of the branch entry as required.

> 
> Then there's the type juggling, all of which probably works but is fishy and
> horrible.

With this patch and one more (the 2nd patch of the BHRB SW filter
series), we are trying to make it better.

> 
> You take a u64, bitwise and it with a mask, assign that to a boolean, then 
> take

So that any residual non-zero value after the "AND" operation will
become logical TRUE for the boolean. We don't use any shifting here
as BHRB_PREDICTION tests the right-most (least significant) bit in
the sequence.

> the boolean, *bitwise* negate that and assign the result to a single bit
> bitfield.

This is getting fixed with a subsequent patch (2nd patch of the BHRB
SW filter series) in a new function called insert_branch.

+static inline void insert_branch(struct cpu_hw_events *cpuhw,
+   int index, u64 from, u64 to, bool mispred)
+{
+   cpuhw->bhrb_entries[index].from = from;
+   cpuhw->bhrb_entries[index].to = to;
+   cpuhw->bhrb_entries[index].mispred = mispred;
+   cpuhw->bhrb_entries[index].predicted = !mispred;
+}

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/prom: Use DRCONF flags while processing detected LMBs

2015-08-06 Thread Anshuman Khandual
This patch just replaces hard coded values with existing
DRCONF flags while processing detected LMBs from the device
tree. This does not change any functionality.

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/kernel/prom.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 8b888b1..70a8cab 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -476,9 +476,10 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
flags = of_read_number(&dm[3], 1);
/* skip DRC index, pad, assoc. list index, flags */
dm += 4;
-   /* skip this block if the reserved bit is set in flags (0x80)
-  or if the block is not assigned to this partition (0x8) */
-   if ((flags & 0x80) || !(flags & 0x8))
+   /* skip this block if the reserved bit is set in flags
+  or if the block is not assigned to this partition */
+   if ((flags & DRCONF_MEM_RESERVED) ||
+   !(flags & DRCONF_MEM_ASSIGNED))
continue;
size = memblock_size;
rngs = 1;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [V3] powerpc/irq: Enable some more exceptions in /proc/interrupts interface

2015-08-06 Thread Anshuman Khandual
On 08/04/2015 03:27 PM, Michael Ellerman wrote:
> On Mon, 2015-13-07 at 08:16:06 UTC, Anshuman Khandual wrote:
>> This patch enables facility unavailable exceptions for generic facility,
>> FPU, ALTIVEC and VSX in /proc/interrupts listing by incrementing their
>> newly added IRQ statistical counters as and when these exceptions happen.
>> This also adds couple of helper functions which will be called from within
>> the interrupt handler context to update their statistics. Similarly this
>> patch also enables alignment and program check exceptions as well.
> 
> ...
> 
>> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
>> index 0a0399c2..a86180c 100644
>> --- a/arch/powerpc/kernel/exceptions-64s.S
>> +++ b/arch/powerpc/kernel/exceptions-64s.S
>> @@ -1158,6 +1158,7 @@ BEGIN_FTR_SECTION
>>  END_FTR_SECTION_IFSET(CPU_FTR_TM)
>>  #endif
>>  bl  load_up_fpu
>> +bl  fpu_unav_exceptions_count
> 
> Is it safe to call C code here?

Hmm, is it not? I had that question but was not really sure. I don't
understand the difference between 'fast_exception_return' and
'ret_from_except' completely. Will converting the following sequence
of code

bl  load_up_fpu
+   bl  fpu_unav_exceptions_count
b   fast_exception_return

into

bl  load_up_fpu
RECONCILE_IRQ_STATE(r10, r11)
addir3,r1,STACK_FRAME_OVERHEAD
+   bl  fpu_unav_exceptions_count
b   ret_from_except

help solve the problem?
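
For reference, the C helper being called is tiny. A minimal sketch,
assuming a new per-cpu counter whose field name here is illustrative:

    void fpu_unav_exceptions_count(void)
    {
        /* field name is an assumption, not from the patch */
        __this_cpu_inc(irq_stat.fpu_unav_exceptions);
    }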

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc/prom: Use DRCONF flags while processing detected LMBs

2015-08-11 Thread Anshuman Khandual
On 08/11/2015 03:18 AM, Michael Ellerman wrote:
> On Fri, 2015-08-07 at 07:49 +0530, Madhavan Srinivasan wrote:
>> > 
>> > On Thursday 06 August 2015 06:35 PM, Anshuman Khandual wrote:
>>> > > This patch just replaces hard coded values with existing
>> > 
>> >   Please drop "This patch just" and start with "Replace hard ..."
>> > 
>> >   https://www.kernel.org/doc/Documentation/SubmittingPatches 
> Yeah I rewrote it as:
> 
> Replace hard coded values with existing DRCONF flags while processing
> detected LMBs from the device tree. Does not change any functionality.

Thanks Michael.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 8/8] powerpc/xmon: Add some more elements to the existing PACA dump list

2015-08-11 Thread Anshuman Khandual
On 08/12/2015 11:35 AM, Michael Ellerman wrote:
> On Wed, 2015-07-29 at 12:40 +0530, Anshuman Khandual wrote:
>> This patch adds a set of new elements to the existing PACA dump list
>> inside an xmon session which can be listed below improving the overall
>> xmon debug support.
>>
>> (1) hmi_event_available
>> (2) dscr_default
>> (3) vmalloc_sllp
>> (4) slb_cache_ptr
>> (5) sprg_vdso
>> (6) tm_scratch
>> (7) core_idle_state_ptr
>> (8) thread_idle_state
>> (9) thread_mask
>> (10) slb_shadow
>> (11) pgd
>> (12) kernel_pgd
>> (13) tcd_ptr
>> (14) mc_kstack
>> (15) crit_kstack
>> (16) dbg_kstack
>> (17) user_time
>> (18) system_time
>> (19) user_time_scaled
>> (20) starttime
>> (21) starttime_user
>> (22) startspurr
>> (23) utime_sspurr
>> (24) stolen_time
> 
> Adding these makes the paca display much longer than 24 lines. I know in
> general we don't worry too much about folks on 80x24 green screens, but it's
> nice if xmon works OK on those. Or on virtual consoles that don't scroll for
> whatever reason.
> 
> So I'm going to hold off on this one until we have a way to display some of 
> the
> paca. I have an idea for that and will send a patch if it works.
> 

Sure, if you believe that is the best thing to do at the moment.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [4/8] powerpc/slb: Add some helper functions to improve modularization

2015-08-11 Thread Anshuman Khandual
On 08/12/2015 09:41 AM, Michael Ellerman wrote:
> On Wed, 2015-29-07 at 07:10:01 UTC, Anshuman Khandual wrote:
>> > This patch adds the following six helper functions to help improve
>> > modularization and readability of the code.
>> > 
>> > (1) slb_invalidate_all:Invalidates the entire SLB
>> > (2) slb_invalidate:Invalidates SLB entries present in PACA
>> > (3) mmu_linear_vsid_flags: VSID flags for kernel linear mapping
>> > (4) mmu_virtual_vsid_flags:VSID flags for kernel virtual mapping
>> > (5) mmu_vmemmap_vsid_flags:VSID flags for kernel vmem mapping
>> > (6) mmu_io_vsid_flags: VSID flags for kernel I/O mapping
> That's too many changes for one patch, it's certainly not a single logical 
> change.
> 
> I'm happy with all the flag ones being done in a single patch, but please do
> the other two in separate patches.

Sure, will split this into three separate patches, also update the
in-code documentation as suggested on the [5/8] patch and then will
send out a new series.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc/xmon: Allow limiting the size of the paca display

2015-08-12 Thread Anshuman Khandual
On 08/12/2015 12:27 PM, Michael Ellerman wrote:
> The paca display is already more than 24 lines, which can be problematic
> if you have an old school 80x24 terminal, or more likely you are on a
> virtual terminal which does not scroll for whatever reason.
> 
> We'd like to expand the paca display even more, so add a way to limit
> the number of lines that are displayed.
> 
> This adds a third form of 'dp' which is 'dp # #', where the first number
> is the cpu, and the second is the number of lines to display.
> 
> Example output:
> 
>   5:mon> dp 3 6
>   paca for cpu 0x3 @ cfdc0d80:
>possible = yes
>present  = yes
>online   = yes
>lock_token   = 0x8000  (0xa)
>paca_index   = 0x3 (0x8)
> 
> Signed-off-by: Michael Ellerman 
> ---
>  arch/powerpc/xmon/xmon.c | 23 +++
>  1 file changed, 15 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index e599259d84fc..6f44e9c07f34 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -205,6 +205,7 @@ Commands:\n\
>  #ifdef CONFIG_PPC64
>"\
>dp[#]  dump paca for current cpu, or cpu #\n\
> +  dp##  dump paca for cpu #, only # lines\n\
>dpadump paca for all possible cpus\n"
>  #endif
>"\
> @@ -2070,9 +2071,10 @@ static void xmon_rawdump (unsigned long adrs, long ndump)
>  }
>  
>  #ifdef CONFIG_PPC64
> -static void dump_one_paca(int cpu)
> +static void dump_one_paca(int cpu, int num_lines)
>  {
>   struct paca_struct *p;
> + int i;
>  
>   if (setjmp(bus_error_jmp) != 0) {
>   printf("*** Error dumping paca for cpu 0x%x!\n", cpu);
> @@ -2090,9 +2092,12 @@ static void dump_one_paca(int cpu)
>   printf(" %-*s = %s\n", 16, "present", cpu_present(cpu) ? "yes" : "no");
>   printf(" %-*s = %s\n", 16, "online", cpu_online(cpu) ? "yes" : "no");
>  
> + i = 4; /* We always print the first four lines */
> +
>  #define DUMP(paca, name, format) \
> - printf(" %-*s = %#-*"format"\t(0x%lx)\n", 16, #name, 18, paca->name, \
> - offsetof(struct paca_struct, name));
> + if (!num_lines || i++ < num_lines)

All looks good except the fact that we are using 0 to signify that
there is no limit on the number of lines. Isn't that a bit confusing?

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [V3] powerpc/irq: Enable some more exceptions in /proc/interrupts interface

2015-08-12 Thread Anshuman Khandual
On 08/09/2015 07:57 AM, Benjamin Herrenschmidt wrote:
> On Tue, 2015-08-04 at 19:57 +1000, Michael Ellerman wrote:
>> > On Mon, 2015-13-07 at 08:16:06 UTC, Anshuman Khandual wrote:
>>> > > This patch enables facility unavailable exceptions for generic facility,
>>> > > FPU, ALTIVEC and VSX in /proc/interrupts listing by incrementing their
>>> > > newly added IRQ statistical counters as and when these exceptions 
>>> > > happen.
>>> > > This also adds couple of helper functions which will be called from 
>>> > > within
>>> > > the interrupt handler context to update their statistics. Similarly this
>>> > > patch also enables alignment and program check exceptions as well.
>> > 
>> > ...
>> > 
>>> > > diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
>>> > > index 0a0399c2..a86180c 100644
>>> > > --- a/arch/powerpc/kernel/exceptions-64s.S
>>> > > +++ b/arch/powerpc/kernel/exceptions-64s.S
>>> > > @@ -1158,6 +1158,7 @@ BEGIN_FTR_SECTION
>>> > >  END_FTR_SECTION_IFSET(CPU_FTR_TM)
>>> > >  #endif
>>> > > bl  load_up_fpu
>>> > > +   bl  fpu_unav_exceptions_count
>> > 
>> > Is it safe to call C code here?
> Even if it was (at some stage it wasn't, I'd have to look very closely
> to see what's the situation now), we certainly don't want to add
> overhead to load_up_fpu.

As I had already mentioned in the V2 thread of this patch, FPU
performance with this patch applied is still very much comparable
to the kernel without this patch, though I have not verified whether
this still holds true with the new changes being proposed in
exceptions-64s.S (earlier reply in this thread) to make the C
function call safer.

Average of 1000 iterations (context_switch2 --fp 0 0)

With the patch    : 322599.57  (Average of 1000 results)
Without the patch : 320464.924 (Average of 1000 results)

Standard deviation of the results:

6029.1407073288 (with patch), 5941.7684079774 (without patch)

The difference between the two averages (~2135, or about 0.7%) is well
within one standard deviation (~6000), so it does not indicate a real
regression. If the results above are still not convincing that FPU
performance is unaffected by this patch, let me know if we need to do
more experiments.
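
For reference, a sketch of what a minimal counting helper could look
like (an assumption on my part; the actual patch body is not quoted
here). It is a single per-CPU increment, with no locks and no sleeping,
which is why the overhead added to load_up_fpu should stay small:

#include <linux/percpu.h>

/* Sketch only: one per-CPU counter; the real patch may wire this
 * into the generic IRQ statistics instead.
 */
DEFINE_PER_CPU(unsigned int, fpu_unav_count);

void fpu_unav_exceptions_count(void)
{
	__this_cpu_inc(fpu_unav_count);
}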


Re: [V3] powerpc/irq: Enable some more exceptions in /proc/interrupts interface

2015-08-19 Thread Anshuman Khandual
On 08/14/2015 08:22 AM, Michael Ellerman wrote:
> On Thu, 2015-08-06 at 18:54 +0530, Anshuman Khandual wrote:
>> On 08/04/2015 03:27 PM, Michael Ellerman wrote:
>>> On Mon, 2015-13-07 at 08:16:06 UTC, Anshuman Khandual wrote:
>>>> This patch enables facility unavailable exceptions for generic facility,
>>>> FPU, ALTIVEC and VSX in /proc/interrupts listing by incrementing their
>>>> newly added IRQ statistical counters as and when these exceptions happen.
>>>> This also adds a couple of helper functions which will be called from within
>>>> the interrupt handler context to update their statistics. Similarly this
>>>> patch also enables alignment and program check exceptions as well.
>>>
>>> ...
>>>
>>>> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
>>>> index 0a0399c2..a86180c 100644
>>>> --- a/arch/powerpc/kernel/exceptions-64s.S
>>>> +++ b/arch/powerpc/kernel/exceptions-64s.S
>>>> @@ -1158,6 +1158,7 @@ BEGIN_FTR_SECTION
>>>>  END_FTR_SECTION_IFSET(CPU_FTR_TM)
>>>>  #endif
>>>>bl  load_up_fpu
>>>> +  bl  fpu_unav_exceptions_count
>>>
>>> Is it safe to call C code here?
>>
>> Hmm, is it not? I had that question but was not really sure. I don't
>> completely understand the difference between 'fast_exception_return'
>> and 'ret_from_except'.
> 
> If you're "not really sure" it's correct, please say so in the change log!

Yeah, I should have written that up somewhere after the commit
message (after the "---"). My bad; I will take care of this next
time around.

> 
> I'd rather you didn't send me patches with possibly subtle bugs in core code.

Michael, I understand your concern. I was just trying to add new
entries there which would help us. What is our plan for this patch?
If we change it as I had proposed earlier, will that be good enough?


Re: [PATCH] powerpc: Convert out of line __arch_hweight to inline

2013-08-06 Thread Anshuman Khandual
> 
>  obj-$(CONFIG_PPC64)  += copypage_64.o copyuser_64.o \
>  memcpy_64.o usercopy_64.o mem_64.o string.o \
> -checksum_wrappers_64.o hweight_64.o \
> +checksum_wrappers_64.o \
>  copyuser_power7.o string_64.o copypage_power7.o \
>  memcpy_power7.o
>  obj-$(CONFIG_PPC_EMULATE_SSTEP)  += sstep.o ldstfp.o
> 

Since you have moved all the code out of hweight_64.S and removed it
from the compilation list in the Makefile, you need to delete the file
from the directory as well.



Re: [RFC PATCH 1/9] powerpc: Split the common exception prolog logic into two section.

2013-08-07 Thread Anshuman Khandual
On 08/07/2013 03:08 PM, Mahesh J Salgaonkar wrote:
> From: Mahesh Salgaonkar 
> 
> This patch splits the common exception prolog logic into two parts to
> facilitate reuse of existing code in the next patch. The second part will
> be reused in the machine check exception routine in the next patch.
> 

Please avoid describing the functionality as a requirement of upcoming
sibling patches. The justification for splitting the code should be a
generic functional or code-organizational one. We should also avoid the
words "next patch" in the commit message, as they become confusing when
read at a later point in time. The commit message should be
self-sufficient with respect to the exact change set under
consideration.



Re: [RFC PATCH 2/9] powerpc: handle machine check in Linux host.

2013-08-07 Thread Anshuman Khandual
On 08/07/2013 03:08 PM, Mahesh J Salgaonkar wrote:
> From: Mahesh Salgaonkar 
> 
> Move the machine check entry point into Linux. So far we were dependent on
> firmware to decode MCE error details and hand over the high-level info to the OS.
> 
> This patch introduces an early machine check routine that saves the MCE
> information (srr1, srr0, dar and dsisr) to the emergency stack. We allocate
> a stack frame on the emergency stack and set r1 accordingly. This allows us
> to be prepared to take another exception without losing context. One thing
> to note here is that if we get another machine check while the ME bit is off,
> then we risk a checkstop. Hence we restrict ourselves to saving only the MCE
> information and turning the ME bit on.
> 
> This is the code flow:
> 
>   Machine Check Interrupt
>   |
>   V
>  0x200 vector   ME=0, IR=0, DR=0
>   |
>   V
>   +---+
>   |machine_check_pSeries_early:   | ME=0, IR=0, DR=0
>   |   Alloc frame on emergency stack  |
>   |   Save srr1, srr0, dar and dsisr on stack |
>   +---+
>   |
>   (ME=1, IR=0, DR=0, RFID)
>   |
>   V
>   machine_check_handle_earlyME=1, IR=0, DR=0
>   |
>   V
>   +---+
>   |   machine_check_early (r3=pt_regs)| ME=1, IR=0, DR=0
>   |   Things to do: (in next patches) |
>   |   Flush SLB for SLB errors|
>   |   Flush TLB for TLB errors|
>   |   Decode and save MCE info|
>   +---+
>   |
>   (Fall through existing exception handler routine.)
>   |
>   V
>   machine_check_pSeries ME=1, IR=0, DR=0
>   |
>   (ME=1, IR=1, DR=1, RFID)
>   |
>   V
>   machine_check_common  ME=1, IR=1, DR=1
>   .
>   .
>   .
> 
> 
> Signed-off-by: Mahesh Salgaonkar 
> ---
>  arch/powerpc/include/asm/exception-64s.h |   43 ++
>  arch/powerpc/kernel/exceptions-64s.S |   50 +-
>  arch/powerpc/kernel/traps.c  |   12 +++
>  3 files changed, 104 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
> index 2386d40..c5d2cbc 100644
> --- a/arch/powerpc/include/asm/exception-64s.h
> +++ b/arch/powerpc/include/asm/exception-64s.h
> @@ -174,6 +174,49 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
>  #define EXCEPTION_PROLOG_1(area, extra, vec) \
>   __EXCEPTION_PROLOG_1(area, extra, vec)
> 
> +/*
> + * Register contents:
> + * R12   = interrupt vector
> + * R13   = PACA
> + * R9= CR
> + * R11 & R12 is saved on PACA_EXMC
> + *
> + * Switch to the emergency stack and handle re-entrancy (though we currently
> + * don't test for overflow). Save MCE registers srr1, srr0, dar and
> + * dsisr and then turn the ME bit on.
> + */
> +#define __EARLY_MACHINE_CHECK_HANDLER(area, label)   \
> + /* Check if we are already using the emergency stack. */\
> + ld  r10,PACAEMERGSP(r13);   \
> + subir10,r10,THREAD_SIZE;\
> + rldicr  r10,r10,0,(63 - THREAD_SHIFT);  \
> + rldicr  r11,r1,0,(63 - THREAD_SHIFT);   \
> + cmpdr10,r11;/* Are we using emergency stack? */ \
> + mr  r11,r1; /* Save current stack pointer */\
> + beq 0f; \
> + ld  r1,PACAEMERGSP(r13);/* Use emergency stack */   \
> +0:   subir1,r1,INT_FRAME_SIZE;   /* alloc stack frame */ \
> + std r11,GPR1(r1);   \
> + std r11,0(r1);  /* make stack chain pointer */  \
> + mfspr   r11,SPRN_SRR0;  /* Save SRR0 */ \
> + std r11,_NIP(r1);   \
> + mfspr   r11,SPRN_SRR1;  /* Save SRR1 */ \
> + std r11,_MSR(r1);   \
> + mfspr   r11,SPRN_DAR;   /* Save DAR */  \
> + std r11,_DAR(r1);   \
> + mfspr   r11,SPRN_DSISR; /* Save DSISR */ \

[PATCH V2 1/6] perf: New conditional branch filter criteria in branch stack sampling

2013-08-29 Thread Anshuman Khandual
The POWER8 PMU based BHRB supports filtering for conditional branches.
This patch introduces a new branch filter, PERF_SAMPLE_BRANCH_COND,
which extends the existing perf ABI. Other architectures can provide
this functionality with either HW filtering support (if present) or
with SW filtering of instructions.

Signed-off-by: Anshuman Khandual 
Reviewed-by: Stephane Eranian 
---
 include/uapi/linux/perf_event.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 0b1df41..5da52b6 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -160,8 +160,9 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */
PERF_SAMPLE_BRANCH_IN_TX= 1U << 8, /* in transaction */
PERF_SAMPLE_BRANCH_NO_TX= 1U << 9, /* not in transaction */
+   PERF_SAMPLE_BRANCH_COND = 1U << 10, /* conditional branches */
 
-   PERF_SAMPLE_BRANCH_MAX  = 1U << 10, /* non-ABI */
+   PERF_SAMPLE_BRANCH_MAX  = 1U << 11, /* non-ABI */
 };
 
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
-- 
1.7.11.7
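
For illustration, user space would request the new filter roughly as
follows (a sketch, not part of this patch; the helper name and sampling
parameters are made up). The recorded branch stack then contains only
conditional branches taken in user mode:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_cond_branch_sampling(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/* Conditional branches only, user space only */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_COND |
				  PERF_SAMPLE_BRANCH_USER;

	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}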



<    1   2   3   4   5   6   7   8   9   10   >