[PATCH v3 2/4] selftests/powerpc: Move get_auxv_entry() to harness.c

2015-04-06 Thread Sam Bobroff
Move get_auxv_entry() from pmu/lib.c up to harness.c in order to make
it available to other tests.

Signed-off-by: Sam Bobroff 
---
 tools/testing/selftests/powerpc/harness.c |   47 +
 tools/testing/selftests/powerpc/pmu/lib.c |   47 -
 tools/testing/selftests/powerpc/pmu/lib.h |1 -
 tools/testing/selftests/powerpc/utils.h   |2 +-
 4 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/tools/testing/selftests/powerpc/harness.c 
b/tools/testing/selftests/powerpc/harness.c
index 8ebc58a..f7997af 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -11,6 +11,10 @@
 #include 
 #include 
 #include 
+#include <elf.h>
+#include <fcntl.h>
+#include <link.h>
+#include <sys/stat.h>
 
 #include "subunit.h"
 #include "utils.h"
@@ -112,3 +116,46 @@ int test_harness(int (test_function)(void), char *name)
 
return rc;
 }
+
+static char auxv[4096];
+
+void *get_auxv_entry(int type)
+{
+   ElfW(auxv_t) *p;
+   void *result;
+   ssize_t num;
+   int fd;
+
+   fd = open("/proc/self/auxv", O_RDONLY);
+   if (fd == -1) {
+   perror("open");
+   return NULL;
+   }
+
+   result = NULL;
+
+   num = read(fd, auxv, sizeof(auxv));
+   if (num < 0) {
+   perror("read");
+   goto out;
+   }
+
+   if (num > sizeof(auxv)) {
+   printf("Overflowed auxv buffer\n");
+   goto out;
+   }
+
+   p = (ElfW(auxv_t) *)auxv;
+
+   while (p->a_type != AT_NULL) {
+   if (p->a_type == type) {
+   result = (void *)p->a_un.a_val;
+   break;
+   }
+
+   p++;
+   }
+out:
+   close(fd);
+   return result;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.c 
b/tools/testing/selftests/powerpc/pmu/lib.c
index 9768dea..a07104c 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.c
+++ b/tools/testing/selftests/powerpc/pmu/lib.c
@@ -5,15 +5,10 @@
 
 #define _GNU_SOURCE/* For CPU_ZERO etc. */
 
-#include 
 #include 
-#include 
-#include 
 #include 
 #include 
 #include 
-#include 
-#include 
 #include 
 
 #include "utils.h"
@@ -256,45 +251,3 @@ out:
return rc;
 }
 
-static char auxv[4096];
-
-void *get_auxv_entry(int type)
-{
-   ElfW(auxv_t) *p;
-   void *result;
-   ssize_t num;
-   int fd;
-
-   fd = open("/proc/self/auxv", O_RDONLY);
-   if (fd == -1) {
-   perror("open");
-   return NULL;
-   }
-
-   result = NULL;
-
-   num = read(fd, auxv, sizeof(auxv));
-   if (num < 0) {
-   perror("read");
-   goto out;
-   }
-
-   if (num > sizeof(auxv)) {
-   printf("Overflowed auxv buffer\n");
-   goto out;
-   }
-
-   p = (ElfW(auxv_t) *)auxv;
-
-   while (p->a_type != AT_NULL) {
-   if (p->a_type == type) {
-   result = (void *)p->a_un.a_val;
-   break;
-   }
-
-   p++;
-   }
-out:
-   close(fd);
-   return result;
-}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h 
b/tools/testing/selftests/powerpc/pmu/lib.h
index 0f0339c..ca5d72a 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -29,7 +29,6 @@ extern int notify_parent(union pipe write_pipe);
 extern int notify_parent_of_error(union pipe write_pipe);
 extern pid_t eat_cpu(int (test_function)(void));
 extern bool require_paranoia_below(int level);
-extern void *get_auxv_entry(int type);
 
 struct addr_range {
uint64_t first, last;
diff --git a/tools/testing/selftests/powerpc/utils.h 
b/tools/testing/selftests/powerpc/utils.h
index a93777a..64f53cd 100644
--- a/tools/testing/selftests/powerpc/utils.h
+++ b/tools/testing/selftests/powerpc/utils.h
@@ -19,7 +19,7 @@ typedef uint8_t u8;
 
 
 int test_harness(int (test_function)(void), char *name);
-
+extern void *get_auxv_entry(int type);
 
 /* Yes, this is evil */
 #define FAIL_IF(x) \
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 0/4] powerpc/tm: Abort syscalls in active transactions

2015-04-06 Thread Sam Bobroff

See the first patch for a description of the reasoning behind this
change.

This set includes the change, a kernel selftest for it and
some slight refactoring of the selftest code.


v3:
Patch 1/4: powerpc/tm: Abort syscalls in active transactions

Use the "TABORT()" macro to allow building with versions of gcc that don't
support the "tabort." instruction.


v2:
Patch 1/4: powerpc/tm: Abort syscalls in active transactions

Also update the failure code table.

Patch 3/4: selftests/powerpc: Add transactional syscall test

Further testing has shown that the success or failure of the transactions was
affected by minor changes to the code, compiler optimisation and linker
settings.

To address this, I've moved the transactional part of the test to a separate
function, written in assembly. I've also extended the test to run as many
transactions as it can fit into ten seconds, to better catch failures that
occur only rarely. This has stabilised the results, and it's no longer
necessary to use special compiler or linker flags.

Patch 4/4: powerpc/tm: Correct minor documentation typos

Discovered some typos while updating the documentation.


Sam Bobroff (4):
  powerpc/tm: Abort syscalls in active transactions
  selftests/powerpc: Move get_auxv_entry() to harness.c
  selftests/powerpc: Add transactional syscall test
  powerpc/tm: Correct minor documentation typos

 Documentation/powerpc/transactional_memory.txt |   36 +++
 arch/powerpc/include/uapi/asm/tm.h |2 +-
 arch/powerpc/kernel/entry_64.S |   19 
 tools/testing/selftests/powerpc/harness.c  |   47 +
 tools/testing/selftests/powerpc/pmu/lib.c  |   47 -
 tools/testing/selftests/powerpc/pmu/lib.h  |1 -
 tools/testing/selftests/powerpc/tm/.gitignore  |1 +
 tools/testing/selftests/powerpc/tm/Makefile|4 +-
 .../testing/selftests/powerpc/tm/tm-syscall-asm.S  |   27 +
 .../testing/selftests/powerpc/tm/tm-syscall-asm.h  |2 +
 tools/testing/selftests/powerpc/tm/tm-syscall.c|  109 
 tools/testing/selftests/powerpc/utils.h|2 +-
 12 files changed, 228 insertions(+), 69 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall-asm.h
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall.c

-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 1/4] powerpc/tm: Abort syscalls in active transactions

2015-04-06 Thread Sam Bobroff
This patch changes the syscall handler to doom (tabort) active
transactions when a syscall is made and return immediately without
performing the syscall.

Currently, the system call instruction automatically suspends an
active transaction, which causes side effects to persist when that
transaction fails.

This does change the kernel's behaviour, but in a way that was
documented as unsupported. It doesn't reduce functionality because
syscalls will still be performed after tsuspend. It also provides a
consistent interface and makes the behaviour of user code
substantially the same across powerpc and platforms that do not
support suspended transactions (e.g. x86 and s390).

Performance measurements using
http://ozlabs.org/~anton/junkcode/null_syscall.c
indicate the cost of a system call increases by about 0.5%.

Signed-off-by: Sam Bobroff 
Acked-by: Michael Neuling
---
v3:

Use the "TABORT()" macro to allow building with versions of gcc that don't
support the "tabort." instruction.

v2:

Also update the failure code table.

 Documentation/powerpc/transactional_memory.txt |   32 
 arch/powerpc/include/uapi/asm/tm.h |2 +-
 arch/powerpc/kernel/entry_64.S |   19 ++
 3 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/Documentation/powerpc/transactional_memory.txt 
b/Documentation/powerpc/transactional_memory.txt
index 9791e98..98b39af 100644
--- a/Documentation/powerpc/transactional_memory.txt
+++ b/Documentation/powerpc/transactional_memory.txt
@@ -74,22 +74,23 @@ Causes of transaction aborts
 Syscalls
 
 
-Performing syscalls from within transaction is not recommended, and can lead
-to unpredictable results.
+Syscalls made from within an active transaction will not be performed and the
+transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL
+| TM_CAUSE_PERSISTENT.
 
-Syscalls do not by design abort transactions, but beware: The kernel code will
-not be running in transactional state.  The effect of syscalls will always
-remain visible, but depending on the call they may abort your transaction as a
-side-effect, read soon-to-be-aborted transactional data that should not remain
-invisible, etc.  If you constantly retry a transaction that constantly aborts
-itself by calling a syscall, you'll have a livelock & make no progress.
+Syscalls made from within a suspended transaction are performed as normal and
+the transaction is not explicitly doomed by the kernel.  However, what the
+kernel does to perform the syscall may result in the transaction being doomed
+by the hardware.  The syscall is performed in suspended mode so any side
+effects will be persistent, independent of transaction success or failure.  No
+guarantees are provided by the kernel about which syscalls will affect
+transaction success.
 
-Simple syscalls (e.g. sigprocmask()) "could" be OK.  Even things like write()
-from, say, printf() should be OK as long as the kernel does not access any
-memory that was accessed transactionally.
-
-Consider any syscalls that happen to work as debug-only -- not recommended for
-production use.  Best to queue them up till after the transaction is over.
+Care must be taken when relying on syscalls to abort during active transactions
+if the calls are made via a library.  Libraries may cache values (which may
+give the appearance of success) or perform operations that cause transaction
+failure before entering the kernel (which may produce different failure codes).
+Examples are glibc's getpid() and lazy symbol resolution.
 
 
 Signals
@@ -176,8 +177,7 @@ kernel aborted a transaction:
  TM_CAUSE_RESCHED   Thread was rescheduled.
  TM_CAUSE_TLBI  Software TLB invalide.
  TM_CAUSE_FAC_UNAV  FP/VEC/VSX unavailable trap.
- TM_CAUSE_SYSCALL   Currently unused; future syscalls that must abort
-transactions for consistency will use this.
+ TM_CAUSE_SYSCALL   Syscall from active transaction.
  TM_CAUSE_SIGNALSignal delivered.
  TM_CAUSE_MISC  Currently unused.
  TM_CAUSE_ALIGNMENT Alignment fault.
diff --git a/arch/powerpc/include/uapi/asm/tm.h 
b/arch/powerpc/include/uapi/asm/tm.h
index 5d836b7..5047659 100644
--- a/arch/powerpc/include/uapi/asm/tm.h
+++ b/arch/powerpc/include/uapi/asm/tm.h
@@ -11,7 +11,7 @@
 #define TM_CAUSE_RESCHED   0xde
 #define TM_CAUSE_TLBI  0xdc
 #define TM_CAUSE_FAC_UNAV  0xda
-#define TM_CAUSE_SYSCALL   0xd8  /* future use */
+#define TM_CAUSE_SYSCALL   0xd8
 #define TM_CAUSE_MISC  0xd6  /* future use */
 #define TM_CAUSE_SIGNAL0xd4
 #define TM_CAUSE_ALIGNMENT 0xd2
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d180caf2..6374af8 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -34,6 +34,7 @@
 #include 
 #include 
 #in

[PATCH v3 3/4] selftests/powerpc: Add transactional syscall test

2015-04-06 Thread Sam Bobroff
Check that a syscall made during an active transaction will fail with
the correct failure code and that one made during a suspended
transaction will succeed.

Signed-off-by: Sam Bobroff 
---
v2:

Further testing has shown that the success or failure of the transactions was
affected by minor changes to the code, compiler optimisation and linker
settings.

To address this, I've moved the transactional part of the test to a separate
function, written in assembly. I've also extended the test to run as many
transactions as it can fit into ten seconds, to better catch failures that
occur only rarely. This has stabilised the results, and it's no longer
necessary to use special compiler or linker flags.

 tools/testing/selftests/powerpc/tm/.gitignore  |1 +
 tools/testing/selftests/powerpc/tm/Makefile|4 +-
 .../testing/selftests/powerpc/tm/tm-syscall-asm.S  |   27 +
 .../testing/selftests/powerpc/tm/tm-syscall-asm.h  |2 +
 tools/testing/selftests/powerpc/tm/tm-syscall.c|  109 
 5 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall-asm.h
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall.c

diff --git a/tools/testing/selftests/powerpc/tm/.gitignore 
b/tools/testing/selftests/powerpc/tm/.gitignore
index 33d02cc..2699635d 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -1 +1,2 @@
 tm-resched-dscr
+tm-syscall
diff --git a/tools/testing/selftests/powerpc/tm/Makefile 
b/tools/testing/selftests/powerpc/tm/Makefile
index 2cede23..93bbff3 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -1,8 +1,10 @@
-PROGS := tm-resched-dscr
+PROGS := tm-resched-dscr tm-syscall
+CFLAGS:=$(CFLAGS) -mhtm
 
 all: $(PROGS)
 
 $(PROGS): ../harness.c
+tm-syscall: tm-syscall-asm.S
 
 run_tests: all
@-for PROG in $(PROGS); do \
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S 
b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
new file mode 100644
index 000..2b2daa7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
@@ -0,0 +1,27 @@
+#include 
+#include 
+
+   .text
+FUNC_START(getppid_tm_active_impl)
+   tbegin.
+   beq 1f
+   li  r0, __NR_getppid
+   sc
+   tend.
+   blr
+1:
+   li  r3, -1
+   blr
+
+FUNC_START(getppid_tm_suspended_impl)
+   tbegin.
+   beq 1f
+   li  r0, __NR_getppid
+   tsuspend.
+   sc
+   tresume.
+   tend.
+   blr
+1:
+   li  r3, -1
+   blr
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.h 
b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.h
new file mode 100644
index 000..6136328
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.h
@@ -0,0 +1,2 @@
+extern int getppid_tm_active_impl(void);
+extern int getppid_tm_suspended_impl(void);
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c 
b/tools/testing/selftests/powerpc/tm/tm-syscall.c
new file mode 100644
index 000..ff3b15c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -0,0 +1,109 @@
+/* Test the kernel's system call code to ensure that a system call
+ * made from within an active HTM transaction is aborted with the
+ * correct failure code.
+ * Conversely, ensure that a system call made from within a
+ * suspended transaction can succeed.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+#include "tm-syscall-asm.h"
+
+unsigned retries = 0;
+
+#define TEST_DURATION 10 /* seconds */
+#define TM_RETRIES 100
+
+long failure_code(void)
+{
+   return __builtin_get_texasr() >> 56;
+}
+
+bool failure_is_persistent(void)
+{
+   return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT;
+}
+
+bool failure_is_syscall(void)
+{
+   return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL;
+}
+
+pid_t getppid_tm(bool suspend)
+{
+   int i;
+   pid_t pid;
+
+   for (i = 0; i < TM_RETRIES; i++) {
+   if (suspend)
+   pid = getppid_tm_suspended_impl();
+   else
+   pid = getppid_tm_active_impl();
+   if (pid >= 0)
+   return pid;
+   if (failure_is_persistent()) {
+   if (failure_is_syscall())
+   return -1;
+   printf("Unexpected persistent transaction failure.\n");
+   printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n",
+  __builtin_get_texasr(), __builtin_get_tfiar());
+   exit(-1);
+   }
+   retries++;

[PATCH v3 4/4] powerpc/tm: Correct minor documentation typos

2015-04-06 Thread Sam Bobroff
Signed-off-by: Sam Bobroff 
---
v2:

Discovered some typos while updating the documentation.

 Documentation/powerpc/transactional_memory.txt |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/powerpc/transactional_memory.txt 
b/Documentation/powerpc/transactional_memory.txt
index 98b39af..ba0a2a4 100644
--- a/Documentation/powerpc/transactional_memory.txt
+++ b/Documentation/powerpc/transactional_memory.txt
@@ -175,7 +175,7 @@ These are defined in , and distinguish different 
reasons why the
 kernel aborted a transaction:
 
  TM_CAUSE_RESCHED   Thread was rescheduled.
- TM_CAUSE_TLBI  Software TLB invalide.
+ TM_CAUSE_TLBI  Software TLB invalid.
  TM_CAUSE_FAC_UNAV  FP/VEC/VSX unavailable trap.
  TM_CAUSE_SYSCALL   Syscall from active transaction.
  TM_CAUSE_SIGNALSignal delivered.
@@ -185,7 +185,7 @@ kernel aborted a transaction:
 
 These can be checked by the user program's abort handler as TEXASR[0:7].  If
 bit 7 is set, it indicates that the error is consider persistent.  For example
-a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.q
+a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.
 
 GDB
 ===
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/1] KVM: PPC: Book3S: correct width in XER handling

2015-05-19 Thread Sam Bobroff
In 64 bit kernels, the Fixed Point Exception Register (XER) is a 64
bit field (e.g. in kvm_regs and kvm_vcpu_arch) and in most places it is
accessed as such.

This patch corrects places where it is accessed as a 32 bit field by a
64 bit kernel.  In some cases this is via a 32 bit load or store
instruction which, depending on endianness, will cause either the
lower or upper 32 bits to be missed.  In another case it is cast as a
u32, causing the upper 32 bits to be cleared.

This patch corrects those places by extending the access methods to
64 bits.

Signed-off-by: Sam Bobroff 
---

 arch/powerpc/include/asm/kvm_book3s.h   |4 ++--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |6 +++---
 arch/powerpc/kvm/book3s_segment.S   |4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index b91e74a..05a875a 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -225,12 +225,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
return vcpu->arch.cr;
 }
 
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
 {
vcpu->arch.xer = val;
 }
 
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
return vcpu->arch.xer;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 4d70df2..d75be59 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -870,7 +870,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
blt hdec_soon
 
ld  r6, VCPU_CTR(r4)
-   lwz r7, VCPU_XER(r4)
+   ld  r7, VCPU_XER(r4)
 
mtctr   r6
mtxer   r7
@@ -1103,7 +1103,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mfctr   r3
mfxer   r4
std r3, VCPU_CTR(r9)
-   stw r4, VCPU_XER(r9)
+   std r4, VCPU_XER(r9)
 
/* If this is a page table miss then see if it's theirs or ours */
cmpwi   r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
@@ -1675,7 +1675,7 @@ kvmppc_hdsi:
bl  kvmppc_msr_interrupt
 fast_interrupt_c_return:
 6: ld  r7, VCPU_CTR(r9)
-   lwz r8, VCPU_XER(r9)
+   ld  r8, VCPU_XER(r9)
mtctr   r7
mtxer   r8
mr  r4, r9
diff --git a/arch/powerpc/kvm/book3s_segment.S 
b/arch/powerpc/kvm/book3s_segment.S
index acee37c..ca8f174 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -123,7 +123,7 @@ no_dcbz32_on:
PPC_LL  r8, SVCPU_CTR(r3)
PPC_LL  r9, SVCPU_LR(r3)
lwz r10, SVCPU_CR(r3)
-   lwz r11, SVCPU_XER(r3)
+   PPC_LL  r11, SVCPU_XER(r3)
 
mtctr   r8
mtlrr9
@@ -237,7 +237,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
mfctr   r8
mflrr9
 
-   stw r5, SVCPU_XER(r13)
+   PPC_STL r5, SVCPU_XER(r13)
PPC_STL r6, SVCPU_FAULT_DAR(r13)
stw r7, SVCPU_FAULT_DSISR(r13)
PPC_STL r8, SVCPU_CTR(r13)
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 1/1] KVM: PPC: Book3S: correct width in XER handling

2015-05-25 Thread Sam Bobroff
On Mon, May 25, 2015 at 11:08:08PM +0200, Alexander Graf wrote:
> 
> 
> On 20.05.15 07:26, Sam Bobroff wrote:
> > In 64 bit kernels, the Fixed Point Exception Register (XER) is a 64
> > bit field (e.g. in kvm_regs and kvm_vcpu_arch) and in most places it is
> > accessed as such.
> > 
> > This patch corrects places where it is accessed as a 32 bit field by a
> > 64 bit kernel.  In some cases this is via a 32 bit load or store
> > instruction which, depending on endianness, will cause either the
> > lower or upper 32 bits to be missed.  In another case it is cast as a
> > u32, causing the upper 32 bits to be cleared.
> > 
> > This patch corrects those places by extending the access methods to
> > 64 bits.
> > 
> > Signed-off-by: Sam Bobroff 
> > ---
> > 
> >  arch/powerpc/include/asm/kvm_book3s.h   |4 ++--
> >  arch/powerpc/kvm/book3s_hv_rmhandlers.S |6 +++---
> >  arch/powerpc/kvm/book3s_segment.S   |4 ++--
> >  3 files changed, 7 insertions(+), 7 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
> > b/arch/powerpc/include/asm/kvm_book3s.h
> > index b91e74a..05a875a 100644
> > --- a/arch/powerpc/include/asm/kvm_book3s.h
> > +++ b/arch/powerpc/include/asm/kvm_book3s.h
> > @@ -225,12 +225,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
> > return vcpu->arch.cr;
> >  }
> >  
> > -static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
> > +static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
> >  {
> > vcpu->arch.xer = val;
> >  }
> >  
> > -static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
> > +static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
> >  {
> > return vcpu->arch.xer;
> >  }
> > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
> > b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > index 4d70df2..d75be59 100644
> > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > @@ -870,7 +870,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
> > blt hdec_soon
> >  
> > ld  r6, VCPU_CTR(r4)
> > -   lwz r7, VCPU_XER(r4)
> > +   ld  r7, VCPU_XER(r4)
> >  
> > mtctr   r6
> > mtxer   r7
> > @@ -1103,7 +1103,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
> > mfctr   r3
> > mfxer   r4
> > std r3, VCPU_CTR(r9)
> > -   stw r4, VCPU_XER(r9)
> > +   std r4, VCPU_XER(r9)
> >  
> > /* If this is a page table miss then see if it's theirs or ours */
> > cmpwi   r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
> > @@ -1675,7 +1675,7 @@ kvmppc_hdsi:
> > bl  kvmppc_msr_interrupt
> >  fast_interrupt_c_return:
> >  6: ld  r7, VCPU_CTR(r9)
> > -   lwz r8, VCPU_XER(r9)
> > +   ld  r8, VCPU_XER(r9)
> > mtctr   r7
> > mtxer   r8
> > mr  r4, r9
> > diff --git a/arch/powerpc/kvm/book3s_segment.S 
> > b/arch/powerpc/kvm/book3s_segment.S
> > index acee37c..ca8f174 100644
> > --- a/arch/powerpc/kvm/book3s_segment.S
> > +++ b/arch/powerpc/kvm/book3s_segment.S
> > @@ -123,7 +123,7 @@ no_dcbz32_on:
> > PPC_LL  r8, SVCPU_CTR(r3)
> > PPC_LL  r9, SVCPU_LR(r3)
> > lwz r10, SVCPU_CR(r3)
> > -   lwz r11, SVCPU_XER(r3)
> > +   PPC_LL  r11, SVCPU_XER(r3)
> 
> struct kvmppc_book3s_shadow_vcpu {
> bool in_use;
> ulong gpr[14];
> u32 cr;
> u32 xer;
> [...]
> 
> so at least this change looks wrong. Please double-check all fields in
> your patch again.
> 
> 
> Alex

Thanks for the review and the catch!

The xer field in kvm_vcpu_arch is already ulong, so it looks like the one in
kvmppc_book3s_shadow_vcpu is the only other case. I'll fix that and repost.

Cheers,
Sam.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 1/1] KVM: PPC: Book3S: correct width in XER handling

2015-05-25 Thread Sam Bobroff
In 64 bit kernels, the Fixed Point Exception Register (XER) is a 64
bit field (e.g. in kvm_regs and kvm_vcpu_arch) and in most places it is
accessed as such.

This patch corrects places where it is accessed as a 32 bit field by a
64 bit kernel.  In some cases this is via a 32 bit load or store
instruction which, depending on endianness, will cause either the
lower or upper 32 bits to be missed.  In another case it is cast as a
u32, causing the upper 32 bits to be cleared.

This patch corrects those places by extending the access methods to
64 bits.

Signed-off-by: Sam Bobroff 
---

v2:

Also extend kvmppc_book3s_shadow_vcpu.xer to 64 bit.

 arch/powerpc/include/asm/kvm_book3s.h |4 ++--
 arch/powerpc/include/asm/kvm_book3s_asm.h |2 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |6 +++---
 arch/powerpc/kvm/book3s_segment.S |4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index b91e74a..05a875a 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -225,12 +225,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
return vcpu->arch.cr;
 }
 
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
 {
vcpu->arch.xer = val;
 }
 
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
return vcpu->arch.xer;
 }
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h 
b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 5bdfb5d..c4ccd2d 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -112,7 +112,7 @@ struct kvmppc_book3s_shadow_vcpu {
bool in_use;
ulong gpr[14];
u32 cr;
-   u32 xer;
+   ulong xer;
ulong ctr;
ulong lr;
ulong pc;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 4d70df2..d75be59 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -870,7 +870,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
blt hdec_soon
 
ld  r6, VCPU_CTR(r4)
-   lwz r7, VCPU_XER(r4)
+   ld  r7, VCPU_XER(r4)
 
mtctr   r6
mtxer   r7
@@ -1103,7 +1103,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mfctr   r3
mfxer   r4
std r3, VCPU_CTR(r9)
-   stw r4, VCPU_XER(r9)
+   std r4, VCPU_XER(r9)
 
/* If this is a page table miss then see if it's theirs or ours */
cmpwi   r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
@@ -1675,7 +1675,7 @@ kvmppc_hdsi:
bl  kvmppc_msr_interrupt
 fast_interrupt_c_return:
 6: ld  r7, VCPU_CTR(r9)
-   lwz r8, VCPU_XER(r9)
+   ld  r8, VCPU_XER(r9)
mtctr   r7
mtxer   r8
mr  r4, r9
diff --git a/arch/powerpc/kvm/book3s_segment.S 
b/arch/powerpc/kvm/book3s_segment.S
index acee37c..ca8f174 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -123,7 +123,7 @@ no_dcbz32_on:
PPC_LL  r8, SVCPU_CTR(r3)
PPC_LL  r9, SVCPU_LR(r3)
lwz r10, SVCPU_CR(r3)
-   lwz r11, SVCPU_XER(r3)
+   PPC_LL  r11, SVCPU_XER(r3)
 
mtctr   r8
mtlrr9
@@ -237,7 +237,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
mfctr   r8
mflrr9
 
-   stw r5, SVCPU_XER(r13)
+   PPC_STL r5, SVCPU_XER(r13)
PPC_STL r6, SVCPU_FAULT_DAR(r13)
stw r7, SVCPU_FAULT_DSISR(r13)
PPC_STL r8, SVCPU_CTR(r13)
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 1/1] KVM: PPC: Book3S: correct width in XER handling

2015-05-26 Thread Sam Bobroff
On Tue, May 26, 2015 at 10:35:08AM +0200, Alexander Graf wrote:
> 
> 
> On 26.05.15 02:27, Sam Bobroff wrote:
> > In 64 bit kernels, the Fixed Point Exception Register (XER) is a 64
> > bit field (e.g. in kvm_regs and kvm_vcpu_arch) and in most places it is
> > accessed as such.
> > 
> > This patch corrects places where it is accessed as a 32 bit field by a
> > 64 bit kernel.  In some cases this is via a 32 bit load or store
> > instruction which, depending on endianness, will cause either the
> > lower or upper 32 bits to be missed.  In another case it is cast as a
> > u32, causing the upper 32 bits to be cleared.
> > 
> > This patch corrects those places by extending the access methods to
> > 64 bits.
> > 
> > Signed-off-by: Sam Bobroff 
> > ---
> > 
> > v2:
> > 
> > Also extend kvmppc_book3s_shadow_vcpu.xer to 64 bit.
> > 
> >  arch/powerpc/include/asm/kvm_book3s.h |4 ++--
> >  arch/powerpc/include/asm/kvm_book3s_asm.h |2 +-
> >  arch/powerpc/kvm/book3s_hv_rmhandlers.S   |6 +++---
> >  arch/powerpc/kvm/book3s_segment.S |4 ++--
> >  4 files changed, 8 insertions(+), 8 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
> > b/arch/powerpc/include/asm/kvm_book3s.h
> > index b91e74a..05a875a 100644
> > --- a/arch/powerpc/include/asm/kvm_book3s.h
> > +++ b/arch/powerpc/include/asm/kvm_book3s.h
> > @@ -225,12 +225,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
> > return vcpu->arch.cr;
> >  }
> >  
> > -static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
> > +static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
> 
> Now we have book3s and booke files with different prototypes on the same
> inline function names. That's really ugly. Please keep them in sync ;).

OK will do.

> 
> Alex
> 
> >  {
> > vcpu->arch.xer = val;
> >  }
> >  
> > -static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
> > +static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
> >  {
> > return vcpu->arch.xer;
> >  }
> > diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h 
> > b/arch/powerpc/include/asm/kvm_book3s_asm.h
> > index 5bdfb5d..c4ccd2d 100644
> > --- a/arch/powerpc/include/asm/kvm_book3s_asm.h
> > +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
> > @@ -112,7 +112,7 @@ struct kvmppc_book3s_shadow_vcpu {
> > bool in_use;
> > ulong gpr[14];
> > u32 cr;
> > -   u32 xer;
> > +   ulong xer;
> > ulong ctr;
> > ulong lr;
> > ulong pc;
> > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
> > b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > index 4d70df2..d75be59 100644
> > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > @@ -870,7 +870,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
> > blt hdec_soon
> >  
> > ld  r6, VCPU_CTR(r4)
> > -   lwz r7, VCPU_XER(r4)
> > +   ld  r7, VCPU_XER(r4)
> >  
> > mtctr   r6
> > mtxer   r7
> > @@ -1103,7 +1103,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
> > mfctr   r3
> > mfxer   r4
> > std r3, VCPU_CTR(r9)
> > -   stw r4, VCPU_XER(r9)
> > +   std r4, VCPU_XER(r9)
> >  
> > /* If this is a page table miss then see if it's theirs or ours */
> > cmpwi   r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
> > @@ -1675,7 +1675,7 @@ kvmppc_hdsi:
> > bl  kvmppc_msr_interrupt
> >  fast_interrupt_c_return:
> >  6: ld  r7, VCPU_CTR(r9)
> > -   lwz r8, VCPU_XER(r9)
> > +   ld  r8, VCPU_XER(r9)
> > mtctr   r7
> > mtxer   r8
> > mr  r4, r9
> > diff --git a/arch/powerpc/kvm/book3s_segment.S 
> > b/arch/powerpc/kvm/book3s_segment.S
> > index acee37c..ca8f174 100644
> > --- a/arch/powerpc/kvm/book3s_segment.S
> > +++ b/arch/powerpc/kvm/book3s_segment.S
> > @@ -123,7 +123,7 @@ no_dcbz32_on:
> > PPC_LL  r8, SVCPU_CTR(r3)
> > PPC_LL  r9, SVCPU_LR(r3)
> > lwz r10, SVCPU_CR(r3)
> > -   lwz r11, SVCPU_XER(r3)
> > +   PPC_LL  r11, SVCPU_XER(r3)
> >  
> > mtctr   r8
> > mtlrr9
> > @@ -237,7 +237,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> > mfctr   r8
> > mflrr9
> >  
> > -   stw r5, SVCPU_XER(r13)
> > +   PPC_STL r5, SVCPU_XER(r13)
> > PPC_STL r6, SVCPU_FAULT_DAR(r13)
> > stw r7, SVCPU_FAULT_DSISR(r13)
> > PPC_STL r8, SVCPU_CTR(r13)
> > 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 1/1] KVM: PPC: Book3S: correct width in XER handling

2015-05-26 Thread Sam Bobroff
In 64 bit kernels, the Fixed Point Exception Register (XER) is a 64
bit field (e.g. in kvm_regs and kvm_vcpu_arch) and in most places it is
accessed as such.

This patch corrects places where it is accessed as a 32 bit field by a
64 bit kernel.  In some cases this is via a 32 bit load or store
instruction which, depending on endianness, will cause either the
lower or upper 32 bits to be missed.  In another case it is cast as a
u32, causing the upper 32 bits to be cleared.

This patch corrects those places by extending the access methods to
64 bits.

Signed-off-by: Sam Bobroff 
---

v3:
Adjust booke set/get xer to match book3s.

v2:

Also extend kvmppc_book3s_shadow_vcpu.xer to 64 bit.

 arch/powerpc/include/asm/kvm_book3s.h |4 ++--
 arch/powerpc/include/asm/kvm_book3s_asm.h |2 +-
 arch/powerpc/include/asm/kvm_booke.h  |4 ++--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |6 +++---
 arch/powerpc/kvm/book3s_segment.S |4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index b91e74a..05a875a 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -225,12 +225,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
return vcpu->arch.cr;
 }
 
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
 {
vcpu->arch.xer = val;
 }
 
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
return vcpu->arch.xer;
 }
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h 
b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 5bdfb5d..c4ccd2d 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -112,7 +112,7 @@ struct kvmppc_book3s_shadow_vcpu {
bool in_use;
ulong gpr[14];
u32 cr;
-   u32 xer;
+   ulong xer;
ulong ctr;
ulong lr;
ulong pc;
diff --git a/arch/powerpc/include/asm/kvm_booke.h 
b/arch/powerpc/include/asm/kvm_booke.h
index 3286f0d..bc6e29e 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -54,12 +54,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
return vcpu->arch.cr;
 }
 
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
 {
vcpu->arch.xer = val;
 }
 
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
return vcpu->arch.xer;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 4d70df2..d75be59 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -870,7 +870,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
blt hdec_soon
 
ld  r6, VCPU_CTR(r4)
-   lwz r7, VCPU_XER(r4)
+   ld  r7, VCPU_XER(r4)
 
mtctr   r6
mtxer   r7
@@ -1103,7 +1103,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mfctr   r3
mfxer   r4
std r3, VCPU_CTR(r9)
-   stw r4, VCPU_XER(r9)
+   std r4, VCPU_XER(r9)
 
/* If this is a page table miss then see if it's theirs or ours */
cmpwi   r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
@@ -1675,7 +1675,7 @@ kvmppc_hdsi:
bl  kvmppc_msr_interrupt
 fast_interrupt_c_return:
 6: ld  r7, VCPU_CTR(r9)
-   lwz r8, VCPU_XER(r9)
+   ld  r8, VCPU_XER(r9)
mtctr   r7
mtxer   r8
mr  r4, r9
diff --git a/arch/powerpc/kvm/book3s_segment.S 
b/arch/powerpc/kvm/book3s_segment.S
index acee37c..ca8f174 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -123,7 +123,7 @@ no_dcbz32_on:
PPC_LL  r8, SVCPU_CTR(r3)
PPC_LL  r9, SVCPU_LR(r3)
lwz r10, SVCPU_CR(r3)
-   lwz r11, SVCPU_XER(r3)
+   PPC_LL  r11, SVCPU_XER(r3)
 
mtctr   r8
mtlr    r9
@@ -237,7 +237,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
mfctr   r8
mflrr9
 
-   stw r5, SVCPU_XER(r13)
+   PPC_STL r5, SVCPU_XER(r13)
PPC_STL r6, SVCPU_FAULT_DAR(r13)
stw r7, SVCPU_FAULT_DSISR(r13)
PPC_STL r8, SVCPU_CTR(r13)
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/1] powerpc/tm: Abort syscalls in active transactions (v2)

2015-06-11 Thread Sam Bobroff
This patch changes the syscall handler to doom (tabort) active
transactions when a syscall is made and return very early without
performing the syscall and keeping side effects to a minimum (no CPU
accounting or system call tracing is performed). Also included is a
new HWCAP2 bit, PPC_FEATURE2_HTM_NOSC, to indicate this
behaviour to userspace.

Currently, the system call instruction automatically suspends an
active transaction which causes side effects to persist when an active
transaction fails.

This does change the kernel's behaviour, but in a way that was
documented as unsupported.  It doesn't reduce functionality as
syscalls will still be performed after tsuspend; it just requires that
the transaction be explicitly suspended.  It also provides a
consistent interface and makes the behaviour of user code
substantially the same across powerpc and platforms that do not
support suspended transactions (e.g. x86 and s390).

Performance measurements using
http://ozlabs.org/~anton/junkcode/null_syscall.c indicate the cost of
a normal (non-aborted) system call increases by about 0.25%.

Signed-off-by: Sam Bobroff 
---

 Documentation/powerpc/transactional_memory.txt  | 32 -
 arch/powerpc/include/asm/cputable.h | 10 
 arch/powerpc/include/uapi/asm/cputable.h|  1 +
 arch/powerpc/include/uapi/asm/tm.h  |  2 +-
 arch/powerpc/kernel/cputable.c  |  4 +++-
 arch/powerpc/kernel/entry_64.S  | 28 ++
 tools/testing/selftests/powerpc/tm/tm-syscall.c |  3 ++-
 7 files changed, 57 insertions(+), 23 deletions(-)

diff --git a/Documentation/powerpc/transactional_memory.txt 
b/Documentation/powerpc/transactional_memory.txt
index ded6979..ba0a2a4 100644
--- a/Documentation/powerpc/transactional_memory.txt
+++ b/Documentation/powerpc/transactional_memory.txt
@@ -74,22 +74,23 @@ Causes of transaction aborts
 Syscalls
 
 
-Performing syscalls from within transaction is not recommended, and can lead
-to unpredictable results.
+Syscalls made from within an active transaction will not be performed and the
+transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL
+| TM_CAUSE_PERSISTENT.
 
-Syscalls do not by design abort transactions, but beware: The kernel code will
-not be running in transactional state.  The effect of syscalls will always
-remain visible, but depending on the call they may abort your transaction as a
-side-effect, read soon-to-be-aborted transactional data that should not remain
-invisible, etc.  If you constantly retry a transaction that constantly aborts
-itself by calling a syscall, you'll have a livelock & make no progress.
+Syscalls made from within a suspended transaction are performed as normal and
+the transaction is not explicitly doomed by the kernel.  However, what the
+kernel does to perform the syscall may result in the transaction being doomed
+by the hardware.  The syscall is performed in suspended mode so any side
+effects will be persistent, independent of transaction success or failure.  No
+guarantees are provided by the kernel about which syscalls will affect
+transaction success.
 
-Simple syscalls (e.g. sigprocmask()) "could" be OK.  Even things like write()
-from, say, printf() should be OK as long as the kernel does not access any
-memory that was accessed transactionally.
-
-Consider any syscalls that happen to work as debug-only -- not recommended for
-production use.  Best to queue them up till after the transaction is over.
+Care must be taken when relying on syscalls to abort during active transactions
+if the calls are made via a library.  Libraries may cache values (which may
+give the appearance of success) or perform operations that cause transaction
+failure before entering the kernel (which may produce different failure codes).
+Examples are glibc's getpid() and lazy symbol resolution.
 
 
 Signals
@@ -176,8 +177,7 @@ kernel aborted a transaction:
  TM_CAUSE_RESCHED   Thread was rescheduled.
  TM_CAUSE_TLBI  Software TLB invalid.
  TM_CAUSE_FAC_UNAV  FP/VEC/VSX unavailable trap.
- TM_CAUSE_SYSCALL   Currently unused; future syscalls that must abort
-transactions for consistency will use this.
+ TM_CAUSE_SYSCALL   Syscall from active transaction.
  TM_CAUSE_SIGNAL    Signal delivered.
  TM_CAUSE_MISC  Currently unused.
  TM_CAUSE_ALIGNMENT Alignment fault.
diff --git a/arch/powerpc/include/asm/cputable.h 
b/arch/powerpc/include/asm/cputable.h
index 6367b83..4994648 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -242,11 +242,13 @@ enum {
 
 /* We only set the TM feature if the kernel was compiled with TM supprt */
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-#define CPU_FTR_TM_COMP        CPU_FTR_TM
-#define PPC_FEATURE2_HTM_COMP  PPC_FEATURE2_HTM
+#define CPU_FTR_TM_COMP 

Re: [PATCH v3 1/1] KVM: PPC: Book3S: correct width in XER handling

2015-08-05 Thread Sam Bobroff
Ping?

I think I've addressed all the comments in this version. Is there anything else
I need to look at?

Cheers,
Sam.

On Wed, May 27, 2015 at 09:56:57AM +1000, Sam Bobroff wrote:
> In 64 bit kernels, the Fixed Point Exception Register (XER) is a 64
> bit field (e.g. in kvm_regs and kvm_vcpu_arch) and in most places it is
> accessed as such.
> 
> This patch corrects places where it is accessed as a 32 bit field by a
> 64 bit kernel.  In some cases this is via a 32 bit load or store
> instruction which, depending on endianness, will cause either the
> lower or upper 32 bits to be missed.  In another case it is cast as a
> u32, causing the upper 32 bits to be cleared.
> 
> This patch corrects those places by extending the access methods to
> 64 bits.
> 
> Signed-off-by: Sam Bobroff 
> ---
> 
> v3:
> Adjust booke set/get xer to match book3s.
> 
> v2:
> 
> Also extend kvmppc_book3s_shadow_vcpu.xer to 64 bit.
> 
>  arch/powerpc/include/asm/kvm_book3s.h |4 ++--
>  arch/powerpc/include/asm/kvm_book3s_asm.h |2 +-
>  arch/powerpc/include/asm/kvm_booke.h  |4 ++--
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S   |6 +++---
>  arch/powerpc/kvm/book3s_segment.S |4 ++--
>  5 files changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
> b/arch/powerpc/include/asm/kvm_book3s.h
> index b91e74a..05a875a 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -225,12 +225,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
>   return vcpu->arch.cr;
>  }
>  
> -static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
> +static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
>  {
>   vcpu->arch.xer = val;
>  }
>  
> -static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
> +static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
>  {
>   return vcpu->arch.xer;
>  }
> diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h 
> b/arch/powerpc/include/asm/kvm_book3s_asm.h
> index 5bdfb5d..c4ccd2d 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_asm.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
> @@ -112,7 +112,7 @@ struct kvmppc_book3s_shadow_vcpu {
>   bool in_use;
>   ulong gpr[14];
>   u32 cr;
> - u32 xer;
> + ulong xer;
>   ulong ctr;
>   ulong lr;
>   ulong pc;
> diff --git a/arch/powerpc/include/asm/kvm_booke.h 
> b/arch/powerpc/include/asm/kvm_booke.h
> index 3286f0d..bc6e29e 100644
> --- a/arch/powerpc/include/asm/kvm_booke.h
> +++ b/arch/powerpc/include/asm/kvm_booke.h
> @@ -54,12 +54,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
>   return vcpu->arch.cr;
>  }
>  
> -static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
> +static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
>  {
>   vcpu->arch.xer = val;
>  }
>  
> -static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
> +static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
>  {
>   return vcpu->arch.xer;
>  }
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
> b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 4d70df2..d75be59 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -870,7 +870,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
>   blt hdec_soon
>  
>   ld  r6, VCPU_CTR(r4)
> - lwz r7, VCPU_XER(r4)
> + ld  r7, VCPU_XER(r4)
>  
>   mtctr   r6
>   mtxer   r7
> @@ -1103,7 +1103,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>   mfctr   r3
>   mfxer   r4
>   std r3, VCPU_CTR(r9)
> - stw r4, VCPU_XER(r9)
> + std r4, VCPU_XER(r9)
>  
>   /* If this is a page table miss then see if it's theirs or ours */
>   cmpwi   r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
> @@ -1675,7 +1675,7 @@ kvmppc_hdsi:
>   bl  kvmppc_msr_interrupt
>  fast_interrupt_c_return:
>  6:   ld  r7, VCPU_CTR(r9)
> - lwz r8, VCPU_XER(r9)
> + ld  r8, VCPU_XER(r9)
>   mtctr   r7
>   mtxer   r8
>   mr  r4, r9
> diff --git a/arch/powerpc/kvm/book3s_segment.S 
> b/arch/powerpc/kvm/book3s_segment.S
> index acee37c..ca8f174 100644
> --- a/arch/powerpc/kvm/book3s_segment.S
> +++ b/arch/powerpc/kvm/book3s_segment.S
> @@ -123,7 +123,7 @@ no_dcbz32_on:
>   PPC_LL  r8, SVCPU_CTR(r3)
>   PPC_LL  r9, SVCPU_LR(r3)
>   lwz r10, SVCPU_CR(r3)
> - lwz r11, SVCPU_XER(r3)
> + PPC_LL  r11, SVCPU_XER(r3)
>  
>   mtctr   r8
>   mtlr   

Re: [PATCH v2] powerpc/xmon: Allow limiting the size of the paca display

2015-08-13 Thread Sam Bobroff
On Wed, Aug 12, 2015 at 09:55:25PM +1000, Michael Ellerman wrote:
> The paca display is already more than 24 lines, which can be problematic
> if you have an old school 80x24 terminal, or more likely you are on a
> virtual terminal which does not scroll for whatever reason.
> 
> We'd like to expand the paca display even more, so add a way to limit
> the number of lines that are displayed.
> 
> This adds a third form of 'dp' which is 'dp # #', where the first number
> is the cpu, and the second is the number of lines to display.
> 
> Example output:
> 
>   5:mon> dp 3 6
>   paca for cpu 0x3 @ cfe00c00:
>possible = yes
>present  = yes
>online   = yes
>lock_token   = 0x8000  (0xa)
>paca_index   = 0x3 (0x8)

Michael,

This patch inspired me to do the additional work to make the output paged, more
like the memory dump commands.

I'll post it shortly as "powerpc/xmon: Paged output for paca display".

Cheers,
Sam.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/1] powerpc/xmon: Paged output for paca display

2015-08-13 Thread Sam Bobroff
The paca display is already more than 24 lines, which can be problematic
if you have an old school 80x24 terminal, or more likely you are on a
virtual terminal which does not scroll for whatever reason.

This adds an optional letter to the "dp" and "dpa" xmon commands
("dpp" and "dppa"), which will enable a "per-page" display (with 16
line pages): the first page will be displayed and if there was data
that didn't fit, it will display a message indicating that the user can
use enter to display the next page. The intent is that this feels
similar to the way the memory display functions work.

This is implemented by running over the entire output both for the
initial command and for each subsequent page: the visible part is
clipped out by checking line numbers. Handling the empty command as
"more" is done by writing a special command into a static buffer that
indicates where to move the sliding visibility window. This is similar
to the approach used for the memory dump commands except that the
state data is encoded into the "last_cmd" string, rather than a set of
static variables. The memory dump commands could probably be rewritten
to make use of the same buffer and remove their other static
variables.

Sample output:

0:mon> dpp1
paca for cpu 0x1 @ cfdc0480:
 possible = yes
 present  = yes
 online   = yes
 lock_token   = 0x8000  (0x8)
 paca_index   = 0x1 (0xa)
 kernel_toc   = 0xc0eb2400  (0x10)
 kernelbase   = 0xc000  (0x18)
 kernel_msr   = 0xb0001032  (0x20)
 emergency_sp = 0xc0003ffe8000  (0x28)
 mc_emergency_sp  = 0xc0003ffe4000  (0x2e0)
 in_mce   = 0x0 (0x2e8)
 data_offset  = 0x7f17  (0x30)
 hw_cpu_id= 0x8 (0x38)
 cpu_start= 0x1 (0x3a)
 kexec_state  = 0x0 (0x3b)
[Enter for next page]
0:mon>
 __current= 0xc0007e696620  (0x290)
 kstack   = 0xc0007e6ebe30  (0x298)
 stab_rr  = 0xb (0x2a0)
 saved_r1 = 0xc0007ef37860  (0x2a8)
 trap_save= 0x0 (0x2b8)
 soft_enabled = 0x0 (0x2ba)
 irq_happened = 0x1 (0x2bb)
 io_sync  = 0x0 (0x2bc)
 irq_work_pending = 0x0 (0x2bd)
 nap_state_lost   = 0x0 (0x2be)
0:mon>

(Based on a similar patch by Michael Ellerman 
"[v2] powerpc/xmon: Allow limiting the size of the paca display".
This patch is an alternative and cannot coexist with the original.)

Signed-off-by: Sam Bobroff 
---

 arch/powerpc/xmon/xmon.c | 82 
 1 file changed, 62 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index e599259..9157286 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -72,6 +72,7 @@ static int xmon_gate;
 
 static unsigned long in_xmon __read_mostly = 0;
 
+static char last_cmd_buf[128];
 static unsigned long adrs;
 static int size = 1;
 #define MAX_DUMP (128 * 1024)
@@ -204,8 +205,8 @@ Commands:\n\
   dldump the kernel log buffer\n"
 #ifdef CONFIG_PPC64
   "\
-  dp[#]dump paca for current cpu, or cpu #\n\
-  dpa  dump paca for all possible cpus\n"
+  dp[p][#] dump paca for current cpu, or cpu # (p = paged)\n\
+  dp[p]a   dump paca for all possible cpus (p = paged)\n"
 #endif
   "\
   dr   dump stream of raw bytes\n\
@@ -2070,7 +2071,17 @@ static void xmon_rawdump (unsigned long adrs, long ndump)
 }
 
 #ifdef CONFIG_PPC64
-static void dump_one_paca(int cpu)
+static bool line_visible(unsigned long start, unsigned long count,
+unsigned long *line) {
+   bool rv = (!count
+   || ((*line >= start) && (*line < (start + count))));
+
+   (*line)++;
+   return rv;
+}
+
+static void dump_one_paca(int cpu, unsigned long start,
+ unsigned long count, unsigned long *line)
 {
struct paca_struct *p;
 
@@ -2084,15 +2095,22 @@ static void dump_one_paca(int cpu)
 
p = &paca[cpu];
 
-   printf("paca for cpu 0x%x @ %p:\n", cpu, p);
+#define VPRINT(...) do { \
+   if (line_visible(start, count, line)) \
+   printf(__VA_ARGS__); \
+} while (0)
+   VPRINT("paca for cpu 0x%x @ %p:\n", cpu, p);
 
-   printf(" %-*s = %s\n", 16, "possible", cpu_possible(cpu) ? "yes" : 
"no");
-   printf(" %-*s = %s\n", 16, "present", cpu_present(cpu) ? "yes" : "no");
-   printf(" %-*s = %s\n", 16, "online", cpu_online(cpu) ? "yes" : "no");
+   VPRINT(" %-*s = %s\n", 16, "possible", cpu_possi

Re: [PATCH v3] powerpc: Add an inline function to update POWER8 HID0

2015-08-13 Thread Sam Bobroff
On Wed, Aug 05, 2015 at 12:38:31PM +0530, Gautham R. Shenoy wrote:
> Section 3.7 of Version 1.2 of the Power8 Processor User's Manual
> prescribes that updates to HID0 be preceded by a SYNC instruction and
> followed by an ISYNC instruction (Page 91).
> 
> Create an inline function name update_power8_hid0() which follows this
> recipe and invoke it from the static split core path.
> 
> Signed-off-by: Gautham R. Shenoy 

Hi Gautham,

I've tested this on a Power 8 machine and verified that it is able to change
split modes and that when doing so the new code is used.

Reviewed-by: Sam Bobroff 
Tested-by: Sam Bobroff 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] book3s_hv_rmhandlers:Pass the correct trap argument to kvmhv_commence_exit

2015-08-13 Thread Sam Bobroff
On Thu, May 21, 2015 at 01:57:04PM +0530, Gautham R. Shenoy wrote:
> In guest_exit_cont we call kvmhv_commence_exit which expects the trap
> number as the argument. However r3 doesn't contain the trap number at
> this point and as a result we would be calling the function with a
> spurious trap number.
> 
> Fix this by copying r12 into r3 before calling kvmhv_commence_exit as
> r12 contains the trap number
> 
> Signed-off-by: Gautham R. Shenoy 

Hi Gautham,

I agree with your logic: r3 is quite clearly corrupted in that path. So:

Reviewed-by: Sam Bobroff 

Just one comment: Do you have a case of this causing some visible problem due
to the corrupted trap number? (I'll test the patch if you do.)

Cheers,
Sam.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [1/1] powerpc/xmon: Paged output for paca display

2015-08-19 Thread Sam Bobroff
On Tue, Aug 18, 2015 at 04:26:32PM +1000, Michael Ellerman wrote:
> On Fri, 2015-14-08 at 02:55:14 UTC, Sam bobroff wrote:
> > The paca display is already more than 24 lines, which can be problematic
> > if you have an old school 80x24 terminal, or more likely you are on a
> > virtual terminal which does not scroll for whatever reason.
> > 
> > This adds an optional letter to the "dp" and "dpa" xmon commands
> > ("dpp" and "dppa"), which will enable a "per-page" display (with 16
> > line pages): the first page  will be displayed and if there was data
> > that didn't fit, it will display a message indicating that the user can
> > use enter to display the next page. The intent is that this feels
> > similar to the way the memory display functions work.
> > 
> > This is implemented by running over the entire output both for the
> > initial command and for each subsequent page: the visible part is
> > clipped out by checking line numbers. Handling the empty command as
> > "more" is done by writing a special command into a static buffer that
> > indicates where to move the sliding visibility window. This is similar
> > to the approach used for the memory dump commands except that the
> > state data is encoded into the "last_cmd" string, rather than a set of
> > static variables. The memory dump commands could probably be rewritten
> > to make use of the same buffer and remove their other static
> > variables.
> > 
> > Sample output:
> > 
> > 0:mon> dpp1
> > paca for cpu 0x1 @ cfdc0480:
> >  possible = yes
> >  present  = yes
> >  online   = yes
> >  lock_token   = 0x8000  (0x8)
> >  paca_index   = 0x1 (0xa)
> >  kernel_toc   = 0xc0eb2400  (0x10)
> >  kernelbase   = 0xc000  (0x18)
> >  kernel_msr   = 0xb0001032  (0x20)
> >  emergency_sp = 0xc0003ffe8000  (0x28)
> >  mc_emergency_sp  = 0xc0003ffe4000  (0x2e0)
> >  in_mce   = 0x0 (0x2e8)
> >  data_offset  = 0x7f17  (0x30)
> >  hw_cpu_id= 0x8 (0x38)
> >  cpu_start= 0x1 (0x3a)
> >  kexec_state  = 0x0 (0x3b)
> > [Enter for next page]
> > 0:mon>
> >  __current= 0xc0007e696620  (0x290)
> >  kstack   = 0xc0007e6ebe30  (0x298)
> >  stab_rr  = 0xb (0x2a0)
> >  saved_r1 = 0xc0007ef37860  (0x2a8)
> >  trap_save= 0x0 (0x2b8)
> >  soft_enabled = 0x0 (0x2ba)
> >  irq_happened = 0x1 (0x2bb)
> >  io_sync  = 0x0 (0x2bc)
> >  irq_work_pending = 0x0 (0x2bd)
> >  nap_state_lost   = 0x0 (0x2be)
> > 0:mon>
> > 
> > (Based on a similar patch by Michael Ellerman 
> > "[v2] powerpc/xmon: Allow limiting the size of the paca display".
> > This patch is an alternative and cannot coexist with the original.)
> 
> 
> So this is nice, but ... the diff is twice the size of my version, plus 128
> bytes of BSS, so I'm not sure the added benefit is sufficient to justify the
> added code complexity.
> 
> But you can convince me otherwise if you feel strongly about it.
> 
> cheers

I do think the output is a lot better paged like this :-)

The 128 byte buffer is a lot more than it needs for this particular command; it
could quite comfortably be lowered to about 32 (I was leaving space for other
commands to use it but there aren't any so far). I'll do this and repost.

Also, because the last_cmd_buf system is not specific to the paca display, it
could be used by the other paged commands (like the memory dumps). If we did
this we could (probably) remove ndump, nidump and ncsum which are all longs,
although I haven't worked out how much buffer space would be needed in
last_cmd_buf to support these (they have their own paging code but the
positional information could be stored in the string buffer). It's probably not
much work but might be a bit tricky. Do you think it's worth doing?

Since we're looking at memory usage, it looks like "tmpstr[128]" could be
removed without much work, saving 128 bytes and removing an unnecessary global
variable. If it actually turns out to be easy to do I'll post a separate patch.

Thanks for the review,
Sam.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/3] powerpc: Document sysfs DSCR interface

2014-05-20 Thread Sam Bobroff
Add some documentation about ...

/sys/devices/system/cpu/dscr_default
/sys/devices/system/cpu/cpuN/dscr

... to Documentation/ABI/stable.

Signed-off-by: Sam Bobroff 
---
 Documentation/ABI/stable/sysfs-devices-system-cpu |   25 +
 1 file changed, 25 insertions(+)
 create mode 100644 Documentation/ABI/stable/sysfs-devices-system-cpu

diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu 
b/Documentation/ABI/stable/sysfs-devices-system-cpu
new file mode 100644
index 000..33c133e
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-devices-system-cpu
@@ -0,0 +1,25 @@
+What:  /sys/devices/system/cpu/dscr_default
+Date:  13-May-2014
+KernelVersion: v3.15.0
+Contact:
+Description:   Writes are equivalent to writing to
+   /sys/devices/system/cpu/cpuN/dscr on all CPUs.
+   Reads return the last written value or 0.
+   This value is not a global default: it is a way to set
+   all per-CPU defaults at the same time.
+Values:       64 bit unsigned integer (bit field)
+
+What:  /sys/devices/system/cpu/cpu[0-9]+/dscr
+Date:  13-May-2014
+KernelVersion: v3.15.0
+Contact:
+Description:   Default value for the Data Stream Control Register (DSCR) on
+   a CPU.
+   This default value is used when the kernel is executing and
+   for any process that has not set the DSCR itself.
+   If a process ever sets the DSCR (via direct access to the
+   SPR) that value will be persisted for that process and used
+   on any CPU where it executes (overriding the value described
+   here).
+   If set by a process it will be inherited by child processes.
+Values:       64 bit unsigned integer (bit field)
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/3] powerpc: fix regression of per-CPU DSCR setting

2014-05-20 Thread Sam Bobroff
Since commit "efcac65 powerpc: Per process DSCR + some fixes (try#4)"
it is no longer possible to set the DSCR on a per-CPU basis.

The old behaviour was to manipulate the DSCR SPR directly but this is no
longer sufficient: the value is quickly overwritten by context switching.

This patch stores the per-CPU DSCR value in a kernel variable rather than
directly in the SPR and it is used whenever a process has not set the DSCR
itself. The sysfs interface (/sys/devices/system/cpu/cpuN/dscr) is unchanged.

Writes to the old global default (/sys/devices/system/cpu/dscr_default)
now set all of the per-CPU values and reads return the last written value.

The new per-CPU default is added to the paca_struct and is used everywhere
outside of sysfs.c instead of the old global default.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/include/asm/paca.h |3 +++
 arch/powerpc/kernel/asm-offsets.c   |1 +
 arch/powerpc/kernel/entry_64.S  |9 +
 arch/powerpc/kernel/sysfs.c |   32 ++-
 arch/powerpc/kernel/tm.S|   16 
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |3 +--
 6 files changed, 29 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 8e956a0..bb0bd25 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -92,7 +92,10 @@ struct paca_struct {
struct slb_shadow *slb_shadow_ptr;
struct dtl_entry *dispatch_log;
struct dtl_entry *dispatch_log_end;
+#endif /* CONFIG_PPC_STD_MMU_64 */
+   u64 dscr_default;   /* per-CPU default DSCR */
 
+#ifdef CONFIG_PPC_STD_MMU_64
/*
 * Now, starting in cacheline 2, the exception save areas
 */
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index dba8140..cba2697 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -247,6 +247,7 @@ int main(void)
 #endif
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
+   DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default));
DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, 
starttime_user));
DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 9fde8a1..911d453 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -387,12 +387,6 @@ _GLOBAL(ret_from_kernel_thread)
li  r3,0
b   syscall_exit
 
-   .section".toc","aw"
-DSCR_DEFAULT:
-   .tc dscr_default[TC],dscr_default
-
-   .section".text"
-
 /*
  * This routine switches between two different tasks.  The process
  * state of one is saved on its kernel stack.  Then the state
@@ -577,11 +571,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #ifdef CONFIG_PPC64
 BEGIN_FTR_SECTION
lwz r6,THREAD_DSCR_INHERIT(r4)
-   ld  r7,DSCR_DEFAULT@toc(2)
ld  r0,THREAD_DSCR(r4)
cmpwi   r6,0
bne 1f
-   ld  r0,0(r7)
+   ld  r0,PACA_DSCR(r13)
 1:
 BEGIN_FTR_SECTION_NESTED(70)
mfspr   r8, SPRN_FSCR
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index e2a1d6f..67fd2fd 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -484,7 +484,6 @@ SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
 SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
 SYSFS_SPRSETUP(purr, SPRN_PURR);
 SYSFS_SPRSETUP(spurr, SPRN_SPURR);
-SYSFS_SPRSETUP(dscr, SPRN_DSCR);
 SYSFS_SPRSETUP(pir, SPRN_PIR);
 
 /*
@@ -494,12 +493,27 @@ SYSFS_SPRSETUP(pir, SPRN_PIR);
 */
 static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
 static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
-static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
 static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
 static DEVICE_ATTR(pir, 0400, show_pir, NULL);
 
-unsigned long dscr_default = 0;
-EXPORT_SYMBOL(dscr_default);
+static unsigned long dscr_default;
+
+static void read_dscr(void *val)
+{
+   *(unsigned long *)val = get_paca()->dscr_default;
+}
+
+static void write_dscr(void *val)
+{
+   get_paca()->dscr_default = *(unsigned long *)val;
+   if (!current->thread.dscr_inherit) {
+   current->thread.dscr = *(unsigned long *)val;
+   mtspr(SPRN_DSCR, *(unsigned long *)val);
+   }
+}
+
+SYSFS_SPRSETUP_SHOW_STORE(dscr);
+static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
 
 static void add_write_permission_dev_attr(struct device_attribute *attr)
 {
@@ -512,14 +526,6 @@ static ssize_t show_dscr_default(struct device *dev,
return sprintf(buf, "%lx\n", dscr_default);
 }
 

[PATCH 0/3] powerpc: fix regression of per-CPU DSCR setting

2014-05-20 Thread Sam Bobroff
Hello,

This patch corrects a regression on PowerPC CPUs that causes their
per-CPU DSCR SPR value (exposed via /sys/devices/system/cpu/cpuN/dscr) to
be quickly lost during context switching, effectively meaning that the
DSCR can no longer be set on a per-CPU basis.

My intent is to restore the functionality of the per-CPU value in a
way that is compatible with the newer global default and task-specific
DSCR setting system.  Users of either the old or new systems should
now get pretty much what they expect.

A couple of notes:

I've split an existing "ifdef CONFIG_PPC_STD_MMU_64" block in
paca_struct into two parts because it allows dscr_default to be placed
into a cache line hole. (This seems to be the case even without
CONFIG_PPC_STD_MMU_64 being defined.) Comments or ideas on alternative
placements are welcome.

PowerPC context switching is touched but there should not be any
performance cost; if anything it should get slightly faster due to the
per-CPU value being easier to access than the old global default.

Sam Bobroff (3):
  powerpc: Split __SYSFS_SPRSETUP macro
  powerpc: fix regression of per-CPU DSCR setting
  powerpc: Document sysfs DSCR interface

 Documentation/ABI/stable/sysfs-devices-system-cpu |   25 ++
 arch/powerpc/include/asm/paca.h   |3 ++
 arch/powerpc/kernel/asm-offsets.c |1 +
 arch/powerpc/kernel/entry_64.S|9 +---
 arch/powerpc/kernel/sysfs.c   |   51 +
 arch/powerpc/kernel/tm.S  |   16 ++-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |3 +-
 7 files changed, 67 insertions(+), 41 deletions(-)
 create mode 100644 Documentation/ABI/stable/sysfs-devices-system-cpu

-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/3] powerpc: Split __SYSFS_SPRSETUP macro

2014-05-20 Thread Sam Bobroff
Split the __SYSFS_SPRSETUP macro into two parts so that registers requiring
custom read and write functions can use common code for their show and store
functions.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/sysfs.c |   19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index d90d4b7..e2a1d6f 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -404,7 +404,7 @@ void ppc_enable_pmcs(void)
 }
 EXPORT_SYMBOL(ppc_enable_pmcs);
 
-#define __SYSFS_SPRSETUP(NAME, ADDRESS, EXTRA) \
+#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \
 static void read_##NAME(void *val) \
 { \
*(unsigned long *)val = mfspr(ADDRESS); \
@@ -413,7 +413,9 @@ static void write_##NAME(void *val) \
 { \
EXTRA; \
mtspr(ADDRESS, *(unsigned long *)val);  \
-} \
+}
+
+#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \
 static ssize_t show_##NAME(struct device *dev, \
struct device_attribute *attr, \
char *buf) \
@@ -436,10 +438,15 @@ static ssize_t __used \
return count; \
 }
 
-#define SYSFS_PMCSETUP(NAME, ADDRESS)  \
-   __SYSFS_SPRSETUP(NAME, ADDRESS, ppc_enable_pmcs())
-#define SYSFS_SPRSETUP(NAME, ADDRESS)  \
-   __SYSFS_SPRSETUP(NAME, ADDRESS, )
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+   __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \
+   __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+#define SYSFS_SPRSETUP(NAME, ADDRESS) \
+   __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \
+   __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+   __SYSFS_SPRSETUP_SHOW_STORE(NAME)
 
 /* Let's define all possible registers, we'll only hook up the ones
  * that are implemented on the current processor
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/1] powerpc: correct DSCR during TM context switch

2014-06-03 Thread Sam Bobroff
Correct the DSCR SPR becoming temporarily corrupted when a task is
context switched when within a transaction. It is corrected when
the transaction is aborted (which will happen after a context switch)
but if the task has suspended (TSUSPEND) the transaction the incorrect
value can be seen.

The problem is caused by saving a thread's DSCR after it has already
been reverted to the CPU's default value:

__switch_to() calls __switch_to_tm()
which calls tm_reclaim_task()
which calls tm_reclaim_thread()
which calls tm_reclaim() where the DSCR is reset
__switch_to() calls _switch
_switch() saves the DSCR to thread.dscr

The fix is to treat the DSCR similarly to the TAR and save it early
in __switch_to().

The program below will expose the problem:

  #include 
  #include 
  #include 
  #include 
  #include 

  #define TBEGIN      ".long 0x7C00051D ;"
  #define TEND        ".long 0x7C00055D ;"
  #define TCHECK      ".long 0x7C00059C ;"
  #define TSUSPEND    ".long 0x7C0005DD ;"
  #define TRESUME     ".long 0x7C2005DD ;"
  #define SPRN_TEXASR 0x82
  #define SPRN_DSCR   0x03

  int main(void) {
uint64_t i = 0, rv, dscr1 = 1, dscr2, texasr;

for (;;) {
  rv = 1;
  asm __volatile__ (
  "ld  3, %[dscr1];"
  "mtspr   %[sprn_dscr], 3;"
  TBEGIN
  "beq 1f;"
  TSUSPEND
  "2: ;"
  TCHECK
  "bc  4, 0, 2b;"
  "mfspr   3, %[sprn_dscr];"
  "std 3, %[dscr2];"
  "mfspr   3, %[sprn_texasr];"
  "std 3, %[texasr];"
  TRESUME
  TEND
  "li  %[rv], 0;"
  "1: ;"
  : [rv]"=r"(rv), [dscr2]"=m"(dscr2), [texasr]"=m"(texasr)
  : [dscr1]"m"(dscr1)
  , [sprn_dscr]"i"(SPRN_DSCR), [sprn_texasr]"i"(SPRN_TEXASR)
  : "memory", "r3"
  );
  assert(rv);
  if ((texasr >> 56) == TM_CAUSE_RESCHED) {
putchar('!');
fflush(stdout);
i++;
  }
  else {
putchar('.');
fflush(stdout);
  }
  if (dscr2 != dscr1) {
printf("\n DSCR incorrect: 0x%lx (expecting 0x%lx)\n", dscr2, 
dscr1);
exit(EXIT_FAILURE);
  }
  if (i > 10) {
printf("\n DSCR TM context switching seems OK.\n");
exit(EXIT_SUCCESS);
  }
}
  }

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/include/asm/switch_to.h |6 --
 arch/powerpc/kernel/entry_64.S   |6 --
 arch/powerpc/kernel/process.c|8 
 3 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 2737f46..3efd0e5 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -16,13 +16,15 @@ struct thread_struct;
 extern struct task_struct *_switch(struct thread_struct *prev,
   struct thread_struct *next);
 #ifdef CONFIG_PPC_BOOK3S_64
-static inline void save_tar(struct thread_struct *prev)
+static inline void save_early_sprs(struct thread_struct *prev)
 {
if (cpu_has_feature(CPU_FTR_ARCH_207S))
prev->tar = mfspr(SPRN_TAR);
+   if (cpu_has_feature(CPU_FTR_DSCR))
+   prev->dscr = mfspr(SPRN_DSCR);
 }
 #else
-static inline void save_tar(struct thread_struct *prev) {}
+static inline void save_early_sprs(struct thread_struct *prev) {}
 #endif
 
 extern void enable_kernel_fp(void);
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 662c6dd..a107f4a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -432,12 +432,6 @@ BEGIN_FTR_SECTION
std r24,THREAD_VRSAVE(r3)
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_PPC64
-BEGIN_FTR_SECTION
-   mfspr   r25,SPRN_DSCR
-   std r25,THREAD_DSCR(r3)
-END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
-#endif
and.r0,r0,r22
beq+1f
andcr22,r22,r0
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e247898..8d2065e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -771,15 +771,15 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
WARN_ON(!irqs_disabled());
 
-   /* Back up the TAR across context switches.
+   /* Back up the TAR and DSCR across context switches.
 * Note that the TAR is not available for use in the kernel.  (To
 * provide this, the TAR should be backed up/restored on exception
 * entry/exit instead, and be in pt_regs.  FIXME, this should be in
 * pt_regs anyway (for debug).)
-* Save the TAR here before we do tr

Re: [PATCH 1/1] powerpc: correct DSCR during TM context switch

2014-06-04 Thread Sam Bobroff
On 04/06/14 20:03, Michael Neuling wrote:
> On Wed, 2014-06-04 at 17:31 +1000, Michael Ellerman wrote:
>> Hi Sam,
>>
>> Comments inline ..
> 
> Ditto

Responses inline...

>> On Wed, 2014-06-04 at 13:33 +1000, Sam Bobroff wrote:
>>> Correct the DSCR SPR becoming temporarily corrupted when a task is
>>> context switched when within a transaction. It is corrected when
>>> the transaction is aborted (which will happen after a context switch)
>>> but if the task has suspended (TSUSPEND) the transaction the incorrect
>>> value can be seen.
>>
>> I don't quite follow this description. How is it corrected when the 
>> transaction
>> is aborted, and when does that usually happen? If that happens the task can't
>> ever see the corrupted value?
>>
>> To hit the suspended case, the task starts a transaction, suspends it, is 
>> then
>> context switched out and back in, and at that point it can see the wrong 
>> value?
> 
> Yep, that's it and it's corrupted until the transaction is rolled back
> (normally at the tresume).  At the tresume it gets rolled back to the
> checkpointed value at tbegin and is no longer corrupt.
>

I'll re-work the explanation to be clearer about how it becomes corrupt and how 
it is corrected.

>>> The problem is caused by saving a thread's DSCR after it has already
>>> been reverted to the CPU's default value:
> 
> No it's lost at that point as we've not saved it and it was overwritten
> when we did the treclaim.
> 
>>>
>>> __switch_to() calls __switch_to_tm()
>>> which calls tm_reclaim_task()
>>> which calls tm_reclaim_thread()
>>> which calls tm_reclaim() where the DSCR is reset
>>
>> Where the DSCR is set to DSCR_DEFAULT ? Or now PACA_DSCR since your previous
>> patches?
>>
>> Could we instead fix the bug there by reverting to the thread's DSCR value?
> 
> We really need to save it earlier, before the treclaim which will
> override it.

I'll try to improve this explanation as well.
 
>>> __switch_to() calls _switch
>>> _switch() saves the DSCR to thread.dscr
>>>
>>> The fix is to treat the DSCR similarly to the TAR and save it early
>>> in __switch_to().
>>>
>>> The program below will expose the problem:
>>
>>
>> Can you drop this in tools/testing/selftests/powerpc/tm ?
>>
>> You'll need to create that directory, you can ape the Makefile from the pmu
>> directory, it should be fairly obvious. See the pmu tests for how to 
>> integrate
>> with the test harness etc., or bug me if it's not straight forward.

Will do :-)

>>> diff --git a/arch/powerpc/include/asm/switch_to.h 
>>> b/arch/powerpc/include/asm/switch_to.h
>>> index 2737f46..3efd0e5 100644
>>> --- a/arch/powerpc/include/asm/switch_to.h
>>> +++ b/arch/powerpc/include/asm/switch_to.h
>>> @@ -16,13 +16,15 @@ struct thread_struct;
>>>  extern struct task_struct *_switch(struct thread_struct *prev,
>>>struct thread_struct *next);
>>>  #ifdef CONFIG_PPC_BOOK3S_64
>>> -static inline void save_tar(struct thread_struct *prev)
>>> +static inline void save_early_sprs(struct thread_struct *prev)
>>>  {
>>> if (cpu_has_feature(CPU_FTR_ARCH_207S))
>>> prev->tar = mfspr(SPRN_TAR);
>>> +   if (cpu_has_feature(CPU_FTR_DSCR))
>>> +   prev->dscr = mfspr(SPRN_DSCR);
>>>  }
>>
>> Are we going to end up saving more SPRs in this code? What makes the TAR & 
>> DSCR
>> special vs everything else?
> 
> There are only a limited set of SPRs that TM checkpoints.  The full list
> is CR, LR, CTR, FPSCR, AMR, PPR, VRSAVE, VSCR, DSCR, and TAR.  
> 
> http://www.scribd.com/doc/142877680/PowerISA-v2-07#outer_page_826
> 
> CR, LR, CTR, PPR are handled really early in the exception handler
> 
> FPSCR, VSCR are done in the FP/VMX/VSX code.
> 
> AMR we don't care about.
> 
> That just leaves the DSCR and the TAR for here
> 
> ... and the VRSAVE.  Sam: did you have a patch to save that one early
> too?  I think we talked about it but forgot, or did we decide that it's
> always broken anyway so we don't care? :-D

I thought we'd decided that VRSAVE was already probably broken ;-)

I haven't tested VRSAVE yet so we don't know if it's actually getting corrupted 
in this situation (although it seems likely), and from a quick look at the code 
it's not being treated like DSCR

[PATCH 1/1 v2] powerpc: Correct DSCR during TM context switch

2014-06-04 Thread Sam Bobroff
Correct the DSCR SPR becoming temporarily corrupted if a task is
context switched during a transaction.

The problem occurs while suspending the task and is caused by saving
the DSCR to thread.dscr after it has already been set to the CPU's
default value:

__switch_to() calls __switch_to_tm()
which calls tm_reclaim_task()
which calls tm_reclaim_thread()
which calls tm_reclaim()
where the DSCR is set to the CPU's default
__switch_to() calls _switch()
where thread.dscr is set to the DSCR

When the task is resumed, its transaction will be doomed (as usual)
and the DSCR SPR will be corrupted, although the checkpointed value
will be correct. Therefore the DSCR will be immediately corrected by
the transaction aborting, unless it has been suspended. In that case
the incorrect value can be seen by the task until it resumes the
transaction.

The fix is to treat the DSCR similarly to the TAR and save it early
in __switch_to().

A program exposing the problem is added to the kernel self tests as:
tools/testing/selftests/powerpc/tm/tm-resched-dscr.

Signed-off-by: Sam Bobroff 
---
Changes:
v2:
* Reworked commit message.
* Adjusted test code and added it to kernel self tests.
---
 arch/powerpc/include/asm/switch_to.h   |6 +-
 arch/powerpc/kernel/entry_64.S |6 --
 arch/powerpc/kernel/process.c  |8 +-
 tools/testing/selftests/powerpc/Makefile   |2 +-
 tools/testing/selftests/powerpc/tm/Makefile|   15 
 .../testing/selftests/powerpc/tm/tm-resched-dscr.c |   90 
 6 files changed, 114 insertions(+), 13 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/tm/Makefile
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-resched-dscr.c

diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 2737f46..3efd0e5 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -16,13 +16,15 @@ struct thread_struct;
 extern struct task_struct *_switch(struct thread_struct *prev,
   struct thread_struct *next);
 #ifdef CONFIG_PPC_BOOK3S_64
-static inline void save_tar(struct thread_struct *prev)
+static inline void save_early_sprs(struct thread_struct *prev)
 {
if (cpu_has_feature(CPU_FTR_ARCH_207S))
prev->tar = mfspr(SPRN_TAR);
+   if (cpu_has_feature(CPU_FTR_DSCR))
+   prev->dscr = mfspr(SPRN_DSCR);
 }
 #else
-static inline void save_tar(struct thread_struct *prev) {}
+static inline void save_early_sprs(struct thread_struct *prev) {}
 #endif
 
 extern void enable_kernel_fp(void);
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 662c6dd..a107f4a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -432,12 +432,6 @@ BEGIN_FTR_SECTION
std r24,THREAD_VRSAVE(r3)
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_PPC64
-BEGIN_FTR_SECTION
-   mfspr   r25,SPRN_DSCR
-   std r25,THREAD_DSCR(r3)
-END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
-#endif
and.r0,r0,r22
beq+1f
andcr22,r22,r0
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e247898..8d2065e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -771,15 +771,15 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
WARN_ON(!irqs_disabled());
 
-   /* Back up the TAR across context switches.
+   /* Back up the TAR and DSCR across context switches.
 * Note that the TAR is not available for use in the kernel.  (To
 * provide this, the TAR should be backed up/restored on exception
 * entry/exit instead, and be in pt_regs.  FIXME, this should be in
 * pt_regs anyway (for debug).)
-* Save the TAR here before we do treclaim/trecheckpoint as these
-* will change the TAR.
+* Save the TAR and DSCR here before we do treclaim/trecheckpoint as
+* these will change them.
 */
-   save_tar(&prev->thread);
+   save_early_sprs(&prev->thread);
 
__switch_to_tm(prev);
 
diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 316194f..e1544e8 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -13,7 +13,7 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror 
-DGIT_VERSION='"$(GIT_VERSION)"' -I$(CUR
 
 export CC CFLAGS
 
-TARGETS = pmu copyloops
+TARGETS = pmu copyloops tm
 
 endif
 
diff --git a/tools/testing/selftests/powerpc/tm/Makefile 
b/tools/testing/selftests/powerpc/tm/Makefile
new file mode 100644
index 000..51267f4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -0,0 +1,15 @@
+PROGS := tm-resch

[PATCH 1/1] selftests/powerpc: fix TARGETS in powerpc selftests makefile

2014-06-24 Thread Sam Bobroff
This patch changes the name of a make variable (TARGETS) to prevent it
from colliding with a value set by the user on the command line (as
they are recommended to do by tools/testing/selftests/README.txt).

Before this patch, "make -C tools/testing/selftests TARGETS=powerpc"
would fail.

Signed-off-by: Sam Bobroff 
---
 tools/testing/selftests/powerpc/Makefile | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 54833a7..84795c0 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -13,22 +13,22 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror 
-DGIT_VERSION='"$(GIT_VERSION)"' -I$(CUR
 
 export CC CFLAGS
 
-TARGETS = pmu copyloops mm tm
+SUB_TARGETS = pmu copyloops mm tm
 
 endif
 
 all:
-   @for TARGET in $(TARGETS); do \
+   @for TARGET in $(SUB_TARGETS); do \
$(MAKE) -C $$TARGET all; \
done;
 
 run_tests: all
-   @for TARGET in $(TARGETS); do \
+   @for TARGET in $(SUB_TARGETS); do \
$(MAKE) -C $$TARGET run_tests; \
done;
 
 clean:
-   @for TARGET in $(TARGETS); do \
+   @for TARGET in $(SUB_TARGETS); do \
$(MAKE) -C $$TARGET clean; \
done;
rm -f tags
-- 
1.9.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V3 0/3] Add new PowerPC specific ELF core notes

2014-07-17 Thread Sam Bobroff
On 17/07/14 21:14, Michael Neuling wrote:
> 
> On Jul 17, 2014 9:11 PM, "Benjamin Herrenschmidt"
> mailto:b...@kernel.crashing.org>> wrote:
>>
>> > >
>> > >> Outstanding Issues
>> > >> ==
>> > >> (1) Running DSCR register value inside a transaction does not
> seem to be saved
>> > >> at thread.dscr when the process stops for ptrace examination.
>> > >
>> > > Hey Ben,
>> > >
>> > > Any updates on this patch series ?
>> >
>> > Ben,
>> >
>> > Any updates on this patch series ?
>>
>> I haven't had a chance to review yet, I was hoping somebody else would..
>>
>> Have you made any progress vs. the DSCR outstanding issue mentioned
>> above ?
> 
> The DSCR issue should be resolved with Sam Bobroff's recent  DSCR
> fixes.  I've not tested them though.
> 
> Actually... Sam did you review this series?
> 
> Mikey
> 

I did, and applying "powerpc: Correct DSCR during TM context switch"
corrected the DSCR value in the test program (the one in the patch notes
for this series).

(In fact, IIRC, the reason for my patch set was the bug exposed by this
one ;-)

Cheers,
Sam.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V3 0/3] Add new PowerPC specific ELF core notes

2014-07-23 Thread Sam Bobroff
On 24/05/14 01:15, Anshuman Khandual wrote:
>   This patch series adds five new ELF core note sections which can be
> used with existing ptrace request PTRACE_GETREGSET/SETREGSET for accessing
> various transactional memory and miscellaneous register sets on PowerPC
> platform. Please find a test program exploiting these new ELF core note
> types on a POWER8 system.
> 
> RFC: https://lkml.org/lkml/2014/4/1/292
> V1:  https://lkml.org/lkml/2014/4/2/43
> V2:  https://lkml.org/lkml/2014/5/5/88
> 
> Changes in V3
> =
> (1) Added two new error paths in every TM related get/set functions when 
> regset
> support is not present on the system (ENODEV) or when the process does not
> have any transaction active (ENODATA) in the context
> 
> (2) Installed the active hooks for all the newly added regset core note types
> 
> Changes in V2
> =
> (1) Removed all the power specific ptrace requests corresponding to new 
> NT_PPC_*
> elf core note types. Now all the register sets can be accessed from ptrace
> through PTRACE_GETREGSET/PTRACE_SETREGSET using the individual NT_PPC* 
> core
> note type instead
> (2) Fixed couple of attribute values for REGSET_TM_CGPR register set
> (3) Renamed flush_tmreg_to_thread as flush_tmregs_to_thread
> (4) Fixed 32 bit checkpointed GPR support
> (5) Changed commit messages accordingly
> 
> Outstanding Issues
> ==
> (1) Running DSCR register value inside a transaction does not seem to be saved
> at thread.dscr when the process stops for ptrace examination.

Since this is fixed by 96d016108640bc2b7fb0ee800737f80923847294, which
is already upstream, you might want to rebase and re-test. It should
pass and then you can remove the outstanding issues :-)

> 
> Test programs

program

When I posted the patch I mentioned above, I was asked to move the test
code into the powerpc kernel selftests so you may want to do this too.

Also, your test program covers everything mine did and more so you might
want to remove mine if you do add this to the selftests.

> =
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 

You should include sys/wait.h for waitpid().

> #include 
> #include 
> #include 
> 
> typedef long long u64;
> typedef unsigned int u32;
> typedef __vector128 vector128;
> 
> /* TM CFPR */
> struct tm_cfpr {
>   u64 fpr[32];
>   u64 fpscr;
> };
> 
> /* TM CVMX */
> struct tm_cvmx {
> vector128 vr[32] __attribute__((aligned(16)));
> vector128 vscr __attribute__((aligned(16)));
>   u32 vrsave; 
> };
> 
> /* TM SPR */
> struct tm_spr_regs {
>   u64 tm_tfhar;
>   u64 tm_texasr;
>   u64 tm_tfiar;
>   u64 tm_orig_msr;
>   u64 tm_tar;
>   u64 tm_ppr;
>   u64 tm_dscr;
> };
> 
> /* Miscellaneous registers */
> struct misc_regs {
>   u64 dscr;
>   u64 ppr;
>   u64 tar;
> };
> 
> /* TM instructions */
> #define TBEGIN  ".long 0x7C00051D ;"
> #define TEND".long 0x7C00055D ;"
> 
> /* SPR number */
> #define SPRN_DSCR 0x3
> #define SPRN_TAR  815
> 
> /* ELF core notes */
> #define NT_PPC_TM_SPR  0x103   /* PowerPC transactional memory 
> special registers */
> #define NT_PPC_TM_CGPR 0x104   /* PowerpC transactional memory 
> checkpointed GPR */
> #define NT_PPC_TM_CFPR 0x105   /* PowerPC transactional memory 
> checkpointed FPR */
> #define NT_PPC_TM_CVMX 0x106   /* PowerPC transactional memory 
> checkpointed VMX */
> #define NT_PPC_MISC0x107   /* PowerPC miscellaneous registers */
> 
> #define VAL1 1
> #define VAL2 2
> #define VAL3 3
> #define VAL4 4
> 
> int main(int argc, char *argv[])
> {
>   struct tm_spr_regs *tmr1;
>   struct pt_regs *pregs1, *pregs2;
>   struct tm_cfpr *fpr, *fpr1;
>   struct misc_regs *dbr1;
>   struct iovec iov;
> 
>   pid_t child;
>   int ret = 0, status = 0, i = 0, flag = 1;

status, i and flag are all unused.

> 
>   pregs2 = (struct pt_regs *) malloc(sizeof(struct pt_regs));
>   fpr = (struct tm_cfpr *) malloc(sizeof(struct tm_cfpr));
> 
>   child = fork();
>   if (child < 0) {
>   printf("fork() failed \n");
>   exit(-1);
>   }
> 
>   /* Child code */
>   if (child == 0) {
>   asm __volatile__(
>   "6: ;"  /* TM checkpointed values */
>   "li 1, %[val1];"/* GPR[1] */
>   ".long 0x7C210166;" /* FPR[1] */
>   "li 2, %[val2];"/* GPR[2] */
>   ".long 0x7C420166;" /* FPR[2] */
>   "mtspr %[tar], 1;"  /* TAR */
>   "mtspr %[dscr], 2;" /* DSCR */
>   "1: ;"
>   TBEGIN  /* TM running values */

Re: [PATCH V3 2/3] powerpc, ptrace: Enable support for transactional memory register sets

2014-07-23 Thread Sam Bobroff
On 24/05/14 01:15, Anshuman Khandual wrote:
> This patch enables get and set of transactional memory related register
> sets through PTRACE_GETREGSET/PTRACE_SETREGSET interface by implementing
> four new powerpc specific register sets i.e REGSET_TM_SPR, REGSET_TM_CGPR,
> REGSET_TM_CFPR, REGSET_CVMX support corresponding to these following new
> ELF core note types added previously in this regard.
> 
>   (1) NT_PPC_TM_SPR
>   (2) NT_PPC_TM_CGPR
>   (3) NT_PPC_TM_CFPR
>   (4) NT_PPC_TM_CVMX
> 
> Signed-off-by: Anshuman Khandual 

Hi Anshuman,

I'm not Ben but I've reviewed your patch as well as I can and I have
some comments that might be useful to you.

First of all, I couldn't get this to compile without CONFIG_VSX and
CONFIG_PPC_TRANSACTIONAL_MEM defined: there are obvious typos ("esle"
instead of "else") and references to fields that aren't defined for
those cases. I haven't mentioned any of those issues below as the
compiler will do that but you should definitely test those configurations.

Also some of the code seems to assume that if CONFIG_VSX is defined then
CONFIG_PPC_TRANSACTIONAL_MEM must also be defined, but that isn't the
case (it's the other way round: CONFIG_PPC_TRANSACTIONAL_MEM implies
CONFIG_VSX).

> ---
>  arch/powerpc/include/asm/switch_to.h |   8 +
>  arch/powerpc/kernel/process.c|  24 ++
>  arch/powerpc/kernel/ptrace.c | 792 
> +--
>  3 files changed, 795 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/switch_to.h 
> b/arch/powerpc/include/asm/switch_to.h
> index 0e83e7d..2737f46 100644
> --- a/arch/powerpc/include/asm/switch_to.h
> +++ b/arch/powerpc/include/asm/switch_to.h
> @@ -80,6 +80,14 @@ static inline void flush_spe_to_thread(struct task_struct 
> *t)
>  }
>  #endif
>  
> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> +extern void flush_tmregs_to_thread(struct task_struct *);
> +#else
> +static inline void flush_tmregs_to_thread(struct task_struct *t)
> +{
> +}
> +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
> +
>  static inline void clear_task_ebb(struct task_struct *t)
>  {
>  #ifdef CONFIG_PPC_BOOK3S_64
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 31d0215..e247898 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -695,6 +695,30 @@ static inline void __switch_to_tm(struct task_struct 
> *prev)
>   }
>  }
>  
> +void flush_tmregs_to_thread(struct task_struct *tsk)
> +{
> + /*
> +  * If task is not current, it should have been flushed
> +  * already to it's thread_struct during __switch_to().
> +  */
> + if (tsk != current)
> + return;
> +
> + preempt_disable();
> + if (tsk->thread.regs) {
> + /*
> +  * If we are still current, the TM state need to
> +  * be flushed to thread_struct as it will be still
> +  * present in the current cpu.
> +  */
> + if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) {
> + __switch_to_tm(tsk);
> + tm_recheckpoint_new_task(tsk);

There is at least one other usage of this pair of calls in order to
"flush" the TM state (in arch_dup_task_struct()), so rather than copying
it you might want to create a new function and call it from both places.
(And include the nice comment from arch_dup_task_struct() that explains
how it works and why.)

> + }
> + }
> + preempt_enable();
> +}
> +
>  /*
>   * This is called if we are on the way out to userspace and the
>   * TIF_RESTORE_TM flag is set.  It checks if we need to reload
> diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
> index 2e3d2bf..17642ef 100644
> --- a/arch/powerpc/kernel/ptrace.c
> +++ b/arch/powerpc/kernel/ptrace.c
> @@ -357,6 +357,17 @@ static int gpr_set(struct task_struct *target, const 
> struct user_regset *regset,
>   return ret;
>  }
>  
> +/*
> + * When any transaction is active, "thread_struct->transact_fp" holds
> + * the current running value of all FPR registers and "thread_struct->
> + * fp_state" holds the last checkpointed FPR registers state for the
> + * current transaction.
> + *
> + * struct data {
> + *   u64 fpr[32];
> + *   u64 fpscr;
> + * };

It would be nice to say why you've included "struct data" in the comment.

> + */
>  static int fpr_get(struct task_struct *target, const struct user_regset 
> *regset,
>  unsigned int pos, unsigned int count,
>  void *kbuf, void __user *ubuf)
> @@ -365,21 +376,41 @@ static int fpr_get(struct task_struct *target, const 
> struct user_regset *regset,
>   u64 buf[33];
>   int i;
>  #endif
> - flush_fp_to_thread(target);
> + if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
> + flush_fp_to_thread(target);
> + flush_altivec_to_thread(target);
> + flush_tmregs_to_thread(target);
> + } else {
> +

suspicious RCU usage clockevents_lock, tick_broadcast_lock, hrtimer_bases.lock

2015-02-12 Thread Sam Bobroff
Hello,

I'm receiving this while booting a vanilla 3.19 kernel on a Power 8 machine:

[2.522179] device-mapper: uevent: version 1.0.3
[2.522741] device-mapper: ioctl: 4.29.0-ioctl (2014-10-28)
initialised: dm-de...@redhat.com
[2.543590]
[2.543630] ===
[2.543709] [ INFO: suspicious RCU usage. ]
[2.543758] 3.19.0samb #2 Not tainted
[2.543802] ---
[2.543847] include/trace/events/timer.h:186 suspicious
rcu_dereference_check() usage!
[2.543940]
[2.543940] other info that might help us debug this:
[2.543940]
[2.544035]
[2.544035] RCU used illegally from idle CPU!
[2.544035] rcu_scheduler_active = 1, debug_locks = 0
[2.544154] RCU used illegally from extended quiescent state!
[2.544234] 3 locks held by swapper/1/0:
[2.544284]  #0:  (clockevents_lock){-.}, at:
[] .clockevents_notify+0x5c/0x320
[2.544464]  #1:  (tick_broadcast_lock){-.-...}, at:
[] .tick_broadcast_oneshot_control+0xe4/0x530
[2.544654]  #2:  (hrtimer_bases.lock#2){-.-...}, at:
[] .__hrtimer_start_range_ns+0x124/0x6e0
[2.544843]
[2.544843] stack backtrace:
[2.544904] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.19.0samb #2
[2.544986] Call Trace:
[2.545023] [c00fdf6f7770] [c0faf118]
.dump_stack+0x98/0xd4 (unreliable)
[2.545124] [c00fdf6f77f0] [c0152b78]
.lockdep_rcu_suspicious+0x138/0x180
[2.545225] [c00fdf6f7880] [c0191374]
.enqueue_hrtimer+0x1c4/0x300
[2.545325] [c00fdf6f7910] [c019276c]
.__hrtimer_start_range_ns+0x1fc/0x6e0
[2.545425] [c00fdf6f7a10] [c01a8e90] .bc_set_next+0xc0/0xf0
[2.545510] [c00fdf6f7aa0] [c01a51f0]
.clockevents_program_event+0x100/0x1f0
[2.545607] [c00fdf6f7b40] [c01a6bac]
.tick_broadcast_set_event+0x6c/0x120
[2.545705] [c00fdf6f7bd0] [c01a7c94]
.tick_broadcast_oneshot_control+0x2b4/0x530
[2.545802] [c00fdf6f7ca0] [c01a4818]
.clockevents_notify+0x2a8/0x320
[2.545898] [c00fdf6f7d70] [c01484f4]
.cpu_startup_entry+0x404/0x730
[2.545995] [c00fdf6f7ec0] [c0044314]
.start_secondary+0x3a4/0x460
[2.546092] [c00fdf6f7f90] [c0008bfc]
start_secondary_prolog+0x10/0x14
[2.546555] Registering IBM Power 842 compression driver

Cheers,
Sam.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc/powernv: Check image loaded or not before calling flash

2015-02-12 Thread Sam Bobroff
On 13/02/15 08:27, Benjamin Herrenschmidt wrote:
> On Thu, 2015-02-12 at 15:23 +0530, Vasant Hegde wrote:
>> Present code checks for update_flash_data in opal_flash_term_callback().
>> update_flash_data has been statically initialized to zero, and that
>> is the value of FLASH_IMG_READY. Also code update initialization happens
>> during subsys init.
> 
> Please statically initialize it to a sane value instead.

I've tested this patch and it works for me (the message is suppressed)
but I agree with Ben that it seems cleaner to use a static initializer.

>> So if reboot is issued before the subsys init stage then we end up displaying
>> "Flashing new firmware" message.. which may confuse end user.
>>
>> This patch adds additional validation to make sure image is actually loaded
>> or not.
>>
>> Reported-by: Sam Bobroff 
>> Signed-off-by: Vasant Hegde 
>> ---
>>  arch/powerpc/platforms/powernv/opal-flash.c |3 ++-
>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/platforms/powernv/opal-flash.c 
>> b/arch/powerpc/platforms/powernv/opal-flash.c
>> index 5c21d9c..5455cd4 100644
>> --- a/arch/powerpc/platforms/powernv/opal-flash.c
>> +++ b/arch/powerpc/platforms/powernv/opal-flash.c
>> @@ -319,7 +319,8 @@ void opal_flash_term_callback(void)
>>  {
>>  struct cpumask mask;
>>  
>> -if (update_flash_data.status != FLASH_IMG_READY)
>> +if (update_flash_data.status != FLASH_IMG_READY ||
>> +image_data.status != IMAGE_READY)
>>  return;
>>  
>>  pr_alert("FLASH: Flashing new firmware\n");
> 
> 
> ___
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC] powerpc: use ticket spin lock for !CONFIG_PPC_SPLPAR

2015-03-15 Thread Sam Bobroff
On 13/03/15 18:14, Benjamin Herrenschmidt wrote:
> On Fri, 2015-03-13 at 18:09 +1100, Michael Ellerman wrote:
>> On Thu, 2015-03-12 at 22:13 +1100, Benjamin Herrenschmidt wrote:
>>> On Thu, 2015-03-12 at 18:55 +0800, Kevin Hao wrote:
>>>> I know Torsten Duwe has tried to add the ticket spinlock for powerpc
>>>> one year ago [1]. But it make no progress due to the conflict between
>>>> PPC_SPLPAR and lockref. We still don't find a better way to handle
>>>> this. But instead of waiting forever for a perfect solution, can't we
>>>> just use the ticket spinlock for the !CONFIG_PPC_SPLPAR?
>>>>
>>>> This is a very rough patch based on arm64 codes. I want to make sure
>>>> that this is acceptable before going step further. This just passed
>>>> build and boot test on a fsl t4240rdb board. I have done a simple
>>>> performance benchmark by running the following command ten times before
>>>> and after applying this patch:
>>>>    ./perf bench sched messaging
>>>>
>>>>                                 Before    After
>>>> Averaged total time [sec]:      0.403     0.367
>>>>
>>>> So we can see a ~9% performance enhancing. This patch depends on this
>>>> one [2].
>>>
>>> I would do the ifdef'ing differently, something like
>>>
>>> CONFIG_PPC_HAS_LOCK_OWNER
>>>
>>> CONFIG_PPC_TICKET_LOCKS depends on !PPC_HAS_LOCK_OWNER
>>>
>>> and use these two in the code... with SPLPAR select'ing HAS_LOCK_OWNER
>>  
>>
>> Sam was doing some work looking at CONFER, and I think so far he hasn't found
>> that it is much of a benefit. Hopefully he can chime in with his 
>> observations.
>>
>> So the question is, should we just drop the directed CONFER and switch
>> wholesale to ticket locks?

Confer certainly helps in some situations, although I think it will be
strongly workload and configuration dependent and I haven't tried to do
much realistic testing.

Also, being able to use confer depends on both knowing the lock owner
and also that the lock owner's VCPU is not currently running (which is
done via the yield count, which is also part of the lock token at the
moment). With just the yield count we could still use an undirected
confer (which might be almost as good as a directed one), but without it
we would have to drop it entirely.

>> We can still do CONFER on SPLPAR, we just tell the hypervisor we don't know 
>> who
>> to confer to.
>>
>> There is still the drawback that we lose the lock owner for debugging, but
>> that might be worth it. And I think you can get it back with appropriate 
>> debug
>> options?
> 
> Another possibility would be to change the order of the count and the
> lock in lockref and defined it to be packed. That might allow us to have
> our lock contain the ticket first and the count next and "fit" ...
> 
>> cheers
>>
> 
> 
> ___
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 0/3] powerpc/tm: Abort syscalls in active transactions

2015-03-18 Thread Sam Bobroff

See the first patch for a description of the reasoning behind this
change.

This set includes the change, a kernel selftest for it and
some slight refactoring of the selftest code.


Sam Bobroff (3):
  powerpc/tm: Abort syscalls in active transactions
  selftests/powerpc: Move get_auxv_entry() to harness.c
  selftests/powerpc: Add transactional syscall test

 Documentation/powerpc/transactional_memory.txt  |   33 +++
 arch/powerpc/include/uapi/asm/tm.h  |2 +-
 arch/powerpc/kernel/entry_64.S  |   19 
 tools/testing/selftests/powerpc/harness.c   |   47 ++
 tools/testing/selftests/powerpc/pmu/lib.c   |   47 --
 tools/testing/selftests/powerpc/pmu/lib.h   |1 -
 tools/testing/selftests/powerpc/tm/Makefile |3 +-
 tools/testing/selftests/powerpc/tm/tm-syscall.c |  113 +++
 tools/testing/selftests/powerpc/utils.h |2 +-
 9 files changed, 200 insertions(+), 67 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall.c

-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/3] selftests/powerpc: Add transactional syscall test

2015-03-18 Thread Sam Bobroff
Check that a syscall made during an active transaction will fail with
the correct failure code and that one made during a suspended
transaction will succeed.

Signed-off-by: Sam Bobroff 
---
 tools/testing/selftests/powerpc/tm/Makefile |3 +-
 tools/testing/selftests/powerpc/tm/tm-syscall.c |  113 +++
 2 files changed, 115 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall.c

diff --git a/tools/testing/selftests/powerpc/tm/Makefile 
b/tools/testing/selftests/powerpc/tm/Makefile
index 2cede23..d8dab0d 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -1,4 +1,5 @@
-PROGS := tm-resched-dscr
+PROGS := tm-resched-dscr tm-syscall
+CFLAGS:=$(CFLAGS) -mhtm -Wl,-z,now
 
 all: $(PROGS)
 
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c 
b/tools/testing/selftests/powerpc/tm/tm-syscall.c
new file mode 100644
index 000..7c60e53
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -0,0 +1,113 @@
+/* Test the kernel's system call code to ensure that a system call
+ * made from within an active HTM transaction is aborted with the
+ * correct failure code.
+ * Conversely, ensure that a system call made from within a
+ * suspended transaction can succeed.
+ *
+ * It is important to compile with -Wl,-z,now to prevent
+ * lazy symbol resolution from affecting the results.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+#define TM_RETRIES 10
+#define TM_TEST_RUNS 1000
+
+int t_failure_persistent(void)
+{
+   long texasr = __builtin_get_texasr();
+   long failure_code = (texasr >> 56) & 0xff;
+
+   return failure_code & TM_CAUSE_PERSISTENT;
+}
+
+int t_failure_code_syscall(void)
+{
+   long texasr = __builtin_get_texasr();
+   long failure_code = (texasr >> 56) & 0xff;
+
+   return (failure_code & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL;
+}
+
+int t_active_getppid(void)
+{
+   int i;
+
+   for (i = 0; i < TM_RETRIES; i++) {
+   if (__builtin_tbegin(0)) {
+   getppid();
+   __builtin_tend(0);
+   return 1;
+   }
+   if (t_failure_persistent())
+   return 0;
+   }
+   return 0;
+}
+
+int t_active_getppid_test(void)
+{
+   int i;
+
+   for (i = 0; i < TM_TEST_RUNS; i++) {
+   if (t_active_getppid())
+   return 0;
+   if (!t_failure_persistent())
+   return 0;
+   if (!t_failure_code_syscall())
+   return 0;
+   }
+   return 1;
+}
+
+int t_suspended_getppid(void)
+{
+   int i;
+
+   for (i = 0; i < TM_RETRIES; i++) {
+   if (__builtin_tbegin(0)) {
+   __builtin_tsuspend();
+   getppid();
+   __builtin_tresume();
+   __builtin_tend(0);
+   return 1;
+   }
+   if (t_failure_persistent())
+   return 0;
+   }
+   return 0;
+}
+
+int t_suspended_getppid_test(void)
+{
+   int i;
+
+   for (i = 0; i < TM_TEST_RUNS; i++) {
+   if (!t_suspended_getppid())
+   return 0;
+   }
+   return 1;
+}
+
+int tm_syscall(void)
+{
+   SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM));
+   setbuf(stdout, 0);
+   FAIL_IF(!t_active_getppid_test());
+   printf("%d active transactions correctly aborted.\n", TM_TEST_RUNS);
+   FAIL_IF(!t_suspended_getppid_test());
+   printf("%d suspended transactions succeeded.\n", TM_TEST_RUNS);
+   return 0;
+}
+
+int main(void)
+{
+   return test_harness(tm_syscall, "tm_syscall");
+}
+
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/3] powerpc/tm: Abort syscalls in active transactions

2015-03-18 Thread Sam Bobroff
This patch changes the syscall handler to doom (tabort) active
transactions when a syscall is made and return immediately without
performing the syscall.

Currently, the system call instruction automatically suspends an
active transaction which causes side effects to persist when an active
transaction fails.

This does change the kernel's behaviour, but in a way that was
documented as unsupported. It doesn't reduce functionality because
syscalls will still be performed after tsuspend. It also provides a
consistent interface and makes the behaviour of user code
substantially the same across powerpc and platforms that do not
support suspended transactions (e.g. x86 and s390).

Performance measurements using
http://ozlabs.org/~anton/junkcode/null_syscall.c
indicate the cost of a system call increases by about 0.5%.

Signed-off-by: Sam Bobroff 
---
 Documentation/powerpc/transactional_memory.txt |   33 
 arch/powerpc/include/uapi/asm/tm.h |2 +-
 arch/powerpc/kernel/entry_64.S |   19 ++
 3 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/Documentation/powerpc/transactional_memory.txt 
b/Documentation/powerpc/transactional_memory.txt
index 9791e98..4167bc2 100644
--- a/Documentation/powerpc/transactional_memory.txt
+++ b/Documentation/powerpc/transactional_memory.txt
@@ -74,22 +74,23 @@ Causes of transaction aborts
 Syscalls
 
 
-Performing syscalls from within transaction is not recommended, and can lead
-to unpredictable results.
-
-Syscalls do not by design abort transactions, but beware: The kernel code will
-not be running in transactional state.  The effect of syscalls will always
-remain visible, but depending on the call they may abort your transaction as a
-side-effect, read soon-to-be-aborted transactional data that should not remain
-invisible, etc.  If you constantly retry a transaction that constantly aborts
-itself by calling a syscall, you'll have a livelock & make no progress.
-
-Simple syscalls (e.g. sigprocmask()) "could" be OK.  Even things like write()
-from, say, printf() should be OK as long as the kernel does not access any
-memory that was accessed transactionally.
-
-Consider any syscalls that happen to work as debug-only -- not recommended for
-production use.  Best to queue them up till after the transaction is over.
+Syscalls made from within an active transaction will not be performed and the
+transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL
+| TM_CAUSE_PERSISTENT.
+
+Syscalls made from within a suspended transaction are performed as normal and
+the transaction is not explicitly doomed by the kernel.  However, what the
+kernel does to perform the syscall may result in the transaction being doomed
+by the hardware.  The syscall is performed in suspended mode so any side
+effects will be persistent, independent of transaction success or failure.  No
+guarantees are provided by the kernel about which syscalls will affect
+transaction success.
+
+Care must be taken when relying on syscalls to abort during active transactions
+if the calls are made via a library.  Libraries may cache values (which may
+give the appearance of success) or perform operations that cause transaction
+failure before entering the kernel (which may produce different failure codes).
+Examples are glibc's getpid() and lazy symbol resolution.
 
 
 Signals
diff --git a/arch/powerpc/include/uapi/asm/tm.h 
b/arch/powerpc/include/uapi/asm/tm.h
index 5d836b7..5047659 100644
--- a/arch/powerpc/include/uapi/asm/tm.h
+++ b/arch/powerpc/include/uapi/asm/tm.h
@@ -11,7 +11,7 @@
 #define TM_CAUSE_RESCHED   0xde
 #define TM_CAUSE_TLBI  0xdc
 #define TM_CAUSE_FAC_UNAV  0xda
-#define TM_CAUSE_SYSCALL   0xd8  /* future use */
+#define TM_CAUSE_SYSCALL   0xd8
 #define TM_CAUSE_MISC  0xd6  /* future use */
 #define TM_CAUSE_SIGNAL0xd4
 #define TM_CAUSE_ALIGNMENT 0xd2
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d180caf2..85bf81d 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * System calls.
@@ -145,6 +146,24 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
andi.   r11,r10,_TIF_SYSCALL_DOTRACE
bne syscall_dotrace
 .Lsyscall_dotrace_cont:
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+   b   1f
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+   extrdi. r11, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
+   beq+1f
+
+   /* Doom the transaction and don't perform the syscall: */
+   mfmsr   r11
+   li  r12, 1
+   rldimi  r11, r12, MSR_TM_LG, 63-MSR_TM_LG
+   mtmsrd  r11, 0
+   li  r11, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
+   tabort. r11
+
+   b   .Lsyscall_exit
+1:
+#endif
cmpldi  0,r0,NR_

[PATCH 2/3] selftests/powerpc: Move get_auxv_entry() to harness.c

2015-03-18 Thread Sam Bobroff
Move get_auxv_entry() from pmu/lib.c up to harness.c in order to make
it available to other tests.

Signed-off-by: Sam Bobroff 
---
 tools/testing/selftests/powerpc/harness.c |   47 +
 tools/testing/selftests/powerpc/pmu/lib.c |   47 -
 tools/testing/selftests/powerpc/pmu/lib.h |1 -
 tools/testing/selftests/powerpc/utils.h   |2 +-
 4 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/tools/testing/selftests/powerpc/harness.c 
b/tools/testing/selftests/powerpc/harness.c
index 8ebc58a..f7997af 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -11,6 +11,10 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
 
 #include "subunit.h"
 #include "utils.h"
@@ -112,3 +116,46 @@ int test_harness(int (test_function)(void), char *name)
 
return rc;
 }
+
+static char auxv[4096];
+
+void *get_auxv_entry(int type)
+{
+   ElfW(auxv_t) *p;
+   void *result;
+   ssize_t num;
+   int fd;
+
+   fd = open("/proc/self/auxv", O_RDONLY);
+   if (fd == -1) {
+   perror("open");
+   return NULL;
+   }
+
+   result = NULL;
+
+   num = read(fd, auxv, sizeof(auxv));
+   if (num < 0) {
+   perror("read");
+   goto out;
+   }
+
+   if (num > sizeof(auxv)) {
+   printf("Overflowed auxv buffer\n");
+   goto out;
+   }
+
+   p = (ElfW(auxv_t) *)auxv;
+
+   while (p->a_type != AT_NULL) {
+   if (p->a_type == type) {
+   result = (void *)p->a_un.a_val;
+   break;
+   }
+
+   p++;
+   }
+out:
+   close(fd);
+   return result;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.c 
b/tools/testing/selftests/powerpc/pmu/lib.c
index 9768dea..a07104c 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.c
+++ b/tools/testing/selftests/powerpc/pmu/lib.c
@@ -5,15 +5,10 @@
 
 #define _GNU_SOURCE/* For CPU_ZERO etc. */
 
-#include 
 #include 
-#include 
-#include 
 #include 
 #include 
 #include 
-#include 
-#include 
 #include 
 
 #include "utils.h"
@@ -256,45 +251,3 @@ out:
return rc;
 }
 
-static char auxv[4096];
-
-void *get_auxv_entry(int type)
-{
-   ElfW(auxv_t) *p;
-   void *result;
-   ssize_t num;
-   int fd;
-
-   fd = open("/proc/self/auxv", O_RDONLY);
-   if (fd == -1) {
-   perror("open");
-   return NULL;
-   }
-
-   result = NULL;
-
-   num = read(fd, auxv, sizeof(auxv));
-   if (num < 0) {
-   perror("read");
-   goto out;
-   }
-
-   if (num > sizeof(auxv)) {
-   printf("Overflowed auxv buffer\n");
-   goto out;
-   }
-
-   p = (ElfW(auxv_t) *)auxv;
-
-   while (p->a_type != AT_NULL) {
-   if (p->a_type == type) {
-   result = (void *)p->a_un.a_val;
-   break;
-   }
-
-   p++;
-   }
-out:
-   close(fd);
-   return result;
-}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h 
b/tools/testing/selftests/powerpc/pmu/lib.h
index 0f0339c..ca5d72a 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -29,7 +29,6 @@ extern int notify_parent(union pipe write_pipe);
 extern int notify_parent_of_error(union pipe write_pipe);
 extern pid_t eat_cpu(int (test_function)(void));
 extern bool require_paranoia_below(int level);
-extern void *get_auxv_entry(int type);
 
 struct addr_range {
uint64_t first, last;
diff --git a/tools/testing/selftests/powerpc/utils.h 
b/tools/testing/selftests/powerpc/utils.h
index a93777a..64f53cd 100644
--- a/tools/testing/selftests/powerpc/utils.h
+++ b/tools/testing/selftests/powerpc/utils.h
@@ -19,7 +19,7 @@ typedef uint8_t u8;
 
 
 int test_harness(int (test_function)(void), char *name);
-
+extern void *get_auxv_entry(int type);
 
 /* Yes, this is evil */
 #define FAIL_IF(x) \
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 3/3] selftests/powerpc: Add transactional syscall test

2015-03-23 Thread Sam Bobroff
On 20/03/15 20:25, Anshuman Khandual wrote:
> On 03/19/2015 10:13 AM, Sam Bobroff wrote:
>> Check that a syscall made during an active transaction will fail with
>> the correct failure code and that one made during a suspended
>> transaction will succeed.
>>
>> Signed-off-by: Sam Bobroff 
> 
> The test works.

Great :-)

>> +
>> +int tm_syscall(void)
>> +{
>> +SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM));
>> +setbuf(stdout, 0);
>> +FAIL_IF(!t_active_getppid_test());
>> +printf("%d active transactions correctly aborted.\n", TM_TEST_RUNS);
>> +FAIL_IF(!t_suspended_getppid_test());
>> +printf("%d suspended transactions succeeded.\n", TM_TEST_RUNS);
>> +return 0;
>> +}
>> +
>> +int main(void)
>> +{
>> +return test_harness(tm_syscall, "tm_syscall");
>> +}
>> +
> 
> There is an extra blank line at the end of this file. Interchanging return
> codes of 0 and 1 for various functions makes it very confusing along with
> negative FAIL_IF checks in the primary test function. Control flow structures
> like these can use some in-code documentation for readability.
> 
> + for (i = 0; i < TM_RETRIES; i++) {
> + if (__builtin_tbegin(0)) {
> + getppid();
> + __builtin_tend(0);
> + return 1;
> + }
> + if (t_failure_persistent())
> + return 0;
> 
> or
> 
> + if (__builtin_tbegin(0)) {
> + __builtin_tsuspend();
> + getppid();
> + __builtin_tresume();
> + __builtin_tend(0);
> + return 1;
> + }
> + if (t_failure_persistent())
> + return 0;
> 

Good points. I'll remove the blank line and comment the code.

I'm not sure I can do any better with the FAIL_IF() macro: I wanted it
to read "fail if the test failed", but I can see what you mean about a
double negative. Maybe it would be better to introduce a different
macro, more like a standard assert: TEST(XXX) which fails if XXX is
false. However, I think "TEST" would be too generic a name and I'm not
sure what would be better. Any comments/suggestions?

Thanks for the review!

Cheers,
Sam.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 3/3] selftests/powerpc: Add transactional syscall test

2015-03-29 Thread Sam Bobroff
On 24/03/15 13:02, Michael Ellerman wrote:
> On Tue, 2015-03-24 at 12:52 +1100, Sam Bobroff wrote:
>> On 20/03/15 20:25, Anshuman Khandual wrote:
>>> On 03/19/2015 10:13 AM, Sam Bobroff wrote:
>>>> Check that a syscall made during an active transaction will fail with
>>>> the correct failure code and that one made during a suspended
>>>> transaction will succeed.
>>>>
>>>> Signed-off-by: Sam Bobroff 
>>>
>>> The test works.
>>
>> Great :-)
>>
>>>> +
>>>> +int tm_syscall(void)
>>>> +{
>>>> +  SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM));
>>>> +  setbuf(stdout, 0);
>>>> +  FAIL_IF(!t_active_getppid_test());
>>>> +  printf("%d active transactions correctly aborted.\n", TM_TEST_RUNS);
>>>> +  FAIL_IF(!t_suspended_getppid_test());
>>>> +  printf("%d suspended transactions succeeded.\n", TM_TEST_RUNS);
>>>> +  return 0;
>>>> +}
>>>> +
>>>> +int main(void)
>>>> +{
>>>> +  return test_harness(tm_syscall, "tm_syscall");
>>>> +}
>>>> +
>>>
>>> There is an extra blank line at the end of this file. Interchanging return
>>> codes of 0 and 1 for various functions makes it very confusing along with
>>> negative FAIL_IF checks in the primary test function. Control flow 
>>> structures
>>> like these can use some in-code documentation for readability.
>>>
>>> +   for (i = 0; i < TM_RETRIES; i++) {
>>> +   if (__builtin_tbegin(0)) {
>>> +   getppid();
>>> +   __builtin_tend(0);
>>> +   return 1;
>>> +   }
>>> +   if (t_failure_persistent())
>>> +   return 0;
>>>
>>> or
>>>
>>> +   if (__builtin_tbegin(0)) {
>>> +   __builtin_tsuspend();
>>> +   getppid();
>>> +   __builtin_tresume();
>>> +   __builtin_tend(0);
>>> +   return 1;
>>> +   }
>>> +   if (t_failure_persistent())
>>> +   return 0;
>>>
>>
>> Good points. I'll remove the blank line and comment the code.
>>
>> I'm not sure I can do any better with the FAIL_IF() macro: I wanted it
>> to read "fail if the test failed", but I can see what you mean about a
>> double negative. Maybe it would be better to introduce a different
>> macro, more like a standard assert: TEST(XXX) which fails if XXX is
>> false. However, I think "TEST" would be too generic a name and I'm not
>> sure what would be better. Any comments/suggestions?
> 
> FAIL_IF() is designed for things that return 0 for OK and !0 for failure. Like
> most things in C.
> 
> So I think it would be improved if you inverted your return codes in your test
> routines.
> 
> Even better to return ESOMETHING in the error cases, and zero otherwise.
> 
> cheers

Fair enough. I think the *_test() functions I added for "clarity" were
just making it more confusing, so I've dropped them.

Moving the code around, even a little, has also exposed the fact that
transactions are very sensitive to how the code is compiled so I'm going
to move the transaction blocks out into a separate assembly file where I
can control exactly what instructions get used. This will also mean that
it's no longer dependent on using linker magic (or some other trick) to
avoid lazy symbol loading.

I'll repost the series.

Thanks for the review!

Cheers,
Sam.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 3/4] selftests/powerpc: Add transactional syscall test

2015-03-29 Thread Sam Bobroff
Check that a syscall made during an active transaction will fail with
the correct failure code and that one made during a suspended
transaction will succeed.

Signed-off-by: Sam Bobroff 
---
v2:

Further testing has shown that the success or failure of the transactions was
affected by minor changes to the code, compiler optimisation and linker
settings.

To address this, I've moved the transactional part of the test to a separate
function, written in assembly. I've also extended the test to as many
transactions as it can fit into ten seconds, to better test for failures that
occur more rarely. This has stabilised the results, and it's no longer
necessary to use special compiler or linker flags.

 tools/testing/selftests/powerpc/tm/.gitignore  |1 +
 tools/testing/selftests/powerpc/tm/Makefile|4 +-
 .../testing/selftests/powerpc/tm/tm-syscall-asm.S  |   27 +
 .../testing/selftests/powerpc/tm/tm-syscall-asm.h  |2 +
 tools/testing/selftests/powerpc/tm/tm-syscall.c|  109 
 5 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall-asm.h
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall.c

diff --git a/tools/testing/selftests/powerpc/tm/.gitignore 
b/tools/testing/selftests/powerpc/tm/.gitignore
index 33d02cc..2699635d 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -1 +1,2 @@
 tm-resched-dscr
+tm-syscall
diff --git a/tools/testing/selftests/powerpc/tm/Makefile 
b/tools/testing/selftests/powerpc/tm/Makefile
index 2cede23..93bbff3 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -1,8 +1,10 @@
-PROGS := tm-resched-dscr
+PROGS := tm-resched-dscr tm-syscall
+CFLAGS:=$(CFLAGS) -mhtm
 
 all: $(PROGS)
 
 $(PROGS): ../harness.c
+tm-syscall: tm-syscall-asm.S
 
 run_tests: all
@-for PROG in $(PROGS); do \
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S 
b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
new file mode 100644
index 000..2b2daa7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
@@ -0,0 +1,27 @@
+#include 
+#include 
+
+   .text
+FUNC_START(getppid_tm_active_impl)
+   tbegin.
+   beq 1f
+   li  r0, __NR_getppid
+   sc
+   tend.
+   blr
+1:
+   li  r3, -1
+   blr
+
+FUNC_START(getppid_tm_suspended_impl)
+   tbegin.
+   beq 1f
+   li  r0, __NR_getppid
+   tsuspend.
+   sc
+   tresume.
+   tend.
+   blr
+1:
+   li  r3, -1
+   blr
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.h 
b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.h
new file mode 100644
index 000..6136328
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.h
@@ -0,0 +1,2 @@
+extern int getppid_tm_active_impl(void);
+extern int getppid_tm_suspended_impl(void);
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c 
b/tools/testing/selftests/powerpc/tm/tm-syscall.c
new file mode 100644
index 000..ff3b15c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -0,0 +1,109 @@
+/* Test the kernel's system call code to ensure that a system call
+ * made from within an active HTM transaction is aborted with the
+ * correct failure code.
+ * Conversely, ensure that a system call made from within a
+ * suspended transaction can succeed.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+#include "tm-syscall-asm.h"
+
+unsigned retries = 0;
+
+#define TEST_DURATION 10 /* seconds */
+#define TM_RETRIES 100
+
+long failure_code(void)
+{
+   return __builtin_get_texasr() >> 56;
+}
+
+bool failure_is_persistent(void)
+{
+   return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT;
+}
+
+bool failure_is_syscall(void)
+{
+   return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL;
+}
+
+pid_t getppid_tm(bool suspend)
+{
+   int i;
+   pid_t pid;
+
+   for (i = 0; i < TM_RETRIES; i++) {
+   if (suspend)
+   pid = getppid_tm_suspended_impl();
+   else
+   pid = getppid_tm_active_impl();
+   if (pid >= 0)
+   return pid;
+   if (failure_is_persistent()) {
+   if (failure_is_syscall())
+   return -1;
+   printf("Unexpected persistent transaction failure.\n");
+   printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n",
+  __builtin_get_texasr(), __builtin_get_tfiar());
+   exit(-1);
+   }
+   retries++;

[PATCH v2 1/4] powerpc/tm: Abort syscalls in active transactions

2015-03-29 Thread Sam Bobroff
This patch changes the syscall handler to doom (tabort) active
transactions when a syscall is made and return immediately without
performing the syscall.

Currently, the system call instruction automatically suspends an
active transaction which causes side effects to persist when an active
transaction fails.

This does change the kernel's behaviour, but in a way that was
documented as unsupported. It doesn't reduce functionality because
syscalls will still be performed after tsuspend. It also provides a
consistent interface and makes the behaviour of user code
substantially the same across powerpc and platforms that do not
support suspended transactions (e.g. x86 and s390).

Performance measurements using
http://ozlabs.org/~anton/junkcode/null_syscall.c
indicate the cost of a system call increases by about 0.5%.

Signed-off-by: Sam Bobroff 
---
v2:

Also update the failure code table.

 Documentation/powerpc/transactional_memory.txt |   32 
 arch/powerpc/include/uapi/asm/tm.h |2 +-
 arch/powerpc/kernel/entry_64.S |   19 ++
 3 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/Documentation/powerpc/transactional_memory.txt 
b/Documentation/powerpc/transactional_memory.txt
index 9791e98..98b39af 100644
--- a/Documentation/powerpc/transactional_memory.txt
+++ b/Documentation/powerpc/transactional_memory.txt
@@ -74,22 +74,23 @@ Causes of transaction aborts
 Syscalls
 
 
-Performing syscalls from within transaction is not recommended, and can lead
-to unpredictable results.
+Syscalls made from within an active transaction will not be performed and the
+transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL
+| TM_CAUSE_PERSISTENT.
 
-Syscalls do not by design abort transactions, but beware: The kernel code will
-not be running in transactional state.  The effect of syscalls will always
-remain visible, but depending on the call they may abort your transaction as a
-side-effect, read soon-to-be-aborted transactional data that should not remain
-invisible, etc.  If you constantly retry a transaction that constantly aborts
-itself by calling a syscall, you'll have a livelock & make no progress.
+Syscalls made from within a suspended transaction are performed as normal and
+the transaction is not explicitly doomed by the kernel.  However, what the
+kernel does to perform the syscall may result in the transaction being doomed
+by the hardware.  The syscall is performed in suspended mode so any side
+effects will be persistent, independent of transaction success or failure.  No
+guarantees are provided by the kernel about which syscalls will affect
+transaction success.
 
-Simple syscalls (e.g. sigprocmask()) "could" be OK.  Even things like write()
-from, say, printf() should be OK as long as the kernel does not access any
-memory that was accessed transactionally.
-
-Consider any syscalls that happen to work as debug-only -- not recommended for
-production use.  Best to queue them up till after the transaction is over.
+Care must be taken when relying on syscalls to abort during active transactions
+if the calls are made via a library.  Libraries may cache values (which may
+give the appearance of success) or perform operations that cause transaction
+failure before entering the kernel (which may produce different failure codes).
+Examples are glibc's getpid() and lazy symbol resolution.
 
 
 Signals
@@ -176,8 +177,7 @@ kernel aborted a transaction:
  TM_CAUSE_RESCHED   Thread was rescheduled.
  TM_CAUSE_TLBI  Software TLB invalide.
  TM_CAUSE_FAC_UNAV  FP/VEC/VSX unavailable trap.
- TM_CAUSE_SYSCALL   Currently unused; future syscalls that must abort
-transactions for consistency will use this.
+ TM_CAUSE_SYSCALL   Syscall from active transaction.
  TM_CAUSE_SIGNALSignal delivered.
  TM_CAUSE_MISC  Currently unused.
  TM_CAUSE_ALIGNMENT Alignment fault.
diff --git a/arch/powerpc/include/uapi/asm/tm.h 
b/arch/powerpc/include/uapi/asm/tm.h
index 5d836b7..5047659 100644
--- a/arch/powerpc/include/uapi/asm/tm.h
+++ b/arch/powerpc/include/uapi/asm/tm.h
@@ -11,7 +11,7 @@
 #define TM_CAUSE_RESCHED   0xde
 #define TM_CAUSE_TLBI  0xdc
 #define TM_CAUSE_FAC_UNAV  0xda
-#define TM_CAUSE_SYSCALL   0xd8  /* future use */
+#define TM_CAUSE_SYSCALL   0xd8
 #define TM_CAUSE_MISC  0xd6  /* future use */
 #define TM_CAUSE_SIGNAL0xd4
 #define TM_CAUSE_ALIGNMENT 0xd2
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d180caf2..85bf81d 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * System calls.
@@ -145,6 +146,24 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
andi.   r11,r10,_TIF_SYSCALL_DOTRACE
bne syscall_dotrac

[PATCH v2 2/4] selftests/powerpc: Move get_auxv_entry() to harness.c

2015-03-29 Thread Sam Bobroff
Move get_auxv_entry() from pmu/lib.c up to harness.c in order to make
it available to other tests.

Signed-off-by: Sam Bobroff 
---
 tools/testing/selftests/powerpc/harness.c |   47 +
 tools/testing/selftests/powerpc/pmu/lib.c |   47 -
 tools/testing/selftests/powerpc/pmu/lib.h |1 -
 tools/testing/selftests/powerpc/utils.h   |2 +-
 4 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/tools/testing/selftests/powerpc/harness.c 
b/tools/testing/selftests/powerpc/harness.c
index 8ebc58a..f7997af 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -11,6 +11,10 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
 
 #include "subunit.h"
 #include "utils.h"
@@ -112,3 +116,46 @@ int test_harness(int (test_function)(void), char *name)
 
return rc;
 }
+
+static char auxv[4096];
+
+void *get_auxv_entry(int type)
+{
+   ElfW(auxv_t) *p;
+   void *result;
+   ssize_t num;
+   int fd;
+
+   fd = open("/proc/self/auxv", O_RDONLY);
+   if (fd == -1) {
+   perror("open");
+   return NULL;
+   }
+
+   result = NULL;
+
+   num = read(fd, auxv, sizeof(auxv));
+   if (num < 0) {
+   perror("read");
+   goto out;
+   }
+
+   if (num > sizeof(auxv)) {
+   printf("Overflowed auxv buffer\n");
+   goto out;
+   }
+
+   p = (ElfW(auxv_t) *)auxv;
+
+   while (p->a_type != AT_NULL) {
+   if (p->a_type == type) {
+   result = (void *)p->a_un.a_val;
+   break;
+   }
+
+   p++;
+   }
+out:
+   close(fd);
+   return result;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.c 
b/tools/testing/selftests/powerpc/pmu/lib.c
index 9768dea..a07104c 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.c
+++ b/tools/testing/selftests/powerpc/pmu/lib.c
@@ -5,15 +5,10 @@
 
 #define _GNU_SOURCE/* For CPU_ZERO etc. */
 
-#include 
 #include 
-#include 
-#include 
 #include 
 #include 
 #include 
-#include 
-#include 
 #include 
 
 #include "utils.h"
@@ -256,45 +251,3 @@ out:
return rc;
 }
 
-static char auxv[4096];
-
-void *get_auxv_entry(int type)
-{
-   ElfW(auxv_t) *p;
-   void *result;
-   ssize_t num;
-   int fd;
-
-   fd = open("/proc/self/auxv", O_RDONLY);
-   if (fd == -1) {
-   perror("open");
-   return NULL;
-   }
-
-   result = NULL;
-
-   num = read(fd, auxv, sizeof(auxv));
-   if (num < 0) {
-   perror("read");
-   goto out;
-   }
-
-   if (num > sizeof(auxv)) {
-   printf("Overflowed auxv buffer\n");
-   goto out;
-   }
-
-   p = (ElfW(auxv_t) *)auxv;
-
-   while (p->a_type != AT_NULL) {
-   if (p->a_type == type) {
-   result = (void *)p->a_un.a_val;
-   break;
-   }
-
-   p++;
-   }
-out:
-   close(fd);
-   return result;
-}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h 
b/tools/testing/selftests/powerpc/pmu/lib.h
index 0f0339c..ca5d72a 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -29,7 +29,6 @@ extern int notify_parent(union pipe write_pipe);
 extern int notify_parent_of_error(union pipe write_pipe);
 extern pid_t eat_cpu(int (test_function)(void));
 extern bool require_paranoia_below(int level);
-extern void *get_auxv_entry(int type);
 
 struct addr_range {
uint64_t first, last;
diff --git a/tools/testing/selftests/powerpc/utils.h 
b/tools/testing/selftests/powerpc/utils.h
index a93777a..64f53cd 100644
--- a/tools/testing/selftests/powerpc/utils.h
+++ b/tools/testing/selftests/powerpc/utils.h
@@ -19,7 +19,7 @@ typedef uint8_t u8;
 
 
 int test_harness(int (test_function)(void), char *name);
-
+extern void *get_auxv_entry(int type);
 
 /* Yes, this is evil */
 #define FAIL_IF(x) \
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 4/4] powerpc/tm: Correct minor documentation typos

2015-03-29 Thread Sam Bobroff
Signed-off-by: Sam Bobroff 
---
v2:

Discovered some typos while updating the documentation.

 Documentation/powerpc/transactional_memory.txt |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/powerpc/transactional_memory.txt 
b/Documentation/powerpc/transactional_memory.txt
index 98b39af..ba0a2a4 100644
--- a/Documentation/powerpc/transactional_memory.txt
+++ b/Documentation/powerpc/transactional_memory.txt
@@ -175,7 +175,7 @@ These are defined in , and distinguish different 
reasons why the
 kernel aborted a transaction:
 
  TM_CAUSE_RESCHED   Thread was rescheduled.
- TM_CAUSE_TLBI  Software TLB invalide.
+ TM_CAUSE_TLBI  Software TLB invalid.
  TM_CAUSE_FAC_UNAV  FP/VEC/VSX unavailable trap.
  TM_CAUSE_SYSCALL   Syscall from active transaction.
  TM_CAUSE_SIGNALSignal delivered.
@@ -185,7 +185,7 @@ kernel aborted a transaction:
 
 These can be checked by the user program's abort handler as TEXASR[0:7].  If
 bit 7 is set, it indicates that the error is consider persistent.  For example
-a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.q
+a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.
 
 GDB
 ===
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 0/4] powerpc/tm: Abort syscalls in active transactions

2015-03-29 Thread Sam Bobroff

See the first patch for a description of the reasoning behind this
change.

This set includes the change, a kernel selftest for it and
some slight refactoring of the selftest code.


v2:
Patch 1/4: powerpc/tm: Abort syscalls in active transactions

Also update the failure code table.

Patch 3/4: selftests/powerpc: Add transactional syscall test

Further testing has shown that the success or failure of the transactions was
affected by minor changes to the code, compiler optimisation and linker
settings.

To address this, I've moved the transactional part of the test to a separate
function, written in assembly. I've also extended the test to run as many
transactions as it can fit into ten seconds, to better test for failures that
occur more rarely. This has stabilised the results, and it's no longer
necessary to use special compiler or linker flags.

Patch 4/4: powerpc/tm: Correct minor documentation typos

Discovered some typos while updating the documentation.


Sam Bobroff (4):
  powerpc/tm: Abort syscalls in active transactions
  selftests/powerpc: Move get_auxv_entry() to harness.c
  selftests/powerpc: Add transactional syscall test
  powerpc/tm: Correct minor documentation typos

 Documentation/powerpc/transactional_memory.txt |   36 +++
 arch/powerpc/include/uapi/asm/tm.h |2 +-
 arch/powerpc/kernel/entry_64.S |   19 
 tools/testing/selftests/powerpc/harness.c  |   47 +
 tools/testing/selftests/powerpc/pmu/lib.c  |   47 -
 tools/testing/selftests/powerpc/pmu/lib.h  |1 -
 tools/testing/selftests/powerpc/tm/.gitignore  |1 +
 tools/testing/selftests/powerpc/tm/Makefile|4 +-
 .../testing/selftests/powerpc/tm/tm-syscall-asm.S  |   27 +
 .../testing/selftests/powerpc/tm/tm-syscall-asm.h  |2 +
 tools/testing/selftests/powerpc/tm/tm-syscall.c|  109 
 tools/testing/selftests/powerpc/utils.h|2 +-
 12 files changed, 228 insertions(+), 69 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall-asm.h
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-syscall.c

-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/1] powerpc: Detect broken or mismatched toolchains

2016-02-21 Thread Sam Bobroff
It can currently be difficult to diagnose a build that fails due to
the compiler, linker or other parts of the toolchain being unable to
build binaries of the type required by the kernel config. For example
using a little endian toolchain to build a big endian kernel may
produce:

as: unrecognized option '-maltivec'

This patch adds a basic compile test and error message to
arch/powerpc/Makefile so that the above error becomes:

*** Sorry, your toolchain seems to be broken or incorrect. ***
Make sure it supports your kernel configuration (ppc64).

Signed-off-by: Sam Bobroff 
---

 arch/powerpc/Makefile | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 96efd82..0041cd2 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -355,6 +355,13 @@ TOUT   := .tmp_gas_check
 # - Require gcc 4.0 or above on 64-bit
 # - gcc-4.2.0 has issues compiling modules on 64-bit
 checkbin:
+   @if test "$(call try-run,echo 'int _start(void) { return 0; }' > 
\"$$TMP\"; \
+   $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -x c -nostdlib \"$$TMP\" \
+   -o /dev/null,ok,broken)" = "broken" ; then \
+   echo "*** Sorry, your toolchain seems to be broken or 
incorrect. ***" ; \
+   echo "Make sure it supports your kernel configuration 
($(UTS_MACHINE))." ; \
+   false; \
+   fi
@if test "$(cc-name)" != "clang" \
&& test "$(cc-version)" = "0304" ; then \
if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) 
>/dev/null 2>&1 ; then \
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 1/1] powerpc: Detect broken or mismatched toolchains

2016-02-22 Thread Sam Bobroff
On Mon, Feb 22, 2016 at 08:05:01PM -0600, Scott Wood wrote:
> On Mon, 2016-02-22 at 16:13 +1100, Sam Bobroff wrote:
> > It can currently be difficult to diagnose a build that fails due to
> > the compiler, linker or other parts of the toolchain being unable to
> > build binaries of the type required by the kernel config. For example
> > using a little endian toolchain to build a big endian kernel may
> > produce:
> > 
> > as: unrecognized option '-maltivec'
> > 
> > This patch adds a basic compile test and error message to
> > arch/powerpc/Makefile so that the above error becomes:
> > 
> > *** Sorry, your toolchain seems to be broken or incorrect. ***
> > Make sure it supports your kernel configuration (ppc64).
> > 
> > Signed-off-by: Sam Bobroff 
> > ---
> 
> How is this more useful than getting to actually see the way in which the
> toolchain (or the CFLAGS) is broken?

My reasoning was that it would be better because it happens at the start of the
build, rather than (possibly) a long way into it, and it indicates that the
problem is the toolchain setup (or config) itself rather than the file it's
trying to compile or link.

But I agree completely with what you're saying. I'll try re-working it in a way
that shows the command that fails and its output.

Cheers,
Sam.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 1/2] powerpc/xmon: Paged output for paca display

2015-08-20 Thread Sam Bobroff
The paca display is already more than 24 lines, which can be problematic
if you have an old school 80x24 terminal, or more likely you are on a
virtual terminal which does not scroll for whatever reason.

This patch adds a new command ".", which takes a single (hex) numeric
argument: lines per page. It will cause the output of "dp" and "dpa"
to be broken into pages, if necessary.

This is implemented by running over the entire output both for the
initial command and for each subsequent page: the visible part is
clipped out by checking line numbers. This is a simplistic approach
but minimally invasive; it is intended to be easily reusable for other
commands.

Sample output:

0:mon> .10
0:mon> dp1
paca for cpu 0x1 @ cfdc0480:
 possible = yes
 present  = yes
 online   = yes
 lock_token   = 0x8000  (0x8)
 paca_index   = 0x1 (0xa)
 kernel_toc   = 0xc0eb2400  (0x10)
 kernelbase   = 0xc000  (0x18)
 kernel_msr   = 0xb0001032  (0x20)
 emergency_sp = 0xc0003ffe8000  (0x28)
 mc_emergency_sp  = 0xc0003ffe4000  (0x2e0)
 in_mce   = 0x0 (0x2e8)
 data_offset  = 0x7f17  (0x30)
 hw_cpu_id= 0x8 (0x38)
 cpu_start= 0x1 (0x3a)
 kexec_state  = 0x0 (0x3b)
[Enter for next page]
0:mon>
 __current= 0xc0007e696620  (0x290)
 kstack   = 0xc0007e6ebe30  (0x298)
 stab_rr  = 0xb (0x2a0)
 saved_r1 = 0xc0007ef37860  (0x2a8)
 trap_save= 0x0 (0x2b8)
 soft_enabled = 0x0 (0x2ba)
 irq_happened = 0x1 (0x2bb)
 io_sync  = 0x0 (0x2bc)
 irq_work_pending = 0x0 (0x2bd)
 nap_state_lost   = 0x0 (0x2be)
0:mon>

(Based on a similar patch by Michael Ellerman 
"[v2] powerpc/xmon: Allow limiting the size of the paca display".
This patch is an alternative and cannot coexist with the original.)

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/xmon/xmon.c | 86 +++-
 1 file changed, 71 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index e599259..9ce9e7d 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -72,6 +72,12 @@ static int xmon_gate;
 
 static unsigned long in_xmon __read_mostly = 0;
 
+#define XMON_PRINTF(...) do { if (paged_vis()) printf(__VA_ARGS__); } while (0)
+#define MAX_PAGED_SIZE 1024
+static unsigned long paged_size = 0, paged_pos, paged_cur_page;
+#ifdef CONFIG_PPC64
+static unsigned long paca_cpu;
+#endif
 static unsigned long adrs;
 static int size = 1;
 #define MAX_DUMP (128 * 1024)
@@ -242,6 +248,9 @@ Commands:\n\
 "  u   dump TLB\n"
 #endif
 "  ?   help\n"
+#ifdef CONFIG_PPC64
+"  .#  limit output to # lines per page (dump paca only)\n"
+#endif
 "  zr  reboot\n\
   zh   halt\n"
 ;
@@ -833,6 +842,19 @@ static void remove_cpu_bpts(void)
write_ciabr(0);
 }
 
+static void paged_set_size(void)
+{
+   if (!scanhex(&paged_size) || (paged_size > MAX_PAGED_SIZE)) {
+   printf("Invalid number of lines per page (max: %d).\n",
+  MAX_PAGED_SIZE);
+   paged_size = 0;
+   }
+}
+static void paged_reset(void)
+{
+   paged_cur_page = 0;
+}
+
 /* Command interpreting routine */
 static char *last_cmd;
 
@@ -863,7 +885,8 @@ cmds(struct pt_regs *excp)
take_input(last_cmd);
last_cmd = NULL;
cmd = inchar();
-   }
+   } else
+   paged_reset();
switch (cmd) {
case 'm':
cmd = inchar();
@@ -924,6 +947,9 @@ cmds(struct pt_regs *excp)
case '?':
xmon_puts(help_string);
break;
+   case '.':
+   paged_set_size();
+   break;
case 'b':
bpt_cmds();
break;
@@ -2069,6 +2095,31 @@ static void xmon_rawdump (unsigned long adrs, long ndump)
printf("\n");
 }
 
+static void paged_start(void)
+{
+   paged_pos = 0;
+}
+
+static void paged_end(char *next_cmd)
+{
+   unsigned long next_page_start = ++paged_cur_page * paged_size;
+
+   if (paged_size && (paged_pos > next_page_start)) {
+   last_cmd = next_cmd;
+   printf("[Enter for next page]\n");
+   }
+}
+
+static bool paged_vis(void)
+{
+   bool rv = (!paged_size
+   || ((paged_pos >= (paged_size * paged_cur_page))
+   && (paged_pos < (paged_size * (paged_cu

[PATCH v2 0/2] powerpc/xmon: Paged output for paca display

2015-08-20 Thread Sam Bobroff



Changes v1 -> v2:

* Removed pagination parameters from commands, replaced with new command to set
  page size. This works better for multiple commands and produces simpler code.
* Switched from encoding the page position in the command buffer to using some
  globals. Saves some memory and is less invasive to the command code.
* Added a patch to paginate the kernel log buffer display.


Sam Bobroff (2):
  powerpc/xmon: Paged output for paca display
  powerpc/xmon: Paginate kernel log buffer display

 arch/powerpc/xmon/xmon.c | 89 +++-
 1 file changed, 73 insertions(+), 16 deletions(-)

-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 2/2] powerpc/xmon: Paginate kernel log buffer display

2015-08-20 Thread Sam Bobroff
The kernel log buffer is often much longer than the size of a terminal
so paginate its output.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/xmon/xmon.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 9ce9e7d..fdd765e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -248,9 +248,7 @@ Commands:\n\
 "  u   dump TLB\n"
 #endif
 "  ?   help\n"
-#ifdef CONFIG_PPC64
-"  .#  limit output to # lines per page (dump paca only)\n"
-#endif
+"  .#  limit output to # lines per page (for dp#, dpa, dl)\n"
 "  zr  reboot\n\
   zh   halt\n"
 ;
@@ -850,6 +848,7 @@ static void paged_set_size(void)
paged_size = 0;
}
 }
+
 static void paged_reset(void)
 {
paged_cur_page = 0;
@@ -2372,10 +2371,12 @@ dump_log_buf(void)
sync();
 
kmsg_dump_rewind_nolock(&dumper);
+   paged_start();
while (kmsg_dump_get_line_nolock(&dumper, false, buf, sizeof(buf), 
&len)) {
buf[len] = '\0';
-   printf("%s", buf);
+   XMON_PRINTF("%s", buf);
}
+   paged_end("dl\n");
 
sync();
/* wait a little while to see if we get a machine check */
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/1] powerpc: Individual System V IPC system calls

2015-09-23 Thread Sam Bobroff
This patch provides individual system call numbers for the following
System V IPC system calls, on PowerPC, so that they do not need to be
multiplexed:
* semop, semget, semctl, semtimedop
* msgsnd, msgrcv, msgget, msgctl
* shmat, shmdt, shmget, shmctl

Signed-off-by: Sam Bobroff 
---

 arch/powerpc/include/asm/systbl.h  | 12 
 arch/powerpc/include/asm/unistd.h  |  2 +-
 arch/powerpc/include/uapi/asm/unistd.h | 12 
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/systbl.h 
b/arch/powerpc/include/asm/systbl.h
index 71f2b3f..546b9ec 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -368,3 +368,15 @@ SYSCALL_SPU(memfd_create)
 SYSCALL_SPU(bpf)
 COMPAT_SYS(execveat)
 PPC64ONLY(switch_endian)
+SYSCALL(semop)
+SYSCALL(semget)
+COMPAT_SYS(semctl)
+COMPAT_SYS(semtimedop)
+COMPAT_SYS(msgsnd)
+COMPAT_SYS(msgrcv)
+SYSCALL(msgget)
+COMPAT_SYS(msgctl)
+COMPAT_SYS(shmat)
+SYSCALL(shmdt)
+SYSCALL(shmget)
+COMPAT_SYS(shmctl)
diff --git a/arch/powerpc/include/asm/unistd.h 
b/arch/powerpc/include/asm/unistd.h
index f4f8b66..e51c51b 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
 #include 
 
 
-#define __NR_syscalls  364
+#define __NR_syscalls  376
 
 #define __NR__exit __NR_exit
 #define NR_syscalls__NR_syscalls
diff --git a/arch/powerpc/include/uapi/asm/unistd.h 
b/arch/powerpc/include/uapi/asm/unistd.h
index e4aa173..a8390ee 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -386,5 +386,17 @@
 #define __NR_bpf   361
 #define __NR_execveat  362
 #define __NR_switch_endian 363
+#define __NR_semop 364
+#define __NR_semget365
+#define __NR_semctl366
+#define __NR_semtimedop367
+#define __NR_msgsnd368
+#define __NR_msgrcv369
+#define __NR_msgget370
+#define __NR_msgctl371
+#define __NR_shmat 372
+#define __NR_shmdt 373
+#define __NR_shmget374
+#define __NR_shmctl375
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 1/1] powerpc: Individual System V IPC system calls

2015-09-24 Thread Sam Bobroff
On Fri, Sep 25, 2015 at 10:33:37AM +1000, Michael Ellerman wrote:
> On Thu, 2015-09-24 at 15:39 +1000, Sam Bobroff wrote:
> > This patch provides individual system call numbers for the following
> > System V IPC system calls, on PowerPC, so that they do not need to be
> > multiplexed:
> > * semop, semget, semctl, semtimedop
> > * msgsnd, msgrcv, msgget, msgctl
> > * shmat, shmdt, shmget, shmctl
> 
> Thanks.
> 
> Can you please rebase this on top of linux-next, where we have two new
> syscalls wired up. It should be trivial, just the numbering changes, but I
> think you have a modified libc to actually test the result.

No problem, and yes I can test the glibc changes :-)

Cheers,
Sam.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [v2,1/2] powerpc/xmon: Paged output for paca display

2015-10-07 Thread Sam Bobroff
On Tue, Oct 06, 2015 at 10:05:38PM +1100, Michael Ellerman wrote:
> On Fri, 2015-21-08 at 04:24:27 UTC, Sam bobroff wrote:
> > The paca display is already more than 24 lines, which can be problematic
> > if you have an old school 80x24 terminal, or more likely you are on a
> > virtual terminal which does not scroll for whatever reason.
> > 
> > This patch adds a new command ".", which takes a single (hex) numeric
> > argument: lines per page. It will cause the output of "dp" and "dpa"
> > to be broken into pages, if necessary.
> > 
> > This is implemented by running over the entire output both for the
> > initial command and for each subsequent page: the visible part is
> > clipped out by checking line numbers. This is a simplistic approach
> > but minimally invasive; it is intended to be easily reusable for other
> > commands.
> > 
> > Sample output:
> > 
> > 0:mon> .10
> > 0:mon> dp1
> > paca for cpu 0x1 @ cfdc0480:
> >  possible = yes
> >  present  = yes
> >  online   = yes
> >  lock_token   = 0x8000  (0x8)
> >  paca_index   = 0x1 (0xa)
> >  kernel_toc   = 0xc0eb2400  (0x10)
> >  kernelbase   = 0xc000  (0x18)
> >  kernel_msr   = 0xb0001032  (0x20)
> >  emergency_sp = 0xc0003ffe8000  (0x28)
> >  mc_emergency_sp  = 0xc0003ffe4000  (0x2e0)
> >  in_mce   = 0x0 (0x2e8)
> >  data_offset  = 0x7f17  (0x30)
> >  hw_cpu_id= 0x8 (0x38)
> >  cpu_start= 0x1 (0x3a)
> >  kexec_state  = 0x0 (0x3b)
> > [Enter for next page]
> > 0:mon>
> >  __current= 0xc0007e696620  (0x290)
> >  kstack   = 0xc0007e6ebe30  (0x298)
> >  stab_rr  = 0xb (0x2a0)
> >  saved_r1 = 0xc0007ef37860  (0x2a8)
> >  trap_save= 0x0 (0x2b8)
> >  soft_enabled = 0x0 (0x2ba)
> >  irq_happened = 0x1 (0x2bb)
> >  io_sync  = 0x0 (0x2bc)
> >  irq_work_pending = 0x0         (0x2bd)
> >  nap_state_lost   = 0x0 (0x2be)
> > 0:mon>
> > 
> > (Based on a similar patch by Michael Ellerman 
> > "[v2] powerpc/xmon: Allow limiting the size of the paca display".
> > This patch is an alternative and cannot coexist with the original.)
> > 
> > Signed-off-by: Sam Bobroff 
> > ---
> >  arch/powerpc/xmon/xmon.c | 86 
> > +++-
> >  1 file changed, 71 insertions(+), 15 deletions(-)
> > 
> > diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> > index e599259..9ce9e7d 100644
> > --- a/arch/powerpc/xmon/xmon.c
> > +++ b/arch/powerpc/xmon/xmon.c
> > @@ -72,6 +72,12 @@ static int xmon_gate;
> >  
> >  static unsigned long in_xmon __read_mostly = 0;
> >  
> > +#define XMON_PRINTF(...) do { if (paged_vis()) printf(__VA_ARGS__); } 
> > while (0)
> 
> Can you do this as a proper function. I know it will need to be varargs, but
> that shouldn't be too ugly.

Sure, but I think I'll re-work the core of the logic and place it directly into
nonstdio.c. That will allow me to implement it directly in xmon_write() and
there won't need to be any change here at all. It will also allow blocking
during the output which will remove the whole 'run the command several times
and print a slice of it' system.

I'll post a v3.

> 
> > +#define MAX_PAGED_SIZE 1024
> 
> Why do we need a max at all?

OK, removed.

> > +static unsigned long paged_size = 0, paged_pos, paged_cur_page;
> 
> > +#ifdef CONFIG_PPC64
> > +static unsigned long paca_cpu;
> > +#endif
> 
> That can just be static in dump_pacas() by the looks.

This won't be needed in the new version.

> >  static unsigned long adrs;
> >  static int size = 1;
> >  #define MAX_DUMP (128 * 1024)
> > @@ -242,6 +248,9 @@ Commands:\n\
> >  "  u   dump TLB\n"
> >  #endif
> >  "  ?   help\n"
> > +#ifdef CONFIG_PPC64
> > +"  .#  limit output to # lines per page (dump paca only)\n"
> > +#endif
> 
> Don't make it 64-bit only.

It's only because the paca display itself is 64 bit only.

> >  "  zr  reboot\n\
> >zh   halt\n"
> > 

[PATCH v3 1/2] powerpc/xmon: Paged output for paca display

2015-10-07 Thread Sam Bobroff
The paca display is already more than 24 lines, which can be problematic
if you have an old school 80x24 terminal, or more likely you are on a
virtual terminal which does not scroll for whatever reason.

This patch adds a new command ".", which takes a single (hex) numeric
argument: lines per page. It will cause the output of "dp" and "dpa"
to be broken into pages, if necessary.

Sample output:

0:mon> .10
0:mon> dp1
paca for cpu 0x1 @ cfdc0480:
 possible = yes
 present  = yes
 online   = yes
 lock_token   = 0x8000  (0x8)
 paca_index   = 0x1 (0xa)
 kernel_toc   = 0xc0eb2400  (0x10)
 kernelbase   = 0xc000  (0x18)
 kernel_msr   = 0xb0001032  (0x20)
 emergency_sp = 0xc0003ffe8000  (0x28)
 mc_emergency_sp  = 0xc0003ffe4000  (0x2e0)
 in_mce   = 0x0 (0x2e8)
 data_offset  = 0x7f17  (0x30)
 hw_cpu_id= 0x8 (0x38)
 cpu_start= 0x1 (0x3a)
 kexec_state  = 0x0 (0x3b)
[Hit a key (a:all, q:truncate, any:next page)]
0:mon>
 __current= 0xc0007e696620  (0x290)
 kstack   = 0xc0007e6ebe30  (0x298)
 stab_rr  = 0xb (0x2a0)
 saved_r1 = 0xc0007ef37860  (0x2a8)
 trap_save= 0x0 (0x2b8)
 soft_enabled = 0x0 (0x2ba)
 irq_happened = 0x1 (0x2bb)
 io_sync  = 0x0 (0x2bc)
 irq_work_pending = 0x0 (0x2bd)
 nap_state_lost   = 0x0 (0x2be)
0:mon>

(Based on a similar patch by Michael Ellerman 
"[v2] powerpc/xmon: Allow limiting the size of the paca display".
This patch is an alternative and cannot coexist with the original.)

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/xmon/nonstdio.c | 57 ++--
 arch/powerpc/xmon/nonstdio.h |  3 +++
 arch/powerpc/xmon/xmon.c | 18 ++
 3 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c
index c987486..18819d7 100644
--- a/arch/powerpc/xmon/nonstdio.c
+++ b/arch/powerpc/xmon/nonstdio.c
@@ -11,10 +11,25 @@
 #include 
 #include "nonstdio.h"
 
+int paginating = 0, paginate_skipping = 0;
+unsigned long paginate_lpp = 0 /* Lines Per Page */;
+unsigned long paginate_pos;
 
-static int xmon_write(const void *ptr, int nb)
+void xmon_start_pagination(void)
 {
-   return udbg_write(ptr, nb);
+   paginating = 1;
+   paginate_skipping = 0;
+   paginate_pos = 0;
+}
+
+void xmon_end_pagination(void)
+{
+   paginating = 0;
+}
+
+void xmon_set_pagination_lpp(unsigned long lpp)
+{
+   paginate_lpp = lpp;
 }
 
 static int xmon_readchar(void)
@@ -24,6 +39,44 @@ static int xmon_readchar(void)
return -1;
 }
 
+static int xmon_write(const char *ptr, int nb)
+{
+   int rv = 0;
+   const char *p = ptr, *q;
+   const char msg[] = "[Hit a key (a:all, q:truncate, any:next page)]";
+
+   if (nb <= 0)
+   return rv;
+   if (paginating && paginate_skipping)
+   return nb;
+   if (paginate_lpp) {
+   while (paginating && (q = strchr(p, '\n'))) {
+   rv += udbg_write(p, q - p + 1);
+   p = q + 1;
+   paginate_pos++;
+   if (paginate_pos >= paginate_lpp) {
+   udbg_write(msg, strlen(msg));
+   switch (xmon_readchar()) {
+   case 'a':
+   paginating = 0;
+   break;
+   case 'q':
+   paginate_skipping = 1;
+   break;
+   default:
+   /* nothing */
+   break;
+   }
+   paginate_pos = 0;
+   udbg_write("\r\n", 2);
+   if (paginate_skipping)
+   return nb;
+   }
+   }
+   }
+   return rv + udbg_write(p, nb - (p - ptr));
+}
+
 int xmon_putchar(int c)
 {
char ch = c;
diff --git a/arch/powerpc/xmon/nonstdio.h b/arch/powerpc/xmon/nonstdio.h
index 18a51de..f865336 100644
--- a/arch/powerpc/xmon/nonstdio.h
+++ b/arch/powerpc/xmon/nonstdio.h
@@ -3,6 +3,9 @@
 #define printf xmon_printf
 #define putcharxmon_putchar
 
+extern void xmon_set_pagination_lpp(unsigned long lpp);
+extern void xmon_start_pagination(void);
+extern void xmon_end_pagination(void);
 extern int xmon_putchar(in

[PATCH v3 0/2] powerpc/xmon: Paged output for paca display

2015-10-07 Thread Sam Bobroff



Changes v2 -> v3:

Moved the pagination implementation from xmon.c to nonstdio.c where it's much
easier to do and the code is significantly simplified.

As it's now trivial to do, add the capability to truncate the output or to stop
pagination and dump the rest of the output.

Changed function naming scheme to read more easily (e.g.
xmon_start_pagination()).


Changes v1 -> v2:

* Removed pagination parameters from commands, replaced with new command to set
  page size. This works better for multiple commands and produces simpler code.
* Switched from encoding the page position in the command buffer to using some
  globals. Saves some memory and is less invasive to the command code.
* Added a patch to paginate the kernel log buffer display.


Sam Bobroff (2):
  powerpc/xmon: Paged output for paca display
  powerpc/xmon: Paginate kernel log buffer display

 arch/powerpc/xmon/nonstdio.c | 57 ++--
 arch/powerpc/xmon/nonstdio.h |  3 +++
 arch/powerpc/xmon/xmon.c | 18 ++
 3 files changed, 76 insertions(+), 2 deletions(-)

-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 2/2] powerpc/xmon: Paginate kernel log buffer display

2015-10-07 Thread Sam Bobroff
The kernel log buffer is often much longer than the size of a terminal
so paginate its output.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/xmon/xmon.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index cc070c3..8ba8ea7 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -242,9 +242,7 @@ Commands:\n\
 "  u   dump TLB\n"
 #endif
 "  ?   help\n"
-#ifdef CONFIG_PPC64
-"  .#  limit output to # lines per page (dump paca only)\n"
-#endif
+"  .#  limit output to # lines per page (for dp#, dpa, dl)\n"
 "  zr  reboot\n\
   zh   halt\n"
 ;
@@ -2333,10 +2331,12 @@ dump_log_buf(void)
sync();
 
kmsg_dump_rewind_nolock(&dumper);
+   xmon_start_pagination();
while (kmsg_dump_get_line_nolock(&dumper, false, buf, sizeof(buf), 
&len)) {
buf[len] = '\0';
printf("%s", buf);
}
+   xmon_end_pagination();
 
sync();
/* wait a little while to see if we get a machine check */
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 1/1] powerpc: Individual System V IPC system calls

2015-10-12 Thread Sam Bobroff
This patch provides individual system call numbers for the following
System V IPC system calls, on PowerPC, so that they do not need to be
multiplexed:
* semop, semget, semctl, semtimedop
* msgsnd, msgrcv, msgget, msgctl
* shmat, shmdt, shmget, shmctl

Signed-off-by: Sam Bobroff 
---

v2:

Rebased onto today's next-20151012.

 arch/powerpc/include/asm/systbl.h  | 12 
 arch/powerpc/include/asm/unistd.h  |  2 +-
 arch/powerpc/include/uapi/asm/unistd.h | 12 
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/systbl.h 
b/arch/powerpc/include/asm/systbl.h
index 126d0c4..c9e26cb 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -370,3 +370,15 @@ COMPAT_SYS(execveat)
 PPC64ONLY(switch_endian)
 SYSCALL_SPU(userfaultfd)
 SYSCALL_SPU(membarrier)
+SYSCALL(semop)
+SYSCALL(semget)
+COMPAT_SYS(semctl)
+COMPAT_SYS(semtimedop)
+COMPAT_SYS(msgsnd)
+COMPAT_SYS(msgrcv)
+SYSCALL(msgget)
+COMPAT_SYS(msgctl)
+COMPAT_SYS(shmat)
+SYSCALL(shmdt)
+SYSCALL(shmget)
+COMPAT_SYS(shmctl)
diff --git a/arch/powerpc/include/asm/unistd.h 
b/arch/powerpc/include/asm/unistd.h
index 13411be..6d8f802 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
 #include 
 
 
-#define __NR_syscalls  366
+#define __NR_syscalls  378
 
 #define __NR__exit __NR_exit
 #define NR_syscalls__NR_syscalls
diff --git a/arch/powerpc/include/uapi/asm/unistd.h 
b/arch/powerpc/include/uapi/asm/unistd.h
index 63377380..81579e9 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -388,5 +388,17 @@
 #define __NR_switch_endian 363
 #define __NR_userfaultfd   364
 #define __NR_membarrier365
+#define __NR_semop 366
+#define __NR_semget367
+#define __NR_semctl368
+#define __NR_semtimedop369
+#define __NR_msgsnd370
+#define __NR_msgrcv371
+#define __NR_msgget372
+#define __NR_msgctl373
+#define __NR_shmat 374
+#define __NR_shmdt 375
+#define __NR_shmget376
+#define __NR_shmctl377
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [v2,1/1] powerpc: Individual System V IPC system calls

2015-10-14 Thread Sam Bobroff
On Tue, Oct 13, 2015 at 08:38:42PM +1100, Michael Ellerman wrote:
> On Tue, 2015-13-10 at 01:49:28 UTC, Sam bobroff wrote:
> > This patch provides individual system call numbers for the following
> > System V IPC system calls, on PowerPC, so that they do not need to be
> > multiplexed:
> > * semop, semget, semctl, semtimedop
> > * msgsnd, msgrcv, msgget, msgctl
> > * shmat, shmdt, shmget, shmctl
> 
> You tested this right? :)  Tell me about it.

Why yes I did:

I have written a (fairly) trivial test program that calls each function in a
way that doesn't fail (but that doesn't necessarily attempt to exercise the
full functionality of it; my intent was primarily to validate the parameter
passing part as that is where most of the code change is (on the glibc side)).

I patched a local copy of glibc with the new kernel header and various tweaks
to correctly format the parameter lists for the new calls (there is actually
quite a lot of code in glibc around the IPC calls due to various compatibility
issues). I could then build a full tool chain that supported the new calls.

(This was a lot more extensive than the kernel patch but should be fairly close
to what needs to go into glibc.)

I used that tool chain to build a complete host system (using buildroot). Then
I could run the following tests:

* glibc: stock
  Host kernel: stock
  Result: success
  Notes: As expected, base case.

* glibc: stock
  Host kernel: patched
  Result: success
  Notes: As expected, the old ipc() call still exists in the patched host.

* glibc: patched
  Host kernel: stock
  Result: failure
  Notes: As expected, the test was run with a glibc that requires a patched
  kernel on an unpatched one so the syscalls are unknown.

* glibc: patched
  Host kernel: patched
  Result: success
  Notes: As expected. (Also, a bit of debug in glibc shows the new system call
  paths being followed.)

(I also re-ran the tests both for little-endian and big-endian hosts.)

It would obviously be good to have someone else test this, but I can't see a
way to make it easy to do. They would presumably have to go through all of the
above, which seems too much to ask given how trivial the kernel side of the
patch is. Still, it bothers me a bit so if there is any way please let me know.
(I thought about writing some assembly to directly test the syscall numbers but
all it would do is verify that the numbers are valid, which really isn't much
of a test.)

> Also we could make these available to SPU programs, but I don't think there's
> any point, no one's going to do a libc update for that.
> 
> cheers

Cheers,
Sam.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3 1/2] powerpc/xmon: Paged output for paca display

2015-10-14 Thread Sam Bobroff
On Wed, Oct 14, 2015 at 08:39:09PM +1100, Michael Ellerman wrote:
> On Thu, 2015-10-08 at 11:50 +1100, Sam Bobroff wrote:
> > The paca display is already more than 24 lines, which can be problematic
> > if you have an old school 80x24 terminal, or more likely you are on a
> > virtual terminal which does not scroll for whatever reason.
> > 
> > This patch adds a new command ".", which takes a single (hex) numeric
> > argument: lines per page. It will cause the output of "dp" and "dpa"
> > to be broken into pages, if necessary.
> > 
> > Sample output:
> > 
> > 0:mon> .10
> 
> So what about making it "#" rather than "." ?
> 
> cheers

Sure, although we'll have to do a better job than the other commands in the 
help text ;-)
(They use "#" to indicate a hex number and "##" is just going to be confusing.)

Do you want me to respin? (I'm happy for you to just adjust the patch.)

Cheers,
Sam.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [v2,1/1] powerpc: Individual System V IPC system calls

2015-10-14 Thread Sam Bobroff
On Wed, Oct 14, 2015 at 08:38:15PM +1100, Michael Ellerman wrote:
> On Wed, 2015-10-14 at 18:00 +1100, Sam Bobroff wrote:
> > On Tue, Oct 13, 2015 at 08:38:42PM +1100, Michael Ellerman wrote:
> > > On Tue, 2015-13-10 at 01:49:28 UTC, Sam bobroff wrote:
> > > > This patch provides individual system call numbers for the following
> > > > System V IPC system calls, on PowerPC, so that they do not need to be
> > > > multiplexed:
> > > > * semop, semget, semctl, semtimedop
> > > > * msgsnd, msgrcv, msgget, msgctl
> > > > * shmat, shmdt, shmget, shmctl
> > > 
> > > You tested this right? :)  Tell me about it.
> > 
> > Why yes I did:
> 
> ...
> 
> > (I also re-ran the tests both for little-endian and big-endian hosts.)
> 
> Did you test on 32-bit at all?

I ran the test program, compiled for 32 and 64 bit, on a biarch power7 machine
(using -m32 and -m64 to the compiler) but only to verify that the fully patched
system succeeded. Is that sufficient?

> > It would obviously be good to have someone else test this, but I can't see a
> > way to make it easy to do. They would presumably have to go through all of 
> > the
> > above, which seems too much to ask given how trivial the kernel side of the
> > patch is. Still, it bothers me a bit so if there is any way please let me 
> > know.
> > (I thought about writing some assembly to directly test the syscall numbers 
> > but
> > all it would do is verify that the numbers are valid, which really isn't 
> > much
> > of a test.)
> 
> Actually that is still a useful test, it at least tells you if the kernel
> you're running on implements the syscalls. Obviously if you're on mainline
> that's easy enough to work out from the git history, but if/when these get
> backported to distro kernels, it's often harder to work out what's in the
> source than just testing it directly.

Oh, fair enough then.

> So I wrote a quick dirty test for that, it seems to work for me:

[snip]

Thanks :-)

> Which gives:
> 
> test: ipc_unmuxed
> tags: git_version:v4.3-rc3-44-g10053fa531a8-dirty
> Testing semop       returned -1, errno 22
> Testing semget      returned -1, errno 2
> Testing semctl      returned -1, errno 22
> Testing semtimedop  returned -1, errno 22
> Testing msgsnd      returned -1, errno 14
> Testing msgrcv      returned -1, errno 22
> Testing msgget      returned -1, errno 2
> Testing msgctl      returned -1, errno 22
> Testing shmat       returned -1, errno 22
> Testing shmdt       returned -1, errno 22
> Testing shmget      returned -1, errno 2
> Testing shmctl      returned -1, errno 22
> success: ipc_unmuxed
> 
> 
> And on an unpatched system:
> 
> test: ipc_unmuxed
> tags: git_version:v4.3-rc3-44-g10053fa531a8-dirty
> Testing semop returned -1, errno 38
> [FAIL] Test FAILED on line 2
> failure: ipc_unmuxed
> 
> 
> Look OK?

Yep! And 38 (ENOSYS) is the code we'd expect in the failure case.

> cheers

Cheers,
Sam.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/8] powerpc/64: Adjust order in pcibios_init()

2019-03-19 Thread Sam Bobroff
The pcibios_init() function for 64 bit PowerPC currently calls
pci_bus_add_devices() before pcibios_resource_survey(), which seems
incorrect because it adds devices and attempts to bind their drivers
before allocating their resources (although no problems seem to be
apparent).

So move the call to pci_bus_add_devices() to after
pcibios_resource_survey(), while extracting the call to the
pcibios_fixup() hook so that it remains in the same location.

This will also allow the ppc_md.pcibios_bus_add_device() hooks to
perform actions that depend on PCI resources, both during rescanning
(where this is already the case) and at boot time, to support future
work.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/pci-common.c |  4 
 arch/powerpc/kernel/pci_32.c |  4 
 arch/powerpc/kernel/pci_64.c | 12 +---
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index ff4b7539cbdf..3146eb73e3b3 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1383,10 +1383,6 @@ void __init pcibios_resource_survey(void)
pr_debug("PCI: Assigning unassigned resources...\n");
pci_assign_unassigned_resources();
}
-
-   /* Call machine dependent fixup */
-   if (ppc_md.pcibios_fixup)
-   ppc_md.pcibios_fixup();
 }
 
 /* This is used by the PCI hotplug driver to allocate resource
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index d3f04f2d8249..40aaa1a6e193 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -259,6 +259,10 @@ static int __init pcibios_init(void)
/* Call common code to handle resource allocation */
pcibios_resource_survey();
 
+   /* Call machine dependent fixup */
+   if (ppc_md.pcibios_fixup)
+   ppc_md.pcibios_fixup();
+
/* Call machine dependent post-init code */
if (ppc_md.pcibios_after_init)
ppc_md.pcibios_after_init();
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 9d8c10d55407..6f16f30031d7 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -58,14 +58,20 @@ static int __init pcibios_init(void)
pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
 
/* Scan all of the recorded PCI controllers.  */
-   list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+   list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
pcibios_scan_phb(hose);
-   pci_bus_add_devices(hose->bus);
-   }
 
/* Call common code to handle resource allocation */
pcibios_resource_survey();
 
+   /* Add devices. */
+   list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+   pci_bus_add_devices(hose->bus);
+
+   /* Call machine dependent fixup */
+   if (ppc_md.pcibios_fixup)
+   ppc_md.pcibios_fixup();
+
printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
 
return 0;
-- 
2.19.0.2.gcad72f5712



[PATCH 6/8] powerpc/eeh: Initialize EEH address cache earlier

2019-03-19 Thread Sam Bobroff
The EEH address cache is currently initialized and populated by a
single function: eeh_addr_cache_build().  While the initial population
of the cache can only be done once resources are allocated,
initialization (just setting up a spinlock) could be done much
earlier.

So move the initialization step into a separate function and call it
from a core_initcall (rather than a subsys initcall).

This will allow future work to make use of the cache during boot time
PCI scanning.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/include/asm/eeh.h  |  3 +++
 arch/powerpc/kernel/eeh.c   |  2 ++
 arch/powerpc/kernel/eeh_cache.c | 13 +++--
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index e217ccda55d0..791b9e6fcc45 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -295,6 +295,7 @@ int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
 int eeh_check_failure(const volatile void __iomem *token);
 int eeh_dev_check_failure(struct eeh_dev *edev);
+void eeh_addr_cache_init(void);
 void eeh_addr_cache_build(void);
 void eeh_add_device_early(struct pci_dn *);
 void eeh_add_device_tree_early(struct pci_dn *);
@@ -362,6 +363,8 @@ static inline int eeh_check_failure(const volatile void 
__iomem *token)
 
 #define eeh_dev_check_failure(x) (0)
 
+static inline void eeh_addr_cache_init(void) { }
+
 static inline void eeh_addr_cache_build(void) { }
 
 static inline void eeh_add_device_early(struct pci_dn *pdn) { }
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 3dcff29cb9b3..7a406d58d2c0 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1219,6 +1219,8 @@ static int eeh_init(void)
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
eeh_dev_phb_init_dynamic(hose);
 
+   eeh_addr_cache_init();
+
/* Initialize EEH event */
return eeh_event_init();
 }
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 9c68f0837385..f93dd5cf6a39 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -267,6 +267,17 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
 }
 
+/**
+ * eeh_addr_cache_init - Initialize a cache of I/O addresses
+ *
+ * Initialize a cache of pci i/o addresses.  This cache will be used to
+ * find the pci device that corresponds to a given address.
+ */
+void eeh_addr_cache_init(void)
+{
+   spin_lock_init(&pci_io_addr_cache_root.piar_lock);
+}
+
 /**
  * eeh_addr_cache_build - Build a cache of I/O addresses
  *
@@ -282,8 +293,6 @@ void eeh_addr_cache_build(void)
struct eeh_dev *edev;
struct pci_dev *dev = NULL;
 
-   spin_lock_init(&pci_io_addr_cache_root.piar_lock);
-
for_each_pci_dev(dev) {
pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
if (!pdn)
-- 
2.19.0.2.gcad72f5712



[PATCH 5/8] powerpc/eeh: Add eeh_show_enabled()

2019-03-19 Thread Sam Bobroff
Move the EEH enabled message into its own function so that future
work can call it from multiple places.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/include/asm/eeh.h |  3 +++
 arch/powerpc/kernel/eeh.c  | 16 +++-
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index fe4cf7208890..e217ccda55d0 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -289,6 +289,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 
 struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
+void eeh_show_enabled(void);
 void eeh_probe_devices(void);
 int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
@@ -338,6 +339,8 @@ static inline bool eeh_enabled(void)
 return false;
 }
 
+static inline void eeh_show_enabled(void) { }
+
 static inline bool eeh_phb_enabled(void)
 {
return false;
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index b14d89547895..3dcff29cb9b3 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -163,6 +163,16 @@ static int __init eeh_setup(char *str)
 }
 __setup("eeh=", eeh_setup);
 
+void eeh_show_enabled(void)
+{
+   if (eeh_has_flag(EEH_FORCE_DISABLED))
+   pr_info("EEH: PCI Enhanced I/O Error Handling DISABLED (by 
eeh=off)\n");
+   else if (eeh_enabled())
+   pr_info("EEH: PCI Enhanced I/O Error Handling ENABLED (capable 
adapter found)\n");
+   else
+   pr_info("EEH: PCI Enhanced I/O Error Handling DISABLED (no 
capable adapter found)\n");
+}
+
 /*
  * This routine captures assorted PCI configuration space data
  * for the indicated PCI device, and puts them into a buffer
@@ -1166,11 +1176,7 @@ void eeh_probe_devices(void)
pdn = hose->pci_data;
traverse_pci_dn(pdn, eeh_ops->probe, NULL);
}
-   if (eeh_enabled())
-   pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
-   else
-   pr_info("EEH: No capable adapters found\n");
-
+   eeh_show_enabled();
 }
 
 /**
-- 
2.19.0.2.gcad72f5712



[PATCH 3/8] powerpc/eeh: Convert PNV_PHB_FLAG_EEH to global flag

2019-03-19 Thread Sam Bobroff
The PHB flag, PNV_PHB_FLAG_EEH, is set (on PowerNV) individually on
each PHB once the EEH subsystem is ready. It is the only use of the
flags member of the phb struct.

However there is no need to store this separately on each PHB, so
convert it to a global flag. For symmetry, the flag is now also set
for pSeries; although it is currently unused it may be useful in the
future.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/include/asm/eeh.h   | 11 +++
 arch/powerpc/platforms/powernv/eeh-powernv.c | 14 +++---
 arch/powerpc/platforms/powernv/pci.c |  7 +++
 arch/powerpc/platforms/powernv/pci.h |  2 --
 arch/powerpc/platforms/pseries/pci.c |  4 
 5 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 3613a56281f2..fe4cf7208890 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -43,6 +43,7 @@ struct pci_dn;
 #define EEH_VALID_PE_ZERO  0x10/* PE#0 is valid */
 #define EEH_ENABLE_IO_FOR_LOG  0x20/* Enable IO for log */
 #define EEH_EARLY_DUMP_LOG 0x40/* Dump log immediately  */
+#define EEH_PHB_ENABLED0x80/* PHB recovery uses EEH
 */
 
 /*
  * Delay for PE reset, all in ms
@@ -245,6 +246,11 @@ static inline bool eeh_enabled(void)
return eeh_has_flag(EEH_ENABLED) && !eeh_has_flag(EEH_FORCE_DISABLED);
 }
 
+static inline bool eeh_phb_enabled(void)
+{
+   return eeh_has_flag(EEH_PHB_ENABLED);
+}
+
 static inline void eeh_serialize_lock(unsigned long *flags)
 {
raw_spin_lock_irqsave(&confirm_error_lock, *flags);
@@ -332,6 +338,11 @@ static inline bool eeh_enabled(void)
 return false;
 }
 
+static inline bool eeh_phb_enabled(void)
+{
+   return false;
+}
+
 static inline void eeh_probe_devices(void) { }
 
 static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 6fc1a463b796..f0a95f663810 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -264,22 +264,14 @@ int pnv_eeh_post_init(void)
return ret;
}
 
-   if (!eeh_enabled())
+   if (eeh_enabled())
+   eeh_add_flag(EEH_PHB_ENABLED);
+   else
disable_irq(eeh_event_irq);
 
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
 
-   /*
-* If EEH is enabled, we're going to rely on that.
-* Otherwise, we restore to conventional mechanism
-* to clear frozen PE during PCI config access.
-*/
-   if (eeh_enabled())
-   phb->flags |= PNV_PHB_FLAG_EEH;
-   else
-   phb->flags &= ~PNV_PHB_FLAG_EEH;
-
/* Create debugfs entries */
 #ifdef CONFIG_DEBUG_FS
if (phb->has_dbgfs || !phb->dbgfs)
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index 307181fd8a17..d2b50f3bf6b1 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -717,10 +717,9 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
 static bool pnv_pci_cfg_check(struct pci_dn *pdn)
 {
struct eeh_dev *edev = NULL;
-   struct pnv_phb *phb = pdn->phb->private_data;
 
/* EEH not enabled ? */
-   if (!(phb->flags & PNV_PHB_FLAG_EEH))
+   if (!eeh_phb_enabled())
return true;
 
/* PE reset or device removed ? */
@@ -761,7 +760,7 @@ static int pnv_pci_read_config(struct pci_bus *bus,
 
ret = pnv_pci_cfg_read(pdn, where, size, val);
phb = pdn->phb->private_data;
-   if (phb->flags & PNV_PHB_FLAG_EEH && pdn->edev) {
+   if (eeh_phb_enabled() && pdn->edev) {
if (*val == EEH_IO_ERROR_VALUE(size) &&
eeh_dev_check_failure(pdn->edev))
 return PCIBIOS_DEVICE_NOT_FOUND;
@@ -789,7 +788,7 @@ static int pnv_pci_write_config(struct pci_bus *bus,
 
ret = pnv_pci_cfg_write(pdn, where, size, val);
phb = pdn->phb->private_data;
-   if (!(phb->flags & PNV_PHB_FLAG_EEH))
+   if (!eeh_phb_enabled())
pnv_pci_config_check_eeh(pdn);
 
return ret;
diff --git a/arch/powerpc/platforms/powernv/pci.h 
b/arch/powerpc/platforms/powernv/pci.h
index 8e36da379252..eb0add61397b 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -85,8 +85,6 @@ struct pnv_ioda_pe {
struct list_headlist;
 };
 
-#define PNV_PHB_FLAG_EEH   (1 << 0)
-
 struct pnv_phb {
struct pci_contr

[PATCH 0/8]

2019-03-19 Thread Sam Bobroff
Hi all,

This patch set adds support for EEH recovery of hot plugged devices on pSeries
machines. Specifically, devices discovered by PCI rescanning using
/sys/bus/pci/rescan, which includes devices hotplugged by QEMU's device_add
command. (pSeries doesn't currently use slot power control for hotplugging.)

As a side effect this also provides EEH support for devices removed by
/sys/bus/pci/devices/*/remove and re-discovered by writing to
/sys/bus/pci/rescan, on all platforms.

The approach I've taken is to use the fact that the existing
pcibios_bus_add_device() platform hooks (which are used to set up EEH on
Virtual Function devices (VFs)) are actually called for all devices, so I've
widened their scope and made other adjustments necessary to allow them to work
for hotplugged and boot-time devices as well.

Because some of the changes are in generic PowerPC code, it's
possible that I've disturbed something for another PowerPC platform. I've tried
to minimize this by leaving that code alone as much as possible and so there
are a few cases where eeh_add_device_{early,late}() or eeh_add_sysfs_files() is
called more than once. I think these can be looked at later, as duplicate calls
are not harmful.

The patch "Convert PNV_PHB_FLAG_EEH" isn't strictly necessary and I'm not sure
if it's better to keep it, because it simplifies the code, or drop it, because
we may need a separate flag per PHB later on. Thoughts anyone?

The first patch is a rework of the pcibios_init reordering patch I posted
earlier, which I've included here because it's necessary for this set.

I have done some testing for PowerNV on Power9 using a modified pnv_php module
and some testing on pSeries with slot power control using a modified rpaphp
module, and the EEH-related parts seem to work.

Cheers,
Sam.

Sam Bobroff (8):
  powerpc/64: Adjust order in pcibios_init()
  powerpc/eeh: Clear stale EEH_DEV_NO_HANDLER flag
  powerpc/eeh: Convert PNV_PHB_FLAG_EEH to global flag
  powerpc/eeh: Improve debug messages around device addition
  powerpc/eeh: Add eeh_show_enabled()
  powerpc/eeh: Initialize EEH address cache earlier
  powerpc/eeh: EEH for pSeries hot plug
  powerpc/eeh: Remove eeh_probe_devices() and eeh_addr_cache_build()

 arch/powerpc/include/asm/eeh.h   | 19 +++--
 arch/powerpc/kernel/eeh.c| 33 -
 arch/powerpc/kernel/eeh_cache.c  | 29 +---
 arch/powerpc/kernel/eeh_driver.c |  4 ++
 arch/powerpc/kernel/of_platform.c|  3 +-
 arch/powerpc/kernel/pci-common.c |  4 --
 arch/powerpc/kernel/pci_32.c |  4 ++
 arch/powerpc/kernel/pci_64.c | 12 +++-
 arch/powerpc/platforms/powernv/eeh-powernv.c | 41 +--
 arch/powerpc/platforms/powernv/pci.c |  7 +-
 arch/powerpc/platforms/powernv/pci.h |  2 -
 arch/powerpc/platforms/pseries/eeh_pseries.c | 75 +++-
 arch/powerpc/platforms/pseries/pci.c |  7 +-
 13 files changed, 122 insertions(+), 118 deletions(-)

-- 
2.19.0.2.gcad72f5712



[PATCH 4/8] powerpc/eeh: Improve debug messages around device addition

2019-03-19 Thread Sam Bobroff
Also remove useless comment.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/eeh.c|  2 +-
 arch/powerpc/platforms/powernv/eeh-powernv.c | 14 
 arch/powerpc/platforms/pseries/eeh_pseries.c | 23 +++-
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 8d3c36a1f194..b14d89547895 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1291,7 +1291,7 @@ void eeh_add_device_late(struct pci_dev *dev)
pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
edev = pdn_to_eeh_dev(pdn);
if (edev->pdev == dev) {
-   pr_debug("EEH: Already referenced !\n");
+   pr_debug("EEH: Device %s already referenced!\n", pci_name(dev));
return;
}
 
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index f0a95f663810..51c5b6bb9b0e 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -50,10 +50,7 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
if (!pdev->is_virtfn)
return;
 
-   /*
-* The following operations will fail if VF's sysfs files
-* aren't created or its resources aren't finalized.
-*/
+   pr_debug("%s: EEH: Setting up device %s.\n", __func__, pci_name(pdev));
eeh_add_device_early(pdn);
eeh_add_device_late(pdev);
eeh_sysfs_add_device(pdev);
@@ -389,6 +386,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
int ret;
int config_addr = (pdn->busno << 8) | (pdn->devfn);
 
+   pr_debug("%s: probing %04x:%02x:%02x.%01x\n",
+   __func__, hose->global_number, pdn->busno,
+   PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+
/*
 * When probing the root bridge, which doesn't have any
 * subordinate PCI devices. We don't have OF node for
@@ -483,6 +484,11 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
/* Save memory bars */
eeh_save_bars(edev);
 
+   pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%x-PE#%x\n",
+   __func__, pdn->busno, PCI_SLOT(pdn->devfn),
+   PCI_FUNC(pdn->devfn), edev->pe->phb->global_number,
+   edev->pe->addr);
+
return NULL;
 }
 
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 7aa50258dd42..ae06878fbdea 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -65,6 +65,8 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
if (!pdev->is_virtfn)
return;
 
+   pr_debug("%s: EEH: Setting up device %s.\n", __func__, pci_name(pdev));
+
pdn->device_id  =  pdev->device;
pdn->vendor_id  =  pdev->vendor;
pdn->class_code =  pdev->class;
@@ -251,6 +253,10 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void 
*data)
int enable = 0;
int ret;
 
+   pr_debug("%s: probing %04x:%02x:%02x.%01x\n",
+   __func__, pdn->phb->global_number, pdn->busno,
+   PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+
/* Retrieve OF node and eeh device */
edev = pdn_to_eeh_dev(pdn);
if (!edev || edev->pe)
@@ -294,7 +300,12 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void 
*data)
 
/* Enable EEH on the device */
ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
-   if (!ret) {
+   if (ret) {
+   pr_debug("%s: EEH failed to enable on %02x:%02x.%01x 
PHB#%x-PE#%x (code %d)\n",
+   __func__, pdn->busno, PCI_SLOT(pdn->devfn),
+   PCI_FUNC(pdn->devfn), pe.phb->global_number,
+   pe.addr, ret);
+   } else {
/* Retrieve PE address */
edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
pe.addr = edev->pe_config_addr;
@@ -310,11 +321,6 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void 
*data)
if (enable) {
eeh_add_flag(EEH_ENABLED);
eeh_add_to_parent_pe(edev);
-
-   pr_debug("%s: EEH enabled on %02x:%02x.%01x 
PHB#%x-PE#%x\n",
-   __func__, pdn->busno, PCI_SLOT(pdn->devfn),
-   PCI_FUNC(pdn->devfn), pe.phb->global_number,
-   pe.addr);
} else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) &&
   (pdn_to_eeh_

[PATCH 2/8] powerpc/eeh: Clear stale EEH_DEV_NO_HANDLER flag

2019-03-19 Thread Sam Bobroff
The EEH_DEV_NO_HANDLER flag is used by the EEH system to prevent the
use of driver callbacks in drivers that have been bound part way
through the recovery process. This is necessary to prevent later stage
handlers from being called when the earlier stage handlers haven't,
which can be confusing for drivers.

However, the flag is set for all devices that are added after boot
time and only cleared at the end of the EEH recovery process. This
results in hot plugged devices erroneously having the flag set during
the first recovery after they are added (causing their driver's
handlers to be incorrectly ignored).

To remedy this, clear the flag at the beginning of recovery
processing. The flag is still cleared at the end of recovery
processing, although it is no longer really necessary.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/eeh_driver.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 6f3ee30565dd..4c34b9901f15 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -819,6 +819,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
result = PCI_ERS_RESULT_DISCONNECT;
}
 
+   eeh_for_each_pe(pe, tmp_pe)
+   eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+   edev->mode &= ~EEH_DEV_NO_HANDLER;
+
/* Walk the various device drivers attached to this slot through
 * a reset sequence, giving each an opportunity to do what it needs
 * to accomplish the reset.  Each child gets a report of the
-- 
2.19.0.2.gcad72f5712



[PATCH 8/8] powerpc/eeh: Remove eeh_probe_devices() and eeh_addr_cache_build()

2019-03-19 Thread Sam Bobroff
Now that EEH support for all devices (on PowerNV and pSeries) is
provided by the pcibios bus add device hooks, eeh_probe_devices() and
eeh_addr_cache_build() are redundant and can be removed.

Note that previously on pSeries, useless EEH sysfs files were created
for some devices that did not have EEH support and this change
prevents them from being created.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/include/asm/eeh.h   |  6 
 arch/powerpc/kernel/eeh.c| 13 
 arch/powerpc/kernel/eeh_cache.c  | 32 
 arch/powerpc/platforms/powernv/eeh-powernv.c |  5 ++-
 arch/powerpc/platforms/pseries/pci.c |  3 +-
 5 files changed, 3 insertions(+), 56 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 791b9e6fcc45..f1eca1757cbc 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -290,13 +290,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
 void eeh_show_enabled(void);
-void eeh_probe_devices(void);
 int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
 int eeh_check_failure(const volatile void __iomem *token);
 int eeh_dev_check_failure(struct eeh_dev *edev);
 void eeh_addr_cache_init(void);
-void eeh_addr_cache_build(void);
 void eeh_add_device_early(struct pci_dn *);
 void eeh_add_device_tree_early(struct pci_dn *);
 void eeh_add_device_late(struct pci_dev *);
@@ -347,8 +345,6 @@ static inline bool eeh_phb_enabled(void)
return false;
 }
 
-static inline void eeh_probe_devices(void) { }
-
 static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
 {
return NULL;
@@ -365,8 +361,6 @@ static inline int eeh_check_failure(const volatile void 
__iomem *token)
 
 static inline void eeh_addr_cache_init(void) { }
 
-static inline void eeh_addr_cache_build(void) { }
-
 static inline void eeh_add_device_early(struct pci_dn *pdn) { }
 
 static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { }
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 217e14bb1fb6..cd2abbe41497 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1166,19 +1166,6 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
 };
 
-void eeh_probe_devices(void)
-{
-   struct pci_controller *hose, *tmp;
-   struct pci_dn *pdn;
-
-   /* Enable EEH for all adapters */
-   list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
-   pdn = hose->pci_data;
-   traverse_pci_dn(pdn, eeh_ops->probe, NULL);
-   }
-   eeh_show_enabled();
-}
-
 /**
  * eeh_init - EEH initialization
  *
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index f93dd5cf6a39..c40078d036af 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -278,38 +278,6 @@ void eeh_addr_cache_init(void)
spin_lock_init(&pci_io_addr_cache_root.piar_lock);
 }
 
-/**
- * eeh_addr_cache_build - Build a cache of I/O addresses
- *
- * Build a cache of pci i/o addresses.  This cache will be used to
- * find the pci device that corresponds to a given address.
- * This routine scans all pci busses to build the cache.
- * Must be run late in boot process, after the pci controllers
- * have been scanned for devices (after all device resources are known).
- */
-void eeh_addr_cache_build(void)
-{
-   struct pci_dn *pdn;
-   struct eeh_dev *edev;
-   struct pci_dev *dev = NULL;
-
-   for_each_pci_dev(dev) {
-   pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
-   if (!pdn)
-   continue;
-
-   edev = pdn_to_eeh_dev(pdn);
-   if (!edev)
-   continue;
-
-   dev->dev.archdata.edev = edev;
-   edev->pdev = dev;
-
-   eeh_addr_cache_insert_dev(dev);
-   eeh_sysfs_add_device(dev);
-   }
-}
-
 static int eeh_addr_cache_show(struct seq_file *s, void *v)
 {
struct pci_io_addr_range *piar;
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 81b0923cc55f..6a08f4fab255 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -240,9 +240,7 @@ int pnv_eeh_post_init(void)
struct pnv_phb *phb;
int ret = 0;
 
-   /* Probe devices & build address cache */
-   eeh_probe_devices();
-   eeh_addr_cache_build();
+   eeh_show_enabled();
 
/* Register OPAL event notifier */
eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
@@ -360,6 +358,7 @@ static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
return 0;
 }
 
+
 /*

[PATCH 7/8] powerpc/eeh: EEH for pSeries hot plug

2019-03-19 Thread Sam Bobroff
On PowerNV and pSeries, devices currently acquire EEH support from
several different places: Boot-time devices from eeh_probe_devices()
and eeh_addr_cache_build(), Virtual Function devices from the pcibios
bus add device hooks and hot plugged devices from pci_hp_add_devices()
(with other platforms using other methods as well).  Unfortunately,
pSeries machines currently discover hot plugged devices using
pci_rescan_bus(), not pci_hp_add_devices(), and so those devices do
not receive EEH support.

Rather than adding another case for pci_rescan_bus(), this change
widens the scope of the pcibios bus add device hooks so that they can
handle all devices. As a side effect this also supports devices
discovered after manually rescanning via /sys/bus/pci/rescan.

Note that on PowerNV, this change allows the EEH subsystem to become
enabled after boot as long as it has not been forced off, which was
not previously possible (it was already possible on pSeries).

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/eeh.c|  2 +-
 arch/powerpc/kernel/of_platform.c|  3 +-
 arch/powerpc/platforms/powernv/eeh-powernv.c |  8 ++-
 arch/powerpc/platforms/pseries/eeh_pseries.c | 54 ++--
 4 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 7a406d58d2c0..217e14bb1fb6 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1291,7 +1291,7 @@ void eeh_add_device_late(struct pci_dev *dev)
struct pci_dn *pdn;
struct eeh_dev *edev;
 
-   if (!dev || !eeh_enabled())
+   if (!dev)
return;
 
pr_debug("EEH: Adding device %s\n", pci_name(dev));
diff --git a/arch/powerpc/kernel/of_platform.c 
b/arch/powerpc/kernel/of_platform.c
index becaec990140..d5818e9c4069 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -86,7 +86,8 @@ static int of_pci_phb_probe(struct platform_device *dev)
pcibios_claim_one_bus(phb->bus);
 
/* Finish EEH setup */
-   eeh_add_device_tree_late(phb->bus);
+   if (!eeh_has_flag(EEH_FORCE_DISABLED))
+   eeh_add_device_tree_late(phb->bus);
 
/* Add probed PCI devices to the device model */
pci_bus_add_devices(phb->bus);
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 51c5b6bb9b0e..81b0923cc55f 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -47,7 +47,7 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
 {
struct pci_dn *pdn = pci_get_pdn(pdev);
 
-   if (!pdev->is_virtfn)
+   if (eeh_has_flag(EEH_FORCE_DISABLED))
return;
 
pr_debug("%s: EEH: Setting up device %s.\n", __func__, pci_name(pdev));
@@ -479,7 +479,11 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
 * Enable EEH explicitly so that we will do EEH check
 * while accessing I/O stuff
 */
-   eeh_add_flag(EEH_ENABLED);
+   if (!eeh_has_flag(EEH_ENABLED)) {
+   enable_irq(eeh_event_irq);
+   eeh_add_flag(EEH_PHB_ENABLED);
+   eeh_add_flag(EEH_ENABLED);
+   }
 
/* Save memory bars */
eeh_save_bars(edev);
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
b/arch/powerpc/platforms/pseries/eeh_pseries.c
index ae06878fbdea..e68c79164974 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -55,44 +55,44 @@ static int ibm_get_config_addr_info;
 static int ibm_get_config_addr_info2;
 static int ibm_configure_pe;
 
-#ifdef CONFIG_PCI_IOV
 void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
 {
struct pci_dn *pdn = pci_get_pdn(pdev);
-   struct pci_dn *physfn_pdn;
-   struct eeh_dev *edev;
 
-   if (!pdev->is_virtfn)
+   if (eeh_has_flag(EEH_FORCE_DISABLED))
return;
 
pr_debug("%s: EEH: Setting up device %s.\n", __func__, pci_name(pdev));
+#ifdef CONFIG_PCI_IOV
+   if (pdev->is_virtfn) {
+   struct pci_dn *physfn_pdn;
 
-   pdn->device_id  =  pdev->device;
-   pdn->vendor_id  =  pdev->vendor;
-   pdn->class_code =  pdev->class;
-   /*
-* Last allow unfreeze return code used for retrieval
-* by user space in eeh-sysfs to show the last command
-* completion from platform.
-*/
-   pdn->last_allow_rc =  0;
-   physfn_pdn  =  pci_get_pdn(pdev->physfn);
-   pdn->pe_number  =  physfn_pdn->pe_num_map[pdn->vf_index];
-   edev = pdn_to_eeh_dev(pdn);
-
-   /*
-* The following operations will fail if VF's sysfs files
-* aren't created or its resources aren't finalized.
-*/
+   pdn->device_id  =  pdev-&g

Re: [PATCH 2/8] powerpc/eeh: Clear stale EEH_DEV_NO_HANDLER flag

2019-04-07 Thread Sam Bobroff
On Wed, Mar 20, 2019 at 05:02:57PM +1100, Alexey Kardashevskiy wrote:
> 
> 
> On 20/03/2019 13:58, Sam Bobroff wrote:
> > The EEH_DEV_NO_HANDLER flag is used by the EEH system to prevent the
> > use of driver callbacks in drivers that have been bound part way
> > through the recovery process. This is necessary to prevent later stage
> > handlers from being called when the earlier stage handlers haven't,
> > which can be confusing for drivers.
> 
> The flag is used from eeh_pe_report()->eeh_pe_report_edev which is
> called many times from eeh_handle_normal_event() (and you clear the flag
> here unconditionally) and once from eeh_handle_special_event() - so this
> is actually the only case now when the flag matters. Is my understanding
> correct? Also is not clearing the flag correct in that case? I do not
> quite understand eeh_handle_normal_event vs. eeh_handle_special_event
> business though.

I'm not sure I fully understand your question, but here's the situation:

* EEH is detected on a PCI device that has no driver bound but there is
  a driver that COULD bind.
* eeh_handle_normal_event() follows the "EEH: Reset with hotplug
  activity" path because the device doesn't (currently) have a driver
  that supports EEH.
* eeh_reset_device() removes the device (pci_hp_remove_devices()).
* eeh_reset_device() re-discovers the device with pci_hp_add_devices().
* As part of re-discovery the PCI subsystem will bind the available driver.
* eeh_handle_normal_event() calls eeh_report_resume() (via eeh_pe_report()).

If the (newly bound) driver has a resume() handler, then
eeh_report_resume() will call it and AFAIK this will cause a problem for
some drivers because their error_detected() handler wasn't called first.

The fix for this is to have eeh_add_device_late() set EEH_DEV_NO_HANDLER
so that we can detect that the device has been added DURING recovery,
and avoid calling its handlers later.

I see what you mean about the eeh_handle_special_event() case, where
EEH_DEV_NO_HANDLER isn't cleared before calling eeh_pe_report(), and I
think it's a bug! I'll fix it in the next version.

(Cleaning up that flag is on my list. I don't think it's a very good
solution.)

> > 
> > However, the flag is set for all devices that are added after boot
> > time and only cleared at the end of the EEH recovery process. This
> > results in hot plugged devices erroneously having the flag set during
> > the first recovery after they are added (causing their driver's
> > handlers to be incorrectly ignored).
> > 
> > To remedy this, clear the flag at the beginning of recovery
> > processing. The flag is still cleared at the end of recovery
> > processing, although it is no longer really necessary.
> 
> Then maybe remove that redundant clearing?

I don't really mind either way; clearing it when we are finished with
recovery seems "cleaner" to me but it doesn't have any function. (In
any case, I think I will eventually want to remove it.)

> > 
> > Signed-off-by: Sam Bobroff 
> > ---
> >  arch/powerpc/kernel/eeh_driver.c | 4 
> >  1 file changed, 4 insertions(+)
> > 
> > diff --git a/arch/powerpc/kernel/eeh_driver.c 
> > b/arch/powerpc/kernel/eeh_driver.c
> > index 6f3ee30565dd..4c34b9901f15 100644
> > --- a/arch/powerpc/kernel/eeh_driver.c
> > +++ b/arch/powerpc/kernel/eeh_driver.c
> > @@ -819,6 +819,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
> > result = PCI_ERS_RESULT_DISCONNECT;
> > }
> >  
> > +   eeh_for_each_pe(pe, tmp_pe)
> > +   eeh_pe_for_each_dev(tmp_pe, edev, tmp)
> > +   edev->mode &= ~EEH_DEV_NO_HANDLER;
> > +
> > /* Walk the various device drivers attached to this slot through
> >  * a reset sequence, giving each an opportunity to do what it needs
> >  * to accomplish the reset.  Each child gets a report of the
> > 
> 
> -- 
> Alexey
> 


signature.asc
Description: PGP signature


Re: [PATCH 3/8] powerpc/eeh: Convert PNV_PHB_FLAG_EEH to global flag

2019-04-08 Thread Sam Bobroff
On Wed, Mar 20, 2019 at 05:02:44PM +1100, Alexey Kardashevskiy wrote:
> 
> 
> On 20/03/2019 13:58, Sam Bobroff wrote:
> > The PHB flag, PNV_PHB_FLAG_EEH, is set (on PowerNV) individually on
> > each PHB once the EEH subsystem is ready. It is the only use of the
> > flags member of the phb struct.
> 
> 
> Then why to keep pnv_phb::flags?

No reason. I'll remove it in the next version.

> > However there is no need to store this separately on each PHB, so
> > convert it to a global flag. For symmetry, the flag is now also set
> > for pSeries; although it is currently unused it may be useful in the
> > future.
> 
> Just using eeh_enabled() instead of (phb->flags & PNV_PHB_FLAG_EEH)
> seems easier and cleaner; also pseries does not use it so there is no
> point defining it there either.

I do want to do that. However, eeh_enabled() seems to be slightly
different from PNV_PHB_FLAG_EEH:

- eeh_enabled() is true as soon as the first device with EEH support is
  detected.
- eeh_phb_enabled() is true after EEH support has been enabled on every
  device that supports it.

So I was concerned that using eeh_enabled() would cause problems in
pnv_pci_config_check_eeh() if EEH was detected *during* the initial PCI
scanning phase when eeh_enabled() was true but EEH had not yet been set
up on the device or PHB where it was detected.

Does that make sense?

Would it be reasonable to keep this patch as it is for now and
investigate cleaning it up in a future patch?

> > 
> > Signed-off-by: Sam Bobroff 
> > ---
> >  arch/powerpc/include/asm/eeh.h   | 11 +++
> >  arch/powerpc/platforms/powernv/eeh-powernv.c | 14 +++---
> >  arch/powerpc/platforms/powernv/pci.c |  7 +++
> >  arch/powerpc/platforms/powernv/pci.h |  2 --
> >  arch/powerpc/platforms/pseries/pci.c |  4 
> >  5 files changed, 21 insertions(+), 17 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> > index 3613a56281f2..fe4cf7208890 100644
> > --- a/arch/powerpc/include/asm/eeh.h
> > +++ b/arch/powerpc/include/asm/eeh.h
> > @@ -43,6 +43,7 @@ struct pci_dn;
> >  #define EEH_VALID_PE_ZERO  0x10/* PE#0 is valid */
> >  #define EEH_ENABLE_IO_FOR_LOG  0x20/* Enable IO for log
> >  */
> >  #define EEH_EARLY_DUMP_LOG 0x40/* Dump log immediately  */
> > +#define EEH_PHB_ENABLED0x80/* PHB recovery uses EEH
> >  */
> >  
> >  /*
> >   * Delay for PE reset, all in ms
> > @@ -245,6 +246,11 @@ static inline bool eeh_enabled(void)
> > return eeh_has_flag(EEH_ENABLED) && !eeh_has_flag(EEH_FORCE_DISABLED);
> >  }
> >  
> > +static inline bool eeh_phb_enabled(void)
> > +{
> > +   return eeh_has_flag(EEH_PHB_ENABLED);
> > +}
> > +
> >  static inline void eeh_serialize_lock(unsigned long *flags)
> >  {
> > raw_spin_lock_irqsave(&confirm_error_lock, *flags);
> > @@ -332,6 +338,11 @@ static inline bool eeh_enabled(void)
> >  return false;
> >  }
> >  
> > +static inline bool eeh_phb_enabled(void)
> > +{
> > +   return false;
> > +}
> > +
> >  static inline void eeh_probe_devices(void) { }
> >  
> >  static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
> > diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
> > b/arch/powerpc/platforms/powernv/eeh-powernv.c
> > index 6fc1a463b796..f0a95f663810 100644
> > --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
> > +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
> > @@ -264,22 +264,14 @@ int pnv_eeh_post_init(void)
> > return ret;
> > }
> >  
> > -   if (!eeh_enabled())
> > +   if (eeh_enabled())
> > +   eeh_add_flag(EEH_PHB_ENABLED);
> > +   else
> > disable_irq(eeh_event_irq);
> >  
> > list_for_each_entry(hose, &hose_list, list_node) {
> > phb = hose->private_data;
> >  
> > -   /*
> > -* If EEH is enabled, we're going to rely on that.
> > -* Otherwise, we restore to conventional mechanism
> > -* to clear frozen PE during PCI config access.
> > -*/
> > -   if (eeh_enabled())
> > -   phb->flags |= PNV_PHB_FLAG_EEH;
> > -   else
> > -   phb->flags &= ~PNV_PHB_FLAG_EEH;
> > -
> > /* Create debugfs entries */
> >  #ifdef CONFIG_DEBUG_FS
> > if (phb->

Re: [PATCH 5/8] powerpc/eeh: Add eeh_show_enabled()

2019-04-08 Thread Sam Bobroff
On Wed, Mar 20, 2019 at 05:02:23PM +1100, Alexey Kardashevskiy wrote:
> 
> 
> On 20/03/2019 13:58, Sam Bobroff wrote:
> > Move the EEH enabled message into its own function so that future
> > work can call it from multiple places.
> > 
> > Signed-off-by: Sam Bobroff 
> > ---
> >  arch/powerpc/include/asm/eeh.h |  3 +++
> >  arch/powerpc/kernel/eeh.c  | 16 +++-
> >  2 files changed, 14 insertions(+), 5 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> > index fe4cf7208890..e217ccda55d0 100644
> > --- a/arch/powerpc/include/asm/eeh.h
> > +++ b/arch/powerpc/include/asm/eeh.h
> > @@ -289,6 +289,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
> >  
> >  struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
> >  void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
> > +void eeh_show_enabled(void);
> >  void eeh_probe_devices(void);
> >  int __init eeh_ops_register(struct eeh_ops *ops);
> >  int __exit eeh_ops_unregister(const char *name);
> > @@ -338,6 +339,8 @@ static inline bool eeh_enabled(void)
> >  return false;
> >  }
> >  
> > +static inline void eeh_show_enabled(void) { }
> > +
> >  static inline bool eeh_phb_enabled(void)
> >  {
> > return false;
> > diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> > index b14d89547895..3dcff29cb9b3 100644
> > --- a/arch/powerpc/kernel/eeh.c
> > +++ b/arch/powerpc/kernel/eeh.c
> > @@ -163,6 +163,16 @@ static int __init eeh_setup(char *str)
> >  }
> >  __setup("eeh=", eeh_setup);
> >  
> > +void eeh_show_enabled(void)
> > +{
> > +   if (eeh_has_flag(EEH_FORCE_DISABLED))
> > +   pr_info("EEH: PCI Enhanced I/O Error Handling DISABLED (by 
> > eeh=off)\n");
> > +   else if (eeh_enabled())
> 
> 
> I'd make it eeh_has_flag(EEH_ENABLED) for clarity.

OK, sounds good.

> 
> > +   pr_info("EEH: PCI Enhanced I/O Error Handling ENABLED (capable 
> > adapter found)\n");
> > +   else
> > +   pr_info("EEH: PCI Enhanced I/O Error Handling DISABLED (no 
> > capable adapter found)\n");
> > +}
> > +
> >  /*
> >   * This routine captures assorted PCI configuration space data
> >   * for the indicated PCI device, and puts them into a buffer
> > @@ -1166,11 +1176,7 @@ void eeh_probe_devices(void)
> > pdn = hose->pci_data;
> > traverse_pci_dn(pdn, eeh_ops->probe, NULL);
> > }
> > -   if (eeh_enabled())
> > -   pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
> > -   else
> > -   pr_info("EEH: No capable adapters found\n");
> > -
> > +   eeh_show_enabled();
> 
> 
> This line moves later in the series so I'd just merge this patch into
> 8/8 to reduce the number of lines moving within the patchset.

Oh, good idea. I'll do it.

> In general the whole point of the EEH_ENABLED flag is fading away. Its
> meaning now is that "at least somewhere in the box for at least one
> device with enabled EEH" which does not seem extremely useful as we have
> a pci_dev or pe pretty much everywhere we look at eeh_enabled() and
> pdev->dev.archdata.edev can tell if eeh is enabled for a device.
> Although I am pretty sure this is in your list already :)

Yes. :-)

> 
> >  }
> >  
> >  /**
> > 
> 
> -- 
> Alexey
> 


signature.asc
Description: PGP signature


Re: [PATCH 8/8] powerpc/eeh: Remove eeh_probe_devices() and eeh_addr_cache_build()

2019-04-08 Thread Sam Bobroff
On Wed, Mar 20, 2019 at 05:05:49PM +1100, Alexey Kardashevskiy wrote:
> 
> 
> On 20/03/2019 13:58, Sam Bobroff wrote:
> > Now that EEH support for all devices (on PowerNV and pSeries) is
> > provided by the pcibios bus add device hooks, eeh_probe_devices() and
> > eeh_addr_cache_build() are redundant and can be removed.
> > 
> > Note that previously on pSeries, useless EEH sysfs files were created
> > for some devices that did not have EEH support and this change
> > prevents them from being created.
> > 
> > Signed-off-by: Sam Bobroff 
> > ---
> >  arch/powerpc/include/asm/eeh.h   |  6 
> >  arch/powerpc/kernel/eeh.c| 13 
> >  arch/powerpc/kernel/eeh_cache.c  | 32 
> >  arch/powerpc/platforms/powernv/eeh-powernv.c |  5 ++-
> >  arch/powerpc/platforms/pseries/pci.c |  3 +-
> >  5 files changed, 3 insertions(+), 56 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> > index 791b9e6fcc45..f1eca1757cbc 100644
> > --- a/arch/powerpc/include/asm/eeh.h
> > +++ b/arch/powerpc/include/asm/eeh.h
> > @@ -290,13 +290,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
> >  struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
> >  void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
> >  void eeh_show_enabled(void);
> > -void eeh_probe_devices(void);
> >  int __init eeh_ops_register(struct eeh_ops *ops);
> >  int __exit eeh_ops_unregister(const char *name);
> >  int eeh_check_failure(const volatile void __iomem *token);
> >  int eeh_dev_check_failure(struct eeh_dev *edev);
> >  void eeh_addr_cache_init(void);
> > -void eeh_addr_cache_build(void);
> >  void eeh_add_device_early(struct pci_dn *);
> >  void eeh_add_device_tree_early(struct pci_dn *);
> >  void eeh_add_device_late(struct pci_dev *);
> > @@ -347,8 +345,6 @@ static inline bool eeh_phb_enabled(void)
> > return false;
> >  }
> >  
> > -static inline void eeh_probe_devices(void) { }
> > -
> >  static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
> >  {
> > return NULL;
> > @@ -365,8 +361,6 @@ static inline int eeh_check_failure(const volatile void 
> > __iomem *token)
> >  
> >  static inline void eeh_addr_cache_init(void) { }
> >  
> > -static inline void eeh_addr_cache_build(void) { }
> > -
> >  static inline void eeh_add_device_early(struct pci_dn *pdn) { }
> >  
> >  static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { }
> > diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> > index 217e14bb1fb6..cd2abbe41497 100644
> > --- a/arch/powerpc/kernel/eeh.c
> > +++ b/arch/powerpc/kernel/eeh.c
> > @@ -1166,19 +1166,6 @@ static struct notifier_block eeh_reboot_nb = {
> > .notifier_call = eeh_reboot_notifier,
> >  };
> >  
> > -void eeh_probe_devices(void)
> > -{
> > -   struct pci_controller *hose, *tmp;
> > -   struct pci_dn *pdn;
> > -
> > -   /* Enable EEH for all adapters */
> > -   list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
> > -   pdn = hose->pci_data;
> > -   traverse_pci_dn(pdn, eeh_ops->probe, NULL);
> > -   }
> > -   eeh_show_enabled();
> > -}
> > -
> >  /**
> >   * eeh_init - EEH initialization
> >   *
> > diff --git a/arch/powerpc/kernel/eeh_cache.c 
> > b/arch/powerpc/kernel/eeh_cache.c
> > index f93dd5cf6a39..c40078d036af 100644
> > --- a/arch/powerpc/kernel/eeh_cache.c
> > +++ b/arch/powerpc/kernel/eeh_cache.c
> > @@ -278,38 +278,6 @@ void eeh_addr_cache_init(void)
> > spin_lock_init(&pci_io_addr_cache_root.piar_lock);
> >  }
> >  
> > -/**
> > - * eeh_addr_cache_build - Build a cache of I/O addresses
> > - *
> > - * Build a cache of pci i/o addresses.  This cache will be used to
> > - * find the pci device that corresponds to a given address.
> > - * This routine scans all pci busses to build the cache.
> > - * Must be run late in boot process, after the pci controllers
> > - * have been scanned for devices (after all device resources are known).
> > - */
> > -void eeh_addr_cache_build(void)
> > -{
> > -   struct pci_dn *pdn;
> > -   struct eeh_dev *edev;
> > -   struct pci_dev *dev = NULL;
> > -
> > -   for_each_pci_dev(dev) {
> > -   pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
> > -   if (!pdn)
> > -   continue;
> > 

Re: [PATCH 0/8]

2019-04-14 Thread Sam Bobroff
On Thu, Apr 11, 2019 at 05:55:33PM -0700, Tyrel Datwyler wrote:
> On 03/19/2019 07:58 PM, Sam Bobroff wrote:
> > Hi all,
> > 
> > This patch set adds support for EEH recovery of hot plugged devices on 
> > pSeries
> > machines. Specifically, devices discovered by PCI rescanning using
> > /sys/bus/pci/rescan, which includes devices hotplugged by QEMU's device_add
> > command. (pSeries doesn't currently use slot power control for hotplugging.)
> 
> Slight nit that it's not that pSeries doesn't support slot power control
> hotplugging, it's that QEMU pSeries guests don't support it. We most definitely
> use the slot power control for hotplugging in PowerVM pSeries Linux guests. 
> More

Ah, I think I see what you mean: pSeries can (and does!) use slot power
control for hotplugging, it's just that Linux doesn't. Right :-) I'll
change it to "Linux on pSeries doesn't" for the next version.

> specifically we had to work around shortcomings in the rpaphp driver when
> dealing with QEMU. This being that while at initial glance the design implies
> that it had multiple devices per PHB in mind, it didn't, and only actually
> supported a single slot per PHB. Further, when we developed the QEMU pci 
> hotplug
> feature we had to deal with only having a single PHB per QEMU guest and as a
> result needed a way to plug multiple pci devices into a single PHB. Hence, 
> came
> the pci rescan work around in drmgr.
> 
> Mike Roth and I have had discussions over the years to get the slot power
> control hotplugging working properly with QEMU, and while I did get the RPA
> hotplug driver fixed to register all available slots associated with a PHB, 
> EEH
> remained an issue. So, I'm very happy to see this patchset get that working 
> with
> the rescan work around.
> 
> > 
> > As a side effect this also provides EEH support for devices removed by
> > /sys/bus/pci/devices/*/remove and re-discovered by writing to 
> > /sys/bus/pci/rescan,
> > on all platforms.
> 
> +1, this seems like icing on the cake. ;)

Yes :-)

Although maybe I should mention that we can't really benefit from this
on PowerNV *yet* because there seem to be some other problems with
removing and re-scanning devices: in my tests devices are often unusable
after being rediscovered.

(I'm hoping to take a look at that soon.)

> > 
> > The approach I've taken is to use the fact that the existing
> > pcibios_bus_add_device() platform hooks (which are used to set up EEH on
> > Virtual Function devices (VFs)) are actually called for all devices, so I've
> > widened their scope and made other adjustments necessary to allow them to 
> > work
> > for hotplugged and boot-time devices as well.
> > 
> > Because some of the changes are in generic PowerPC code, it's
> > possible that I've disturbed something for another PowerPC platform. I've 
> > tried
> > to minimize this by leaving that code alone as much as possible and so there
> > are a few cases where eeh_add_device_{early,late}() or 
> > eeh_add_sysfs_files() is
> > called more than once. I think these can be looked at later, as duplicate 
> > calls
> > are not harmful.
> > 
> > The patch "Convert PNV_PHB_FLAG_EEH" isn't strictly necessary and I'm not 
> > sure
> > if it's better to keep it, because it simplifies the code or drop it, 
> > because
> > we may need a separate flag per PHB later on. Thoughts anyone?
> > 
> > The first patch is a rework of the pcibios_init reordering patch I posted
> > earlier, which I've included here because it's necessary for this set.
> > 
> > I have done some testing for PowerNV on Power9 using a modified pnv_php 
> > module
> > and some testing on pSeries with slot power control using a modified rpaphp
> > module, and the EEH-related parts seem to work.
> 
> I'm interested in what modifications with rpaphp. Its unclear if you are 
> saying
> rpaphp modified so that slot power hotplug works with a QEMU pSeries guest? If
> that's the case it would be optimal to get that upstream and remove the
> rescan workaround for guests that don't need it.

Unfortunately no, I didn't do enough work to really get it working.  I
just wanted to get an idea of how that code path interacted with the EEH
code I was changing, so that hopefully when we get to fixing it, the EEH
part will be easier to do.

The hack I tested with was:

- rtas_errd changed so that it doesn't pass -V to drmgr (-V seems to
  trigger drmgr to use the PCI rescan system rather than slot power
  control).
- of_pci_pars

[PATCH 0/4] powerpc/eeh: Release EEH device state synchronously

2020-03-29 Thread Sam Bobroff
Hi everyone,

Here are some fixes and cleanups that have come from other work but that I
think stand on their own.

Only one patch ("Release EEH device state synchronously", suggested by Oliver
O'Halloran) is a significant change: it moves the cleanup of some EEH device
data out of the (possibly asynchronous) device release handler and into the
(synchronously called) bus notifier. This is useful for future work as it makes
it easier to reason about the lifetimes of EEH structures.

Note that I've left a few WARN_ON_ONCEs in the code because I'm paranoid, but I
have not been able to hit them during testing.

Cheers,
Sam.

Sam Bobroff (4):
  powerpc/eeh: fix pseries_eeh_configure_bridge()
  powerpc/eeh: Release EEH device state synchronously
  powerpc/eeh: Remove workaround from eeh_add_device_late()
  powerpc/eeh: Clean up edev cleanup for VFs

 arch/powerpc/kernel/eeh.c| 49 +++-
 arch/powerpc/kernel/pci-hotplug.c|  2 -
 arch/powerpc/kernel/pci_dn.c |  9 +---
 arch/powerpc/platforms/pseries/eeh_pseries.c |  2 +-
 4 files changed, 29 insertions(+), 33 deletions(-)

-- 
2.22.0.216.g00a2a96fc9



[PATCH 2/4] powerpc/eeh: Release EEH device state synchronously

2020-03-29 Thread Sam Bobroff
EEH device state is currently removed (by eeh_remove_device()) during
the device release handler, which is invoked as the device's reference
count drops to zero. This may take some time, or forever, as other
threads may hold references.

However, the PCI device state is released synchronously by
pci_stop_and_remove_bus_device(). This mismatch causes problems, for
example the device may be re-discovered as a new device before the
release handler has been called, leaving the PCI and EEH state
mismatched.

So instead, call eeh_remove_device() from the bus device removal
handlers, which are called synchronously in the removal path.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/eeh.c | 26 ++
 arch/powerpc/kernel/pci-hotplug.c |  2 --
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 17cb3e9b5697..c36c5a7db5ca 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1106,6 +1106,32 @@ static int eeh_init(void)
 
 core_initcall_sync(eeh_init);
 
+static int eeh_device_notifier(struct notifier_block *nb,
+  unsigned long action, void *data)
+{
+   struct device *dev = data;
+
+   switch (action) {
+   case BUS_NOTIFY_DEL_DEVICE:
+   eeh_remove_device(to_pci_dev(dev));
+   break;
+   default:
+   break;
+   }
+   return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_device_nb = {
+   .notifier_call = eeh_device_notifier,
+};
+
+static __init int eeh_set_bus_notifier(void)
+{
+   bus_register_notifier(&pci_bus_type, &eeh_device_nb);
+   return 0;
+}
+arch_initcall(eeh_set_bus_notifier);
+
 /**
  * eeh_add_device_early - Enable EEH for the indicated device node
  * @pdn: PCI device node for which to set up EEH
diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index d6a67f814983..28e9aa274f64 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -57,8 +57,6 @@ void pcibios_release_device(struct pci_dev *dev)
struct pci_controller *phb = pci_bus_to_host(dev->bus);
struct pci_dn *pdn = pci_get_pdn(dev);
 
-   eeh_remove_device(dev);
-
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
 
-- 
2.22.0.216.g00a2a96fc9



[PATCH 1/4] powerpc/eeh: fix pseries_eeh_configure_bridge()

2020-03-29 Thread Sam Bobroff
If a device is hot unplugged during EEH recovery, it's possible for the
RTAS call to ibm,configure-pe in pseries_eeh_configure_bridge() to return
a parameter error (-3). However, negative return values are not checked
for, and this leads to an infinite loop.

Fix this by correctly bailing out on negative values.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 893ba3f562c4..c4ef03bec0de 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -605,7 +605,7 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
config_addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid));
 
-   if (!ret)
+   if (ret <= 0)
return ret;
 
/*
-- 
2.22.0.216.g00a2a96fc9



[PATCH 3/4] powerpc/eeh: Remove workaround from eeh_add_device_late()

2020-03-29 Thread Sam Bobroff
When EEH device state was released asynchronously by the device
release handler, it was possible for an outstanding reference to
prevent its release and it was necessary to work around that if a
device was re-discovered at the same PCI location.

Now that the state is released synchronously that is no longer
possible and the workaround is no longer necessary.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/eeh.c | 23 +--
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index c36c5a7db5ca..12c248a16527 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1206,28 +1206,7 @@ void eeh_add_device_late(struct pci_dev *dev)
eeh_edev_dbg(edev, "Device already referenced!\n");
return;
}
-
-   /*
-* The EEH cache might not be removed correctly because of
-* unbalanced kref to the device during unplug time, which
-* relies on pcibios_release_device(). So we have to remove
-* that here explicitly.
-*/
-   if (edev->pdev) {
-   eeh_rmv_from_parent_pe(edev);
-   eeh_addr_cache_rmv_dev(edev->pdev);
-   eeh_sysfs_remove_device(edev->pdev);
-
-   /*
-* We definitely should have the PCI device removed
-* though it wasn't correctly. So we needn't call
-* into error handler afterwards.
-*/
-   edev->mode |= EEH_DEV_NO_HANDLER;
-
-   edev->pdev = NULL;
-   dev->dev.archdata.edev = NULL;
-   }
+   WARN_ON_ONCE(edev->pdev);
 
if (eeh_has_flag(EEH_PROBE_MODE_DEV))
eeh_ops->probe(pdn, NULL);
-- 
2.22.0.216.g00a2a96fc9



[PATCH 4/4] powerpc/eeh: Clean up edev cleanup for VFs

2020-03-29 Thread Sam Bobroff
Because the bus notifier calls eeh_rmv_from_parent_pe() (via
eeh_remove_device()) when a VF is removed, the call in
remove_sriov_vf_pdns() is redundant.

So remove the call.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/pci_dn.c | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 4e654df55969..f6ac25f7af63 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -236,14 +236,7 @@ void remove_sriov_vf_pdns(struct pci_dev *pdev)
 */
edev = pdn_to_eeh_dev(pdn);
if (edev) {
-   /*
-* We allocate pci_dn's for the totalvfs count,
-* but only only the vfs that were activated
-* have a configured PE.
-*/
-   if (edev->pe)
-   eeh_rmv_from_parent_pe(edev);
-
+   WARN_ON_ONCE(edev->pe);
pdn->edev = NULL;
kfree(edev);
}
-- 
2.22.0.216.g00a2a96fc9



[PATCH RFC 1/1] powerpc/eeh: Synchronization for safety

2020-03-29 Thread Sam Bobroff
There is currently little synchronization between EEH error detection
(eeh_dev_check_failure()), EEH error recovery
(eeh_handle_{normal,special}_event()) and the PCI subsystem (device
addition and removal), and so there are race conditions that lead to
crashes (often access to free'd memory or LIST_POISON).

However, a solution must consider:
- EEH error detection can occur in interrupt context, which prevents
the use of a mutex.
- EEH recovery may need to sleep, which prevents the use of a spinlock.
- EEH recovery uses PCI operations that may require the PCI
rescan/remove lock and/or device lock to be held
- PCI operations may hold the rescan/remove and/or device lock when
calling into EEH functions.
- Device driver callbacks may perform arbitrary PCI operations
during recovery, including device removal.

In this patch the existing mutex and spinlock are combined with the
EEH_PE_RECOVERING flag to provide some assurances that are then used
to reduce the race conditions.

The fields to be protected are the ones that provide the structure
of the trees of struct eeh_pe that are held for each PHB: the parent
pointer and child lists and the list of struct eeh_dev, as well as
the pe and pdev pointers within struct eeh_dev.

The existing way of using EEH_PE_RECOVERING is kept and slightly
extended: No struct eeh_pe will be removed while it has the flag set
on it. Additionally, when adding new PEs, they are marked
EEH_PE_RECOVERING if their parent PE is marked: this allows the
recovery thread to assume that all PEs underneath the one it's
processing will continue to exist during recovery.

Both the mutex and spinlock are held while any protected field is
changed or a PE is deleted, so holding either of them (elsewhere) will
keep them stable and safe to access. Additionally, if
EEH_PE_RECOVERING is set on a PE then the locks can be released and
re-acquired safely, as long as the protected fields aren't used while
no locks are held. This is used during recovery to release locks
for long sleeps (i.e. during eeh_wait_state() when we may sleep up to
5 minutes), or to maintain lock ordering.

The spinlock is used in error detection (which cannot use a mutex, see
above) and also where it's possible that the mutex is already held.
The mutex is used in areas that don't have that restriction, and where
blocking may be required. Care must be taken when ordering these locks
against the PCI rescan/remove lock and the device locks to avoid
deadlocking.

Signed-off-by: Sam Bobroff 
---
Hello everyone,

Here's an attempt to bring some safety to the interactions between the various
moving parts involved in EEH recovery.

It's based on top of my recently posted set: "Release EEH device state
synchronously".

I've tried to explain it in the commit message and code comments, but I'd like
to add:

- I'm not aware of any outstanding problems with the set, but I've kept it RFC
  for now because I'm looking for comments on the general strategy and
  direction -- is this a good way to achieve some safety?
- I've only done manual testing so far. If it looks good, I'll try to
  do something more thorough.
- Yes it's one big patch. I'll try to break it up if necessary.
- Good places to review carefully would be eeh_pe_report_pdev() and
  eeh_reset_device().
- The mutex and spinlock need better names. Suggestions welcome.
- I'm not aiming to fix absolutely every case here, just most of them, and to
  provide a decent foundation for fixing the remaining cases as they are
  discovered.

And finally, a big thank you to lockdep :-)

Cheers,
Sam.

 arch/powerpc/include/asm/eeh.h   |   6 +-
 arch/powerpc/kernel/eeh.c| 114 +--
 arch/powerpc/kernel/eeh_driver.c | 300 ++-
 arch/powerpc/kernel/eeh_pe.c |  47 ++-
 arch/powerpc/kernel/of_platform.c|   7 +-
 arch/powerpc/kernel/pci-common.c |   4 +
 arch/powerpc/kernel/pci-hotplug.c|   2 +
 arch/powerpc/platforms/powernv/eeh-powernv.c |  13 +-
 arch/powerpc/platforms/pseries/eeh_pseries.c |   5 +-
 arch/powerpc/platforms/pseries/pci_dlpar.c   |   5 +-
 drivers/pci/hotplug/pnv_php.c|   5 +-
 drivers/pci/hotplug/rpadlpar_core.c  |   2 +
 drivers/vfio/vfio_spapr_eeh.c|  10 +-
 13 files changed, 404 insertions(+), 116 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 6f9b2a12540a..1d4c0b19a63c 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -273,11 +273,15 @@ static inline bool eeh_state_active(int state)
== (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 }
 
+void eeh_recovery_lock(void);
+void eeh_recovery_unlock(void);
+void eeh_recovery_must_be_locked(void);
+
 typedef void (*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
 typedef void *(*

[PATCH v2 1/1] vfio-pci/nvlink2: Allow fallback to ibm,mmio-atsd[0]

2020-03-30 Thread Sam Bobroff
Older versions of skiboot only provide a single value in the device
tree property "ibm,mmio-atsd", even when multiple Address Translation
Shoot Down (ATSD) registers are present. This prevents NVLink2 devices
(other than the first) from being used with vfio-pci because vfio-pci
expects to be able to assign a dedicated ATSD register to each NVLink2
device.

However, ATSD registers can be shared among devices. This change
allows vfio-pci to fall back to sharing the register at index 0 if
necessary.

Fixes: 7f92891778df ("vfio_pci: Add NVIDIA GV100GL [Tesla V100 SXM2] subdriver")
Signed-off-by: Sam Bobroff 
---
Patch set v2:
Patch 1/1: vfio-pci/nvlink2: Allow fallback to ibm,mmio-atsd[0]
- Removed unnecessary warning.
- Added Fixes tag.

Patch set v1:
Patch 1/1: vfio-pci/nvlink2: Allow fallback to ibm,mmio-atsd[0]

 drivers/vfio/pci/vfio_pci_nvlink2.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c 
b/drivers/vfio/pci/vfio_pci_nvlink2.c
index f2983f0f84be..ae2af590e501 100644
--- a/drivers/vfio/pci/vfio_pci_nvlink2.c
+++ b/drivers/vfio/pci/vfio_pci_nvlink2.c
@@ -420,8 +420,14 @@ int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
 
if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", nvlink_index,
&mmio_atsd)) {
-   dev_warn(&vdev->pdev->dev, "No available ATSD found\n");
-   mmio_atsd = 0;
+   if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", 0,
+   &mmio_atsd)) {
+   dev_warn(&vdev->pdev->dev, "No available ATSD found\n");
+   mmio_atsd = 0;
+   } else {
+   dev_warn(&vdev->pdev->dev,
+"Using fallback ibm,mmio-atsd[0] for ATSD.\n");
+   }
}
 
if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
-- 
2.22.0.216.g00a2a96fc9



Re: [PATCH 2/4] powerpc/eeh: Release EEH device state synchronously

2020-04-07 Thread Sam Bobroff
On Fri, Apr 03, 2020 at 03:51:18PM +1100, Oliver O'Halloran wrote:
> On Mon, 2020-03-30 at 15:56 +1100, Sam Bobroff wrote:
> > EEH device state is currently removed (by eeh_remove_device()) during
> > the device release handler, which is invoked as the device's reference
> > count drops to zero. This may take some time, or forever, as other
> > threads may hold references.
> > 
> > However, the PCI device state is released synchronously by
> > pci_stop_and_remove_bus_device(). This mismatch causes problems, for
> > example the device may be re-discovered as a new device before the
> > release handler has been called, leaving the PCI and EEH state
> > mismatched.
> > 
> > So instead, call eeh_remove_device() from the bus device removal
> > handlers, which are called synchronously in the removal path.
> > 
> > Signed-off-by: Sam Bobroff 
> > ---
> >  arch/powerpc/kernel/eeh.c | 26 ++
> >  arch/powerpc/kernel/pci-hotplug.c |  2 --
> >  2 files changed, 26 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> > index 17cb3e9b5697..c36c5a7db5ca 100644
> > --- a/arch/powerpc/kernel/eeh.c
> > +++ b/arch/powerpc/kernel/eeh.c
> > @@ -1106,6 +1106,32 @@ static int eeh_init(void)
> >  
> >  core_initcall_sync(eeh_init);
> >  
> > +static int eeh_device_notifier(struct notifier_block *nb,
> > +  unsigned long action, void *data)
> > +{
> > +   struct device *dev = data;
> > +
> > +   switch (action) {
> > +   case BUS_NOTIFY_DEL_DEVICE:
> > +   eeh_remove_device(to_pci_dev(dev));
> > +   break;
> > +   default:
> > +   break;
> > +   }
> 
> A comment briefly explaining why we're not doing anything in the add
> case might be nice.

Good point, I'll add something for v2.
> 
> Reviewed-by: Oliver O'Halloran 
> 
> > +   return NOTIFY_DONE;
> > +}
> > +
> > +static struct notifier_block eeh_device_nb = {
> > +   .notifier_call = eeh_device_notifier,
> > +};
> > +
> > +static __init int eeh_set_bus_notifier(void)
> > +{
> > +   bus_register_notifier(&pci_bus_type, &eeh_device_nb);
> > +   return 0;
> > +}
> > +arch_initcall(eeh_set_bus_notifier);
> > +
> >  /**
> >   * eeh_add_device_early - Enable EEH for the indicated device node
> >   * @pdn: PCI device node for which to set up EEH
> > diff --git a/arch/powerpc/kernel/pci-hotplug.c 
> > b/arch/powerpc/kernel/pci-hotplug.c
> > index d6a67f814983..28e9aa274f64 100644
> > --- a/arch/powerpc/kernel/pci-hotplug.c
> > +++ b/arch/powerpc/kernel/pci-hotplug.c
> > @@ -57,8 +57,6 @@ void pcibios_release_device(struct pci_dev *dev)
> > struct pci_controller *phb = pci_bus_to_host(dev->bus);
> > struct pci_dn *pdn = pci_get_pdn(dev);
> >  
> > -   eeh_remove_device(dev);
> > -
> > if (phb->controller_ops.release_device)
> > phb->controller_ops.release_device(dev);
> >  
> 


signature.asc
Description: PGP signature


Re: [PATCH 3/4] powerpc/eeh: Remove workaround from eeh_add_device_late()

2020-04-07 Thread Sam Bobroff
On Fri, Apr 03, 2020 at 05:08:32PM +1100, Oliver O'Halloran wrote:
> On Mon, 2020-03-30 at 15:56 +1100, Sam Bobroff wrote:
> > When EEH device state was released asynchronously by the device
> > release handler, it was possible for an outstanding reference to
> > prevent its release and it was necessary to work around that if a
> > device was re-discovered at the same PCI location.
> 
> I think this is a bit misleading. The main situation where you'll hit
> this hack is when recovering a device with a driver that doesn't
> implement the error handling callbacks. In that case the device is
> removed, reset, then re-probed by the PCI core, but we assume it's the
> same physical device so the eeh_device state remains active.
> 
> If you actually changed the underlying device I suspect something bad
> would happen.

I'm not sure I understand. Isn't the case you're talking about caught by
the earlier check (just above the patch)?

if (edev->pdev == dev) {
eeh_edev_dbg(edev, "Device already referenced!\n");
return;
}
> 
> > Now that the state is released synchronously that is no longer
> > possible and the workaround is no longer necessary.
> 
> You could probably fold this into the previous patch, but eh.

True.

> > Signed-off-by: Sam Bobroff 
> > ---
> >  arch/powerpc/kernel/eeh.c | 23 +--
> >  1 file changed, 1 insertion(+), 22 deletions(-)
> > 
> > diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> > index c36c5a7db5ca..12c248a16527 100644
> > --- a/arch/powerpc/kernel/eeh.c
> > +++ b/arch/powerpc/kernel/eeh.c
> > @@ -1206,28 +1206,7 @@ void eeh_add_device_late(struct pci_dev *dev)
> > eeh_edev_dbg(edev, "Device already referenced!\n");
> > return;
> > }
> > -
> > -   /*
> > -* The EEH cache might not be removed correctly because of
> > -* unbalanced kref to the device during unplug time, which
> > -* relies on pcibios_release_device(). So we have to remove
> > -* that here explicitly.
> > -*/
> > -   if (edev->pdev) {
> > -   eeh_rmv_from_parent_pe(edev);
> > -   eeh_addr_cache_rmv_dev(edev->pdev);
> > -   eeh_sysfs_remove_device(edev->pdev);
> > -
> > -   /*
> > -* We definitely should have the PCI device removed
> > -* though it wasn't correctly. So we needn't call
> > -* into error handler afterwards.
> > -*/
> > -   edev->mode |= EEH_DEV_NO_HANDLER;
> > -
> > -   edev->pdev = NULL;
> > -   dev->dev.archdata.edev = NULL;
> > -   }
> > +   WARN_ON_ONCE(edev->pdev);
> >  
> > if (eeh_has_flag(EEH_PROBE_MODE_DEV))
> > eeh_ops->probe(pdn, NULL);
> 


signature.asc
Description: PGP signature


Re: [PATCH 4/4] powerpc/eeh: Clean up edev cleanup for VFs

2020-04-07 Thread Sam Bobroff
On Fri, Apr 03, 2020 at 04:45:47PM +1100, Oliver O'Halloran wrote:
> On Mon, 2020-03-30 at 15:56 +1100, Sam Bobroff wrote:
> > Because the bus notifier calls eeh_rmv_from_parent_pe() (via
> > eeh_remove_device()) when a VF is removed, the call in
> > remove_sriov_vf_pdns() is redundant.
> 
> eeh_rmv_from_parent_pe() won't actually remove the device if the
> recovering flag is set on the PE. Are you sure we're not introducing a
> race here?
> 

Ah, I assume you're referring to the difference between calling
eeh_remove_device() and directly calling eeh_rmv_from_parent_pe(), where
the behaviour for PE's with EEH_PE_KEEP set is subtly different.

I'll take a closer look at it and make sure to explain it better in v2.


signature.asc
Description: PGP signature


Re: [PATCH] powerpc/powernv: Add a print indicating when an IODA PE is released

2020-04-09 Thread Sam Bobroff
On Wed, Apr 08, 2020 at 09:22:13PM +1000, Oliver O'Halloran wrote:
> Quite useful to know in some cases.
> 
> Signed-off-by: Oliver O'Halloran 

Agreed.
Reviewed-by: Sam Bobroff 

> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 3d81c01..82e5098 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -3475,6 +3475,8 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
>   struct pnv_phb *phb = pe->phb;
>   struct pnv_ioda_pe *slave, *tmp;
>  
> + pe_info(pe, "Releasing PE\n");
> +
>   mutex_lock(&phb->ioda.pe_list_mutex);
>   list_del(&pe->list);
>   mutex_unlock(&phb->ioda.pe_list_mutex);
> -- 
> 2.9.5
> 


signature.asc
Description: PGP signature


Re: [PATCH 3/4] powerpc/eeh: Remove workaround from eeh_add_device_late()

2020-04-14 Thread Sam Bobroff
On Wed, Apr 08, 2020 at 04:53:36PM +1000, Oliver O'Halloran wrote:
> On Wed, Apr 8, 2020 at 4:22 PM Sam Bobroff  wrote:
> >
> > On Fri, Apr 03, 2020 at 05:08:32PM +1100, Oliver O'Halloran wrote:
> > > On Mon, 2020-03-30 at 15:56 +1100, Sam Bobroff wrote:
> > > > When EEH device state was released asynchronously by the device
> > > > release handler, it was possible for an outstanding reference to
> > > > prevent its release and it was necessary to work around that if a
> > > > device was re-discovered at the same PCI location.
> > >
> > > I think this is a bit misleading. The main situation where you'll hit
> > > this hack is when recovering a device with a driver that doesn't
> > > implement the error handling callbacks. In that case the device is
> > > removed, reset, then re-probed by the PCI core, but we assume it's the
> > > same physical device so the eeh_device state remains active.
> > >
> > > If you actually changed the underlying device I suspect something bad
> > > would happen.
> >
> > I'm not sure I understand. Isn't the case you're talking about caught by
> > the earlier check (just above the patch)?
> >
> > if (edev->pdev == dev) {
> > eeh_edev_dbg(edev, "Device already referenced!\n");
> > return;
> > }
> 
> No, in the case I'm talking about the pci_dev is torn down and
> freed(). After the PE is reset we re-probe the device and create a new
> pci_dev.  If the release of the old pci_dev is delayed we need the
> hack this patch is removing.

Oh, yes, that is the case I was intending to change here.  But I must be
missing something, isn't it also the case that's changed by patch 2/4?

What I intended was, after patch 2, eeh_remove_device() is called from
the bus notifier so it happens immediately when recovery calls
pci_stop_and_remove_bus_device().  Once it returns, edev->pdev has
already been set to NULL by eeh_remove_device() so this case can't be
hit anymore, and we should clean it up (this patch).

(There is a slight difference in the way EEH_PE_KEEP is handled between
the code removed here and the body of eeh_remove_device(), but checking
and explaining that is already on my list for v2.)

(I did test recovery on an unaware device and didn't hit the
WARN_ON_ONCE().)

> The check above should probably be a WARN_ON() since we should never
> be re-running the EEH probe on the same device. I think there is a
> case where that can happen, but I don't remember the details.

Yeah, I also certainly see the "Device already referenced!" message
while debugging, and it would be good to track down.

> Oliver


signature.asc
Description: PGP signature


[PATCH v2 0/2] powerpc/eeh: Release EEH device state synchronously

2020-04-19 Thread Sam Bobroff
Hi everyone,

Here are some fixes and cleanups that have come from other work but that I
think stand on their own.

Only one patch ("Release EEH device state synchronously", suggested by Oliver
O'Halloran) is a significant change: it moves the cleanup of some EEH device
data out of the (possibly asynchronous) device release handler and into the
(synchronously called) bus notifier. This is useful for future work as it makes
it easier to reason about the lifetimes of EEH structures.

Note that I've left a few WARN_ON_ONCEs in the code because I'm paranoid, but I
have not been able to hit them during testing.

Cheers,
Sam.

Notes for v2:

I've dropped both cleanup patches (3/4, 4/4) because that type of cleanup
(replacing a call to eeh_rmv_from_parent_pe() with one to eeh_remove_device())
is incorrect: if called during recovery, it will cause edev->pe to remain set
when it would have been cleared previously. This would lead to stale
information in the edev. I think there should be a way to simplify the code
around EEH_PE_KEEP but I'll look at that separately.

Patch set changelog follows:

Patch set v2: 
Patch 1/2: powerpc/eeh: fix pseries_eeh_configure_bridge()
Patch 2/2: powerpc/eeh: Release EEH device state synchronously
- Added comment explaining why the add case can't be handled similarly to the 
remove case.
Dropped (was 3/4) powerpc/eeh: Remove workaround from eeh_add_device_late()
Dropped (was 4/4) powerpc/eeh: Clean up edev cleanup for VFs

Patch set v1:
Patch 1/4: powerpc/eeh: fix pseries_eeh_configure_bridge()
Patch 2/4: powerpc/eeh: Release EEH device state synchronously
Patch 3/4: powerpc/eeh: Remove workaround from eeh_add_device_late()
Patch 4/4: powerpc/eeh: Clean up edev cleanup for VFs

Sam Bobroff (2):
  powerpc/eeh: fix pseries_eeh_configure_bridge()
  powerpc/eeh: Release EEH device state synchronously

 arch/powerpc/kernel/eeh.c| 31 
 arch/powerpc/kernel/pci-hotplug.c|  2 --
 arch/powerpc/platforms/pseries/eeh_pseries.c |  2 +-
 3 files changed, 32 insertions(+), 3 deletions(-)

-- 
2.22.0.216.g00a2a96fc9



[PATCH v2 1/2] powerpc/eeh: fix pseries_eeh_configure_bridge()

2020-04-19 Thread Sam Bobroff
If a device is hot unplugged during EEH recovery, it's possible for the
RTAS call to ibm,configure-pe in pseries_eeh_configure_bridge() to return
parameter error (-3), however negative return values are not checked
for and this leads to an infinite loop.

Fix this by correctly bailing out on negative values.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 893ba3f562c4..c4ef03bec0de 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -605,7 +605,7 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
config_addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid));
 
-   if (!ret)
+   if (ret <= 0)
return ret;
 
/*
-- 
2.22.0.216.g00a2a96fc9



[PATCH v2 2/2] powerpc/eeh: Release EEH device state synchronously

2020-04-19 Thread Sam Bobroff
EEH device state is currently removed (by eeh_remove_device()) during
the device release handler, which is invoked as the device's reference
count drops to zero. This may take some time, or forever, as other
threads may hold references.

However, the PCI device state is released synchronously by
pci_stop_and_remove_bus_device(). This mismatch causes problems, for
example the device may be re-discovered as a new device before the
release handler has been called, leaving the PCI and EEH state
mismatched.

So instead, call eeh_remove_device() from the bus device removal
handlers, which are called synchronously in the removal path.

Signed-off-by: Sam Bobroff 
---
v2 - Added comment explaining why the add case can't be handled similarly to 
the remove case.

 arch/powerpc/kernel/eeh.c | 31 +++
 arch/powerpc/kernel/pci-hotplug.c |  2 --
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 17cb3e9b5697..64361311bc8e 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1106,6 +1106,37 @@ static int eeh_init(void)
 
 core_initcall_sync(eeh_init);
 
+static int eeh_device_notifier(struct notifier_block *nb,
+  unsigned long action, void *data)
+{
+   struct device *dev = data;
+
+   switch (action) {
+   /*
+* Note: It's not possible to perform EEH device addition (i.e.
+* {pseries,pnv}_pcibios_bus_add_device()) here because it depends on
+* the device's resources, which have not yet been set up.
+*/
+   case BUS_NOTIFY_DEL_DEVICE:
+   eeh_remove_device(to_pci_dev(dev));
+   break;
+   default:
+   break;
+   }
+   return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_device_nb = {
+   .notifier_call = eeh_device_notifier,
+};
+
+static __init int eeh_set_bus_notifier(void)
+{
+   bus_register_notifier(&pci_bus_type, &eeh_device_nb);
+   return 0;
+}
+arch_initcall(eeh_set_bus_notifier);
+
 /**
  * eeh_add_device_early - Enable EEH for the indicated device node
  * @pdn: PCI device node for which to set up EEH
diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index d6a67f814983..28e9aa274f64 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -57,8 +57,6 @@ void pcibios_release_device(struct pci_dev *dev)
struct pci_controller *phb = pci_bus_to_host(dev->bus);
struct pci_dn *pdn = pci_get_pdn(dev);
 
-   eeh_remove_device(dev);
-
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
 
-- 
2.22.0.216.g00a2a96fc9



Re: [PATCH v2 1/2] powerpc/eeh: fix pseries_eeh_configure_bridge()

2020-04-21 Thread Sam Bobroff
On Tue, Apr 21, 2020 at 06:33:36PM -0500, Nathan Lynch wrote:
> Sam Bobroff  writes:
> > If a device is hot unplugged during EEH recovery, it's possible for the
> > RTAS call to ibm,configure-pe in pseries_eeh_configure_bridge() to return
> > parameter error (-3), however negative return values are not checked
> > for and this leads to an infinite loop.
> >
> > Fix this by correctly bailing out on negative values.
> >
> > Signed-off-by: Sam Bobroff 
> > ---
> >  arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
> > b/arch/powerpc/platforms/pseries/eeh_pseries.c
> > index 893ba3f562c4..c4ef03bec0de 100644
> > --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
> > +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
> > @@ -605,7 +605,7 @@ static int pseries_eeh_configure_bridge(struct eeh_pe 
> > *pe)
> > config_addr, BUID_HI(pe->phb->buid),
> > BUID_LO(pe->phb->buid));
> >  
> > -   if (!ret)
> > +   if (ret <= 0)
> > return ret;
> 
> Note that this returns the firmware error value (e.g. -3 parameter
> error) without converting it to a Linux errno. Nothing checks the error
> value of this function as best I can tell, but -EINVAL would be better
> than an implicit -ESRCH here.

Right, it's never used but I agree. I'll change it for v3.

> And while this will behave correctly, the pr_warn() at the end of
> pseries_eeh_configure_bridge() hints that someone had the intention
> that this code should log a message on such an error:
> 
> static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
> {
>   int config_addr;
>   int ret;
>   /* Waiting 0.2s maximum before skipping configuration */
>   int max_wait = 200;
> 
>   /* Figure out the PE address */
>   config_addr = pe->config_addr;
>   if (pe->addr)
>   config_addr = pe->addr;
> 
>   while (max_wait > 0) {
>   ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
>   config_addr, BUID_HI(pe->phb->buid),
>   BUID_LO(pe->phb->buid));
> 
>   if (!ret)
>   return ret;
> 
>   /*
>* If RTAS returns a delay value that's above 100ms, cut it
>* down to 100ms in case firmware made a mistake.  For more
>* on how these delay values work see rtas_busy_delay_time
>*/
>   if (ret > RTAS_EXTENDED_DELAY_MIN+2 &&
>   ret <= RTAS_EXTENDED_DELAY_MAX)
>   ret = RTAS_EXTENDED_DELAY_MIN+2;
> 
>   max_wait -= rtas_busy_delay_time(ret);
> 
>   if (max_wait < 0)
>   break;
> 
>   rtas_busy_delay(ret);
>   }
> 
>   pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
>   __func__, pe->phb->global_number, pe->addr, ret);
>   return ret;
> }
> 
> So perhaps the error path should be made to break out of the loop
> instead of returning. Or is the parameter error result simply
> uninteresting in this scenario?

Sounds reasonable to me, and given that the only way I know to trigger
the error path (see the commit message) is not going to be common, I
think a message is a good idea. (And, as one of the people likely to
debug a future issue here, I'll probably appreciate it.)

Cheers,
Sam.


signature.asc
Description: PGP signature


[PATCH v3 1/3] powerpc/rtas: Export rtas_error_rc

2020-04-23 Thread Sam Bobroff
Export rtas_error_rc() so that it can be used by other users of
rtas_call() (which is already exported).

Signed-off-by: Sam Bobroff 
---
v3 * New in this version.

 arch/powerpc/include/asm/rtas.h | 1 +
 arch/powerpc/kernel/rtas.c  | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 3c1887351c71..7c9e4d3635cf 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -379,6 +379,7 @@ extern int rtas_set_rtc_time(struct rtc_time *rtc_time);
 
 extern unsigned int rtas_busy_delay_time(int status);
 extern unsigned int rtas_busy_delay(int status);
+extern int rtas_error_rc(int rtas_rc);
 
 extern int early_init_dt_scan_rtas(unsigned long node,
const char *uname, int depth, void *data);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index c5fa251b8950..238bf112d29a 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -518,7 +518,7 @@ unsigned int rtas_busy_delay(int status)
 }
 EXPORT_SYMBOL(rtas_busy_delay);
 
-static int rtas_error_rc(int rtas_rc)
+int rtas_error_rc(int rtas_rc)
 {
int rc;
 
@@ -546,6 +546,7 @@ static int rtas_error_rc(int rtas_rc)
}
return rc;
 }
+EXPORT_SYMBOL(rtas_error_rc);
 
 int rtas_get_power_level(int powerdomain, int *level)
 {
-- 
2.22.0.216.g00a2a96fc9



[PATCH v3 0/3] powerpc/eeh: Release EEH device state synchronously

2020-04-23 Thread Sam Bobroff
Hi everyone,

Here are some fixes and cleanups that have come from other work but that I
think stand on their own.

Only one patch ("Release EEH device state synchronously", suggested by Oliver
O'Halloran) is a significant change: it moves the cleanup of some EEH device
data out of the (possibly asynchronous) device release handler and into the
(synchronously called) bus notifier. This is useful for future work as it makes
it easier to reason about the lifetimes of EEH structures.

Note that I've left a few WARN_ON_ONCEs in the code because I'm paranoid, but I
have not been able to hit them during testing.

Cheers,
Sam.

Notes for v3:
I've tweaked the fix for pseries_eeh_configure_bridge() to return the correct
error code (even though it's not used) by calling an already present RTAS
function, rtas_error_rc(). However, I had to make another change to export that
function, and while it does seem like the right thing to do, I'm concerned
it's a bit out of scope for such a small fix.

Notes for v2:

I've dropped both cleanup patches (3/4, 4/4) because that type of cleanup
(replacing a call to eeh_rmv_from_parent_pe() with one to eeh_remove_device())
is incorrect: if called during recovery, it will cause edev->pe to remain set
when it would have been cleared previously. This would lead to stale
information in the edev. I think there should be a way to simplify the code
around EEH_PE_KEEP but I'll look at that separately.

Patch set changelog follows:

Patch set v3: 
Patch 1/3 (new in this version): powerpc/rtas: Export rtas_error_rc
Patch 2/3 (was 1/2): powerpc/eeh: fix pseries_eeh_configure_bridge()
Patch 3/3 (was 2/2): powerpc/eeh: Release EEH device state synchronously

Patch set v2: 
Patch 1/2: powerpc/eeh: fix pseries_eeh_configure_bridge()
Patch 2/2: powerpc/eeh: Release EEH device state synchronously
- Added comment explaining why the add case can't be handled similarly to the 
remove case.
Dropped (was 3/4) powerpc/eeh: Remove workaround from eeh_add_device_late()
Dropped (was 4/4) powerpc/eeh: Clean up edev cleanup for VFs

Patch set v1:
Patch 1/4: powerpc/eeh: fix pseries_eeh_configure_bridge()
Patch 2/4: powerpc/eeh: Release EEH device state synchronously
Patch 3/4: powerpc/eeh: Remove workaround from eeh_add_device_late()
Patch 4/4: powerpc/eeh: Clean up edev cleanup for VFs

Sam Bobroff (3):
  powerpc/rtas: Export rtas_error_rc
  powerpc/eeh: fix pseries_eeh_configure_bridge()
  powerpc/eeh: Release EEH device state synchronously

 arch/powerpc/include/asm/rtas.h  |  1 +
 arch/powerpc/kernel/eeh.c| 31 
 arch/powerpc/kernel/pci-hotplug.c|  2 --
 arch/powerpc/kernel/rtas.c   |  3 +-
 arch/powerpc/platforms/pseries/eeh_pseries.c |  4 ++-
 5 files changed, 37 insertions(+), 4 deletions(-)

-- 
2.22.0.216.g00a2a96fc9



[PATCH v3 2/3] powerpc/eeh: fix pseries_eeh_configure_bridge()

2020-04-23 Thread Sam Bobroff
If a device is hot unplugged during EEH recovery, it's possible for the
RTAS call to ibm,configure-pe in pseries_eeh_configure_bridge() to return
parameter error (-3), however negative return values are not checked
for and this leads to an infinite loop.

Fix this by correctly bailing out on negative values.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/platforms/pseries/eeh_pseries.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 893ba3f562c4..9ea1c06a78cd 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -607,6 +607,8 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
 
if (!ret)
return ret;
+   if (ret < 0)
+   break;
 
/*
 * If RTAS returns a delay value that's above 100ms, cut it
@@ -627,7 +629,7 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
 
pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
__func__, pe->phb->global_number, pe->addr, ret);
-   return ret;
+   return rtas_error_rc(ret);
 }
 
 /**
-- 
2.22.0.216.g00a2a96fc9



[PATCH v3 3/3] powerpc/eeh: Release EEH device state synchronously

2020-04-23 Thread Sam Bobroff
EEH device state is currently removed (by eeh_remove_device()) during
the device release handler, which is invoked as the device's reference
count drops to zero. This may take some time, or forever, as other
threads may hold references.

However, the PCI device state is released synchronously by
pci_stop_and_remove_bus_device(). This mismatch causes problems, for
example the device may be re-discovered as a new device before the
release handler has been called, leaving the PCI and EEH state
mismatched.

So instead, call eeh_remove_device() from the bus device removal
handlers, which are called synchronously in the removal path.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/eeh.c | 31 +++
 arch/powerpc/kernel/pci-hotplug.c |  2 --
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 17cb3e9b5697..64361311bc8e 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1106,6 +1106,37 @@ static int eeh_init(void)
 
 core_initcall_sync(eeh_init);
 
+static int eeh_device_notifier(struct notifier_block *nb,
+  unsigned long action, void *data)
+{
+   struct device *dev = data;
+
+   switch (action) {
+   /*
+* Note: It's not possible to perform EEH device addition (i.e.
+* {pseries,pnv}_pcibios_bus_add_device()) here because it depends on
+* the device's resources, which have not yet been set up.
+*/
+   case BUS_NOTIFY_DEL_DEVICE:
+   eeh_remove_device(to_pci_dev(dev));
+   break;
+   default:
+   break;
+   }
+   return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_device_nb = {
+   .notifier_call = eeh_device_notifier,
+};
+
+static __init int eeh_set_bus_notifier(void)
+{
+   bus_register_notifier(&pci_bus_type, &eeh_device_nb);
+   return 0;
+}
+arch_initcall(eeh_set_bus_notifier);
+
 /**
  * eeh_add_device_early - Enable EEH for the indicated device node
  * @pdn: PCI device node for which to set up EEH
diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index d6a67f814983..28e9aa274f64 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -57,8 +57,6 @@ void pcibios_release_device(struct pci_dev *dev)
struct pci_controller *phb = pci_bus_to_host(dev->bus);
struct pci_dn *pdn = pci_get_pdn(dev);
 
-   eeh_remove_device(dev);
-
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
 
-- 
2.22.0.216.g00a2a96fc9



Re: [PATCH v3 1/3] powerpc/rtas: Export rtas_error_rc

2020-04-27 Thread Sam Bobroff
On Fri, Apr 24, 2020 at 11:07:43AM -0500, Nathan Lynch wrote:
> Sam Bobroff  writes:
> > Export rtas_error_rc() so that it can be used by other users of
> > rtas_call() (which is already exported).
> 
> This will do the right thing for your ibm,configure-pe use case in patch
> 2, but the -900x => errno translations in rtas_error_rc() appear
> tailored for the indicator- and sensor-related calls that currently use
> it. From my reading of PAPR+, the meaning of a -900x RTAS status word
> depends on the call. For example, -9002 commonly means "not authorized",
> which we would typically translate to -EPERM, but rtas_error_rc() would
> translate it to -ENODEV.
> 
> Also the semantics of -9001 as a return value seem to vary a bit.
> 
> So I don't think rtas_error_rc() should be advertised as a generically
> useful facility in its current form.
> 
> (I have had some thoughts about how firmware/hypervisor call status can
> be translated to meaningful Linux error values without tedious switch
> statements, which I'm happy to expand on if anyone is interested, but I
> don't want to hijack your submission for that discussion.)

Ah, interesting.

I'll do another version as you suggest.

Cheers,
Sam.


signature.asc
Description: PGP signature


[PATCH v4 0/2] powerpc/eeh: Release EEH device state synchronously

2020-04-27 Thread Sam Bobroff
Hi everyone,

Here are some fixes and cleanups that have come from other work but that I
think stand on their own.

Only one patch ("Release EEH device state synchronously", suggested by Oliver
O'Halloran) is a significant change: it moves the cleanup of some EEH device
data out of the (possibly asynchronous) device release handler and into the
(synchronously called) bus notifier. This is useful for future work as it makes
it easier to reason about the lifetimes of EEH structures.

Note that I've left a few WARN_ON_ONCEs in the code because I'm paranoid, but I
have not been able to hit them during testing.

Cheers,
Sam.

Notes for v4:
Stopped using rtas_error_rc() as it is too specific, instead just translate the
one code that is valid for this RTAS call. Therefore, the new patch to export
rtas_error_rc() is dropped.

Notes for v3:
I've tweaked the fix for pseries_eeh_configure_bridge() to return the correct
error code (even though it's not used) by calling an already present RTAS
function, rtas_error_rc(). However, I had to make another change to export that
function and, while it does seem like the right thing to do, I'm concerned
it's a bit out of scope for such a small fix.

Notes for v2:

I've dropped both cleanup patches (3/4, 4/4) because that type of cleanup
(replacing a call to eeh_rmv_from_parent_pe() with one to eeh_remove_device())
is incorrect: if called during recovery, it will cause edev->pe to remain set
when it would have been cleared previously. This would lead to stale
information in the edev. I think there should be a way to simplify the code
around EEH_PE_KEEP but I'll look at that separately.

Patch set changelog follows:

Patch set v4: 
Patch 1/2 (was 2/3): powerpc/eeh: fix pseries_eeh_configure_bridge()
- Just handle the error translation locally, as it's specific to the RTAS call,
  but log the unaltered code in case it's useful for debugging.
Patch 2/2 (was 3/3): powerpc/eeh: Release EEH device state synchronously
Dropped (was 1/3) powerpc/rtas: Export rtas_error_rc

Patch set v3: 
Patch 1/3 (new in this version): powerpc/rtas: Export rtas_error_rc
Patch 2/3 (was 1/2): powerpc/eeh: fix pseries_eeh_configure_bridge()
Patch 3/3 (was 2/2): powerpc/eeh: Release EEH device state synchronously

Patch set v2: 
Patch 1/2: powerpc/eeh: fix pseries_eeh_configure_bridge()
Patch 2/2: powerpc/eeh: Release EEH device state synchronously
- Added comment explaining why the add case can't be handled similarly to the 
remove case.
Dropped (was 4/4) powerpc/eeh: Clean up edev cleanup for VFs
Dropped (was 3/4) powerpc/eeh: Remove workaround from eeh_add_device_late()

Patch set v1:
Patch 1/4: powerpc/eeh: fix pseries_eeh_configure_bridge()
Patch 2/4: powerpc/eeh: Release EEH device state synchronously
Patch 3/4: powerpc/eeh: Remove workaround from eeh_add_device_late()
Patch 4/4: powerpc/eeh: Clean up edev cleanup for VFs

Sam Bobroff (2):
  powerpc/eeh: fix pseries_eeh_configure_bridge()
  powerpc/eeh: Release EEH device state synchronously

 arch/powerpc/kernel/eeh.c| 31 
 arch/powerpc/kernel/pci-hotplug.c|  2 --
 arch/powerpc/platforms/pseries/eeh_pseries.c |  8 -
 3 files changed, 38 insertions(+), 3 deletions(-)

-- 
2.22.0.216.g00a2a96fc9



[PATCH v4 2/2] powerpc/eeh: Release EEH device state synchronously

2020-04-27 Thread Sam Bobroff
EEH device state is currently removed (by eeh_remove_device()) during
the device release handler, which is invoked as the device's reference
count drops to zero. This may take some time, or forever, as other
threads may hold references.

However, the PCI device state is released synchronously by
pci_stop_and_remove_bus_device(). This mismatch causes problems, for
example the device may be re-discovered as a new device before the
release handler has been called, leaving the PCI and EEH state
mismatched.

So instead, call eeh_remove_device() from the bus device removal
handlers, which are called synchronously in the removal path.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/eeh.c | 31 +++
 arch/powerpc/kernel/pci-hotplug.c |  2 --
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 17cb3e9b5697..64361311bc8e 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1106,6 +1106,37 @@ static int eeh_init(void)
 
 core_initcall_sync(eeh_init);
 
+static int eeh_device_notifier(struct notifier_block *nb,
+  unsigned long action, void *data)
+{
+   struct device *dev = data;
+
+   switch (action) {
+   /*
+* Note: It's not possible to perform EEH device addition (i.e.
+* {pseries,pnv}_pcibios_bus_add_device()) here because it depends on
+* the device's resources, which have not yet been set up.
+*/
+   case BUS_NOTIFY_DEL_DEVICE:
+   eeh_remove_device(to_pci_dev(dev));
+   break;
+   default:
+   break;
+   }
+   return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_device_nb = {
+   .notifier_call = eeh_device_notifier,
+};
+
+static __init int eeh_set_bus_notifier(void)
+{
+   bus_register_notifier(&pci_bus_type, &eeh_device_nb);
+   return 0;
+}
+arch_initcall(eeh_set_bus_notifier);
+
 /**
  * eeh_add_device_early - Enable EEH for the indicated device node
  * @pdn: PCI device node for which to set up EEH
diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index d6a67f814983..28e9aa274f64 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -57,8 +57,6 @@ void pcibios_release_device(struct pci_dev *dev)
struct pci_controller *phb = pci_bus_to_host(dev->bus);
struct pci_dn *pdn = pci_get_pdn(dev);
 
-   eeh_remove_device(dev);
-
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
 
-- 
2.22.0.216.g00a2a96fc9



[PATCH v4 1/2] powerpc/eeh: fix pseries_eeh_configure_bridge()

2020-04-27 Thread Sam Bobroff
If a device is hot unplugged during EEH recovery, it's possible for the
RTAS call to ibm,configure-pe in pseries_eeh_configure_bridge() to return
parameter error (-3), however negative return values are not checked
for and this leads to an infinite loop.

Fix this by correctly bailing out on negative values.

Signed-off-by: Sam Bobroff 
---
v4 - Just handle the error translation locally, as it's specific to the RTAS 
call,
 but log the unaltered code in case it's useful for debugging.

 arch/powerpc/platforms/pseries/eeh_pseries.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 893ba3f562c4..04c1ed79bc6e 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -607,6 +607,8 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
 
if (!ret)
return ret;
+   if (ret < 0)
+   break;
 
/*
 * If RTAS returns a delay value that's above 100ms, cut it
@@ -627,7 +629,11 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
 
pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
__func__, pe->phb->global_number, pe->addr, ret);
-   return ret;
+   /* PAPR defines -3 as "Parameter Error" for this function: */
+   if (ret == -3)
+   return -EINVAL;
+   else
+   return -EIO;
 }
 
 /**
-- 
2.22.0.216.g00a2a96fc9



Re: powerpc/pci: [PATCH 1/1]: PCIE PHB reset

2020-05-11 Thread Sam Bobroff
On Thu, May 07, 2020 at 08:10:37AM -0500, wenxi...@linux.vnet.ibm.com wrote:
> From: Wen Xiong 
> 
> Several device drivers hit EEH(Extended Error handling) when triggering
> kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
> in pci general code. PHB reset stop all PCI transactions from previous
> kernel. We have tested the patch in several enviroments:
> - direct slot adapters
> - adapters under the switch
> - a VF adapter in PowerVM
> - a VF adapter/adapter in KVM guest.
> 
> Signed-off-by: Wen Xiong 

Hi Wen Xiong,

I saw Oliver's review and I think he's covered the main issues I was
going to raise:
- This will run and produce some spurious errors on powernv. (I think
  distros do compile in both pseries and powernv.)
- There's a bit of code duplication but it's probably OK for this patch.

I have a few other minor comments, below:

> ---
>  arch/powerpc/platforms/pseries/pci.c | 153 +++
>  1 file changed, 153 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/pseries/pci.c 
> b/arch/powerpc/platforms/pseries/pci.c
> index 911534b89c85..aac7f00696d2 100644
> --- a/arch/powerpc/platforms/pseries/pci.c
> +++ b/arch/powerpc/platforms/pseries/pci.c
> @@ -11,6 +11,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  
>  #include 
>  #include 
> @@ -354,3 +356,154 @@ int pseries_root_bridge_prepare(struct pci_host_bridge 
> *bridge)
>  
>   return 0;
>  }
> +
> +/**
> + * pseries_get_pdn_addr - Retrieve PHB address
> + * @pe: EEH PE
> + *
> + * Retrieve the assocated PHB address. Actually, there're 2 RTAS
> + * function calls dedicated for the purpose. We need implement
> + * it through the new function and then the old one. Besides,
> + * you should make sure the config address is figured out from
> + * FDT node before calling the function.
> + *
> + */
> +static int pseries_get_pdn_addr(struct pci_controller *phb)
> +{
> + int ret = -1;
> + int rets[3];
> + int ibm_get_config_addr_info;
> + int ibm_get_config_addr_info2;
> + int config_addr = 0;
> + struct pci_dn *root_pdn, *pdn;
> +
> + ibm_get_config_addr_info2   = rtas_token("ibm,get-config-addr-info2");
> + ibm_get_config_addr_info= rtas_token("ibm,get-config-addr-info");
> +
> + root_pdn = PCI_DN(phb->dn);
> + pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list);
> + config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
> +
> + if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
> + /*
> +  * First of all, we need to make sure there has one PE
> +  * associated with the device. Otherwise, PE address is
> +  * meaningless.
> +  */

This comment might be better if it explained how using option=0
with ibm_get_config_addr tests the PE.

> + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
> + config_addr, BUID_HI(pdn->phb->buid),
> + BUID_LO(pdn->phb->buid), 1);
> + if (ret || (rets[0] == 0)) {
> + pr_warn("%s: Failed to get address for PHB#%x-PE# "
> + "option=%d config_addr=%x\n",
> + __func__, pdn->phb->global_number, 1, rets[0]);
> + return -1;
> + }
> +
> + /* Retrieve the associated PE config address */
> + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
> + config_addr, BUID_HI(pdn->phb->buid),
> + BUID_LO(pdn->phb->buid), 0);
> + if (ret) {
> + pr_warn("%s: Failed to get address for PHB#%x-PE# "
> + "option=%d config_addr=%x\n",
> + __func__, pdn->phb->global_number, 0, rets[0]);
> + return -1;
> + }
> + return rets[0];
> + }
> +
> + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
> + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
> + config_addr, BUID_HI(pdn->phb->buid),
> + BUID_LO(pdn->phb->buid), 0);
> + if (ret || rets[0]) {
> + pr_warn("%s: Failed to get address for PHB#%x-PE# "
> + "config_addr=%x\n",
> + __func__, pdn->phb->global_number, rets[0]);
> + return -1;
> + }
> + return rets[0];
> + }
> +
> + return ret;
Can this ever return anything other than 0?

> +}
> +
> +static int __init pseries_phb_reset(void)
> +{
> + struct pci_controller *phb;
> + int config_addr;
> + int ibm_set_slot_reset;
> + int ibm_configure_pe;
> + int ret;
> +
> + if (is_kdump_kernel() || reset_devices) {
> + pr_info("Issue PHB reset ...\n");
> + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
> + ibm_configure_pe

Re: [PATCH v2 1/5] powerpc/pci: Access PCI config space directly w/o pci_dn

2018-09-09 Thread Sam Bobroff
Hi Sergey,

On Thu, Sep 06, 2018 at 02:57:48PM +0300, Sergey Miroshnichenko wrote:
> The pci_dn structures are retrieved from a DT, but hot-plugged PCIe
> devices don't have them. Don't stop PCIe I/O in absence of pci_dn, so
> it is now possible to discover new devices.
> 
> Signed-off-by: Sergey Miroshnichenko 
> ---
>  arch/powerpc/kernel/rtas_pci.c   | 97 +++-
>  arch/powerpc/platforms/powernv/pci.c | 64 --
>  2 files changed, 109 insertions(+), 52 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
> index c2b148b1634a..0611b46d9b5f 100644
> --- a/arch/powerpc/kernel/rtas_pci.c
> +++ b/arch/powerpc/kernel/rtas_pci.c
> @@ -55,10 +55,26 @@ static inline int config_access_valid(struct pci_dn *dn, 
> int where)
>   return 0;
>  }
>  
> -int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
> +static int rtas_read_raw_config(unsigned long buid, int busno, unsigned int 
> devfn,
> + int where, int size, u32 *val)
>  {
>   int returnval = -1;
> - unsigned long buid, addr;
> + unsigned long addr = rtas_config_addr(busno, devfn, where);
> + int ret;
> +
> + if (buid) {
> + ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
> + addr, BUID_HI(buid), BUID_LO(buid), size);
> + } else {
> + ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
> + }
> + *val = returnval;
> +
> + return ret;
> +}
> +
> +int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
> +{
>   int ret;
>  
>   if (!pdn)
> @@ -71,16 +87,8 @@ int rtas_read_config(struct pci_dn *pdn, int where, int 
> size, u32 *val)
>   return PCIBIOS_SET_FAILED;
>  #endif
>  
> - addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
> - buid = pdn->phb->buid;
> - if (buid) {
> - ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
> - addr, BUID_HI(buid), BUID_LO(buid), size);
> - } else {
> - ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
> - }
> - *val = returnval;
> -
> + ret = rtas_read_raw_config(pdn->phb->buid, pdn->busno, pdn->devfn,
> +where, size, val);
>   if (ret)
>   return PCIBIOS_DEVICE_NOT_FOUND;
>  
> @@ -98,18 +106,44 @@ static int rtas_pci_read_config(struct pci_bus *bus,
>  
>   pdn = pci_get_pdn_by_devfn(bus, devfn);
>  
> - /* Validity of pdn is checked in here */
> - ret = rtas_read_config(pdn, where, size, val);
> - if (*val == EEH_IO_ERROR_VALUE(size) &&
> - eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
> - return PCIBIOS_DEVICE_NOT_FOUND;
> + if (pdn && eeh_enabled()) {
> + /* Validity of pdn is checked in here */
> + ret = rtas_read_config(pdn, where, size, val);
> +
> + if (*val == EEH_IO_ERROR_VALUE(size) &&
> + eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
> + ret = PCIBIOS_DEVICE_NOT_FOUND;
> + } else {
> + struct pci_controller *phb = pci_bus_to_host(bus);
> +
> + ret = rtas_read_raw_config(phb->buid, bus->number, devfn,
> +where, size, val);
> + }

In the above block, if pdn is valid but EEH isn't enabled,
rtas_read_raw_config() will be used instead of rtas_read_config(), so
config_access_valid() won't be tested. Is that correct?

>  
>   return ret;
>  }
>  
> +static int rtas_write_raw_config(unsigned long buid, int busno, unsigned int 
> devfn,
> +  int where, int size, u32 val)
> +{
> + unsigned long addr = rtas_config_addr(busno, devfn, where);
> + int ret;
> +
> + if (buid) {
> + ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
> + BUID_HI(buid), BUID_LO(buid), size, (ulong)val);
> + } else {
> + ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, 
> (ulong)val);
> + }
> +
> + if (ret)
> + return PCIBIOS_DEVICE_NOT_FOUND;
> +
> + return PCIBIOS_SUCCESSFUL;
> +}
> +
>  int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
>  {
> - unsigned long buid, addr;
>   int ret;
>  
>   if (!pdn)
> @@ -122,15 +156,8 @@ int rtas_write_config(struct pci_dn *pdn, int where, int 
> size, u32 val)
>   return PCIBIOS_SET_FAILED;
>  #endif
>  
> - addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
> - buid = pdn->phb->buid;
> - if (buid) {
> - ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
> - BUID_HI(buid), BUID_LO(buid), size, (ulong) val);
> - } else {
> - ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, 
> (ulong)val);
> - }
> -
> + ret = rtas_write_raw_config(pdn->phb->buid, pdn-

Re: [PATCH v2 2/5] powerpc/pci: Create pci_dn on demand

2018-09-09 Thread Sam Bobroff
Hi Sergey,

On Thu, Sep 06, 2018 at 02:57:49PM +0300, Sergey Miroshnichenko wrote:
> The pci_dn structures can be created not only from DT, but also
> directly from newly discovered PCIe devices, so allocate them
> dynamically.
> 
> Signed-off-by: Sergey Miroshnichenko 
> ---
>  arch/powerpc/kernel/pci_dn.c | 76 
>  1 file changed, 59 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
> index ab147a1909c8..48ec16407835 100644
> --- a/arch/powerpc/kernel/pci_dn.c
> +++ b/arch/powerpc/kernel/pci_dn.c
> @@ -33,6 +33,8 @@
>  #include 
>  #include 
>  
> +static struct pci_dn *create_pdn(struct pci_dev *pdev, struct pci_dn 
> *parent);
> +
>  /*
>   * The function is used to find the firmware data of one
>   * specific PCI device, which is attached to the indicated
> @@ -58,6 +60,9 @@ static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
>   pbus = pbus->parent;
>   }
>  
> + if (!pbus->self && !pci_is_root_bus(pbus))
> + return NULL;
> +
>   /*
>* Except virtual bus, all PCI buses should
>* have device nodes.
> @@ -65,13 +70,15 @@ static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
>   dn = pci_bus_to_OF_node(pbus);
>   pdn = dn ? PCI_DN(dn) : NULL;
>  
> + if (!pdn && pbus->self)
> + pdn = pbus->self->dev.archdata.pci_data;
> +
>   return pdn;
>  }
>  
>  struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
>   int devfn)
>  {
> - struct device_node *dn = NULL;
>   struct pci_dn *parent, *pdn;
>   struct pci_dev *pdev = NULL;
>  
> @@ -80,17 +87,10 @@ struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
>   if (pdev->devfn == devfn) {
>   if (pdev->dev.archdata.pci_data)
>   return pdev->dev.archdata.pci_data;
> -
> - dn = pci_device_to_OF_node(pdev);
>   break;
>   }
>   }
>  
> - /* Fast path: fetch from device node */
> - pdn = dn ? PCI_DN(dn) : NULL;
> - if (pdn)
> - return pdn;
> -

Why is it necessary to remove the above fast-path?

>   /* Slow path: fetch from firmware data hierarchy */
>   parent = pci_bus_to_pdn(bus);
>   if (!parent)
> @@ -128,16 +128,9 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
>   if (!parent)
>   return NULL;
>  
> - list_for_each_entry(pdn, &parent->child_list, list) {
> - if (pdn->busno == pdev->bus->number &&
> - pdn->devfn == pdev->devfn)
> - return pdn;
> - }

Could you explain why the above block was removed? Is it now impossible
for it to find a pdn?

> -
> - return NULL;
> + return create_pdn(pdev, parent);
>  }
>  
> -#ifdef CONFIG_PCI_IOV
>  static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
>  int vf_index,
>  int busno, int devfn)
> @@ -156,7 +149,9 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn 
> *parent,
>   pdn->parent = parent;
>   pdn->busno = busno;
>   pdn->devfn = devfn;
> + #ifdef CONFIG_PCI_IOV
>   pdn->vf_index = vf_index;
> + #endif /* CONFIG_PCI_IOV */
>   pdn->pe_number = IODA_INVALID_PE;
>   INIT_LIST_HEAD(&pdn->child_list);
>   INIT_LIST_HEAD(&pdn->list);

I can see that this change allows you to re-use this to set up a pdn in
create_pdn(). Perhaps you should refactor pci_add_device_node_info() to
use it as well, now that it's possible?

> @@ -164,7 +159,54 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn 
> *parent,
>  
>   return pdn;
>  }
> -#endif
> +
> +static struct pci_dn *create_pdn(struct pci_dev *pdev, struct pci_dn *parent)
> +{
> + struct pci_dn *pdn = NULL;
> +
> + pdn = add_one_dev_pci_data(parent, 0, pdev->bus->number, pdev->devfn);
> + dev_info(&pdev->dev, "Create a new pdn for devfn %2x\n", pdev->devfn / 
> 8);
> +
> + if (pdn) {
> + #ifdef CONFIG_EEH
> + struct eeh_dev *edev;
> + #endif /* CONFIG_EEH */
> + u32 class_code;
> + u16 device_id;
> + u16 vendor_id;
> +
> + #ifdef CONFIG_EEH
> + edev = eeh_dev_init(pdn);
> + if (!edev) {
> + kfree(pdn);
> + dev_err(&pdev->dev, "%s: Failed to allocate edev\n", 
> __func__);
> + return NULL;
> + }
> + #endif /* CONFIG_EEH */
> +
> + pdn->busno = pdev->bus->busn_res.start;

It seems strange that pdn->busno is set by the call to
add_one_dev_pci_data() above (to pdev->bus->number) and then overwritten
here with a different value. Should add_one_dev_pci_data() use
pdev->bus->busn_res.start and this line be removed?

> +
> + pci_bus_read_config_

Re: [PATCH v2 5/5] PCI/powerpc/eeh: Add pcibios hooks for preparing to rescan

2018-09-09 Thread Sam Bobroff
Hi Sergey,

On Thu, Sep 06, 2018 at 02:57:52PM +0300, Sergey Miroshnichenko wrote:
> Reading an empty slot returns all ones, which triggers a false
> EEH error event on PowerNV.
> 
> New callbacks pcibios_rescan_prepare/done are introduced to
> pause/resume the EEH during rescan.

If I understand it correctly, this temporarily disables EEH for config space
accesses on the whole PHB while the rescan runs. Is it possible that a
real EEH event could be missed if it occurred during the rescan?

Even if it's not possible, I think it would be good to mention that in a
comment.

> Signed-off-by: Sergey Miroshnichenko 
> ---
>  arch/powerpc/include/asm/eeh.h   |  2 ++
>  arch/powerpc/kernel/eeh.c| 12 +++
>  arch/powerpc/platforms/powernv/eeh-powernv.c | 22 
>  drivers/pci/probe.c  | 14 +
>  include/linux/pci.h  |  2 ++
>  5 files changed, 52 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index 219637ea69a1..926c3e31df99 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -219,6 +219,8 @@ struct eeh_ops {
>   int (*next_error)(struct eeh_pe **pe);
>   int (*restore_config)(struct pci_dn *pdn);
>   int (*notify_resume)(struct pci_dn *pdn);
> + int (*pause)(struct pci_bus *bus);
> + int (*resume)(struct pci_bus *bus);

I think these names are a bit too generic, what about naming them
pause_bus()/resume_bus() or even prepare_rescan()/rescan_done()?

>  };
>  
>  extern int eeh_subsystem_flags;
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index 6ebba3e48b01..9fb5012f389d 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -1831,3 +1831,15 @@ static int __init eeh_init_proc(void)
>   return 0;
>  }
>  __initcall(eeh_init_proc);
> +
> +void pcibios_rescan_prepare(struct pci_bus *bus)
> +{
> + if (eeh_ops && eeh_ops->pause)
> + eeh_ops->pause(bus);
> +}
> +
> +void pcibios_rescan_done(struct pci_bus *bus)
> +{
> + if (eeh_ops && eeh_ops->resume)
> + eeh_ops->resume(bus);
> +}
> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
> b/arch/powerpc/platforms/powernv/eeh-powernv.c
> index 3c1beae29f2d..9724a58afcd2 100644
> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
> @@ -59,6 +59,26 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
>   eeh_sysfs_add_device(pdev);
>  }
>  
> +static int pnv_eeh_pause(struct pci_bus *bus)
> +{
> + struct pci_controller *hose = pci_bus_to_host(bus);
> + struct pnv_phb *phb = hose->private_data;
> +
> + phb->flags &= ~PNV_PHB_FLAG_EEH;
> + disable_irq(eeh_event_irq);
> + return 0;
> +}
> +
> +static int pnv_eeh_resume(struct pci_bus *bus)
> +{
> + struct pci_controller *hose = pci_bus_to_host(bus);
> + struct pnv_phb *phb = hose->private_data;
> +
> + enable_irq(eeh_event_irq);
> + phb->flags |= PNV_PHB_FLAG_EEH;
> + return 0;
> +}
> +
>  static int pnv_eeh_init(void)
>  {
>   struct pci_controller *hose;
> @@ -1710,6 +1730,8 @@ static struct eeh_ops pnv_eeh_ops = {
>   .write_config   = pnv_eeh_write_config,
>   .next_error = pnv_eeh_next_error,
>   .restore_config = pnv_eeh_restore_config,
> + .pause  = pnv_eeh_pause,
> + .resume = pnv_eeh_resume,
>   .notify_resume  = NULL
>  };
>  
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index ac876e32de4b..4a9045364809 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -2801,6 +2801,14 @@ void __weak pcibios_remove_bus(struct pci_bus *bus)
>  {
>  }
>  
> +void __weak pcibios_rescan_prepare(struct pci_bus *bus)
> +{
> +}
> +
> +void __weak pcibios_rescan_done(struct pci_bus *bus)
> +{
> +}
> +
>  struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
>   struct pci_ops *ops, void *sysdata, struct list_head *resources)
>  {
> @@ -3055,9 +3063,15 @@ unsigned int pci_rescan_bus_bridge_resize(struct 
> pci_dev *bridge)
>  unsigned int pci_rescan_bus(struct pci_bus *bus)
>  {
>   unsigned int max;
> + struct pci_bus *root = bus;
> +
> + while (!pci_is_root_bus(root))
> + root = root->parent;
>  
> + pcibios_rescan_prepare(root);
>   max = pci_scan_child_bus(bus);
>   pci_assign_unassigned_bus_resources(bus);
> + pcibios_rescan_done(root);
>   pci_bus_add_devices(bus);
>  
>   return max;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 340029b2fb38..42930731c5a7 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1929,6 +1929,8 @@ void pcibios_penalize_isa_irq(int irq, int active);
>  int pcibios_alloc_irq(struct pci_dev *dev);
>  void pcibios_free_irq(struct pci_dev *dev);
>  resource_size_t pcibio

[PATCH 02/14] powerpc/eeh: Fix null deref for devices removed during EEH

2018-09-11 Thread Sam Bobroff
If a device is removed during EEH processing (either by a driver's
handler or as part of recovery), it can lead to a null dereference
in eeh_pe_report_edev().

To handle this, skip devices that have been removed.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/eeh_driver.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 67619b4b3f96..4115d353c349 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -281,6 +281,10 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, 
eeh_report_fn fn,
struct pci_driver *driver;
enum pci_ers_result new_result;
 
+   if (!edev->pdev) {
+   eeh_edev_info(edev, "no device");
+   return;
+   }
device_lock(&edev->pdev->dev);
if (eeh_edev_actionable(edev)) {
driver = eeh_pcid_get(edev->pdev);
-- 
2.19.0.2.gcad72f5712



  1   2   3   4   >