[PATCH 06/13] powerpc/vas: Take reference to PID and mm for user space windows

2020-04-01 Thread Haren Myneni


When a process opens a window, its pid and tgid will be saved in the
vas_window struct. This window will be closed when the process exits.
The kernel handles NX faults by updating the CSB, or by sending a SEGV
signal to the pid of the process if the userspace CSB address is invalid.

In multi-thread applications, a window can be opened by a child thread,
but it will not be closed when this thread exits. It is expected that
the parent will clean up all resources including NX windows opened by
child threads. A child thread can send NX requests using this window
and could be killed before completion is reported. If the pid assigned
to this thread is reused while requests are pending, a failure SEGV
would be directed to the wrong place.

To prevent reusing the pid, take references to pid and mm when the window
is opened and release them when the window is closed. Then if the child
thread is not running, the SEGV signal will be sent to the thread group
leader (tgid).

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-debug.c  |  2 +-
 arch/powerpc/platforms/powernv/vas-window.c | 53 ++---
 arch/powerpc/platforms/powernv/vas.h|  9 -
 3 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/vas-debug.c 
b/arch/powerpc/platforms/powernv/vas-debug.c
index 09e63df..ef9a717 100644
--- a/arch/powerpc/platforms/powernv/vas-debug.c
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -38,7 +38,7 @@ static int info_show(struct seq_file *s, void *private)
 
seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop),
window->tx_win ? "Send" : "Receive");
-   seq_printf(s, "Pid : %d\n", window->pid);
+   seq_printf(s, "Pid : %d\n", vas_window_pid(window));
 
 unlock:
mutex_unlock(&vas_mutex);
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index dc46bf6..7054cd4 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -12,6 +12,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include "vas.h"
@@ -876,8 +878,6 @@ struct vas_window *vas_rx_win_open(int vasid, enum 
vas_cop_type cop,
rxwin->user_win = rxattr->user_win;
rxwin->cop = cop;
rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
-   if (rxattr->user_win)
-   rxwin->pid = task_pid_vnr(current);
 
init_winctx_for_rxwin(rxwin, rxattr, &winctx);
init_winctx_regs(rxwin, &winctx);
@@ -1027,7 +1027,6 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
txwin->tx_win = 1;
txwin->rxwin = rxwin;
txwin->nx_win = txwin->rxwin->nx_win;
-   txwin->pid = attr->pid;
txwin->user_win = attr->user_win;
txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
 
@@ -1059,8 +1058,43 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
goto free_window;
}
 
-   set_vinst_win(vinst, txwin);
+   if (txwin->user_win) {
+   /*
+* Window opened by a child thread may not be closed when
+* it exits. So take reference to its pid and release it
+* when the window is free by parent thread.
+* Acquire a reference to the task's pid to make sure
+* pid will not be re-used - needed only for multithread
+* applications.
+*/
+   txwin->pid = get_task_pid(current, PIDTYPE_PID);
+   /*
+* Acquire a reference to the task's mm.
+*/
+   txwin->mm = get_task_mm(current);
 
+   if (!txwin->mm) {
+   put_pid(txwin->pid);
+   pr_err("VAS: pid(%d): mm_struct is not found\n",
+   current->pid);
+   rc = -EPERM;
+   goto free_window;
+   }
+
+   mmgrab(txwin->mm);
+   mmput(txwin->mm);
+   mm_context_add_copro(txwin->mm);
+   /*
+* Process closes window during exit. In the case of
+* multithread application, the child thread can open
+* window and can exit without closing it. Expects parent
+* thread to use and close the window. So do not need
+* to take pid reference for parent thread.
+*/
+   txwin->tgid = find_get_pid(task_tgid_vnr(current));
+   }
+
+   set_vinst_win(vinst, txwin);
return txwin;
 
 free_window:
@@ -1257,8 +1291,17 @@ int vas_win_close(struct vas_window *window)
poll_window_castout(window);
 
/* if send window, drop reference to matching receive window */
-   if (window->tx_win)
+   if (window->tx_win) {
+   if (wind

[PATCH v9 07/13] powerpc/vas: Setup thread IRQ handler per VAS instance

2020-04-01 Thread Haren Myneni


When NX encounters a translation error on a CRB or any request buffer,
it raises an interrupt on the CPU to handle the fault. It can raise one
interrupt for multiple faults. NX expects the OS to handle these faults
and return credits for the fault window after processing them.

Setup thread IRQ handler and IRQ thread function per each VAS instance.
IRQ handler checks if the thread is already woken up and can handle new
faults. If so returns with IRQ_HANDLED, otherwise wake up thread to
process new faults.

The thread function reads each CRB entry from the fault FIFO until it
sees an invalid entry. After reading each CRB, it determines the
corresponding send window using pswid (from the CRB) and processes the
fault CRB. Then it invalidates the entry and returns the credit.
Processing the fault CRB and returning the credit are described in
subsequent patches.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-fault.c  | 131 
 arch/powerpc/platforms/powernv/vas-window.c |  60 +
 arch/powerpc/platforms/powernv/vas.c|  23 -
 arch/powerpc/platforms/powernv/vas.h|   7 ++
 4 files changed, 220 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/vas-fault.c 
b/arch/powerpc/platforms/powernv/vas-fault.c
index 4044998..0da8358 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "vas.h"
@@ -25,6 +26,136 @@
 #define VAS_FAULT_WIN_FIFO_SIZE(4 << 20)
 
 /*
+ * Process valid CRBs in fault FIFO.
+ * NX process user space requests, return credit and update the status
+ * in CRB. If it encounters transalation error when accessing CRB or
+ * request buffers, raises interrupt on the CPU to handle the fault.
+ * It takes credit on fault window, updates nx_fault_stamp in CRB with
+ * the following information and pastes CRB in fault FIFO.
+ *
+ * pswid - window ID of the window on which the request is sent.
+ * fault_storage_addr - fault address
+ *
+ * It can raise a single interrupt for multiple faults. Expects OS to
+ * process all valid faults and return credit for each fault on user
+ * space and fault windows. This fault FIFO control will be done with
+ * credit mechanism. NX can continuously paste CRBs until credits are not
+ * available on fault window. Otherwise, returns with RMA_reject.
+ *
+ * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128)
+ *
+ */
+irqreturn_t vas_fault_thread_fn(int irq, void *data)
+{
+   struct vas_instance *vinst = data;
+   struct coprocessor_request_block *crb, *entry;
+   struct coprocessor_request_block buf;
+   struct vas_window *window;
+   unsigned long flags;
+   void *fifo;
+
+   crb = &buf;
+
+   /*
+* VAS can interrupt with multiple page faults. So process all
+* valid CRBs within fault FIFO until reaches invalid CRB.
+* We use CCW[0] and pswid to validate validate CRBs:
+*
+* CCW[0]   Reserved bit. When NX pastes CRB, CCW[0]=0
+*  OS sets this bit to 1 after reading CRB.
+* pswidNX assigns window ID. Set pswid to -1 after
+*  reading CRB from fault FIFO.
+*
+* We exit this function if no valid CRBs are available to process.
+* So acquire fault_lock and reset fifo_in_progress to 0 before
+* exit.
+* In case kernel receives another interrupt with different page
+* fault, interrupt handler returns with IRQ_HANDLED if
+* fifo_in_progress is set. Means these new faults will be
+* handled by the current thread. Otherwise set fifo_in_progress
+* and return IRQ_WAKE_THREAD to wake up thread.
+*/
+   while (true) {
+   spin_lock_irqsave(&vinst->fault_lock, flags);
+   /*
+* Advance the fault fifo pointer to next CRB.
+* Use CRB_SIZE rather than sizeof(*crb) since the latter is
+* aligned to CRB_ALIGN (256) but the CRB written to by VAS is
+* only CRB_SIZE in len.
+*/
+   fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE);
+   entry = fifo;
+
+   if ((entry->stamp.nx.pswid == cpu_to_be32(FIFO_INVALID_ENTRY))
+   || (entry->ccw & cpu_to_be32(CCW0_INVALID))) {
+   vinst->fifo_in_progress = 0;
+   spin_unlock_irqrestore(&vinst->fault_lock, flags);
+   return IRQ_HANDLED;
+   }
+
+   spin_unlock_irqrestore(&vinst->fault_lock, flags);
+   vinst->fault_crbs++;
+   if (vinst->fault_crbs == (vinst->fault_fifo_size / CRB_SIZE))
+   vinst->fault_crbs = 0;
+
+   memcpy(crb, fifo, CRB_SIZE);
+   entry->stamp.nx.pswid = cpu_to_be32(FIFO_

[PATCH v9 08/13] powerpc/vas: Update CSB and notify process for fault CRBs

2020-04-01 Thread Haren Myneni


Applications poll on the CSB for the status update after requests are
issued. NX processes these requests and updates the CSB with the status.
If it encounters a translation error, it pastes the CRB in the fault FIFO
and raises an interrupt. The kernel handles the fault by reading the CRB
from the fault FIFO and processing it.

For each fault CRB, update fault address in CRB (fault_storage_addr)
and translation error status in CSB so that user space can touch the
fault address and resend the request. If user space passed an invalid
CSB address, send a SIGSEGV signal to the process.

In the case of multi-thread applications, child thread may not be
available. So if the task is not running, send signal to tgid.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-fault.c | 126 -
 1 file changed, 125 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/vas-fault.c 
b/arch/powerpc/platforms/powernv/vas-fault.c
index 0da8358..354577d 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -26,6 +27,128 @@
 #define VAS_FAULT_WIN_FIFO_SIZE(4 << 20)
 
 /*
+ * Update the CSB to indicate a translation error.
+ *
+ * User space will be polling on CSB after the request is issued.
+ * If NX can handle the request without any issues, it updates CSB.
+ * Whereas if NX encounters page fault, the kernel will handle the
+ * fault and update CSB with translation error.
+ *
+ * If we are unable to update the CSB means copy_to_user failed due to
+ * invalid csb_addr, send a signal to the process.
+ */
+static void update_csb(struct vas_window *window,
+   struct coprocessor_request_block *crb)
+{
+   struct coprocessor_status_block csb;
+   struct kernel_siginfo info;
+   struct task_struct *tsk;
+   void __user *csb_addr;
+   struct pid *pid;
+   int rc;
+
+   /*
+* NX user space windows can not be opened for task->mm=NULL
+* and faults will not be generated for kernel requests.
+*/
+   if (WARN_ON_ONCE(!window->mm || !window->user_win))
+   return;
+
+   csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
+
+   memset(&csb, 0, sizeof(csb));
+   csb.cc = CSB_CC_TRANSLATION;
+   csb.ce = CSB_CE_TERMINATION;
+   csb.cs = 0;
+   csb.count = 0;
+
+   /*
+* NX operates and returns in BE format as defined CRB struct.
+* So saves fault_storage_addr in BE as NX pastes in FIFO and
+* expects user space to convert to CPU format.
+*/
+   csb.address = crb->stamp.nx.fault_storage_addr;
+   csb.flags = 0;
+
+   pid = window->pid;
+   tsk = get_pid_task(pid, PIDTYPE_PID);
+   /*
+* Process closes send window after all pending NX requests are
+* completed. In multi-thread applications, a child thread can
+* open a window and can exit without closing it. May be some
+* requests are pending or this window can be used by other
+* threads later. We should handle faults if NX encounters
+* pages faults on these requests. Update CSB with translation
+* error and fault address. If csb_addr passed by user space is
+* invalid, send SEGV signal to pid saved in window. If the
+* child thread is not running, send the signal to tgid.
+* Parent thread (tgid) will close this window upon its exit.
+*
+* pid and mm references are taken when window is opened by
+* process (pid). So tgid is used only when child thread opens
+* a window and exits without closing it.
+*/
+   if (!tsk) {
+   pid = window->tgid;
+   tsk = get_pid_task(pid, PIDTYPE_PID);
+   /*
+* Parent thread (tgid) will be closing window when it
+* exits. So should not get here.
+*/
+   if (WARN_ON_ONCE(!tsk))
+   return;
+   }
+
+   /* Return if the task is exiting. */
+   if (tsk->flags & PF_EXITING) {
+   put_task_struct(tsk);
+   return;
+   }
+
+   use_mm(window->mm);
+   rc = copy_to_user(csb_addr, &csb, sizeof(csb));
+   /*
+* User space polls on csb.flags (first byte). So add barrier
+* then copy first byte with csb flags update.
+*/
+   if (!rc) {
+   csb.flags = CSB_V;
+   /* Make sure update to csb.flags is visible now */
+   smp_mb();
+   rc = copy_to_user(csb_addr, &csb, sizeof(u8));
+   }
+   unuse_mm(window->mm);
+   put_task_struct(tsk);
+
+   /* Success */
+   if (!rc)
+   return;
+
+   pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
+   csb_addr, pid_vnr(pid));
+

[PATCH v9 09/13] powerpc/vas: Return credits after handling fault

2020-04-01 Thread Haren Myneni


NX uses credit mechanism to control the number of requests issued on
a specific window at any point of time. Only send windows and the fault
window use credits. When the request is issued on a given window,
a credit is taken. This credit will be returned after that request is
processed. If credits are not available, returns RMA_Busy for send
window and RMA_Reject for fault window.

NX expects OS to return credit for send window after processing fault
CRB. Also credit has to be returned for fault window after handling
the fault.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-fault.c  |  9 
 arch/powerpc/platforms/powernv/vas-window.c | 36 +
 arch/powerpc/platforms/powernv/vas.h|  1 +
 3 files changed, 46 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/vas-fault.c 
b/arch/powerpc/platforms/powernv/vas-fault.c
index 354577d..b6bec64 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -224,6 +224,10 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
memcpy(crb, fifo, CRB_SIZE);
entry->stamp.nx.pswid = cpu_to_be32(FIFO_INVALID_ENTRY);
entry->ccw |= cpu_to_be32(CCW0_INVALID);
+   /*
+* Return credit for the fault window.
+*/
+   vas_return_credit(vinst->fault_win, false);
 
pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n",
vinst->vas_id, vinst->fault_fifo, fifo,
@@ -249,6 +253,11 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
WARN_ON_ONCE(1);
} else {
update_csb(window, crb);
+   /*
+* Return credit for send window after processing
+* fault CRB.
+*/
+   vas_return_credit(window, true);
}
}
 }
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 382fe25..33aaa7a 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1318,6 +1318,42 @@ int vas_win_close(struct vas_window *window)
 }
 EXPORT_SYMBOL_GPL(vas_win_close);
 
+/*
+ * Return credit for the given window.
+ * Send windows and fault window uses credit mechanism as follows:
+ *
+ * Send windows:
+ * - The default number of credits available for each send window is
+ *   1024. It means 1024 requests can be issued asynchronously at the
+ *   same time. If the credit is not available, that request will be
+ *   returned with RMA_Busy.
+ * - One credit is taken when NX request is issued.
+ * - This credit is returned after NX processed that request.
+ * - If NX encounters translation error, kernel will return the
+ *   credit on the specific send window after processing the fault CRB.
+ *
+ * Fault window:
+ * - The total number credits available is FIFO_SIZE/CRB_SIZE.
+ *   Means 4MB/128 in the current implementation. If credit is not
+ *   available, RMA_Reject is returned.
+ * - A credit is taken when NX pastes CRB in fault FIFO.
+ * - The kernel with return credit on fault window after reading entry
+ *   from fault FIFO.
+ */
+void vas_return_credit(struct vas_window *window, bool tx)
+{
+   uint64_t val;
+
+   val = 0ULL;
+   if (tx) { /* send window */
+   val = SET_FIELD(VAS_TX_WCRED, val, 1);
+   write_hvwc_reg(window, VREG(TX_WCRED_ADDER), val);
+   } else {
+   val = SET_FIELD(VAS_LRX_WCRED, val, 1);
+   write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), val);
+   }
+}
+
 struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
uint32_t pswid)
 {
diff --git a/arch/powerpc/platforms/powernv/vas.h 
b/arch/powerpc/platforms/powernv/vas.h
index 0af7912..efdaa28 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -436,6 +436,7 @@ struct vas_winctx {
 extern int vas_setup_fault_window(struct vas_instance *vinst);
 extern irqreturn_t vas_fault_thread_fn(int irq, void *data);
 extern irqreturn_t vas_fault_handler(int irq, void *dev_id);
+extern void vas_return_credit(struct vas_window *window, bool tx);
 extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
uint32_t pswid);
 
-- 
1.8.3.1





[PATCH v9 10/13] powerpc/vas: Print CRB and FIFO values

2020-04-01 Thread Haren Myneni


Dump FIFO entries if could not find send window and print CRB
for debugging.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-fault.c | 41 ++
 1 file changed, 41 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/vas-fault.c 
b/arch/powerpc/platforms/powernv/vas-fault.c
index b6bec64..25db70b 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -26,6 +26,28 @@
  */
 #define VAS_FAULT_WIN_FIFO_SIZE(4 << 20)
 
+static void dump_crb(struct coprocessor_request_block *crb)
+{
+   struct data_descriptor_entry *dde;
+   struct nx_fault_stamp *nx;
+
+   dde = &crb->source;
+   pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+   be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+   dde->count, dde->index, dde->flags);
+
+   dde = &crb->target;
+   pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+   be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+   dde->count, dde->index, dde->flags);
+
+   nx = &crb->stamp.nx;
+   pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
+   be32_to_cpu(nx->pswid),
+   be64_to_cpu(crb->stamp.nx.fault_storage_addr),
+   nx->flags, nx->fault_status);
+}
+
 /*
  * Update the CSB to indicate a translation error.
  *
@@ -148,6 +170,23 @@ static void update_csb(struct vas_window *window,
pid_vnr(pid), rc);
 }
 
+static void dump_fifo(struct vas_instance *vinst, void *entry)
+{
+   unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size;
+   unsigned long *fifo = entry;
+   int i;
+
+   pr_err("Fault fifo size %d, Max crbs %d\n", vinst->fault_fifo_size,
+   vinst->fault_fifo_size / CRB_SIZE);
+
+   /* Dump 10 CRB entries or until end of FIFO */
+   pr_err("Fault FIFO Dump:\n");
+   for (i = 0; i < 10*(CRB_SIZE/8) && fifo < end; i += 4, fifo += 4) {
+   pr_err("[%.3d, %p]: 0x%.16lx 0x%.16lx 0x%.16lx 0x%.16lx\n",
+   i, fifo, *fifo, *(fifo+1), *(fifo+2), *(fifo+3));
+   }
+}
+
 /*
  * Process valid CRBs in fault FIFO.
  * NX process user space requests, return credit and update the status
@@ -233,6 +272,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
vinst->vas_id, vinst->fault_fifo, fifo,
vinst->fault_crbs);
 
+   dump_crb(crb);
window = vas_pswid_to_window(vinst,
be32_to_cpu(crb->stamp.nx.pswid));
 
@@ -245,6 +285,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
 * But we should not get here.
 * TODO: Disable IRQ.
 */
+   dump_fifo(vinst, (void *)entry);
pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, 
fault_crbs %d bad CRB?\n",
vinst->vas_id, vinst->fault_fifo, fifo,
be32_to_cpu(crb->stamp.nx.pswid),
-- 
1.8.3.1





[PATCH v9 11/13] powerpc/vas: Do not use default credits for receive window

2020-04-01 Thread Haren Myneni


System checkstops if RxFIFO overruns with more requests than the
maximum possible number of CRBs allowed in FIFO at any time. So
max credits value (rxattr.wcreds_max) is set and is passed to
vas_rx_win_open() by the driver.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-window.c | 4 ++--
 arch/powerpc/platforms/powernv/vas.h| 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 33aaa7a..084e76b 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -772,7 +772,7 @@ static bool rx_win_args_valid(enum vas_cop_type cop,
if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
return false;
 
-   if (attr->wcreds_max > VAS_RX_WCREDS_MAX)
+   if (!attr->wcreds_max)
return false;
 
if (attr->nx_win) {
@@ -877,7 +877,7 @@ struct vas_window *vas_rx_win_open(int vasid, enum 
vas_cop_type cop,
rxwin->nx_win = rxattr->nx_win;
rxwin->user_win = rxattr->user_win;
rxwin->cop = cop;
-   rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+   rxwin->wcreds_max = rxattr->wcreds_max;
 
init_winctx_for_rxwin(rxwin, rxattr, &winctx);
init_winctx_regs(rxwin, &winctx);
diff --git a/arch/powerpc/platforms/powernv/vas.h 
b/arch/powerpc/platforms/powernv/vas.h
index efdaa28..32b5261 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -101,11 +101,9 @@
 /*
  * Initial per-process credits.
  * Max send window credits:4K-1 (12-bits in VAS_TX_WCRED)
- * Max receive window credits: 64K-1 (16 bits in VAS_LRX_WCRED)
  *
  * TODO: Needs tuning for per-process credits
  */
-#define VAS_RX_WCREDS_MAX  ((64 << 10) - 1)
 #define VAS_TX_WCREDS_MAX  ((4 << 10) - 1)
 #define VAS_WCREDS_DEFAULT (1 << 10)
 
-- 
1.8.3.1





[PATCH v9 12/13] powerpc/vas: Display process stuck message

2020-04-01 Thread Haren Myneni


A process cannot close a send window until all requests are processed.
This means waiting until the window state is not busy and the send
credits are returned. Display debug messages in case it takes longer
to close the window.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-window.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 084e76b..c8644c3 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1182,6 +1182,7 @@ static void poll_window_credits(struct vas_window *window)
 {
u64 val;
int creds, mode;
+   int count = 0;
 
val = read_hvwc_reg(window, VREG(WINCTL));
if (window->tx_win)
@@ -1200,10 +1201,27 @@ static void poll_window_credits(struct vas_window 
*window)
creds = GET_FIELD(VAS_LRX_WCRED, val);
}
 
+   /*
+* Takes around few milliseconds to complete all pending requests
+* and return credits.
+* TODO: Scan fault FIFO and invalidate CRBs points to this window
+*   and issue CRB Kill to stop all pending requests. Need only
+*   if there is a bug in NX or fault handling in kernel.
+*/
if (creds < window->wcreds_max) {
val = 0;
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(msecs_to_jiffies(10));
+   count++;
+   /*
+* Process can not close send window until all credits are
+* returned.
+*/
+   if (!(count % 1))
+   pr_debug("VAS: pid %d stuck. Waiting for credits 
returned for Window(%d). creds %d, Retries %d\n",
+   vas_window_pid(window), window->winid,
+   creds, count);
+
goto retry;
}
 }
@@ -1217,6 +1235,7 @@ static void poll_window_busy_state(struct vas_window 
*window)
 {
int busy;
u64 val;
+   int count = 0;
 
 retry:
val = read_hvwc_reg(window, VREG(WIN_STATUS));
@@ -1225,6 +1244,15 @@ static void poll_window_busy_state(struct vas_window 
*window)
val = 0;
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(msecs_to_jiffies(5));
+   count++;
+   /*
+* Takes around few milliseconds to process all pending
+* requests.
+*/
+   if (!(count % 1))
+   pr_debug("VAS: pid %d stuck. Window (ID=%d) is in busy 
state. Retries %d\n",
+   vas_window_pid(window), window->winid, count);
+
goto retry;
}
 }
-- 
1.8.3.1





[PATCH v9 13/13] powerpc/vas: Free send window in VAS instance after credits returned

2020-04-01 Thread Haren Myneni


NX may be processing requests while trying to close window. Wait until
all credits are returned and then free send window from VAS instance.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-window.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index c8644c3..be900ad 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1317,14 +1317,14 @@ int vas_win_close(struct vas_window *window)
 
unmap_paste_region(window);
 
-   clear_vinst_win(window);
-
poll_window_busy_state(window);
 
unpin_close_window(window);
 
poll_window_credits(window);
 
+   clear_vinst_win(window);
+
poll_window_castout(window);
 
/* if send window, drop reference to matching receive window */
-- 
1.8.3.1





Re: [PATCH v2] powerpc/pseries: Fix MCE handling on pseries

2020-04-01 Thread Ganesh

On 3/20/20 4:31 PM, Ganesh Goudar wrote:


MCE handling on pSeries platform fails as recent rework to use common
code for pSeries and PowerNV in machine check error handling tries to
access per-cpu variables in realmode. The per-cpu variables may be
outside the RMO region on pSeries platform and needs translation to be
enabled for access. Just moving these per-cpu variable into RMO region
didn't help because we queue some work to workqueues in real mode, which
again tries to touch per-cpu variables. Also fwnmi_release_errinfo()
cannot be called when translation is not enabled.

This patch fixes this by enabling translation in the exception handler
when all required real mode handling is done. This change only affects
the pSeries platform.

Without this fix below kernel crash is seen on injecting
SLB multihit:

BUG: Unable to handle kernel data access on read at 0xc0027b205950
Faulting instruction address: 0xc003b7e0
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
Modules linked in: mcetest_slb(OE+) af_packet(E) xt_tcpudp(E) ip6t_rpfilter(E) 
ip6t_REJECT(E) ipt_REJECT(E) xt_conntrack(E) ip_set(E) nfnetlink(E) 
ebtable_nat(E) ebtable_broute(E) ip6table_nat(E) ip6table_mangle(E) 
ip6table_raw(E) ip6table_security(E) iptable_nat(E) nf_nat(E) nf_conntrack(E) 
nf_defrag_ipv6(E) nf_defrag_ipv4(E) iptable_mangle(E) iptable_raw(E) 
iptable_security(E) ebtable_filter(E) ebtables(E) ip6table_filter(E) 
ip6_tables(E) iptable_filter(E) ip_tables(E) x_tables(E) xfs(E) ibmveth(E) 
vmx_crypto(E) gf128mul(E) uio_pdrv_genirq(E) uio(E) crct10dif_vpmsum(E) 
rtc_generic(E) btrfs(E) libcrc32c(E) xor(E) zstd_decompress(E) zstd_compress(E) 
raid6_pq(E) sr_mod(E) sd_mod(E) cdrom(E) ibmvscsi(E) scsi_transport_srp(E) 
crc32c_vpmsum(E) dm_mod(E) sg(E) scsi_mod(E)
CPU: 34 PID: 8154 Comm: insmod Kdump: loaded Tainted: G OE 5.5.0-mahesh #1
NIP: c003b7e0 LR: c00f2218 CTR: 
REGS: c7dcb960 TRAP: 0300 Tainted: G OE (5.5.0-mahesh)
MSR: 80001003  CR: 28002428 XER: 2004
CFAR: c00f2214 DAR: c0027b205950 DSISR: 4000 IRQMASK: 0
GPR00: c00f2218 c7dcbbf0 c1544800 c7dcbd70
GPR04: 0001 c7dcbc98 c00800d00258 c008011c
GPR08:  00030003 c1035950 0348
GPR12: 00027a1d c7f9c000 0558 
GPR16: 0540 c0080111 c00801110540 
GPR20: c022af10 c0025480fd70 c0080128 c0004bfbb300
GPR24: c1442330 c008080d c0080800 4009287a77000510
GPR28:  0002 c1033d30 0001
NIP [c003b7e0] save_mce_event+0x30/0x240
LR [c00f2218] pseries_machine_check_realmode+0x2c8/0x4f0
Call Trace:
Instruction dump:
3c4c0151 38429050 7c0802a6 6000 fbc1fff0 fbe1fff8 f821ffd1 3d42ffaf
3fc2ffaf e98d0030 394a1150 3bdef530 <7d6a62aa> 1d2b0048 2f8b0063 380b0001
---[ end trace 46fd63f36bbdd940 ]---

Fixes: 9ca766f9891d ("powerpc/64s/pseries: machine check convert to use common event 
code")
Reviewed-by: Mahesh Salgaonkar 
Reviewed-by: Nicholas Piggin 
Signed-off-by: Ganesh Goudar 


Hi mpe, Do you have any comments on this patch ?



[PATCH v2] sched/core: fix illegal RCU from offline CPUs

2020-04-01 Thread Qian Cai
From: Peter Zijlstra 

In the CPU-offline process, it calls mmdrop() after idle entry and the
subsequent call to cpuhp_report_idle_dead(). Once execution passes the
call to rcu_report_dead(), RCU is ignoring the CPU, which results in
lockdep complaining when mmdrop() uses RCU from either memcg or
debugobjects below.

Fix it by cleaning up the active_mm state from BP instead. Every arch
which has CONFIG_HOTPLUG_CPU should have already called idle_task_exit()
from AP. The only exception is parisc because it switches them to
&init_mm unconditionally (see smp_boot_one_cpu() and smp_cpu_init()),
but the patch will still work there because it calls mmgrab(&init_mm) in
smp_cpu_init() and then should call mmdrop(&init_mm) in finish_cpu().

WARNING: suspicious RCU usage
-
kernel/workqueue.c:710 RCU or wq_pool_mutex should be held!

other info that might help us debug this:

RCU used illegally from offline CPU!
Call Trace:
 dump_stack+0xf4/0x164 (unreliable)
 lockdep_rcu_suspicious+0x140/0x164
 get_work_pool+0x110/0x150
 __queue_work+0x1bc/0xca0
 queue_work_on+0x114/0x120
 css_release+0x9c/0xc0
 percpu_ref_put_many+0x204/0x230
 free_pcp_prepare+0x264/0x570
 free_unref_page+0x38/0xf0
 __mmdrop+0x21c/0x2c0
 idle_task_exit+0x170/0x1b0
 pnv_smp_cpu_kill_self+0x38/0x2e0
 cpu_die+0x48/0x64
 arch_cpu_idle_dead+0x30/0x50
 do_idle+0x2f4/0x470
 cpu_startup_entry+0x38/0x40
 start_secondary+0x7a8/0xa80
 start_secondary_resume+0x10/0x14


Signed-off-by: Qian Cai 
---
 arch/powerpc/platforms/powernv/smp.c |  1 -
 include/linux/sched/mm.h |  2 ++
 kernel/cpu.c | 18 +-
 kernel/sched/core.c  |  5 +++--
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/smp.c 
b/arch/powerpc/platforms/powernv/smp.c
index 13e251699346..b2ba3e95bda7 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -167,7 +167,6 @@ static void pnv_smp_cpu_kill_self(void)
/* Standard hot unplug procedure */
 
idle_task_exit();
-   current->active_mm = NULL; /* for sanity */
cpu = smp_processor_id();
DBG("CPU%d offline\n", cpu);
generic_set_cpu_dead(cpu);
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index c49257a3b510..a132d875d351 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,8 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
 }
 
+void mmdrop(struct mm_struct *mm);
+
 /*
  * This has to be called after a get_task_mm()/mmget_not_zero()
  * followed by taking the mmap_sem for writing before modifying the
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2371292f30b0..244d30544377 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -3,6 +3,7 @@
  *
  * This code is licenced under the GPL.
  */
+#include 
 #include 
 #include 
 #include 
@@ -564,6 +565,21 @@ static int bringup_cpu(unsigned int cpu)
return bringup_wait_for_ap(cpu);
 }
 
+static int finish_cpu(unsigned int cpu)
+{
+   struct task_struct *idle = idle_thread_get(cpu);
+   struct mm_struct *mm = idle->active_mm;
+
+   /*
+* idle_task_exit() will have switched to &init_mm, now
+* clean up any remaining active_mm state.
+*/
+   if (mm != &init_mm)
+   idle->active_mm = &init_mm;
+   mmdrop(mm);
+   return 0;
+}
+
 /*
  * Hotplug state machine related functions
  */
@@ -1549,7 +1565,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
[CPUHP_BRINGUP_CPU] = {
.name   = "cpu:bringup",
.startup.single = bringup_cpu,
-   .teardown.single= NULL,
+   .teardown.single= finish_cpu,
.cant_stop  = true,
},
/* Final state before CPU kills itself */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a2694ba82874..8787958339d5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6200,13 +6200,14 @@ void idle_task_exit(void)
struct mm_struct *mm = current->active_mm;
 
BUG_ON(cpu_online(smp_processor_id()));
+   BUG_ON(current != this_rq()->idle);
 
if (mm != &init_mm) {
switch_mm(mm, &init_mm, current);
-   current->active_mm = &init_mm;
finish_arch_post_lock_switch();
}
-   mmdrop(mm);
+
+   /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
 }
 
 /*
-- 
2.21.0 (Apple Git-122.2)



[PATCH v5 0/9] crypto/nx: Enable GZIP engine and provide userpace API

2020-04-01 Thread Haren Myneni


Power9 processor supports Virtual Accelerator Switchboard (VAS) which
allows kernel and userspace to send compression requests to Nest
Accelerator (NX) directly. The NX unit comprises two 842 compression
engines and one GZIP engine. The Linux kernel already supports 842
compression in the kernel. This patch series adds GZIP compression support
from user space. The GZIP Compression engine implements the ZLIB and
GZIP compression algorithms. No plans of adding NX-GZIP compression
support in kernel right now.

Applications can send requests to NX directly with COPY/PASTE
instructions. But kernel has to establish channel / window on NX-GZIP
device for the userspace. So userspace access to the GZIP engine is
provided through /dev/crypto/nx-gzip device with several operations.

An application must open this device to obtain a file descriptor (fd).
Using the fd, the application should issue the VAS_TX_WIN_OPEN ioctl to
establish a connection to the engine. Once the window is opened, it should
use the mmap() system call to map the hardware address of the engine's
request queue into the application's virtual address space. User space then
forms the request as a co-processor Request Block (CRB) and pastes this CRB
to the mapped HW address using COPY/PASTE instructions. The application can
poll on status flags (part of the CRB) with a timeout for request completion.

For VAS_TX_WIN_OPEN ioctl, if user space passes vas_id = -1 (struct
vas_tx_win_open_attr), kernel determines the VAS instance on the
corresponding chip based on the CPU on which the process is executing.
Otherwise, the specified VAS instance is used if application passes the
proper VAS instance (vas_id listed in /proc/device-tree/vas@*/ibm,vas_id).

Process can open multiple windows with different FDs or can send several
requests to NX on the same window at the same time.

A userspace library libnxz is available:
https://github.com/abalib/power-gzip

Applications that use inflate/deflate calls can link with libNXz and use
NX GZIP compression without any modification.

Tested the available 842 compression on power8 and power9 system to make
sure no regression and tested GZIP compression on power9 with tests
available in the above link.

Thanks to Bulent Abali for nxz library and tests development.

Changelog:

V2:
  - Move user space API code to powerpc as suggested. Also this API
can be extended to any other coprocessor type that VAS can support
in future. Example: Fast thread wakeup feature from VAS
  - Rebased to 5.6-rc3

V3:
  - Fix sparse warnings (patches 3&6)

V4:
  - Remove unused coproc_instid and add only window address in
fp->private_data.
  - Add NX User's manual and Copy/paste links in VAS API documentation
in patch and other changes as Daniel Axtens suggested

V5:
  - Added "NX Fault handling" section in VAS API documentation as Nick
suggested.
  - Documentation: mmap size should be PAGE_SIZE as Daniel Axtens pointed out.

Haren Myneni (9):
  powerpc/vas: Initialize window attributes for GZIP coprocessor type
  powerpc/vas: Define VAS_TX_WIN_OPEN ioctl API
  powerpc/vas: Add VAS user space API
  crypto/nx: Initialize coproc entry with kzalloc
  crypto/nx: Rename nx-842-powernv file name to nx-common-powernv
  crypto/nx: Make enable code generic to add new GZIP compression type
  crypto/nx: Enable and setup GZIP compression type
  crypto/nx: Remove 'pid' in vas_tx_win_attr struct
  Documentation/powerpc: VAS API

 Documentation/powerpc/index.rst|1 +
 Documentation/powerpc/vas-api.rst  |  292 +
 Documentation/userspace-api/ioctl/ioctl-number.rst |1 +
 arch/powerpc/include/asm/vas.h |   12 +-
 arch/powerpc/include/uapi/asm/vas-api.h|   22 +
 arch/powerpc/platforms/powernv/Makefile|2 +-
 arch/powerpc/platforms/powernv/vas-api.c   |  257 +
 arch/powerpc/platforms/powernv/vas-window.c|   23 +-
 arch/powerpc/platforms/powernv/vas.h   |2 +
 drivers/crypto/nx/Makefile |2 +-
 drivers/crypto/nx/nx-842-powernv.c | 1062 --
 drivers/crypto/nx/nx-common-powernv.c  | 1133 
 12 files changed, 1736 insertions(+), 1073 deletions(-)
 create mode 100644 Documentation/powerpc/vas-api.rst
 create mode 100644 arch/powerpc/include/uapi/asm/vas-api.h
 create mode 100644 arch/powerpc/platforms/powernv/vas-api.c
 delete mode 100644 drivers/crypto/nx/nx-842-powernv.c
 create mode 100644 drivers/crypto/nx/nx-common-powernv.c

-- 
1.8.3.1





[PATCH v5 1/9] powerpc/vas: Initialize window attributes for GZIP coprocessor type

2020-04-01 Thread Haren Myneni


Initialize send and receive window attributes for GZIP high and
normal priority types.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-window.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index be900ad..d239c4b 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -817,7 +817,8 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, 
enum vas_cop_type cop)
 {
memset(rxattr, 0, sizeof(*rxattr));
 
-   if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
+   if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI ||
+   cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) {
rxattr->pin_win = true;
rxattr->nx_win = true;
rxattr->fault_win = false;
@@ -892,7 +893,8 @@ void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, 
enum vas_cop_type cop)
 {
memset(txattr, 0, sizeof(*txattr));
 
-   if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
+   if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI ||
+   cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) {
txattr->rej_no_credit = false;
txattr->rx_wcred_mode = true;
txattr->tx_wcred_mode = true;
@@ -976,9 +978,14 @@ static bool tx_win_args_valid(enum vas_cop_type cop,
if (attr->wcreds_max > VAS_TX_WCREDS_MAX)
return false;
 
-   if (attr->user_win &&
-   (cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count))
-   return false;
+   if (attr->user_win) {
+   if (attr->rsvd_txbuf_count)
+   return false;
+
+   if (cop != VAS_COP_TYPE_FTW && cop != VAS_COP_TYPE_GZIP &&
+   cop != VAS_COP_TYPE_GZIP_HIPRI)
+   return false;
+   }
 
return true;
 }
-- 
1.8.3.1





[PATCH v5 2/9] powerpc/vas: Define VAS_TX_WIN_OPEN ioctl API

2020-04-01 Thread Haren Myneni


Define the VAS_TX_WIN_OPEN ioctl interface for NX GZIP access
from user space. This interface is used to open GZIP send window and
mmap region which can be used by user space to send requests to NX
directly with copy/paste instructions.

Signed-off-by: Haren Myneni 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |  1 +
 arch/powerpc/include/uapi/asm/vas-api.h| 22 ++
 2 files changed, 23 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/vas-api.h

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index f759eda..f18accb 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -286,6 +286,7 @@ Code  Seq#Include File  
 Comments
 'v'   00-1F  linux/fs.h  conflict!
 'v'   00-0F  linux/sonypi.h  conflict!
 'v'   00-0F  media/v4l2-subdev.h conflict!
+'v'   20-27  arch/powerpc/include/uapi/asm/vas-api.hVAS API
 'v'   C0-FF  linux/meye.hconflict!
 'w'   allCERN SCI 
driver
 'y'   00-1F  packet 
based user level communications
diff --git a/arch/powerpc/include/uapi/asm/vas-api.h 
b/arch/powerpc/include/uapi/asm/vas-api.h
new file mode 100644
index 000..fe95d67
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/vas-api.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright 2019 IBM Corp.
+ */
+
+#ifndef _UAPI_MISC_VAS_H
+#define _UAPI_MISC_VAS_H
+
+#include 
+
+#define VAS_MAGIC  'v'
+#define VAS_TX_WIN_OPEN_IOW(VAS_MAGIC, 0x20, struct 
vas_tx_win_open_attr)
+
+struct vas_tx_win_open_attr {
+   __u32   version;
+   __s16   vas_id; /* specific instance of vas or -1 for default */
+   __u16   reserved1;
+   __u64   flags;  /* Future use */
+   __u64   reserved2[6];
+};
+
+#endif /* _UAPI_MISC_VAS_H */
-- 
1.8.3.1





[PATCH v5 3/9] powerpc/vas: Add VAS user space API

2020-04-01 Thread Haren Myneni


On power9, user space can send GZIP compression requests directly to NX
once kernel establishes NX channel / window with VAS. This patch provides
user space API which allows user space to establish channel using open
VAS_TX_WIN_OPEN ioctl, mmap and close operations.

Each window corresponds to a file descriptor, and an application can open
multiple windows. After the device is opened, the application uses the
VAS_TX_WIN_OPEN ioctl to open a window on a specific VAS instance and the
mmap() system call to map the hardware address of the engine's request
queue into the application's virtual address space.

The application can then submit one or more requests to the
engine by using the copy/paste instructions and pasting the CRBs to
the virtual address (aka paste_address) returned by mmap().

Only the NX GZIP coprocessor type is supported right now, and access to
the GZIP engine is provided via the /dev/crypto/nx-gzip device node.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/include/asm/vas.h  |  11 ++
 arch/powerpc/platforms/powernv/Makefile |   2 +-
 arch/powerpc/platforms/powernv/vas-api.c| 257 
 arch/powerpc/platforms/powernv/vas-window.c |   6 +-
 arch/powerpc/platforms/powernv/vas.h|   2 +
 5 files changed, 274 insertions(+), 4 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/vas-api.c

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index f93e6b0..e064953 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -163,4 +163,15 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
  */
 int vas_paste_crb(struct vas_window *win, int offset, bool re);
 
+/*
+ * Register / unregister coprocessor type to VAS API which will be exported
+ * to user space. Applications can use this API to open / close window
+ * which can be used to send / receive requests directly to cooprcessor.
+ *
+ * Only NX GZIP coprocessor type is supported now, but this API can be
+ * used for others in future.
+ */
+int vas_register_coproc_api(struct module *mod);
+void vas_unregister_coproc_api(void);
+
 #endif /* __ASM_POWERPC_VAS_H */
diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index 395789f..fe3f0fb 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_MEMORY_FAILURE)  += opal-memory-errors.o
 obj-$(CONFIG_OPAL_PRD) += opal-prd.o
 obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
 obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
-obj-$(CONFIG_PPC_VAS)  += vas.o vas-window.o vas-debug.o vas-fault.o
+obj-$(CONFIG_PPC_VAS)  += vas.o vas-window.o vas-debug.o vas-fault.o vas-api.o
 obj-$(CONFIG_OCXL_BASE)+= ocxl.o
 obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
 obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o
diff --git a/arch/powerpc/platforms/powernv/vas-api.c 
b/arch/powerpc/platforms/powernv/vas-api.c
new file mode 100644
index 000..7d049af
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-api.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * VAS user space API for its accelerators (Only NX-GZIP is supported now)
+ * Copyright (C) 2019 Haren Myneni, IBM Corp
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "vas.h"
+
+/*
+ * The driver creates the device node that can be used as follows:
+ * For NX-GZIP
+ *
+ * fd = open("/dev/crypto/nx-gzip", O_RDWR);
+ * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
+ * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).
+ * vas_copy(&crb, 0, 1);
+ * vas_paste(paste_addr, 0, 1);
+ * close(fd) or exit process to close window.
+ *
+ * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
+ * copy/paste returns to the user space directly. So refer NX hardware
+ * documententation for exact copy/paste usage and completion / error
+ * conditions.
+ */
+
+static char*coproc_dev_name = "nx-gzip";
+
+/*
+ * Wrapper object for the nx-gzip device - there is just one instance of
+ * this node for the whole system.
+ */
+static struct coproc_dev {
+   struct cdev cdev;
+   struct device *device;
+   char *name;
+   dev_t devt;
+   struct class *class;
+} coproc_device;
+
+static char *coproc_devnode(struct device *dev, umode_t *mode)
+{
+   return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
+}
+
+static int coproc_open(struct inode *inode, struct file *fp)
+{
+   /*
+* vas_window is allocated and assigned to fp->private_data
+* in ioctl. Nothing to do here for NX GZIP.
+*/
+   return 0;
+}
+
+static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
+{
+   void __user *uptr = (void __user *)arg;
+   struct vas_tx_win_attr txattr = {};
+   struct vas_tx_win_open_attr uattr;
+   struct vas_window *txwin;
+   int rc, vasid;
+
+   /*
+* O

[PATCH v5 4/9] crypto/nx: Initialize coproc entry with kzalloc

2020-04-01 Thread Haren Myneni


coproc entry is initialized during NX probe on power9, but not on P8.
nx842_delete_coprocs() is used for both and frees receive window if it
is allocated. This causes a crash on rmmod on P8 because coproc->vas.rxwin
is not initialized.

This patch replaces kmalloc with kzalloc in nx842_powernv_probe()

Signed-off-by: Haren Myneni 
Acked-by: Herbert Xu 
---
 drivers/crypto/nx/nx-842-powernv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/nx/nx-842-powernv.c 
b/drivers/crypto/nx/nx-842-powernv.c
index c037a24..8e63326 100644
--- a/drivers/crypto/nx/nx-842-powernv.c
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -922,7 +922,7 @@ static int __init nx842_powernv_probe(struct device_node 
*dn)
return -EINVAL;
}
 
-   coproc = kmalloc(sizeof(*coproc), GFP_KERNEL);
+   coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
if (!coproc)
return -ENOMEM;
 
-- 
1.8.3.1





[PATCH v5 5/9] crypto/nx: Rename nx-842-powernv file name to nx-common-powernv

2020-04-01 Thread Haren Myneni


Rename nx-842-powernv.c to nx-common-powernv.c to add code for setup
and enable new GZIP compression type. The actual functionality is not
changed in this patch.

Signed-off-by: Haren Myneni 
Acked-by: Herbert Xu 
---
 drivers/crypto/nx/Makefile|2 +-
 drivers/crypto/nx/nx-842-powernv.c| 1062 -
 drivers/crypto/nx/nx-common-powernv.c | 1062 +
 3 files changed, 1063 insertions(+), 1063 deletions(-)
 delete mode 100644 drivers/crypto/nx/nx-842-powernv.c
 create mode 100644 drivers/crypto/nx/nx-common-powernv.c

diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile
index 015155d..bc89a20 100644
--- a/drivers/crypto/nx/Makefile
+++ b/drivers/crypto/nx/Makefile
@@ -15,4 +15,4 @@ obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += 
nx-compress-pseries.o nx-compres
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o 
nx-compress.o
 nx-compress-objs := nx-842.o
 nx-compress-pseries-objs := nx-842-pseries.o
-nx-compress-powernv-objs := nx-842-powernv.o
+nx-compress-powernv-objs := nx-common-powernv.o
diff --git a/drivers/crypto/nx/nx-842-powernv.c 
b/drivers/crypto/nx/nx-842-powernv.c
deleted file mode 100644
index 8e63326..000
--- a/drivers/crypto/nx/nx-842-powernv.c
+++ /dev/null
@@ -1,1062 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Driver for IBM PowerNV 842 compression accelerator
- *
- * Copyright (C) 2015 Dan Streetman, IBM Corp
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include "nx-842.h"
-
-#include 
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dan Streetman ");
-MODULE_DESCRIPTION("842 H/W Compression driver for IBM PowerNV processors");
-MODULE_ALIAS_CRYPTO("842");
-MODULE_ALIAS_CRYPTO("842-nx");
-
-#define WORKMEM_ALIGN  (CRB_ALIGN)
-#define CSB_WAIT_MAX   (5000) /* ms */
-#define VAS_RETRIES(10)
-
-struct nx842_workmem {
-   /* Below fields must be properly aligned */
-   struct coprocessor_request_block crb; /* CRB_ALIGN align */
-   struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
-   struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
-   /* Above fields must be properly aligned */
-
-   ktime_t start;
-
-   char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
-} __packed __aligned(WORKMEM_ALIGN);
-
-struct nx842_coproc {
-   unsigned int chip_id;
-   unsigned int ct;
-   unsigned int ci;/* Coprocessor instance, used with icswx */
-   struct {
-   struct vas_window *rxwin;
-   int id;
-   } vas;
-   struct list_head list;
-};
-
-/*
- * Send the request to NX engine on the chip for the corresponding CPU
- * where the process is executing. Use with VAS function.
- */
-static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);
-
-/* no cpu hotplug on powernv, so this list never changes after init */
-static LIST_HEAD(nx842_coprocs);
-static unsigned int nx842_ct;  /* used in icswx function */
-
-static int (*nx842_powernv_exec)(const unsigned char *in,
-   unsigned int inlen, unsigned char *out,
-   unsigned int *outlenp, void *workmem, int fc);
-
-/**
- * setup_indirect_dde - Setup an indirect DDE
- *
- * The DDE is setup with the the DDE count, byte count, and address of
- * first direct DDE in the list.
- */
-static void setup_indirect_dde(struct data_descriptor_entry *dde,
-  struct data_descriptor_entry *ddl,
-  unsigned int dde_count, unsigned int byte_count)
-{
-   dde->flags = 0;
-   dde->count = dde_count;
-   dde->index = 0;
-   dde->length = cpu_to_be32(byte_count);
-   dde->address = cpu_to_be64(nx842_get_pa(ddl));
-}
-
-/**
- * setup_direct_dde - Setup single DDE from buffer
- *
- * The DDE is setup with the buffer and length.  The buffer must be properly
- * aligned.  The used length is returned.
- * Returns:
- *   NSuccessfully set up DDE with N bytes
- */
-static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
-unsigned long pa, unsigned int len)
-{
-   unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa));
-
-   dde->flags = 0;
-   dde->count = 0;
-   dde->index = 0;
-   dde->length = cpu_to_be32(l);
-   dde->address = cpu_to_be64(pa);
-
-   return l;
-}
-
-/**
- * setup_ddl - Setup DDL from buffer
- *
- * Returns:
- *   0 Successfully set up DDL
- */
-static int setup_ddl(struct data_descriptor_entry *dde,
-struct data_descriptor_entry *ddl,
-unsigned char *buf, unsigned int len,
-bool in)
-{
-   unsigned long pa = nx842_get_pa(buf);
-   int i, ret, total_len = len;
-
-   if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) {
-   pr_debug("%s buf

[PATCH 6/9] crypto/nx: Make enable code generic to add new GZIP compression type

2020-04-01 Thread Haren Myneni


Make setup and enable code generic to support new GZIP compression type.
Changed nx842 reference to nx and moved some code to new functions.
Functionality is not changed except sparse warning fix - setting NULL
instead of 0 for per_cpu send window in nx_delete_coprocs().

Signed-off-by: Haren Myneni 
Acked-by: Herbert Xu 
---
 drivers/crypto/nx/nx-common-powernv.c | 161 +-
 1 file changed, 101 insertions(+), 60 deletions(-)

diff --git a/drivers/crypto/nx/nx-common-powernv.c 
b/drivers/crypto/nx/nx-common-powernv.c
index f42881f..82dfa60 100644
--- a/drivers/crypto/nx/nx-common-powernv.c
+++ b/drivers/crypto/nx/nx-common-powernv.c
@@ -40,9 +40,9 @@ struct nx842_workmem {
char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
 } __packed __aligned(WORKMEM_ALIGN);
 
-struct nx842_coproc {
+struct nx_coproc {
unsigned int chip_id;
-   unsigned int ct;
+   unsigned int ct;/* Can be 842 or GZIP high/normal*/
unsigned int ci;/* Coprocessor instance, used with icswx */
struct {
struct vas_window *rxwin;
@@ -58,9 +58,15 @@ struct nx842_coproc {
 static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);
 
 /* no cpu hotplug on powernv, so this list never changes after init */
-static LIST_HEAD(nx842_coprocs);
+static LIST_HEAD(nx_coprocs);
 static unsigned int nx842_ct;  /* used in icswx function */
 
+/*
+ * Using same values as in skiboot or coprocessor type representing
+ * in NX workbook.
+ */
+#define NX_CT_842  (3)
+
 static int (*nx842_powernv_exec)(const unsigned char *in,
unsigned int inlen, unsigned char *out,
unsigned int *outlenp, void *workmem, int fc);
@@ -666,15 +672,15 @@ static int nx842_powernv_decompress(const unsigned char 
*in, unsigned int inlen,
  wmem, CCW_FC_842_DECOMP_CRC);
 }
 
-static inline void nx842_add_coprocs_list(struct nx842_coproc *coproc,
+static inline void nx_add_coprocs_list(struct nx_coproc *coproc,
int chipid)
 {
coproc->chip_id = chipid;
INIT_LIST_HEAD(&coproc->list);
-   list_add(&coproc->list, &nx842_coprocs);
+   list_add(&coproc->list, &nx_coprocs);
 }
 
-static struct vas_window *nx842_alloc_txwin(struct nx842_coproc *coproc)
+static struct vas_window *nx_alloc_txwin(struct nx_coproc *coproc)
 {
struct vas_window *txwin = NULL;
struct vas_tx_win_attr txattr;
@@ -704,9 +710,9 @@ static struct vas_window *nx842_alloc_txwin(struct 
nx842_coproc *coproc)
  * cpu_txwin is used in copy/paste operation for each compression /
  * decompression request.
  */
-static int nx842_open_percpu_txwins(void)
+static int nx_open_percpu_txwins(void)
 {
-   struct nx842_coproc *coproc, *n;
+   struct nx_coproc *coproc, *n;
unsigned int i, chip_id;
 
for_each_possible_cpu(i) {
@@ -714,17 +720,18 @@ static int nx842_open_percpu_txwins(void)
 
chip_id = cpu_to_chip_id(i);
 
-   list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
+   list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
/*
 * Kernel requests use only high priority FIFOs. So
 * open send windows for these FIFOs.
+* GZIP is not supported in kernel right now.
 */
 
if (coproc->ct != VAS_COP_TYPE_842_HIPRI)
continue;
 
if (coproc->chip_id == chip_id) {
-   txwin = nx842_alloc_txwin(coproc);
+   txwin = nx_alloc_txwin(coproc);
if (IS_ERR(txwin))
return PTR_ERR(txwin);
 
@@ -743,13 +750,28 @@ static int nx842_open_percpu_txwins(void)
return 0;
 }
 
+static int __init nx_set_ct(struct nx_coproc *coproc, const char *priority,
+   int high, int normal)
+{
+   if (!strcmp(priority, "High"))
+   coproc->ct = high;
+   else if (!strcmp(priority, "Normal"))
+   coproc->ct = normal;
+   else {
+   pr_err("Invalid RxFIFO priority value\n");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
-   int vasid, int *ct)
+   int vasid, int type, int *ct)
 {
struct vas_window *rxwin = NULL;
struct vas_rx_win_attr rxattr;
-   struct nx842_coproc *coproc;
u32 lpid, pid, tid, fifo_size;
+   struct nx_coproc *coproc;
u64 rx_fifo;
const char *priority;
int ret;
@@ -794,15 +816,12 @@ static int __init vas_cfg_coproc_info(struct device_node 
*dn, int chip_id,
if (!

[PATCH v5 7/9] crypto/nx: Enable and setup GZIP compression type

2020-04-01 Thread Haren Myneni


Changes to probe GZIP device-tree nodes, open RX windows and setup
GZIP compression type. No plans to provide GZIP usage in kernel right
now, but this patch enables GZIP for user space usage.

Signed-off-by: Haren Myneni 
Acked-by: Herbert Xu 
---
 drivers/crypto/nx/nx-common-powernv.c | 43 ++-
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/nx/nx-common-powernv.c 
b/drivers/crypto/nx/nx-common-powernv.c
index 82dfa60..f570691 100644
--- a/drivers/crypto/nx/nx-common-powernv.c
+++ b/drivers/crypto/nx/nx-common-powernv.c
@@ -65,6 +65,7 @@ struct nx_coproc {
  * Using same values as in skiboot or coprocessor type representing
  * in NX workbook.
  */
+#define NX_CT_GZIP (2) /* on P9 and later */
 #define NX_CT_842  (3)
 
 static int (*nx842_powernv_exec)(const unsigned char *in,
@@ -819,6 +820,9 @@ static int __init vas_cfg_coproc_info(struct device_node 
*dn, int chip_id,
if (type == NX_CT_842)
ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_842_HIPRI,
VAS_COP_TYPE_842);
+   else if (type == NX_CT_GZIP)
+   ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_GZIP_HIPRI,
+   VAS_COP_TYPE_GZIP);
 
if (ret)
goto err_out;
@@ -867,12 +871,16 @@ static int __init vas_cfg_coproc_info(struct device_node 
*dn, int chip_id,
return ret;
 }
 
-static int __init nx_coproc_init(int chip_id, int ct_842)
+static int __init nx_coproc_init(int chip_id, int ct_842, int ct_gzip)
 {
int ret = 0;
 
if (opal_check_token(OPAL_NX_COPROC_INIT)) {
ret = opal_nx_coproc_init(chip_id, ct_842);
+
+   if (!ret)
+   ret = opal_nx_coproc_init(chip_id, ct_gzip);
+
if (ret) {
ret = opal_error_code(ret);
pr_err("Failed to initialize NX for chip(%d): %d\n",
@@ -902,8 +910,8 @@ static int __init find_nx_device_tree(struct device_node 
*dn, int chip_id,
 static int __init nx_powernv_probe_vas(struct device_node *pn)
 {
int chip_id, vasid, ret = 0;
+   int ct_842 = 0, ct_gzip = 0;
struct device_node *dn;
-   int ct_842 = 0;
 
chip_id = of_get_ibm_chip_id(pn);
if (chip_id < 0) {
@@ -920,19 +928,24 @@ static int __init nx_powernv_probe_vas(struct device_node 
*pn)
for_each_child_of_node(pn, dn) {
ret = find_nx_device_tree(dn, chip_id, vasid, NX_CT_842,
"ibm,p9-nx-842", &ct_842);
+
+   if (!ret)
+   ret = find_nx_device_tree(dn, chip_id, vasid,
+   NX_CT_GZIP, "ibm,p9-nx-gzip", &ct_gzip);
+
if (ret)
return ret;
}
 
-   if (!ct_842) {
-   pr_err("NX842 FIFO nodes are missing\n");
+   if (!ct_842 || !ct_gzip) {
+   pr_err("NX FIFO nodes are missing\n");
return -EINVAL;
}
 
/*
 * Initialize NX instance for both high and normal priority FIFOs.
 */
-   ret = nx_coproc_init(chip_id, ct_842);
+   ret = nx_coproc_init(chip_id, ct_842, ct_gzip);
 
return ret;
 }
@@ -1072,10 +1085,19 @@ static __init int nx_compress_powernv_init(void)
nx842_powernv_exec = nx842_exec_icswx;
} else {
/*
+* Register VAS user space API for NX GZIP so
+* that user space can use GZIP engine.
+* 842 compression is supported only in kernel.
+*/
+   ret = vas_register_coproc_api(THIS_MODULE);
+
+   /*
 * GZIP is not supported in kernel right now.
 * So open tx windows only for 842.
 */
-   ret = nx_open_percpu_txwins();
+   if (!ret)
+   ret = nx_open_percpu_txwins();
+
if (ret) {
nx_delete_coprocs();
return ret;
@@ -1096,6 +1118,15 @@ static __init int nx_compress_powernv_init(void)
 
 static void __exit nx_compress_powernv_exit(void)
 {
+   /*
+* GZIP engine is supported only in power9 or later and nx842_ct
+* is used on power8 (icswx).
+* VAS API for NX GZIP is registered during init for user space
+* use. So delete this API use for GZIP engine.
+*/
+   if (!nx842_ct)
+   vas_unregister_coproc_api();
+
crypto_unregister_alg(&nx842_powernv_alg);
 
nx_delete_coprocs();
-- 
1.8.3.1





[PATCH v5 8/9] crypto/nx: Remove 'pid' in vas_tx_win_attr struct

2020-04-01 Thread Haren Myneni


When window is opened, pid reference is taken for user space
windows. Not needed for kernel windows. So remove 'pid' in
vas_tx_win_attr struct.

Signed-off-by: Haren Myneni 
Acked-by: Herbert Xu 
---
 arch/powerpc/include/asm/vas.h| 1 -
 drivers/crypto/nx/nx-common-powernv.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index e064953..994db6f 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -86,7 +86,6 @@ struct vas_tx_win_attr {
int wcreds_max;
int lpid;
int pidr;   /* hardware PID (from SPRN_PID) */
-   int pid;/* linux process id */
int pswid;
int rsvd_txbuf_count;
int tc_mode;
diff --git a/drivers/crypto/nx/nx-common-powernv.c 
b/drivers/crypto/nx/nx-common-powernv.c
index f570691..38333e4 100644
--- a/drivers/crypto/nx/nx-common-powernv.c
+++ b/drivers/crypto/nx/nx-common-powernv.c
@@ -692,7 +692,6 @@ static struct vas_window *nx_alloc_txwin(struct nx_coproc 
*coproc)
 */
vas_init_tx_win_attr(&txattr, coproc->ct);
txattr.lpid = 0;/* lpid is 0 for kernel requests */
-   txattr.pid = 0; /* pid is 0 for kernel requests */
 
/*
 * Open a VAS send window which is used to send request to NX.
-- 
1.8.3.1





[PATCH v5 9/9] Documentation/powerpc: VAS API

2020-04-01 Thread Haren Myneni


Power9 introduced Virtual Accelerator Switchboard (VAS) which allows
user space to communicate with Nest Accelerator (NX) directly. But
kernel has to establish channel to NX for user space. This document
describes user space API that application can use to establish
communication channel.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 Documentation/powerpc/index.rst   |   1 +
 Documentation/powerpc/vas-api.rst | 292 ++
 2 files changed, 293 insertions(+)
 create mode 100644 Documentation/powerpc/vas-api.rst

diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst
index 0d45f0f..afe2d5e 100644
--- a/Documentation/powerpc/index.rst
+++ b/Documentation/powerpc/index.rst
@@ -30,6 +30,7 @@ powerpc
 syscall64-abi
 transactional_memory
 ultravisor
+vas-api
 
 .. only::  subproject and html
 
diff --git a/Documentation/powerpc/vas-api.rst 
b/Documentation/powerpc/vas-api.rst
new file mode 100644
index 000..1217c2f
--- /dev/null
+++ b/Documentation/powerpc/vas-api.rst
@@ -0,0 +1,292 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _VAS-API:
+
+===
+Virtual Accelerator Switchboard (VAS) userspace API
+===
+
+Introduction
+
+
+Power9 processor introduced Virtual Accelerator Switchboard (VAS) which
+allows both userspace and kernel to communicate with the co-processor
+(hardware accelerator) referred to as the Nest Accelerator (NX). The NX
+unit comprises one or more hardware engines or co-processor types
+such as 842 compression, GZIP compression and encryption. On power9,
+userspace applications will have access to only GZIP Compression engine
+which supports ZLIB and GZIP compression algorithms in the hardware.
+
+To communicate with NX, kernel has to establish a channel or window and
+then requests can be submitted directly without kernel involvement.
+Requests to the GZIP engine must be formatted as a co-processor Request
+Block (CRB) and these CRBs must be submitted to the NX using COPY/PASTE
+instructions to paste the CRB to hardware address that is associated with
+the engine's request queue.
+
+The GZIP engine provides two priority levels of requests: Normal and
+High. Only Normal requests are supported from userspace right now.
+
+This document explains the userspace API that is used to interact with
+the kernel to set up a channel / window which can be used to send compression
+requests directly to the NX accelerator.
+
+
+Overview
+
+
+Application access to the GZIP engine is provided through
+/dev/crypto/nx-gzip device node implemented by the VAS/NX device driver.
+An application must open the /dev/crypto/nx-gzip device to obtain a file
+descriptor (fd). It should then issue the VAS_TX_WIN_OPEN ioctl with this fd to
+establish a connection to the engine. This means a send window is opened on the
+GZIP engine for this process. Once a connection is established, the application
+should use the mmap() system call to map the hardware address of engine's
+request queue into the application's virtual address space.
+
+The application can then submit one or more requests to the engine by
+using copy/paste instructions and pasting the CRBs to the virtual address
+(aka paste_address) returned by mmap(). User space can close the
+established connection or send window by closing the file descriptor
+(close(fd)) or upon the process exit.
+
+Note that applications can send several requests with the same window or
+can establish multiple windows, but one window for each file descriptor.
+
+Following sections provide additional details and references about the
+individual steps.
+
+NX-GZIP Device Node
+===
+
+There is one /dev/crypto/nx-gzip node in the system and it provides
+access to all GZIP engines in the system. The only valid operations on
+/dev/crypto/nx-gzip are:
+
+   * open() the device for read and write.
+   * issue VAS_TX_WIN_OPEN ioctl
+   * mmap() the engine's request queue into application's virtual
+ address space (i.e. get a paste_address for the co-processor
+ engine).
+   * close the device node.
+
+Other file operations on this device node are undefined.
+
+Note that the copy and paste operations go directly to the hardware and
+do not go through this device. Refer to the COPY/PASTE document for more
+details.
+
+Although a system may have several instances of the NX co-processor
+engines (typically, one per P9 chip) there is just one
+/dev/crypto/nx-gzip device node in the system. When the nx-gzip device
+node is opened, the kernel opens a send window on a suitable instance of
+the NX accelerator. It finds the CPU on which the user process is executing
+and determines the NX instance for the corresponding chip to which this CPU
+belongs.
+
+Applications may choose a specific instance of the NX co-processor using
+the vas_id field in the VAS_TX_WIN_OPEN ioctl as detailed below.
+
+A userspace li

[PATCH v9 06/13] powerpc/vas: Take reference to PID and mm for user space windows

2020-04-01 Thread Haren Myneni
(sorry reposting. version string missed)

When process opens a window, its pid and tgid will be saved in the
vas_window struct. This window will be closed when the process exits.
The kernel handles NX faults by updating CSB or send SEGV signal to pid
of the process if the user space csb addr is invalid.

In multi-thread applications, a window can be opened by a child thread,
but it will not be closed when this thread exits. It is expected that
the parent will clean up all resources including NX windows opened by
child threads. A child thread can send NX requests using this window
and could be killed before completion is reported. If the pid assigned
to this thread is reused while requests are pending, a failure SEGV
would be directed to the wrong place.

To prevent reusing the pid, take references to pid and mm when the window
is opened and release them when the window is closed. Then if child
thread is not running, SEGV signal will be sent to thread group leader
(tgid).

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-debug.c  |  2 +-
 arch/powerpc/platforms/powernv/vas-window.c | 53 ++---
 arch/powerpc/platforms/powernv/vas.h|  9 -
 3 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/vas-debug.c 
b/arch/powerpc/platforms/powernv/vas-debug.c
index 09e63df..ef9a717 100644
--- a/arch/powerpc/platforms/powernv/vas-debug.c
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -38,7 +38,7 @@ static int info_show(struct seq_file *s, void *private)
 
seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop),
window->tx_win ? "Send" : "Receive");
-   seq_printf(s, "Pid : %d\n", window->pid);
+   seq_printf(s, "Pid : %d\n", vas_window_pid(window));
 
 unlock:
mutex_unlock(&vas_mutex);
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index dc46bf6..7054cd4 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -12,6 +12,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include "vas.h"
@@ -876,8 +878,6 @@ struct vas_window *vas_rx_win_open(int vasid, enum 
vas_cop_type cop,
rxwin->user_win = rxattr->user_win;
rxwin->cop = cop;
rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
-   if (rxattr->user_win)
-   rxwin->pid = task_pid_vnr(current);
 
init_winctx_for_rxwin(rxwin, rxattr, &winctx);
init_winctx_regs(rxwin, &winctx);
@@ -1027,7 +1027,6 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
txwin->tx_win = 1;
txwin->rxwin = rxwin;
txwin->nx_win = txwin->rxwin->nx_win;
-   txwin->pid = attr->pid;
txwin->user_win = attr->user_win;
txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
 
@@ -1059,8 +1058,43 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
goto free_window;
}
 
-   set_vinst_win(vinst, txwin);
+   if (txwin->user_win) {
+   /*
+* Window opened by a child thread may not be closed when
+* it exits. So take reference to its pid and release it
+* when the window is freed by the parent thread.
+* Acquire a reference to the task's pid to make sure
+* pid will not be re-used - needed only for multithread
+* applications.
+*/
+   txwin->pid = get_task_pid(current, PIDTYPE_PID);
+   /*
+* Acquire a reference to the task's mm.
+*/
+   txwin->mm = get_task_mm(current);
 
+   if (!txwin->mm) {
+   put_pid(txwin->pid);
+   pr_err("VAS: pid(%d): mm_struct is not found\n",
+   current->pid);
+   rc = -EPERM;
+   goto free_window;
+   }
+
+   mmgrab(txwin->mm);
+   mmput(txwin->mm);
+   mm_context_add_copro(txwin->mm);
+   /*
+* Process closes window during exit. In the case of
+* multithread application, the child thread can open
+* window and can exit without closing it. Expects parent
+* thread to use and close the window. So do not need
+* to take pid reference for parent thread.
+*/
+   txwin->tgid = find_get_pid(task_tgid_vnr(current));
+   }
+
+   set_vinst_win(vinst, txwin);
return txwin;
 
 free_window:
@@ -1257,8 +1291,17 @@ int vas_win_close(struct vas_window *window)
poll_window_castout(window);
 
/* if send window, drop reference to matching receive window */
-   if (window->tx_win)
+   if (

[PATCH v5 6/9] crypto/nx: Make enable code generic to add new GZIP compression type

2020-04-01 Thread Haren Myneni
(Sorry for reposting. version number is missed in subject)

Make setup and enable code generic to support new GZIP compression type.
Changed nx842 reference to nx and moved some code to new functions.
Functionality is not changed except sparse warning fix - setting NULL
instead of 0 for per_cpu send window in nx_delete_coprocs().

Signed-off-by: Haren Myneni 
Acked-by: Herbert Xu 
---
 drivers/crypto/nx/nx-common-powernv.c | 161 +-
 1 file changed, 101 insertions(+), 60 deletions(-)

diff --git a/drivers/crypto/nx/nx-common-powernv.c 
b/drivers/crypto/nx/nx-common-powernv.c
index f42881f..82dfa60 100644
--- a/drivers/crypto/nx/nx-common-powernv.c
+++ b/drivers/crypto/nx/nx-common-powernv.c
@@ -40,9 +40,9 @@ struct nx842_workmem {
char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
 } __packed __aligned(WORKMEM_ALIGN);
 
-struct nx842_coproc {
+struct nx_coproc {
unsigned int chip_id;
-   unsigned int ct;
+   unsigned int ct;/* Can be 842 or GZIP high/normal*/
unsigned int ci;/* Coprocessor instance, used with icswx */
struct {
struct vas_window *rxwin;
@@ -58,9 +58,15 @@ struct nx842_coproc {
 static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);
 
 /* no cpu hotplug on powernv, so this list never changes after init */
-static LIST_HEAD(nx842_coprocs);
+static LIST_HEAD(nx_coprocs);
 static unsigned int nx842_ct;  /* used in icswx function */
 
+/*
+ * Using same values as in skiboot or coprocessor type representing
+ * in NX workbook.
+ */
+#define NX_CT_842  (3)
+
 static int (*nx842_powernv_exec)(const unsigned char *in,
unsigned int inlen, unsigned char *out,
unsigned int *outlenp, void *workmem, int fc);
@@ -666,15 +672,15 @@ static int nx842_powernv_decompress(const unsigned char 
*in, unsigned int inlen,
  wmem, CCW_FC_842_DECOMP_CRC);
 }
 
-static inline void nx842_add_coprocs_list(struct nx842_coproc *coproc,
+static inline void nx_add_coprocs_list(struct nx_coproc *coproc,
int chipid)
 {
coproc->chip_id = chipid;
INIT_LIST_HEAD(&coproc->list);
-   list_add(&coproc->list, &nx842_coprocs);
+   list_add(&coproc->list, &nx_coprocs);
 }
 
-static struct vas_window *nx842_alloc_txwin(struct nx842_coproc *coproc)
+static struct vas_window *nx_alloc_txwin(struct nx_coproc *coproc)
 {
struct vas_window *txwin = NULL;
struct vas_tx_win_attr txattr;
@@ -704,9 +710,9 @@ static struct vas_window *nx842_alloc_txwin(struct 
nx842_coproc *coproc)
  * cpu_txwin is used in copy/paste operation for each compression /
  * decompression request.
  */
-static int nx842_open_percpu_txwins(void)
+static int nx_open_percpu_txwins(void)
 {
-   struct nx842_coproc *coproc, *n;
+   struct nx_coproc *coproc, *n;
unsigned int i, chip_id;
 
for_each_possible_cpu(i) {
@@ -714,17 +720,18 @@ static int nx842_open_percpu_txwins(void)
 
chip_id = cpu_to_chip_id(i);
 
-   list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
+   list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
/*
 * Kernel requests use only high priority FIFOs. So
 * open send windows for these FIFOs.
+* GZIP is not supported in kernel right now.
 */
 
if (coproc->ct != VAS_COP_TYPE_842_HIPRI)
continue;
 
if (coproc->chip_id == chip_id) {
-   txwin = nx842_alloc_txwin(coproc);
+   txwin = nx_alloc_txwin(coproc);
if (IS_ERR(txwin))
return PTR_ERR(txwin);
 
@@ -743,13 +750,28 @@ static int nx842_open_percpu_txwins(void)
return 0;
 }
 
+static int __init nx_set_ct(struct nx_coproc *coproc, const char *priority,
+   int high, int normal)
+{
+   if (!strcmp(priority, "High"))
+   coproc->ct = high;
+   else if (!strcmp(priority, "Normal"))
+   coproc->ct = normal;
+   else {
+   pr_err("Invalid RxFIFO priority value\n");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
-   int vasid, int *ct)
+   int vasid, int type, int *ct)
 {
struct vas_window *rxwin = NULL;
struct vas_rx_win_attr rxattr;
-   struct nx842_coproc *coproc;
u32 lpid, pid, tid, fifo_size;
+   struct nx_coproc *coproc;
u64 rx_fifo;
const char *priority;
int ret;
@@ -794,15 +816,12 @@ static int __init vas_cfg_copro

Re: [PATCH v2 1/1] vfio-pci/nvlink2: Allow fallback to ibm,mmio-atsd[0]

2020-04-01 Thread Alex Williamson
On Tue, 31 Mar 2020 15:12:46 +1100
Sam Bobroff  wrote:

> Older versions of skiboot only provide a single value in the device
> tree property "ibm,mmio-atsd", even when multiple Address Translation
> Shoot Down (ATSD) registers are present. This prevents NVLink2 devices
> (other than the first) from being used with vfio-pci because vfio-pci
> expects to be able to assign a dedicated ATSD register to each NVLink2
> device.
> 
> However, ATSD registers can be shared among devices. This change
> allows vfio-pci to fall back to sharing the register at index 0 if
> necessary.
> 
> Fixes: 7f92891778df ("vfio_pci: Add NVIDIA GV100GL [Tesla V100 SXM2] 
> subdriver")
> Signed-off-by: Sam Bobroff 
> ---
> Patch set v2:
> Patch 1/1: vfio-pci/nvlink2: Allow fallback to ibm,mmio-atsd[0]
> - Removed unnecessary warning.
> - Added Fixes tag.
> 
> Patch set v1:
> Patch 1/1: vfio-pci/nvlink2: Allow fallback to ibm,mmio-atsd[0]
> 
>  drivers/vfio/pci/vfio_pci_nvlink2.c | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)

Applied to vfio next branch for v5.7 with Alexey's review.  Thanks,

Alex

> diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c 
> b/drivers/vfio/pci/vfio_pci_nvlink2.c
> index f2983f0f84be..ae2af590e501 100644
> --- a/drivers/vfio/pci/vfio_pci_nvlink2.c
> +++ b/drivers/vfio/pci/vfio_pci_nvlink2.c
> @@ -420,8 +420,14 @@ int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
>  
>   if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", nvlink_index,
>   &mmio_atsd)) {
> - dev_warn(&vdev->pdev->dev, "No available ATSD found\n");
> - mmio_atsd = 0;
> + if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", 0,
> + &mmio_atsd)) {
> + dev_warn(&vdev->pdev->dev, "No available ATSD found\n");
> + mmio_atsd = 0;
> + } else {
> + dev_warn(&vdev->pdev->dev,
> +  "Using fallback ibm,mmio-atsd[0] for ATSD.\n");
> + }
>   }
>  
>   if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {



RE: [PATCH v4 00/25] Add support for OpenCAPI Persistent Memory devices

2020-04-01 Thread Alastair D'Silva


> -Original Message-
> From: Dan Williams 
> Sent: Wednesday, 1 April 2020 7:48 PM
> To: Alastair D'Silva 
> Cc: Aneesh Kumar K . V ; Oliver O'Halloran
> ; Benjamin Herrenschmidt
> ; Paul Mackerras ; Michael
> Ellerman ; Frederic Barrat ;
> Andrew Donnellan ; Arnd Bergmann
> ; Greg Kroah-Hartman ;
> Vishal Verma ; Dave Jiang
> ; Ira Weiny ; Andrew Morton
> ; Mauro Carvalho Chehab
> ; David S. Miller ;
> Rob Herring ; Anton Blanchard ;
> Krzysztof Kozlowski ; Mahesh Salgaonkar
> ; Madhavan Srinivasan
> ; Cédric Le Goater ; Anju T
> Sudhakar ; Hari Bathini
> ; Thomas Gleixner ; Greg
> Kurz ; Nicholas Piggin ; Masahiro
> Yamada ; Alexey Kardashevskiy
> ; Linux Kernel Mailing List ;
> linuxppc-dev ; linux-nvdimm  nvd...@lists.01.org>; Linux MM 
> Subject: Re: [PATCH v4 00/25] Add support for OpenCAPI Persistent Memory
> devices
> 
> On Sun, Mar 29, 2020 at 10:23 PM Alastair D'Silva 
> wrote:
> >
> > This series adds support for OpenCAPI Persistent Memory devices on
> > bare metal (arch/powernv), exposing them as nvdimms so that we can
> > make use of the existing infrastructure. There already exists a driver
> > for the same devices abstracted through PowerVM (arch/pseries):
> > arch/powerpc/platforms/pseries/papr_scm.c
> >
> > These devices are connected via OpenCAPI, and present as LPC (lowest
> coherence point) memory to the system, practically, that means that
> memory on these cards could be treated as conventional, cache-coherent
> memory.
> >
> > Since the devices are connected via OpenCAPI, they are not enumerated
> via ACPI. Instead, OpenCAPI links present as pseudo-PCI bridges, with
> devices below them.
> >
> > This series introduces a driver that exposes the memory on these cards as
> nvdimms, with each card getting its own bus. This is somewhat complicated
> by the fact that the cards do not have out of band persistent storage for
> metadata, so 1 SECTION_SIZE's (see SPARSEMEM) worth of storage is carved
> out of the top of the card storage to implement the ndctl_config_* calls.
> 
> Is it really tied to section-size? Can't that change based on the configured
> page-size? It's not clear to me why that would be the choice, but I'll dig 
> into
> the implementation.
> 

I had tried using PAGE_SIZE, but ran into problems carving off just 1 page and 
handing it to the kernel, while leaving the rest as pmem. That was a while ago 
though, so maybe I should retry it.

> > The driver is not responsible for configuring the NPU (NVLink Processing
> Unit) BARs to map the LPC memory from the card into the system's physical
> address space, instead, it requests this to be done via OPAL calls (typically
> implemented by Skiboot).
> 
> Are OPAL calls similar to ACPI DSMs? I.e. methods for the OS to invoke
> platform firmware services? What's Skiboot?
> 

Yes, OPAL is the interface to firmware for POWER. Skiboot is the open-source 
(and only) implementation of OPAL.

> >
> > The series is structured as follows:
> >  - Required infrastructure changes & cleanup
> >  - A minimal driver implementation
> >  - Implementing additional features within the driver
> 
> Thanks for the intro and the changelog!
> 
> >
> > Changelog:
> > V4:
> >   - Rebase on next-20200320
> 
> Do you have dependencies on other material that's in -next? Otherwise -
> next is only a viable development baseline if you are going to merge through
> Andrew's tree.
> 
> >   - Bump copyright to 2020
> >   - Ensure all uapi headers use C89 compatible comments (missed
> ocxlpmem.h)
> >   - Move the driver back to drivers/nvdimm/ocxl, after confirmation
> > that this location is desirable
> >   - Rename ocxl.c to ocxlpmem.c (+ support files)
> >   - Rename all ocxl_pmem to ocxlpmem
> >   - Address checkpatch --strict issues
> >   - "powerpc/powernv: Add OPAL calls for LPC memory alloc/release"
> > - Pass base address as __be64
> >   - "ocxl: Tally up the LPC memory on a link & allow it to be mapped"
> > - Address checkpatch spacing warnings
> > - Reword blurb
> > - Reword size description for ocxl_link_add_lpc_mem()
> > - Add an early exit in ocxl_link_lpc_release() to avoid triggering
> >   bogus warnings if called after ocxl_link_lpc_map() fails
> >   - "powerpc/powernv: Add OPAL calls for LPC memory alloc/release"
> > - Reword blurb
> >   - "powerpc/powernv: Map & release OpenCAPI LPC memory"
> > - Reword blurb
> >   - Move minor_idr init from file_init() to ocxlpmem_init() (fixes runtime
> error
> > in "nvdimm: Add driver for OpenCAPI Persistent Memory")
> >   - Wrap long lines
> >   - "nvdimm: Add driver for OpenCAPI Storage Class Memory"
> > > - Remove '+ 1' workaround from serial number->cookie assignment
> > - Drop out of memory message for ocxlpmem in probe()
> > - Fix leaks of ocxlpmem & ocxlpmem->ocxl_fn in probe()
> > - remove struct ocxlpmem_function0, it didn't value add
> > - factor out err_unregistered label in probe
> 

RE: [PATCH v4 01/25] powerpc/powernv: Add OPAL calls for LPC memory alloc/release

2020-04-01 Thread Alastair D'Silva
> -Original Message-
> From: Dan Williams 
> Sent: Wednesday, 1 April 2020 7:48 PM
> To: Alastair D'Silva 
> Cc: Aneesh Kumar K . V ; Oliver O'Halloran
> ; Benjamin Herrenschmidt
> ; Paul Mackerras ; Michael
> Ellerman ; Frederic Barrat ;
> Andrew Donnellan ; Arnd Bergmann
> ; Greg Kroah-Hartman ;
> Vishal Verma ; Dave Jiang
> ; Ira Weiny ; Andrew Morton
> ; Mauro Carvalho Chehab
> ; David S. Miller ;
> Rob Herring ; Anton Blanchard ;
> Krzysztof Kozlowski ; Mahesh Salgaonkar
> ; Madhavan Srinivasan
> ; Cédric Le Goater ; Anju T
> Sudhakar ; Hari Bathini
> ; Thomas Gleixner ; Greg
> Kurz ; Nicholas Piggin ; Masahiro
> Yamada ; Alexey Kardashevskiy
> ; Linux Kernel Mailing List ;
> linuxppc-dev ; linux-nvdimm  nvd...@lists.01.org>; Linux MM 
> Subject: Re: [PATCH v4 01/25] powerpc/powernv: Add OPAL calls for LPC
> memory alloc/release
> 
> On Sun, Mar 29, 2020 at 10:23 PM Alastair D'Silva 
> wrote:
> >
> > Add OPAL calls for LPC memory alloc/release
> >
> 
> This seems to be referencing an existing api definition, can you include a
> pointer to the spec in case someone wanted to understand what these
> routines do? I suspect this is not allocating memory in the traditional sense 
> as
> much as it's allocating physical address space for a device to be mapped?
> 

These API calls were introduced in the following skiboot commit:
https://github.com/open-power/skiboot/commit/1a548857ce1f02f43585b326a891eed18a7b43b3

I'll add it to the description.

> 
> > Signed-off-by: Alastair D'Silva 
> > Acked-by: Andrew Donnellan 
> > Acked-by: Frederic Barrat 
> > ---
> >  arch/powerpc/include/asm/opal-api.h| 2 ++
> >  arch/powerpc/include/asm/opal.h| 2 ++
> >  arch/powerpc/platforms/powernv/opal-call.c | 2 ++
> >  3 files changed, 6 insertions(+)
> >
> > diff --git a/arch/powerpc/include/asm/opal-api.h
> > b/arch/powerpc/include/asm/opal-api.h
> > index c1f25a760eb1..9298e603001b 100644
> > --- a/arch/powerpc/include/asm/opal-api.h
> > +++ b/arch/powerpc/include/asm/opal-api.h
> > @@ -208,6 +208,8 @@
> >  #define OPAL_HANDLE_HMI2   166
> >  #defineOPAL_NX_COPROC_INIT 167
> >  #define OPAL_XIVE_GET_VP_STATE 170
> > +#define OPAL_NPU_MEM_ALLOC 171
> > +#define OPAL_NPU_MEM_RELEASE   172
> >  #define OPAL_MPIPL_UPDATE  173
> >  #define OPAL_MPIPL_REGISTER_TAG174
> >  #define OPAL_MPIPL_QUERY_TAG   175
> > diff --git a/arch/powerpc/include/asm/opal.h
> > b/arch/powerpc/include/asm/opal.h index 9986ac34b8e2..301fea46c7ca
> > 100644
> > --- a/arch/powerpc/include/asm/opal.h
> > +++ b/arch/powerpc/include/asm/opal.h
> > @@ -39,6 +39,8 @@ int64_t opal_npu_spa_clear_cache(uint64_t phb_id,
> uint32_t bdfn,
> > uint64_t PE_handle);  int64_t
> > opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
> > uint64_t rate_phys, uint32_t size);
> > +int64_t opal_npu_mem_alloc(u64 phb_id, u32 bdfn, u64 size, __be64
> > +*bar); int64_t opal_npu_mem_release(u64 phb_id, u32 bdfn);
> >
> >  int64_t opal_console_write(int64_t term_number, __be64 *length,
> >const uint8_t *buffer); diff --git
> > a/arch/powerpc/platforms/powernv/opal-call.c
> > b/arch/powerpc/platforms/powernv/opal-call.c
> > index 5cd0f52d258f..f26e58b72c04 100644
> > --- a/arch/powerpc/platforms/powernv/opal-call.c
> > +++ b/arch/powerpc/platforms/powernv/opal-call.c
> > @@ -287,6 +287,8 @@ OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,
> OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
> >  OPAL_CALL(opal_sensor_read_u64,
> OPAL_SENSOR_READ_U64);
> >  OPAL_CALL(opal_sensor_group_enable,
> OPAL_SENSOR_GROUP_ENABLE);
> >  OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT);
> > +OPAL_CALL(opal_npu_mem_alloc,  OPAL_NPU_MEM_ALLOC);
> > +OPAL_CALL(opal_npu_mem_release,
> OPAL_NPU_MEM_RELEASE);
> >  OPAL_CALL(opal_mpipl_update,   OPAL_MPIPL_UPDATE);
> >  OPAL_CALL(opal_mpipl_register_tag,
> OPAL_MPIPL_REGISTER_TAG);
> >  OPAL_CALL(opal_mpipl_query_tag,
> OPAL_MPIPL_QUERY_TAG);
> > --
> > 2.24.1
> >
> 
> 
> --
> This email has been checked for viruses by AVG.
> https://www.avg.com


-- 
Alastair D'Silva   mob: 0423 762 819
skype: alastair_dsilva msn: alast...@d-silva.org
blog: http://alastair.d-silva.orgTwitter: @EvilDeece



Re: linux-next: manual merge of the net-next tree with the powerpc tree

2020-04-01 Thread Stephen Rothwell
Hi all,

On Fri, 6 Mar 2020 10:21:58 +1100 Stephen Rothwell  
wrote:
>
> Today's linux-next merge of the net-next tree got a conflict in:
> 
>   fs/sysfs/group.c
> 
> between commit:
> 
>   9255782f7061 ("sysfs: Wrap __compat_only_sysfs_link_entry_to_kobj function 
> to change the symlink name")
> 
> from the powerpc tree and commit:
> 
>   303a42769c4c ("sysfs: add sysfs_group{s}_change_owner()")
> 
> from the net-next tree.
> 
> I fixed it up (see below) and can carry the fix as necessary. This
> is now fixed as far as linux-next is concerned, but any non trivial
> conflicts should be mentioned to your upstream maintainer when your tree
> is submitted for merging.  You may also want to consider cooperating
> with the maintainer of the conflicting tree to minimise any particularly
> complex conflicts.
> 
> -- 
> Cheers,
> Stephen Rothwell
> 
> diff --cc fs/sysfs/group.c
> index 1e2a096057bc,5afe0e7ff7cd..
> --- a/fs/sysfs/group.c
> +++ b/fs/sysfs/group.c
> @@@ -478,4 -457,118 +479,118 @@@ int compat_only_sysfs_link_entry_to_kob
>   kernfs_put(target);
>   return PTR_ERR_OR_ZERO(link);
>   }
>  -EXPORT_SYMBOL_GPL(__compat_only_sysfs_link_entry_to_kobj);
>  +EXPORT_SYMBOL_GPL(compat_only_sysfs_link_entry_to_kobj);
> + 
> + static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn,
> +   const struct attribute_group *grp,
> +   struct iattr *newattrs)
> + {
> + struct kernfs_node *kn;
> + int error;
> + 
> + if (grp->attrs) {
> + struct attribute *const *attr;
> + 
> + for (attr = grp->attrs; *attr; attr++) {
> + kn = kernfs_find_and_get(grp_kn, (*attr)->name);
> + if (!kn)
> + return -ENOENT;
> + 
> + error = kernfs_setattr(kn, newattrs);
> + kernfs_put(kn);
> + if (error)
> + return error;
> + }
> + }
> + 
> + if (grp->bin_attrs) {
> + struct bin_attribute *const *bin_attr;
> + 
> + for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) {
> + kn = kernfs_find_and_get(grp_kn, 
> (*bin_attr)->attr.name);
> + if (!kn)
> + return -ENOENT;
> + 
> + error = kernfs_setattr(kn, newattrs);
> + kernfs_put(kn);
> + if (error)
> + return error;
> + }
> + }
> + 
> + return 0;
> + }
> + 
> + /**
> +  * sysfs_group_change_owner - change owner of an attribute group.
> +  * @kobj:   The kobject containing the group.
> +  * @grp:The attribute group.
> +  * @kuid:   new owner's kuid
> +  * @kgid:   new owner's kgid
> +  *
> +  * Returns 0 on success or error code on failure.
> +  */
> + int sysfs_group_change_owner(struct kobject *kobj,
> +  const struct attribute_group *grp, kuid_t kuid,
> +  kgid_t kgid)
> + {
> + struct kernfs_node *grp_kn;
> + int error;
> + struct iattr newattrs = {
> + .ia_valid = ATTR_UID | ATTR_GID,
> + .ia_uid = kuid,
> + .ia_gid = kgid,
> + };
> + 
> + if (!kobj->state_in_sysfs)
> + return -EINVAL;
> + 
> + if (grp->name) {
> + grp_kn = kernfs_find_and_get(kobj->sd, grp->name);
> + } else {
> + kernfs_get(kobj->sd);
> + grp_kn = kobj->sd;
> + }
> + if (!grp_kn)
> + return -ENOENT;
> + 
> + error = kernfs_setattr(grp_kn, &newattrs);
> + if (!error)
> + error = sysfs_group_attrs_change_owner(grp_kn, grp, &newattrs);
> + 
> + kernfs_put(grp_kn);
> + 
> + return error;
> + }
> + EXPORT_SYMBOL_GPL(sysfs_group_change_owner);
> + 
> + /**
> +  * sysfs_groups_change_owner - change owner of a set of attribute groups.
> +  * @kobj:   The kobject containing the groups.
> +  * @groups: The attribute groups.
> +  * @kuid:   new owner's kuid
> +  * @kgid:   new owner's kgid
> +  *
> +  * Returns 0 on success or error code on failure.
> +  */
> + int sysfs_groups_change_owner(struct kobject *kobj,
> +   const struct attribute_group **groups,
> +   kuid_t kuid, kgid_t kgid)
> + {
> + int error = 0, i;
> + 
> + if (!kobj->state_in_sysfs)
> + return -EINVAL;
> + 
> + if (!groups)
> + return 0;
> + 
> + for (i = 0; groups[i]; i++) {
> + error = sysfs_group_change_owner(kobj, groups[i], kuid, kgid);
> + if (error)
> + break;
> + }
> + 
> + return error;
> + }
> + EXPORT_SYMBOL_GPL(sysfs_groups_change_owner);

This is now a conflict between the powerpc tree and Linus' tree.

-- 
Cheers,
Stephen Rothwell


pgpbZM_67Wxtq.pgp
Description: OpenPGP digital signature


Re: [PATCH v4 03/16] powerpc: Use a datatype for instructions

2020-04-01 Thread Jordan Niethe
On Wed, Apr 1, 2020 at 9:32 PM Balamuruhan S  wrote:
>
> On Fri, 2020-03-20 at 16:17 +1100, Jordan Niethe wrote:
> > Currently unsigned ints are used to represent instructions on powerpc.
> > This has worked well as instructions have always been 4 byte words.
> > However, a future ISA version will introduce some changes to
> > instructions that mean this scheme will no longer work as well. This
> > change is Prefixed Instructions. A prefixed instruction is made up of a
> > word prefix followed by a word suffix to make an 8 byte double word
> > instruction. No matter the endianness of the system the prefix always
> > comes first. Prefixed instructions are only planned for powerpc64.
> >
> > Introduce a ppc_inst type to represent both prefixed and word
> > instructions on powerpc64 while keeping it possible to exclusively have
> > word instructions on powerpc32. A later patch will expand the type to
> > include prefixed instructions but for now just typedef it to a u32.
> >
> > Later patches will introduce helper functions and macros for
> > manipulating the instructions so that powerpc64 and powerpc32 might
> > maintain separate type definitions.
> >
> > Signed-off-by: Jordan Niethe 
> > ---
> >  arch/powerpc/include/asm/code-patching.h | 31 +--
> >  arch/powerpc/include/asm/inst.h  | 53 +++
> >  arch/powerpc/include/asm/sstep.h |  5 +-
> >  arch/powerpc/kernel/align.c  |  2 +-
> >  arch/powerpc/kernel/hw_breakpoint.c  |  3 +-
> >  arch/powerpc/kernel/kprobes.c|  2 +-
> >  arch/powerpc/kernel/mce_power.c  |  5 +-
> >  arch/powerpc/kernel/optprobes.c  | 10 ++--
> >  arch/powerpc/kernel/trace/ftrace.c   | 66 
> >  arch/powerpc/kvm/emulate_loadstore.c |  1 +
> >  arch/powerpc/lib/code-patching.c | 54 +--
> >  arch/powerpc/lib/sstep.c |  4 +-
> >  arch/powerpc/lib/test_emulate_step.c |  9 ++--
> >  arch/powerpc/xmon/xmon.c | 12 ++---
> >  14 files changed, 160 insertions(+), 97 deletions(-)
> >  create mode 100644 arch/powerpc/include/asm/inst.h
> >
> > diff --git a/arch/powerpc/include/asm/code-patching.h
> > b/arch/powerpc/include/asm/code-patching.h
> > index 898b54262881..cb5106f92d67 100644
> > --- a/arch/powerpc/include/asm/code-patching.h
> > +++ b/arch/powerpc/include/asm/code-patching.h
> > @@ -11,6 +11,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >
> >  /* Flags for create_branch:
> >   * "b"   == create_branch(addr, target, 0);
> > @@ -22,27 +23,27 @@
> >  #define BRANCH_ABSOLUTE  0x2
> >
> >  bool is_offset_in_branch_range(long offset);
> > -unsigned int create_branch(const unsigned int *addr,
> > +ppc_inst create_branch(const ppc_inst *addr,
> >  unsigned long target, int flags);
> > -unsigned int create_cond_branch(const unsigned int *addr,
> > +unsigned int create_cond_branch(const ppc_inst *addr,
> >   unsigned long target, int flags);
> > -int patch_branch(unsigned int *addr, unsigned long target, int flags);
> > -int patch_instruction(unsigned int *addr, unsigned int instr);
> > -int raw_patch_instruction(unsigned int *addr, unsigned int instr);
> > +int patch_branch(ppc_inst *addr, unsigned long target, int flags);
> > +int patch_instruction(ppc_inst *addr, ppc_inst instr);
>
> we need to handle this change for its user in epapr_paravirt.c,
Thanks, good catch.
>
> arch/powerpc/kernel/epapr_paravirt.c: In function 'early_init_dt_scan_epapr':
> arch/powerpc/kernel/epapr_paravirt.c:40:48: error: incompatible type for
> argument 2 of 'patch_instruction'
>40 |   patch_instruction(epapr_hypercall_start + i, inst);
>   |^~~~
>   ||
>   |u32 {aka unsigned int}
> In file included from arch/powerpc/kernel/epapr_paravirt.c:12:
> ./arch/powerpc/include/asm/code-patching.h:31:44: note: expected 'ppc_inst'
> {aka 'struct ppc_inst'} but argument is of type 'u32' {aka 'unsigned int'}
>31 | int patch_instruction(void *addr, ppc_inst instr);
>   |   ~^
> make[2]: *** [scripts/Makefile.build:268: 
> arch/powerpc/kernel/epapr_paravirt.o]
> Error 1
> make[1]: *** [scripts/Makefile.build:505: arch/powerpc/kernel] Error 2
> make: *** [Makefile:1683: arch/powerpc] Error 2
>
>
> -- Bala
>
> > +int raw_patch_instruction(ppc_inst *addr, ppc_inst instr);
> >
> >  static inline unsigned long patch_site_addr(s32 *site)
> >  {
> >   return (unsigned long)site + *site;
> >  }
> >
> > -static inline int patch_instruction_site(s32 *site, unsigned int instr)
> > +static inline int patch_instruction_site(s32 *site, ppc_inst instr)
> >  {
> > - return patch_instruction((unsigned int *)patch_site_addr(site), 
> > instr);
> > + return patch_instruction((ppc_inst

Re: [PATCH v3 1/1] ppc/crash: Reset spinlocks during crash

2020-04-01 Thread Leonardo Bras
Hello Peter, 

On Wed, 2020-04-01 at 11:26 +0200, Peter Zijlstra wrote:
> You might want to add a note to your asm/spinlock.h that you rely on
> spin_unlock() unconditionally clearing a lock.
> 
> This isn't naturally true for all lock implementations. Consider ticket
> locks, doing a surplus unlock will wreck your lock state in that case.
> So anybody poking at the powerpc spinlock implementation had better know
> you rely on this.

Good idea. I will add this to my changes and generate a v4.

Thank you,


signature.asc
Description: This is a digitally signed message part


Re: [PATCH v4 13/25] nvdimm/ocxl: Read the capability registers & wait for device ready

2020-04-01 Thread Dan Williams
On Sun, Mar 29, 2020 at 10:23 PM Alastair D'Silva  wrote:
>
> This patch reads timeouts & firmware version from the controller, and
> uses those timeouts to wait for the controller to report that it is ready
> before handing the memory over to libnvdimm.
>
> Signed-off-by: Alastair D'Silva 
> ---
>  drivers/nvdimm/ocxl/Makefile|  2 +-
>  drivers/nvdimm/ocxl/main.c  | 85 +
>  drivers/nvdimm/ocxl/ocxlpmem.h  | 29 +
>  drivers/nvdimm/ocxl/ocxlpmem_internal.c | 19 ++
>  4 files changed, 134 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/nvdimm/ocxl/ocxlpmem_internal.c
>
> diff --git a/drivers/nvdimm/ocxl/Makefile b/drivers/nvdimm/ocxl/Makefile
> index e0e8ade1987a..bab97082e062 100644
> --- a/drivers/nvdimm/ocxl/Makefile
> +++ b/drivers/nvdimm/ocxl/Makefile
> @@ -4,4 +4,4 @@ ccflags-$(CONFIG_PPC_WERROR)+= -Werror
>
>  obj-$(CONFIG_OCXL_PMEM) += ocxlpmem.o
>
> -ocxlpmem-y := main.o
> \ No newline at end of file
> +ocxlpmem-y := main.o ocxlpmem_internal.o
> diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
> index c0066fedf9cc..be76acd33d74 100644
> --- a/drivers/nvdimm/ocxl/main.c
> +++ b/drivers/nvdimm/ocxl/main.c
> @@ -8,6 +8,7 @@
>
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -327,6 +328,50 @@ static void remove(struct pci_dev *pdev)
> }
>  }
>
> +/**
> + * read_device_metadata() - Retrieve config information from the AFU and 
> save it for future use
> + * @ocxlpmem: the device metadata
> + * Return: 0 on success, negative on failure
> + */
> +static int read_device_metadata(struct ocxlpmem *ocxlpmem)
> +{
> +   u64 val;
> +   int rc;
> +
> +   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CCAP0,
> +OCXL_LITTLE_ENDIAN, &val);

This calling convention would seem to defeat the ability of sparse to
validate endian correctness. That's independent of this series, but I
wonder how someone reviewing this can tell why the argument is sometimes
OCXL_LITTLE_ENDIAN and sometimes OCXL_HOST_ENDIAN.

> +   if (rc)
> +   return rc;
> +
> +   ocxlpmem->scm_revision = val & 0x;
> +   ocxlpmem->read_latency = (val >> 32) & 0x;
> +   ocxlpmem->readiness_timeout = (val >> 48) & 0x0F;
> +   ocxlpmem->memory_available_timeout = val >> 52;

Maybe some macros to parse out these register fields?

> +
> +   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CCAP1,
> +OCXL_LITTLE_ENDIAN, &val);
> +   if (rc)
> +   return rc;
> +
> +   ocxlpmem->max_controller_dump_size = val & 0x;
> +
> +   // Extract firmware version text
> +   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_FWVER,
> +OCXL_HOST_ENDIAN,
> +(u64 *)ocxlpmem->fw_version);
> +   if (rc)
> +   return rc;
> +
> +   ocxlpmem->fw_version[8] = '\0';
> +
> +   dev_info(&ocxlpmem->dev,
> +"Firmware version '%s' SCM revision %d:%d\n",
> +ocxlpmem->fw_version, ocxlpmem->scm_revision >> 4,
> +ocxlpmem->scm_revision & 0x0F);

Does the driver need to be chatty here? If this data is relevant,
should it appear in sysfs by default?

> +
> +   return 0;
> +}
> +
>  /**
>   * probe_function0() - Set up function 0 for an OpenCAPI persistent memory 
> device
>   * This is important as it enables templates higher than 0 across all other
> @@ -359,6 +404,9 @@ static int probe(struct pci_dev *pdev, const struct 
> pci_device_id *ent)
>  {
> struct ocxlpmem *ocxlpmem;
> int rc;
> +   u64 chi;
> +   u16 elapsed, timeout;
> +   bool ready = false;
>
> if (PCI_FUNC(pdev->devfn) == 0)
> return probe_function0(pdev);
> @@ -413,6 +461,43 @@ static int probe(struct pci_dev *pdev, const struct 
> pci_device_id *ent)
> goto err;
> }
>
> +   rc = read_device_metadata(ocxlpmem);
> +   if (rc) {
> +   dev_err(&pdev->dev, "Could not read metadata\n");
> +   goto err;
> +   }
> +
> +   elapsed = 0;
> +   timeout = ocxlpmem->readiness_timeout +
> + ocxlpmem->memory_available_timeout;
> +
> +   while (true) {
> +   rc = ocxlpmem_chi(ocxlpmem, &chi);
> +   ready = (chi & (GLOBAL_MMIO_CHI_CRDY | GLOBAL_MMIO_CHI_MA)) ==
> +   (GLOBAL_MMIO_CHI_CRDY | GLOBAL_MMIO_CHI_MA);
> +
> +   if (ready)
> +   break;
> +
> +   if (elapsed++ > timeout) {
> +   dev_err(&ocxlpmem->dev,
> +   "OpenCAPI Persistent Memory ready 
> timeout.\n");
> +
> +   if (!(chi & GLOBAL_MMIO_CHI_CRDY))
> +   dev_err(&ocxlpmem->dev,
> +   

Re: [PATCH v4 15/25] nvdimm/ocxl: Register a character device for userspace to interact with

2020-04-01 Thread Dan Williams
On Sun, Mar 29, 2020 at 10:53 PM Alastair D'Silva  wrote:
>
> This patch introduces a character device (/dev/ocxlpmemX) which further
> patches will use to interact with userspace, such as error logs,
> controller stats and card debug functionality.

This was asked earlier, but I'll reiterate: I do not see what
justifies an ocxlpmemX private device ABI vs routing through the
existing generic ndbusX and nmemX character devices.

>
> Signed-off-by: Alastair D'Silva 
> ---
>  drivers/nvdimm/ocxl/main.c | 117 -
>  drivers/nvdimm/ocxl/ocxlpmem.h |   2 +
>  2 files changed, 117 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
> index 8db573036423..9b85fcd3f1c9 100644
> --- a/drivers/nvdimm/ocxl/main.c
> +++ b/drivers/nvdimm/ocxl/main.c
> @@ -10,6 +10,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include "ocxlpmem.h"
> @@ -356,6 +357,67 @@ static int ocxlpmem_register(struct ocxlpmem *ocxlpmem)
> return device_register(&ocxlpmem->dev);
>  }
>
> +static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
> +{
> +   put_device(&ocxlpmem->dev);
> +}
> +
> +static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
> +{
> +   return (!get_device(&ocxlpmem->dev)) ? NULL : ocxlpmem;
> +}
> +
> +static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
> +{
> +   struct ocxlpmem *ocxlpmem;
> +   int minor = MINOR(devno);
> +
> +   mutex_lock(&minors_idr_lock);
> +   ocxlpmem = idr_find(&minors_idr, minor);
> +   if (ocxlpmem)
> +   ocxlpmem_get(ocxlpmem);
> +   mutex_unlock(&minors_idr_lock);
> +
> +   return ocxlpmem;
> +}
> +
> +static int file_open(struct inode *inode, struct file *file)
> +{
> +   struct ocxlpmem *ocxlpmem;
> +
> +   ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev);
> +   if (!ocxlpmem)
> +   return -ENODEV;
> +
> +   file->private_data = ocxlpmem;
> +   return 0;
> +}
> +
> +static int file_release(struct inode *inode, struct file *file)
> +{
> +   struct ocxlpmem *ocxlpmem = file->private_data;
> +
> +   ocxlpmem_put(ocxlpmem);
> +   return 0;
> +}
> +
> +static const struct file_operations fops = {
> +   .owner  = THIS_MODULE,
> +   .open   = file_open,
> +   .release= file_release,
> +};
> +
> +/**
> + * create_cdev() - Create the chardev in /dev for the device
> + * @ocxlpmem: the SCM metadata
> + * Return: 0 on success, negative on failure
> + */
> +static int create_cdev(struct ocxlpmem *ocxlpmem)
> +{
> +   cdev_init(&ocxlpmem->cdev, &fops);
> +   return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
> +}
> +
>  /**
>   * ocxlpmem_remove() - Free an OpenCAPI persistent memory device
>   * @pdev: the PCI device information struct
> @@ -376,6 +438,13 @@ static void remove(struct pci_dev *pdev)
> if (ocxlpmem->nvdimm_bus)
> nvdimm_bus_unregister(ocxlpmem->nvdimm_bus);
>
> +   /*
> +* Remove the cdev early to prevent a race against userspace
> +* via the char dev
> +*/
> +   if (ocxlpmem->cdev.owner)
> +   cdev_del(&ocxlpmem->cdev);
> +
> device_unregister(&ocxlpmem->dev);
> }
>  }
> @@ -527,11 +596,18 @@ static int probe(struct pci_dev *pdev, const struct 
> pci_device_id *ent)
> goto err;
> }
>
> -   if (setup_command_metadata(ocxlpmem)) {
> +   rc = setup_command_metadata(ocxlpmem);
> +   if (rc) {
> dev_err(&pdev->dev, "Could not read command metadata\n");
> goto err;
> }
>
> +   rc = create_cdev(ocxlpmem);
> +   if (rc) {
> +   dev_err(&pdev->dev, "Could not create character device\n");
> +   goto err;
> +   }
> +
> elapsed = 0;
> timeout = ocxlpmem->readiness_timeout +
>   ocxlpmem->memory_available_timeout;
> @@ -599,6 +675,36 @@ static struct pci_driver pci_driver = {
> .shutdown = remove,
>  };
>
> +static int file_init(void)
> +{
> +   int rc;
> +
> +   rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxlpmem");
> +   if (rc) {
> +   idr_destroy(&minors_idr);
> +   pr_err("Unable to allocate OpenCAPI persistent memory major 
> number: %d\n",
> +  rc);
> +   return rc;
> +   }
> +
> +   ocxlpmem_class = class_create(THIS_MODULE, "ocxlpmem");
> +   if (IS_ERR(ocxlpmem_class)) {
> +   idr_destroy(&minors_idr);
> +   pr_err("Unable to create ocxlpmem class\n");
> +   unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> +   return PTR_ERR(ocxlpmem_class);
> +   }
> +
> +   return 0;
> +}
> +
> +static void file_exit(void)
> +{
> +   class_destroy(ocxlpmem_class)

Re: [PATCH v4 08/25] ocxl: Emit a log message showing how much LPC memory was detected

2020-04-01 Thread Joe Perches
On Wed, 2020-04-01 at 01:49 -0700, Dan Williams wrote:
> On Sun, Mar 29, 2020 at 10:23 PM Alastair D'Silva  
> wrote:
> > This patch emits a message showing how much LPC memory & special purpose
> > memory was detected on an OCXL device.
[]
> > diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
[]
> > @@ -568,6 +568,10 @@ static int read_afu_lpc_memory_info(struct pci_dev 
> > *dev,
> > afu->special_purpose_mem_size =
> > total_mem_size - lpc_mem_size;
> > }
> > +
> > +   dev_info(&dev->dev, "Probed LPC memory of %#llx bytes and special 
> > purpose memory of %#llx bytes\n",
> > +afu->lpc_mem_size, afu->special_purpose_mem_size);
> 
> A patch for a single log message is too fine grained for my taste,
> let's squash this into another patch in the series.

Is the granularity of lpc_mem_size actually bytes?
Might this be better reported in KiB or similar, using helper functions?

Maybe something like:

/*
 * si_val() - scale a size down to the largest unit that divides it exactly.
 * @val: size to scale, expressed in the smallest unit ('B')
 *
 * Divides @val by 1024 for as long as it is an exact multiple of 1024 and
 * a larger unit letter remains in "BKMGTPE". si_type() performs the same
 * walk and yields the matching unit letter.
 *
 * Return: the scaled-down value.
 */
unsigned long si_val(unsigned long val)
{
	static const char units[] = "BKMGTPE";
	unsigned int idx;

	/* Stop at the last unit letter or at the first non-multiple of 1024. */
	for (idx = 0; units[idx + 1] != '\0' && (val & 1023) == 0; idx++)
		val >>= 10;

	return val;
}

/*
 * si_type() - unit letter matching the value produced by si_val().
 * @val: size expressed in the smallest unit ('B')
 *
 * Steps through "BKMGTPE" one letter per exact division of @val by 1024,
 * stopping at the first value that is not a multiple of 1024 or at the
 * last letter of the table.
 *
 * Return: the unit letter; 'B' when @val is 0.
 */
char si_type(unsigned long val)
{
	static const char units[] = "BKMGTPE";
	const char *unit = units;

	/*
	 * Zero is a multiple of 1024 at every step, so without the explicit
	 * check it would walk to the end of the table and be reported as 'E'.
	 */
	while (val && !(val & 1023) && unit[1]) {
		val >>= 10;
		unit++;
	}

	return *unit;
}

so this could be something like:

   dev_info(&dev->dev, "Probed LPC memory of %#llu%c and special purpose 
memory of %#llu%c\n",
si_val(afu->lpc_mem_size), si_type(afu->lpc_mem_size),
si_val(afu->special_purpose_mem_size), 
si_type(afu->special_purpose_mem_size));





Re: [PATCH v4 19/25] nvdimm/ocxl: Forward events to userspace

2020-04-01 Thread Dan Williams
On Tue, Mar 31, 2020 at 1:59 AM Alastair D'Silva  wrote:
>
> Some of the interrupts that the card generates are better handled
> by the userspace daemon, in particular:
> Controller Hardware/Firmware Fatal
> Controller Dump Available
> Error Log available
>
> This patch allows a userspace application to register an eventfd with
> the driver via SCM_IOCTL_EVENTFD to receive notifications of these
> interrupts.
>
> Userspace can then identify what events have occurred by calling
> SCM_IOCTL_EVENT_CHECK and checking against the SCM_IOCTL_EVENT_FOO
> masks.

The number of new ioctls in this driver is too high; it seems much of
this data can be exported via sysfs attributes, which are more
maintainable than ioctls. sysfs also has the ability to signal
events on sysfs attributes, see sysfs_notify_dirent().

Can you step back and review the ABI exposure of the driver and what
can be moved to sysfs? If you need to have bus specific attributes
ordered underneath the libnvdimm generic attributes you can create a
sysfs attribute subdirectory.

In general a roadmap document of all the proposed ABI is needed to
make sure it is both sufficient and necessary. See the libnvdimm
document that introduced the initial libnvdimm ABI:

https://www.kernel.org/doc/Documentation/nvdimm/nvdimm.txt

>
> Signed-off-by: Alastair D'Silva 
> ---
>  drivers/nvdimm/ocxl/main.c | 220 +
>  drivers/nvdimm/ocxl/ocxlpmem.h |   4 +
>  include/uapi/nvdimm/ocxlpmem.h |  12 ++
>  3 files changed, 236 insertions(+)
>
> diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
> index 0040fc09cceb..cb6cdc9eb899 100644
> --- a/drivers/nvdimm/ocxl/main.c
> +++ b/drivers/nvdimm/ocxl/main.c
> @@ -10,6 +10,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -301,8 +302,19 @@ static void free_ocxlpmem(struct ocxlpmem *ocxlpmem)
>  {
> int rc;
>
> +   // Disable doorbells
> +   (void)ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIEC,
> +OCXL_LITTLE_ENDIAN,
> +GLOBAL_MMIO_CHI_ALL);
> +
> free_minor(ocxlpmem);
>
> +   if (ocxlpmem->irq_addr[1])
> +   iounmap(ocxlpmem->irq_addr[1]);
> +
> +   if (ocxlpmem->irq_addr[0])
> +   iounmap(ocxlpmem->irq_addr[0]);
> +
> if (ocxlpmem->ocxl_context) {
> rc = ocxl_context_detach(ocxlpmem->ocxl_context);
> if (rc == -EBUSY)
> @@ -398,6 +410,11 @@ static int file_release(struct inode *inode, struct file 
> *file)
>  {
> struct ocxlpmem *ocxlpmem = file->private_data;
>
> +   if (ocxlpmem->ev_ctx) {
> +   eventfd_ctx_put(ocxlpmem->ev_ctx);
> +   ocxlpmem->ev_ctx = NULL;
> +   }
> +
> ocxlpmem_put(ocxlpmem);
> return 0;
>  }
> @@ -928,6 +945,52 @@ static int ioctl_controller_stats(struct ocxlpmem 
> *ocxlpmem,
> return rc;
>  }
>
> +static int ioctl_eventfd(struct ocxlpmem *ocxlpmem,
> +struct ioctl_ocxlpmem_eventfd __user *uarg)
> +{
> +   struct ioctl_ocxlpmem_eventfd args;
> +
> +   if (copy_from_user(&args, uarg, sizeof(args)))
> +   return -EFAULT;
> +
> +   if (ocxlpmem->ev_ctx)
> +   return -EBUSY;
> +
> +   ocxlpmem->ev_ctx = eventfd_ctx_fdget(args.eventfd);
> +   if (IS_ERR(ocxlpmem->ev_ctx))
> +   return PTR_ERR(ocxlpmem->ev_ctx);
> +
> +   return 0;
> +}
> +
> +static int ioctl_event_check(struct ocxlpmem *ocxlpmem, u64 __user *uarg)
> +{
> +   u64 val = 0;
> +   int rc;
> +   u64 chi = 0;
> +
> +   rc = ocxlpmem_chi(ocxlpmem, &chi);
> +   if (rc < 0)
> +   return rc;
> +
> +   if (chi & GLOBAL_MMIO_CHI_ELA)
> +   val |= IOCTL_OCXLPMEM_EVENT_ERROR_LOG_AVAILABLE;
> +
> +   if (chi & GLOBAL_MMIO_CHI_CDA)
> +   val |= IOCTL_OCXLPMEM_EVENT_CONTROLLER_DUMP_AVAILABLE;
> +
> +   if (chi & GLOBAL_MMIO_CHI_CFFS)
> +   val |= IOCTL_OCXLPMEM_EVENT_FIRMWARE_FATAL;
> +
> +   if (chi & GLOBAL_MMIO_CHI_CHFS)
> +   val |= IOCTL_OCXLPMEM_EVENT_HARDWARE_FATAL;
> +
> +   if (copy_to_user((u64 __user *)uarg, &val, sizeof(val)))
> +   return -EFAULT;
> +
> +   return rc;
> +}
> +
>  static long file_ioctl(struct file *file, unsigned int cmd, unsigned long 
> args)
>  {
> struct ocxlpmem *ocxlpmem = file->private_data;
> @@ -956,6 +1019,15 @@ static long file_ioctl(struct file *file, unsigned int 
> cmd, unsigned long args)
> rc = ioctl_controller_stats(ocxlpmem,
> (struct 
> ioctl_ocxlpmem_controller_stats __user *)args);
> break;
> +
> +   case IOCTL_OCXLPMEM_EVENTFD:
> +   rc = ioctl_eventfd(ocxlpmem,
> +  (struct ioctl_ocxlpmem_eventfd __user 
> *)args);
> +   br

Re: [PATCH v5 1/4] powerpc/papr_scm: Fetch nvdimm health information from PHYP

2020-04-01 Thread Dan Williams
On Tue, Mar 31, 2020 at 7:33 AM Vaibhav Jain  wrote:
>
> Implement support for fetching nvdimm health information via
> H_SCM_HEALTH hcall as documented in Ref[1]. The hcall returns a pair
> of 64-bit big-endian integers which are then stored in 'struct
> papr_scm_priv' and subsequently partially exposed to user-space via
> newly introduced dimm specific attribute 'papr_flags'. Also a new asm
> header named 'papr-scm.h' is added that describes the interface
> between PHYP and guest kernel.
>
> Following flags are reported via 'papr_flags' sysfs attribute contents
> of which are space separated string flags indicating various nvdimm
> states:
>
>  * "not_armed"  : Indicating that nvdimm contents wont survive a power
>cycle.

s/wont/will not/

>  * "save_fail"  : Indicating that nvdimm contents couldn't be flushed
>during last shutdown event.

In the nfit definition this description is "flush_fail". The
"save_fail" flag was specific to hybrid devices that don't have
persistent media and instead scuttle away data from DRAM to flash on
power-failure.

>  * "restore_fail": Indicating that nvdimm contents couldn't be restored
>during dimm initialization.
>  * "encrypted"  : Dimm contents are encrypted.

This does not seem like a health flag to me, have you considered the
libnvdimm security interface for this indicator?

>  * "smart_notify": There is health event for the nvdimm.

Are you also going to signal the sysfs attribute when this event happens?

>  * "scrubbed"   : Indicating that contents of the nvdimm have been
>scrubbed.

This one seems odd to me: what does it mean if it is not set? What does
it mean if a new scrub has been launched? Basically, is there value in
exposing this state?

>  * "locked" : Indicating that nvdimm contents cant be modified
>until next power cycle.

There is the generic NDD_LOCKED flag, can you use that? ...and in
general I wonder if we should try to unify all the common papr_scm and
nfit health flags in a generic location. It will already be the case
that ndctl needs to look somewhere papr-specific for this data; maybe it
all should have been generic from the beginning.


In any event, can you also add this content to a new
Documentation/ABI/testing/sysfs-bus-papr? See sysfs-bus-nfit for
comparison.

>
> [1]: commit 58b278f568f0 ("powerpc: Provide initial documentation for
> PAPR hcalls")
>
> Signed-off-by: Vaibhav Jain 
> ---
> Changelog:
>
> v4..v5 : None
>
> v3..v4 : None
>
> v2..v3 : Removed PAPR_SCM_DIMM_HEALTH_NON_CRITICAL as a condition for
>  NVDIMM unarmed [Aneesh]
>
> v1..v2 : New patch in the series.
> ---
>  arch/powerpc/include/asm/papr_scm.h   |  48 ++
>  arch/powerpc/platforms/pseries/papr_scm.c | 105 +-
>  2 files changed, 151 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/papr_scm.h
>
> diff --git a/arch/powerpc/include/asm/papr_scm.h 
> b/arch/powerpc/include/asm/papr_scm.h
> new file mode 100644
> index ..868d3360f56a
> --- /dev/null
> +++ b/arch/powerpc/include/asm/papr_scm.h
> @@ -0,0 +1,48 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * Structures and defines needed to manage nvdimms for spapr guests.
> + */
> +#ifndef _ASM_POWERPC_PAPR_SCM_H_
> +#define _ASM_POWERPC_PAPR_SCM_H_
> +
> +#include 
> +#include 
> +
> +/* DIMM health bitmap bitmap indicators */
> +/* SCM device is unable to persist memory contents */
> +#define PAPR_SCM_DIMM_UNARMED  PPC_BIT(0)
> +/* SCM device failed to persist memory contents */
> +#define PAPR_SCM_DIMM_SHUTDOWN_DIRTY   PPC_BIT(1)
> +/* SCM device contents are persisted from previous IPL */
> +#define PAPR_SCM_DIMM_SHUTDOWN_CLEAN   PPC_BIT(2)
> +/* SCM device contents are not persisted from previous IPL */
> +#define PAPR_SCM_DIMM_EMPTYPPC_BIT(3)
> +/* SCM device memory life remaining is critically low */
> +#define PAPR_SCM_DIMM_HEALTH_CRITICAL  PPC_BIT(4)
> +/* SCM device will be garded off next IPL due to failure */
> +#define PAPR_SCM_DIMM_HEALTH_FATAL PPC_BIT(5)
> +/* SCM contents cannot persist due to current platform health status */
> +#define PAPR_SCM_DIMM_HEALTH_UNHEALTHY PPC_BIT(6)
> +/* SCM device is unable to persist memory contents in certain conditions */
> +#define PAPR_SCM_DIMM_HEALTH_NON_CRITICAL  PPC_BIT(7)
> +/* SCM device is encrypted */
> +#define PAPR_SCM_DIMM_ENCRYPTEDPPC_BIT(8)
> +/* SCM device has been scrubbed and locked */
> +#define PAPR_SCM_DIMM_SCRUBBED_AND_LOCKED  PPC_BIT(9)
> +
> +/* Bits status indicators for health bitmap indicating unarmed dimm */
> +#define PAPR_SCM_DIMM_UNARMED_MASK (PAPR_SCM_DIMM_UNARMED |\
> +   PAPR_SCM_DIMM_HEALTH_UNHEALTHY)
> +
> +/* Bits status indicators for health bitmap indicating unflushed dimm */
> +#define PAPR_S

RE: [PATCH v4 00/25] Add support for OpenCAPI Persistent Memory devices

2020-04-01 Thread Michael Ellerman
"Alastair D'Silva"  writes:
>> -Original Message-
>> From: Dan Williams 
>> 
>> On Sun, Mar 29, 2020 at 10:23 PM Alastair D'Silva 
>> wrote:
>> >
>> > This series adds support for OpenCAPI Persistent Memory devices on
>> > bare metal (arch/powernv), exposing them as nvdimms so that we can
>> > make use of the existing infrastructure. There already exists a driver
>> > for the same devices abstracted through PowerVM (arch/pseries):
>> > arch/powerpc/platforms/pseries/papr_scm.c
>> >
>> > These devices are connected via OpenCAPI, and present as LPC (lowest
>> coherence point) memory to the system, practically, that means that
>> memory on these cards could be treated as conventional, cache-coherent
>> memory.
>> >
>> > Since the devices are connected via OpenCAPI, they are not enumerated
>> via ACPI. Instead, OpenCAPI links present as pseudo-PCI bridges, with
>> devices below them.
>> >
>> > This series introduces a driver that exposes the memory on these cards as
>> nvdimms, with each card getting it's own bus. This is somewhat complicated
>> by the fact that the cards do not have out of band persistent storage for
>> metadata, so 1 SECTION_SIZE's (see SPARSEMEM) worth of storage is carved
>> out of the top of the card storage to implement the ndctl_config_* calls.
>> 
>> Is it really tied to section-size? Can't that change based on the configured
>> page-size? It's not clear to me why that would be the choice, but I'll dig 
>> into
>> the implementation.
>> 
>
> I had tried using PAGE_SIZE, but ran into problems carving off just 1 page 
> and handing it to the kernel, while leaving the rest as pmem. That was a 
> while ago though, so maybe I should retry it.
>
>> > The driver is not responsible for configuring the NPU (NVLink Processing
>> Unit) BARs to map the LPC memory from the card into the system's physical
>> address space, instead, it requests this to be done via OPAL calls (typically
>> implemented by Skiboot).
>> 
>> Are OPAL calls similar to ACPI DSMs? I.e. methods for the OS to invoke
>> platform firmware services? What's Skiboot?
>> 
>
> Yes, OPAL is the interface to firmware for POWER. Skiboot is the open-source 
> (and only) implementation of OPAL.

  https://github.com/open-power/skiboot

In particular the tokens for calls are defined here:

  https://github.com/open-power/skiboot/blob/master/include/opal-api.h#L220

And you can grep for the token to find the implementation:

  https://github.com/open-power/skiboot/blob/master/hw/npu2-opencapi.c#L2328


cheers


Re: [PATCH v4 00/25] Add support for OpenCAPI Persistent Memory devices

2020-04-01 Thread Oliver O'Halloran
On Thu, Apr 2, 2020 at 2:42 PM Michael Ellerman  wrote:
>
> "Alastair D'Silva"  writes:
> >> -Original Message-
> >> From: Dan Williams 
> >>
> >> On Sun, Mar 29, 2020 at 10:23 PM Alastair D'Silva 
> >> wrote:
> >> >
> >> > *snip*
> >> Are OPAL calls similar to ACPI DSMs? I.e. methods for the OS to invoke
> >> platform firmware services? What's Skiboot?
> >>
> >
> > Yes, OPAL is the interface to firmware for POWER. Skiboot is the 
> > open-source (and only) implementation of OPAL.
>
>   https://github.com/open-power/skiboot
>
> In particular the tokens for calls are defined here:
>
>   https://github.com/open-power/skiboot/blob/master/include/opal-api.h#L220
>
> And you can grep for the token to find the implementation:
>
>   https://github.com/open-power/skiboot/blob/master/hw/npu2-opencapi.c#L2328

I'm not sure I'd encourage anyone to read npu2-opencapi.c. I find it
hard enough to follow even with access to the workbooks.

There's an OPAL call API reference here:
http://open-power.github.io/skiboot/doc/opal-api/index.html

Oliver


Re: [RFC PATCH 3/4] powerpc ppc-opcode: move ppc instuction encoding from test_emulate_step

2020-04-01 Thread Michael Ellerman
"Naveen N. Rao"  writes:
> Balamuruhan S wrote:
>> Few ppc instructions are encoded in test_emulate_step.c, consolidate them to
>> ppc-opcode.h, fix redefintion errors in bpf_jit caused due to this 
>> consolidation.
>> Reuse the macros from ppc-opcode.h
...
>> diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
>> index 4ec2a9f14f84..8a9f16a7262e 100644
>> --- a/arch/powerpc/net/bpf_jit32.h
>> +++ b/arch/powerpc/net/bpf_jit32.h
>> @@ -76,13 +76,13 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
>>  else {  PPC_ADDIS(r, base, IMM_HA(i));\
>>  PPC_LBZ(r, r, IMM_L(i)); } } while(0)
>> 
>> -#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i);   
>>   \
>> +#define _OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_ENCODE_LD(r, base, 
>> i)); \
>  ^
> Should be PPC_LD_OFFS. For the next version, please also build ppc32 and 
> booke codebase to confirm that your changes in those areas are fine.
>
> PPC_ENCODE_* also looks quite verbose, so perhaps PPC_ENC_* might be 
> better. Otherwise, this patchset looks good to me and should help reuse 
> some of those macros, especially from the eBPF codebase.
>
> Michael,
> Can you let us know if this looks ok to you? Based on your feedback, we 
> will also update the eBPF codebase.

I didn't really like the first patch which does the mass renaming. It
creates a huge amount of churn.

I think I'd be happier if this series just did what it needs, and then
maybe at the end there's a patch to update all the existing names, which
I may or may not take.

As far as the naming, currently we have:

PPC_INST_FOO - just the opcode

PPC_FOO(x) - macro to encode the opcode with x and (usually) also emit a
.long and stringify.

And you need an in-between that gives you the full instruction but
without the .long and stringify, right?

So how about PPC_RAW_FOO() for just the numeric value, without the .long
and stringify.

We also seem to have a lot of PPC_INST_FOO's that are only ever used in
the PPC_INST macro. I'm inclined to fold those into the PPC_INST macro,
to avoid people accidentally using the PPC_INST version when they don't
mean to. But that's a separate issue.

cheers


RE: [PATCH v4 02/25] mm/memory_hotplug: Allow check_hotplug_memory_addressable to be called from drivers

2020-04-01 Thread Alastair D'Silva
> -Original Message-
> From: Dan Williams 
> Sent: Wednesday, 1 April 2020 7:48 PM
> To: Alastair D'Silva 
> Cc: Aneesh Kumar K . V ; Oliver O'Halloran
> ; Benjamin Herrenschmidt
> ; Paul Mackerras ; Michael
> Ellerman ; Frederic Barrat ;
> Andrew Donnellan ; Arnd Bergmann
> ; Greg Kroah-Hartman ;
> Vishal Verma ; Dave Jiang
> ; Ira Weiny ; Andrew Morton
> ; Mauro Carvalho Chehab
> ; David S. Miller ;
> Rob Herring ; Anton Blanchard ;
> Krzysztof Kozlowski ; Mahesh Salgaonkar
> ; Madhavan Srinivasan
> ; Cédric Le Goater ; Anju T
> Sudhakar ; Hari Bathini
> ; Thomas Gleixner ; Greg
> Kurz ; Nicholas Piggin ; Masahiro
> Yamada ; Alexey Kardashevskiy
> ; Linux Kernel Mailing List ;
> linuxppc-dev ; linux-nvdimm  nvd...@lists.01.org>; Linux MM 
> Subject: Re: [PATCH v4 02/25] mm/memory_hotplug: Allow
> check_hotplug_memory_addressable to be called from drivers
> 
> On Sun, Mar 29, 2020 at 10:23 PM Alastair D'Silva 
> wrote:
> >
> > When setting up OpenCAPI connected persistent memory, the range check
> > may not be performed until quite late (or perhaps not at all, if the
> > user does not establish a DAX device).
> >
> > This patch makes the range check callable so we can perform the check
> > while probing the OpenCAPI Persistent Memory device.
> >
> > Signed-off-by: Alastair D'Silva 
> > Reviewed-by: Andrew Donnellan 
> > ---
> >  include/linux/memory_hotplug.h | 5 +
> >  mm/memory_hotplug.c| 4 ++--
> >  2 files changed, 7 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/linux/memory_hotplug.h
> > b/include/linux/memory_hotplug.h index f4d59155f3d4..9a19ae0d7e31
> > 100644
> > --- a/include/linux/memory_hotplug.h
> > +++ b/include/linux/memory_hotplug.h
> > @@ -337,6 +337,11 @@ static inline void __remove_memory(int nid, u64
> > start, u64 size) {}  extern void set_zone_contiguous(struct zone
> > *zone);  extern void clear_zone_contiguous(struct zone *zone);
> >
> > +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
> > +int check_hotplug_memory_addressable(unsigned long pfn,
> > +unsigned long nr_pages); #endif
> > +/* CONFIG_MEMORY_HOTPLUG_SPARSE */
> 
> Let's move this to include/linux/memory.h with the other
> CONFIG_MEMORY_HOTPLUG_SPARSE declarations, and add a dummy
> implementation for the CONFIG_MEMORY_HOTPLUG_SPARSE=n case.
> 
> Also, this patch can be squashed with the next one, no need for it to be
> stand alone.
> 

Ok

> 
> > +
> >  extern void __ref free_area_init_core_hotplug(int nid);  extern int
> > __add_memory(int nid, u64 start, u64 size);  extern int add_memory(int
> > nid, u64 start, u64 size); diff --git a/mm/memory_hotplug.c
> > b/mm/memory_hotplug.c index 0a54ffac8c68..14945f033594 100644
> > --- a/mm/memory_hotplug.c
> > +++ b/mm/memory_hotplug.c
> > @@ -276,8 +276,8 @@ static int check_pfn_span(unsigned long pfn,
> unsigned long nr_pages,
> > return 0;
> >  }
> >
> > -static int check_hotplug_memory_addressable(unsigned long pfn,
> > -   unsigned long nr_pages)
> > +int check_hotplug_memory_addressable(unsigned long pfn,
> > +unsigned long nr_pages)
> >  {
> > const u64 max_addr = PFN_PHYS(pfn + nr_pages) - 1;
> >
> > --
> > 2.24.1
> >

-- 
Alastair D'Silva   mob: 0423 762 819
skype: alastair_dsilva msn: alast...@d-silva.org
blog: http://alastair.d-silva.orgTwitter: @EvilDeece
 




RE: [PATCH v4 03/25] powerpc/powernv: Map & release OpenCAPI LPC memory

2020-04-01 Thread Alastair D'Silva
> -Original Message-
> From: Dan Williams 
> Sent: Wednesday, 1 April 2020 7:49 PM
> To: Alastair D'Silva 
> Cc: Aneesh Kumar K . V ; Oliver O'Halloran
> ; Benjamin Herrenschmidt
> ; Paul Mackerras ; Michael
> Ellerman ; Frederic Barrat ;
> Andrew Donnellan ; Arnd Bergmann
> ; Greg Kroah-Hartman ;
> Vishal Verma ; Dave Jiang
> ; Ira Weiny ; Andrew Morton
> ; Mauro Carvalho Chehab
> ; David S. Miller ;
> Rob Herring ; Anton Blanchard ;
> Krzysztof Kozlowski ; Mahesh Salgaonkar
> ; Madhavan Srinivasan
> ; Cédric Le Goater ; Anju T
> Sudhakar ; Hari Bathini
> ; Thomas Gleixner ; Greg
> Kurz ; Nicholas Piggin ; Masahiro
> Yamada ; Alexey Kardashevskiy
> ; Linux Kernel Mailing List ;
> linuxppc-dev ; linux-nvdimm  nvd...@lists.01.org>; Linux MM 
> Subject: Re: [PATCH v4 03/25] powerpc/powernv: Map & release OpenCAPI
> LPC memory
> 
> On Sun, Mar 29, 2020 at 10:23 PM Alastair D'Silva 
> wrote:
> >
> > This patch adds OPAL calls to powernv so that the OpenCAPI driver can
> > map & release LPC (Lowest Point of Coherency)  memory.
> >
> > Signed-off-by: Alastair D'Silva 
> > Reviewed-by: Andrew Donnellan 
> > ---
> >  arch/powerpc/include/asm/pnv-ocxl.h   |  2 ++
> >  arch/powerpc/platforms/powernv/ocxl.c | 43
> > +++
> >  2 files changed, 45 insertions(+)
> >
> > diff --git a/arch/powerpc/include/asm/pnv-ocxl.h
> > b/arch/powerpc/include/asm/pnv-ocxl.h
> > index 7de82647e761..560a19bb71b7 100644
> > --- a/arch/powerpc/include/asm/pnv-ocxl.h
> > +++ b/arch/powerpc/include/asm/pnv-ocxl.h
> > @@ -32,5 +32,7 @@ extern int
> pnv_ocxl_spa_remove_pe_from_cache(void
> > *platform_data, int pe_handle)
> >
> >  extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
> > extern void pnv_ocxl_free_xive_irq(u32 irq);
> > +u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size); void
> > +pnv_ocxl_platform_lpc_release(struct pci_dev *pdev);
> >
> >  #endif /* _ASM_PNV_OCXL_H */
> > diff --git a/arch/powerpc/platforms/powernv/ocxl.c
> > b/arch/powerpc/platforms/powernv/ocxl.c
> > index 8c65aacda9c8..f13119a7c026 100644
> > --- a/arch/powerpc/platforms/powernv/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/ocxl.c
> > @@ -475,6 +475,49 @@ void pnv_ocxl_spa_release(void *platform_data)
> }
> > EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
> >
> > +u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size) {
> > +   struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> > +   struct pnv_phb *phb = hose->private_data;
> 
> Is calling the local variable 'hose' instead of 'host' on purpose?
> 

Yes, this follows the convention used in other functions in this file.

> > +   u32 bdfn = pci_dev_id(pdev);
> > +   __be64 base_addr_be64;
> > +   u64 base_addr;
> > +   int rc;
> > +
> > +   rc = opal_npu_mem_alloc(phb->opal_id, bdfn, size,
> &base_addr_be64);
> > +   if (rc) {
> > +   dev_warn(&pdev->dev,
> > +"OPAL could not allocate LPC memory, rc=%d\n", rc);
> > +   return 0;
> > +   }
> > +
> > +   base_addr = be64_to_cpu(base_addr_be64);
> > +
> > +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
> 
> With the proposed cleanup in patch2 the ifdef can be elided here.

Ok
> 
> > +   rc = check_hotplug_memory_addressable(base_addr >> PAGE_SHIFT,
> > + size >> PAGE_SHIFT);
> > +   if (rc)
> > +   return 0;
> 
> Is this an error worth logging if someone is wondering why their device is not
> showing up?
> 

Yes, I'll add a message.

> 
> > +#endif
> > +
> > +   return base_addr;
> > +}
> > +EXPORT_SYMBOL_GPL(pnv_ocxl_platform_lpc_setup);
> > +
> > +void pnv_ocxl_platform_lpc_release(struct pci_dev *pdev) {
> > +   struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> > +   struct pnv_phb *phb = hose->private_data;
> > +   u32 bdfn = pci_dev_id(pdev);
> > +   int rc;
> > +
> > +   rc = opal_npu_mem_release(phb->opal_id, bdfn);
> > +   if (rc)
> > +   dev_warn(&pdev->dev,
> > +"OPAL reported rc=%d when releasing LPC
> > +memory\n", rc); }
> EXPORT_SYMBOL_GPL(pnv_ocxl_platform_lpc_release);
> > +
> >  int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int
> > pe_handle)  {
> > struct spa_data *data = (struct spa_data *) platform_data;
> > --
> > 2.24.1
> >
> 
> 
> --
> This email has been checked for viruses by AVG.
> https://www.avg.com




Re: [PATCH v2 1/1] ppc/crash: Skip spinlocks during crash

2020-04-01 Thread kbuild test robot
Hi Leonardo,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/locking/core]
[also build test ERROR on powerpc/next paulus-powerpc/kvm-ppc-next v5.6 
next-20200401]
[If your patch was applied to the wrong git tree, please drop us a note to help
improve the system. We also suggest using the '--base' option of git format-patch
to specify the base tree; see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Leonardo-Bras/ppc-crash-Skip-spinlocks-during-crash/20200327-105958
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
8bf6c677ddb9c922423ea3bf494fe7c508bfbb8c
config: powerpc-randconfig-a001-20200401 (attached as .config)
compiler: powerpc-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=9.3.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add the following tag:
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>):

   powerpc-linux-ld: arch/powerpc/kernel/traps.o: in function `arch_spin_lock':
>> arch/powerpc/include/asm/spinlock.h:147: undefined reference to 
>> `crash_skip_spinlock'
>> powerpc-linux-ld: arch/powerpc/include/asm/spinlock.h:147: undefined 
>> reference to `crash_skip_spinlock'
   powerpc-linux-ld: arch/powerpc/kernel/rtas.o: in function `arch_spin_lock':
>> arch/powerpc/include/asm/spinlock.h:147: undefined reference to 
>> `crash_skip_spinlock'
>> powerpc-linux-ld: arch/powerpc/include/asm/spinlock.h:147: undefined 
>> reference to `crash_skip_spinlock'
   powerpc-linux-ld: kernel/locking/lockdep.o: in function `arch_spin_lock':
>> arch/powerpc/include/asm/spinlock.h:147: undefined reference to 
>> `crash_skip_spinlock'
   powerpc-linux-ld: 
kernel/locking/lockdep.o:arch/powerpc/include/asm/spinlock.h:147: more 
undefined references to `crash_skip_spinlock' follow
>> pahole: .tmp_vmlinux.btf: No such file or directory
   powerpc-linux-objdump: '.tmp_vmlinux.btf': No such file
   powerpc-linux-objdump: '.tmp_vmlinux.btf': No such file
   powerpc-linux-objcopy: '.tmp_vmlinux.btf': No such file
   powerpc-linux-objcopy: --change-section-vma .BTF=0x never 
used
   powerpc-linux-objcopy: --change-section-lma .BTF=0x never 
used
   powerpc-linux-objcopy: '.btf.vmlinux.bin': No such file
   Failed to generate BTF for vmlinux
   Try to disable CONFIG_DEBUG_INFO_BTF

vim +147 arch/powerpc/include/asm/spinlock.h

   140  
   141  static inline void arch_spin_lock(arch_spinlock_t *lock)
   142  {
   143  while (1) {
   144  if (likely(__arch_spin_trylock(lock) == 0))
   145  break;
   146  do {
 > 147  if (unlikely(crash_skip_spinlock))
   148  return;
   149  HMT_low();
   150  if (is_shared_processor())
   151  splpar_spin_yield(lock);
   152  } while (unlikely(lock->slock != 0));
   153  HMT_medium();
   154  }
   155  }
   156  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


[powerpc:merge] BUILD SUCCESS d0c12846a3a24cd6d68b608c866712bc7e471634

2020-04-01 Thread kbuild test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  
merge
branch HEAD: d0c12846a3a24cd6d68b608c866712bc7e471634  Automatic merge of 
branch 'next' into merge

elapsed time: 879m

configs tested: 176
configs skipped: 0

The following configs have been built successfully.
More configs may be tested in the coming days.

arm  allmodconfig
arm   allnoconfig
arm  allyesconfig
arm64allmodconfig
arm64 allnoconfig
arm64allyesconfig
arm at91_dt_defconfig
arm   efm32_defconfig
arm  exynos_defconfig
armmulti_v5_defconfig
armmulti_v7_defconfig
armshmobile_defconfig
arm   sunxi_defconfig
arm64   defconfig
sparcallyesconfig
riscvnommu_virt_defconfig
ia64defconfig
powerpc defconfig
c6x  allyesconfig
powerpc   ppc64_defconfig
sparc64 defconfig
s390  allnoconfig
i386  allnoconfig
i386 alldefconfig
i386 allyesconfig
i386defconfig
ia64 allmodconfig
ia64  allnoconfig
ia64 allyesconfig
ia64 alldefconfig
c6xevmc6678_defconfig
nios2 10m50_defconfig
nios2 3c120_defconfig
openriscor1ksim_defconfig
openrisc simple_smp_defconfig
xtensa   common_defconfig
xtensa  iss_defconfig
alpha   defconfig
cskydefconfig
nds32 allnoconfig
nds32   defconfig
h8300 edosk2674_defconfig
h8300h8300h-sim_defconfig
h8300   h8s-sim_defconfig
m68k allmodconfig
m68k   m5475evb_defconfig
m68k  multi_defconfig
m68k   sun3_defconfig
arc  allyesconfig
arc defconfig
microblaze  mmu_defconfig
microblazenommu_defconfig
powerpc   allnoconfig
powerpc  rhel-kconfig
mips  fuloong2e_defconfig
mips  malta_kvm_defconfig
mips allyesconfig
mips 64r6el_defconfig
mips  allnoconfig
mips   32r2_defconfig
mips allmodconfig
pariscallnoconfig
parisc   allyesconfig
pariscgeneric-32bit_defconfig
pariscgeneric-64bit_defconfig
x86_64   randconfig-a001-20200401
x86_64   randconfig-a002-20200401
x86_64   randconfig-a003-20200401
i386 randconfig-a001-20200401
i386 randconfig-a002-20200401
i386 randconfig-a003-20200401
alpharandconfig-a001-20200401
m68k randconfig-a001-20200401
mips randconfig-a001-20200401
nds32randconfig-a001-20200401
parisc   randconfig-a001-20200401
riscvrandconfig-a001-20200401
mips randconfig-a001-20200402
c6x  randconfig-a001-20200401
h8300randconfig-a001-20200401
microblaze   randconfig-a001-20200401
nios2randconfig-a001-20200401
sparc64  randconfig-a001-20200401
c6x  randconfig-a001-20200402
h8300randconfig-a001-20200402
microblaze   randconfig-a001-20200402
nios2randconfig-a001-20200402
sparc64  randconfig-a001-20200402
csky randconfig-a001-20200401
openrisc randconfig-a001-20200401
s390 randconfig-a001-20200401
sh   randconfig-a001-20200401
xtensa   randconfig-a001-20200401
x86_64   randconfig-b001-20200402
x86_64   randconfig-b002-20200402
x86_64   randconfig-b003-20200402
i386 randconfig-b001-20200402
i386 randconfig-b002-20200402
i386 randconfig-b003-20200402
x86_64   randconfig-b001-20200401
x86_64   randconfig-b00

[powerpc:next-test] BUILD SUCCESS 824a2d10fcf429689cd20d7d36eeb24697466c9b

2020-04-01 Thread kbuild test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  
next-test
branch HEAD: 824a2d10fcf429689cd20d7d36eeb24697466c9b  powerpc/perf: split 
callchain.c by bitness

elapsed time: 1004m

configs tested: 192
configs skipped: 0

The following configs have been built successfully.
More configs may be tested in the coming days.

arm  allmodconfig
arm   allnoconfig
arm  allyesconfig
arm64allmodconfig
arm64 allnoconfig
arm64allyesconfig
arm at91_dt_defconfig
arm   efm32_defconfig
arm  exynos_defconfig
armmulti_v5_defconfig
armmulti_v7_defconfig
armshmobile_defconfig
arm   sunxi_defconfig
arm64   defconfig
sparcallyesconfig
h8300h8300h-sim_defconfig
s390  allnoconfig
shallnoconfig
m68k   m5475evb_defconfig
c6x  allyesconfig
powerpc   ppc64_defconfig
ia64defconfig
powerpc defconfig
sparc64 defconfig
i386  allnoconfig
i386 alldefconfig
i386 allyesconfig
i386defconfig
ia64 alldefconfig
ia64 allmodconfig
ia64  allnoconfig
ia64 allyesconfig
nios2 3c120_defconfig
nios2 10m50_defconfig
c6xevmc6678_defconfig
xtensa  iss_defconfig
xtensa   common_defconfig
openrisc simple_smp_defconfig
openriscor1ksim_defconfig
alpha   defconfig
cskydefconfig
nds32 allnoconfig
nds32   defconfig
h8300 edosk2674_defconfig
h8300   h8s-sim_defconfig
m68k allmodconfig
m68k  multi_defconfig
m68k   sun3_defconfig
arc  allyesconfig
arc defconfig
microblaze  mmu_defconfig
microblazenommu_defconfig
powerpc   allnoconfig
powerpc  rhel-kconfig
mips   32r2_defconfig
mips 64r6el_defconfig
mips allmodconfig
mips  allnoconfig
mips allyesconfig
mips  fuloong2e_defconfig
mips  malta_kvm_defconfig
pariscallnoconfig
parisc   allyesconfig
pariscgeneric-32bit_defconfig
pariscgeneric-64bit_defconfig
x86_64   randconfig-a001-20200401
x86_64   randconfig-a002-20200401
x86_64   randconfig-a003-20200401
i386 randconfig-a001-20200401
i386 randconfig-a002-20200401
i386 randconfig-a003-20200401
alpharandconfig-a001-20200401
m68k randconfig-a001-20200401
mips randconfig-a001-20200401
nds32randconfig-a001-20200401
parisc   randconfig-a001-20200401
riscvrandconfig-a001-20200401
mips randconfig-a001-20200402
nds32randconfig-a001-20200402
m68k randconfig-a001-20200402
alpharandconfig-a001-20200402
parisc   randconfig-a001-20200402
riscvrandconfig-a001-20200402
microblaze   randconfig-a001-20200331
h8300randconfig-a001-20200331
nios2randconfig-a001-20200331
c6x  randconfig-a001-20200331
sparc64  randconfig-a001-20200331
c6x  randconfig-a001-20200401
h8300randconfig-a001-20200401
microblaze   randconfig-a001-20200401
nios2randconfig-a001-20200401
sparc64  randconfig-a001-20200401
c6x  randconfig-a001-20200402
h8300randconfig-a001-20200402
microblaze   randconfig-a001-20200402
nios2randconfig-a001-20200402
sparc64  randconfig-a001-20200402
s390 randconfig-a001-20200401
xtensa   randconfig-a001-20200401
csky randconfig-a001-20200401

Re: [PATCH V2 3/5] selftests/powerpc: Add NX-GZIP engine compress testcase

2020-04-01 Thread Daniel Axtens
Raphael Moreira Zinsly  writes:

> Add a compression testcase for the powerpc NX-GZIP engine.
>
> Signed-off-by: Bulent Abali 
> Signed-off-by: Raphael Moreira Zinsly 
> ---
>  .../selftests/powerpc/nx-gzip/Makefile|  21 +
>  .../selftests/powerpc/nx-gzip/gzfht_test.c| 489 ++
>  .../selftests/powerpc/nx-gzip/gzip_vas.c  | 259 ++
>  3 files changed, 769 insertions(+)
>  create mode 100644 tools/testing/selftests/powerpc/nx-gzip/Makefile
>  create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
>  create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
>
> diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile 
> b/tools/testing/selftests/powerpc/nx-gzip/Makefile
> new file mode 100644
> index 000000000000..ab903f63bbbd
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
> @@ -0,0 +1,21 @@
> +CC = gcc
> +CFLAGS = -O3
> +INC = ./inc
> +SRC = gzfht_test.c
> +OBJ = $(SRC:.c=.o)
> +TESTS = gzfht_test
> +EXTRA_SOURCES = gzip_vas.c
> +
> +all: $(TESTS)
> +
> +$(OBJ): %.o: %.c
> + $(CC) $(CFLAGS) -I$(INC) -c $<
> +
> +$(TESTS): $(OBJ)
> + $(CC) $(CFLAGS) -I$(INC) -o $@ $@.o $(EXTRA_SOURCES)
> +
> +run_tests: $(TESTS)
> + ./gzfht_test gzip_vas.c
> +
> +clean:
> + rm -f $(TESTS) *.o *~ *.gz
> diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c 
> b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
> new file mode 100644
> index 000000000000..7a21c25f5611
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
> @@ -0,0 +1,489 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +/* P9 gzip sample code for demonstrating the P9 NX hardware interface.
> + * Not intended for productive uses or for performance or compression
> + * ratio measurements.  For simplicity of demonstration, this sample
> + * code compresses in to fixed Huffman blocks only (Deflate btype=1)
> + * and has very simple memory management.  Dynamic Huffman blocks
> + * (Deflate btype=2) are more involved as detailed in the user guide.
> + * Note also that /dev/crypto/gzip, VAS and skiboot support are
> + * required.
> + *
> + * Copyright 2020 IBM Corp.
> + *
> + * https://github.com/libnxz/power-gzip for zlib api and other utils
> + *
> + * Author: Bulent Abali 
> + *
> + * Definitions of acronyms used here. See
> + * P9 NX Gzip Accelerator User's Manual for details:
> + * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
> + *
> + * adler/crc: 32 bit checksums appended to stream tail
> + * ce:   completion extension
> + * cpb:  coprocessor parameter block (metadata)
> + * crb:  coprocessor request block (command)
> + * csb:  coprocessor status block (status)
> + * dht:  dynamic huffman table
> + * dde:  data descriptor element (address, length)
> + * ddl:  list of ddes
> + * dh/fh:dynamic and fixed huffman types
> + * fc:   coprocessor function code
> + * histlen:  history/dictionary length
> + * history:  sliding window of up to 32KB of data
> + * lzcount:  Deflate LZ symbol counts
> + * rembytecnt: remaining byte count
> + * sfbt: source final block type; last block's type during decomp
> + * spbc: source processed byte count
> + * subc: source unprocessed bit count
> + * tebc: target ending bit count; valid bits in the last byte
> + * tpbc: target processed byte count
> + * vas:  virtual accelerator switch; the user mode interface
> + */
> +
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include "nxu.h"
> +#include "nx.h"
> +
> +int nx_dbg;
> +FILE *nx_gzip_log;
> +void *nx_fault_storage_address;
> +
> +#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
> +#define FNAME_MAX 1024
> +#define FEXT ".nx.gz"
> +
> +/*
> + * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
> + */
> +static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
> + uint32_t dstlen, int with_count,
> + struct nx_gzip_crb_cpb_t *cmdp, void *handle)
> +{
> + int cc;
> + uint32_t fc;
> +
> + assert(!!cmdp);
> +
> + put32(cmdp->crb, gzip_fc, 0);  /* clear */
> + fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
> + GZIP_FC_COMPRESS_RESUME_FHT;
> + putnn(cmdp->crb, gzip_fc, fc);
> + putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
> + memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
> +
> + /* Section 6.6 programming notes; spbc may be in two different
> +  * places depending on FC.
> +  */
> + if (!with_count)
> + put32(cmdp->cpb, out_spbc_comp, 0);
> + else
> + put32(cmdp->cpb, out_spbc_comp_with_count, 0);
> +
> + /* Figure 6-3 6-4; CSB location */
> + put

Re: [PATCH v4 06/25] ocxl: Tally up the LPC memory on a link & allow it to be mapped

2020-04-01 Thread Andrew Donnellan

On 1/4/20 7:48 pm, Dan Williams wrote:

On Sun, Mar 29, 2020 at 10:53 PM Alastair D'Silva  wrote:


OpenCAPI LPC memory is allocated per link, but each link supports
multiple AFUs, and each AFU can have LPC memory assigned to it.


Is there an OpenCAPI primer to decode these objects and their
associations that I can reference?


There isn't presently a primer that I think addresses these questions 
nicely (to my knowledge - Fred might have something he can link to?) - 
there are the specs published by the OpenCAPI Consortium at 
https://opencapi.org but they're really for hardware implementers.


We should probably expand what's currently documented in 
Documentation/userspace-api/accelerators/ocxl.rst generally, and this 
series should probably update that to include details on LPC.


To explain the specific objects here:

- A "link" is a point-to-point link between the host CPU, and a single 
OpenCAPI card. (We don't currently support cards making use of multiple 
links for increased bandwidth, though that is supported from a hardware 
point of view.)


- On POWER9, each link appears as a separate PCI domain, with a single 
bus, and the card appears as a single device.


- A device can have up to 8 functions, as per the PCI specification.

- An Attached Functional Unit (AFU) is the abstraction for a particular 
application function. Each PCI function defines the number of AFUs it 
has through a set of OpenCAPI-specific DVSECs, max 64 per function. The 
ocxl driver handles AFU discovery.


- On the host side, LPC memory is mapped by setting a single BAR for the 
whole link, but on the device side, LPC memory is requested on a per-AFU 
basis, through an AFU descriptor that is exposed through the 
aforementioned DVSECs. Hence the need to loop through the AFUs and get 
the total required LPC memory to work out the correct BAR value.


--
Andrew Donnellan  OzLabs, ADL Canberra
a...@linux.ibm.com IBM Australia Limited



Re: [PATCH v4 14/25] nvdimm/ocxl: Add support for Admin commands

2020-04-01 Thread Dan Williams
On Sun, Mar 29, 2020 at 10:23 PM Alastair D'Silva  wrote:
>
> Admin commands for these devices are the primary means of interacting
> with the device controller to provide functionality beyond the load/store
> capabilities offered via the NPU.
>
> For example, SMART data, firmware update, and device error logs are
> implemented via admin commands.
>
> This patch requests the metadata required to issue admin commands, as well
> as some helper functions to construct and check the completion of the
> commands.
>
> Signed-off-by: Alastair D'Silva 
> ---
>  drivers/nvdimm/ocxl/main.c  |  65 ++
>  drivers/nvdimm/ocxl/ocxlpmem.h  |  50 -
>  drivers/nvdimm/ocxl/ocxlpmem_internal.c | 261 
>  3 files changed, 375 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
> index be76acd33d74..8db573036423 100644
> --- a/drivers/nvdimm/ocxl/main.c
> +++ b/drivers/nvdimm/ocxl/main.c
> @@ -217,6 +217,58 @@ static int register_lpc_mem(struct ocxlpmem *ocxlpmem)
> return 0;
>  }
>
> +/**
> + * extract_command_metadata() - Extract command data from MMIO & save it for 
> further use
> + * @ocxlpmem: the device metadata
> + * @offset: The base address of the command data structures (address of 
> CREQO)
> + * @command_metadata: A pointer to the command metadata to populate
> + * Return: 0 on success, negative on failure
> + */
> +static int extract_command_metadata(struct ocxlpmem *ocxlpmem, u32 offset,
> +   struct command_metadata *command_metadata)

How about "struct ocxlpmem *ocp" throughout all these patches? The
full duplication of the type name as the local variable name makes
this look like non-idiomatic Linux code to me. It had not quite hit me
until I saw "struct command_metadata *command_metadata", which just
strikes me as too literal; the person who gets to maintain this
code later will appreciate a smaller amount of typing.

Also, is it really the case that the layout of the admin command
metadata needs to be programmatically determined at runtime? I would
expect it to be a static command definition in the spec.


> +{
> +   int rc;
> +   u64 tmp;
> +
> +   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, offset,
> +OCXL_LITTLE_ENDIAN, &tmp);
> +   if (rc)
> +   return rc;
> +
> +   command_metadata->request_offset = tmp >> 32;
> +   command_metadata->response_offset = tmp & 0xFFFFFFFF;
> +
> +   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, offset + 8,
> +OCXL_LITTLE_ENDIAN, &tmp);
> +   if (rc)
> +   return rc;
> +
> +   command_metadata->data_offset = tmp >> 32;
> +   command_metadata->data_size = tmp & 0xFFFFFFFF;
> +
> +   command_metadata->id = 0;
> +
> +   return 0;
> +}
> +
> +/**
> + * setup_command_metadata() - Set up the command metadata
> + * @ocxlpmem: the device metadata
> + */
> +static int setup_command_metadata(struct ocxlpmem *ocxlpmem)
> +{
> +   int rc;
> +
> +   mutex_init(&ocxlpmem->admin_command.lock);
> +
> +   rc = extract_command_metadata(ocxlpmem, GLOBAL_MMIO_ACMA_CREQO,
> + &ocxlpmem->admin_command);
> +   if (rc)
> +   return rc;
> +
> +   return 0;
> +}
> +
>  /**
>   * allocate_minor() - Allocate a minor number to use for an OpenCAPI pmem 
> device
>   * @ocxlpmem: the device metadata
> @@ -421,6 +473,14 @@ static int probe(struct pci_dev *pdev, const struct 
> pci_device_id *ent)
>
> ocxlpmem->pdev = pci_dev_get(pdev);
>
> +   ocxlpmem->timeouts[ADMIN_COMMAND_ERRLOG] = 2000; // ms
> +   ocxlpmem->timeouts[ADMIN_COMMAND_HEARTBEAT] = 100; // ms
> +   ocxlpmem->timeouts[ADMIN_COMMAND_SMART] = 100; // ms
> +   ocxlpmem->timeouts[ADMIN_COMMAND_CONTROLLER_DUMP] = 1000; // ms
> +   ocxlpmem->timeouts[ADMIN_COMMAND_CONTROLLER_STATS] = 100; // ms
> +   ocxlpmem->timeouts[ADMIN_COMMAND_SHUTDOWN] = 1000; // ms
> +   ocxlpmem->timeouts[ADMIN_COMMAND_FW_UPDATE] = 16000; // ms
> +
> pci_set_drvdata(pdev, ocxlpmem);
>
> ocxlpmem->ocxl_fn = ocxl_function_open(pdev);
> @@ -467,6 +527,11 @@ static int probe(struct pci_dev *pdev, const struct 
> pci_device_id *ent)
> goto err;
> }
>
> +   if (setup_command_metadata(ocxlpmem)) {
> +   dev_err(&pdev->dev, "Could not read command metadata\n");
> +   goto err;
> +   }
> +
> elapsed = 0;
> timeout = ocxlpmem->readiness_timeout +
>   ocxlpmem->memory_available_timeout;
> diff --git a/drivers/nvdimm/ocxl/ocxlpmem.h b/drivers/nvdimm/ocxl/ocxlpmem.h
> index 3eadbe19f6d0..b72b3f909fc3 100644
> --- a/drivers/nvdimm/ocxl/ocxlpmem.h
> +++ b/drivers/nvdimm/ocxl/ocxlpmem.h
> @@ -7,6 +7,7 @@
>  #include 
>
>  #define LABEL_AREA_SIZEBIT_ULL(PA_SECTION_SHIFT)
> +#define DEFAU

<    1   2