Re: [RFC PATCH v2] powerpc/xmon: restrict when kernel is locked down

2019-06-02 Thread Andrew Donnellan

On 24/5/19 10:38 pm, Christopher M. Riedl wrote:

Xmon should be either fully or partially disabled depending on the
kernel lockdown state.

Put xmon into read-only mode for lockdown=integrity and completely
disable xmon when lockdown=confidentiality. Xmon checks the lockdown
state and takes appropriate action:

  (1) during xmon_setup to prevent early xmon'ing

  (2) when triggered via sysrq

  (3) when toggled via debugfs

  (4) when triggered via a previously enabled breakpoint

The following lockdown state transitions are handled:

  (1) lockdown=none -> lockdown=integrity
  clear all breakpoints, set xmon read-only mode

  (2) lockdown=none -> lockdown=confidentiality
  clear all breakpoints, prevent re-entry into xmon

  (3) lockdown=integrity -> lockdown=confidentiality
  prevent re-entry into xmon

Suggested-by: Andrew Donnellan 
Signed-off-by: Christopher M. Riedl 
---

Applies on top of this series:
https://patchwork.kernel.org/cover/10884631/

I've done some limited testing of the scenarios mentioned in the commit
message on a single CPU QEMU config.

v1->v2:
Fix subject line
Submit to linuxppc-dev and kernel-hardening

  arch/powerpc/xmon/xmon.c | 56 +++-
  1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 3e7be19aa208..8c4a5a0c28f0 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -191,6 +191,9 @@ static void dump_tlb_44x(void);
  static void dump_tlb_book3e(void);
  #endif
  
+static void clear_all_bpt(void);

+static void xmon_init(int);
+
  #ifdef CONFIG_PPC64
  #define REG   "%.16lx"
  #else
@@ -291,6 +294,39 @@ Commands:\n\
zh  halt\n"
  ;
  
+#ifdef CONFIG_LOCK_DOWN_KERNEL

+static bool xmon_check_lockdown(void)
+{
+   static bool lockdown = false;
+
+   if (!lockdown) {
+   lockdown = kernel_is_locked_down("Using xmon",
+LOCKDOWN_CONFIDENTIALITY);
+   if (lockdown) {
+   printf("xmon: Disabled by strict kernel lockdown\n");
+   xmon_on = 0;
+   xmon_init(0);
+   }
+   }
+
+   if (!xmon_is_ro) {
+   xmon_is_ro = kernel_is_locked_down("Using xmon write-access",
+  LOCKDOWN_INTEGRITY);
+   if (xmon_is_ro) {
+   printf("xmon: Read-only due to kernel lockdown\n");
+   clear_all_bpt();


Remind me again why we need to clear breakpoints in integrity mode?


Andrew



+   }
+   }
+
+   return lockdown;
+}
+#else
+inline static bool xmon_check_lockdown(void)
+{
+   return false;
+}
+#endif /* CONFIG_LOCK_DOWN_KERNEL */
+
  static struct pt_regs *xmon_regs;
  
  static inline void sync(void)

@@ -708,6 +744,9 @@ static int xmon_bpt(struct pt_regs *regs)
struct bpt *bp;
unsigned long offset;
  
+	if (xmon_check_lockdown())

+   return 0;
+
if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
return 0;
  
@@ -739,6 +778,9 @@ static int xmon_sstep(struct pt_regs *regs)
  
  static int xmon_break_match(struct pt_regs *regs)

  {
+   if (xmon_check_lockdown())
+   return 0;
+
if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
return 0;
if (dabr.enabled == 0)
@@ -749,6 +791,9 @@ static int xmon_break_match(struct pt_regs *regs)
  
  static int xmon_iabr_match(struct pt_regs *regs)

  {
+   if (xmon_check_lockdown())
+   return 0;
+
if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
return 0;
if (iabr == NULL)
@@ -3742,6 +3787,9 @@ static void xmon_init(int enable)
  #ifdef CONFIG_MAGIC_SYSRQ
  static void sysrq_handle_xmon(int key)
  {
+   if (xmon_check_lockdown())
+   return;
+
/* ensure xmon is enabled */
xmon_init(1);
debugger(get_irq_regs());
@@ -3763,7 +3811,6 @@ static int __init setup_xmon_sysrq(void)
  device_initcall(setup_xmon_sysrq);
  #endif /* CONFIG_MAGIC_SYSRQ */
  
-#ifdef CONFIG_DEBUG_FS

  static void clear_all_bpt(void)
  {
int i;
@@ -3785,8 +3832,12 @@ static void clear_all_bpt(void)
printf("xmon: All breakpoints cleared\n");
  }
  
+#ifdef CONFIG_DEBUG_FS

  static int xmon_dbgfs_set(void *data, u64 val)
  {
+   if (xmon_check_lockdown())
+   return 0;
+
xmon_on = !!val;
xmon_init(xmon_on);
  
@@ -3845,6 +3896,9 @@ early_param("xmon", early_parse_xmon);
  
  void __init xmon_setup(void)

  {
+   if (xmon_check_lockdown())
+   return;
+
if (xmon_on)
xmon_init(1);
if (xmon_early)



--
Andrew Donnellan  OzLabs, ADL Canberra
a...@linux.ibm.com IBM Australia Limited



Re: [PATCH v3 1/3] PCI: Introduce pcibios_ignore_alignment_request

2019-06-02 Thread Alexey Kardashevskiy



On 03/06/2019 12:23, Shawn Anastasio wrote:
> 
> 
> On 5/30/19 10:56 PM, Alexey Kardashevskiy wrote:
>>
>>
>> On 31/05/2019 08:49, Shawn Anastasio wrote:
>>> On 5/29/19 10:39 PM, Alexey Kardashevskiy wrote:


 On 28/05/2019 17:39, Shawn Anastasio wrote:
>
>
> On 5/28/19 1:27 AM, Alexey Kardashevskiy wrote:
>>
>>
>> On 28/05/2019 15:36, Oliver wrote:
>>> On Tue, May 28, 2019 at 2:03 PM Shawn Anastasio 
>>> wrote:

 Introduce a new pcibios function pcibios_ignore_alignment_request
 which allows the PCI core to defer to platform-specific code to
 determine whether or not to ignore alignment requests for PCI
 resources.

 The existing behavior is to simply ignore alignment requests when
 PCI_PROBE_ONLY is set. This behavior is maintained by the
 default implementation of pcibios_ignore_alignment_request.

 Signed-off-by: Shawn Anastasio 
 ---
     drivers/pci/pci.c   | 9 +++--
     include/linux/pci.h | 1 +
     2 files changed, 8 insertions(+), 2 deletions(-)

 diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
 index 8abc843b1615..8207a09085d1 100644
 --- a/drivers/pci/pci.c
 +++ b/drivers/pci/pci.c
 @@ -5882,6 +5882,11 @@ resource_size_t __weak
 pcibios_default_alignment(void)
    return 0;
     }

 +int __weak pcibios_ignore_alignment_request(void)
 +{
 +   return pci_has_flag(PCI_PROBE_ONLY);
 +}
 +
     #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
     static char
 resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
     static DEFINE_SPINLOCK(resource_alignment_lock);
 @@ -5906,9 +5911,9 @@ static resource_size_t
 pci_specified_resource_alignment(struct pci_dev *dev,
    p = resource_alignment_param;
    if (!*p && !align)
    goto out;
 -   if (pci_has_flag(PCI_PROBE_ONLY)) {
 +   if (pcibios_ignore_alignment_request()) {
    align = 0;
 -   pr_info_once("PCI: Ignoring requested alignments
 (PCI_PROBE_ONLY)\n");
 +   pr_info_once("PCI: Ignoring requested
 alignments\n");
    goto out;
    }
>>>
>>> I think the logic here is questionable to begin with. If the user
>>> has
>>> explicitly requested re-aligning a resource via the command line
>>> then
>>> we should probably do it even if PCI_PROBE_ONLY is set. When it
>>> breaks
>>> they get to keep the pieces.
>>>
>>> That said, the real issue here is that PCI_PROBE_ONLY probably
>>> shouldn't be set under qemu/kvm. Under the other hypervisor
>>> (PowerVM)
>>> hotplugged devices are configured by firmware before it's passed to
>>> the guest and we need to keep the FW assignments otherwise things
>>> break. QEMU however doesn't do any BAR assignments and relies on
>>> that
>>> being handled by the guest. At boot time this is done by SLOF, but
>>> Linux only keeps SLOF around until it's extracted the device-tree.
>>> Once that's done SLOF gets blown away and the kernel needs to do
>>> its
>>> own BAR assignments. I'm guessing there's a hack in there to make it
>>> work today, but it's a little surprising that it works at all...
>>
>>
>> The hack is to run a modified qemu-aware "/usr/sbin/rtas_errd" in the
>> guest which receives an event from qemu (RAS_EPOW from
>> /proc/interrupts), fetches device tree chunks (and as I understand
>> it -
>> they come with BARs from phyp but without from qemu) and writes
>> "1" to
>> "/sys/bus/pci/rescan" which calls pci_assign_resource() eventually:
>
> Interesting. Does this mean that the PHYP hotplug path doesn't
> call pci_assign_resource?


 I'd expect dlpar_add_slot() to be called under phyp and eventually
 pci_device_add() which (I think) may or may not trigger later
 reassignment.


> If so it means the patch may not
> break that platform after all, though it still may not be
> the correct way of doing things.


 We should probably stop enforcing the PCI_PROBE_ONLY flag - it seems
 that (unless resource_alignment= is used) the pseries guest should just
 walk through all allocated resources and leave them unchanged.
>>>
>>> If we add a pcibios_default_alignment() implementation like was
>>> suggested earlier, then it will behave as if the user has
>>> specified resource_alignment= by default and SLOF's assignments
>>> won't be honored (I think).
>>
>>
>> I removed pci_add_flags(PCI_PROBE_ONLY) from pSeries_setup_arch and
>> tried booting with and without 

Re: [RFC] mm: Generalize notify_page_fault()

2019-06-02 Thread Anshuman Khandual



On 05/31/2019 11:18 PM, Matthew Wilcox wrote:
> On Fri, May 31, 2019 at 02:17:43PM +0530, Anshuman Khandual wrote:
>> On 05/30/2019 07:09 PM, Matthew Wilcox wrote:
>>> On Thu, May 30, 2019 at 05:31:15PM +0530, Anshuman Khandual wrote:
 On 05/30/2019 04:36 PM, Matthew Wilcox wrote:
> The two handle preemption differently.  Why is x86 wrong and this one
> correct?

 Here it expects context to be already non-preemptible where as the proposed
 generic function makes it non-preemptible with a preempt_[disable|enable]()
 pair for the required code section, irrespective of it's present state. Is
 not this better ?
>>>
>>> git log -p arch/x86/mm/fault.c
>>>
>>> search for 'kprobes'.
>>>
>>> tell me what you think.
>>
>> Are you referring to these following commits
>>
>> a980c0ef9f6d ("x86/kprobes: Refactor kprobes_fault() like 
>> kprobe_exceptions_notify()")
>> b506a9d08bae ("x86: code clarification patch to Kprobes arch code")
>>
>> In particular the later one (b506a9d08bae). It explains how the invoking 
>> context
>> in itself should be non-preemptible for the kprobes processing context 
>> irrespective
>> of whether kprobe_running() or perhaps smp_processor_id() is safe or not. 
>> Hence it
>> does not make much sense to continue when original invoking context is 
>> preemptible.
>> Instead just bail out earlier. This seems to be making more sense than 
>> preempt
>> disable-enable pair. If there are no concerns about this change from other 
>> platforms,
>> I will change the preemption behavior in proposed generic function next time 
>> around.
> 
> Exactly.
> 
> So, any of the arch maintainers know of a reason they behave differently
> from x86 in this regard?  Or can Anshuman use the x86 implementation
> for all the architectures supporting kprobes?

So the generic notify_page_fault() will be like this.

int __kprobes notify_page_fault(struct pt_regs *regs, unsigned int trap)
{
int ret = 0;

/*
 * To be potentially processing a kprobe fault and to be allowed
 * to call kprobe_running(), we have to be non-preemptible.
 */
if (kprobes_built_in() && !preemptible() && !user_mode(regs)) {
if (kprobe_running() && kprobe_fault_handler(regs, trap))
ret = 1;
}
return ret;
}


RE: [next-20190530] Boot failure on PowerPC

2019-06-02 Thread Lili Deng (Wicresoft North America Ltd)
Hi Sachin,

I verified the below patch against Ubuntu 18.04 and didn't hit the kernel panic any 
more; could you please let me know how you verified?

Thanks,
Lili
-Original Message-
From: Sachin Sant  
Sent: Monday, June 3, 2019 11:12 AM
To: Dexuan-Linux Cui 
Cc: Michael Ellerman ; linuxppc-dev@lists.ozlabs.org; 
linux-n...@vger.kernel.org; Dexuan Cui ; Lili Deng 
(Wicresoft North America Ltd) 
Subject: Re: [next-20190530] Boot failure on PowerPC



> On 31-May-2019, at 11:43 PM, Dexuan-Linux Cui  wrote:
> 
> On Fri, May 31, 2019 at 6:52 AM Michael Ellerman  wrote:
>>> 
>>> Machine boots till login prompt and then panics few seconds later.
>>> 
>>> Last known next build was May 24th. Will attempt few builds till May 
>>> 30 to narrow down this problem.
>> 
>> My CI was fine with next-20190529 (9a15d2e3fd03e3).
>> 
>> cheers
> 
> Hi Sachin,
> It looks this patch may fix the issue:
> https://nam06.safelinks.protection.outlook.com/?url=https%3A%2F%2Flkml.org%2Flkml%2F2019%2F5%2F30%2F1630data=02%7C01%7Cv-lide%40microsoft.com%7C66e1ef6017aa461703f808d6e7d148cd%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C1%7C636951283393233385sdata=IJFhtvL2Bd87HCoMZ7oWL%2Bar6NY%2FfPbmdCZMT%2BJz5t4%3Dreserved=0
>  , but I'm not sure.

It does not help fix the kernel panic issue, but it fixes the get_swap_device 
warning messages during the boot.

Thanks
-Sachin



Re: [PATCH] scsi: ibmvscsi: Don't use rc uninitialized in ibmvscsi_do_work

2019-06-02 Thread Nathan Chancellor
Hi Michael,

On Sun, Jun 02, 2019 at 08:15:38PM +1000, Michael Ellerman wrote:
> Hi Nathan,
> 
> It's always preferable IMHO to keep any initialisation as localised as
> possible, so that the compiler can continue to warn about uninitialised
> usages elsewhere. In this case that would mean doing the rc = 0 in the
> switch, something like:

I am certainly okay with implementing this in a v2. I mulled over which
would be preferred, I suppose I guessed wrong :) Thank you for the
review and input.

> 
> diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c 
> b/drivers/scsi/ibmvscsi/ibmvscsi.c
> index 727c31dc11a0..7ee5755cf636 100644
> --- a/drivers/scsi/ibmvscsi/ibmvscsi.c
> +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
> @@ -2123,9 +2123,6 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data 
> *hostdata)
>  
> spin_lock_irqsave(hostdata->host->host_lock, flags);
> switch (hostdata->action) {
> -   case IBMVSCSI_HOST_ACTION_NONE:
> -   case IBMVSCSI_HOST_ACTION_UNBLOCK:
> -   break;
> case IBMVSCSI_HOST_ACTION_RESET:
> spin_unlock_irqrestore(hostdata->host->host_lock, flags);
> rc = ibmvscsi_reset_crq_queue(>queue, hostdata);
> @@ -2142,7 +2139,10 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data 
> *hostdata)
> if (!rc)
> rc = ibmvscsi_send_crq(hostdata, 
> 0xC001LL, 0);
> break;
> +   case IBMVSCSI_HOST_ACTION_NONE:
> +   case IBMVSCSI_HOST_ACTION_UNBLOCK:
> default:
> +   rc = 0;
> break;
> }
> 
> 
> But then that makes me wonder if that's actually correct?
> 
> If we get an action that we don't recognise should we just throw it away
> like that? (by doing hostdata->action = IBMVSCSI_HOST_ACTION_NONE). Tyrel?

However, because of this, I will hold off on v2 until Tyrel can give
some feedback.

Thanks,
Nathan


Re: [next-20190530] Boot failure on PowerPC

2019-06-02 Thread Sachin Sant



> On 31-May-2019, at 11:43 PM, Dexuan-Linux Cui  wrote:
> 
> On Fri, May 31, 2019 at 6:52 AM Michael Ellerman  wrote:
>>> 
>>> Machine boots till login prompt and then panics few seconds later.
>>> 
>>> Last known next build was May 24th. Will attempt few builds till May 30 to
>>> narrow down this problem.
>> 
>> My CI was fine with next-20190529 (9a15d2e3fd03e3).
>> 
>> cheers
> 
> Hi Sachin,
> It looks this patch may fix the issue:
> https://lkml.org/lkml/2019/5/30/1630 , but I'm not sure.

It does not help fix the kernel panic issue, but it fixes the get_swap_device 
warning
messages during the boot.

Thanks
-Sachin



Re: [PATCH kernel] prom_init: Fetch flatten device tree from the system firmware

2019-06-02 Thread Alexey Kardashevskiy



On 03/06/2019 09:23, Segher Boessenkool wrote:
> Hi!
> 
> On Fri, May 31, 2019 at 11:03:26AM +1000, Benjamin Herrenschmidt wrote:
>> On Thu, 2019-05-30 at 14:37 -0500, Segher Boessenkool wrote:
>>> On Thu, May 30, 2019 at 05:09:06PM +1000, Alexey Kardashevskiy wrote:
 so, it is sort-of nack from David and sort-of ack from Segher, what
 happens now?
>>>
>>> Maybe what we really need just a CI call to get all properties of a node
>>> at once?  Will that speed up things enough?
>>>
>>> That way you need no change at all in lifetime of properties and how they
>>> are used, etc.; just a client getting the properties is a lot faster.
>>
>> Hrm... if we're going to create a new interface, let's go for what we
>> need.
>>
>> What we need is the FDT. It's a rather ubiquitous thing these days, it
>> makes sense to have a way to fetch an FDT directly from FW.
> 
> That is all you need if you do not want to use OF at all.

? We also need OF drivers to boot grub and the system, and a default
console for early booting, but yes, we do not want to keep using slof
that much.

> If you *do* want to keep having an Open Firmware, what we want or need
> is a faster way to walk huge device trees.

Why? We do not need to _walk_ the tree at all (we _have_ to now and
while walking we do nothing but pack everything together into the fdt
blob) as slof can easily do this already.

>> There is no use for the "fetch all properties" cases other than
>> building an FDT that any of us can think of, and it would create a more
>> complicated interface than just "fetch an FDT".
> 
> It is a simple way to speed up fetching the device tree enormously,
> without needing big changes to either OF or the clients using it -- not
> in the code, but importantly also not conceptually: everything works just
> as before, just a lot faster.

I can safely presume though that this will never ever be used in
practice. And it will be still slower, a tiny bit. And require new code
in both slof and linux.

I can rather see us getting rid of SLOF in the future which in turn will
require the fdt blob pointer in r5 (or whatever it is, forgot) when the
guest starts; so "fetch-all-props" won't be used there either.

>> So I go for the simple one and agree with Alexey's idea.
> 
> When dealing with a whole device tree you have to know about the various
> dynamically generated nodes and props, and handle each appropriately.

The code I am changing fetches the device tree and build an fdt. What is
that special knowledge in this context you are talking about?



-- 
Alexey


Re: [PATCH v3 1/3] PCI: Introduce pcibios_ignore_alignment_request

2019-06-02 Thread Shawn Anastasio




On 5/30/19 10:56 PM, Alexey Kardashevskiy wrote:



On 31/05/2019 08:49, Shawn Anastasio wrote:

On 5/29/19 10:39 PM, Alexey Kardashevskiy wrote:



On 28/05/2019 17:39, Shawn Anastasio wrote:



On 5/28/19 1:27 AM, Alexey Kardashevskiy wrote:



On 28/05/2019 15:36, Oliver wrote:

On Tue, May 28, 2019 at 2:03 PM Shawn Anastasio 
wrote:


Introduce a new pcibios function pcibios_ignore_alignment_request
which allows the PCI core to defer to platform-specific code to
determine whether or not to ignore alignment requests for PCI
resources.

The existing behavior is to simply ignore alignment requests when
PCI_PROBE_ONLY is set. This behavior is maintained by the
default implementation of pcibios_ignore_alignment_request.

Signed-off-by: Shawn Anastasio 
---
    drivers/pci/pci.c   | 9 +++--
    include/linux/pci.h | 1 +
    2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 8abc843b1615..8207a09085d1 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5882,6 +5882,11 @@ resource_size_t __weak
pcibios_default_alignment(void)
   return 0;
    }

+int __weak pcibios_ignore_alignment_request(void)
+{
+   return pci_has_flag(PCI_PROBE_ONLY);
+}
+
    #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
    static char
resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
    static DEFINE_SPINLOCK(resource_alignment_lock);
@@ -5906,9 +5911,9 @@ static resource_size_t
pci_specified_resource_alignment(struct pci_dev *dev,
   p = resource_alignment_param;
   if (!*p && !align)
   goto out;
-   if (pci_has_flag(PCI_PROBE_ONLY)) {
+   if (pcibios_ignore_alignment_request()) {
   align = 0;
-   pr_info_once("PCI: Ignoring requested alignments
(PCI_PROBE_ONLY)\n");
+   pr_info_once("PCI: Ignoring requested alignments\n");
   goto out;
   }


I think the logic here is questionable to begin with. If the user has
explicitly requested re-aligning a resource via the command line then
we should probably do it even if PCI_PROBE_ONLY is set. When it breaks
they get to keep the pieces.

That said, the real issue here is that PCI_PROBE_ONLY probably
shouldn't be set under qemu/kvm. Under the other hypervisor (PowerVM)
hotplugged devices are configured by firmware before it's passed to
the guest and we need to keep the FW assignments otherwise things
break. QEMU however doesn't do any BAR assignments and relies on that
being handled by the guest. At boot time this is done by SLOF, but
Linux only keeps SLOF around until it's extracted the device-tree.
Once that's done SLOF gets blown away and the kernel needs to do its
own BAR assignments. I'm guessing there's a hack in there to make it
work today, but it's a little surprising that it works at all...



The hack is to run a modified qemu-aware "/usr/sbin/rtas_errd" in the
guest which receives an event from qemu (RAS_EPOW from
/proc/interrupts), fetches device tree chunks (and as I understand it -
they come with BARs from phyp but without from qemu) and writes "1" to
"/sys/bus/pci/rescan" which calls pci_assign_resource() eventually:


Interesting. Does this mean that the PHYP hotplug path doesn't
call pci_assign_resource?



I'd expect dlpar_add_slot() to be called under phyp and eventually
pci_device_add() which (I think) may or may not trigger later
reassignment.



If so it means the patch may not
break that platform after all, though it still may not be
the correct way of doing things.



We should probably stop enforcing the PCI_PROBE_ONLY flag - it seems
that (unless resource_alignment= is used) the pseries guest should just
walk through all allocated resources and leave them unchanged.


If we add a pcibios_default_alignment() implementation like was
suggested earlier, then it will behave as if the user has
specified resource_alignment= by default and SLOF's assignments
won't be honored (I think).



I removed pci_add_flags(PCI_PROBE_ONLY) from pSeries_setup_arch and
tried booting with and without pci=resource_alignment= and I can see no
difference - BARs are still aligned to 64K as programmed in SLOF; if I
hack SLOF to align to 4K or 32K - BARs get packed and the guest leaves
them unchanged.



I guess it boils down to one question - is it important that we
observe SLOF's initial BAR assignments?


It isn't if it's SLOF but it is if it's phyp. It used to not
allow/support BAR reassignment and even if it does not, I'd rather avoid
touching them.


A quick update. I tried removing pci_add_flags(PCI_PROBE_ONLY) which
worked, but if I add an implementation of pcibios_default_alignment
which simply returns PAGE_SIZE, my VM fails to boot and many errors
from the virtio disk driver are printed to the console.

After some investigation, it seems that with pcibios_default_alignment
present, Linux will reallocate all resources provided by SLOF on
boot. I'm still not sure why exactly 

Re: [PATCH kernel v3 1/3] powerpc/iommu: Allow bypass-only for DMA

2019-06-02 Thread David Gibson
On Thu, May 30, 2019 at 05:03:53PM +1000, Alexey Kardashevskiy wrote:
> POWER8 and newer support a bypass mode which maps all host memory to
> PCI buses so an IOMMU table is not always required. However if we fail to
> create such a table, the DMA setup fails and the kernel does not boot.
> 
> This skips the 32bit DMA setup check if the bypass can be selected.
> 
> Signed-off-by: Alexey Kardashevskiy 

Reviewed-by: David Gibson 

> ---
> 
> This minor thing helped me debugging next 2 patches so it can help
> somebody else too.
> ---
>  arch/powerpc/kernel/dma-iommu.c | 11 +--
>  1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
> index 09231ef06d01..809c1dc01edf 100644
> --- a/arch/powerpc/kernel/dma-iommu.c
> +++ b/arch/powerpc/kernel/dma-iommu.c
> @@ -118,18 +118,17 @@ int dma_iommu_dma_supported(struct device *dev, u64 
> mask)
>  {
>   struct iommu_table *tbl = get_iommu_table_base(dev);
>  
> - if (!tbl) {
> - dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
> - ", table unavailable\n", mask);
> - return 0;
> - }
> -
>   if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
>   dev->archdata.iommu_bypass = true;
>   dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
>   return 1;
>   }
>  
> + if (!tbl) {
> + dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, 
> table unavailable\n", mask);
> + return 0;
> + }
> +
>   if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
>   dev_info(dev, "Warning: IOMMU offset too big for device 
> mask\n");
>   dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[PATCH BACKPORTv2 4.4] crypto: vmx - ghash: do nosimd fallback manually

2019-06-02 Thread Daniel Axtens
commit 357d065a44cdd77ed5ff35155a989f2a763e96ef upstream.
[backported: the VMX driver did not use crypto_simd_usable() until
 after 5.1, CRYPTO_ALG_TYPE_SHASH was still specified in .options
 until after 4.14, and the sequence for preparing the kernel to use
 vmx changed after 4.4.]

VMX ghash was using a fallback that did not support interleaving simd
and nosimd operations, leading to failures in the extended test suite.

If I understood correctly, Eric's suggestion was to use the same
data format that the generic code uses, allowing us to call into it
with the same contexts. I wasn't able to get that to work - I think
there's a very different key structure and data layout being used.

So instead steal the arm64 approach and perform the fallback
operations directly if required.

Fixes: cc333cd68dfa ("crypto: vmx - Adding GHASH routines for VMX module")
Cc: sta...@vger.kernel.org # v4.1+
Reported-by: Eric Biggers 
Signed-off-by: Daniel Axtens 
Acked-by: Ard Biesheuvel 
Tested-by: Michael Ellerman 
Signed-off-by: Herbert Xu 
Signed-off-by: Daniel Axtens 
---

v2: do stable backport form correctly.

---
 drivers/crypto/vmx/ghash.c | 218 +++--
 1 file changed, 89 insertions(+), 129 deletions(-)

diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c
index 84b9389bf1ed..d6b68cf7bba7 100644
--- a/drivers/crypto/vmx/ghash.c
+++ b/drivers/crypto/vmx/ghash.c
@@ -1,22 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
 /**
  * GHASH routines supporting VMX instructions on the Power 8
  *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 only.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Copyright (C) 2015, 2019 International Business Machines Inc.
  *
  * Author: Marcelo Henrique Cerri 
+ *
+ * Extended by Daniel Axtens  to replace the fallback
+ * mechanism. The new approach is based on arm64 code, which is:
+ *   Copyright (C) 2014 - 2018 Linaro Ltd. 
  */
 
 #include 
@@ -39,71 +31,25 @@ void gcm_ghash_p8(u64 Xi[2], const u128 htable[16],
  const u8 *in, size_t len);
 
 struct p8_ghash_ctx {
+   /* key used by vector asm */
u128 htable[16];
-   struct crypto_shash *fallback;
+   /* key used by software fallback */
+   be128 key;
 };
 
 struct p8_ghash_desc_ctx {
u64 shash[2];
u8 buffer[GHASH_DIGEST_SIZE];
int bytes;
-   struct shash_desc fallback_desc;
 };
 
-static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
-{
-   const char *alg = "ghash-generic";
-   struct crypto_shash *fallback;
-   struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm);
-   struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   fallback = crypto_alloc_shash(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
-   if (IS_ERR(fallback)) {
-   printk(KERN_ERR
-  "Failed to allocate transformation for '%s': %ld\n",
-  alg, PTR_ERR(fallback));
-   return PTR_ERR(fallback);
-   }
-
-   crypto_shash_set_flags(fallback,
-  crypto_shash_get_flags((struct crypto_shash
-  *) tfm));
-
-   /* Check if the descsize defined in the algorithm is still enough. */
-   if (shash_tfm->descsize < sizeof(struct p8_ghash_desc_ctx)
-   + crypto_shash_descsize(fallback)) {
-   printk(KERN_ERR
-  "Desc size of the fallback implementation (%s) does not 
match the expected value: %lu vs %u\n",
-  alg,
-  shash_tfm->descsize - sizeof(struct p8_ghash_desc_ctx),
-  crypto_shash_descsize(fallback));
-   return -EINVAL;
-   }
-   ctx->fallback = fallback;
-
-   return 0;
-}
-
-static void p8_ghash_exit_tfm(struct crypto_tfm *tfm)
-{
-   struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   if (ctx->fallback) {
-   crypto_free_shash(ctx->fallback);
-   ctx->fallback = NULL;
-   }
-}
-
 static int p8_ghash_init(struct shash_desc *desc)
 {
-   struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
 
dctx->bytes = 0;
memset(dctx->shash, 0, GHASH_DIGEST_SIZE);
-   dctx->fallback_desc.tfm = ctx->fallback;
-   dctx->fallback_desc.flags 

[PATCH BACKPORTv2 4.9, 4.14] crypto: vmx - ghash: do nosimd fallback manually

2019-06-02 Thread Daniel Axtens
commit 357d065a44cdd77ed5ff35155a989f2a763e96ef upstream.
[backported: the VMX driver did not use crypto_simd_usable() until
 after 5.1, CRYPTO_ALG_TYPE_SHASH was still specified in .options
 until after 4.14]

VMX ghash was using a fallback that did not support interleaving simd
and nosimd operations, leading to failures in the extended test suite.

If I understood correctly, Eric's suggestion was to use the same
data format that the generic code uses, allowing us to call into it
with the same contexts. I wasn't able to get that to work - I think
there's a very different key structure and data layout being used.

So instead steal the arm64 approach and perform the fallback
operations directly if required.

Fixes: cc333cd68dfa ("crypto: vmx - Adding GHASH routines for VMX module")
Cc: sta...@vger.kernel.org # v4.1+
Reported-by: Eric Biggers 
Signed-off-by: Daniel Axtens 
Acked-by: Ard Biesheuvel 
Tested-by: Michael Ellerman 
Signed-off-by: Herbert Xu 
Signed-off-by: Daniel Axtens 
---

v2: do stable backport form correctly.

---
 drivers/crypto/vmx/ghash.c | 213 +++--
 1 file changed, 87 insertions(+), 126 deletions(-)

diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c
index 1c4b5b889fba..1bfe867c0b7b 100644
--- a/drivers/crypto/vmx/ghash.c
+++ b/drivers/crypto/vmx/ghash.c
@@ -1,22 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
 /**
  * GHASH routines supporting VMX instructions on the Power 8
  *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 only.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Copyright (C) 2015, 2019 International Business Machines Inc.
  *
  * Author: Marcelo Henrique Cerri 
+ *
+ * Extended by Daniel Axtens  to replace the fallback
+ * mechanism. The new approach is based on arm64 code, which is:
+ *   Copyright (C) 2014 - 2018 Linaro Ltd. 
  */
 
 #include 
@@ -39,71 +31,25 @@ void gcm_ghash_p8(u64 Xi[2], const u128 htable[16],
  const u8 *in, size_t len);
 
 struct p8_ghash_ctx {
+   /* key used by vector asm */
u128 htable[16];
-   struct crypto_shash *fallback;
+   /* key used by software fallback */
+   be128 key;
 };
 
 struct p8_ghash_desc_ctx {
u64 shash[2];
u8 buffer[GHASH_DIGEST_SIZE];
int bytes;
-   struct shash_desc fallback_desc;
 };
 
-static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
-{
-   const char *alg = "ghash-generic";
-   struct crypto_shash *fallback;
-   struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm);
-   struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   fallback = crypto_alloc_shash(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
-   if (IS_ERR(fallback)) {
-   printk(KERN_ERR
-  "Failed to allocate transformation for '%s': %ld\n",
-  alg, PTR_ERR(fallback));
-   return PTR_ERR(fallback);
-   }
-
-   crypto_shash_set_flags(fallback,
-  crypto_shash_get_flags((struct crypto_shash
-  *) tfm));
-
-   /* Check if the descsize defined in the algorithm is still enough. */
-   if (shash_tfm->descsize < sizeof(struct p8_ghash_desc_ctx)
-   + crypto_shash_descsize(fallback)) {
-   printk(KERN_ERR
-  "Desc size of the fallback implementation (%s) does not 
match the expected value: %lu vs %u\n",
-  alg,
-  shash_tfm->descsize - sizeof(struct p8_ghash_desc_ctx),
-  crypto_shash_descsize(fallback));
-   return -EINVAL;
-   }
-   ctx->fallback = fallback;
-
-   return 0;
-}
-
-static void p8_ghash_exit_tfm(struct crypto_tfm *tfm)
-{
-   struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   if (ctx->fallback) {
-   crypto_free_shash(ctx->fallback);
-   ctx->fallback = NULL;
-   }
-}
-
 static int p8_ghash_init(struct shash_desc *desc)
 {
-   struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
 
dctx->bytes = 0;
memset(dctx->shash, 0, GHASH_DIGEST_SIZE);
-   dctx->fallback_desc.tfm = ctx->fallback;
-   dctx->fallback_desc.flags = desc->flags;
-   return crypto_shash_init(>fallback_desc);
+   

[PATCH BACKPORTv2 4.19, 5.0, 5.1] crypto: vmx - ghash: do nosimd fallback manually

2019-06-02 Thread Daniel Axtens
commit 357d065a44cdd77ed5ff35155a989f2a763e96ef upstream.
[backported: the VMX driver did not use crypto_simd_usable() until
 after 5.1]

VMX ghash was using a fallback that did not support interleaving simd
and nosimd operations, leading to failures in the extended test suite.

If I understood correctly, Eric's suggestion was to use the same
data format that the generic code uses, allowing us to call into it
with the same contexts. I wasn't able to get that to work - I think
there's a very different key structure and data layout being used.

So instead steal the arm64 approach and perform the fallback
operations directly if required.

Fixes: cc333cd68dfa ("crypto: vmx - Adding GHASH routines for VMX module")
Cc: sta...@vger.kernel.org # v4.1+
Reported-by: Eric Biggers 
Signed-off-by: Daniel Axtens 
Acked-by: Ard Biesheuvel 
Tested-by: Michael Ellerman 
Signed-off-by: Herbert Xu 
Signed-off-by: Daniel Axtens 
---

v2: do stable backport form correctly.

---
 drivers/crypto/vmx/ghash.c | 212 +++--
 1 file changed, 86 insertions(+), 126 deletions(-)

diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c
index dd8b8716467a..2d1a8cd35509 100644
--- a/drivers/crypto/vmx/ghash.c
+++ b/drivers/crypto/vmx/ghash.c
@@ -1,22 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
 /**
  * GHASH routines supporting VMX instructions on the Power 8
  *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 only.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Copyright (C) 2015, 2019 International Business Machines Inc.
  *
  * Author: Marcelo Henrique Cerri 
+ *
+ * Extended by Daniel Axtens  to replace the fallback
+ * mechanism. The new approach is based on arm64 code, which is:
+ *   Copyright (C) 2014 - 2018 Linaro Ltd. 
  */
 
 #include 
@@ -39,71 +31,25 @@ void gcm_ghash_p8(u64 Xi[2], const u128 htable[16],
  const u8 *in, size_t len);
 
 struct p8_ghash_ctx {
+   /* key used by vector asm */
u128 htable[16];
-   struct crypto_shash *fallback;
+   /* key used by software fallback */
+   be128 key;
 };
 
 struct p8_ghash_desc_ctx {
u64 shash[2];
u8 buffer[GHASH_DIGEST_SIZE];
int bytes;
-   struct shash_desc fallback_desc;
 };
 
-static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
-{
-   const char *alg = "ghash-generic";
-   struct crypto_shash *fallback;
-   struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm);
-   struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   fallback = crypto_alloc_shash(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
-   if (IS_ERR(fallback)) {
-   printk(KERN_ERR
-  "Failed to allocate transformation for '%s': %ld\n",
-  alg, PTR_ERR(fallback));
-   return PTR_ERR(fallback);
-   }
-
-   crypto_shash_set_flags(fallback,
-  crypto_shash_get_flags((struct crypto_shash
-  *) tfm));
-
-   /* Check if the descsize defined in the algorithm is still enough. */
-   if (shash_tfm->descsize < sizeof(struct p8_ghash_desc_ctx)
-   + crypto_shash_descsize(fallback)) {
-   printk(KERN_ERR
-  "Desc size of the fallback implementation (%s) does not 
match the expected value: %lu vs %u\n",
-  alg,
-  shash_tfm->descsize - sizeof(struct p8_ghash_desc_ctx),
-  crypto_shash_descsize(fallback));
-   return -EINVAL;
-   }
-   ctx->fallback = fallback;
-
-   return 0;
-}
-
-static void p8_ghash_exit_tfm(struct crypto_tfm *tfm)
-{
-   struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   if (ctx->fallback) {
-   crypto_free_shash(ctx->fallback);
-   ctx->fallback = NULL;
-   }
-}
-
 static int p8_ghash_init(struct shash_desc *desc)
 {
-   struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
 
dctx->bytes = 0;
memset(dctx->shash, 0, GHASH_DIGEST_SIZE);
-   dctx->fallback_desc.tfm = ctx->fallback;
-   dctx->fallback_desc.flags = desc->flags;
-   return crypto_shash_init(&dctx->fallback_desc);
+   return 0;
 }
 
 static int p8_ghash_setkey(struct crypto_shash *tfm, const 

Re: [PATCH BACKPORT 4.19, 5.0, 5.1] crypto: vmx - ghash: do nosimd fallback manually

2019-06-02 Thread Daniel Axtens
Christophe Leroy  writes:

> Daniel Axtens  a écrit :
>
> Hi
>
> I think you have to mention the upstream commit Id when submitting a  
> patch to stable, see   
> https://elixir.bootlin.com/linux/v5.2-rc1/source/Documentation/process/stable-kernel-rules.rst

Argh, right, sorry, still in Canonical Stable Release Update mode:
>> (backported from commit 357d065a44cdd77ed5ff35155a989f2a763e96ef)

I'll do a backport v2 with the correct format.

Regards,
Daniel

>
> Christophe
>
>> VMX ghash was using a fallback that did not support interleaving simd
>> and nosimd operations, leading to failures in the extended test suite.
>>
>> If I understood correctly, Eric's suggestion was to use the same
>> data format that the generic code uses, allowing us to call into it
>> with the same contexts. I wasn't able to get that to work - I think
>> there's a very different key structure and data layout being used.
>>
>> So instead steal the arm64 approach and perform the fallback
>> operations directly if required.
>>
>> Fixes: cc333cd68dfa ("crypto: vmx - Adding GHASH routines for VMX module")
>> Cc: sta...@vger.kernel.org # v4.1+
>> Reported-by: Eric Biggers 
>> Signed-off-by: Daniel Axtens 
>> Acked-by: Ard Biesheuvel 
>> Tested-by: Michael Ellerman 
>> Signed-off-by: Herbert Xu 

>> Signed-off-by: Daniel Axtens 
>> ---
>>  drivers/crypto/vmx/ghash.c | 212 +++--
>>  1 file changed, 86 insertions(+), 126 deletions(-)
>>
>> diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c
>> index dd8b8716467a..2d1a8cd35509 100644
>> --- a/drivers/crypto/vmx/ghash.c
>> +++ b/drivers/crypto/vmx/ghash.c
>> @@ -1,22 +1,14 @@
>> +// SPDX-License-Identifier: GPL-2.0
>>  /**
>>   * GHASH routines supporting VMX instructions on the Power 8
>>   *
>> - * Copyright (C) 2015 International Business Machines Inc.
>> - *
>> - * This program is free software; you can redistribute it and/or modify
>> - * it under the terms of the GNU General Public License as published by
>> - * the Free Software Foundation; version 2 only.
>> - *
>> - * This program is distributed in the hope that it will be useful,
>> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> - * GNU General Public License for more details.
>> - *
>> - * You should have received a copy of the GNU General Public License
>> - * along with this program; if not, write to the Free Software
>> - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
>> + * Copyright (C) 2015, 2019 International Business Machines Inc.
>>   *
>>   * Author: Marcelo Henrique Cerri 
>> + *
>> + * Extended by Daniel Axtens  to replace the fallback
>> + * mechanism. The new approach is based on arm64 code, which is:
>> + *   Copyright (C) 2014 - 2018 Linaro Ltd. 
>>   */
>>
>>  #include 
>> @@ -39,71 +31,25 @@ void gcm_ghash_p8(u64 Xi[2], const u128 htable[16],
>>const u8 *in, size_t len);
>>
>>  struct p8_ghash_ctx {
>> +/* key used by vector asm */
>>  u128 htable[16];
>> -struct crypto_shash *fallback;
>> +/* key used by software fallback */
>> +be128 key;
>>  };
>>
>>  struct p8_ghash_desc_ctx {
>>  u64 shash[2];
>>  u8 buffer[GHASH_DIGEST_SIZE];
>>  int bytes;
>> -struct shash_desc fallback_desc;
>>  };
>>
>> -static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
>> -{
>> -const char *alg = "ghash-generic";
>> -struct crypto_shash *fallback;
>> -struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm);
>> -struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
>> -
>> -fallback = crypto_alloc_shash(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
>> -if (IS_ERR(fallback)) {
>> -printk(KERN_ERR
>> -   "Failed to allocate transformation for '%s': %ld\n",
>> -   alg, PTR_ERR(fallback));
>> -return PTR_ERR(fallback);
>> -}
>> -
>> -crypto_shash_set_flags(fallback,
>> -   crypto_shash_get_flags((struct crypto_shash
>> -   *) tfm));
>> -
>> -/* Check if the descsize defined in the algorithm is still enough. */
>> -if (shash_tfm->descsize < sizeof(struct p8_ghash_desc_ctx)
>> -+ crypto_shash_descsize(fallback)) {
>> -printk(KERN_ERR
>> -   "Desc size of the fallback implementation (%s) does not  
>> match the expected value: %lu vs %u\n",
>> -   alg,
>> -   shash_tfm->descsize - sizeof(struct p8_ghash_desc_ctx),
>> -   crypto_shash_descsize(fallback));
>> -return -EINVAL;
>> -}
>> -ctx->fallback = fallback;
>> -
>> -return 0;
>> -}
>> -
>> -static void p8_ghash_exit_tfm(struct crypto_tfm *tfm)
>> -{
>> -struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
>> -
>> -if (ctx->fallback) {
>> -crypto_free_shash(ctx->fallback);
>> -ctx->fallback = NULL;
>> -   

Re: [PATCH] powerpc/nvdimm: Add support for multibyte read/write for metadata

2019-06-02 Thread Oliver
On Sun, Jun 2, 2019 at 2:44 PM Aneesh Kumar K.V
 wrote:
>
> SCM_READ/WRITE_METADATA hcall supports multibyte read/write. This patch
> updates the metadata read/write to use 1, 2, 4 or 8 byte read/write as
> mentioned in PAPR document.
>
> READ/WRITE_METADATA hcall supports the 1, 2, 4, or 8 bytes read/write.
> For other values the hcall returns H_P3.

You should probably fold the second paragraph here into the first.

> Hypervisor stores the metadata contents in big-endian format and in-order
> to enable read/write in different granularity, we need to switch the contents
> to big-endian before calling HCALL.
>
> Based on a patch from Oliver O'Halloran 
> Signed-off-by: Aneesh Kumar K.V 
> ---
>  arch/powerpc/platforms/pseries/papr_scm.c | 104 +-
>  1 file changed, 82 insertions(+), 22 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
> b/arch/powerpc/platforms/pseries/papr_scm.c
> index 0176ce66673f..e33cebb8ee6c 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -97,42 +97,102 @@ static int drc_pmem_unbind(struct papr_scm_priv *p)
>  }
>
>  static int papr_scm_meta_get(struct papr_scm_priv *p,
> -   struct nd_cmd_get_config_data_hdr *hdr)
> +struct nd_cmd_get_config_data_hdr *hdr)
>  {
> unsigned long data[PLPAR_HCALL_BUFSIZE];
> +   unsigned long offset, data_offset;
> +   int len, read;
> int64_t ret;
>
> -   if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
> +   if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
> return -EINVAL;
>
> -   ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
> -   hdr->in_offset, 1);
> -
> -   if (ret == H_PARAMETER) /* bad DRC index */
> -   return -ENODEV;
> -   if (ret)
> -   return -EINVAL; /* other invalid parameter */
> -
> -   hdr->out_buf[0] = data[0] & 0xff;
> -
> +   for (len = hdr->in_length; len; len -= read) {
> +
> +   data_offset = hdr->in_length - len;
> +   offset = hdr->in_offset + data_offset;
> +
> +   if (len >= 8)
> +   read = 8;
> +   else if (len >= 4)
> +   read = 4;
> +   else if ( len >= 2)
> +   read = 2;
> +   else
> +   read = 1;
> +
> +   ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
> + offset, read);
> +
> +   if (ret == H_PARAMETER) /* bad DRC index */
> +   return -ENODEV;
> +   if (ret)
> +   return -EINVAL; /* other invalid parameter */
> +
> +   switch (read) {
> +   case 8:
> +   *(uint64_t *)(hdr->out_buf + data_offset) = 
> be64_to_cpu(data[0]);
> +   break;
> +   case 4:
> +   *(uint32_t *)(hdr->out_buf + data_offset) = 
> be32_to_cpu(data[0] & 0x);
> +   break;
> +
> +   case 2:
> +   *(uint16_t *)(hdr->out_buf + data_offset) = 
> be16_to_cpu(data[0] & 0xffff);
> +   break;
> +
> +   case 1:
> +   *(uint32_t *)(hdr->out_buf + data_offset) = (data[0] 
> & 0xff);
> +   break;
> +   }
> +   }
> return 0;
>  }
>
>  static int papr_scm_meta_set(struct papr_scm_priv *p,
> -   struct nd_cmd_set_config_hdr *hdr)
> +struct nd_cmd_set_config_hdr *hdr)
>  {
> +   unsigned long offset, data_offset;
> +   int len, wrote;
> +   unsigned long data;
> +   __be64 data_be;
> int64_t ret;
>
> -   if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
> +   if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
> return -EINVAL;
>
> -   ret = plpar_hcall_norets(H_SCM_WRITE_METADATA,
> -   p->drc_index, hdr->in_offset, hdr->in_buf[0], 1);
> -
> -   if (ret == H_PARAMETER) /* bad DRC index */
> -   return -ENODEV;
> -   if (ret)
> -   return -EINVAL; /* other invalid parameter */
> +   for (len = hdr->in_length; len; len -= wrote) {
> +
> +   data_offset = hdr->in_length - len;
> +   offset = hdr->in_offset + data_offset;
> +
> +   if (len >= 8) {
> +   data = *(uint64_t *)(hdr->in_buf + data_offset);
> +   data_be = cpu_to_be64(data);
> +   wrote = 8;
> +   } else if (len >= 4) {
> +   data = *(uint32_t *)(hdr->in_buf + data_offset);
> +   data &= 0xffffffff;
> +   data_be = cpu_to_be32(data);
> +   

Re: [PATCH 1/3] powerpc/pseries: Simplify cpu readd to use drc_index

2019-06-02 Thread Tyrel Datwyler
On 05/20/2019 08:01 AM, Nathan Lynch wrote:
> Tyrel Datwyler  writes:
> 
>> On 05/16/2019 12:17 PM, Nathan Lynch wrote:
>>> Tyrel Datwyler  writes:
 The current dlpar_cpu_readd() takes in a cpu_id and uses that to look up
 the cpus device_node so that we can get at the ibm,my-drc-index
 property. The only user of cpu readd is an OF notifier call back. This
 call back already has a reference to the device_node and therefore can
 retrieve the drc_index from the device_node.
>>>
>>> dlpar_cpu_readd is a hack to try to change the CPU-node relationship at
>>> runtime without destabilizing the system. It doesn't accomplish that and
>>> it should just be removed (and I'm working on that).
>>>
>>
>> I will politely disagree. We've done exactly this from userspace for
>> years. My experience still suggests that memory affinity is the
>> problem area, and that the work to push this all into the kernel
>> originally was poorly tested.
> 
> Kernel implementation details aside, how do you change the cpu-node
> relationship at runtime without breaking NUMA-aware applications? Is
> this not a fundamental issue to address before adding code like this?
> 

If that is the concern then hotplug in general already breaks them. Take for
example the removal of a faulty processor and then adding a new processor back.
It is quite possible that the new processor is in a different NUMA node. Keep in
mind that in this scenario the new processor and threads gets the same logical
cpu ids as the faulty processor we just removed.

Now we have to ask the question who is right and who is wrong. In this case the
kernel data structures reflect the correct NUMA topology. However, did the NUMA
aware application or libnuma make an assumption that specific sets of logical
cpu ids are always in the same NUMA node?

-Tyrel



Re: [PATCH kernel] prom_init: Fetch flatten device tree from the system firmware

2019-06-02 Thread Segher Boessenkool
Hi!

On Fri, May 31, 2019 at 11:03:26AM +1000, Benjamin Herrenschmidt wrote:
> On Thu, 2019-05-30 at 14:37 -0500, Segher Boessenkool wrote:
> > On Thu, May 30, 2019 at 05:09:06PM +1000, Alexey Kardashevskiy wrote:
> > > so, it is sort-of nack from David and sort-of ack from Segher, what
> > > happens now?
> > 
> > Maybe what we really need just a CI call to get all properties of a node
> > at once?  Will that speed up things enough?
> > 
> > That way you need no change at all in lifetime of properties and how they
> > are used, etc.; just a client getting the properties is a lot faster.
> 
> Hrm... if we're going to create a new interface, let's go for what we
> need.
> 
> What we need is the FDT. It's a rather ubiquitous thing these days, it
> makes sense to have a way to fetch an FDT directly from FW.

That is all you need if you do not want to use OF at all.

If you *do* want to keep having an Open Firmware, what we want or need
is a faster way to walk huge device trees.

> There is no use for the "fetch all properties" cases other than
> building an FDT that any of us can think of, and it would create a more
> complicated interface than just "fetch an FDT".

It is a simple way to speed up fetching the device tree enormously,
without needing big changes to either OF or the clients using it -- not
in the code, but importantly also not conceptually: everything works just
as before, just a lot faster.

> So I go for the simple one and agree with Alexey's idea.

When dealing with a whole device tree you have to know about the various
dynamically generated nodes and props, and handle each appropriately.


Segher


Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.2-3 tag

2019-06-02 Thread pr-tracker-bot
The pull request you sent on Sun, 02 Jun 2019 21:05:12 +1000:

> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
> tags/powerpc-5.2-3

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/460b48a0fefce25beb0fc0139e721c5691d65d7f

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker


[GIT PULL] Please pull powerpc/linux.git powerpc-5.2-3 tag

2019-06-02 Thread Michael Ellerman
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

Hi Linus,

Please pull some more powerpc fixes for 5.2:

The following changes since commit a188339ca5a396acc588e5851ed7e19f66b0ebd9:

  Linux 5.2-rc1 (2019-05-19 15:47:09 -0700)

are available in the git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
tags/powerpc-5.2-3

for you to fetch changes up to 8b909e3548706cbebc0a676067b81aadda57f47e:

  powerpc/kexec: Fix loading of kernel + initramfs with kexec_file_load() 
(2019-05-23 14:00:32 +1000)

- --
powerpc fixes for 5.2 #3

A minor fix to our IMC PMU code to print a less confusing error message when the
driver can't initialise properly.

A fix for a bug where a user requesting an unsupported branch sampling filter
can corrupt PMU state, preventing the PMU from counting properly.

And finally a fix for a bug in our support for kexec_file_load(), which
prevented loading a kernel and initramfs. Most versions of kexec don't yet use
kexec_file_load().

Thanks to:
  Anju T Sudhakar, Dave Young, Madhavan Srinivasan, Ravi Bangoria, Thiago Jung
  Bauermann.

- --
Anju T Sudhakar (1):
  powerpc/powernv: Return for invalid IMC domain

Ravi Bangoria (1):
  powerpc/perf: Fix MMCRA corruption by bhrb_filter

Thiago Jung Bauermann (1):
  powerpc/kexec: Fix loading of kernel + initramfs with kexec_file_load()


 arch/powerpc/kernel/kexec_elf_64.c| 6 +-
 arch/powerpc/perf/core-book3s.c   | 6 --
 arch/powerpc/perf/power8-pmu.c| 3 +++
 arch/powerpc/perf/power9-pmu.c| 3 +++
 arch/powerpc/platforms/powernv/opal-imc.c | 4 
 5 files changed, 19 insertions(+), 3 deletions(-)
-BEGIN PGP SIGNATURE-

iQIcBAEBAgAGBQJc860sAAoJEFHr6jzI4aWAgk0QAJ7e67M/DrigLDIi5LdnwDQQ
AtQW+QzeoBHWiSgWfibqv5NjC9XCdOtvbOkD44TAlF99YMe5k8wShLLwiPSCIYEu
7r83+NHPp7jpeoO8fmE4dTJsmp4Ez+cJfOKpAF6h2w+1yJ5gL2AP5wNLUBi6Cliw
lUIRb73JgWj2hwu0HMNAxbE+mlyIpi8fXRk8TeUXVB+IEInOQxU0x/RkxqN4cCtG
f0hzAnZPywdDvRBuU6roPU3zrII7nVgrLUPXjgin/v58sdqR7zFnWnsm+ou0jkuy
K5zMcCuqZ6lrYjoak+OiqOt8CcalBtqju9ZANQkDIe5hMhXn4Maex1YbFE0i1UYm
Ljbm6Dp4dSTxQcx7GV1xMzHGNHEMQFKSABX+jF9l/KOVl4aVZAEz5F6DNKZO4lo+
EVX9HPBb6ZPyvwntLei8zn9C9LiSVWP5zsAW2zFam4isi498Ca1YpAyoSd58NrRn
WXVcDwMIp9c8uiQllbICNWdHzJhJWhUu/lW2idKFy05zG5+g6dg80StVpYlaEZwK
jggKwkD1H9VWrOZjoIHceOWPUxfjJ6wrvkovGqQqx6l9CkpEYfn1EG0p4s7CwXZ/
wEq0wVsfVaHPkEDSHSwg8mI9ZaEwoY6WMXE63WbVPNMWN26yw5vLJJx4o91Gk/wq
1F4UgNfF5XuQu8m5NWyU
=qxoS
-END PGP SIGNATURE-


Re: [PATCH] scsi: ibmvscsi: Don't use rc uninitialized in ibmvscsi_do_work

2019-06-02 Thread Michael Ellerman
Hi Nathan,

Nathan Chancellor  writes:
> clang warns:
>
> drivers/scsi/ibmvscsi/ibmvscsi.c:2126:7: warning: variable 'rc' is used
> uninitialized whenever switch case is taken [-Wsometimes-uninitialized]
> case IBMVSCSI_HOST_ACTION_NONE:
>  ^
> drivers/scsi/ibmvscsi/ibmvscsi.c:2151:6: note: uninitialized use occurs
> here
> if (rc) {
> ^~
>
> Initialize rc to zero so that the atomic_set and dev_err statement don't
> trigger for the cases that just break.
>
> Fixes: 035a3c4046b5 ("scsi: ibmvscsi: redo driver work thread to use enum 
> action states")
> Link: https://github.com/ClangBuiltLinux/linux/issues/502
> Signed-off-by: Nathan Chancellor 
> ---
>  drivers/scsi/ibmvscsi/ibmvscsi.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c 
> b/drivers/scsi/ibmvscsi/ibmvscsi.c
> index 727c31dc11a0..6714d8043e62 100644
> --- a/drivers/scsi/ibmvscsi/ibmvscsi.c
> +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
> @@ -2118,7 +2118,7 @@ static unsigned long ibmvscsi_get_desired_dma(struct 
> vio_dev *vdev)
>  static void ibmvscsi_do_work(struct ibmvscsi_host_data *hostdata)
>  {
>   unsigned long flags;
> - int rc;
> + int rc = 0;
>   char *action = "reset";
>  
>   spin_lock_irqsave(hostdata->host->host_lock, flags);

It's always preferable IMHO to keep any initialisation as localised as
possible, so that the compiler can continue to warn about uninitialised
usages elsewhere. In this case that would mean doing the rc = 0 in the
switch, something like:

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 727c31dc11a0..7ee5755cf636 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -2123,9 +2123,6 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data 
*hostdata)
 
spin_lock_irqsave(hostdata->host->host_lock, flags);
switch (hostdata->action) {
-   case IBMVSCSI_HOST_ACTION_NONE:
-   case IBMVSCSI_HOST_ACTION_UNBLOCK:
-   break;
case IBMVSCSI_HOST_ACTION_RESET:
spin_unlock_irqrestore(hostdata->host->host_lock, flags);
rc = ibmvscsi_reset_crq_queue(&hostdata->queue, hostdata);
@@ -2142,7 +2139,10 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data 
*hostdata)
if (!rc)
rc = ibmvscsi_send_crq(hostdata, 
0xC001000000000000LL, 0);
break;
+   case IBMVSCSI_HOST_ACTION_NONE:
+   case IBMVSCSI_HOST_ACTION_UNBLOCK:
default:
+   rc = 0;
break;
}


But then that makes me wonder if that's actually correct?

If we get an action that we don't recognise should we just throw it away
like that? (by doing hostdata->action = IBMVSCSI_HOST_ACTION_NONE). Tyrel?

cheers


[Bug 203517] WARNING: inconsistent lock state. inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.

2019-06-02 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=203517

Michael Ellerman (mich...@ellerman.id.au) changed:

   What|Removed |Added

 CC||mich...@ellerman.id.au

--- Comment #10 from Michael Ellerman (mich...@ellerman.id.au) ---
No, it's in mainline since Friday so it will get picked up for stable in the
next week or so:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fee13fe96529523a709d1fff487f14a5e0d56d34

-- 
You are receiving this mail because:
You are watching the assignee of the bug.