[RFC PATCH 12/18] powerpc/chrp: Move PHB discovery

2020-09-24 Thread Oliver O'Halloran
Signed-off-by: Oliver O'Halloran 
---
compile tested with chrp32_defconfig
---
 arch/powerpc/platforms/chrp/pci.c   |  8 
 arch/powerpc/platforms/chrp/setup.c | 12 +---
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/platforms/chrp/pci.c 
b/arch/powerpc/platforms/chrp/pci.c
index b2c2bf35b76c..8c421dc78b28 100644
--- a/arch/powerpc/platforms/chrp/pci.c
+++ b/arch/powerpc/platforms/chrp/pci.c
@@ -314,6 +314,14 @@ chrp_find_bridges(void)
}
}
of_node_put(root);
+
+   /*
+*  "Temporary" fixes for PCI devices.
+*  -- Geert
+*/
+   hydra_init();   /* Mac I/O */
+
+   pci_create_OF_bus_map();
 }
 
 /* SL82C105 IDE Control/Status Register */
diff --git a/arch/powerpc/platforms/chrp/setup.c 
b/arch/powerpc/platforms/chrp/setup.c
index c45435aa5e36..3cfc382841e5 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -334,22 +334,11 @@ static void __init chrp_setup_arch(void)
/* On pegasos, enable the L2 cache if not already done by OF */
pegasos_set_l2cr();
 
-   /* Lookup PCI host bridges */
-   chrp_find_bridges();
-
-   /*
-*  Temporary fixes for PCI devices.
-*  -- Geert
-*/
-   hydra_init();   /* Mac I/O */
-
/*
 *  Fix the Super I/O configuration
 */
sio_init();
 
-   pci_create_OF_bus_map();
-
/*
 * Print the banner, then scroll down so boot progress
 * can be printed.  -- Cort
@@ -582,6 +571,7 @@ define_machine(chrp) {
.name   = "CHRP",
.probe  = chrp_probe,
.setup_arch = chrp_setup_arch,
+   .discover_phbs  = chrp_find_bridges,
.init   = chrp_init2,
.show_cpuinfo   = chrp_show_cpuinfo,
.init_IRQ   = chrp_init_IRQ,
-- 
2.26.2



[RFC PATCH 14/18] powerpc/embedded6xx/linkstation: Move PHB discovery

2020-09-24 Thread Oliver O'Halloran
Signed-off-by: Oliver O'Halloran 
---
compile tested with linkstation_defconfig
---
 arch/powerpc/platforms/embedded6xx/linkstation.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c 
b/arch/powerpc/platforms/embedded6xx/linkstation.c
index f514d5d28cd4..eb8342e7f84e 100644
--- a/arch/powerpc/platforms/embedded6xx/linkstation.c
+++ b/arch/powerpc/platforms/embedded6xx/linkstation.c
@@ -63,15 +63,18 @@ static int __init linkstation_add_bridge(struct device_node 
*dev)
 }
 
 static void __init linkstation_setup_arch(void)
+{
+   printk(KERN_INFO "BUFFALO Network Attached Storage Series\n");
+   printk(KERN_INFO "(C) 2002-2005 BUFFALO INC.\n");
+}
+
+static void __init linkstation_setup_pci(void)
 {
struct device_node *np;
 
/* Lookup PCI host bridges */
for_each_compatible_node(np, "pci", "mpc10x-pci")
linkstation_add_bridge(np);
-
-   printk(KERN_INFO "BUFFALO Network Attached Storage Series\n");
-   printk(KERN_INFO "(C) 2002-2005 BUFFALO INC.\n");
 }
 
 /*
@@ -153,6 +156,7 @@ define_machine(linkstation){
.name   = "Buffalo Linkstation",
.probe  = linkstation_probe,
.setup_arch = linkstation_setup_arch,
+   .discover_phbs  = linkstation_setup_pci,
.init_IRQ   = linkstation_init_IRQ,
.show_cpuinfo   = linkstation_show_cpuinfo,
.get_irq= mpic_get_irq,
-- 
2.26.2



[RFC PATCH 13/18] powerpc/embedded6xx/holly: Move PHB discovery

2020-09-24 Thread Oliver O'Halloran
Signed-off-by: Oliver O'Halloran 
---
compile tested with holly_defconfig
---
 arch/powerpc/platforms/embedded6xx/holly.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/embedded6xx/holly.c 
b/arch/powerpc/platforms/embedded6xx/holly.c
index d8f2e2c737bb..53065d564161 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -108,15 +108,13 @@ static void holly_remap_bridge(void)
tsi108_write_reg(TSI108_PCI_P2O_BAR2, 0x0);
 }
 
-static void __init holly_setup_arch(void)
+static void __init holly_init_pci(void)
 {
struct device_node *np;
 
if (ppc_md.progress)
ppc_md.progress("holly_setup_arch():set_bridge", 0);
 
-   tsi108_csr_vir_base = get_vir_csrbase();
-
/* setup PCI host bridge */
holly_remap_bridge();
 
@@ -127,6 +125,11 @@ static void __init holly_setup_arch(void)
ppc_md.pci_exclude_device = holly_exclude_device;
if (ppc_md.progress)
ppc_md.progress("tsi108: resources set", 0x100);
+}
+
+static void __init holly_setup_arch(void)
+{
+   tsi108_csr_vir_base = get_vir_csrbase();
 
printk(KERN_INFO "PPC750GX/CL Platform\n");
 }
@@ -259,6 +262,7 @@ define_machine(holly){
.name   = "PPC750 GX/CL TSI",
.probe  = holly_probe,
.setup_arch = holly_setup_arch,
+   .discover_phbs  = holly_init_pci,
.init_IRQ   = holly_init_IRQ,
.show_cpuinfo   = holly_show_cpuinfo,
.get_irq= mpic_get_irq,
-- 
2.26.2



[RFC PATCH 15/18] powerpc/embedded6xx/mpc7448: Move PHB discovery

2020-09-24 Thread Oliver O'Halloran
Signed-off-by: Oliver O'Halloran 
---
compile tested with mpc7448_hpc2_defconfig
---
 arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c 
b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index 15437abe1f6d..20b727584e40 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -58,16 +58,14 @@ int mpc7448_hpc2_exclude_device(struct pci_controller *hose,
return PCIBIOS_SUCCESSFUL;
 }
 
-static void __init mpc7448_hpc2_setup_arch(void)
+static void __init mpc7448_hpc2_setup_pci(void)
 {
+#ifdef CONFIG_PCI
struct device_node *np;
if (ppc_md.progress)
-   ppc_md.progress("mpc7448_hpc2_setup_arch():set_bridge", 0);
-
-   tsi108_csr_vir_base = get_vir_csrbase();
+   ppc_md.progress("mpc7448_hpc2_setup_pci():set_bridge", 0);
 
/* setup PCI host bridge */
-#ifdef CONFIG_PCI
for_each_compatible_node(np, "pci", "tsi108-pci")
tsi108_setup_pci(np, MPC7448HPC2_PCI_CFG_PHYS, 0);
 
@@ -75,6 +73,11 @@ static void __init mpc7448_hpc2_setup_arch(void)
if (ppc_md.progress)
ppc_md.progress("tsi108: resources set", 0x100);
 #endif
+}
+
+static void __init mpc7448_hpc2_setup_arch(void)
+{
+   tsi108_csr_vir_base = get_vir_csrbase();
 
printk(KERN_INFO "MPC7448HPC2 (TAIGA) Platform\n");
printk(KERN_INFO
@@ -180,6 +183,7 @@ define_machine(mpc7448_hpc2){
.name   = "MPC7448 HPC2",
.probe  = mpc7448_hpc2_probe,
.setup_arch = mpc7448_hpc2_setup_arch,
+   .discover_phbs  = mpc7448_hpc2_setup_pci,
.init_IRQ   = mpc7448_hpc2_init_IRQ,
.show_cpuinfo   = mpc7448_hpc2_show_cpuinfo,
.get_irq= mpic_get_irq,
-- 
2.26.2



[RFC PATCH 16/18] powerpc/embedded6xx/mvme5100: Move PHB discovery

2020-09-24 Thread Oliver O'Halloran
Signed-off-by: Oliver O'Halloran 
---
compile tested with mvme5100_defconfig
---
 arch/powerpc/platforms/embedded6xx/mvme5100.c   | 13 -
 arch/powerpc/platforms/embedded6xx/storcenter.c |  8 ++--
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c 
b/arch/powerpc/platforms/embedded6xx/mvme5100.c
index 1cd488daa0bf..c06a0490d157 100644
--- a/arch/powerpc/platforms/embedded6xx/mvme5100.c
+++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c
@@ -154,17 +154,19 @@ static const struct of_device_id mvme5100_of_bus_ids[] 
__initconst = {
  */
 static void __init mvme5100_setup_arch(void)
 {
-   struct device_node *np;
-
if (ppc_md.progress)
ppc_md.progress("mvme5100_setup_arch()", 0);
 
-   for_each_compatible_node(np, "pci", "hawk-pci")
-   mvme5100_add_bridge(np);
-
restart = ioremap(BOARD_MODRST_REG, 4);
 }
 
+static void __init mvme5100_setup_pci(void)
+{
+   struct device_node *np;
+
+   for_each_compatible_node(np, "pci", "hawk-pci")
+   mvme5100_add_bridge(np);
+}
 
 static void mvme5100_show_cpuinfo(struct seq_file *m)
 {
@@ -205,6 +207,7 @@ define_machine(mvme5100) {
.name   = "MVME5100",
.probe  = mvme5100_probe,
.setup_arch = mvme5100_setup_arch,
+   .discover_phbs  = mvme5100_setup_pci,
.init_IRQ   = mvme5100_pic_init,
.show_cpuinfo   = mvme5100_show_cpuinfo,
.get_irq= mpic_get_irq,
diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c 
b/arch/powerpc/platforms/embedded6xx/storcenter.c
index ed1914dd34bb..e8c5de54b0e1 100644
--- a/arch/powerpc/platforms/embedded6xx/storcenter.c
+++ b/arch/powerpc/platforms/embedded6xx/storcenter.c
@@ -65,14 +65,17 @@ static int __init storcenter_add_bridge(struct device_node 
*dev)
 }
 
 static void __init storcenter_setup_arch(void)
+{
+   printk(KERN_INFO "IOMEGA StorCenter\n");
+}
+
+static void __init storcenter_setup_pci(void)
 {
struct device_node *np;
 
/* Lookup PCI host bridges */
for_each_compatible_node(np, "pci", "mpc10x-pci")
storcenter_add_bridge(np);
-
-   printk(KERN_INFO "IOMEGA StorCenter\n");
 }
 
 /*
@@ -116,6 +119,7 @@ define_machine(storcenter){
.name   = "IOMEGA StorCenter",
.probe  = storcenter_probe,
.setup_arch = storcenter_setup_arch,
+   .discover_phbs  = storcenter_setup_pci,
.init_IRQ   = storcenter_init_IRQ,
.get_irq= mpic_get_irq,
.restart= storcenter_restart,
-- 
2.26.2



[RFC PATCH 17/18] powerpc/pasemi: Move PHB discovery

2020-09-24 Thread Oliver O'Halloran
Signed-off-by: Oliver O'Halloran 
---
compile tested with pasemi_defconfig
---
 arch/powerpc/platforms/pasemi/setup.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pasemi/setup.c 
b/arch/powerpc/platforms/pasemi/setup.c
index b612474f8f8e..376797eb7894 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -144,8 +144,6 @@ static void __init pas_setup_arch(void)
/* Setup SMP callback */
smp_ops = &pas_smp_ops;
 #endif
-   /* Lookup PCI hosts */
-   pas_pci_init();
 
/* Remap SDC register for doing reset */
/* XXXOJN This should maybe come out of the device tree */
@@ -446,6 +444,7 @@ define_machine(pasemi) {
.name   = "PA Semi PWRficient",
.probe  = pas_probe,
.setup_arch = pas_setup_arch,
+   .discover_phbs  = pas_pci_init,
.init_IRQ   = pas_init_IRQ,
.get_irq= mpic_get_irq,
.restart= pas_restart,
-- 
2.26.2



[RFC PATCH 18/18] powerpc/powermac: Move PHB discovery

2020-09-24 Thread Oliver O'Halloran
Signed-off-by: Oliver O'Halloran 
---
compile tested with pmac32_defconfig and g5_defconfig
---
 arch/powerpc/platforms/powermac/setup.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/setup.c 
b/arch/powerpc/platforms/powermac/setup.c
index f002b0fa69b8..0f8669139a21 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -298,9 +298,6 @@ static void __init pmac_setup_arch(void)
of_node_put(ic);
}
 
-   /* Lookup PCI hosts */
-   pmac_pci_init();
-
 #ifdef CONFIG_PPC32
ohare_init();
l2cr_init();
@@ -600,6 +597,7 @@ define_machine(powermac) {
.name   = "PowerMac",
.probe  = pmac_probe,
.setup_arch = pmac_setup_arch,
+   .discover_phbs  = pmac_pci_init,
.show_cpuinfo   = pmac_show_cpuinfo,
.init_IRQ   = pmac_pic_init,
.get_irq= NULL, /* changed later */
-- 
2.26.2



Re: [PATCH] rpadlpar_io:Add MODULE_DESCRIPTION entries to kernel modules

2020-09-24 Thread Oliver O'Halloran
On Thu, Sep 24, 2020 at 3:15 PM Mamatha Inamdar
 wrote:
>
> This patch adds a brief MODULE_DESCRIPTION to rpadlpar_io kernel modules
> (descriptions taken from Kconfig file)
>
> Signed-off-by: Mamatha Inamdar 
> ---
>  drivers/pci/hotplug/rpadlpar_core.c |1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/pci/hotplug/rpadlpar_core.c 
> b/drivers/pci/hotplug/rpadlpar_core.c
> index f979b70..bac65ed 100644
> --- a/drivers/pci/hotplug/rpadlpar_core.c
> +++ b/drivers/pci/hotplug/rpadlpar_core.c
> @@ -478,3 +478,4 @@ static void __exit rpadlpar_io_exit(void)
>  module_init(rpadlpar_io_init);
>  module_exit(rpadlpar_io_exit);
>  MODULE_LICENSE("GPL");
> +MODULE_DESCRIPTION("RPA Dynamic Logical Partitioning driver for I/O slots");

RPA as a spec was superseded by PAPR in the early 2000s. Can we rename
this already?

The only potential problem I can see is scripts doing: modprobe
rpadlpar_io or similar

However, we should be able to fix that with a module alias.

Oliver


Re: [patch RFC 00/15] mm/highmem: Provide a preemptible variant of kmap_atomic & friends

2020-09-24 Thread Thomas Gleixner
On Wed, Sep 23 2020 at 17:12, Steven Rostedt wrote:
> On Wed, 23 Sep 2020 22:55:54 +0200
> Then scratch the idea of having anonymous local_lock() and just bring
> local_lock in directly? Then have a kmap local lock, which would only
> block those that need to do a kmap.

That's still going to end up in lock ordering nightmares and you lose
the ability to use kmap_local from arbitrary contexts which was again
one of the goals of this exercise.

Aside of that you're imposing reentrancy protections on something which
does not need it in the first place.

> Now as for migration disabled nesting, at least now we would have
> groupings of this, and perhaps the theorists can handle that. I mean,
> how is this much different than having a bunch of tasks blocked on a
> mutex with the owner is pinned on a CPU?
>
> migrate_disable() is a BKL of pinning affinity.

No. That's just wrong. preempt disable is a concurrency control,
i.e. protecting against reentrancy on a given CPU. But it's a cpu global
protection which means that it's not protecting a specific code path.

Contrary to preempt disable, migrate disable is not protecting against
reentrancy on a given CPU. It's a temporary restriction to the scheduler
on placement.

The fact that disabling preemption implicitly disables migration does
not make them semantically equivalent.

> If we only have local_lock() available (even on !RT), then it makes
> the blocking in groups. At least this way you could grep for all the
> different local_locks in the system and plug that into the algorithm
> for WCS, just like one would with a bunch of mutexes.

You cannot do that on RT at all where migrate disable is substituting
preempt disable in spin and rw locks. The result would be the same as
with a !RT kernel just with horribly bad performance.

That means the stacking problem has to be solved anyway.

So why on earth do you want to create yet another special duct tape case
for kmap_local() which proliferates inconsistency instead of aiming for
consistency across all preemption models?

Thanks,

tglx


Re: [PATCH kernel] powerpc/dma: Fix dma_map_ops::get_required_mask

2020-09-24 Thread Alexey Kardashevskiy



On 24/09/2020 00:10, Christoph Hellwig wrote:
> On Tue, Sep 22, 2020 at 12:26:18PM +1000, Alexey Kardashevskiy wrote:
>>> Well, the original intent of dma_get_required_mask is to return the
>>> mask that the driver then uses to figure out what to set, so what aacraid
>>> does fits that use case. 
>>
>> What was the original intent exactly? The driver asks for the minimum or
>> maximum DMA mask the platform supports?
>>
>> As for now, we (ppc64/powernv) can do:
>> 1. bypass (==64bit)
>> 2. a DMA window which used to be limited by 2GB but not anymore.
>>
>> I can understand if the driver asked for required mask in expectation to
>> receive "less or equal than 32bit" and "more than 32 bit" and choose.
>> And this probably was the intent as at the time when the bug was
>> introduced, the window was always smaller than 4GB.
>>
>> But today the window is bigger than that (44 bits now, or a similar
>> value, depends on max page order) so the returned mask is >32. Which
>> still enables that DAC in aacraid but I suspect this is accidental.
> 
> I think for powernv returning 64-bit always would make a lot of sense.
> AFAIK all of powernv is PCIe and not legacy PCI, so returning anything
> less isn't going to help to optimize anything.

May be... The current behavior is not wrong (after the fix) but not
optimal either. Even with legacy PCI it should just result in failing
attempt to set 64bit mask which drivers should still handle, i.e. choose
a shorter mask.

Why not ditch the whole dma_get_required_mask() and just fail on setting
a bigger mask? Are these failures not handled in some drivers? Or there
are cases when a shorter mask is better? Thanks,


>>> Of course that idea is pretty bogus for
>>> PCIe devices.
>>
>> Why? From the PHB side, there are windows. From the device side, there
>> are many crippled devices, like, no GPU I saw in last years supported
>> more than 48bit.
> 
> Yes, but dma_get_required_mask is misnamed - the mask is not required,
> it is the optimal mask.  Even if the window is smaller we handle it
> some way, usually by using swiotlb, or by iommu tricks in your case.
>
>>> I suspect the right fix is to just not query dma_get_required_mask for
>>> PCIe devices in aacraid (and other drivers that do something similar).
>>
>> May be, if you write nice and big comment next to
>> dma_get_required_mask() explaining exactly what it does, then I will
>> realize I am getting this all wrong and we will move to fixing the
>> drivers :)
> 
> Yes, it needs a comment or two, and probably be renamed to
> dma_get_optimal_dma_mask, and a cleanup of most users.  I've added it
> to my ever growing TODO list, but I would not be unhappy if someone
> else gives it a spin.
> 

-- 
Alexey


[Bug 195755] rcu_sched detected stalls on CPUs/tasks: (detected by 0, t=6302 jiffies, g=11405, c=11404, q=1880), ppc64, G5

2020-09-24 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=195755

Nigel Reed (ni...@nigelreed.net) changed:

   What|Removed |Added

 CC||ni...@nigelreed.net

--- Comment #29 from Nigel Reed (ni...@nigelreed.net) ---
I know this is old but I have been having some issues for a while, I was
finally able to get something useful:

[165716.089703] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[165716.095949] rcu:1-...!: (0 ticks this GP) idle=354/0/0x0
softirq=2154363/2154363 fqs=0
[165716.104512] rcu:3-...!: (0 ticks this GP) idle=29c/0/0x0
softirq=883832/883832 fqs=0
[165716.112873] rcu:4-...!: (8 GPs behind) idle=ad8/0/0x0
softirq=2165586/2165586 fqs=0
[165716.121179] rcu:9-...!: (9 GPs behind) idle=acc/0/0x0
softirq=1340600/1340600 fqs=0
[165716.129467] rcu:11-...!: (2 GPs behind) idle=a18/0/0x0
softirq=4538536/4538537 fqs=0
[165716.137828] rcu:12-...!: (0 ticks this GP) idle=870/0/0x0
softirq=2158040/2158040 fqs=0
[165775.697763] rcu: rcu_sched kthread starved for 29898 jiffies! g36134941
f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=3
[165775.709013] rcu: RCU grace-period kthread stack dump:
[165837.494623] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
[(resolved):52315]
[165865.494840] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
[(resolved):52315]

This happened just after freshclam ran but I don't know if it's related.

This is with a Ryzen 7 1800X CPU.
5.4.0-48-generic #52-Ubuntu

I thought I had sysrq configured but it seems not so I can't really provide any
more information, other than this is driving me crazy.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

Re: [patch RFC 00/15] mm/highmem: Provide a preemptible variant of kmap_atomic & friends

2020-09-24 Thread peterz
On Wed, Sep 23, 2020 at 11:52:51AM -0400, Steven Rostedt wrote:
> On Wed, 23 Sep 2020 10:40:32 +0200
> pet...@infradead.org wrote:
> 
> > However, with migrate_disable() we can have each task preempted in a
> > migrate_disable() region, worse we can stack them all on the _same_ CPU
> > (super ridiculous odds, sure). And then we end up only able to run one
> > task, with the rest of the CPUs picking their nose.
> 
> What if we just made migrate_disable() a local_lock() available for !RT?

Can't, neither migrate_disable() nor migrate_enable() are allowed to
block -- which is what makes their implementation so 'interesting'.

> This should lower the SHC in theory, if you can't have stacked migrate
> disables on the same CPU.

See this email in that other thread (you're on Cc too IIRC):

  
https://lkml.kernel.org/r/20200923170809.gy1362...@hirez.programming.kicks-ass.net

I think that if we 'frob' the balance PULL, we'll end up with something
similar.

Whichever way around we turn this thing, the migrate_disable() runtime
(we'll have to add a tracer for that), will be an interference term on
the lower priority task, exactly like preempt_disable() would be. We'll
just not exclude a higher priority task from running.


AFAICT; the best case is a single migrate_disable() nesting, where a
higher priority task preempts in a migrate_disable() section -- this is
per design.

When this preempted task becomes eligible to run under the ideal model
(IOW it becomes one of the M highest priority tasks), it might still
have to wait for the preemptee's migrate_disable() section to complete.
Thereby suffering interference in the exact duration of
migrate_disable() section.

Per this argument, the change from preempt_disable() to
migrate_disable() gets us:

 - higher priority tasks gain reduced wake-up latency
 - lower priority tasks are unchanged and are subject to the exact same
   interference term as if the higher priority task were using
   preempt_disable().

Since we've already established this term is unbounded, any task but the
highest priority task is basically buggered.

TL;DR, if we get balancing fixed and achieve (near) the optimal case
above, migrate_disable() is an over-all win. But it's provably
non-deterministic as long as the migrate_disable() sections are
non-deterministic.


The reason this all mostly works in practise is (I think) because:

 - People care most about the higher prio RT tasks and craft them to
   mostly avoid the migrate_disable() infected code.

 - The preemption scenario is most pronounced at higher utilization
   scenarios, and I suspect this is fairly rare to begin with.

 - And while many of these migrate_disable() regions are unbound in
   theory, in practise they're often fairly reasonable.


So my current todo list is:

 - Change RT PULL
 - Change DL PULL
 - Add migrate_disable() tracer; exactly like preempt/irqoff, except
   measuring task-runtime instead of cpu-time.
 - Add a mode that measures actual interference.
 - Add a traceevent to detect preemption in migrate_disable().


And then I suppose I should twist Daniel's arm to update his model to
include these scenarios and numbers.


Re: [PATCH v2] powerpc/pci: unmap legacy INTx interrupts when a PHB is removed

2020-09-24 Thread Cédric Le Goater
On 9/24/20 7:11 AM, Alexey Kardashevskiy wrote:
> 
> 
> On 23/09/2020 17:06, Cédric Le Goater wrote:
>> On 9/23/20 2:33 AM, Qian Cai wrote:
>>> On Fri, 2020-08-07 at 12:18 +0200, Cédric Le Goater wrote:
 When a passthrough IO adapter is removed from a pseries machine using
 hash MMU and the XIVE interrupt mode, the POWER hypervisor expects the
 guest OS to clear all page table entries related to the adapter. If
 some are still present, the RTAS call which isolates the PCI slot
 returns error 9001 "valid outstanding translations" and the removal of
 the IO adapter fails. This is because when the PHBs are scanned, Linux
 maps automatically the INTx interrupts in the Linux interrupt number
 space but these are never removed.

 To solve this problem, we introduce a PPC platform specific
 pcibios_remove_bus() routine which clears all interrupt mappings when
 the bus is removed. This also clears the associated page table entries
 of the ESB pages when using XIVE.

 For this purpose, we record the logical interrupt numbers of the
 mapped interrupt under the PHB structure and let pcibios_remove_bus()
 do the clean up.

 Since some PCI adapters, like GPUs, use the "interrupt-map" property
 to describe interrupt mappings other than the legacy INTx interrupts,
 we can not restrict the size of the mapping array to PCI_NUM_INTX. The
 number of interrupt mappings is computed from the "interrupt-map"
 property and the mapping array is allocated accordingly.

 Cc: "Oliver O'Halloran" 
 Cc: Alexey Kardashevskiy 
 Signed-off-by: Cédric Le Goater 
>>>
>>> Some syscall fuzzing will trigger this on POWER9 NV where the traces 
>>> pointed to
>>> this patch.
>>>
>>> .config: https://gitlab.com/cailca/linux-mm/-/blob/master/powerpc.config
>>
>> OK. The patch is missing a NULL assignment after kfree() and that
>> might be the issue. 
>>
>> I did try PHB removal under PowerNV, so I would like to understand 
>> how we managed to remove twice the PCI bus and possibly reproduce. 
>> Any chance we could grab what the syscall fuzzer (syzkaller) did ? 
> 
> 
> 
> My guess would be it is doing this in parallel to provoke races.

Concurrent removal and rescan should be controlled by:

   pci_stop_and_remove_bus_device_locked()
   pci_lock_rescan_remove() 

And, in the report, the stack traces are on the same CPU and PID. 



What I think is happening is that we did a couple of remove/rescan
of the same PHB. The problem is that ->irq_map is not reallocated
the second time because the PHB is re-scanned differently on the 
PowerNV platform. At the second remove, the ->irq_map being not NULL, 
we try to kfree it again and the allocator warns of a double free :/

This works fine on pseries/KVM because the PHB is never removed and 
on pseries/pHyp, pcibios_scan_phb() is called at PHB hotplug. But on 
PowerNV, pcibios_scan_phb() is only called at probe/boot time and 
not at hotplug time :/

It was a good thing to spot that before merge. But I need to revise
that patch again.

Thanks,

C.


Re: [PATCH -next] powerpc/papr_scm: Fix warnings about undeclared variable

2020-09-24 Thread Vaibhav Jain


Thanks for the patch. This looks good to me.

Wang Wensheng  writes:

> Build the kernel with 'make C=2':
> arch/powerpc/platforms/pseries/papr_scm.c:825:1: warning: symbol
> 'dev_attr_perf_stats' was not declared. Should it be static?

> Signed-off-by: Wang Wensheng 
Reviewed-by: Vaibhav Jain 

-- 
Cheers
~ Vaibhav


Re: [PATCH v2 0/7] powerpc: Fix a few W=1 compile warnings

2020-09-24 Thread Michael Ellerman
On Mon, 14 Sep 2020 23:10:00 +0200, Cédric Le Goater wrote:
> Here is a small contribution improving compile with W=1.
> 
> Thanks,
> 
> C.
> 
> Changes in v2:
> 
> [...]

Patches 1, 3, 4 and 7 applied to powerpc/next.

[1/7] powerpc/sysfs: Remove unused 'err' variable in sysfs_create_dscr_default()
  https://git.kernel.org/powerpc/c/7b2aab5f22f0f7cc9e2f8672c9e65e2e88d30102
[3/7] powerpc/sstep: Remove empty if statement checking for invalid form
  https://git.kernel.org/powerpc/c/5ab187e01a5310e1f9cd2f6b192b2343b8bd14cb
[4/7] powerpc/xive: Make debug routines static
  https://git.kernel.org/powerpc/c/2228f19cf90ef796c8d84f54f3a5db2dcc85c83f
[7/7] powerpc/32: Declare stack_overflow_exception() prototype
  https://git.kernel.org/powerpc/c/ebbfeef0d8093a06ff39c60105b6650be3344cbe

cheers


Re: [PATCH -next] powerpc/pseries: convert to use DEFINE_SEQ_ATTRIBUTE macro

2020-09-24 Thread Michael Ellerman
On Wed, 16 Sep 2020 10:50:26 +0800, Liu Shixin wrote:
> Use DEFINE_SEQ_ATTRIBUTE macro to simplify the code.

Applied to powerpc/next.

[1/1] powerpc/pseries: convert to use DEFINE_SEQ_ATTRIBUTE macro
  https://git.kernel.org/powerpc/c/96543e7352bded5d6d1a0e0022376ebdd6c1b8ab

cheers


Re: [PATCH -next] drivers/macintosh/smu.c: use for_each_child_of_node() macro

2020-09-24 Thread Michael Ellerman
On Wed, 16 Sep 2020 14:21:22 +0800, Qinglang Miao wrote:
> Use for_each_child_of_node() macro instead of open coding it.

Applied to powerpc/next.

[1/1] drivers/macintosh/smu.c: use for_each_child_of_node() macro
  https://git.kernel.org/powerpc/c/9c826d31a73815464bd3df81e56d39b3d908ac73

cheers


Re: [PATCH -next] macintosh: smu_sensors: use for_each_child_of_node() macro

2020-09-24 Thread Michael Ellerman
On Wed, 16 Sep 2020 14:21:25 +0800, Qinglang Miao wrote:
> Use for_each_child_of_node() macro instead of open coding it.

Applied to powerpc/next.

[1/1] macintosh: smu_sensors: use for_each_child_of_node() macro
  https://git.kernel.org/powerpc/c/acff5e6c37fa4bf8d002c917a762c4f7615f6eaf

cheers


Re: [PATCH -next] powerpc/powernv: fix wrong warning message in opalcore_config_init()

2020-09-24 Thread Michael Ellerman
On Wed, 16 Sep 2020 14:21:29 +0800, Qinglang Miao wrote:
> The logic of the warn output is incorrect. The two args should be
> exchanged.

Applied to powerpc/next.

[1/1] powerpc/powernv: fix wrong warning message in opalcore_config_init()
  https://git.kernel.org/powerpc/c/8ec5cb12cd957e59b0470b75d26c901aaf645bc3

cheers


Re: [PATCH -next] serial: pmac_zilog: use for_each_child_of_node() macro

2020-09-24 Thread Michael Ellerman
On Wed, 16 Sep 2020 14:21:38 +0800, Qinglang Miao wrote:
> Use for_each_child_of_node() macro instead of open coding it.

Applied to powerpc/next.

[1/1] serial: pmac_zilog: use for_each_child_of_node() macro
  https://git.kernel.org/powerpc/c/1d42e07e9c249b7a032fba82b673ee8a8d6bd7b7

cheers


Re: [PATCH 1/2] powerpc/mm/64s: Fix slb_setup_new_exec() sparse warning

2020-09-24 Thread Michael Ellerman
On Wed, 16 Sep 2020 21:56:36 +1000, Michael Ellerman wrote:
> Sparse says:
>   symbol slb_setup_new_exec was not declared. Should it be static?
> 
> No, it should have a declaration in a header, add one.

Applied to powerpc/next.

[1/2] powerpc/mm/64s: Fix slb_setup_new_exec() sparse warning
  https://git.kernel.org/powerpc/c/ef1edbba52883907caf02ab85e0d00a2e4648f05
[2/2] powerpc/perf: Add declarations to fix sparse warnings
  https://git.kernel.org/powerpc/c/d10ebe79dfae7dc59b6cf77ffa615f0b8dae21bf

cheers


Re: [PATCH 1/3] powerpc: Move arch_cpu_idle_dead() into smp.c

2020-09-24 Thread Michael Ellerman
On Wed, 19 Aug 2020 11:56:32 +1000, Michael Ellerman wrote:
> arch_cpu_idle_dead() is in idle.c, which makes sense, but it's inside
> a CONFIG_HOTPLUG_CPU block.
> 
> It would be more at home in smp.c, inside the existing
> CONFIG_HOTPLUG_CPU block. Note that CONFIG_HOTPLUG_CPU depends on
> CONFIG_SMP so even though smp.c is not built for SMP=n builds, that's
> fine.

Applied to powerpc/next.

[1/3] powerpc: Move arch_cpu_idle_dead() into smp.c
  https://git.kernel.org/powerpc/c/1ea21ba231f248034e8c794aa675869ca2b97d42
[2/3] powerpc/smp: Fold cpu_die() into its only caller
  https://git.kernel.org/powerpc/c/bf3c1464db883a953ad7bbed64924480b8b2b244
[3/3] powerpc/smp: Move ppc_md.cpu_die() to smp_ops.cpu_offline_self()
  https://git.kernel.org/powerpc/c/39f87561454dc33efb2a3d8354d066207acac8a6

cheers


Re: [PATCH] powerpc/prom_init: Check display props exist before enabling btext

2020-09-24 Thread Michael Ellerman
On Fri, 21 Aug 2020 20:34:07 +1000, Michael Ellerman wrote:
> It's possible to enable CONFIG_PPC_EARLY_DEBUG_BOOTX for a pseries
> kernel (maybe it shouldn't be), which is then booted with qemu/slof.
> 
> But if you do that the kernel crashes in draw_byte(), with a DAR
> pointing somewhere near INT_MAX.
> 
> Adding some debug to prom_init we see that we're not able to read the
> "address" property from OF, so we're just using whatever junk value
> was on the stack.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/prom_init: Check display props exist before enabling btext
  https://git.kernel.org/powerpc/c/6c71cfcc01685ef495ca7886471a76e73446424e

cheers


Re: [PATCH v2 0/4] more mm switching vs TLB shootdown and lazy tlb fixes

2020-09-24 Thread Michael Ellerman
On Mon, 14 Sep 2020 14:52:15 +1000, Nicholas Piggin wrote:
> This is an attempt to fix a few different related issues around
> switching mm, TLB flushing, and lazy tlb mm handling.
> 
> This will require all architectures to eventually move to disabling
> irqs over activate_mm, but it's possible we could add another arch
> call after irqs are re-enabled for those few which can't do their
> entire activation with irqs disabled.
> 
> [...]

Applied to powerpc/next.

[1/4] mm: fix exec activate_mm vs TLB shootdown and lazy tlb switching race
  https://git.kernel.org/powerpc/c/d53c3dfb23c45f7d4f910c3a3ca84bf0a99c6143
[2/4] powerpc: select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
  https://git.kernel.org/powerpc/c/66acd46080bd9e5ad2be4b0eb1d498d5145d058e
[3/4] sparc64: remove mm_cpumask clearing to fix kthread_use_mm race
  https://git.kernel.org/powerpc/c/bafb056ce27940c9994ea905336aa8f27b4f7275
[4/4] powerpc/64s/radix: Fix mm_cpumask trimming race vs kthread_use_mm
  https://git.kernel.org/powerpc/c/a665eec0a22e11cdde708c1c256a465ebe768047

cheers


Re: [PATCH -next v2] powerpc/book3s64: fix link error with CONFIG_PPC_RADIX_MMU=n

2020-09-24 Thread Michael Ellerman
On Thu, 17 Sep 2020 10:06:43 +0800, Yang Yingliang wrote:
> Fix link error when CONFIG_PPC_RADIX_MMU is disabled:
> powerpc64-linux-gnu-ld: arch/powerpc/platforms/pseries/lpar.o:(.toc+0x0): 
> undefined reference to `mmu_pid_bits'

Applied to powerpc/next.

[1/1] powerpc/book3s64: fix link error with CONFIG_PPC_RADIX_MMU=n
  https://git.kernel.org/powerpc/c/bda7673d64b6c2e92423363a756caa657464e096

cheers


Re: [PATCH] powerpc/process: Fix uninitialised variable error

2020-09-24 Thread Michael Ellerman
On Thu, 17 Sep 2020 12:45:09 +1000, Michael Ellerman wrote:
> Clang, and GCC with -Wmaybe-uninitialized, can't see that val is
> unused in get_fpexec_mode():
> 
>   arch/powerpc/kernel/process.c:1940:7: error: variable 'val' is used
>   uninitialized whenever 'if' condition is true
> if (cpu_has_feature(CPU_FTR_SPE)) {
> ^~~~
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/process: Fix uninitialised variable error
  https://git.kernel.org/powerpc/c/d208e13c6a2277d9fb71fad6a1394c70bdd7b634

cheers


Re: [patch RFC 00/15] mm/highmem: Provide a preemptible variant of kmap_atomic & friends

2020-09-24 Thread Steven Rostedt
On Thu, 24 Sep 2020 08:57:52 +0200
Thomas Gleixner  wrote:

> > Now as for migration disabled nesting, at least now we would have
> > groupings of this, and perhaps the theorists can handle that. I mean,
> > how is this much different that having a bunch of tasks blocked on a
> > mutex with the owner is pinned on a CPU?
> >
> > migrate_disable() is a BKL of pinning affinity.  
> 
> No. That's just wrong. preempt disable is a concurrency control,

I think you totally misunderstood what I was saying. The above wasn't about
comparing preempt_disable to migrate_disable. It was comparing
migrate_disable to a chain of tasks blocked on mutexes where the top owner
has preempt_disable set. You still have a bunch of tasks that can't move to
other CPUs.


> > If we only have local_lock() available (even on !RT), then it makes
> > the blocking in groups. At least this way you could grep for all the
> > different local_locks in the system and plug that into the algorithm
> > for WCS, just like one would with a bunch of mutexes.  
> 
> You cannot do that on RT at all where migrate disable is substituting
> preempt disable in spin and rw locks. The result would be the same as
> with a !RT kernel just with horribly bad performance.

Note, the spin and rwlocks already have a lock associated with them. Why
would it be any different on RT? I wasn't suggesting adding another lock
inside a spinlock. Why would I recommend THAT? I wasn't recommending
blindly replacing migrate_disable() with local_lock(). I just meant expose
local_lock() but not migrate_disable().

> 
> That means the stacking problem has to be solved anyway.
> 
> So why on earth do you want to create yet another special duct tape case
> for kmap_local() which proliferates inconsistency instead of aiming for
> consistency across all preemption models?

The idea was to help with the scheduling issue.

Anyway, instead of blocking. What about having a counter of number of
migrate disabled tasks per cpu, and when taking a migrate_disable(), and there's
already another task with migrate_disabled() set, and the current task has
an affinity greater than 1, it tries to migrate to another CPU?

This way migrate_disable() is less likely to have a bunch of tasks blocked
on one CPU serialized by each task exiting the migrate_disable() section.

Yes, there's more overhead, but it only happens if multiple tasks are in a
migrate disable section on the same CPU.

-- Steve



Re: [patch RFC 00/15] mm/highmem: Provide a preemptible variant of kmap_atomic & friends

2020-09-24 Thread Peter Zijlstra
On Thu, Sep 24, 2020 at 08:32:41AM -0400, Steven Rostedt wrote:
> Anyway, instead of blocking. What about having a counter of number of
> migrate disabled tasks per cpu, and when taking a migrate_disable(), and there's
> already another task with migrate_disabled() set, and the current task has
> an affinity greater than 1, it tries to migrate to another CPU?

That doesn't solve the problem. On wakeup we should already prefer an
idle CPU over one running a (RT) task, but you can always wake more
tasks than there's CPUs around and you'll _have_ to stack at some point.

The trick is how to unstack them correctly. We need to detect when a
migrate_disable() task _should_ start running again, and migrate away
whoever is in the way at that point.

It turns out, that getting selected for pull-balance is exactly that
condition, and clearly a migrate_disable() task cannot be pulled, but we
can use that signal to try and pull away the running task that's in the
way.


Re: C vdso

2020-09-24 Thread Christophe Leroy

Hi Michael

Le 17/09/2020 à 14:33, Michael Ellerman a écrit :

Hi Christophe,

Christophe Leroy  writes:

Hi Michael,

What is the status with the generic C vdso merge ?
In some mail, you mentioned having difficulties getting it working on
ppc64, any progress ? What's the problem ? Can I help ?


Yeah sorry I was hoping to get time to work on it but haven't been able
to.

It's causing crashes on ppc64 ie. big endian.





As you can see from the instruction dump we have jumped into the weeds 
somewhere.

We also had the report from the kbuild robot about rela.opd being
discarded, which I think is indicative of a bigger problem. ie. we don't
process relocations for the VDSO, but opds require relocations (they
contain an absolute pointer).

I thought we could get away with that, because the VDSO entry points
aren't proper functions (so they don't have opds), and I didn't think
we'd be calling via function pointers in the VDSO code (which would
require opds). But seems something is not working right.

Sorry I haven't got back to you with those details. Things are a bit of
a mess inside IBM at the moment (always?), and I've been trying to get
everything done before I take a holiday next week.




Can you tell what defconfig you are using ? I have been able to setup a 
full glibc PPC64 cross compilation chain and been able to test it under 
QEMU with success, using Nathan's vdsotest tool.


I tested with both ppc64_defconfig and pseries_defconfig.

The only problem I got is with getcpu, which segfaults but both before 
and after applying my series, so I guess this is unrelated.


Not sure we can pay too much attention to the exact measurement as it is 
a ppc64 QEMU running on a x86 Linux which is running in a Virtual Box on 
a x86 windows Laptop, but at least it works:


Without the series:

clock-getres-monotonic:vdso: 389 nsec/call
clock-gettime-monotonic:vdso: 781 nsec/call
clock-getres-monotonic-coarse:vdso: 13715 nsec/call
clock-gettime-monotonic-coarse:vdso: 312 nsec/call
clock-getres-monotonic-raw:vdso: 13589 nsec/call
clock-getres-tai:vdso: 13827 nsec/call
clock-gettime-tai:vdso: 14846 nsec/call
clock-getres-boottime:vdso: 13596 nsec/call
clock-gettime-boottime:vdso: 14758 nsec/call
clock-getres-realtime:vdso: 327 nsec/call
clock-gettime-realtime:vdso: 717 nsec/call
clock-getres-realtime-coarse:vdso: 14102 nsec/call
clock-gettime-realtime-coarse:vdso: 299 nsec/call
gettimeofday:vdso: 771 nsec/call

With the series:

clock-getres-monotonic:vdso: 350 nsec/call
clock-gettime-monotonic:vdso: 726 nsec/call
clock-getres-monotonic-coarse:vdso: 356 nsec/call
clock-gettime-monotonic-coarse:vdso: 423 nsec/call
clock-getres-monotonic-raw:vdso: 349 nsec/call
clock-getres-tai:vdso: 419 nsec/call
clock-gettime-tai:vdso: 724 nsec/call
clock-getres-boottime:vdso: 352 nsec/call
clock-gettime-boottime:vdso: 752 nsec/call
clock-getres-realtime:vdso: 351 nsec/call
clock-gettime-realtime:vdso: 733 nsec/call
clock-getres-realtime-coarse:vdso: 356 nsec/call
clock-gettime-realtime-coarse:vdso: 367 nsec/call
gettimeofday:vdso: 796 nsec/call


Thanks
Christophe


Re: [patch RFC 00/15] mm/highmem: Provide a preemptible variant of kmap_atomic & friends

2020-09-24 Thread Steven Rostedt
On Thu, 24 Sep 2020 14:42:41 +0200
Peter Zijlstra  wrote:

> On Thu, Sep 24, 2020 at 08:32:41AM -0400, Steven Rostedt wrote:
> > Anyway, instead of blocking. What about having a counter of number of
> > migrate disabled tasks per cpu, and when taking a migrate_disable(), and there's
> > already another task with migrate_disabled() set, and the current task has
> > an affinity greater than 1, it tries to migrate to another CPU?  
> 
> That doesn't solve the problem. On wakeup we should already prefer an
> idle CPU over one running a (RT) task, but you can always wake more
> tasks than there's CPUs around and you'll _have_ to stack at some point.

Yes, understood.

> 
> The trick is how to unstack them correctly. We need to detect when a
> migrate_disable() task _should_ start running again, and migrate away
> whoever is in the way at that point.
> 
> It turns out, that getting selected for pull-balance is exactly that
> condition, and clearly a migrate_disable() task cannot be pulled, but we
> can use that signal to try and pull away the running task that's in the
> way.

Unless of course that running task is in a migrate disable section
itself ;-)

But I guess we will always have that SHC, and there will always be a
scenario that you can't balance properly. But hopefully in practice we
won't see that.

How to handle kmap_local(), will migrate_disable() be used only for
32bit or, for consistency, will it also apply to 64bit?

-- Steve


Re: [patch RFC 00/15] mm/highmem: Provide a preemptible variant of kmap_atomic & friends

2020-09-24 Thread Peter Zijlstra
On Thu, Sep 24, 2020 at 09:51:38AM -0400, Steven Rostedt wrote:

> > It turns out, that getting selected for pull-balance is exactly that
> > condition, and clearly a migrate_disable() task cannot be pulled, but we
> > can use that signal to try and pull away the running task that's in the
> > way.
> 
> Unless of course that running task is in a migrate disable section
> itself ;-)

See my ramblings here:

  
https://lkml.kernel.org/r/20200924082717.ga1362...@hirez.programming.kicks-ass.net

My plan was to have the migrate_enable() of the running task trigger the
migration in that case.



RE: [PATCH v6 04/11] PCI: designware-ep: Modify MSI and MSIX CAP way of finding

2020-09-24 Thread Z.q. Hou
Hi Bjorn,

Thanks a lot for your comments!

> -Original Message-
> From: Bjorn Helgaas 
> Sent: 2020年9月24日 4:16
> To: Xiaowei Bao 
> Cc: Z.q. Hou ; M.h. Lian
> ; Mingkai Hu ;
> bhelg...@google.com; robh...@kernel.org; shawn...@kernel.org; Leo Li
> ; kis...@ti.com; lorenzo.pieral...@arm.com; Roy
> Zang ; amur...@thegoodpenguin.co.uk;
> jingooh...@gmail.com; gustavo.pimen...@synopsys.com;
> andrew.mur...@arm.com; linux-...@vger.kernel.org;
> devicet...@vger.kernel.org; linux-ker...@vger.kernel.org;
> linux-arm-ker...@lists.infradead.org; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH v6 04/11] PCI: designware-ep: Modify MSI and MSIX
> CAP way of finding
> 
> s/MSIX/MSI-X/ (subject and below)
> 
> On Sat, Mar 14, 2020 at 11:30:31AM +0800, Xiaowei Bao wrote:
> > Each PF of EP device should have it's own MSI or MSIX capabitily
> > struct, so create a dw_pcie_ep_func struct and remove the msi_cap and
> > msix_cap to this struct from dw_pcie_ep, and manage the PFs with a
> > list.
> 
> s/capabitily/capability/
> 
> I know Lorenzo has already applied this, but for the future, or in case there
> are other reasons to update this patch.
> 
> There are a bunch of unnecessary initializations below for future cleanup.

Yes, and there are many calls to dw_pcie_ep_func_select() to get func_offset;
I plan to submit a separate patch to clean them up.

Thanks,
Zhiqiang

> 
> > Signed-off-by: Xiaowei Bao 
> > ---
> > v3:
> >  - This is a new patch, to fix the issue of MSI and MSIX CAP way of
> >finding.
> > v4:
> >  - Correct some word of commit message.
> > v5:
> >  - No change.
> > v6:
> >  - Fix up the compile error.
> >
> >  drivers/pci/controller/dwc/pcie-designware-ep.c | 135
> +---
> >  drivers/pci/controller/dwc/pcie-designware.h|  18 +++-
> >  2 files changed, 134 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > index 933bb89..fb915f2 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > @@ -19,6 +19,19 @@ void dw_pcie_ep_linkup(struct dw_pcie_ep *ep)
> > pci_epc_linkup(epc);
> >  }
> >
> > +struct dw_pcie_ep_func *
> > +dw_pcie_ep_get_func_from_ep(struct dw_pcie_ep *ep, u8 func_no) {
> > +   struct dw_pcie_ep_func *ep_func;
> > +
> > +   list_for_each_entry(ep_func, &ep->func_list, list) {
> > +   if (ep_func->func_no == func_no)
> > +   return ep_func;
> > +   }
> > +
> > +   return NULL;
> > +}
> > +
> >  static unsigned int dw_pcie_ep_func_select(struct dw_pcie_ep *ep, u8
> > func_no)  {
> > unsigned int func_offset = 0;
> > @@ -59,6 +72,47 @@ void dw_pcie_ep_reset_bar(struct dw_pcie *pci,
> enum pci_barno bar)
> > __dw_pcie_ep_reset_bar(pci, func_no, bar, 0);  }
> >
> > +static u8 __dw_pcie_ep_find_next_cap(struct dw_pcie_ep *ep, u8
> func_no,
> > +   u8 cap_ptr, u8 cap)
> > +{
> > +   struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> > +   unsigned int func_offset = 0;
> 
> Unnecessary initialization.
> 
> > +   u8 cap_id, next_cap_ptr;
> > +   u16 reg;
> > +
> > +   if (!cap_ptr)
> > +   return 0;
> > +
> > +   func_offset = dw_pcie_ep_func_select(ep, func_no);
> > +
> > +   reg = dw_pcie_readw_dbi(pci, func_offset + cap_ptr);
> > +   cap_id = (reg & 0x00ff);
> > +
> > +   if (cap_id > PCI_CAP_ID_MAX)
> > +   return 0;
> > +
> > +   if (cap_id == cap)
> > +   return cap_ptr;
> > +
> > +   next_cap_ptr = (reg & 0xff00) >> 8;
> > +   return __dw_pcie_ep_find_next_cap(ep, func_no, next_cap_ptr, cap); }
> > +
> > +static u8 dw_pcie_ep_find_capability(struct dw_pcie_ep *ep, u8
> > +func_no, u8 cap) {
> > +   struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> > +   unsigned int func_offset = 0;
> 
> Unnecessary initialization.
> 
> > +   u8 next_cap_ptr;
> > +   u16 reg;
> > +
> > +   func_offset = dw_pcie_ep_func_select(ep, func_no);
> > +
> > +   reg = dw_pcie_readw_dbi(pci, func_offset + PCI_CAPABILITY_LIST);
> > +   next_cap_ptr = (reg & 0x00ff);
> > +
> > +   return __dw_pcie_ep_find_next_cap(ep, func_no, next_cap_ptr, cap); }
> > +
> >  static int dw_pcie_ep_write_header(struct pci_epc *epc, u8 func_no,
> >struct pci_epf_header *hdr)
> >  {
> > @@ -246,13 +300,18 @@ static int dw_pcie_ep_get_msi(struct pci_epc
> *epc, u8 func_no)
> > struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> > u32 val, reg;
> > unsigned int func_offset = 0;
> 
> Unnecessary initialization (not from your patch).
> 
> > +   struct dw_pcie_ep_func *ep_func;
> >
> > -   if (!ep->msi_cap)
> > +   ep_func = dw_pcie_ep_get_func_from_ep(ep, func_no);
> > +   if (!ep_func)
> > +   return -EINVAL;
> > +
> > +   if (!ep_func->msi_cap)
> > return -EINVAL;
> >
> > func_offset = dw_pcie_ep_func_select(ep, func_no);
> >
> > -   reg = ep->msi_cap + func_offset + PCI_MSI_FLAGS;
> > +   reg = ep_func->

Re: [PATCH V2] Doc: admin-guide: Add entry for kvm_cma_resv_ratio kernel param

2020-09-24 Thread Jonathan Corbet
On Mon, 21 Sep 2020 14:32:20 +0530
sathn...@linux.vnet.ibm.com wrote:

> From: Satheesh Rajendran 
> 
> Add document entry for kvm_cma_resv_ratio kernel param which
> is used to alter the KVM contiguous memory allocation percentage
> for hash pagetable allocation used by hash mode PowerPC KVM guests.
> 
> Cc: linux-ker...@vger.kernel.org
> Cc: kvm-...@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: Jonathan Corbet 
> Reviewed-by: Randy Dunlap 
> Signed-off-by: Satheesh Rajendran 
> ---
> 
> V2: 
> Addressed review comments from Randy.
> 
> V1: https://lkml.org/lkml/2020/9/16/72

Applied, thanks.

jon


Re: [patch RFC 00/15] mm/highmem: Provide a preemptible variant of kmap_atomic & friends

2020-09-24 Thread Thomas Gleixner
On Thu, Sep 24 2020 at 08:32, Steven Rostedt wrote:
> On Thu, 24 Sep 2020 08:57:52 +0200
> Thomas Gleixner  wrote:
>
>> > Now as for migration disabled nesting, at least now we would have
>> > groupings of this, and perhaps the theorists can handle that. I mean,
>> > how is this much different that having a bunch of tasks blocked on a
>> > mutex with the owner is pinned on a CPU?
>> >
>> > migrate_disable() is a BKL of pinning affinity.  
>> 
>> No. That's just wrong. preempt disable is a concurrency control,
>
> I think you totally misunderstood what I was saying. The above wasn't about
> comparing preempt_disable to migrate_disable. It was comparing
> migrate_disable to a chain of tasks blocked on mutexes where the top owner
> has preempt_disable set. You still have a bunch of tasks that can't move to
> other CPUs.

What? The top owner does not prevent any task from moving. The tasks
cannot move because they are blocked on the mutex, which means they are
not runnable and non runnable tasks are not migrated at all.

I really don't understand what you are trying to say.

>> > If we only have local_lock() available (even on !RT), then it makes
>> > the blocking in groups. At least this way you could grep for all the
>> > different local_locks in the system and plug that into the algorithm
>> > for WCS, just like one would with a bunch of mutexes.  
>> 
>> You cannot do that on RT at all where migrate disable is substituting
>> preempt disable in spin and rw locks. The result would be the same as
>> with a !RT kernel just with horribly bad performance.
>
> Note, the spin and rwlocks already have a lock associated with them. Why
> would it be any different on RT? I wasn't suggesting adding another lock
> inside a spinlock. Why would I recommend THAT? I wasn't recommending
> blindly replacing migrate_disable() with local_lock(). I just meant expose
> local_lock() but not migrate_disable().

We already exposed local_lock() to non RT and it's for places which do
preempt_disable() or local_irq_disable() without having a lock
associated. But both primitives are scope less and therefore behave like
CPU local BKLs. What local_lock() provides in these cases is:

  - Making the protection scope clear by associating a named local
lock which is covered by lockdep.

  - It still maps to preempt_disable() or local_irq_disable() in !RT
kernels

  - The scope and the named lock allows RT kernels to substitute with
real (recursion aware) locking primitives which keep preemption and
interrupts enabled, but provide the fine grained protection for the
scoped critical section.
  
So how would you substitute migrate_disable() with a local_lock()? You
can't. Again migrate_disable() is NOT a concurrency control and
therefore it cannot be substituted by any concurrency control primitive.

>> That means the stacking problem has to be solved anyway.
>> 
>> So why on earth do you want to create yet another special duct tape case
>> for kmap_local() which proliferates inconsistency instead of aiming for
>> consistency across all preemption models?
>
> The idea was to help with the scheduling issue.

I don't see how mixing concepts and trying to duct tape a problem which
is clearly in the realm of the scheduler, i.e. load balancing and
placing algorithms, is helpful.

Thanks,

tglx


Re: [patch RFC 00/15] mm/highmem: Provide a preemptible variant of kmap_atomic & friends

2020-09-24 Thread Steven Rostedt
On Thu, 24 Sep 2020 19:55:10 +0200
Thomas Gleixner  wrote:

> On Thu, Sep 24 2020 at 08:32, Steven Rostedt wrote:
> > On Thu, 24 Sep 2020 08:57:52 +0200
> > Thomas Gleixner  wrote:
> >  
> >> > Now as for migration disabled nesting, at least now we would have
> >> > groupings of this, and perhaps the theorists can handle that. I mean,
> >> > how is this much different that having a bunch of tasks blocked on a
> >> > mutex with the owner is pinned on a CPU?
> >> >
> >> > migrate_disable() is a BKL of pinning affinity.
> >> 
> >> No. That's just wrong. preempt disable is a concurrency control,  
> >
> > I think you totally misunderstood what I was saying. The above wasn't about
> > comparing preempt_disable to migrate_disable. It was comparing
> > migrate_disable to a chain of tasks blocked on mutexes where the top owner
> > has preempt_disable set. You still have a bunch of tasks that can't move to
> > other CPUs.  
> 
> What? The top owner does not prevent any task from moving. The tasks
> cannot move because they are blocked on the mutex, which means they are
> not runnable and non runnable tasks are not migrated at all.

And neither are migrate disabled tasks preempted by a high priority
task.

> 
> I really don't understand what you are trying to say.

Don't worry about it. I was just making a high level comparison of how
migrate disabled tasks blocked on a higher priority task is similar to
that of tasks blocked on a mutex held by a pinned task that is
preempted by a high priority task. But we can forget this analogy as
it's not appropriate for the current conversation.

> 
> >> > If we only have local_lock() available (even on !RT), then it makes
> >> > the blocking in groups. At least this way you could grep for all the
> >> > different local_locks in the system and plug that into the algorithm
> >> > for WCS, just like one would with a bunch of mutexes.
> >> 
> >> You cannot do that on RT at all where migrate disable is substituting
> >> preempt disable in spin and rw locks. The result would be the same as
> >> with a !RT kernel just with horribly bad performance.  
> >
> > Note, the spin and rwlocks already have a lock associated with them. Why
> > would it be any different on RT? I wasn't suggesting adding another lock
> > inside a spinlock. Why would I recommend THAT? I wasn't recommending
> > blindly replacing migrate_disable() with local_lock(). I just meant expose
> > local_lock() but not migrate_disable().  
> 
> We already exposed local_lock() to non RT and it's for places which do
> preempt_disable() or local_irq_disable() without having a lock
> associated. But both primitives are scope less and therefore behave like
> CPU local BKLs. What local_lock() provides in these cases is:
> 
>   - Making the protection scope clear by associating a named local
> lock which is covered by lockdep.
> 
>   - It still maps to preempt_disable() or local_irq_disable() in !RT
> kernels
> 
>   - The scope and the named lock allows RT kernels to substitute with
> real (recursion aware) locking primitives which keep preemption and
> interrupts enabled, but provide the fine grained protection for the
> scoped critical section.

I'm very much aware of the above.

>   
> So how would you substitute migrate_disable() with a local_lock()? You
> can't. Again migrate_disable() is NOT a concurrency control and
> therefore it cannot be substituted by any concurrency control primitive.

When I was first writing my email, I was writing about a way to replace
migrate_disable with a construct similar to local locks without
actually mentioning local locks, but then rewrote it to state local
locks, trying to simplify what I was writing. I shouldn't have done
that, because it portrayed that I wanted to use local_lock()
unmodified. I was actually thinking of a new construct that was similar
but not exactly the same as local lock.

But this will just make things more complex and we can forget about it.

I'll wait to see what Peter produces.

-- Steve


Re: [patch RFC 00/15] mm/highmem: Provide a preemptible variant of kmap_atomic & friends

2020-09-24 Thread Daniel Bristot de Oliveira
On 9/24/20 10:27 AM, pet...@infradead.org wrote:
> So my current todo list is:
> 
>  - Change RT PULL
>  - Change DL PULL
>  - Add migrate_disable() tracer; exactly like preempt/irqoff, except
>measuring task-runtime instead of cpu-time.
>  - Add a mode that measures actual interference.
>  - Add a traceevent to detect preemption in migrate_disable().
> 
> 
> And then I suppose I should twist Daniel's arm to update his model to
> include these scenarios and numbers.

Challenge accepted :-)

-- Daniel



[powerpc:next-test 179/194] arch/powerpc/platforms/pseries/eeh_pseries.c:420:6: error: variable 'ret' is used uninitialized whenever 'if' condition is true

2020-09-24 Thread kernel test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
next-test
head:   2e60265c1feb6848c77dbea22d38bbe262ec49db
commit: b90e1cfa05bddc2fa0e314a790df603479259637 [179/194] powerpc/pseries/eeh: 
Rework device EEH PE determination
config: powerpc64-randconfig-r035-20200923 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 
d6ac649ccda289ecc2d2c0cb51892d57e8ec328c)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install powerpc64 cross compiling tool for clang build
# apt-get install binutils-powerpc64-linux-gnu
git checkout b90e1cfa05bddc2fa0e314a790df603479259637
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross 
ARCH=powerpc64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   arch/powerpc/platforms/pseries/eeh_pseries.c:46:6: error: no previous 
prototype for function 'pseries_pcibios_bus_add_device' 
[-Werror,-Wmissing-prototypes]
   void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
^
   arch/powerpc/platforms/pseries/eeh_pseries.c:46:1: note: declare 'static' if 
the function is not intended to be used outside of this translation unit
   void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
   ^
   static 
>> arch/powerpc/platforms/pseries/eeh_pseries.c:420:6: error: variable 'ret' is 
>> used uninitialized whenever 'if' condition is true 
>> [-Werror,-Wsometimes-uninitialized]
   if (addr == 0) {
   ^
   arch/powerpc/platforms/pseries/eeh_pseries.c:449:67: note: uninitialized use 
occurs here
   eeh_edev_dbg(edev, "EEH is unsupported on device (code = %d)\n", 
ret);
^~~
   arch/powerpc/include/asm/eeh.h:162:75: note: expanded from macro 
'eeh_edev_dbg'
   #define eeh_edev_dbg(edev, fmt, ...) EEH_EDEV_PRINT(debug, (edev), fmt, 
##__VA_ARGS__)
 
^~~
   arch/powerpc/include/asm/eeh.h:161:52: note: expanded from macro 
'EEH_EDEV_PRINT'
   ((edev)->pe ? (edev)->pe_config_addr : 0xffff), ##__VA_ARGS__)
 ^~~
   include/linux/printk.h:423:35: note: expanded from macro 'pr_debug'
   printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
^~~
   arch/powerpc/platforms/pseries/eeh_pseries.c:420:2: note: remove the 'if' if 
its condition is always false
   if (addr == 0) {
   ^~~~
   arch/powerpc/platforms/pseries/eeh_pseries.c:362:9: note: initialize the 
variable 'ret' to silence this warning
   int ret;
  ^
   = 0
   2 errors generated.

# 
https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?id=b90e1cfa05bddc2fa0e314a790df603479259637
git remote add powerpc 
https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git
git fetch --no-tags powerpc next-test
git checkout b90e1cfa05bddc2fa0e314a790df603479259637
vim +420 arch/powerpc/platforms/pseries/eeh_pseries.c

   345  
   346  /**
   347   * pseries_eeh_init_edev - initialise the eeh_dev and eeh_pe for a 
pci_dn
   348   *
   349   * @pdn: PCI device node
   350   *
   351   * When we discover a new PCI device via the device-tree we create a
   352   * corresponding pci_dn and we allocate, but don't initialise, an 
eeh_dev.
   353   * This function takes care of the initialisation and inserts the 
eeh_dev
   354   * into the correct eeh_pe. If no eeh_pe exists we'll allocate one.
   355   */
   356  void pseries_eeh_init_edev(struct pci_dn *pdn)
   357  {
   358  struct eeh_pe pe, *parent;
   359  struct eeh_dev *edev;
   360  int addr;
   361  u32 pcie_flags;
   362  int ret;
   363  
   364  if (WARN_ON_ONCE(!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)))
   365  return;
   366  
   367  /*
   368   * Find the eeh_dev for this pdn. The storage for the eeh_dev 
was
   369   * allocated at the same time as the pci_dn.
   370   *
   371   * XXX: We should probably re-visit that.
   372   */
   373  edev = pdn_to_eeh_dev(pdn);
   374  if (!edev)
   375  return;
   376  
   377  /*
   378   * If ->pe is set then we've already probed this device. We hit
   379   * this path when a pci_dev is removed and rescanned while 
recovering
   380   * a PE (i.e. for devices where the driver doesn't support error
   381   * recovery).
   382   */
   383  if (edev->pe)
   384  

[powerpc:next] BUILD SUCCESS ebbfeef0d8093a06ff39c60105b6650be3344cbe

2020-09-24 Thread kernel test robot
m68kstmark2_defconfig
sh   se7780_defconfig
sh  landisk_defconfig
mips loongson1b_defconfig
mips   xway_defconfig
sh  lboxre2_defconfig
arc nsimosci_hs_smp_defconfig
riscvallmodconfig
openrisc simple_smp_defconfig
powerpc tqm8541_defconfig
arm s3c6400_defconfig
mipsomega2p_defconfig
h8300   defconfig
mips   capcella_defconfig
mips  rb532_defconfig
mipsnlm_xlp_defconfig
powerpc mpc8315_rdb_defconfig
powerpc tqm5200_defconfig
arc haps_hs_defconfig
arm  simpad_defconfig
mips   ci20_defconfig
powerpc   mpc834x_itxgp_defconfig
openriscdefconfig
arm   sunxi_defconfig
arm   imx_v4_v5_defconfig
arm am200epdkit_defconfig
powerpc rainier_defconfig
powerpcicon_defconfig
armmulti_v7_defconfig
arcnsimosci_defconfig
m68kmvme16x_defconfig
sparc64  alldefconfig
arm   versatile_defconfig
powerpc mpc83xx_defconfig
arm  moxart_defconfig
powerpc linkstation_defconfig
mips decstation_defconfig
shedosk7760_defconfig
powerpccell_defconfig
s390  debug_defconfig
c6x dsk6455_defconfig
sh   se7721_defconfig
h8300   h8s-sim_defconfig
sh   se7724_defconfig
mips  decstation_64_defconfig
m68k  atari_defconfig
sh  sdk7786_defconfig
arm palmz72_defconfig
powerpc   eiger_defconfig
sh  sdk7780_defconfig
powerpcgamecube_defconfig
h8300alldefconfig
sh  defconfig
arm lpc18xx_defconfig
mips   ip27_defconfig
sh   sh7724_generic_defconfig
sh  polaris_defconfig
nds32 allnoconfig
shedosk7705_defconfig
arm   netwinder_defconfig
arm   spitz_defconfig
powerpcmpc7448_hpc2_defconfig
powerpc  pmac32_defconfig
powerpc mpc5200_defconfig
s390defconfig
mips  ath25_defconfig
mipsjmr3927_defconfig
mips   rbtx49xx_defconfig
mips cu1830-neo_defconfig
arc defconfig
mips  maltasmvp_defconfig
ia64 allmodconfig
m68k allmodconfig
m68kdefconfig
nios2   defconfig
arc  allyesconfig
nds32   defconfig
nios2allyesconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
sh   allmodconfig
parisc  defconfig
parisc   allyesconfig
mips allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a002-20200924
i386 randconfig-a006-20200924
i386 randconfig-a003-20200924
i386 randconfig-a004-20200924
i386 randconfig-a005-20200924
i386 randconfig-a001-20200924
i386 randconfig-a002-20200921
i386 randconfig-a006-20200921
i386 randconfig-a003-20200921
i386 randconfig-a004-20200921
i386 randconfig-a005-20200921
i386 randconfig-a001-20200921
i386 randconfig-a002-20200923
i386 randconfig-a006-20200923
i386 randconfig-a003-20200923
i386 randconfig-a004-20200923
i386 randconfig-a005-20200923
i386 randconfig-a001-20200923
i386 randconfig-a004-20200917
i386 randconfig-a006-20200917
i386 randconfig-a003-20200917
i386 randconfig

[powerpc:next-test] BUILD REGRESSION 2e60265c1feb6848c77dbea22d38bbe262ec49db

2020-09-24 Thread kernel test robot
allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a002-20200924
i386 randconfig-a006-20200924
i386 randconfig-a003-20200924
i386 randconfig-a004-20200924
i386 randconfig-a005-20200924
i386 randconfig-a001-20200924
i386 randconfig-a002-20200923
i386 randconfig-a006-20200923
i386 randconfig-a003-20200923
i386 randconfig-a004-20200923
i386 randconfig-a005-20200923
i386 randconfig-a001-20200923
x86_64   randconfig-a011-20200923
x86_64   randconfig-a013-20200923
x86_64   randconfig-a014-20200923
x86_64   randconfig-a015-20200923
x86_64   randconfig-a012-20200923
x86_64   randconfig-a016-20200923
i386 randconfig-a012-20200923
i386 randconfig-a014-20200923
i386 randconfig-a013-20200923
i386 randconfig-a011-20200923
i386 randconfig-a016-20200923
i386 randconfig-a015-20200923
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64   rhel
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  kexec

clang tested configs:
x86_64   randconfig-a005-20200923
x86_64   randconfig-a003-20200923
x86_64   randconfig-a004-20200923
x86_64   randconfig-a002-20200923
x86_64   randconfig-a006-20200923
x86_64   randconfig-a001-20200923

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


[powerpc:merge] BUILD SUCCESS 548ccca2a8864b7498ad8cc420fa01aecd4d4114

2020-09-24 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  
merge
branch HEAD: 548ccca2a8864b7498ad8cc420fa01aecd4d4114  Automatic merge of 
'master' into merge (2020-09-24 22:12)

elapsed time: 807m

configs tested: 137
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm defconfig
arm64allyesconfig
arm64   defconfig
arm  allyesconfig
arm  allmodconfig
sh  r7780mp_defconfig
mips pnx8335_stb225_defconfig
sh  r7785rp_defconfig
armxcep_defconfig
arm  badge4_defconfig
mips  maltasmvp_eva_defconfig
m68k apollo_defconfig
mips  bmips_stb_defconfig
arc nsimosci_hs_defconfig
shecovec24-romimage_defconfig
arm   u8500_defconfig
powerpcwarp_defconfig
arm assabet_defconfig
sh  rsk7201_defconfig
arm davinci_all_defconfig
m68k   m5275evb_defconfig
powerpc tqm8548_defconfig
arm nhk8815_defconfig
mips rt305x_defconfig
arm   mainstone_defconfig
powerpc64   defconfig
xtensa  defconfig
m68kstmark2_defconfig
armqcom_defconfig
powerpc   ebony_defconfig
xtensasmp_lx200_defconfig
m68km5272c3_defconfig
openrisc alldefconfig
powerpc mpc8313_rdb_defconfig
powerpc pseries_defconfig
powerpc akebono_defconfig
powerpcadder875_defconfig
arc  axs103_smp_defconfig
m68k  amiga_defconfig
powerpcmpc7448_hpc2_defconfig
powerpc tqm8540_defconfig
m68k  hp300_defconfig
sparc   sparc32_defconfig
powerpc mpc837x_mds_defconfig
powerpc  pcm030_defconfig
parisc   alldefconfig
mips  malta_defconfig
powerpc sbc8548_defconfig
arc nps_defconfig
h8300h8300h-sim_defconfig
arm pxa_defconfig
powerpc powernv_defconfig
openriscdefconfig
armrealview_defconfig
powerpc   eiger_defconfig
armspear6xx_defconfig
arcvdk_hs38_smp_defconfig
powerpc  pmac32_defconfig
m68k  multi_defconfig
arm axm55xx_defconfig
sh  polaris_defconfig
sh  lboxre2_defconfig
ia64 allmodconfig
ia64defconfig
ia64 allyesconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
arc  allyesconfig
nds32 allnoconfig
c6x  allyesconfig
nds32   defconfig
nios2allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
parisc   allyesconfig
s390defconfig
i386 allyesconfig
sparcallyesconfig
sparc   defconfig
i386defconfig
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a002-20200924
i386 randconfig-a006-20200924
i386 randconfig-a003-20200924
i386 randconfig-a004-20200924
i386 randconfig-a005-20200924
i386 randconfig-a001-202

[PATCH 4/9] iov_iter: transparently handle compat iovecs in import_iovec

2020-09-24 Thread Christoph Hellwig
Use in_compat_syscall() to import either native or the compat iovecs, and
remove the now superfluous compat_import_iovec.

This removes the need for special compat logic in most callers, and
the remaining ones can still be simplified by using __import_iovec
with a bool compat parameter.

Signed-off-by: Christoph Hellwig 
---
 block/scsi_ioctl.c | 12 ++--
 drivers/scsi/sg.c  |  9 +
 fs/aio.c   |  8 ++--
 fs/io_uring.c  | 20 
 fs/read_write.c|  6 --
 fs/splice.c|  2 +-
 include/linux/uio.h|  8 
 lib/iov_iter.c | 14 ++
 mm/process_vm_access.c |  3 ++-
 net/compat.c   |  4 ++--
 security/keys/compat.c |  5 ++---
 11 files changed, 26 insertions(+), 65 deletions(-)

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index ef722f04f88a93..e08df86866ee5d 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -333,16 +333,8 @@ static int sg_io(struct request_queue *q, struct gendisk 
*bd_disk,
struct iov_iter i;
struct iovec *iov = NULL;
 
-#ifdef CONFIG_COMPAT
-   if (in_compat_syscall())
-   ret = compat_import_iovec(rq_data_dir(rq),
-  hdr->dxferp, hdr->iovec_count,
-  0, &iov, &i);
-   else
-#endif
-   ret = import_iovec(rq_data_dir(rq),
-  hdr->dxferp, hdr->iovec_count,
-  0, &iov, &i);
+   ret = import_iovec(rq_data_dir(rq), hdr->dxferp,
+  hdr->iovec_count, 0, &iov, &i);
if (ret < 0)
goto out_free_cdb;
 
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 20472aaaf630a4..bfa8d77322d732 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1820,14 +1820,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
struct iovec *iov = NULL;
struct iov_iter i;
 
-#ifdef CONFIG_COMPAT
-   if (in_compat_syscall())
-   res = compat_import_iovec(rw, hp->dxferp, iov_count,
- 0, &iov, &i);
-   else
-#endif
-   res = import_iovec(rw, hp->dxferp, iov_count,
-  0, &iov, &i);
+   res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i);
if (res < 0)
return res;
 
diff --git a/fs/aio.c b/fs/aio.c
index d5ec303855669d..c45c20d875388c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1489,12 +1489,8 @@ static ssize_t aio_setup_rw(int rw, const struct iocb 
*iocb,
*iovec = NULL;
return ret;
}
-#ifdef CONFIG_COMPAT
-   if (compat)
-   return compat_import_iovec(rw, buf, len, UIO_FASTIOV, iovec,
-   iter);
-#endif
-   return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
+
+   return __import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter, compat);
 }
 
 static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8b426aa29668cb..8c27dc28da182a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2852,13 +2852,8 @@ static ssize_t __io_import_iovec(int rw, struct io_kiocb 
*req,
return ret;
}
 
-#ifdef CONFIG_COMPAT
-   if (req->ctx->compat)
-   return compat_import_iovec(rw, buf, sqe_len, UIO_FASTIOV,
-   iovec, iter);
-#endif
-
-   return import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter);
+   return __import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter,
+ req->ctx->compat);
 }
 
 static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
@@ -4200,8 +4195,9 @@ static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
sr->len);
iomsg->iov = NULL;
} else {
-   ret = import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
-   &iomsg->iov, &iomsg->msg.msg_iter);
+   ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
+&iomsg->iov, &iomsg->msg.msg_iter,
+false);
if (ret > 0)
ret = 0;
}
@@ -4241,9 +4237,9 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb 
*req,
sr->len = iomsg->iov[0].iov_len;
iomsg->iov = NULL;
} else {
-   ret = compat_import_iovec(READ, uiov, len, UIO_FASTIOV,
-   &iomsg->iov,
-   &iomsg->msg.msg_iter);
+   ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
+   

[PATCH 8/9] mm: remove compat_process_vm_{readv,writev}

2020-09-24 Thread Christoph Hellwig
Now that import_iovec handles compat iovecs, the native syscalls
can be used for the compat case as well.

Signed-off-by: Christoph Hellwig 
---
 arch/arm64/include/asm/unistd32.h |  4 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl |  4 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl |  4 +-
 arch/parisc/kernel/syscalls/syscall.tbl   |  4 +-
 arch/powerpc/kernel/syscalls/syscall.tbl  |  4 +-
 arch/s390/kernel/syscalls/syscall.tbl |  4 +-
 arch/sparc/kernel/syscalls/syscall.tbl|  4 +-
 arch/x86/entry/syscall_x32.c  |  2 +
 arch/x86/entry/syscalls/syscall_32.tbl|  4 +-
 arch/x86/entry/syscalls/syscall_64.tbl|  4 +-
 include/linux/compat.h|  8 ---
 include/uapi/asm-generic/unistd.h |  6 +-
 mm/process_vm_access.c| 69 ---
 tools/include/uapi/asm-generic/unistd.h   |  6 +-
 .../arch/powerpc/entry/syscalls/syscall.tbl   |  4 +-
 .../perf/arch/s390/entry/syscalls/syscall.tbl |  4 +-
 .../arch/x86/entry/syscalls/syscall_64.tbl|  4 +-
 17 files changed, 30 insertions(+), 109 deletions(-)

diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index 11dfae3a8563bd..0c280a05f699bf 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -763,9 +763,9 @@ __SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg)
 #define __NR_setns 375
 __SYSCALL(__NR_setns, sys_setns)
 #define __NR_process_vm_readv 376
-__SYSCALL(__NR_process_vm_readv, compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
 #define __NR_process_vm_writev 377
-__SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
 #define __NR_kcmp 378
 __SYSCALL(__NR_kcmp, sys_kcmp)
 #define __NR_finit_module 379
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl 
b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 5a39d4de0ac85b..0bc2e0fcf1ee56 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -317,8 +317,8 @@
 306n32 syncfs  sys_syncfs
 307n32 sendmmsgcompat_sys_sendmmsg
 308n32 setns   sys_setns
-309n32 process_vm_readvcompat_sys_process_vm_readv
-310n32 process_vm_writev   compat_sys_process_vm_writev
+309n32 process_vm_readvsys_process_vm_readv
+310n32 process_vm_writev   sys_process_vm_writev
 311n32 kcmpsys_kcmp
 312n32 finit_modulesys_finit_module
 313n32 sched_setattr   sys_sched_setattr
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl 
b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 136efc6b8c5444..b408c13b934296 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -356,8 +356,8 @@
 342o32 syncfs  sys_syncfs
 343o32 sendmmsgsys_sendmmsg
compat_sys_sendmmsg
 344o32 setns   sys_setns
-345o32 process_vm_readvsys_process_vm_readv
compat_sys_process_vm_readv
-346o32 process_vm_writev   sys_process_vm_writev   
compat_sys_process_vm_writev
+345o32 process_vm_readvsys_process_vm_readv
+346o32 process_vm_writev   sys_process_vm_writev
 347o32 kcmpsys_kcmp
 348o32 finit_modulesys_finit_module
 349o32 sched_setattr   sys_sched_setattr
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl 
b/arch/parisc/kernel/syscalls/syscall.tbl
index a9e184192caedd..2015a5124b78ad 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -372,8 +372,8 @@
 327common  syncfs  sys_syncfs
 328common  setns   sys_setns
 329common  sendmmsgsys_sendmmsg
compat_sys_sendmmsg
-330common  process_vm_readvsys_process_vm_readv
compat_sys_process_vm_readv
-331common  process_vm_writev   sys_process_vm_writev   
compat_sys_process_vm_writev
+330common  process_vm_readvsys_process_vm_readv
+331common  process_vm_writev   sys_process_vm_writev
 332common  kcmpsys_kcmp
 333common  finit_modulesys_finit_module
 334common  sched_setattr   sys_sched_setattr
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl 
b/arch/powerpc/kernel/syscalls/syscall.tbl
index 0d4985919ca34d..66a472aa635d3f 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall

[PATCH 6/9] fs: remove the compat readv/writev syscalls

2020-09-24 Thread Christoph Hellwig
Now that import_iovec handles compat iovecs, the native readv and writev
syscalls can be used for the compat case as well.

Signed-off-by: Christoph Hellwig 
---
 arch/arm64/include/asm/unistd32.h  |  4 ++--
 arch/mips/kernel/syscalls/syscall_n32.tbl  |  4 ++--
 arch/mips/kernel/syscalls/syscall_o32.tbl  |  4 ++--
 arch/parisc/kernel/syscalls/syscall.tbl|  4 ++--
 arch/powerpc/kernel/syscalls/syscall.tbl   |  4 ++--
 arch/s390/kernel/syscalls/syscall.tbl  |  4 ++--
 arch/sparc/kernel/syscalls/syscall.tbl |  4 ++--
 arch/x86/entry/syscall_x32.c   |  2 ++
 arch/x86/entry/syscalls/syscall_32.tbl |  4 ++--
 arch/x86/entry/syscalls/syscall_64.tbl |  4 ++--
 fs/read_write.c| 14 --
 include/linux/compat.h |  4 
 include/uapi/asm-generic/unistd.h  |  4 ++--
 tools/include/uapi/asm-generic/unistd.h|  4 ++--
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl |  4 ++--
 tools/perf/arch/s390/entry/syscalls/syscall.tbl|  4 ++--
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  |  4 ++--
 17 files changed, 30 insertions(+), 46 deletions(-)

diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index 734860ac7cf9d5..4a236493dca5b9 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -301,9 +301,9 @@ __SYSCALL(__NR_flock, sys_flock)
 #define __NR_msync 144
 __SYSCALL(__NR_msync, sys_msync)
 #define __NR_readv 145
-__SYSCALL(__NR_readv, compat_sys_readv)
+__SYSCALL(__NR_readv, sys_readv)
 #define __NR_writev 146
-__SYSCALL(__NR_writev, compat_sys_writev)
+__SYSCALL(__NR_writev, sys_writev)
 #define __NR_getsid 147
 __SYSCALL(__NR_getsid, sys_getsid)
 #define __NR_fdatasync 148
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl 
b/arch/mips/kernel/syscalls/syscall_n32.tbl
index f9df9edb67a407..c99a92646f8ee9 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -25,8 +25,8 @@
 15 n32 ioctl   compat_sys_ioctl
 16 n32 pread64 sys_pread64
 17 n32 pwrite64sys_pwrite64
-18 n32 readv   compat_sys_readv
-19 n32 writev  compat_sys_writev
+18 n32 readv   sys_readv
+19 n32 writev  sys_writev
 20 n32 access  sys_access
 21 n32 pipesysm_pipe
 22 n32 _newselect  compat_sys_select
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl 
b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 195b43cf27c848..075064d10661bf 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -156,8 +156,8 @@
 142o32 _newselect  sys_select  
compat_sys_select
 143o32 flock   sys_flock
 144o32 msync   sys_msync
-145o32 readv   sys_readv   
compat_sys_readv
-146o32 writev  sys_writev  
compat_sys_writev
+145o32 readv   sys_readv
+146o32 writev  sys_writev
 147o32 cacheflush  sys_cacheflush
 148o32 cachectlsys_cachectl
 149o32 sysmips __sys_sysmips
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl 
b/arch/parisc/kernel/syscalls/syscall.tbl
index def64d221cd4fb..192abde0001d9d 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -159,8 +159,8 @@
 142common  _newselect  sys_select  
compat_sys_select
 143common  flock   sys_flock
 144common  msync   sys_msync
-145common  readv   sys_readv   
compat_sys_readv
-146common  writev  sys_writev  
compat_sys_writev
+145common  readv   sys_readv
+146common  writev  sys_writev
 147common  getsid  sys_getsid
 148common  fdatasync   sys_fdatasync
 149common  _sysctl sys_ni_syscall
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl 
b/arch/powerpc/kernel/syscalls/syscall.tbl
index c2d737ff2e7bec..6f1e2ecf0edad9 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -193,8 +193,8 @@
 142common  _newselect  sys_select  
compat_sys_select
 143common  floc

[PATCH 5/9] fs: remove various compat readv/writev helpers

2020-09-24 Thread Christoph Hellwig
Now that import_iovec handles compat iovecs as well, all the duplicated
code in the compat readv/writev helpers is not needed.  Remove them
and switch the compat syscall handlers to use the native helpers.

Signed-off-by: Christoph Hellwig 
---
 fs/read_write.c| 179 +++--
 include/linux/compat.h |  20 ++---
 2 files changed, 40 insertions(+), 159 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 0a68037580b455..eab427b7cc0a3f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1068,226 +1068,107 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const 
struct iovec __user *, vec,
return do_pwritev(fd, vec, vlen, pos, flags);
 }
 
+/*
+ * Various compat syscalls.  Note that they all pretend to take a native
+ * iovec - import_iovec will properly treat those as compat_iovecs based on
+ * in_compat_syscall().
+ */
 #ifdef CONFIG_COMPAT
-static size_t compat_readv(struct file *file,
-  const struct compat_iovec __user *vec,
-  unsigned long vlen, loff_t *pos, rwf_t flags)
-{
-   struct iovec iovstack[UIO_FASTIOV];
-   struct iovec *iov = iovstack;
-   struct iov_iter iter;
-   ssize_t ret;
-
-   ret = import_iovec(READ, (const struct iovec __user *)vec, vlen,
-  UIO_FASTIOV, &iov, &iter);
-   if (ret >= 0) {
-   ret = do_iter_read(file, &iter, pos, flags);
-   kfree(iov);
-   }
-   if (ret > 0)
-   add_rchar(current, ret);
-   inc_syscr(current);
-   return ret;
-}
-
-static size_t do_compat_readv(compat_ulong_t fd,
-const struct compat_iovec __user *vec,
-compat_ulong_t vlen, rwf_t flags)
-{
-   struct fd f = fdget_pos(fd);
-   ssize_t ret;
-   loff_t pos;
-
-   if (!f.file)
-   return -EBADF;
-   pos = f.file->f_pos;
-   ret = compat_readv(f.file, vec, vlen, &pos, flags);
-   if (ret >= 0)
-   f.file->f_pos = pos;
-   fdput_pos(f);
-   return ret;
-
-}
-
 COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
-   const struct compat_iovec __user *,vec,
+   const struct iovec __user *, vec,
compat_ulong_t, vlen)
 {
-   return do_compat_readv(fd, vec, vlen, 0);
-}
-
-static long do_compat_preadv64(unsigned long fd,
- const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t pos, rwf_t flags)
-{
-   struct fd f;
-   ssize_t ret;
-
-   if (pos < 0)
-   return -EINVAL;
-   f = fdget(fd);
-   if (!f.file)
-   return -EBADF;
-   ret = -ESPIPE;
-   if (f.file->f_mode & FMODE_PREAD)
-   ret = compat_readv(f.file, vec, vlen, &pos, flags);
-   fdput(f);
-   return ret;
+   return do_readv(fd, vec, vlen, 0);
 }
 
 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
-   const struct compat_iovec __user *,vec,
+   const struct iovec __user *, vec,
unsigned long, vlen, loff_t, pos)
 {
-   return do_compat_preadv64(fd, vec, vlen, pos, 0);
+   return do_preadv(fd, vec, vlen, pos, 0);
 }
 #endif
 
 COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
-   const struct compat_iovec __user *,vec,
+   const struct iovec __user *, vec,
compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
 {
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
 
-   return do_compat_preadv64(fd, vec, vlen, pos, 0);
+   return do_preadv(fd, vec, vlen, pos, 0);
 }
 
 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
-   const struct compat_iovec __user *,vec,
+   const struct iovec __user *, vec,
unsigned long, vlen, loff_t, pos, rwf_t, flags)
 {
if (pos == -1)
-   return do_compat_readv(fd, vec, vlen, flags);
-
-   return do_compat_preadv64(fd, vec, vlen, pos, flags);
+   return do_readv(fd, vec, vlen, flags);
+   return do_preadv(fd, vec, vlen, pos, flags);
 }
 #endif
 
 COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
-   const struct compat_iovec __user *,vec,
+   const struct iovec __user *, vec,
compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
rwf_t, flags)
 {
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
 
if (pos == -1)
-   return do_compat_readv(fd, vec, vlen, flags);
-
-   return do_compat_preadv64(fd, vec, vlen, pos, flags);
-}
-
-static size_t compat_writev(struct file *file,
-   const struct compat_iovec __user *vec,
-   unsigned long vlen, loff_t *pos, rwf_t flags)
-{
-   struct iovec iovstack[UIO_FASTIOV];
-  

[PATCH 7/9] fs: remove compat_sys_vmsplice

2020-09-24 Thread Christoph Hellwig
Now that import_iovec handles compat iovecs, the native vmsplice syscall
can be used for the compat case as well.

Signed-off-by: Christoph Hellwig 
---
 arch/arm64/include/asm/unistd32.h |  2 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl |  2 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl |  2 +-
 arch/parisc/kernel/syscalls/syscall.tbl   |  2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl  |  2 +-
 arch/s390/kernel/syscalls/syscall.tbl |  2 +-
 arch/sparc/kernel/syscalls/syscall.tbl|  2 +-
 arch/x86/entry/syscall_x32.c  |  1 +
 arch/x86/entry/syscalls/syscall_32.tbl|  2 +-
 arch/x86/entry/syscalls/syscall_64.tbl|  2 +-
 fs/splice.c   | 57 +--
 include/linux/compat.h|  4 --
 include/uapi/asm-generic/unistd.h |  2 +-
 tools/include/uapi/asm-generic/unistd.h   |  2 +-
 .../arch/powerpc/entry/syscalls/syscall.tbl   |  2 +-
 .../perf/arch/s390/entry/syscalls/syscall.tbl |  2 +-
 .../arch/x86/entry/syscalls/syscall_64.tbl|  2 +-
 17 files changed, 28 insertions(+), 62 deletions(-)

diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index 4a236493dca5b9..11dfae3a8563bd 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -697,7 +697,7 @@ __SYSCALL(__NR_sync_file_range2, 
compat_sys_aarch32_sync_file_range2)
 #define __NR_tee 342
 __SYSCALL(__NR_tee, sys_tee)
 #define __NR_vmsplice 343
-__SYSCALL(__NR_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_move_pages 344
 __SYSCALL(__NR_move_pages, compat_sys_move_pages)
 #define __NR_getcpu 345
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl 
b/arch/mips/kernel/syscalls/syscall_n32.tbl
index c99a92646f8ee9..5a39d4de0ac85b 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -278,7 +278,7 @@
 267n32 splice  sys_splice
 268n32 sync_file_range sys_sync_file_range
 269n32 tee sys_tee
-270n32 vmsplicecompat_sys_vmsplice
+270n32 vmsplicesys_vmsplice
 271n32 move_pages  compat_sys_move_pages
 272n32 set_robust_list compat_sys_set_robust_list
 273n32 get_robust_list compat_sys_get_robust_list
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl 
b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 075064d10661bf..136efc6b8c5444 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -318,7 +318,7 @@
 304o32 splice  sys_splice
 305o32 sync_file_range sys_sync_file_range 
sys32_sync_file_range
 306o32 tee sys_tee
-307o32 vmsplicesys_vmsplice
compat_sys_vmsplice
+307o32 vmsplicesys_vmsplice
 308o32 move_pages  sys_move_pages  
compat_sys_move_pages
 309o32 set_robust_list sys_set_robust_list 
compat_sys_set_robust_list
 310o32 get_robust_list sys_get_robust_list 
compat_sys_get_robust_list
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl 
b/arch/parisc/kernel/syscalls/syscall.tbl
index 192abde0001d9d..a9e184192caedd 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -330,7 +330,7 @@
 29232  sync_file_range parisc_sync_file_range
 29264  sync_file_range sys_sync_file_range
 293common  tee sys_tee
-294common  vmsplicesys_vmsplice
compat_sys_vmsplice
+294common  vmsplicesys_vmsplice
 295common  move_pages  sys_move_pages  
compat_sys_move_pages
 296common  getcpu  sys_getcpu
 297common  epoll_pwait sys_epoll_pwait 
compat_sys_epoll_pwait
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl 
b/arch/powerpc/kernel/syscalls/syscall.tbl
index 6f1e2ecf0edad9..0d4985919ca34d 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -369,7 +369,7 @@
 282common  unshare sys_unshare
 283common  splice  sys_splice
 284common  tee sys_tee
-285common  vmsplicesys_vmsplice
compat_sys_vmsplice
+285common  vmsplicesys_vmsplice
 286common  openat  sys_openat  
compat_sys_openat
 287common  m

let import_iovec deal with compat_iovecs as well v4

2020-09-24 Thread Christoph Hellwig
Hi Al,

this series changes import_iovec to transparently deal with compat iovec
structures, and then cleans up a lot of code duplication.

Changes since v3:
 - fix up changed prototypes in compat.h as well

Changes since v2:
 - revert the switch of the access process vm syscalls to iov_iter
 - refactor the import_iovec internals differently
 - switch aio to use __import_iovec

Changes since v1:
 - improve a commit message
 - drop a pointless unlikely
 - drop the PF_FORCE_COMPAT flag
 - add a few more cleanups (including two from David Laight)

Diffstat:
 arch/arm64/include/asm/unistd32.h  |   10 
 arch/mips/kernel/syscalls/syscall_n32.tbl  |   10 
 arch/mips/kernel/syscalls/syscall_o32.tbl  |   10 
 arch/parisc/kernel/syscalls/syscall.tbl|   10 
 arch/powerpc/kernel/syscalls/syscall.tbl   |   10 
 arch/s390/kernel/syscalls/syscall.tbl  |   10 
 arch/sparc/kernel/syscalls/syscall.tbl |   10 
 arch/x86/entry/syscall_x32.c   |5 
 arch/x86/entry/syscalls/syscall_32.tbl |   10 
 arch/x86/entry/syscalls/syscall_64.tbl |   10 
 block/scsi_ioctl.c |   12 
 drivers/scsi/sg.c  |9 
 fs/aio.c   |   38 --
 fs/io_uring.c  |   20 -
 fs/read_write.c|  362 +
 fs/splice.c|   57 ---
 include/linux/compat.h |   24 -
 include/linux/fs.h |   11 
 include/linux/uio.h|   10 
 include/uapi/asm-generic/unistd.h  |   12 
 lib/iov_iter.c |  161 +++--
 mm/process_vm_access.c |   85 
 net/compat.c   |4 
 security/keys/compat.c |   37 --
 security/keys/internal.h   |5 
 security/keys/keyctl.c |2 
 tools/include/uapi/asm-generic/unistd.h|   12 
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl |   10 
 tools/perf/arch/s390/entry/syscalls/syscall.tbl|   10 
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  |   10 
 30 files changed, 280 insertions(+), 706 deletions(-)


[PATCH 9/9] security/keys: remove compat_keyctl_instantiate_key_iov

2020-09-24 Thread Christoph Hellwig
Now that import_iovec handles compat iovecs, the native version of
keyctl_instantiate_key_iov can be used for the compat case as well.

Signed-off-by: Christoph Hellwig 
---
 security/keys/compat.c   | 36 ++--
 security/keys/internal.h |  5 -
 security/keys/keyctl.c   |  2 +-
 3 files changed, 3 insertions(+), 40 deletions(-)

diff --git a/security/keys/compat.c b/security/keys/compat.c
index 7ae531db031cf8..1545efdca56227 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -11,38 +11,6 @@
 #include 
 #include "internal.h"
 
-/*
- * Instantiate a key with the specified compatibility multipart payload and
- * link the key into the destination keyring if one is given.
- *
- * The caller must have the appropriate instantiation permit set for this to
- * work (see keyctl_assume_authority).  No other permissions are required.
- *
- * If successful, 0 will be returned.
- */
-static long compat_keyctl_instantiate_key_iov(
-   key_serial_t id,
-   const struct compat_iovec __user *_payload_iov,
-   unsigned ioc,
-   key_serial_t ringid)
-{
-   struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
-   struct iov_iter from;
-   long ret;
-
-   if (!_payload_iov)
-   ioc = 0;
-
-   ret = import_iovec(WRITE, (const struct iovec __user *)_payload_iov,
-  ioc, ARRAY_SIZE(iovstack), &iov, &from);
-   if (ret < 0)
-   return ret;
-
-   ret = keyctl_instantiate_key_common(id, &from, ringid);
-   kfree(iov);
-   return ret;
-}
-
 /*
  * The key control system call, 32-bit compatibility version for 64-bit archs
  */
@@ -113,8 +81,8 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option,
return keyctl_reject_key(arg2, arg3, arg4, arg5);
 
case KEYCTL_INSTANTIATE_IOV:
-   return compat_keyctl_instantiate_key_iov(
-   arg2, compat_ptr(arg3), arg4, arg5);
+   return keyctl_instantiate_key_iov(arg2, compat_ptr(arg3), arg4,
+ arg5);
 
case KEYCTL_INVALIDATE:
return keyctl_invalidate_key(arg2);
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 338a526cbfa516..9b9cf3b6fcbb4d 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -262,11 +262,6 @@ extern long keyctl_instantiate_key_iov(key_serial_t,
   const struct iovec __user *,
   unsigned, key_serial_t);
 extern long keyctl_invalidate_key(key_serial_t);
-
-struct iov_iter;
-extern long keyctl_instantiate_key_common(key_serial_t,
- struct iov_iter *,
- key_serial_t);
 extern long keyctl_restrict_keyring(key_serial_t id,
const char __user *_type,
const char __user *_restriction);
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 9febd37a168fd0..e26bbccda7ccee 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1164,7 +1164,7 @@ static int keyctl_change_reqkey_auth(struct key *key)
  *
  * If successful, 0 will be returned.
  */
-long keyctl_instantiate_key_common(key_serial_t id,
+static long keyctl_instantiate_key_common(key_serial_t id,
   struct iov_iter *from,
   key_serial_t ringid)
 {
-- 
2.28.0



[PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec

2020-09-24 Thread Christoph Hellwig
Split rw_copy_check_uvector into two new helpers with more sensible
calling conventions:

 - iovec_from_user copies an iovec from userspace either into the provided
   stack buffer if it fits, or allocates a new buffer for it.  Returns
   the actually used iovec.  It also verifies that iov_len does fit a
   signed type, and handles compat iovecs if the compat flag is set.
 - __import_iovec consolidates the native and compat versions of
   import_iovec. It calls iovec_from_user, then validates each iovec
   actually points to user addresses, and ensures the total length
   doesn't overflow.

This has two major implications:

 - the access_process_vm case loses the total length checking, which
   wasn't required anyway, given that each call receives two iovecs
   for the local and remote side of the operation, and it verifies
   the total length on the local side already.
 - instead of a single loop there now are two loops over the iovecs.
   Given that the iovecs are cache hot this doesn't make a major
   difference

Signed-off-by: Christoph Hellwig 
---
 include/linux/compat.h |   6 -
 include/linux/fs.h |  13 --
 include/linux/uio.h|  12 +-
 lib/iov_iter.c | 300 -
 mm/process_vm_access.c |  34 +++--
 5 files changed, 138 insertions(+), 227 deletions(-)

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 654c1ec36671a4..b930de791ff16b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -451,12 +451,6 @@ extern long compat_arch_ptrace(struct task_struct *child, 
compat_long_t request,
 
 struct epoll_event;/* fortunately, this one is fixed-layout */
 
-extern ssize_t compat_rw_copy_check_uvector(int type,
-   const struct compat_iovec __user *uvector,
-   unsigned long nr_segs,
-   unsigned long fast_segs, struct iovec *fast_pointer,
-   struct iovec **ret_pointer);
-
 extern void __user *compat_alloc_user_space(unsigned long len);
 
 int compat_restore_altstack(const compat_stack_t __user *uss);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7519ae003a082c..e69b45b6cc7b5f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -178,14 +178,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t 
offset,
 /* File supports async buffered reads */
 #define FMODE_BUF_RASYNC   ((__force fmode_t)0x4000)
 
-/*
- * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
- * that indicates that they should check the contents of the iovec are
- * valid, but not check the memory that the iovec elements
- * points too.
- */
-#define CHECK_IOVEC_ONLY -1
-
 /*
  * Attribute flags.  These should be or-ed together to figure out what
  * has been changed!
@@ -1887,11 +1879,6 @@ static inline int call_mmap(struct file *file, struct 
vm_area_struct *vma)
return file->f_op->mmap(file, vma);
 }
 
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
- unsigned long nr_segs, unsigned long fast_segs,
- struct iovec *fast_pointer,
- struct iovec **ret_pointer);
-
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 3835a8a8e9eae0..92c11fe41c6228 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -266,9 +266,15 @@ bool csum_and_copy_from_iter_full(void *addr, size_t 
bytes, __wsum *csum, struct
 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
struct iov_iter *i);
 
-ssize_t import_iovec(int type, const struct iovec __user * uvector,
-unsigned nr_segs, unsigned fast_segs,
-struct iovec **iov, struct iov_iter *i);
+struct iovec *iovec_from_user(const struct iovec __user *uvector,
+   unsigned long nr_segs, unsigned long fast_segs,
+   struct iovec *fast_iov, bool compat);
+ssize_t import_iovec(int type, const struct iovec __user *uvec,
+unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+struct iov_iter *i);
+ssize_t __import_iovec(int type, const struct iovec __user *uvec,
+unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+struct iov_iter *i, bool compat);
 
 #ifdef CONFIG_COMPAT
 struct compat_iovec;
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ccea9db3f72be8..d5d8afe31fca16 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -7,6 +7,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1650,107 +1651,133 @@ const void *dup_iter(struct iov_iter *new, struct 
iov_iter *old, gfp_t flags)
 }
 EXPORT_SYMBOL(dup_iter);
 
-/**
- * rw_copy_check_uvector() - Copy an array of &struct iovec from userspa

[PATCH 2/9] iov_iter: move rw_copy_check_uvector() into lib/iov_iter.c

2020-09-24 Thread Christoph Hellwig
From: David Laight 

This lets the compiler inline it into import_iovec() generating
much better code.

Signed-off-by: David Laight 
Signed-off-by: Christoph Hellwig 
---
 fs/read_write.c | 179 
 lib/iov_iter.c  | 176 +++
 2 files changed, 176 insertions(+), 179 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 5db58b8c78d0dd..e5e891a88442ef 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -752,185 +752,6 @@ static ssize_t do_loop_readv_writev(struct file *filp, 
struct iov_iter *iter,
return ret;
 }
 
-/**
- * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
- * into the kernel and check that it is valid.
- *
- * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
- * @uvector: Pointer to the userspace array.
- * @nr_segs: Number of elements in userspace array.
- * @fast_segs: Number of elements in @fast_pointer.
- * @fast_pointer: Pointer to (usually small on-stack) kernel array.
- * @ret_pointer: (output parameter) Pointer to a variable that will point to
- * either @fast_pointer, a newly allocated kernel array, or NULL,
- * depending on which array was used.
- *
- * This function copies an array of &struct iovec of @nr_segs from
- * userspace into the kernel and checks that each element is valid (e.g.
- * it does not point to a kernel address or cause overflow by being too
- * large, etc.).
- *
- * As an optimization, the caller may provide a pointer to a small
- * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
- * (the size of this array, or 0 if unused, should be given in @fast_segs).
- *
- * @ret_pointer will always point to the array that was used, so the
- * caller must take care not to call kfree() on it e.g. in case the
- * @fast_pointer array was used and it was allocated on the stack.
- *
- * Return: The total number of bytes covered by the iovec array on success
- *   or a negative error code on error.
- */
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
- unsigned long nr_segs, unsigned long fast_segs,
- struct iovec *fast_pointer,
- struct iovec **ret_pointer)
-{
-   unsigned long seg;
-   ssize_t ret;
-   struct iovec *iov = fast_pointer;
-
-   /*
-* SuS says "The readv() function *may* fail if the iovcnt argument
-* was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-* traditionally returned zero for zero segments, so...
-*/
-   if (nr_segs == 0) {
-   ret = 0;
-   goto out;
-   }
-
-   /*
-* First get the "struct iovec" from user memory and
-* verify all the pointers
-*/
-   if (nr_segs > UIO_MAXIOV) {
-   ret = -EINVAL;
-   goto out;
-   }
-   if (nr_segs > fast_segs) {
-   iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-   if (iov == NULL) {
-   ret = -ENOMEM;
-   goto out;
-   }
-   }
-   if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
-   ret = -EFAULT;
-   goto out;
-   }
-
-   /*
-* According to the Single Unix Specification we should return EINVAL
-* if an element length is < 0 when cast to ssize_t or if the
-* total length would overflow the ssize_t return value of the
-* system call.
-*
-* Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
-* overflow case.
-*/
-   ret = 0;
-   for (seg = 0; seg < nr_segs; seg++) {
-   void __user *buf = iov[seg].iov_base;
-   ssize_t len = (ssize_t)iov[seg].iov_len;
-
-   /* see if we we're about to use an invalid len or if
-* it's about to overflow ssize_t */
-   if (len < 0) {
-   ret = -EINVAL;
-   goto out;
-   }
-   if (type >= 0
-   && unlikely(!access_ok(buf, len))) {
-   ret = -EFAULT;
-   goto out;
-   }
-   if (len > MAX_RW_COUNT - ret) {
-   len = MAX_RW_COUNT - ret;
-   iov[seg].iov_len = len;
-   }
-   ret += len;
-   }
-out:
-   *ret_pointer = iov;
-   return ret;
-}
-
-#ifdef CONFIG_COMPAT
-ssize_t compat_rw_copy_check_uvector(int type,
-   const struct compat_iovec __user *uvector, unsigned long 
nr_segs,
-   unsigned long fast_segs, struct iovec *fast_pointer,
-   struct iovec **ret_pointer)
-{
-   compat_ssize_t tot_len;
-   struct iovec *iov = *ret_pointer = fast_pointer;
-   ssize_t ret = 0;
-   int seg;
-
-   /*
- 

[PATCH 1/9] compat.h: fix a spelling error in <linux/compat.h>

2020-09-24 Thread Christoph Hellwig
There is no compat_sys_readv64v2 syscall, only a compat_sys_preadv64v2
one.

Signed-off-by: Christoph Hellwig 
---
 include/linux/compat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/compat.h b/include/linux/compat.h
index b354ce58966e2d..654c1ec36671a4 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -812,7 +812,7 @@ asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd,
const struct compat_iovec __user *vec,
compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags);
 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
-asmlinkage long  compat_sys_readv64v2(unsigned long fd,
+asmlinkage long  compat_sys_preadv64v2(unsigned long fd,
const struct compat_iovec __user *vec,
unsigned long vlen, loff_t pos, rwf_t flags);
 #endif
-- 
2.28.0



Re: [PATCH kernel] powerpc/dma: Fix dma_map_ops::get_required_mask

2020-09-24 Thread Christoph Hellwig
On Thu, Sep 24, 2020 at 05:03:11PM +1000, Alexey Kardashevskiy wrote:
> May be... The current behavior is not wrong (after the fix) but not
> optimal either. Even with legacy PCI it should just result in failing
> attempt to set 64bit mask which drivers should still handle, i.e. choose
> a shorter mask.

Err, no.

> Why not ditch the whole dma_get_required_mask() and just fail on setting
> a bigger mask? Are these failures not handled in some drivers? Or there
> are cases when a shorter mask is better? Thanks,

Because that is a complete pain.  Think of it, the device/driver knows
what it supports.  For 98% of the modern devices that means all 64
bits, and for most others this means 32 bits, with a few wackos that
support 48 bits or something like that.  The 98% just take any address
thrown at them, and the others just care that they never see an
address larger than what they support.  They could not care any less
if the system supports 31, 36, 41, 48, 52, 55, 61 or 63-bit addressing,
and they most certainly should not implement stupid boilerplate code to
guess what addressing mode the system implements.  They just declare
what they support.

Then you have the 12 drivers for devices that can do optimizations if
they never see large DMA addresses.  They use the somewhat misnamed
dma_get_required_mask API to query what the largest address they might
see is and act based on that, while not putting any burden on all the
sane devices/drivers.


Re: [PATCH v3] powerpc/pci: unmap legacy INTx interrupts when a PHB is removed

2020-09-24 Thread Cédric Le Goater
On 9/23/20 9:40 AM, Cédric Le Goater wrote:
> When a passthrough IO adapter is removed from a pseries machine using
> hash MMU and the XIVE interrupt mode, the POWER hypervisor expects the
> guest OS to clear all page table entries related to the adapter. If
> some are still present, the RTAS call which isolates the PCI slot
> returns error 9001 "valid outstanding translations" and the removal of
> the IO adapter fails. This is because when the PHBs are scanned, Linux
> maps automatically the INTx interrupts in the Linux interrupt number
> space but these are never removed.
> 
> To solve this problem, we introduce a PPC platform specific
> pcibios_remove_bus() routine which clears all interrupt mappings when
> the bus is removed. This also clears the associated page table entries
> of the ESB pages when using XIVE.
> 
> For this purpose, we record the logical interrupt numbers of the
> mapped interrupt under the PHB structure and let pcibios_remove_bus()
> do the clean up.
> 
> Since some PCI adapters, like GPUs, use the "interrupt-map" property
> to describe interrupt mappings other than the legacy INTx interrupts,
> we can not restrict the size of the mapping array to PCI_NUM_INTX. The
> number of interrupt mappings is computed from the "interrupt-map"
> property and the mapping array is allocated accordingly.
> 
> Cc: "Oliver O'Halloran" 
> Cc: Alexey Kardashevskiy 
> Reviewed-by: Alexey Kardashevskiy 
> Signed-off-by: Cédric Le Goater 
> ---
> 
>  Changes in v3 :
> 
>  - NULLified 'irq_map' in pci_irq_map_dispose()


Forget that. I am going to move the kfree() into the routine freeing the
PCI controller structure.

Thanks,

C.