[PATCH v21 03/13] clocksource: arm_arch_timer: remove arch_timer_detect_rate

2017-02-06 Thread fu . wei
From: Fu Wei 

The original counter frequency detection call (arch_timer_detect_rate)
includes getting the frequency from the per-cpu arch-timer and the
memory-mapped (MMIO) timer interfaces. But they will be needed only when
the system initializes the relevant timer.

This patch removes the arch_timer_detect_rate function, and uses
arch_timer_get_sysreg_freq and arch_timer_get_mmio_freq directly.

Signed-off-by: Fu Wei 
---
 drivers/clocksource/arm_arch_timer.c | 38 +++-
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/drivers/clocksource/arm_arch_timer.c 
b/drivers/clocksource/arm_arch_timer.c
index aa14305..63fb441 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -571,25 +571,6 @@ static u32 arch_timer_get_mmio_freq(void __iomem *cntbase)
return readl_relaxed(cntbase + CNTFRQ);
 }
 
-static void arch_timer_detect_rate(void __iomem *cntbase)
-{
-   /* Who has more than one independent system counter? */
-   if (arch_timer_rate)
-   return;
-
-   /*
-* Try to determine the frequency from the MMIO timer or the sysreg.
-*/
-   if (cntbase)
-   arch_timer_rate = arch_timer_get_mmio_freq(cntbase);
-   else
-   arch_timer_rate = arch_timer_get_sysreg_freq();
-
-   /* Check the timer frequency. */
-   if (arch_timer_rate == 0)
-   pr_warn("frequency not available\n");
-}
-
 static void arch_timer_banner(unsigned type)
 {
pr_info("%s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n",
@@ -970,7 +951,11 @@ static int __init arch_timer_of_init(struct device_node 
*np)
 */
if (!arch_timer_rate &&
of_property_read_u32(np, "clock-frequency", &arch_timer_rate))
-   arch_timer_detect_rate(NULL);
+   arch_timer_rate = arch_timer_get_sysreg_freq();
+   if (!arch_timer_rate) {
+   pr_err(FW_BUG "frequency not available.\n");
+   return -EINVAL;
+   }
 
arch_timer_c3stop = !of_property_read_bool(np, "always-on");
 
@@ -1087,7 +1072,12 @@ static int __init arch_timer_mem_init(struct device_node 
*np)
 */
if (!arch_timer_rate &&
of_property_read_u32(np, "clock-frequency", &arch_timer_rate))
-   arch_timer_detect_rate(base);
+   arch_timer_rate = arch_timer_get_mmio_freq(base);
+   if (!arch_timer_rate) {
+   pr_err(FW_BUG "frequency not available for MMIO timer.\n");
+   ret = -EINVAL;
+   goto out;
+   }
 
ret = arch_timer_mem_register(base, irq);
if (ret)
@@ -1150,7 +1140,11 @@ static int __init arch_timer_acpi_init(struct 
acpi_table_header *table)
gtdt->non_secure_el2_flags);
 
/* Get the frequency from the sysreg CNTFRQ */
-   arch_timer_detect_rate(NULL);
+   arch_timer_rate = arch_timer_get_sysreg_freq();
+   if (!arch_timer_rate) {
+   pr_err(FW_BUG "frequency not available.\n");
+   return -EINVAL;
+   }
 
arch_timer_uses_ppi = arch_timer_select_ppi();
if (!arch_timer_ppi[arch_timer_uses_ppi]) {
-- 
2.9.3



[PATCH v21 04/13] clocksource: arm_arch_timer: split arch_timer_rate for different types of timer

2017-02-06 Thread fu . wei
From: Fu Wei 

Currently, arch_timer_rate is used to store the frequency got from per-cpu
arch-timer or the memory-mapped (MMIO) timers. But those values come from
different registers which should all be initialized by firmware.

This patch removes arch_timer_rate, and uses arch_timer_sysreg_freq and
arch_timer_mmio_freq instead.

Signed-off-by: Fu Wei 
---
 drivers/clocksource/arm_arch_timer.c | 42 
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/drivers/clocksource/arm_arch_timer.c 
b/drivers/clocksource/arm_arch_timer.c
index 63fb441..97a4e90 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -65,7 +65,8 @@ struct arch_timer {
 
 #define to_arch_timer(e) container_of(e, struct arch_timer, evt)
 
-static u32 arch_timer_rate;
+static u32 arch_timer_sysreg_freq;
+static u32 arch_timer_mmio_freq;
 static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI];
 
 static struct clock_event_device __percpu *arch_timer_evt;
@@ -417,6 +418,7 @@ static void erratum_workaround_set_sne(struct 
clock_event_device *clk)
 static void __arch_timer_setup(unsigned type,
   struct clock_event_device *clk)
 {
+   u32 freq;
clk->features = CLOCK_EVT_FEAT_ONESHOT;
 
if (type == ARCH_TIMER_TYPE_CP15) {
@@ -444,6 +446,7 @@ static void __arch_timer_setup(unsigned type,
}
 
erratum_workaround_set_sne(clk);
+   freq = arch_timer_sysreg_freq;
} else {
clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
clk->name = "arch_mem_timer";
@@ -460,11 +463,12 @@ static void __arch_timer_setup(unsigned type,
clk->set_next_event =
arch_timer_set_next_event_phys_mem;
}
+   freq = arch_timer_mmio_freq;
}
 
clk->set_state_shutdown(clk);
 
-   clockevents_config_and_register(clk, arch_timer_rate, 0xf, 0x7fff);
+   clockevents_config_and_register(clk, freq, 0xf, 0x7fff);
 }
 
 static void arch_timer_evtstrm_enable(int divider)
@@ -487,7 +491,7 @@ static void arch_timer_configure_evtstream(void)
int evt_stream_div, pos;
 
/* Find the closest power of two to the divisor */
-   evt_stream_div = arch_timer_rate / ARCH_TIMER_EVT_STREAM_FREQ;
+   evt_stream_div = arch_timer_sysreg_freq / ARCH_TIMER_EVT_STREAM_FREQ;
pos = fls(evt_stream_div);
if (pos > 1 && !(evt_stream_div & (1 << (pos - 2
pos--;
@@ -578,8 +582,8 @@ static void arch_timer_banner(unsigned type)
type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ?
" and " : "",
type & ARCH_TIMER_TYPE_MEM ? "mmio" : "",
-   (unsigned long)arch_timer_rate / 100,
-   (unsigned long)(arch_timer_rate / 1) % 100,
+   (unsigned long)arch_timer_sysreg_freq / 100,
+   (unsigned long)(arch_timer_sysreg_freq / 1) % 100,
type & ARCH_TIMER_TYPE_CP15 ?
(arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ? "virt" : 
"phys" :
"",
@@ -591,7 +595,7 @@ static void arch_timer_banner(unsigned type)
 
 u32 arch_timer_get_rate(void)
 {
-   return arch_timer_rate;
+   return arch_timer_sysreg_freq;
 }
 
 static u64 arch_counter_get_cntvct_mem(void)
@@ -648,6 +652,7 @@ struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
 static void __init arch_counter_register(unsigned type)
 {
u64 start_count;
+   u32 freq;
 
/* Register the CP15 based counter if we have one */
if (type & ARCH_TIMER_TYPE_CP15) {
@@ -657,6 +662,8 @@ static void __init arch_counter_register(unsigned type)
else
arch_timer_read_counter = arch_counter_get_cntpct;
 
+   freq = arch_timer_sysreg_freq;
+
clocksource_counter.archdata.vdso_direct = true;
 
 #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND
@@ -669,19 +676,20 @@ static void __init arch_counter_register(unsigned type)
 #endif
} else {
arch_timer_read_counter = arch_counter_get_cntvct_mem;
+   freq = arch_timer_mmio_freq;
}
 
if (!arch_counter_suspend_stop)
clocksource_counter.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
start_count = arch_timer_read_counter();
-   clocksource_register_hz(&clocksource_counter, arch_timer_rate);
+   clocksource_register_hz(&clocksource_counter, freq);
cyclecounter.mult = clocksource_counter.mult;
cyclecounter.shift = clocksource_counter.shift;
timecounter_init(&arch_timer_kvm_info.timecounter,
 &cyclecounter, start_count);
 
/* 56 bits minimum, so we assume worst case rollover */
-   sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate);
+   sched_clock_register(arch_ti

[PATCH v21 01/13] clocksource: arm_arch_timer: introduce two functions to get the frequency from mmio and sysreg.

2017-02-06 Thread fu . wei
From: Fu Wei 

The patch introduces two new functions: arch_timer_get_sysreg_freq and
arch_timer_get_mmio_freq, and applies them in arch_timer_detect_rate.
These will be used for getting the frequency from mmio and sysreg to
prepare for reworking counter frequency detection.

Signed-off-by: Fu Wei 
---
 drivers/clocksource/arm_arch_timer.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/arm_arch_timer.c 
b/drivers/clocksource/arm_arch_timer.c
index 46a1709..1d273d6 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -554,6 +554,23 @@ static int arch_timer_starting_cpu(unsigned int cpu)
return 0;
 }
 
+static u32 arch_timer_get_sysreg_freq(void)
+{
+   /*
+* Try to get the frequency from the CNTFRQ of sysreg.
+*/
+   return arch_timer_get_cntfrq();
+}
+
+static u32 arch_timer_get_mmio_freq(void __iomem *cntbase)
+{
+   /*
+* Try to get the frequency from the CNTFRQ of timer frame registers.
+* Note: please verify cntbase in caller.
+*/
+   return readl_relaxed(cntbase + CNTFRQ);
+}
+
 static void
 arch_timer_detect_rate(void __iomem *cntbase, struct device_node *np)
 {
@@ -568,9 +585,9 @@ arch_timer_detect_rate(void __iomem *cntbase, struct 
device_node *np)
if (!acpi_disabled ||
of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) {
if (cntbase)
-   arch_timer_rate = readl_relaxed(cntbase + CNTFRQ);
+   arch_timer_rate = arch_timer_get_mmio_freq(cntbase);
else
-   arch_timer_rate = arch_timer_get_cntfrq();
+   arch_timer_rate = arch_timer_get_sysreg_freq();
}
 
/* Check the timer frequency. */
-- 
2.9.3



[PATCH v21 00/13] acpi, clocksource: add GTDT driver and GTDT support in arm_arch_timer

2017-02-06 Thread fu . wei
From: Fu Wei 

This patchset:
(1)Preparation for adding GTDT support in arm_arch_timer:
1. Introduce two functions to get the frequency from mmio and sysreg.
2. separate out device-tree code from arch_timer_detect_rate
3. remove arch_timer_detect_rate use arch_timer_get_*_freq directly
4. split arch_timer_rate for different types of timer
5. Refactor arch_timer_needs_probing, and move it into DT init call
6. Introduce some new structs and refactor the MMIO timer init code
for reusing some common code.

(2)Introduce ACPI GTDT parser: drivers/acpi/arm64/acpi_gtdt.c
Parse all kinds of timer in GTDT table of ACPI:arch timer,
memory-mapped timer and SBSA Generic Watchdog timer.
This driver can help to simplify all the relevant timer drivers,
and separate all the ACPI GTDT knowledge from them.

(3)Simplify ACPI code for arm_arch_timer

(4)Add GTDT support for ARM memory-mapped timer.

This patchset has been tested on the following platforms with ACPI enabled:
(1)ARM Foundation v8 model

Changelog:
v21: https://lkml.org/lkml/2017/2/6/
 Introduce two functions to get the frequency from mmio and sysreg.
 Remove arch_timer_detect_rate use arch_timer_get_*_freq directly
 Split arch_timer_rate for different types of timer.
 Skip secure timer frame in GTDT driver.
 Rebase to git://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git 
arch-timer/cleanup
 (The first 6 patches in v20 have been merged into arch-timer/cleanup 
branch)

v20: https://lkml.org/lkml/2017/1/18/534
 Reorder the first 4 patches and split the 4th patches.
 Leave CNTHCTL_* as they originally were.
 Fix the bug in arch_timer_select_ppi.
 Split "Rework counter frequency detection" patch.
 Rework the arch_timer_detect_rate function.
 Improve the commit message of "Refactor MMIO timer probing".
 Rebase to 4.10.0-rc4

v19: https://lkml.org/lkml/2016/12/21/25
 Fix a missing '\n' in an error message in arch_timer_mem_init.
 Add "request_mem_region" for ioremapping cntbase, according to
 f947ee1 clocksource/drivers/arm_arch_timer: Map frame with 
of_io_request_and_map()
 Rebase to 4.9.0-gfb779ff

v18: https://lkml.org/lkml/2016/12/8/446
 Fix 8/15 patch problem of "int ret;" in arch_timer_acpi_init.
 Rebase to 4.9.0-rc8-g9269898

v17: https://lkml.org/lkml/2016/11/25/140
 Take out some cleanups from 4/15.
 Merge 5/15 and 6/15, improve PPI determination code,
 improve commit message.
 Rework counter frequency detection.
 Move arch_timer_needs_of_probing into DT init call.
 Move Platform Timer scan loop back to timer init call to avoid allocating
 and free memory.
 Improve all the exported functions' comment.

v16: https://lkml.org/lkml/2016/11/16/268
 Fix patchset problem about static enum ppi_nr of 01/13 in v15.
 Refactor arch_timer_detect_rate.
 Refactor arch_timer_needs_probing.

v15: https://lkml.org/lkml/2016/11/15/366
 Re-order patches
 Add arm_arch_timer refactoring patches to prepare for GTDT:
 1. rename some  enums and defines, and some cleanups
 2. separate out arch_timer_uses_ppi init code and fix a potential bug
 3. Improve some new structs, refactor the timer init code.
 Since the some structs have been changed, GTDT parser for memory-mapped
 timer and SBSA Generic Watchdog timer have been update.

v14: https://lkml.org/lkml/2016/9/28/573
 Separate memory-mapped timer GTDT support into two patches
 1. Refactor the timer init code to prepare for GTDT
 2. Add GTDT support for memory-mapped timer

v13: http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1231717.html
 Improve arm_arch_timer code for memory-mapped
 timer GTDT support, refactor original memory-mapped timer
 dt support for reusing some common code.

v12: https://lkml.org/lkml/2016/9/13/250
 Rebase to latest Linux 4.8-rc6
 Delete the confusing "skipping" in the error message.

V11: https://lkml.org/lkml/2016/9/6/354
 Rebase to latest Linux 4.8-rc5
 Delete typedef (suggested by checkpatch.pl)

V10: https://lkml.org/lkml/2016/7/26/215
 Drop the "readq" patch.
 Rebase to latest Linux 4.7.

V9: https://lkml.org/lkml/2016/7/25/345
Improve pr_err message in acpi gtdt driver.
Update Commit message for 7/9
shorten the irq mapping function name
Improve GTDT driver for memory-mapped timer

v8: https://lkml.org/lkml/2016/7/19/660
Improve "pr_fmt(fmt)" definition: add "ACPI" in front of "GTDT",
and also improve printk message.
Simplify is_timer_block and is_watchdog.
Merge acpi_gtdt_desc_init and gtdt_arch_timer_init into acpi_gtdt_init();
Delete __init in include/linux/acpi.h for GTDT API
Make ARM64 select GTDT.
Delete "#include " from acpi_gtdt.c
Simplify GT block parse code.

v7: https://lkml.org/lkml/2016/7/13/769
Move the GTDT drive

Re: Regression: Failed boots bisected to 4cd13c21b207 "softirq: Let ksoftirqd do its job"

2017-02-06 Thread Russell King - ARM Linux
On Mon, Feb 06, 2017 at 06:46:19PM +, Will Deacon wrote:
> Converting the smc91x driver over to NAPI would probably solve this problem,
> but given the "vintage" of this code, I'd be more tempted by a simpler
> point fix if only I could think of one.

I'm not sure if converting it to NAPI would solve it, or just move
the problem elsewhere - IOW, move it from "we need to drop the packet
because we couldn't allocate a skb" to "the hardware dropped the packet
because the FIFO was full."

Yes, I'm intending giving it a go, once I've a spare moment to build
a kernel for the platform etc.  It runs root NFS, so should be a good
test for it.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.


Re: [PATCHv2 2/2] arch: Rename CONFIG_DEBUG_RODATA and CONFIG_DEBUG_MODULE_RONX

2017-02-06 Thread Laura Abbott
On 02/03/2017 12:03 PM, Kees Cook wrote:
> On Fri, Feb 3, 2017 at 9:52 AM, Laura Abbott  wrote:
>>
>> Both of these options are poorly named. The features they provide are
>> necessary for system security and should not be considered debug only.
>> Change the name to something that accurately describes what these
>> options do.
> 
> It may help to explicitly call out the name change from/to in the
> commit message.
> 
>>
>> Signed-off-by: Laura Abbott 
>> ---
>> [...]
>> diff --git a/arch/arm/configs/aspeed_g4_defconfig 
>> b/arch/arm/configs/aspeed_g4_defconfig
>> index ca39c04..beea2cc 100644
>> --- a/arch/arm/configs/aspeed_g4_defconfig
>> +++ b/arch/arm/configs/aspeed_g4_defconfig
>> @@ -25,7 +25,6 @@ CONFIG_MODULE_UNLOAD=y
>>  # CONFIG_ARCH_MULTI_V7 is not set
>>  CONFIG_ARCH_ASPEED=y
>>  CONFIG_MACH_ASPEED_G4=y
>> -CONFIG_DEBUG_RODATA=y
>>  CONFIG_AEABI=y
>>  CONFIG_UACCESS_WITH_MEMCPY=y
>>  CONFIG_SECCOMP=y
> 
> Are these defconfig cases correct (dropping DEBUG_RODATA without
> adding STRICT_KERNEL_RWX)?
>

Yes, I think these need to be updated to the new config option since
these are not CPUv7


> Who should carry this series, btw?
> 

An excellent question :)

Would you be willing to carry it with Acks?

> -Kees
> 

Thanks,
Laura


Re: [PATCH 4/6] xfs: use memalloc_nofs_{save,restore} instead of memalloc_noio*

2017-02-06 Thread Michal Hocko
On Mon 06-02-17 10:32:37, Darrick J. Wong wrote:
> On Mon, Feb 06, 2017 at 06:44:15PM +0100, Michal Hocko wrote:
> > On Mon 06-02-17 07:39:23, Matthew Wilcox wrote:
> > > On Mon, Feb 06, 2017 at 03:07:16PM +0100, Michal Hocko wrote:
> > > > +++ b/fs/xfs/xfs_buf.c
> > > > @@ -442,17 +442,17 @@ _xfs_buf_map_pages(
> > > > bp->b_addr = NULL;
> > > > } else {
> > > > int retried = 0;
> > > > -   unsigned noio_flag;
> > > > +   unsigned nofs_flag;
> > > >  
> > > > /*
> > > >  * vm_map_ram() will allocate auxillary structures (e.g.
> > > >  * pagetables) with GFP_KERNEL, yet we are likely to be 
> > > > under
> > > >  * GFP_NOFS context here. Hence we need to tell memory 
> > > > reclaim
> > > > -* that we are in such a context via PF_MEMALLOC_NOIO 
> > > > to prevent
> > > > +* that we are in such a context via PF_MEMALLOC_NOFS 
> > > > to prevent
> > > >  * memory reclaim re-entering the filesystem here and
> > > >  * potentially deadlocking.
> > > >  */
> > > 
> > > This comment feels out of date ... how about:
> > 
> > which part is out of date?
> > 
> > > 
> > >   /*
> > >* vm_map_ram will allocate auxiliary structures (eg page
> > >* tables) with GFP_KERNEL.  If that tries to reclaim memory
> > >* by calling back into this filesystem, we may deadlock.
> > >* Prevent that by setting the NOFS flag.
> > >*/
> > 
> > dunno, the previous wording seems clear enough to me. Maybe little bit
> > more chatty than yours but I am not sure this is worth changing.
> 
> I prefer to keep the "...yet we are likely to be under GFP_NOFS..."
> wording of the old comment because it captures the uncertainty of
> whether or not we actually are already under NOFS.  If someone actually
> has audited this code well enough to know for sure then yes let's change
> the comment, but I haven't gone that far.

I believe we can drop the memalloc_nofs_save then as well because either
we are called from a potentially dangerous context and thus we are in
the nofs scope or we do not need the protection at all.
-- 
Michal Hocko
SUSE Labs


Re: [PATCHv2 1/2] arch: Move CONFIG_DEBUG_RODATA and CONFIG_SET_MODULE_RONX to be common

2017-02-06 Thread Laura Abbott
On 02/03/2017 01:08 PM, Kees Cook wrote:
> On Fri, Feb 3, 2017 at 12:29 PM, Russell King - ARM Linux
>  wrote:
>> On Fri, Feb 03, 2017 at 11:45:56AM -0800, Kees Cook wrote:
>>> On Fri, Feb 3, 2017 at 9:52 AM, Laura Abbott  wrote:
 diff --git a/arch/Kconfig b/arch/Kconfig
 index 99839c2..22ee01e 100644
 --- a/arch/Kconfig
 +++ b/arch/Kconfig
 @@ -781,4 +781,32 @@ config VMAP_STACK
   the stack to map directly to the KASAN shadow map using a formula
   that is incorrect if the stack is in vmalloc space.

 +config ARCH_NO_STRICT_RWX_DEFAULTS
 +   def_bool n
 +
 +config ARCH_HAS_STRICT_KERNEL_RWX
 +   def_bool n
 +
 +config DEBUG_RODATA
 +   def_bool y if !ARCH_NO_STRICT_RWX_DEFAULTS
 +   prompt "Make kernel text and rodata read-only" if 
 ARCH_NO_STRICT_RWX_DEFAULTS
>>>
>>> Ah! Yes, perfect. I totally forgot about using conditional "prompt"
>>> lines. Nice!
>>
>> It's no different from the more usual:
>>
>> bool "Make kernel text and rodata read-only" if 
>> ARCH_NO_STRICT_RWX_DEFAULTS
>> default y if !ARCH_NO_STRICT_RWX_DEFAULTS
>> depends on ARCH_HAS_STRICT_KERNEL_RWX
>>
>> But... I really don't like this - way too many negations and negatives
>> which make it difficult to figure out what's going on here.
>>
>> The situation we have today is:
>>
>> -config DEBUG_RODATA
>> -   bool "Make kernel text and rodata read-only"
>> -   depends on MMU && !XIP_KERNEL
>> -   default y if CPU_V7
>>
>> which is "allow the user to select DEBUG_RODATA if building a MMU non-XIP
>> kernel", suggesting that the user turns it on for ARMv7 CPUs.
>>
>> That changes with this and the above:
>>
>> +   select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
>> +   select ARCH_HAS_STRICT_MODULE_RWX if MMU
>> +   select ARCH_NO_STRICT_RWX_DEFAULTS if !CPU_V7
>>
>> This means that ARCH_HAS_STRICT_KERNEL_RWX is set for a MMU non-XIP
>> kernel, which carries the same pre-condition for DEBUG_RODATA - no
>> problem there.
>>
>> However, ARCH_NO_STRICT_RWX_DEFAULTS is set for non-ARMv7 CPUs, which
>> means the "Make kernel text and rodata read-only" prompt _is_ provided
>> for those.  However, for all ARMv7 systems, we go from "suggesting that
>> the user enables the option" to "you don't have a choice, you get this
>> whether you want it or not."
>>
>> I'd prefer to keep it off for my development systems, where I don't
>> care about kernel security.  If we don't wish to do that as a general
>> rule, can we make it dependent on EMBEDDED?
>>
>> Given that on ARM it can add up to 4MB to the kernel image - there
>> _will_ be about 1MB before the .text section, the padding on between
>> __modver and __ex_table which for me is around 626k, the padding
>> between .notes and the init sections start with .vectors (the space
>> between __ex_table and end of .notes is only 4124, which gets padded
>> up to 1MB) and lastly the padding between the .init section and the
>> data section (for me around 593k).  This all adds up to an increase
>> in kernel image size of 3.2MB on 14.2MB - an increase of 22%.
>>
>> So no, I'm really not happy with that.
> 
> Ah yeah, good point. We have three cases: unsupported, mandatory,
> optional, but we have the case of setting the default for the optional
> case. Maybe something like this?
> 
> config STRICT_KERNEL_RWX
>   bool "Make kernel text and rodata read-only" if ARCH_OPTIONAL_KERNEL_RWX
>   depends on ARCH_HAS_STRICT_KERNEL_RWX
>   default ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
> 
> unsupported:
> !ARCH_HAS_STRICT_KERNEL_RWX
> 
> mandatory:
> ARCH_HAS_STRICT_KERNEL_RWX
> !ARCH_OPTIONAL_KERNEL_RWX
> 
> optional:
> ARCH_HAS_STRICT_KERNEL_RWX
> ARCH_OPTIONAL_KERNEL_RWX
> with default controlled by ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
> 
> Then arm is:
>   select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
>   select ARCH_HAS_STRICT_MODULE_RWX if MMU
>   select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
>   select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
> 
> x86 and arm64 are:
>   select ARCH_HAS_STRICT_KERNEL_RWX
>   select ARCH_HAS_STRICT_MODULE_RWX
> 
> ?
> 
> -Kees
> 

Yes, that looks good. I wanted it to be mandatory to avoid the
mindset of "optional means we don't need it" but I see there
are some cases where it's better to turn it off. I'll see if
I can emphasize this properly in the help text ("Say Y here
unless you love security exploits running in production")

Thanks,
Laura


Re: [PATCH] mm/autonuma: don't use set_pte_at when updating protnone ptes

2017-02-06 Thread Rik van Riel
On Mon, 2017-02-06 at 22:36 +0530, Aneesh Kumar K.V wrote:
> Architectures like ppc64, use privilege access bit to mark pte non
> accessible.
> This implies that kernel can do a copy_to_user to an address marked
> for numa fault.
> This also implies that there can be a parallel hardware update for
> the pte.
> set_pte_at cannot be used in such scenarios. Hence switch the pte
> update to use ptep_get_and_clear and set_pte_at combination.
> 
> Signed-off-by: Aneesh Kumar K.V 

Acked-by: Rik van Riel 

-- 
All Rights Reversed.

signature.asc
Description: This is a digitally signed message part


Re: Regression: Failed boots bisected to 4cd13c21b207 "softirq: Let ksoftirqd do its job"

2017-02-06 Thread Will Deacon
Hi all,

I've also stumbled over this issue with the ARM fastmodel and, somewhat
embarrassingly, blamed the model developers for the regression. I'm using
NFS and copying ~14MB file from NFS to a virtio-blk device which takes
over 20 minutes with 4cd13c21b207, but <1 min with it reverted.

I also think I've figured out what's going on. See below.

On Fri, Nov 25, 2016 at 01:14:03PM +, Brian Starkey wrote:
> On Wed, Nov 23, 2016 at 12:03:28PM -0800, Eric Dumazet wrote:
> >On Wed, Nov 23, 2016 at 10:21 AM, Brian Starkey  
> >wrote:
> >
> >>This patch didn't help.
> >>
> >>I did get some new traces though - I've attached the diff for the
> >>trace_printks I added.
> >>
> >>Before 4cd13c21b207:
> >>https://drive.google.com/open?id=0B8siaK6ZjvEwcEtOeFQzTmY0Nnc
> >>After 4cd13c21b207:
> >>https://drive.google.com/open?id=0B8siaK6ZjvEwZnQ4MVg1d3d1Tm8
> >>
> >>It looks like the difference is that after 4cd13c21b207 the RX softirq
> >>isn't running, and RX interrupts don't call softirq_raise anymore -
> >>presumably because there's one pending, but I didn't have time to
> >>track that down to a code-path.
> >>
> >>Cheers,
> >>-Brian
> >>
> >
> >Hi Brian
> >
> >Looks like netif_rx() drops the incoming packets then ?
> >
> >Maybe netif_running() is not happy :(
> >
> >Could you trace netif_rx() return value (NET_RX_SUCCESS or NET_RX_DROP)
> 
> Some packets are dropped, but not very many:
> 
>   $ grep NET_RX_SUCCESS trace_netif_rx.txt | wc -l
>   14399
>   $ grep NET_RX_DROP trace_netif_rx.txt | wc -l
>   22
> 
> Without the ksoftirqd change there were zero NET_RX_DROPs.

The SMC91x has an on-chip 8KB FIFO (i.e. there's no DMA going on here).
When the FIFO is full (every 4 TCP packets in my case), we get an
interrupt and run down the smc_rcv path. There, we allocate an skb for
the data (netdev_alloc_skb) and copy the data out of the FIFO
(SMC_PULL_DATA) into the buffer, which we hand over the network core via
netif_rx.

The problem is that netif_rx defers to ksoftirqd to process the packet
and more crucially *free* the skb after it's been consumed. Since the
thing was allocated in IRQ context, we end up exhausting our GFP_ATOMIC
memory because ksoftirqd gets interrupted so frequently due to the tiny
FIFO depth that buffers are allocated at a much higher frequency than
they are freed. This may be exaggerated by the relative speed of the model
emulated CPU with respect to the network interface, but I'd expect this
to be reproducible on real hardware too (rmk, cc'd, was going to give that
a go).

Prior to 4cd13c21b207, we'd always run softirqs synchronously on the
hardirq exit path and therefore have a chance to free some skbs before
actually EOI'ing the hardirq and allowing the FIFO-full interrupt to
interrupt us again.

Converting the smc91x driver over to NAPI would probably solve this problem,
but given the "vintage" of this code, I'd be more tempted by a simpler
point fix if only I could think of one.

Any ideas?

Will


Re: [PATCH v9 3/3] iio: adc: add support for Allwinner SoCs ADC

2017-02-06 Thread Jonathan Cameron
On 06/02/17 07:37, Quentin Schulz wrote:
> Hi Jonathan,
> 
> On 14/01/2017 20:28, Jonathan Cameron wrote:
>>
>>
>> On 14 January 2017 19:19:58 GMT+00:00, Quentin Schulz 
>>  wrote:
>>> Hi Jonathan,
>>>
>>> On 08/01/2017 12:17, Jonathan Cameron wrote:
 On 30/12/16 14:40, Jonathan Cameron wrote:
> On 13/12/16 14:33, Quentin Schulz wrote:
>> The Allwinner SoCs all have an ADC that can also act as a
>>> touchscreen
>> controller and a thermal sensor. This patch adds the ADC driver
>>> which is
>> based on the MFD for the same SoCs ADC.
>>
>> This also registers the thermal adc channel in the iio map array so
>> iio_hwmon could use it without modifying the Device Tree. This
>>> registers
>> the driver in the thermal framework.
>>
>> The thermal sensor requires the IP to be in touchscreen mode to
>>> return
>> correct values. Therefore, if the user is continuously reading the
>>> ADC
>> channel(s), the thermal framework in which the thermal sensor is
>> registered will switch the IP in touchscreen mode to get a
>>> temperature
>> value and requires a delay of 100ms (because of the mode
>>> switching),
>> then the ADC will switch back to ADC mode and requires also a delay
>>> of
>> 100ms. If the ADC readings are critical to user and the SoC
>>> temperature
>> is not, this driver is capable of not registering the thermal
>>> sensor in
>> the thermal framework and thus, "quicken" the ADC readings.
>>
>> This driver probes on three different platform_device_id to take
>>> into
>> account slight differences (registers bit and temperature
>>> computation)
>> between Allwinner SoCs ADCs.
>>
>> Signed-off-by: Quentin Schulz 
>> Acked-by: Maxime Ripard 
>> Acked-by: Jonathan Cameron 
>> Acked-for-MFD-by: Lee Jones 
> One comment inline but not a blocker.
>
> I would ideally like an ack from the thermal side.  The relevant
>>> code
> is small, but best to be sure and keep them in the loop as well.
>
> It does feel a little convoluted to have both this directly
>>> providing
> a thermal zone and being able to create one indirectly through hwmon
>>> as
> well but this solution works for me I think...
>
> Cc'd Zang and Eduardo.
 Nothing seems to have come through on that front.

 I need to get a pull request out to Greg and rebase my tree before I
>>> have
 the precursor patch in place. Give me a bump if you haven't heard
>>> anything by
 the time next week.

>>>
>>> Kindly "giving you a bump" you as requested since I haven't heard from
>>> you for a week.
>> Greg hasn't pulled yet, so may be a few more days.
>>
>> J
> 
> I haven't received any news from you on the merging of this patch series
> for a month, so kindly pinging.
Gah!  Sorry, I completely lost this one in my patch queue. Thanks for the
reminder.

Applied to the togreg branch of iio.git and pushed out as testing for the
autobuilders to play with it.

I'm afraid due to my tardiness it's missed the coming merge window, but
I'll make sure it goes in my first pull request for the next cycle.

Jonathan
> 
> Thanks,
> Quentin
> 
>>>
>>> Thanks,
>>> Quentin
>>>
 Thanks,

 Jonathan
>
> Jonathan
>> ---
>>
>> v9:
>>  - clarify comment on why we have to use the parent node as node
>>> for
>>  registering in thermal framework, (backward compatibility)
>>  - clarify comment on why we can disable CONFIG_THERMAL_OF,
>>  - clarify Kconfig help to say that CONFIG_THERMAL_OF can be
>>> disabled
>>  but should not in most cases,
>>  - make return value of devm_thermal_zone_of_sensor_register a
>>> local
>>  variable of the condition block,
>>  - correct scale from _PLUS_MICRO to _PLUS_NANO for ADC raw
>>> readings
>>  scale,
>>
>> v8:
>>  - remove Kconfig depends on !TOUCHSCREEN_SUN4I (moved to
>> MFD_SUN4I_GPADC),
>>  - fix return values of regmap_irq_get_virq and
>>> platform_get_irq_byname
>> stored in an unsigned int and then check if negative,
>>  - fix uninitialized ret value when an error occurs while
>>> registering
>> the thermal sensor in the framework,
>>
>> v7:
>>  - add Kconfig depends on !TOUCHSCREEN_SUN4I,
>>  - remove Kconfig selects THERMAL_OF,
>>  - do not register thermal sensor if CONFIG_THERMAL_OF is disabled,
>>  - disable irq in irq_handler rather than in read_raw,
>>  - add delay when switching the IP's mode or channel (delay
>>> empirically found),
>>  - quicken thermal sensor interrupt period,
>>  - add masks for channel bits,
>>  - fix deadlock in sun4i_gpadc_read if regmap_read/write fails,
>>  - move some logic from sun4i_gpadc_read to sun4i_prepare_for_irq,
>>  - mark last busy for runtime_pm only on success in
>>> sun4i_gpadc_read,
>>  - remove cached values,
>>  - increase wait_for_completion_timeout timeout to 1s to be sur

Re: [PATCH 4/6] xfs: use memalloc_nofs_{save,restore} instead of memalloc_noio*

2017-02-06 Thread Darrick J. Wong
On Mon, Feb 06, 2017 at 06:44:15PM +0100, Michal Hocko wrote:
> On Mon 06-02-17 07:39:23, Matthew Wilcox wrote:
> > On Mon, Feb 06, 2017 at 03:07:16PM +0100, Michal Hocko wrote:
> > > +++ b/fs/xfs/xfs_buf.c
> > > @@ -442,17 +442,17 @@ _xfs_buf_map_pages(
> > >   bp->b_addr = NULL;
> > >   } else {
> > >   int retried = 0;
> > > - unsigned noio_flag;
> > > + unsigned nofs_flag;
> > >  
> > >   /*
> > >* vm_map_ram() will allocate auxillary structures (e.g.
> > >* pagetables) with GFP_KERNEL, yet we are likely to be under
> > >* GFP_NOFS context here. Hence we need to tell memory reclaim
> > > -  * that we are in such a context via PF_MEMALLOC_NOIO to prevent
> > > +  * that we are in such a context via PF_MEMALLOC_NOFS to prevent
> > >* memory reclaim re-entering the filesystem here and
> > >* potentially deadlocking.
> > >*/
> > 
> > This comment feels out of date ... how about:
> 
> which part is out of date?
> 
> > 
> > /*
> >  * vm_map_ram will allocate auxiliary structures (eg page
> >  * tables) with GFP_KERNEL.  If that tries to reclaim memory
> >  * by calling back into this filesystem, we may deadlock.
> >  * Prevent that by setting the NOFS flag.
> >  */
> 
> dunno, the previous wording seems clear enough to me. Maybe little bit
> more chatty than yours but I am not sure this is worth changing.

I prefer to keep the "...yet we are likely to be under GFP_NOFS..."
wording of the old comment because it captures the uncertainty of
whether or not we actually are already under NOFS.  If someone actually
has audited this code well enough to know for sure then yes let's change
the comment, but I haven't gone that far.

The way the kmem_zalloc_large code is structured suggests to me that
callers don't have to be especially aware of the NOFS state -- they can
just call the function and it'll take care of making it work.

> > 
> > > - noio_flag = memalloc_noio_save();
> > > + nofs_flag = memalloc_nofs_save();
> > >   do {
> > >   bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
> > >   -1, PAGE_KERNEL);
> > 
> > Also, I think it shows that this is the wrong place in XFS to be calling
> > memalloc_nofs_save().  I'm not arguing against including this patch;
> > it's a step towards where we want to be.  I also don't know XFS well
> > enough to know where to set that flag ;-)  Presumably when we start a
> > transaction ... ?

None of the current kmem_zalloc_large callers actually have a
transaction, at least not at that point.

> Yes that is what I would like to achieve longterm. And the reason why I
> didn't want to mimic this pattern in kvmalloc as some have suggested.
> It just takes much more time to get there from the past experience and
> we should really start somewhere.

--D

> -- 
> Michal Hocko
> SUSE Labs
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] scatterlist: don't overflow length field

2017-02-06 Thread David Dillow
+Jens, Christoph, and Ming based on off-list suggestion

On Fri, Feb 3, 2017 at 11:57 AM, Linus Torvalds
 wrote:
>
> On Wed, Feb 1, 2017 at 1:29 PM, David Dillow  wrote:
> > When called with a region of contiguous pages totaling > 4 GB of memory,
> > sg_alloc_table_from_pages() will overflow the length field, leading to a
> > corrupt scatter list. Fix this by tracking the number of pages we've
> > merged and start a new chunk when we would overflow.
>
> So what allows these things to be built in the first place?
>
> We limit IO sizes to fit in a signed int (so just below 2GB) not only
> because it's often an effective denial of service, but also because
> we've had issues with various drivers (and filesystems) getting
> int/long wrong.
>
> So nothing should be building those kinds of scatterlists, and if
> something is able to, it might result in other problems downstream.

This isn't from normal read/write IO -- some applications want to
access large amounts
of userspace memory directly from hardware, and it is cleaner for them
to manage one
mapping than multiple 1GB or 2GB mappings -- assuming the hardware can even
support multiple mappings. If they have room in their container to
allocate and pin the
memory, we'd like to allow it.

There's definitely potential for problems downstream, even without
going through the
filesystems and block layers -- we noticed this potential issue while
tracking down a
bug in the IOMMU code when an entry in the list was over 1GB. We still
see a benefit
from building the large entries, though -- it allows superpages in the
IOMMU mapping
which helps the IOTLB cache.

We currently use sg_alloc_table_from_pages() to build the scatterlist
for dma_map_sg()
but we could do it ourselves if you'd rather add a length limit to the
more general code.


Re: [PATCH] platform/x86: intel_pmc_ipc: read s0ix residency API

2017-02-06 Thread Rajneesh Bhardwaj
On Mon, Feb 06, 2017 at 10:01:43PM +0800, kbuild test robot wrote:
> Hi Shanth,
> 
> [auto build test ERROR on tip/x86/core]
> [also build test ERROR on v4.10-rc7]
> [if your patch is applied to the wrong git tree, please drop us a note to 
> help improve the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/Shanth-Murthy/platform-x86-intel_pmc_ipc-read-s0ix-residency-API/20170206-213213
> config: i386-randconfig-x007-201706 (attached as .config)
> compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=i386 
> 
> All errors (new ones prefixed by >>):
> 
>drivers/platform/x86/intel_pmc_ipc.c: In function 'gcr_data_readq':
> >> drivers/platform/x86/intel_pmc_ipc.c:193:9: error: implicit declaration of 
> >> function 'readq' [-Werror=implicit-function-declaration]
>  return readq(ipcdev.ipc_base + offset);
> ^
>cc1: some warnings being treated as errors
> 
> vim +/readq +193 drivers/platform/x86/intel_pmc_ipc.c
> 
>187{
>188return readl(ipcdev.ipc_base + IPC_READ_BUFFER + 
> offset);
>189}
>190
>191static inline u64 gcr_data_readq(u32 offset)
>192{
>  > 193return readq(ipcdev.ipc_base + offset);

Seems readq is not available for 32 bit configuration. Will send a fix soon.

>194}
>195
>196static int intel_pmc_ipc_check_status(void)
> 
> ---
> 0-DAY kernel test infrastructureOpen Source Technology Center
> https://lists.01.org/pipermail/kbuild-all   Intel Corporation



-- 
Best Regards,
Rajneesh


Re: [PATCH 0/3 v2] PM / docs: linux/pm.h kerneldocs update and conversion of two docs to reST

2017-02-06 Thread Jonathan Corbet
On Thu, 02 Feb 2017 01:30:08 +0100
"Rafael J. Wysocki"  wrote:

> This is a new (and hopefully final) iteration of the series of patches 
> starting
> the conversion of power management driver API documentation.
> 
> Patch [1/3] updates kerneldoc comments in include/linux/pm.h (to make the
> documentation generated out of them look better), patch [2/3] converts
> Documentation/power/devices.txt to reST, and patch [3/3] does the same thing
> to Documentation/power/notifiers.txt (the new files go into the driver-api 
> subdir
> and are hooked up to that manual).
> 
> Patch [1/3] has not been changed since the previous iteration.  Patch [2/3]
> addresses some comments from Lukas and patch [3/3] has been trivially
> rebased.
> 
> Please apply unless there are any objections.

Just applied them, thanks.

One thing I do notice is that, while you update the kerneldoc comments,
nothing in the associated documentation pulls them in.  That would be a
good future enhancement...:)

Thanks,

jon


Re: [PATCH] MAINTAINERS: drop broken reference to i2c/trivial-devices

2017-02-06 Thread Jonathan Cameron
On 06/02/17 16:54, Wolfram Sang wrote:
> Due to RST rework, the reference to i2c/trivial-devices was changed, but the
> result is broken. However, let's just drop the whole reference, since it
> doesn't make sense in the first place to reference this "global" file
> for a single driver.
> 
> Fixes: 8c27ceff3604b2 ("docs: fix locations of several documents that got 
> moved")
> Signed-off-by: Wolfram Sang 
> Cc: Mauro Carvalho Chehab 
> Cc: Kevin Tsai 
Applied to the togreg branch of iio.git.

Thanks,

Jonathan
> Cc: Jonathan Cameron 
> ---
>  MAINTAINERS | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 023e4a8c3d104d..3c67142f17e913 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -2974,7 +2974,6 @@ CAPELLA MICROSYSTEMS LIGHT SENSOR DRIVER
>  M:   Kevin Tsai 
>  S:   Maintained
>  F:   drivers/iio/light/cm*
> -F:   Documentation/devicetree/bindings/i2c/trivial-admin-guide/devices.rst
>  
>  CAVIUM I2C DRIVER
>  M:   Jan Glauber 
> 



Re: [PATCH 4/4] tools include: Fix include path for uapi/asm-generic/mman.h

2017-02-06 Thread Arnaldo Carvalho de Melo
Em Wed, Feb 01, 2017 at 10:38:04PM -0800, David Carrillo-Cisneros escreveu:
> commit f3539c12d819 ("tools include: Add uapi mman.h for each architecture")
> 
> copied include/uapi/linux/mman.h into tools/include/uapi/linux/mman.h
> but did not update the include path for uapi/asm-generic/mman.h. Fix it.

What problem is this fixing?

I.e. one is supposed to include <linux/mman.h>, get the
tools/include/uapi/linux/mman.h and it in turn will include the per-arch
mman.h, say tools/arch/sparc/include/uapi/asm/mman.h, that in turn will
include uapi/asm-generic/mman-common.h after defining its diverging
bits.

Some, like the ones for the Alpha and MIPS architectures, will not even
include mman-common.h, so diverging they are.

Lots of arches, like s390, will just include uapi/asm-generic/mman.h
from its mman.h file (tools/arch/s390/include/uapi/asm/mman.h), just
like you'd do for _all_ arches in your patch below.

tools/perf/Makefile.config is the one doing:

INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/

and:

[acme@jouet linux]$ ls tools/arch/*/include/uapi/asm/mman.h
tools/arch/alpha/include/uapi/asm/mman.h
tools/arch/ia64/include/uapi/asm/mman.h
tools/arch/s390/include/uapi/asm/mman.h
tools/arch/arc/include/uapi/asm/mman.h  
tools/arch/m32r/include/uapi/asm/mman.h
tools/arch/score/include/uapi/asm/mman.h
tools/arch/arm64/include/uapi/asm/mman.h
tools/arch/microblaze/include/uapi/asm/mman.h  
tools/arch/sh/include/uapi/asm/mman.h
tools/arch/arm/include/uapi/asm/mman.h  
tools/arch/mips/include/uapi/asm/mman.h
tools/arch/sparc/include/uapi/asm/mman.h
tools/arch/frv/include/uapi/asm/mman.h  
tools/arch/mn10300/include/uapi/asm/mman.h 
tools/arch/tile/include/uapi/asm/mman.h
tools/arch/h8300/include/uapi/asm/mman.h
tools/arch/parisc/include/uapi/asm/mman.h  
tools/arch/x86/include/uapi/asm/mman.h
tools/arch/hexagon/include/uapi/asm/mman.h  
tools/arch/powerpc/include/uapi/asm/mman.h 
tools/arch/xtensa/include/uapi/asm/mman.h
[acme@jouet linux]$

Am I missing something?

- Arnaldo
 
> Signed-off-by: David Carrillo-Cisneros 
> ---
>  tools/include/uapi/linux/mman.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
> index 81d8edf11789..74a19ae15cd1 100644
> --- a/tools/include/uapi/linux/mman.h
> +++ b/tools/include/uapi/linux/mman.h
> @@ -1,7 +1,7 @@
>  #ifndef _UAPI_LINUX_MMAN_H
>  #define _UAPI_LINUX_MMAN_H
>  
> -#include 
> +#include 
>  
>  #define MREMAP_MAYMOVE   1
>  #define MREMAP_FIXED 2
> -- 
> 2.11.0.483.g087da7b7c-goog


[PATCH] cpumask: use nr_cpumask_bits for parsing functions

2017-02-06 Thread Tejun Heo
513e3d2d11c9 ("cpumask: always use nr_cpu_ids in formatting and
parsing functions") converted both cpumask printing and parsing
functions to use nr_cpu_ids instead of nr_cpumask_bits.  While this
was okay for the printing functions as it just picked one of the two
output formats that we were alternating between depending on a kernel
config, doing the same for parsing wasn't okay.

nr_cpumask_bits can be either nr_cpu_ids or NR_CPUS.  We can always
use nr_cpu_ids but that is a variable while NR_CPUS is a constant, so
it can be more efficient to use NR_CPUS when we can get away with it.
Converting the printing functions to nr_cpu_ids makes sense because it
affects how the masks get presented to userspace and doesn't break
anything; however, using nr_cpu_ids for parsing functions can
incorrectly leave the higher bits uninitialized while reading in these
masks from userland.  As all testing and comparison functions use
nr_cpumask_bits which can be larger than nr_cpu_ids, the parsed
cpumasks can erroneously yield false negative results.

This made the taskstats interface incorrectly return -EINVAL even when
the inputs were correct.

Fix it by restoring the parse functions to use nr_cpumask_bits instead
of nr_cpu_ids.

Signed-off-by: Tejun Heo 
Fixes: 513e3d2d11c9 ("cpumask: always use nr_cpu_ids in formatting and parsing 
functions")
Cc: sta...@vger.kernel.org # v4.0+
Reported-by: Martin Steigerwald 
Debugged-by: Ben Hutchings 
---
 include/linux/cpumask.h |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index c717f5e..b3d2c1a 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -560,7 +560,7 @@ static inline void cpumask_copy(struct cpumask *dstp,
 static inline int cpumask_parse_user(const char __user *buf, int len,
 struct cpumask *dstp)
 {
-   return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpu_ids);
+   return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**
@@ -575,7 +575,7 @@ static inline int cpumask_parselist_user(const char __user 
*buf, int len,
 struct cpumask *dstp)
 {
return bitmap_parselist_user(buf, len, cpumask_bits(dstp),
-nr_cpu_ids);
+nr_cpumask_bits);
 }
 
 /**
@@ -590,7 +590,7 @@ static inline int cpumask_parse(const char *buf, struct 
cpumask *dstp)
char *nl = strchr(buf, '\n');
unsigned int len = nl ? (unsigned int)(nl - buf) : strlen(buf);
 
-   return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpu_ids);
+   return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**
@@ -602,7 +602,7 @@ static inline int cpumask_parse(const char *buf, struct 
cpumask *dstp)
  */
 static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
 {
-   return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpu_ids);
+   return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**


Re: [PATCH 11/89] sched/headers, delayacct: Move the 'struct task_delay_info' definition from <linux/sched.h> to <linux/delayacct.h>

2017-02-06 Thread Linus Torvalds
On Mon, Feb 6, 2017 at 5:28 AM, Ingo Molnar  wrote:
> The 'struct task_delay_info' definition does not have to be in sched.h,
> because task_struct only has a pointer to it.
>
> So move it to <linux/delayacct.h> to reduce the size of <linux/sched.h>.
>
> As an additional improvement make the type defined but empty in the
> !CONFIG_TASK_DELAY_ACCT case - to eliminate the ugly #ifdef
> around the task_struct field as well.

No. This is completely wrong.

Even if the structure is empty, the _pointer_ to the structure is not.
So now you removed the #ifdef, and the 'struct task_struct' becomes
unconditionally (and pointlessly) larger.

So your removal of the #ifdef and making that structure empty is
completely pointless: it wastes exactly the same amount of space even
when it is empty, because that pointer stays around and is not an
empty pointer.

In general, I heartily approve of the sched.h split-up, but quite
frankly, when there are almost a hundred patches, and a lot of them
are pure code movement (so they are *big*, and essentially impossible
to actually confirm), I *really* really think that this patch series
should be re-done so that it does *not* make these kinds of "clever"
changes.

I'd be much happier if the cleanups were all completely non-semantic.
Nothing like this. At least in the big patch series.

Then you can have a separate series that does things that isn't just
about code movement.

Ok?

Because these emails aren't easy to read as-is (well, part of them are
obvious, but others are "move hundreds of lines from one file to
another").

And having to worry about "oh, and btw, hidden in the movement is this
small semantic change that may or may not be completely and utterly
bogus" makes it much much worse.

  Linus


Re: [v5,3/7] rtc: ac100: Add RTC driver for X-Powers AC100

2017-02-06 Thread Rask Ingemann Lambertsen
On Mon, Feb 06, 2017 at 11:29:07AM +0800, Chen-Yu Tsai wrote:
> On Mon, Feb 6, 2017 at 5:33 AM, Rask Ingemann Lambertsen
>  wrote:
> >  >8 
> > Subject: [PATCH] ARM: sunxi_defconfig: Enable AC100 RTC driver
> >
> > Enable the AC100 RTC driver so boards with it can keep track of time.
> >
> > Signed-off-by: Rask Ingemann Lambertsen 
> 
> Acked-by: Chen-Yu Tsai 
> 
> I suggest you send this in a new thread. Patches in replies tend
> to get missed.

I will. Should I Cc: stable on this?

-- 
Rask Ingemann Lambertsen


Re: [PATCH 3/5] drm: convert drivers to use of_graph_get_remote_node

2017-02-06 Thread Russell King - ARM Linux
On Mon, Feb 06, 2017 at 05:55:33PM +, Liviu Dudau wrote:
> OK, I will fix the driver if Rob's patch still requires it.

I don't think you ever needed it.  As Rob says, what you're testing
won't ever change unless you're using overlays - it's certainly not
dependent on the tda998x module being loaded or not, or even the
tda998x driver being bound or not.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.


Re: pciehp is broken from 4.10-rc1

2017-02-06 Thread Bjorn Helgaas
On Sun, Feb 05, 2017 at 08:34:54AM +0100, Lukas Wunner wrote:
> On Sat, Feb 04, 2017 at 08:22:59PM -0800, Yinghai Lu wrote:
> > On Sat, Feb 4, 2017 at 3:34 PM, Lukas Wunner  wrote:
> > > On Sat, Feb 04, 2017 at 01:44:34PM -0800, Yinghai Lu wrote:
> > >> On Sat, Feb 4, 2017 at 10:56 AM, Lukas Wunner  wrote:
> > >> > On Sat, Feb 04, 2017 at 09:12:54AM +0100, Lukas Wunner wrote:
> > >> > Section 6.7.3.4 of the PCIe Base spec seems to support the theory 
> > >> > above,
> > >> > so here's a tentative patch.
> > >> >
> > >> > -- >8 --
> > >> > Subject: [PATCH] PCI: pciehp: Don't enable PME on runtime suspend
> > >>
> > >> it works:
> > >
> > > Thanks a lot for the report and for testing the patch!
> > 
> > Wait, Commit 68db9bc still has problem with another server (skylake
> > based), and this patch does not help.
> [...]
> > sca05-0a81fd8d:~ # echo 1 > /sys/bus/pci/slots/11/power
> > [  375.376609] pci_hotplug: power_write_file: power = 1
> > [  375.382175] pciehp :b3:00.0:pcie004: pciehp_get_power_status: 
> > SLOTCTRL a8 value read 17f1
> > [  375.392695] pciehp :b3:00.0:pcie004: pending interrupts 0x0010 from 
> > Slot Status
> > [  375.401370] pciehp :b3:00.0:pcie004: pciehp_power_on_slot: SLOTCTRL 
> > a8 write cmd 0
> > [  375.410231] pciehp :b3:00.0:pcie004: pciehp_green_led_blink: 
> > SLOTCTRL a8 write cmd 200
> > [  375.411071] pciehp :b3:00.0:pcie004: pending interrupts 0x0010 from 
> > Slot Status
> > [  375.445222] pciehp :b3:00.0:pcie004: pending interrupts 0x0010 from 
> > Slot Status
> > [  377.00] pciehp :b3:00.0:pcie004: Data Link Layer Link Active not 
> > set in 1000 msec
> > [  378.960364] pci :b4:00.0 id reading try 50 times with interval 20 ms 
> > to get 
> > [  378.969406] pciehp :b3:00.0:pcie004: pciehp_check_link_status: 
> > lnk_status = 5001
> > [  378.978059] pciehp :b3:00.0:pcie004: link training error: status 
> > 0x5001
> > [  378.985834] pciehp :b3:00.0:pcie004: Failed to check link status
> > [  378.987185] pciehp :b3:00.0:pcie004: pending interrupts 0x0010 from 
> > Slot Status
> > [  378.987253] pciehp :b3:00.0:pcie004: pciehp_power_off_slot: SLOTCTRL 
> > a8 write cmd 400
> > [  380.000409] pciehp :b3:00.0:pcie004: pciehp_green_led_off: SLOTCTRL 
> > a8 write cmd 300
> > [  380.000674] pciehp :b3:00.0:pcie004: pending interrupts 0x0010 from 
> > Slot Status
> > [  380.018020] pciehp :b3:00.0:pcie004: pciehp_set_attention_status: 
> > SLOTCTRL a8 write cmd 40
> > [  380.019053] pciehp :b3:00.0:pcie004: pending interrupts 0x0010 from 
> > Slot Status
> 
> So on this Skylake machine link training fails after resuming from D3hot
> to D0.
> 
> One thing that's a bit fishy is that normally the Link Disable bit is
> cleared when powering on the slot.  This results in a debug message
> in dmesg containing the string "lnk_ctrl = ", and that line is missing
> from the output you've pasted above, suggesting that the machine is
> not running a stock v4.10 kernel after all but something else.  Could
> you check why this message is not printed?  Could you check with lspci
> if the Link Disable bit is set before you invoke "echo 1"?
> 
> This is the call stack:
> pciehp_sysfs_enable_slot()
>   pciehp_enable_slot()
> board_added()
>   pciehp_power_on_slot()
> pciehp_link_enable()
>   __pciehp_link_set()
> 
> Another theory is that the link is generally unreliable on this machine
> since the Link Bandwidth Management Status bit is set in the Link Status
> Register ("lnk_status = 5001"), which according to the spec means:
> 
> "Hardware has changed Link speed or width to attempt to correct unreliable
> Link operation, either through an LTSSM timeout or a higher level process.
> This bit must be set if the Physical Layer reports a speed or width change
> was initiated by the Downstream component that was not indicated as an
> autonomous change."
> 
> In this case it would be good to know which hardware exactly we're dealing
> with so that we might quirk it to not runtime suspend the port.  To that
> end, could you attach a full dmesg log to the bugzilla entry I've created?
> https://bugzilla.kernel.org/show_bug.cgi?id=193951
> 
> @Mika, Rafael: Are you aware of Skylake machines with unreliable link
> training, or perhaps errata of Skylake chips related to link training
> on hotplug ports?

I think we're prematurely leaping to the conclusion that this is a
hardware erratum.  I don't have nearly the confidence that pciehp is
handling this correctly that you seem to have.

If this is a hardware erratum, we should be able to turn off
CONFIG_HOTPLUG_PCI_PCIE and drive through this scenario manually with
setpci.  That sequence would be immensely helpful to any hardware
engineers who want to investigate this.

I'm hesitant to add a quirk until we have a better understanding of
what's going on.  Yinghai tripped over this one broken case, but I
don't have any reason to believe that's the only one.

B

[PATCH v2] Staging: omap4iss: Fix coding style issues

2017-02-06 Thread Avraham Shukron
Fixes line-over-80-characters issues as well as multiline comments style.

Signed-off-by: Avraham Shukron 

---
 drivers/staging/media/omap4iss/iss_video.c | 41 --
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/drivers/staging/media/omap4iss/iss_video.c 
b/drivers/staging/media/omap4iss/iss_video.c
index c16927a..42de513 100644
--- a/drivers/staging/media/omap4iss/iss_video.c
+++ b/drivers/staging/media/omap4iss/iss_video.c
@@ -128,7 +128,8 @@ static unsigned int iss_video_mbus_to_pix(const struct 
iss_video *video,
pix->width = mbus->width;
pix->height = mbus->height;
 
-   /* Skip the last format in the loop so that it will be selected if no
+   /*
+* Skip the last format in the loop so that it will be selected if no
 * match is found.
 */
for (i = 0; i < ARRAY_SIZE(formats) - 1; ++i) {
@@ -138,7 +139,8 @@ static unsigned int iss_video_mbus_to_pix(const struct 
iss_video *video,
 
min_bpl = pix->width * ALIGN(formats[i].bpp, 8) / 8;
 
-   /* Clamp the requested bytes per line value. If the maximum bytes per
+   /*
+* Clamp the requested bytes per line value. If the maximum bytes per
 * line value is zero, the module doesn't support user configurable line
 * sizes. Override the requested value with the minimum in that case.
 */
@@ -172,7 +174,8 @@ static void iss_video_pix_to_mbus(const struct 
v4l2_pix_format *pix,
mbus->width = pix->width;
mbus->height = pix->height;
 
-   /* Skip the last format in the loop so that it will be selected if no
+   /*
+* Skip the last format in the loop so that it will be selected if no
 * match is found.
 */
for (i = 0; i < ARRAY_SIZE(formats) - 1; ++i) {
@@ -298,7 +301,8 @@ iss_video_check_format(struct iss_video *video, struct 
iss_video_fh *vfh)
 
 static int iss_video_queue_setup(struct vb2_queue *vq,
 unsigned int *count, unsigned int *num_planes,
-unsigned int sizes[], struct device 
*alloc_devs[])
+unsigned int sizes[],
+struct device *alloc_devs[])
 {
struct iss_video_fh *vfh = vb2_get_drv_priv(vq);
struct iss_video *video = vfh->video;
@@ -360,7 +364,8 @@ static void iss_video_buf_queue(struct vb2_buffer *vb)
 
spin_lock_irqsave(&video->qlock, flags);
 
-   /* Mark the buffer is faulty and give it back to the queue immediately
+   /*
+* Mark the buffer is faulty and give it back to the queue immediately
 * if the video node has registered an error. vb2 will perform the same
 * check when preparing the buffer, but that is inherently racy, so we
 * need to handle the race condition with an authoritative check here.
@@ -443,7 +448,8 @@ struct iss_buffer *omap4iss_video_buffer_next(struct 
iss_video *video)
 
buf->vb.vb2_buf.timestamp = ktime_get_ns();
 
-   /* Do frame number propagation only if this is the output video node.
+   /*
+* Do frame number propagation only if this is the output video node.
 * Frame number either comes from the CSI receivers or it gets
 * incremented here if H3A is not active.
 * Note: There is no guarantee that the output buffer will finish
@@ -605,7 +611,8 @@ iss_video_set_format(struct file *file, void *fh, struct 
v4l2_format *format)
 
mutex_lock(&video->mutex);
 
-   /* Fill the bytesperline and sizeimage fields by converting to media bus
+   /*
+* Fill the bytesperline and sizeimage fields by converting to media bus
 * format and back to pixel format.
 */
iss_video_pix_to_mbus(&format->fmt.pix, &fmt);
@@ -678,8 +685,9 @@ iss_video_get_selection(struct file *file, void *fh, struct 
v4l2_selection *sel)
if (subdev == NULL)
return -EINVAL;
 
-   /* Try the get selection operation first and fallback to get format if 
not
-* implemented.
+   /*
+* Try the get selection operation first and fallback to get format if
+* not implemented.
 */
sdsel.pad = pad;
ret = v4l2_subdev_call(subdev, pad, get_selection, NULL, &sdsel);
@@ -867,7 +875,8 @@ iss_video_streamon(struct file *file, void *fh, enum 
v4l2_buf_type type)
 
mutex_lock(&video->stream_lock);
 
-   /* Start streaming on the pipeline. No link touching an entity in the
+   /*
+* Start streaming on the pipeline. No link touching an entity in the
 * pipeline can be activated or deactivated once streaming is started.
 */
pipe = entity->pipe
@@ -895,7 +904,8 @@ iss_video_streamon(struct file *file, void *fh, enum 
v4l2_buf_type type)
while ((entity = media_entity_graph_walk_next(&graph)))
media_entity_enum_set(&pipe->ent_enum, entity);
 
-   /* Verify 

Re: [PATCH] Documentation: devicetree: Add PHY no lane swap binding

2017-02-06 Thread Florian Fainelli
On 02/06/2017 06:15 AM, Lukasz Majewski wrote:
> Hi Florian, Andrew,
> 
>> Le 02/04/17 à 09:23, Andrew Lunn a écrit :
>>> On Sat, Feb 04, 2017 at 04:47:47PM +0100, Lukasz Majewski wrote:
>>>> Add the documentation to avoid PHY lane swapping. This is a boolean
>>>> entry to notify the phy device drivers that the TX/RX lanes NO need
>>>
>>> that the TX/RX lanes should not be swapped.
>>>
>>>> to be swapped.
>>>> The use case for this binding mostly happens after wrong HW
>>>> configuration of PHY IC during bootstrap.
>>>>
>>>> Signed-off-by: Lukasz Majewski 
>>>> ---
>>>>  Documentation/devicetree/bindings/net/phy.txt | 4 
>>>>  1 file changed, 4 insertions(+)
>>>>
>>>> diff --git a/Documentation/devicetree/bindings/net/phy.txt
>>>> b/Documentation/devicetree/bindings/net/phy.txt index
>>>> fb5056b..5e25bc9 100644 ---
>>>> a/Documentation/devicetree/bindings/net/phy.txt +++
>>>> b/Documentation/devicetree/bindings/net/phy.txt @@ -39,6 +39,10 @@
>>>> Optional Properties:
>>>>  - enet-phy-lane-swap: If set, indicates the PHY will swap the
>>>> TX/RX lanes to compensate for the board being designed with the
>>>> lanes swapped. 
>>>> +- enet-phy-lane-no-swap: If set, indicates that PHY will disable
>>>> swap of the
>>>> +  TX/RX lanes. This binding allows the PHY to work correcly after
>>>> e.g. wrong
>>>> +  bootstrap configuration caused by issues in PCB layout design.
>>
>> s/binding/property/
>>
>>>> +
>>>
>>> We are leaving it undefined what it means if neither
>>> enet-phy-lane-no-swap nor enet-phy-lane-swap properties are present.
>>> Do we want to define this? That the swap should be left untouched by
>>> the driver?
>>
>> Since this is a description of the hardware, absence of a properties
>> should mean that the driver is at freedom to either keep the hardware
>> defaults, or come up with its own settings that are sensible for that
>> particular PHY device.
>>
>> What would you see clarified here?
> 
> Any more comments to this patch?
> 
> Is the explanation informative enough?

Please resubmit as part of your PHY driver changes, and change binding
in the description above for property.
-- 
Florian


Re: [PATCH 3/5] drm: convert drivers to use of_graph_get_remote_node

2017-02-06 Thread Liviu Dudau
On Mon, Feb 06, 2017 at 05:34:07PM +, Russell King - ARM Linux wrote:
> On Mon, Feb 06, 2017 at 05:23:06PM +, Liviu Dudau wrote:
> > On Mon, Feb 06, 2017 at 11:09:49AM -0600, Rob Herring wrote:
> > > On Mon, Feb 06, 2017 at 10:29:33AM +, Liviu Dudau wrote:
> > > > On Fri, Feb 03, 2017 at 09:36:33PM -0600, Rob Herring wrote:
> > > > > - /* add the remote encoder port as component */
> > > > > - port = of_graph_get_remote_port_parent(ep);
> > > > > - of_node_put(ep);
> > > > > - if (!port || !of_device_is_available(port)) {
> > > > > - of_node_put(port);
> > > > > - return -EAGAIN;
> > > > 
> > > > The HDLCD change looks reasonable except for this -EAGAIN business. 
> > > > I'll have to
> > > > test your changes on my setup to see how this affects having the 
> > > > encoder as a module.
> > > 
> > > What are you expecting to happen with -EAGAIN? This one was a bit of an 
> > > oddball.
> > 
> > When both the HDLCD and the TDA998x drivers are compiled as modules, the
> > order in which they are inserted can be somewhat random (due to testing).
> 
> Not really "due to testing" but if you run a real distro, they tend to
> have a multi-threaded behaviour when loading kernel modules at boot.

Yeah, a lot of times I'm using a toy "distribution" (buildroot) as it boots
faster under ARM models than a "real" (read systemd-based) distro would.

> 
> > It is at that time when you want the probe of HDLCD to be retried on the
> > insmod-ing of the tda998x.ko rather than fail entirely.
> 
> -EAGAIN doesn't get you that, and in any case, solving that problem is
> exactly why the component API exists - so that DRM only comes up once
> all the necessary components are available.
> 
> -EAGAIN also doesn't get you that from inside a probe function - such
> an error will be reported in the kernel log, and no further action
> will be taken (the device driver probe will be failed, and not
> automatically retried).

I stand corrected on the behaviour of the driver then. That was the original
intent, to generate a re-probe of the driver.

> 
> The only case that we automatically retry is if a driver returns
> -EPROBE_DEFER.  Everything else causes a probe failure.

OK, I will fix the driver if Rob's patch still requires it.

Best regards,
Liviu

> 
> -- 
> RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
> according to speedtest.net.

-- 

| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---
¯\_(ツ)_/¯


Re: [PATCH v4 2/2] staging: lustre: move else if statement to a single line

2017-02-06 Thread Joe Perches
On Mon, 2017-02-06 at 11:13 -0500, Maksymilian Piechota wrote:
> diff --git a/drivers/staging/wlan-ng/prism2mgmt.c 
> b/drivers/staging/wlan-ng/prism2mgmt.c
[]
> @@ -1307,8 +1307,7 @@ int prism2mgmt_wlansniff(struct wlandevice *wlandev, 
> void *msgp)
>   && (msg->prismheader.data == P80211ENUM_truth_true)) {
>   hw->sniffhdr = 0;
>   wlandev->netdev->type = ARPHRD_IEEE80211_PRISM;
> - } else
> - if ((msg->wlanheader.status == 
> P80211ENUM_msgitem_status_data_ok) && 
> + } else if ((msg->wlanheader.status == 
> P80211ENUM_msgitem_status_data_ok) &&
>   (msg->wlanheader.data == P80211ENUM_truth_true)) {
>   hw->sniffhdr = 1;
>   wlandev->netdev->type = ARPHRD_IEEE80211_PRISM;

Hi again.

When you change the else if on multiple lines to a single line,
you should also realign the continuation like:

} else if ((msg->wlanheader.status == 
P80211ENUM_msgitem_status_data_ok) &&
   (msg->wlanheader.data == P80211ENUM_truth_true)) {


Using 3 tabs then 3 spaces


Re: [PATCH v2] PCI: pciehp: Don't enable PME on runtime suspend

2017-02-06 Thread Bjorn Helgaas
On Mon, Feb 06, 2017 at 06:54:37AM +0100, Lukas Wunner wrote:
> Since commit 68db9bc81436 ("PCI: pciehp: Add runtime PM support for PCIe
> hotplug ports") we runtime suspend a hotplug port to D3hot when all its
> children are runtime suspended or none are present.
> 
> When runtime suspending the port the PCI core automatically enables PME:
> pci_pm_runtime_suspend()
> pci_finish_runtime_suspend()
> __pci_enable_wake()
> 
> According to the PCI Express Base Specification, section 6.7.3.4:
>"Note that PME and Hot-Plug Event interrupts (when both are
> implemented) always share the same MSI or MSI-X vector [...]
> If wake generation is required by the associated form factor
> specification, a hot-plug capable Downstream Port must support
> generation of a wakeup event (using the PME mechanism) on hotplug
> events that occur when the system is in a sleep state or the Port
> is in device state D1, D2, or D3Hot."
> 
> Thus, if the port is runtime suspended even though it is still occupied,
> it may immediately be woken by a PME interrupt.  

The spec goes on to say that a wakeup event should be generated when
all three of these conditions occur:

  - status register for an enabled [hotplug] event transitions from
not set to set

  - Port is in D1, D2, or D3hot,

  - PME_En is set

I think you're saying that if we put a hotplug-capable port that
controls an occupied slot into D3hot, the port may immediately
generate a wakeup PME.

What is the hotplug event that causes generation of this wakeup event?

> One scenario where this
> happens is if all children of the hotplug port have runtime suspended.
> Another scenario is power control via sysfs:  If a user manually turns
> the hotplug port off (e.g. to safely remove the card), PME will signal
> an interrupt for the still-occupied slot, which is interpreted by pciehp
> as re-insertion of a card.  As a result, power control via sysfs is no
> longer possible.  This was observed and reported by Yinghai Lu.
> 
> PME is in fact unnecessary on hotplug ports:  Hotplug can be signaled
> even in D3hot,

How are hotplug events signaled in D3hot without using PME?  I'm
looking at the PCI PM spec, r1.2, table 5-4 (p 49 in my copy), which
says a function in D3hot can only generate PME.

> and commit 68db9bc81436 ensures that all parents of the
> hotplug port are kept awake so that interrupts can be delivered.
> PME would allow us to runtime suspend the parent ports as well, but we
> do not make use of it because we cannot be sure if PME actually works.
> Thunderbolt controllers for instance advertise PME capability, but at
> least on Macs the PME pin is not connected.
> 
> Since we do not rely on PME for hotplug ports, we may as well not enable
> it, thereby avoiding its negative side effects.  However the present
> commit deliberately only avoids enabling PME on runtime suspend, the
> ability to enable it for system sleep is retained.
> 
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=193951
> Fixes: 68db9bc81436 ("PCI: pciehp: Add runtime PM support for PCIe
> hotplug ports")
> Reported-by: Yinghai Lu 
> Cc: Rafael J. Wysocki 
> Cc: Mika Westerberg 
> Cc: Bjorn Helgaas 
> Signed-off-by: Lukas Wunner 
> ---
> 
> v1-> v2:
>  Move check for is_hotplug_bridge from pci_finish_runtime_suspend()
>  down into pci_dev_run_wake(), this seems cleaner, less clumsy.
> 
>  drivers/pci/pci.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index a881c0d..9c22e62 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -2096,6 +2096,14 @@ bool pci_dev_run_wake(struct pci_dev *dev)
>  {
>   struct pci_bus *bus = dev->bus;
>  
> + /*
> +  * Don't enable PME at runtime on hotplug ports (even if supported)
> +  * since PME sends unwanted interrupts if the slot is occupied while
> +  * suspended to D3hot (PCIe Base Specification, section 6.7.3.4).
> +  */
> + if (dev->is_hotplug_bridge)
> + return false;
> +
>   if (device_run_wake(&dev->dev))
>   return true;
>  
> -- 
> 2.11.0
> 


Re: Regression on next-20170203 spi/for-next 3f87493930a0f qemu on x86_64

2017-02-06 Thread Luis R. Rodriguez
On Sun, Feb 05, 2017 at 11:33:25AM +0100, Borislav Petkov wrote:
> On Sat, Feb 04, 2017 at 05:22:55PM -0800, Guenter Roeck wrote:
> > Upstream (v4.10-rc6-193-ga572a1b99948), the same command yields no error at 
> > all:
> 
> That's because you tested Linus' merge commit of the branch which fixed that 
> :-)
> 
> IOW, the fix should be:
> 
> aaaec6fc7554 ("x86/irq: Make irq activate operations symmetric")
> 
> It is on its way to stable too, as we speak.

I've taken this patch and applied it on top of next-20170203
and confirm it fixes the regression. I've also tested next-20170206
which has the fix and confirm that it boots as well.

Do we have any test units which can kick off regularly to test against such
type of regression in the future or is it not worth it?

Thanks Boris!

  Luis


Re: [PATCH 2/4] tools lib traceevent: Robustify do_generate_dynamic_list_file

2017-02-06 Thread Arnaldo Carvalho de Melo
Em Wed, Feb 01, 2017 at 10:38:02PM -0800, David Carrillo-Cisneros escreveu:
> The dynamic-list-file used to export dynamic symbols introduced in
> 
> commit e3d09ec8126f ("tools lib traceevent: Export dynamic symbols
> used by traceevent plugins")
> 
> is generated without any sort of error checking.
> 
> I experienced problems due to an old version of nm (v 0.158) that outputs
> in a format distinct from the assumed by the script.
> 
> Robustify this by enforcing that the second column in the symbol output
> is an "U" (Undefined) as it should be since we are calling $(NM) -u ...
> and print an error message otherwise.

Thanks, applied.

- Arnaldo
 
> Signed-off-by: David Carrillo-Cisneros 
> ---
>  tools/lib/traceevent/Makefile | 13 +
>  1 file changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
> index 2616c66e10c1..1b6e146429d0 100644
> --- a/tools/lib/traceevent/Makefile
> +++ b/tools/lib/traceevent/Makefile
> @@ -257,10 +257,15 @@ define do_install_plugins
>  endef
>  
>  define do_generate_dynamic_list_file
> - (echo '{';  \
> - $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;   \
> - echo '};';  \
> - ) > $2
> + symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | sort -u`;\
> + if [ "$$symbol_type" == "U" ];then  \
> + (echo '{';  \
> + $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\
> + echo '};';  \
> + ) > $2; \
> + else\
> + (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\
> + fi
>  endef
>  
>  install_lib: all_cmd install_plugins
> -- 
> 2.11.0.483.g087da7b7c-goog


Re: [PATCH 4/6] xfs: use memalloc_nofs_{save,restore} instead of memalloc_noio*

2017-02-06 Thread Michal Hocko
On Mon 06-02-17 07:39:23, Matthew Wilcox wrote:
> On Mon, Feb 06, 2017 at 03:07:16PM +0100, Michal Hocko wrote:
> > +++ b/fs/xfs/xfs_buf.c
> > @@ -442,17 +442,17 @@ _xfs_buf_map_pages(
> > bp->b_addr = NULL;
> > } else {
> > int retried = 0;
> > -   unsigned noio_flag;
> > +   unsigned nofs_flag;
> >  
> > /*
> >  * vm_map_ram() will allocate auxillary structures (e.g.
> >  * pagetables) with GFP_KERNEL, yet we are likely to be under
> >  * GFP_NOFS context here. Hence we need to tell memory reclaim
> > -* that we are in such a context via PF_MEMALLOC_NOIO to prevent
> > +* that we are in such a context via PF_MEMALLOC_NOFS to prevent
> >  * memory reclaim re-entering the filesystem here and
> >  * potentially deadlocking.
> >  */
> 
> This comment feels out of date ... how about:

which part is out of date?

> 
>   /*
>* vm_map_ram will allocate auxiliary structures (eg page
>* tables) with GFP_KERNEL.  If that tries to reclaim memory
>* by calling back into this filesystem, we may deadlock.
>* Prevent that by setting the NOFS flag.
>*/

dunno, the previous wording seems clear enough to me. Maybe a little bit
more chatty than yours but I am not sure this is worth changing.

> 
> > -   noio_flag = memalloc_noio_save();
> > +   nofs_flag = memalloc_nofs_save();
> > do {
> > bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
> > -1, PAGE_KERNEL);
> 
> Also, I think it shows that this is the wrong place in XFS to be calling
> memalloc_nofs_save().  I'm not arguing against including this patch;
> it's a step towards where we want to be.  I also don't know XFS well
> enough to know where to set that flag ;-)  Presumably when we start a
> transaction ... ?

Yes that is what I would like to achieve longterm. And the reason why I
didn't want to mimic this pattern in kvmalloc as some have suggested.
It just takes much more time to get there from the past experience and
we should really start somewhere.
-- 
Michal Hocko
SUSE Labs


Re: [PATCH 2/5] drm: of: introduce drm_of_find_panel_or_bridge

2017-02-06 Thread Philipp Zabel
On Mon, 2017-02-06 at 10:53 -0600, Rob Herring wrote:
> On Mon, Feb 06, 2017 at 11:42:48AM +0100, Philipp Zabel wrote:
> > On Fri, 2017-02-03 at 21:36 -0600, Rob Herring wrote:
> > > Many drivers have a common pattern of searching the OF graph for either an
> > > attached panel or bridge and then finding the DRM struct for the panel
> > > or bridge. Also, most drivers need to handle deferred probing when the
> > > DRM device is not yet instantiated. Create a common function,
> > > drm_of_find_panel_or_bridge, to find the connected node and the
> > > associated DRM panel or bridge device.
> 
> [...]
> 
> > > +int drm_of_find_panel_or_bridge(const struct device_node *np,
> > > + int port, int endpoint,
> > > + struct drm_panel **panel,
> > > + struct drm_bridge **bridge)
> > > +{
> > > + int ret = -ENODEV;
> > 
> > This is only returned if !panel && !bridge. I'd consider this invalid
> > usage of this function, so maybe use -EINVAL?
> 
> Yes.
> 
> > > + struct device_node *remote;
> > > +
> > > + remote = of_graph_get_remote_node(np, port, endpoint);
> > > + if (!remote)
> > > + return -ENODEV;
> > > +
> > > + if (bridge)
> > > + *bridge = NULL;
> > 
> > I would move this ^ ...
> > 
> > > + if (panel) {
> > > + *panel = of_drm_find_panel(remote);
> > > + if (*panel) {
> > 
> > ... here.
> 
> Okay.
> 
> > > + ret = 0;
> > > + goto out_put;
> > > + }
> > > + ret = -EPROBE_DEFER;
> > > + }
> > > +
> > > + if (bridge) {
> > > + *bridge = of_drm_find_bridge(remote);
> > > + if (*bridge)
> > > + ret = 0;
> > > + else
> > > + ret = -EPROBE_DEFER;
> > > + }
> > > +out_put:
> > > + of_node_put(remote);
> > > + return ret;
> > > +}
> 
> I've ended up re-writing things a bit getting rid of the goto and the 
> result looks like this:

Looks good to me.

> int drm_of_find_panel_or_bridge(const struct device_node *np,
>   int port, int endpoint,
>   struct drm_panel **panel,
>   struct drm_bridge **bridge)
> {
>   int ret = -EPROBE_DEFER;
>   struct device_node *remote;
> 
>   if (!panel && !bridge)
>   return -EINVAL;
> 
>   remote = of_graph_get_remote_node(np, port, endpoint);
>   if (!remote)
>   return -ENODEV;
> 
>   if (panel) {
>   *panel = of_drm_find_panel(remote);
>   if (*panel) {
>   if (bridge)
>   *bridge = NULL;

With the goto out_put gone, I'm conflicted whether I find this clearer
here, or ...

>   ret = 0;
>   }
>   }
> 
>   /* No panel found yet, check for a bridge next. */
>   if (ret && bridge) {
>   *bridge = of_drm_find_bridge(remote);
>   if (*bridge)
>   ret = 0;
>   }

... even down here:

if (bridge) {
if (ret) {
/* No panel found yet, check for a bridge next. */
*bridge = of_drm_find_bridge(remote)
if (*bridge)
ret = 0;
} else {
*bridge = NULL;
}
}

That way bridge doesn't have to be checked twice and all the
modification of *bridge is in the same block.

> 
>   of_node_put(remote);
>   return ret;
> }

Either way,

Acked-by: Philipp Zabel 

regards
Philipp



Re: [PATCH] clocksource: arm_arch_timer: print timer value at init time

2017-02-06 Thread Mark Rutland
On Sat, Feb 04, 2017 at 09:41:18AM +0100, Daniel Lezcano wrote:
> On Mon, Dec 19, 2016 at 09:47:37AM -0800, Olof Johansson wrote:
> > This is useful to get an indication of how much time we spent in firmware.
> > 
> > It's not guaranteed that the timer started at 0 on reset, so it's just
> > an approximation, and might very well be invalid on some systems. But
> > it's still a useful metric to have access to.
> 
> Hi Olof,
> 
> [ ... ]
> 
> > --- a/drivers/clocksource/arm_arch_timer.c
> > +++ b/drivers/clocksource/arm_arch_timer.c
> > @@ -521,6 +521,8 @@ arch_timer_detect_rate(void __iomem *cntbase, struct 
> > device_node *np)
> >  
> >  static void arch_timer_banner(unsigned type)
> >  {
> > +   unsigned long cnt = arch_timer_read_counter();
> > +
> 
> arch_timer_banner() is called before arch_counter_register() where the
> arch_timer_read_counter() function pointer is set.
> 
> Perhaps the arch_timer_banner() and arch_counter_register() should be swapped 
> in
> arch_timer_common_init().

That would make sense to me.

> > pr_info("Architected %s%s%s timer(s) running at %lu.%02luMHz 
> > (%s%s%s).\n",
> >  type & ARCH_CP15_TIMER ? "cp15" : "",
> >  type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ?  " and " : "",
> > @@ -534,6 +536,8 @@ static void arch_timer_banner(unsigned type)
> >  type & ARCH_MEM_TIMER ?
> > arch_timer_mem_use_virtual ? "virt" : "phys" :
> > "");
> > +   pr_info("Initial timer value: 0x%lx: %ld.%02lds\n",
> > +   cnt, cnt/arch_timer_rate, (cnt/(arch_timer_rate/100)) % 100);

Our timers should be precise enough to give us a few more digits here
(e.g. down to ns, like printk). Are there any helpers we can use to do
that?

It would also be nice to log which counter we're reading from.

Thanks,
Mark.


Re: [PATCH 3/5] drm: convert drivers to use of_graph_get_remote_node

2017-02-06 Thread Rob Herring
On Mon, Feb 6, 2017 at 11:23 AM, Liviu Dudau  wrote:
> On Mon, Feb 06, 2017 at 11:09:49AM -0600, Rob Herring wrote:
>> On Mon, Feb 06, 2017 at 10:29:33AM +, Liviu Dudau wrote:
>> > On Fri, Feb 03, 2017 at 09:36:33PM -0600, Rob Herring wrote:
>> > > Convert drivers to use the new of_graph_get_remote_node() helper
>> > > instead of parsing the endpoint node and then getting the remote device
>> > > node. Now drivers can just specify the device node and which
>> > > port/endpoint and get back the connected remote device node. The details
>> > > of the graph binding are nicely abstracted into the core OF graph code.
>> > >
>> > > This changes some error messages to debug messages (in the graph core).
>> > > Graph connections are often "no connects" depending on the particular
>> > > board, so we want to avoid spurious messages. Plus the kernel is not a
>> > > DT validator.

[...]

>> > > - /* add the remote encoder port as component */
>> > > - port = of_graph_get_remote_port_parent(ep);
>> > > - of_node_put(ep);
>> > > - if (!port || !of_device_is_available(port)) {
>> > > - of_node_put(port);
>> > > - return -EAGAIN;
>> >
>> > The HDLCD change looks reasonable except for this -EAGAIN business. I'll 
>> > have to
>> > test your changes on my setup to see how this affects having the encoder 
>> > as a module.
>>
>> What are you expecting to happen with -EAGAIN? This one was a bit of an
>> oddball.
>
> When both the HDLCD and the TDA998x drivers are compiled as modules, the 
> order in which
> they are inserted can be somewhat random (due to testing). It is at that time 
> when you
> want the probe of HDLCD to be retried on the insmod-ing of the tda998x.ko 
> rather than
> fail entirely.
>
>>
>> This condition would only change if you had an overlay. That's a use
>> case that needs to be handled in a common way ('cause I don't want to
>> clean-up every driver doing overlays in their own way later). Just
>> having "status" changing at runtime would have all sorts of implications
>> in the kernel.
>
> Hmm, not sure what you mean here with overlays. Are you thinking that the
> remote port is initially disabled and then re-enabled by an overlay? That is
> not the only way of_device_is_available() can fail, see above regarding 
> modules.

Russell pretty much answered most of this, but specifically for
of_device_is_available, the only way of_device_is_available() can
change is a DT change with "status" changing. The only way
of_graph_get_remote_port_parent changes is also from a DT change.

Rob


Re: Linux 4.9.6 ( Restore IO-APIC irq_chip retrigger callback , breaks my box )

2017-02-06 Thread Greg KH
On Mon, Feb 06, 2017 at 06:30:15PM +0100, Gabriel C wrote:
> 
> On 26.01.2017 08:48, Greg KH wrote:
> 
> Hi Greg,
> 
> > I'm announcing the release of the 4.9.6 kernel.
> 
> 
> Somewhat late, however I didn't test 4.9.6 but jumped from 4.9.5 to 4.9.7
> and found out my box won't boot anymore.
> 
> It hangs early and freezes with a lot of RCU warnings.
> Since I cannot setup a netconsole right now I cannot post the errors , really 
> sorry.
> 
> ( but I could make a picture if needed )
> 
> 
> I bisected it down to :
> 
> > Ruslan Ruslichenko (1):
> >   x86/ioapic: Restore IO-APIC irq_chip retrigger callback
> 
> Reverting this one fixes the problem for me..
> 
> Also this problem exists in Linus tree , I tested on:
> 4.10.0-rc6-00167-ga0a28644c1cf

Ok, at least we are consistent :)

> The box is a PRIMERGY TX200 S5 , 2 socket , 2 x E5520 CPU(s) installed.
> 
> Config:
> https://raw.githubusercontent.com/frugalware/frugalware-current/master/source/base/kernel/config.x86_64

Ruslan, any thoughts about what to do here?

thanks,

greg k-h


Re: [PATCH v3 03/14] mm: use pmd lock instead of racy checks in zap_pmd_range()

2017-02-06 Thread Kirill A. Shutemov
On Mon, Feb 06, 2017 at 10:32:10AM -0600, Zi Yan wrote:
> On 6 Feb 2017, at 10:07, Kirill A. Shutemov wrote:
> 
> > On Sun, Feb 05, 2017 at 11:12:41AM -0500, Zi Yan wrote:
> >> From: Zi Yan 
> >>
> >> Originally, zap_pmd_range() checks pmd value without taking pmd lock.
> >> This can cause pmd_protnone entry not being freed.
> >>
> >> Because there are two steps in changing a pmd entry to a pmd_protnone
> >> entry. First, the pmd entry is cleared to a pmd_none entry, then,
> >> the pmd_none entry is changed into a pmd_protnone entry.
> >> The racy check, even with barrier, might only see the pmd_none entry
> >> in zap_pmd_range(), thus, the mapping is neither split nor zapped.
> >
> > That's definitely a good catch.
> >
> > But I don't agree with the solution. Taking pmd lock on each
> > zap_pmd_range() is a significant hit by scalability of the code path.
> > Yes, split ptl lock helps, but it would be nice to avoid the lock in first
> > place.
> >
> > Can we fix change_huge_pmd() instead? Is there a reason why we cannot
> > setup the pmd_protnone() atomically?
> 
> If you want to setup the pmd_protnone() atomically, we need a new way of
> changing pmds, like pmdp_huge_cmp_exchange_and_clear(). Otherwise, due to
> the nature of racy check of pmd in zap_pmd_range(), it is impossible to
> eliminate the chance of catching this bug if pmd_protnone() is setup
> in two steps: first, clear it, second, set it.
> 
> However, if we use pmdp_huge_cmp_exchange_and_clear() to change pmds from now 
> on,
> instead of current two-step approach, it will eliminate the possibility of
> using batched TLB shootdown optimization (introduced by Mel Gorman for base 
> page swapping)
> when THP is swappable in the future. Maybe other optimizations?

I'll think about this more.

> Why do you think holding pmd lock is bad?

Each additional atomic operation in fast-path hurts scalability.
Cost of atomic operations rises fast as machine gets bigger.

> In zap_pte_range(), pte lock is also held when each PTE is zapped.

It's necessary evil for pte. Not so much for pmd so far.

> BTW, I am following Naoya's suggestion and going to take pmd lock inside
> the loop. So pmd lock is held when each pmd is being checked and it will be 
> released
> when the pmd entry is zapped, split, or pointed to a page table.
> Does it still hurt much on performance?

Naoya's suggestion is not correct: pmd_lock() can be different not for
each pmd entry, but for each pmd table. So taking it outside of the loop
is correct.


-- 
 Kirill A. Shutemov


Re: [PATCH 3/5] drm: convert drivers to use of_graph_get_remote_node

2017-02-06 Thread Russell King - ARM Linux
On Mon, Feb 06, 2017 at 05:23:06PM +, Liviu Dudau wrote:
> On Mon, Feb 06, 2017 at 11:09:49AM -0600, Rob Herring wrote:
> > On Mon, Feb 06, 2017 at 10:29:33AM +, Liviu Dudau wrote:
> > > On Fri, Feb 03, 2017 at 09:36:33PM -0600, Rob Herring wrote:
> > > > -   /* add the remote encoder port as component */
> > > > -   port = of_graph_get_remote_port_parent(ep);
> > > > -   of_node_put(ep);
> > > > -   if (!port || !of_device_is_available(port)) {
> > > > -   of_node_put(port);
> > > > -   return -EAGAIN;
> > > 
> > > The HDLCD change looks reasonable except for this -EAGAIN business. I'll 
> > > have to
> > > test your changes on my setup to see how this affects having the encoder 
> > > as a module.
> > 
> > What are you expecting to happen with -EAGAIN? This one was a bit of an 
> > oddball.
> 
> When both the HDLCD and the TDA998x drivers are compiled as modules, the
> order in which they are inserted can be somewhat random (due to testing).

Not really "due to testing" but if you run a real distro, they tend to
have a multi-threaded behaviour when loading kernel modules at boot.

> It is at that time when you want the probe of HDLCD to be retried on the
> insmod-ing of the tda998x.ko rather than fail entirely.

-EAGAIN doesn't get you that, and in any case, solving that problem is
exactly why the component API exists - so that DRM only comes up once
all the necessary components are available.

-EAGAIN also doesn't get you that from inside a probe function - such
an error will be reported in the kernel log, and no further action
will be taken (the device driver probe will be failed, and not
automatically retried).

The only case that we automatically retry is if a driver returns
-EPROBE_DEFER.  Everything else causes a probe failure.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.


Re: [PATCH] somedriver: remove the initialization of static pointers.

2017-02-06 Thread Mathieu Poirier
On Sun, Feb 05, 2017 at 08:12:26AM +0100, AbdAllah MEZITI wrote:
> On Sun, 5 Feb 2017 01:29:43 +0100
> Greg Kroah-Hartman  wrote:
> 
> > On Sat, Feb 04, 2017 at 08:39:21PM +0100, AbdAllah-MEZITI wrote:
> > > In C a static pointer will be initialized to NULL (e.g: draft C99
> > > standard $6.7.8): "If an object that has static storage duration is
> > > not initialized explicitly, then:
> > >  __ if it has pointer type, it is initialized to a null pointer;"
> > > 
> > > Signed-off-by: AbdAllah-MEZITI   
> > 
> > Your subject is very odd :(
> > 
> > And is that the correct spelling of your name that you use for legal
> > documents?  If not, please fix up when you resend this.
> > 
> > Also, fix the line-wrapping of your changelog, and the C99 standard is
> > not a "draft" anymore, it was released in 1999 :)
> > 
> > thanks,
> > 
> > greg k-h
> 
> 
> Do not worry, this is just an answer to the Task 10 of the Eudyptula 
> Challenge.

AbdAllah, 

Patches submitted as part of the Eudyptula Challenge are fixing real problems
and most of the time accepted for queueing.  As such the work you submit for it
is serious and should be as perfect as possible.

Happy times with the Challenge,
Mathieu

> 
> yes, this is the correct spelling of my name that i use for legal documents.
> 
> OK.
> 
> AbdAllah MEZITI.


Re: [RFC 1/1] shiftfs: uid/gid shifting bind mount

2017-02-06 Thread James Bottomley
On Mon, 2017-02-06 at 09:38 -0600, l...@pengaru.com wrote:
> On Mon, Feb 06, 2017 at 07:18:16AM -0800, James Bottomley wrote:
> > On Mon, 2017-02-06 at 09:50 -0500, Theodore Ts'o wrote:
> > > On Sun, Feb 05, 2017 at 10:46:23PM -0800, James Bottomley wrote:
> > > > Yes, I know the problem.  However, I believe most current linux
> > > > filesystems no longer guarantee stable, for the lifetime of the
> > > > file, inode numbers.  The usual docker container root is
> > > > overlayfs,
> > > > which, similarly doesn't support stable inode numbers.  I see
> > > > the 
> > > > odd complaint about docker with overlayfs having unstable inode
> > > > numbers, but none seems to have any serious repercussions.
> > > 
> > > Um, no.  Most current linux file systems *do* guarantee stable
> > > inode
> > > numbers.  For one thing, NFS would break horribly if you didn't
> > > have
> > > stable inode numbers.  Never mind applications which depend on
> > > POSIX
> > > semantics.  And you wouldn't be able to save games in rogue or
> > > nethack, either.  :-)
> > 
> > I believe that's why we have the superblock export operations to
> > manufacture unique filehandles in the absence of inode number
> > stability.  The generic one uses inode numbers, but it doesn't have
> > to.
> >  I thought reiserfs (if we can go back that far) was the first
> > generally used filesystem that didn't guarantee stable inode
> > numbers,
> > so we have a lot of historical precedence.
> > 
> > Thanks to reiserfs, I thought we also iterated to weak stability
> > guarantees for inode numbers which mean no inconsistencies in
> > applications that use inode numbers for caching?  It's still not
> > POSIX,
> > but I thought it was good enough for most use cases.
> > 
> 
> Even plain tar extraction is sensitive to directory inode stability:
> http://git.savannah.gnu.org/cgit/tar.git/tree/src/extract.c?h=release
> _1_29#n867
> 
> This caused errors on overlayfs if the extraction churned through 
> enough of the dentry cache to evict the relevant directory (can be 
> forced to reproduce reliably via drop_caches).

Yes, I know the bug.  I think it's up to tar maintainers, but if they
want to support weakly posix filesystems, they should really be using
the filehandle for this check, not device and inode number.

That said, I believe reiserfs was our only other filesystem with weak
inode number stability guarantees and that's hardly in common use
today, so if we can find a solution that gives strong stability
guarantees for our current problem filesystems, there's no reason not
to use it generally.

James




Re: [PATCH 4/5] drm: convert drivers to use drm_of_find_panel_or_bridge

2017-02-06 Thread Rob Herring
On Mon, Feb 06, 2017 at 11:03:01AM +0100, Maxime Ripard wrote:
> Hi Rob,
> 
> On Fri, Feb 03, 2017 at 09:36:34PM -0600, Rob Herring wrote:
> > Similar to the previous commit, convert drivers open coding OF graph
> > parsing to use drm_of_find_panel_or_bridge instead.
> > 
> > This changes some error messages to debug messages (in the graph core).
> > Graph connections are often "no connects" depending on the particular
> > board, so we want to avoid spurious messages. Plus the kernel is not a
> > DT validator.
> > 
> > Signed-off-by: Rob Herring 
> > ---
> 
> [..]
> 
> > diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c 
> > b/drivers/gpu/drm/sun4i/sun4i_rgb.c
> > index f5e86fe7750e..4720725b0fb0 100644
> > --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c
> > +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c
> > @@ -15,6 +15,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  
> >  #include "sun4i_drv.h"
> > @@ -217,12 +218,10 @@ int sun4i_rgb_init(struct drm_device *drm)
> > rgb->drv = drv;
> > encoder = &rgb->encoder;
> >  
> > -   tcon->panel = sun4i_tcon_find_panel(tcon->dev->of_node);
> > -   encoder->bridge = sun4i_tcon_find_bridge(tcon->dev->of_node);
> > -   if (IS_ERR(tcon->panel) && IS_ERR(encoder->bridge)) {
> > -   dev_info(drm->dev, "No panel or bridge found... RGB output 
> > disabled\n");
> > -   return 0;
> > -   }
> > +   ret = drm_of_find_panel_or_bridge(tcon->dev->of_node, 1, 0,
> > + &tcon->panel, &encoder->bridge);
> > +   if (ret)
> > +   return ret;
> 
> It used to ignore the error if it couldn't find the bridge. This will
> break the probe.

Well, I got it half right. :) The probe does that, but this needs to 
too.

> >  
> > drm_encoder_helper_add(&rgb->encoder,
> >&sun4i_rgb_enc_helper_funcs);
> > @@ -239,7 +238,7 @@ int sun4i_rgb_init(struct drm_device *drm)
> > /* The RGB encoder can only work with the TCON channel 0 */
> > rgb->encoder.possible_crtcs = BIT(0);
> >  
> > -   if (!IS_ERR(tcon->panel)) {
> > +   if (tcon->panel) {
> > drm_connector_helper_add(&rgb->connector,
> >  &sun4i_rgb_con_helper_funcs);
> > ret = drm_connector_init(drm, &rgb->connector,
> > @@ -260,7 +259,7 @@ int sun4i_rgb_init(struct drm_device *drm)
> > }
> > }
> >  
> > -   if (!IS_ERR(encoder->bridge)) {
> > +   if (encoder->bridge) {
> > encoder->bridge->encoder = &rgb->encoder;
> >  
> > ret = drm_bridge_attach(drm, encoder->bridge);
> > @@ -268,8 +267,6 @@ int sun4i_rgb_init(struct drm_device *drm)
> > dev_err(drm->dev, "Couldn't attach our bridge\n");
> > goto err_cleanup_connector;
> > }
> > -   } else {
> > -   encoder->bridge = NULL;
> > }
> >  
> > return 0;
> > diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c 
> > b/drivers/gpu/drm/sun4i/sun4i_tcon.c
> > index ea2906f87cb9..2e4e365cecf9 100644
> > --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
> > +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
> > @@ -15,13 +15,12 @@
> >  #include 
> >  #include 
> >  #include 
> > -#include 
> > +#include 
> >  
> >  #include 
> >  #include 
> >  #include 
> >  #include 
> > -#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -405,74 +404,6 @@ static int sun4i_tcon_init_regmap(struct device *dev,
> > return 0;
> >  }
> >  
> > -struct drm_panel *sun4i_tcon_find_panel(struct device_node *node)
> > -{
> > -   struct device_node *port, *remote, *child;
> > -   struct device_node *end_node = NULL;
> > -
> > -   /* Inputs are listed first, then outputs */
> > -   port = of_graph_get_port_by_id(node, 1);
> > -
> > -   /*
> > -* Our first output is the RGB interface where the panel will
> > -* be connected.
> > -*/
> > -   for_each_child_of_node(port, child) {
> > -   u32 reg;
> > -
> > > -   of_property_read_u32(child, "reg", &reg);
> > -   if (reg == 0)
> > -   end_node = child;
> > -   }
> > -
> > -   if (!end_node) {
> > -   DRM_DEBUG_DRIVER("Missing panel endpoint\n");
> > -   return ERR_PTR(-ENODEV);
> > -   }
> > -
> > -   remote = of_graph_get_remote_port_parent(end_node);
> > -   if (!remote) {
> > -   DRM_DEBUG_DRIVER("Unable to parse remote node\n");
> > -   return ERR_PTR(-EINVAL);
> > -   }
> > -
> > -   return of_drm_find_panel(remote) ?: ERR_PTR(-EPROBE_DEFER);
> 
> And the panel is only one of our endpoints, which is optional, while
> other endpoints are mandatory. This means that we might very well have
> an endpoint that is not a panel or a bridge. In this case, I think
> your function will return an error and will be treated as such, while
> it's really the expected behaviour.
> 
> I think it's better to leave this driver alone for now, it's not as
> trivial as it looks, and will require some testing to get things
> right. I'll try to g

Re: Linux 4.9.6 ( Restore IO-APIC irq_chip retrigger callback , breaks my box )

2017-02-06 Thread Gabriel C


On 26.01.2017 08:48, Greg KH wrote:

Hi Greg,


I'm announcing the release of the 4.9.6 kernel.



Somewhat late, however I didn't test 4.9.6 but jumped from 4.9.5 to 4.9.7
and found out my box won't boot anymore.

It hangs early and freezes with a lot of RCU warnings.
Since I cannot setup a netconsole right now I cannot post the errors , really 
sorry.

( but I could make a picture if needed )


I bisected it down to :

> Ruslan Ruslichenko (1):
>   x86/ioapic: Restore IO-APIC irq_chip retrigger callback

Reverting this one fixes the problem for me..

Also this problem exists in Linus tree , I tested on:
4.10.0-rc6-00167-ga0a28644c1cf

The box is a PRIMERGY TX200 S5 , 2 socket , 2 x E5520 CPU(s) installed.

Config:
https://raw.githubusercontent.com/frugalware/frugalware-current/master/source/base/kernel/config.x86_64

Regards,

Gabriel C.


Re: [PATCH v8 3/3] thermal: zx2967: add thermal driver for ZTE's zx2967 family

2017-02-06 Thread Mathieu Poirier
On Sat, Feb 04, 2017 at 11:39:57AM +0800, Baoyou Xie wrote:
> This patch adds thermal driver for ZTE's zx2967 family.
> 
> Signed-off-by: Baoyou Xie 
> ---
>  drivers/thermal/Kconfig  |   8 ++
>  drivers/thermal/Makefile |   1 +
>  drivers/thermal/zx2967_thermal.c | 255 
> +++
>  3 files changed, 264 insertions(+)
>  create mode 100644 drivers/thermal/zx2967_thermal.c
> 
> diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
> index 18f2de6..f64bd50 100644
> --- a/drivers/thermal/Kconfig
> +++ b/drivers/thermal/Kconfig
> @@ -444,4 +444,12 @@ config BCM2835_THERMAL
>   help
> Support for thermal sensors on Broadcom bcm2835 SoCs.
>  
> +config ZX2967_THERMAL
> + tristate "Thermal sensors on zx2967 SoC"
> + depends on ARCH_ZX || COMPILE_TEST
> + help
> +   Enable the zx2967 thermal sensors driver, which supports
> +   the primitive temperature sensor embedded in zx2967 SoCs.
> +   This sensor generates the real time die temperature.
> +
>  endif
> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> index 677c6d9..c00c05e 100644
> --- a/drivers/thermal/Makefile
> +++ b/drivers/thermal/Makefile
> @@ -57,3 +57,4 @@ obj-$(CONFIG_HISI_THERMAL) += hisi_thermal.o
>  obj-$(CONFIG_MTK_THERMAL)+= mtk_thermal.o
>  obj-$(CONFIG_GENERIC_ADC_THERMAL)+= thermal-generic-adc.o
>  obj-$(CONFIG_BCM2835_THERMAL)+= bcm2835_thermal.o
> +obj-$(CONFIG_ZX2967_THERMAL) += zx2967_thermal.o
> diff --git a/drivers/thermal/zx2967_thermal.c 
> b/drivers/thermal/zx2967_thermal.c
> new file mode 100644
> index 000..d177238
> --- /dev/null
> +++ b/drivers/thermal/zx2967_thermal.c
> @@ -0,0 +1,255 @@
> +/*
> + * ZTE's zx2967 family thermal sensor driver
> + *
> + * Copyright (C) 2017 ZTE Ltd.
> + *
> + * Author: Baoyou Xie 
> + *
> + * License terms: GNU General Public License (GPL) version 2
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/* Power Mode: 0->low 1->high */
> +#define ZX2967_THERMAL_POWER_MODE0
> +#define ZX2967_POWER_MODE_LOW0
> +#define ZX2967_POWER_MODE_HIGH   1
> +
> +/* DCF Control Register */
> +#define ZX2967_THERMAL_DCF   0x4
> +#define ZX2967_DCF_ENBIT(1)
> +#define ZX2967_DCF_FREEZEBIT(0)
> +
> +/* Selection Register */
> +#define ZX2967_THERMAL_SEL   0x8
> +
> +/* Control Register */
> +#define ZX2967_THERMAL_CTRL  0x10
> +
> +#define ZX2967_THERMAL_READY BIT(12)
> +#define ZX2967_THERMAL_TEMP_MASK GENMASK(11, 0)
> +#define ZX2967_THERMAL_ID_MASK   0x18
> +#define ZX2967_THERMAL_ID0x10
> +
> +#define ZX2967_GET_TEMP_TIMEOUT_US   (100 * 1024)
> +
> +/* zx2967 Thermal Sensor Private Structure */
> +struct zx2967_thermal_priv {
> + /* struct thermal_zone_device where the sensor is registered */
> + struct thermal_zone_device  *tzd;
> + /* prevents reads sensor in parallel */
> + struct mutexlock;
> + /* topcrm clk structure */
> + struct clk  *clk_topcrm;
> + /* apb clk structure */
> + struct clk  *clk_apb;
> + /* pointer to base address of the thermal sensor */
> + void __iomem*regs;
> + struct device   *dev;
> +};

Apologies for not being clearer before.  The proper way to document a structure
can be found here [1].

[1]. http://lxr.free-electrons.com/source/drivers/thermal/of-thermal.c#L39

> +
> +static int zx2967_thermal_get_temp(void *data, int *temp)
> +{
> + void __iomem *regs;
> + struct zx2967_thermal_priv *priv = data;
> + u32 val;
> + int ret;
> +
> + if (!priv->tzd)
> + return -EAGAIN;
> +
> + regs = priv->regs;
> + mutex_lock(&priv->lock);
> + writel_relaxed(ZX2967_POWER_MODE_LOW,
> +regs + ZX2967_THERMAL_POWER_MODE);
> + writel_relaxed(ZX2967_DCF_EN, regs + ZX2967_THERMAL_DCF);
> +
> + val = readl_relaxed(regs + ZX2967_THERMAL_SEL);
> + val &= ~ZX2967_THERMAL_ID_MASK;
> + val |= ZX2967_THERMAL_ID;
> + writel_relaxed(val, regs + ZX2967_THERMAL_SEL);
> +
> + /*
> +  * Must wait for a while, surely it's a bit odd.
> +  * otherwise temperature value we got has a few deviation, even if
> +  * the THERMAL_READY bit is set.
> +  */
> + usleep_range(100, 300);
> + ret = readx_poll_timeout(readl, regs + ZX2967_THERMAL_CTRL,
> +  val, val & ZX2967_THERMAL_READY, 300,
> +  ZX2967_GET_TEMP_TIMEOUT_US);
> + if (ret) {
> + dev_err(priv->dev, "Thermal sensor data timeout\n");
> + goto unlock;
> + }
> +
> + writel_relaxed(ZX2967_DCF_FREEZE | ZX2967_DCF_EN,
> +regs + ZX2967_THERMAL_DCF);
> + val = readl_relaxed(regs + ZX2967_THERMAL_CTRL

Re: [PATCH 3/5] drm: convert drivers to use of_graph_get_remote_node

2017-02-06 Thread Liviu Dudau
On Mon, Feb 06, 2017 at 11:09:49AM -0600, Rob Herring wrote:
> On Mon, Feb 06, 2017 at 10:29:33AM +, Liviu Dudau wrote:
> > On Fri, Feb 03, 2017 at 09:36:33PM -0600, Rob Herring wrote:
> > > Convert drivers to use the new of_graph_get_remote_node() helper
> > > instead of parsing the endpoint node and then getting the remote device
> > > node. Now drivers can just specify the device node and which
> > > port/endpoint and get back the connected remote device node. The details
> > > of the graph binding are nicely abstracted into the core OF graph code.
> > > 
> > > This changes some error messages to debug messages (in the graph core).
> > > Graph connections are often "no connects" depending on the particular
> > > board, so we want to avoid spurious messages. Plus the kernel is not a
> > > DT validator.
> > > 
> > > Signed-off-by: Rob Herring 
> > > ---
> > >  drivers/gpu/drm/arm/hdlcd_drv.c | 22 ++---
> > >  drivers/gpu/drm/arm/malidp_drv.c| 29 ++-
> > >  drivers/gpu/drm/bridge/adv7511/adv7533.c| 12 +
> > >  drivers/gpu/drm/bridge/dumb-vga-dac.c   | 15 ++
> > >  drivers/gpu/drm/bridge/ti-tfp410.c  | 15 ++
> > >  drivers/gpu/drm/exynos/exynos_drm_dpi.c | 16 +-
> > >  drivers/gpu/drm/exynos/exynos_drm_dsi.c | 13 ++---
> > >  drivers/gpu/drm/exynos/exynos_drm_mic.c | 27 +-
> > >  drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c| 26 ++
> > >  drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c | 30 +--
> > >  drivers/gpu/drm/mediatek/mtk_dpi.c  | 12 ++---
> > >  drivers/gpu/drm/mediatek/mtk_hdmi.c | 26 ++
> > >  drivers/gpu/drm/meson/meson_drv.c   | 12 ++---
> > >  drivers/gpu/drm/meson/meson_venc_cvbs.c | 19 ++-
> > >  drivers/gpu/drm/msm/dsi/dsi_host.c  |  3 +-
> > >  drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c | 28 +--
> > >  drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 18 +++
> > >  drivers/gpu/drm/tilcdc/tilcdc_crtc.c| 11 +
> > >  drivers/gpu/drm/tilcdc/tilcdc_external.c| 66 
> > > +++--
> > >  drivers/gpu/drm/vc4/vc4_dpi.c   | 15 ++
> > >  20 files changed, 64 insertions(+), 351 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c 
> > > b/drivers/gpu/drm/arm/hdlcd_drv.c
> > > index e5f4f4a6546d..0f70f5fe9970 100644
> > > --- a/drivers/gpu/drm/arm/hdlcd_drv.c
> > > +++ b/drivers/gpu/drm/arm/hdlcd_drv.c
> > > @@ -430,29 +430,13 @@ static int compare_dev(struct device *dev, void 
> > > *data)
> > >  
> > >  static int hdlcd_probe(struct platform_device *pdev)
> > >  {
> > > - struct device_node *port, *ep;
> > > + struct device_node *port;
> > >   struct component_match *match = NULL;
> > >  
> > > - if (!pdev->dev.of_node)
> > > - return -ENODEV;
> > > -
> > >   /* there is only one output port inside each device, find it */
> > > - ep = of_graph_get_next_endpoint(pdev->dev.of_node, NULL);
> > > - if (!ep)
> > > - return -ENODEV;
> > > -
> > > - if (!of_device_is_available(ep)) {
> > > - of_node_put(ep);
> > > + port = of_graph_get_remote_node(pdev->dev.of_node, 0, 0);
> > > + if (!port)
> > >   return -ENODEV;
> > > - }
> > > -
> > > - /* add the remote encoder port as component */
> > > - port = of_graph_get_remote_port_parent(ep);
> > > - of_node_put(ep);
> > > - if (!port || !of_device_is_available(port)) {
> > > - of_node_put(port);
> > > - return -EAGAIN;
> > 
> > The HDLCD change looks reasonable except for this -EAGAIN business. I'll 
> > have to
> > test your changes on my setup to see how this affects having the encoder as 
> > a module.
> 
> What are you expecting to happen with -EAGAIN? This one was a bit of an 
> oddball.

When both the HDLCD and the TDA998x drivers are compiled as modules, the order
in which they are inserted can be somewhat random (due to testing). That is
when you want the probe of HDLCD to be retried once tda998x.ko is insmod-ed,
rather than failing entirely.

> 
> This condition would only change if you had an overlay. That's a use 
> case that needs to be handled in a common way ('cause I don't want to 
> clean-up every driver doing overlays in their own way latter). Just 
> having "status" changing at runtime would have all sorts of implications 
> in the kernel.

Hmm, not sure what you mean here with overlays. Are you thinking that the
remote port is initially disabled and then re-enabled by an overlay? That is
not the only way of_device_is_available() can fail, see above regarding modules.

Best regards,
Liviu

> 
> > 
> > > - }
> > >  
> > >   drm_of_component_match_add(&pdev->dev, &match, compare_dev, port);
> > >   of_node_put(port);
> > > diff --git a/drivers/gpu/drm/arm/malidp_drv.c 
> > > b/drivers/gpu/drm/arm/malidp_drv.c
> > > index 32f746e31379..bfa04be7f5de 100644
> > > --- a/drivers/gpu/dr

[PATCH 2/4] staging:r8188eu: remove unused rtw_ieee80211_bar structure definition

2017-02-06 Thread Ivan Safonov
The rtw_ieee80211_bar structure definition is not used. Remove it.

Signed-off-by: Ivan Safonov 
---
 drivers/staging/rtl8188eu/include/wifi.h | 15 ---
 1 file changed, 15 deletions(-)

diff --git a/drivers/staging/rtl8188eu/include/wifi.h 
b/drivers/staging/rtl8188eu/include/wifi.h
index 5630dcb..cb46d35 100644
--- a/drivers/staging/rtl8188eu/include/wifi.h
+++ b/drivers/staging/rtl8188eu/include/wifi.h
@@ -479,21 +479,6 @@ static inline int IsFrameTypeCtrl(unsigned char *pframe)
Below is the definition for 802.11n
 
--*/
 
-/**
- * struct rtw_ieee80211_bar - HT Block Ack Request
- *
- * This structure refers to "HT BlockAckReq" as
- * described in 802.11n draft section 7.2.1.7.1
- */
-struct rtw_ieee80211_bar {
-   unsigned short frame_control;
-   unsigned short duration;
-   unsigned char ra[6];
-   unsigned char ta[6];
-   unsigned short control;
-   unsigned short start_seq_num;
-} __packed;
-
 /* 802.11 BAR control masks */
 #define IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL 0x
 #define IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA  0x0004
-- 
2.10.2



Re: [PATCH v9 3/3] watchdog: zx2967: add watchdog controller driver for ZTE's zx2967 family

2017-02-06 Thread Mathieu Poirier
On Sat, Feb 04, 2017 at 09:34:15AM +0800, Baoyou Xie wrote:
> This patch adds watchdog controller driver for ZTE's zx2967 family.
>

Reviewed-by: Mathieu Poirier 
 
> Signed-off-by: Baoyou Xie 
> ---
>  drivers/watchdog/Kconfig  |  10 ++
>  drivers/watchdog/Makefile |   1 +
>  drivers/watchdog/zx2967_wdt.c | 291 
> ++
>  3 files changed, 302 insertions(+)
>  create mode 100644 drivers/watchdog/zx2967_wdt.c
> 
> diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
> index acb00b5..05093a2 100644
> --- a/drivers/watchdog/Kconfig
> +++ b/drivers/watchdog/Kconfig
> @@ -714,6 +714,16 @@ config ASPEED_WATCHDOG
> To compile this driver as a module, choose M here: the
> module will be called aspeed_wdt.
>  
> +config ZX2967_WATCHDOG
> + tristate "ZTE zx2967 SoCs watchdog support"
> + depends on ARCH_ZX
> + select WATCHDOG_CORE
> + help
> +   Say Y here to include support for the watchdog timer
> +   in ZTE zx2967 SoCs.
> +   To compile this driver as a module, choose M here: the
> +   module will be called zx2967_wdt.
> +
>  # AVR32 Architecture
>  
>  config AT32AP700X_WDT
> diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
> index 0c3d35e..bf2d296 100644
> --- a/drivers/watchdog/Makefile
> +++ b/drivers/watchdog/Makefile
> @@ -82,6 +82,7 @@ obj-$(CONFIG_BCM7038_WDT) += bcm7038_wdt.o
>  obj-$(CONFIG_ATLAS7_WATCHDOG) += atlas7_wdt.o
>  obj-$(CONFIG_RENESAS_WDT) += renesas_wdt.o
>  obj-$(CONFIG_ASPEED_WATCHDOG) += aspeed_wdt.o
> +obj-$(CONFIG_ZX2967_WATCHDOG) += zx2967_wdt.o
>  
>  # AVR32 Architecture
>  obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o
> diff --git a/drivers/watchdog/zx2967_wdt.c b/drivers/watchdog/zx2967_wdt.c
> new file mode 100644
> index 000..e290d5a
> --- /dev/null
> +++ b/drivers/watchdog/zx2967_wdt.c
> @@ -0,0 +1,291 @@
> +/*
> + * watchdog driver for ZTE's zx2967 family
> + *
> + * Copyright (C) 2017 ZTE Ltd.
> + *
> + * Author: Baoyou Xie 
> + *
> + * License terms: GNU General Public License (GPL) version 2
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#define ZX2967_WDT_CFG_REG   0x4
> +#define ZX2967_WDT_LOAD_REG  0x8
> +#define ZX2967_WDT_REFRESH_REG   0x18
> +#define ZX2967_WDT_START_REG 0x1c
> +
> +#define ZX2967_WDT_REFRESH_MASK  GENMASK(5, 0)
> +
> +#define ZX2967_WDT_CFG_DIV(n)n) & 0xff) - 1) << 
> 8)
> +#define ZX2967_WDT_START_EN  0x1
> +
> +/*
> + * Hardware magic number.
> + * When watchdog reg is written, the lowest 16 bits are valid, but
> + * the highest 16 bits should be always this number.
> + */
> +#define ZX2967_WDT_WRITEKEY  (0x1234 << 16)
> +#define ZX2967_WDT_VAL_MASK  GENMASK(15, 0)
> +
> +#define ZX2967_WDT_DIV_DEFAULT   16
> +#define ZX2967_WDT_DEFAULT_TIMEOUT   32
> +#define ZX2967_WDT_MIN_TIMEOUT   1
> +#define ZX2967_WDT_MAX_TIMEOUT   524
> +#define ZX2967_WDT_MAX_COUNT 0x
> +
> +#define ZX2967_WDT_CLK_FREQ  0x8000
> +
> +#define ZX2967_WDT_FLAG_REBOOT_MON   BIT(0)
> +
> +struct zx2967_wdt {
> + struct watchdog_device  wdt_device;
> + void __iomem*reg_base;
> + struct clk  *clock;
> +};
> +
> +static inline u32 zx2967_wdt_readl(struct zx2967_wdt *wdt, u16 reg)
> +{
> + return readl_relaxed(wdt->reg_base + reg);
> +}
> +
> +static inline void zx2967_wdt_writel(struct zx2967_wdt *wdt, u16 reg, u32 
> val)
> +{
> + writel_relaxed(val | ZX2967_WDT_WRITEKEY, wdt->reg_base + reg);
> +}
> +
> +static void zx2967_wdt_refresh(struct zx2967_wdt *wdt)
> +{
> + u32 val;
> +
> + val = zx2967_wdt_readl(wdt, ZX2967_WDT_REFRESH_REG);
> + /*
> +  * Bit 4-5, 1 and 2: refresh config info
> +  * Bit 2-3, 1 and 2: refresh counter
> +  * Bit 0-1, 1 and 2: refresh int-value
> +  * we shift each group value between 1 and 2 to refresh all data.
> +  */
> + val ^= ZX2967_WDT_REFRESH_MASK;
> + zx2967_wdt_writel(wdt, ZX2967_WDT_REFRESH_REG,
> +   val & ZX2967_WDT_VAL_MASK);
> +}
> +
> +static int
> +zx2967_wdt_set_timeout(struct watchdog_device *wdd, unsigned int timeout)
> +{
> + struct zx2967_wdt *wdt = watchdog_get_drvdata(wdd);
> + unsigned int divisor = ZX2967_WDT_DIV_DEFAULT;
> + u32 count;
> +
> + count = timeout * ZX2967_WDT_CLK_FREQ;
> + if (count > divisor * ZX2967_WDT_MAX_COUNT)
> + divisor = DIV_ROUND_UP(count, ZX2967_WDT_MAX_COUNT);
> + count = DIV_ROUND_UP(count, divisor);
> + zx2967_wdt_writel(wdt, ZX2967_WDT_CFG_REG,
> + ZX2967_WDT_CFG_DIV(divisor) & ZX2967_WDT_VAL_MASK);
> + zx2967_wdt_writel(wdt, ZX2967_WDT_LOAD_REG,
> +

[PATCH 1/4] staging:r8188eu: replace rx_end member of recv_frame with pkt->end

2017-02-06 Thread Ivan Safonov
rx_end is a duplicate of the pkt->end pointer.
pkt->end is preferred because it is a native skb field
supported by the skb_*() functions.

Signed-off-by: Ivan Safonov 
---
 drivers/staging/rtl8188eu/include/rtw_recv.h | 3 +--
 drivers/staging/rtl8188eu/os_dep/usb_ops_linux.c | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/rtl8188eu/include/rtw_recv.h 
b/drivers/staging/rtl8188eu/include/rtw_recv.h
index 591dd9d..2e5caa4 100644
--- a/drivers/staging/rtl8188eu/include/rtw_recv.h
+++ b/drivers/staging/rtl8188eu/include/rtw_recv.h
@@ -231,7 +231,6 @@ struct recv_frame {
uint  len;
u8 *rx_data;
u8 *rx_tail;
-   u8 *rx_end;
struct sta_info *psta;
/* for A-MPDU Rx reordering buffer control */
struct recv_reorder_ctrl *preorder_ctrl;
@@ -282,7 +281,7 @@ static inline u8 *recvframe_put(struct recv_frame 
*precvframe, int sz)
 
precvframe->rx_tail += sz;
 
-   if (precvframe->rx_tail > precvframe->rx_end) {
+   if (precvframe->rx_tail > precvframe->pkt->end) {
precvframe->rx_tail -= sz;
return NULL;
}
diff --git a/drivers/staging/rtl8188eu/os_dep/usb_ops_linux.c 
b/drivers/staging/rtl8188eu/os_dep/usb_ops_linux.c
index fd5cb8a..cdb2fd3 100644
--- a/drivers/staging/rtl8188eu/os_dep/usb_ops_linux.c
+++ b/drivers/staging/rtl8188eu/os_dep/usb_ops_linux.c
@@ -125,7 +125,6 @@ static int recvbuf2recvframe(struct adapter *adapt, struct 
sk_buff *pskb)
if (pkt_copy) {
pkt_copy->dev = adapt->pnetdev;
precvframe->pkt = pkt_copy;
-   precvframe->rx_end = pkt_copy->data + alloc_sz;
skb_reserve(pkt_copy, 8 - ((size_t)(pkt_copy->data) & 
7));/* force pkt_copy->data at 8-byte alignment address */
skb_reserve(pkt_copy, shift_sz);/* force ip_hdr at 
8-byte alignment address according to shift_sz. */
memcpy(pkt_copy->data, (pbuf + pattrib->drvinfo_sz + 
RXDESC_SIZE), skb_len);
-- 
2.10.2



[PATCH 4/4] staging:r8188eu: replace recv_frame->rx_(data|len|tail) with pkt->(data|len|tail) and remove unused recvframe_(put|pull|pull_tail)()

2017-02-06 Thread Ivan Safonov
recv_frame->rx_(data|len|tail) duplicate pkt (skb) data|len|tail members
and require special functions recvframe_(put|pull|pull_tail)()
instead of skb_(put|pull|trim).
Replace rx_(data|len|tail) with pkt->(data|len|tail),
remove rx_(data|len|tail) and remove recvframe_(put|pull|pull_tail)().

Signed-off-by: Ivan Safonov 
---
 drivers/staging/rtl8188eu/core/rtw_mlme_ext.c| 52 +++---
 drivers/staging/rtl8188eu/core/rtw_recv.c| 92 
 drivers/staging/rtl8188eu/core/rtw_security.c| 12 ++--
 drivers/staging/rtl8188eu/hal/rtl8188e_rxdesc.c  |  2 +-
 drivers/staging/rtl8188eu/include/rtw_recv.h | 62 
 drivers/staging/rtl8188eu/os_dep/mon.c   |  4 +-
 drivers/staging/rtl8188eu/os_dep/recv_linux.c|  6 --
 drivers/staging/rtl8188eu/os_dep/usb_ops_linux.c | 12 ++--
 8 files changed, 83 insertions(+), 159 deletions(-)

diff --git a/drivers/staging/rtl8188eu/core/rtw_mlme_ext.c 
b/drivers/staging/rtl8188eu/core/rtw_mlme_ext.c
index 2933479..f45af40 100644
--- a/drivers/staging/rtl8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/rtl8188eu/core/rtw_mlme_ext.c
@@ -2053,8 +2053,8 @@ static u8 collect_bss_info(struct adapter *padapter,
u32 len;
u8 *p;
u16 val16, subtype;
-   u8 *pframe = precv_frame->rx_data;
-   u32 packet_len = precv_frame->len;
+   u8 *pframe = precv_frame->pkt->data;
+   u32 packet_len = precv_frame->pkt->len;
u8 ie_offset;
struct registry_priv*pregistrypriv = &padapter->registrypriv;
struct mlme_ext_priv*pmlmeext = &padapter->mlmeextpriv;
@@ -2563,8 +2563,8 @@ static unsigned int OnProbeReq(struct adapter *padapter,
struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv;
struct mlme_ext_info*pmlmeinfo = &(pmlmeext->mlmext_info);
struct wlan_bssid_ex *cur = &(pmlmeinfo->network);
-   u8 *pframe = precv_frame->rx_data;
-   uint len = precv_frame->len;
+   u8 *pframe = precv_frame->pkt->data;
+   uint len = precv_frame->pkt->len;
 
if (check_fwstate(pmlmepriv, WIFI_STATION_STATE))
return _SUCCESS;
@@ -2611,8 +2611,8 @@ static unsigned int OnBeacon(struct adapter *padapter,
struct mlme_ext_info*pmlmeinfo = &(pmlmeext->mlmext_info);
struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
struct sta_priv *pstapriv = &padapter->stapriv;
-   u8 *pframe = precv_frame->rx_data;
-   uint len = precv_frame->len;
+   u8 *pframe = precv_frame->pkt->data;
+   uint len = precv_frame->pkt->len;
struct wlan_bssid_ex *pbss;
int ret = _SUCCESS;
struct wlan_bssid_ex *pnetwork = &(pmlmeinfo->network);
@@ -2708,8 +2708,8 @@ static unsigned int OnAuth(struct adapter *padapter,
struct security_priv *psecuritypriv = &padapter->securitypriv;
struct mlme_ext_priv*pmlmeext = &padapter->mlmeextpriv;
struct mlme_ext_info*pmlmeinfo = &(pmlmeext->mlmext_info);
-   u8 *pframe = precv_frame->rx_data;
-   uint len = precv_frame->len;
+   u8 *pframe = precv_frame->pkt->data;
+   uint len = precv_frame->pkt->len;
 
if ((pmlmeinfo->state&0x03) != WIFI_FW_AP_STATE)
return _FAIL;
@@ -2871,8 +2871,8 @@ static unsigned int OnAuthClient(struct adapter *padapter,
unsigned intgo2asoc = 0;
struct mlme_ext_priv*pmlmeext = &padapter->mlmeextpriv;
struct mlme_ext_info*pmlmeinfo = &(pmlmeext->mlmext_info);
-   u8 *pframe = precv_frame->rx_data;
-   uint pkt_len = precv_frame->len;
+   u8 *pframe = precv_frame->pkt->data;
+   uint pkt_len = precv_frame->pkt->len;
 
DBG_88E("%s\n", __func__);
 
@@ -2959,8 +2959,8 @@ static unsigned int OnAssocReq(struct adapter *padapter,
struct mlme_ext_info*pmlmeinfo = &(pmlmeext->mlmext_info);
struct wlan_bssid_ex *cur = &(pmlmeinfo->network);
struct sta_priv *pstapriv = &padapter->stapriv;
-   u8 *pframe = precv_frame->rx_data;
-   uint pkt_len = precv_frame->len;
+   u8 *pframe = precv_frame->pkt->data;
+   uint pkt_len = precv_frame->pkt->len;
 
if ((pmlmeinfo->state&0x03) != WIFI_FW_AP_STATE)
return _FAIL;
@@ -3391,8 +3391,8 @@ static unsigned int OnAssocRsp(struct adapter *padapter,
struct mlme_ext_priv*pmlmeext = &padapter->mlmeextpriv;
struct mlme_ext_info*pmlmeinfo = &(pmlmeext->mlmext_info);
/* struct wlan_bssid_ex *cur_network = &(pmlmeinfo->network); */
-   u8 *pframe = precv_frame->rx_data;
-   uint pkt_len = precv_frame->len;
+   u8 *pframe = precv_frame->pkt->data;
+   uint pkt_len = precv_frame->pkt->len;
 
DBG_88E("%s\n", __func__);
 
@@ -3476,7 +3476,7 @@ static unsigned int OnDeAuth(struct adapter *padapter,
struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
struct mlme_ext_priv*pmlmeext = &padapter->mlmeextpriv;
struct mlme_ext_info

[PATCH 3/4] staging:r8188eu: update pkt->(data|tail|len) synchronously with rx_(data|tail|len) in recv_frame structure

2017-02-06 Thread Ivan Safonov
The original driver code uses the rx_* members to store the skb (pkt) fields
(instead of the pkt->* members); pkt->* is updated only after the data is
completely formed, not while it is being processed.

Update pkt->* whenever the data buffer changes (together with rx_*).

Signed-off-by: Ivan Safonov 
---
 drivers/staging/rtl8188eu/include/rtw_recv.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8188eu/include/rtw_recv.h 
b/drivers/staging/rtl8188eu/include/rtw_recv.h
index 2e5caa4..d4d8a74 100644
--- a/drivers/staging/rtl8188eu/include/rtw_recv.h
+++ b/drivers/staging/rtl8188eu/include/rtw_recv.h
@@ -261,6 +261,8 @@ static inline u8 *recvframe_pull(struct recv_frame 
*precvframe, int sz)
 
if (precvframe == NULL)
return NULL;
+
+   skb_pull(precvframe->pkt, sz);
precvframe->rx_data += sz;
if (precvframe->rx_data > precvframe->rx_tail) {
precvframe->rx_data -= sz;
@@ -278,7 +280,7 @@ static inline u8 *recvframe_put(struct recv_frame 
*precvframe, int sz)
 
if (precvframe == NULL)
return NULL;
-
+   skb_put(precvframe->pkt, sz);
precvframe->rx_tail += sz;
 
if (precvframe->rx_tail > precvframe->pkt->end) {
@@ -299,6 +301,7 @@ static inline u8 *recvframe_pull_tail(struct recv_frame 
*precvframe, int sz)
 
if (precvframe == NULL)
return NULL;
+   skb_trim(precvframe->pkt, precvframe->pkt->len - sz);
precvframe->rx_tail -= sz;
if (precvframe->rx_tail < precvframe->rx_data) {
precvframe->rx_tail += sz;
-- 
2.10.2



Re: [PATCH] perf/x86/intel/pt: Allow disabling branch tracing

2017-02-06 Thread Andi Kleen
On Mon, Feb 06, 2017 at 06:05:29PM +0200, Alexander Shishkin wrote:
> Andi Kleen  writes:
> 
> > Alexander Shishkin  writes:
> >
> >> Now that Intel PT supports more types of trace content than just branch
> >> tracing, it may be useful to allow the user to disable branch tracing
> >> when it is not needed.
> >>
> >> The special case is BDW, where not setting BranchEn is not supported.
> >>
> >> This is slightly trickier than necessary, because up to this moment
> >> the driver has been setting BranchEn automatically and the userspace
> >> assumes as much. Instead of reversing the semantics of BranchEn, we
> >> introduce a 'passthrough' bit, which will forego the default and allow
> >> the user to set BranchEn to their heart's content.
> >
> > > cpu/passthrough=1,branchen=1/ seems far uglier/more complicated to me
> > > than the original cpu/nobranch=1/
> 
> It's /passthrough=1,branch=0/ or simply /passthrough=1/.

Ok, but you still have to list exactly which flags passthrough
applies to, and it will only ever be branchen.

So basically you turned nobranch=1 into two more difficult to
explain flags without any future advantage.

That is why nobranch=1 is better. It is far easier to explain
and logical to the user.

-Andi


Re: [PATCH 3/5] drm: convert drivers to use of_graph_get_remote_node

2017-02-06 Thread Rob Herring
On Mon, Feb 06, 2017 at 10:29:33AM +0000, Liviu Dudau wrote:
> On Fri, Feb 03, 2017 at 09:36:33PM -0600, Rob Herring wrote:
> > Convert drivers to use the new of_graph_get_remote_node() helper
> > instead of parsing the endpoint node and then getting the remote device
> > node. Now drivers can just specify the device node and which
> > port/endpoint and get back the connected remote device node. The details
> > of the graph binding are nicely abstracted into the core OF graph code.
> > 
> > This changes some error messages to debug messages (in the graph core).
> > Graph connections are often "no connects" depending on the particular
> > board, so we want to avoid spurious messages. Plus the kernel is not a
> > DT validator.
> > 
> > Signed-off-by: Rob Herring 
> > ---
> >  drivers/gpu/drm/arm/hdlcd_drv.c | 22 ++---
> >  drivers/gpu/drm/arm/malidp_drv.c| 29 ++-
> >  drivers/gpu/drm/bridge/adv7511/adv7533.c| 12 +
> >  drivers/gpu/drm/bridge/dumb-vga-dac.c   | 15 ++
> >  drivers/gpu/drm/bridge/ti-tfp410.c  | 15 ++
> >  drivers/gpu/drm/exynos/exynos_drm_dpi.c | 16 +-
> >  drivers/gpu/drm/exynos/exynos_drm_dsi.c | 13 ++---
> >  drivers/gpu/drm/exynos/exynos_drm_mic.c | 27 +-
> >  drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c| 26 ++
> >  drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c | 30 +--
> >  drivers/gpu/drm/mediatek/mtk_dpi.c  | 12 ++---
> >  drivers/gpu/drm/mediatek/mtk_hdmi.c | 26 ++
> >  drivers/gpu/drm/meson/meson_drv.c   | 12 ++---
> >  drivers/gpu/drm/meson/meson_venc_cvbs.c | 19 ++-
> >  drivers/gpu/drm/msm/dsi/dsi_host.c  |  3 +-
> >  drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c | 28 +--
> >  drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 18 +++
> >  drivers/gpu/drm/tilcdc/tilcdc_crtc.c| 11 +
> >  drivers/gpu/drm/tilcdc/tilcdc_external.c| 66 
> > +++--
> >  drivers/gpu/drm/vc4/vc4_dpi.c   | 15 ++
> >  20 files changed, 64 insertions(+), 351 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c 
> > b/drivers/gpu/drm/arm/hdlcd_drv.c
> > index e5f4f4a6546d..0f70f5fe9970 100644
> > --- a/drivers/gpu/drm/arm/hdlcd_drv.c
> > +++ b/drivers/gpu/drm/arm/hdlcd_drv.c
> > @@ -430,29 +430,13 @@ static int compare_dev(struct device *dev, void *data)
> >  
> >  static int hdlcd_probe(struct platform_device *pdev)
> >  {
> > -   struct device_node *port, *ep;
> > +   struct device_node *port;
> > struct component_match *match = NULL;
> >  
> > -   if (!pdev->dev.of_node)
> > -   return -ENODEV;
> > -
> > /* there is only one output port inside each device, find it */
> > -   ep = of_graph_get_next_endpoint(pdev->dev.of_node, NULL);
> > -   if (!ep)
> > -   return -ENODEV;
> > -
> > -   if (!of_device_is_available(ep)) {
> > -   of_node_put(ep);
> > +   port = of_graph_get_remote_node(pdev->dev.of_node, 0, 0);
> > +   if (!port)
> > return -ENODEV;
> > -   }
> > -
> > -   /* add the remote encoder port as component */
> > -   port = of_graph_get_remote_port_parent(ep);
> > -   of_node_put(ep);
> > -   if (!port || !of_device_is_available(port)) {
> > -   of_node_put(port);
> > -   return -EAGAIN;
> 
> The HDLCD change looks reasonable except for this -EAGAIN business. I'll have 
> to
> test your changes on my setup to see how this affects having the encoder as a 
> module.

What are you expecting to happen with -EAGAIN? This one was a bit of an 
oddball. 

This condition would only change if you had an overlay. That's a use 
case that needs to be handled in a common way ('cause I don't want to 
clean up every driver doing overlays in their own way later). Just 
having "status" changing at runtime would have all sorts of implications 
in the kernel.

> 
> > -   }
> >  
> > drm_of_component_match_add(&pdev->dev, &match, compare_dev, port);
> > of_node_put(port);
> > diff --git a/drivers/gpu/drm/arm/malidp_drv.c 
> > b/drivers/gpu/drm/arm/malidp_drv.c
> > index 32f746e31379..bfa04be7f5de 100644
> > --- a/drivers/gpu/drm/arm/malidp_drv.c
> > +++ b/drivers/gpu/drm/arm/malidp_drv.c
> > @@ -262,7 +262,6 @@ static int malidp_bind(struct device *dev)
> >  {
> > struct resource *res;
> > struct drm_device *drm;
> > -   struct device_node *ep;
> > struct malidp_drm *malidp;
> > struct malidp_hw_device *hwdev;
> > struct platform_device *pdev = to_platform_device(dev);
> > @@ -360,12 +359,7 @@ static int malidp_bind(struct device *dev)
> > goto init_fail;
> >  
> > /* Set the CRTC's port so that the encoder component can find it */
> > -   ep = of_graph_get_next_endpoint(dev->of_node, NULL);
> > -   if (!ep) {
> > -   ret = -EINVAL;
> > -   goto port_fail;
> > -   }
> > -   malidp->crtc.port = of_get_next_par

[PATCH] mm/autonuma: don't use set_pte_at when updating protnone ptes

2017-02-06 Thread Aneesh Kumar K.V
Architectures like ppc64 use the privilege access bit to mark a pte
non-accessible. This implies that the kernel can do a copy_to_user to an
address marked for numa fault. This also implies that there can be a
parallel hardware update of the pte. set_pte_at cannot be used in such
scenarios. Hence switch the pte update to use a ptep_get_and_clear and
set_pte_at combination.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/pgtable.c |  7 +--
 mm/memory.c   | 18 +-
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index cb39c8bd2436..b8ac81a16389 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -186,12 +186,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct 
vm_area_struct *vma,
 void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
pte_t pte)
 {
-   /*
-* When handling numa faults, we already have the pte marked
-* _PAGE_PRESENT, but we can be sure that it is not in hpte.
-* Hence we can use set_pte_at for them.
-*/
-   VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
+   VM_WARN_ON(pte_present(*ptep));
 
/*
 * Add the pte bit when tryint set a pte
diff --git a/mm/memory.c b/mm/memory.c
index 6bf2b471e30c..e78bf72f30dd 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3387,32 +3387,32 @@ static int do_numa_page(struct vm_fault *vmf)
int last_cpupid;
int target_nid;
bool migrated = false;
-   pte_t pte = vmf->orig_pte;
-   bool was_writable = pte_write(pte);
+   pte_t pte;
+   bool was_writable = pte_write(vmf->orig_pte);
int flags = 0;
 
/*
* The "pte" at this point cannot be used safely without
* validation through pte_unmap_same(). It's of NUMA type but
* the pfn may be screwed if the read is non atomic.
-   *
-   * We can safely just do a "set_pte_at()", because the old
-   * page table entry is not accessible, so there would be no
-   * concurrent hardware modifications to the PTE.
*/
vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd);
spin_lock(vmf->ptl);
-   if (unlikely(!pte_same(*vmf->pte, pte))) {
+   if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
}
 
-   /* Make it present again */
+   /*
+* Make it present again, Depending on how arch implementes non
+* accessible ptes, some can allow access by kernel mode.
+*/
+   pte = ptep_modify_prot_start(vma->vm_mm, vmf->address, vmf->pte);
pte = pte_modify(pte, vma->vm_page_prot);
pte = pte_mkyoung(pte);
if (was_writable)
pte = pte_mkwrite(pte);
-   set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
+   ptep_modify_prot_commit(vma->vm_mm, vmf->address, vmf->pte, pte);
update_mmu_cache(vma, vmf->address, vmf->pte);
 
page = vm_normal_page(vma, vmf->address, pte);
-- 
2.7.4



Re: [PATCH] [net-next?] hns: avoid stack overflow with CONFIG_KASAN

2017-02-06 Thread David Miller
From: Arnd Bergmann 
Date: Fri,  3 Feb 2017 17:35:46 +0100

> The use of ACCESS_ONCE() looks like a micro-optimization to force gcc to use
> an indexed load for the register address, but it has an absolutely detrimental
> effect on builds with gcc-5 and CONFIG_KASAN=y, leading to a very likely
> kernel stack overflow aside from very complex object code:
 ...
> This does not seem to happen any more with gcc-7, but removing the ACCESS_ONCE
> seems safe anyway and it avoids a serious issue for some people. I have 
> verified
> that with gcc-5.3.1, the object code we get is better in the new version
> both with and without CONFIG_KASAN, as we no longer allocate a 1344 byte
> stack frame for hns_dsaf_get_regs() but otherwise have practically identical
> object code.
> 
> With gcc-7.0.0, removing ACCESS_ONCE has no effect, the object code is already
> good either way.
> 
> This patch is probably not urgent to get into 4.11 as only KASAN=y builds
> with certain compilers are affected, but I still think it makes sense to
> backport into older kernels.
> 
> Cc: sta...@vger.kernel.org
> Fixes: 511e6bc ("net: add Hisilicon Network Subsystem DSAF support")
> Signed-off-by: Arnd Bergmann 

This is really terrible for the compiler to do, but what can we do about it?

I'll apply this to 'net' and queue it up for -stable, thanks.


[PATCH v3 4/9] xen/pvh: Bootstrap PVH guest

2017-02-06 Thread Boris Ostrovsky
Start the PVH guest at the XEN_ELFNOTE_PHYS32_ENTRY address. Set up the
hypercall page, initialize boot_params, and enable early page tables.

Since this stub is executed before the kernel entry point, we cannot use
variables in .bss, which is cleared by the kernel. We explicitly place
variables that are initialized here into .data.

While adjusting xen_hvm_init_shared_info() make it use cpuid_e?x()
instead of cpuid() (wherever possible).

Signed-off-by: Boris Ostrovsky 
---
Changes in v3:
* Fix gdt definition (correct base address and use GDT_ENTRY()).
* Increase stack size to 256 bytes.
* Move both gdt and early stack to .init.data section.
* Add comment describing register state at entry time.
* Add comment explaining which PVH-related variables should be
  .data as opposed to .bss.
* Adjust memmap.nr_entries test.
* Move xen_hvm_init_shared_info() updates from patch 5 here.
* Style changes.


 arch/x86/xen/Kconfig |   2 +-
 arch/x86/xen/Makefile|   1 +
 arch/x86/xen/enlighten.c | 124 +---
 arch/x86/xen/xen-pvh.S   | 161 +++
 include/xen/xen.h|   5 ++
 5 files changed, 282 insertions(+), 11 deletions(-)
 create mode 100644 arch/x86/xen/xen-pvh.S

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index c7b15f3..76b6dbd 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -53,5 +53,5 @@ config XEN_DEBUG_FS
 
 config XEN_PVH
bool "Support for running as a PVH guest"
-   depends on X86_64 && XEN && XEN_PVHVM
+   depends on XEN && XEN_PVHVM && ACPI
def_bool n
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index e47e527..cb0164a 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS)+= debugfs.o
 obj-$(CONFIG_XEN_DOM0) += vga.o
 obj-$(CONFIG_SWIOTLB_XEN)  += pci-swiotlb-xen.o
 obj-$(CONFIG_XEN_EFI)  += efi.o
+obj-$(CONFIG_XEN_PVH)  += xen-pvh.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 828f1b2..d2144f7 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -176,6 +177,20 @@ struct tls_descs {
  */
 static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
 
+#ifdef CONFIG_XEN_PVH
+/*
+ * PVH variables.
+ *
+ * xen_pvh and pvh_bootparams need to live in data segment since they
+ * are used after startup_{32|64}, which clear .bss, are invoked.
+ */
+bool xen_pvh __attribute__((section(".data"))) = 0;
+struct boot_params pvh_bootparams __attribute__((section(".data")));
+
+struct hvm_start_info pvh_start_info;
+unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
+#endif
+
 static void clamp_max_cpus(void)
 {
 #ifdef CONFIG_SMP
@@ -1656,6 +1671,90 @@ asmlinkage __visible void __init xen_start_kernel(void)
 #endif
 }
 
+#ifdef CONFIG_XEN_PVH
+static void __init init_pvh_bootparams(void)
+{
+   struct xen_memory_map memmap;
+   unsigned int i;
+   int rc;
+
+   memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
+
+   memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map);
+   set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map);
+   rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+   if (rc) {
+   xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
+   BUG();
+   }
+
+   if (memmap.nr_entries < E820MAX - 1) {
+   pvh_bootparams.e820_map[memmap.nr_entries].addr =
+   ISA_START_ADDRESS;
+   pvh_bootparams.e820_map[memmap.nr_entries].size =
+   ISA_END_ADDRESS - ISA_START_ADDRESS;
+   pvh_bootparams.e820_map[memmap.nr_entries].type =
+   E820_RESERVED;
+   memmap.nr_entries++;
+   } else
+   xen_raw_printk("Warning: Can fit ISA range into e820\n");
+
+   sanitize_e820_map(pvh_bootparams.e820_map,
+ ARRAY_SIZE(pvh_bootparams.e820_map),
+ &memmap.nr_entries);
+
+   pvh_bootparams.e820_entries = memmap.nr_entries;
+   for (i = 0; i < pvh_bootparams.e820_entries; i++)
+   e820_add_region(pvh_bootparams.e820_map[i].addr,
+   pvh_bootparams.e820_map[i].size,
+   pvh_bootparams.e820_map[i].type);
+
+   pvh_bootparams.hdr.cmd_line_ptr =
+   pvh_start_info.cmdline_paddr;
+
+   /* The first module is always ramdisk. */
+   if (pvh_start_info.nr_modules) {
+   struct hvm_modlist_entry *modaddr =
+   __va(pvh_start_info.modlist_paddr);
+   pvh_bootparams.hdr.ramdisk_image = modaddr->paddr;
+   pvh_bootparams.hdr.ramdisk_size = modaddr->size;
+   }
+
+   /*
+* See Documentation/x86/boot.txt.
+*
+* Version 2.12 supports Xen entry point but we will us

Re: [PATCH 1/6] genirq: allow assigning affinity to present but not online CPUs

2017-02-06 Thread Christoph Hellwig
On Mon, Feb 06, 2017 at 12:03:05PM -0500, Keith Busch wrote:
> Can we use the online CPUs and create a new hot-cpu notifier to the nvme
> driver to free/reallocate as needed? We were doing that before blk-mq. Now
> blk-mq can change the number of hardware contexts on a live queue, so we
> can reintroduce that behavior to nvme and only allocate what we need.

That could be a next step, but given how badly I keep messing up core
IRQ bits we'll probably want to wait for two rewrites of those bits
from Thomas before moving on :)


[PATCH v3 0/9] PVH v2 support (domU)

2017-02-06 Thread Boris Ostrovsky
PVHv2 support for unprivileged guests.

Changes in v3:
* See patches 4 and 5

Boris Ostrovsky (9):
  x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C
  xen/x86: Remove PVH support
  xen/pvh: Import PVH-related Xen public interfaces
  xen/pvh: Bootstrap PVH guest
  xen/pvh: Make sure we don't use ACPI_IRQ_MODEL_PIC for SCI
  xen/pvh: Initialize grant table for PVH guests
  xen/pvh: PVH guests always have PV devices
  xen/pvh: Enable CPU hotplug
  xen/pvh: Use Xen's emergency_restart op for PVH guests

 arch/x86/include/asm/pgtable_32.h  |  32 
 arch/x86/kernel/head32.c   |  62 
 arch/x86/kernel/head_32.S  | 121 +-
 arch/x86/xen/Kconfig   |   2 +-
 arch/x86/xen/Makefile  |   1 +
 arch/x86/xen/enlighten.c   | 279 +
 arch/x86/xen/mmu.c |  21 +--
 arch/x86/xen/platform-pci-unplug.c |   4 +-
 arch/x86/xen/setup.c   |  37 +
 arch/x86/xen/smp.c |  78 -
 arch/x86/xen/smp.h |   8 -
 arch/x86/xen/xen-head.S|  62 +---
 arch/x86/xen/xen-ops.h |   1 -
 arch/x86/xen/xen-pvh.S | 161 +++
 drivers/xen/cpu_hotplug.c  |   2 +-
 drivers/xen/events/events_base.c   |   1 -
 drivers/xen/grant-table.c  |   8 +-
 include/xen/interface/elfnote.h|  12 +-
 include/xen/interface/hvm/hvm_vcpu.h   | 143 +
 include/xen/interface/hvm/start_info.h |  98 
 include/xen/xen.h  |  12 +-
 21 files changed, 708 insertions(+), 437 deletions(-)
 create mode 100644 arch/x86/xen/xen-pvh.S
 create mode 100644 include/xen/interface/hvm/hvm_vcpu.h
 create mode 100644 include/xen/interface/hvm/start_info.h

-- 
2.7.4



Re: [PATCHv7 4/8] printk: always use deferred printk when flush printk_safe lines

2017-02-06 Thread Steven Rostedt
On Tue, 7 Feb 2017 01:42:53 +0900
Sergey Senozhatsky  wrote:

> On (02/06/17 08:27), Steven Rostedt wrote:
> [..]
> > > > > just in case, the patch (which I prefer to be ignored)
> > > > 
> > > > let's keep printk_safe_flush_line().
> > > 
> > > I do not have strong opinion but I would slightly prefer
> > > to keep the helper function. The use of printk_deferred()
> > > is a bit tricky and it is better to have only one copy.
> > > 
> > > Steven, could you live with the original patch, please?  
> > 
> > Sure, but make it into a static inline.  
> 
> well, I'll be surprised if gcc doesn't inline that simple one-liner.

I never trust gcc ;-)  It's been known to uninline simple one liners
before that have been marked as "inline" :-p

> 
> attached. no conflicts, the patch can replace 0004.
> 
> ===8<===8<===
> 
> >From 69bbb0f436a2a89ec41a5831c03490e0a78ce12e Mon Sep 17 00:00:00 2001  
> From: Sergey Senozhatsky 
> Date: Tue, 27 Dec 2016 23:16:07 +0900
> Subject: [PATCH] printk: always use deferred printk when flush printk_safe
>  lines
> 
> Always use printk_deferred() in printk_safe_flush_line().
> Flushing can be done from NMI or printk_safe contexts (when
> we are in panic), so we can't call console drivers, yet still
> want to store the messages in the logbuf buffer. Therefore we
> use a deferred printk version.
> 
> Signed-off-by: Sergey Senozhatsky 
> Suggested-by: Petr Mladek 
> Signed-off-by: Petr Mladek 

Reviewed-by: Steven Rostedt (VMware) 

-- Steve

> ---
>  kernel/printk/printk_safe.c | 14 ++
>  1 file changed, 6 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
> index efc89a4e9df5..5214d326d3ba 100644
> --- a/kernel/printk/printk_safe.c
> +++ b/kernel/printk/printk_safe.c
> @@ -110,17 +110,15 @@ static int printk_safe_log_store(struct 
> printk_safe_seq_buf *s,
>   return add;
>  }
>  
> -static void printk_safe_flush_line(const char *text, int len)
> +static inline void printk_safe_flush_line(const char *text, int len)
>  {
>   /*
> -  * The buffers are flushed in NMI only on panic.  The messages must
> -  * go only into the ring buffer at this stage.  Consoles will get
> -  * explicitly called later when a crashdump is not generated.
> +  * Avoid any console drivers calls from here, because we may be
> +  * in NMI or printk_safe context (when in panic). The messages
> +  * must go only into the ring buffer at this stage.  Consoles will
> +  * get explicitly called later when a crashdump is not generated.
>*/
> - if (in_nmi())
> - printk_deferred("%.*s", len, text);
> - else
> - printk("%.*s", len, text);
> + printk_deferred("%.*s", len, text);
>  }
>  
>  /* printk part of the temporary buffer line by line */



[PATCH v3 5/9] xen/pvh: Make sure we don't use ACPI_IRQ_MODEL_PIC for SCI

2017-02-06 Thread Boris Ostrovsky
Since we are not using PIC and (at least currently) don't have IOAPIC
we want to make sure that acpi_irq_model doesn't stay set to
ACPI_IRQ_MODEL_PIC (which is the default value). If we allowed it to
stay then acpi_os_install_interrupt_handler() would try (and fail) to
request_irq() for PIC.

Instead we set the model to ACPI_IRQ_MODEL_PLATFORM which will prevent
this from happening.

Signed-off-by: Boris Ostrovsky 
---
Changes in v3:
* Moved update of acpi_irq_model to x86_init.oem.arch_setup() (i.e.
  after ACPI has been initialized so we know whether or not we have
  IOAPICs). Clarified commit message.
* Moved xen_hvm_init_shared_info() into patch 4.

 arch/x86/xen/enlighten.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d2144f7..6d406f3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1672,6 +1672,16 @@ asmlinkage __visible void __init xen_start_kernel(void)
 }
 
 #ifdef CONFIG_XEN_PVH
+
+static void xen_pvh_arch_setup(void)
+{
+#ifdef CONFIG_ACPI
+   /* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */
+   if (nr_ioapics == 0)
+   acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
+#endif
+}
+
 static void __init init_pvh_bootparams(void)
 {
struct xen_memory_map memmap;
@@ -1752,6 +1762,8 @@ void __init xen_prepare_pvh(void)
wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
 
init_pvh_bootparams();
+
+   x86_init.oem.arch_setup = xen_pvh_arch_setup;
 }
 #endif
 
-- 
2.7.4



Re: [RFC PATCH] perf/stat: Add --disable-hwdt

2017-02-06 Thread Borislav Petkov
On Mon, Feb 06, 2017 at 09:23:33AM -0500, Vince Weaver wrote:
> minor issue, but is it possible to do anything about dmesg spam?  From 
> what I recall every time you enable and disable the watchdog the kernel 
> prints a message.  Makes for messy logs, especially when you run the 
> perf_fuzzer as root.

You mean this:

[81304.460656] NMI watchdog: enabled on all CPUs, permanently consumes one 
hw-PMU counter.

We could turn it into a

pr_info_once().

I mean, the feedback that the thing has been enabled is the 1 in
/proc/sys/kernel/nmi_watchdog anyway...

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.


[PATCH] x86/acpi: keep x86_cpu_to_acpiid mapping valid on cpu hotplug

2017-02-06 Thread Vitaly Kuznetsov
We may or may not have all possible CPUs in MADT on boot but in any case
we're overwriting x86_cpu_to_acpiid mapping with U32_MAX when
acpi_register_lapic() is called again on the CPU hotplug path:
acpi_processor_hotadd_init() -> acpi_map_cpu() -> acpi_register_lapic().

As we have the required acpi_id information in acpi_processor_hotadd_init()
propagate it to acpi_map_cpu() to always keep x86_cpu_to_acpiid mapping
valid.

Reported-by: Andrew Jones 
Signed-off-by: Vitaly Kuznetsov 
---
 arch/ia64/kernel/acpi.c   | 3 ++-
 arch/x86/kernel/acpi/boot.c   | 5 +++--
 drivers/acpi/acpi_processor.c | 4 ++--
 include/linux/acpi.h  | 3 ++-
 4 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 9273e03..7508c30 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -887,7 +887,8 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, 
int *pcpu)
 }
 
 /* wrapper to silence section mismatch warning */
-int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
+int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
+  int *pcpu)
 {
return _acpi_map_lsapic(handle, physid, pcpu);
 }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 64422f8..04bc5f3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -723,11 +723,12 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, int 
physid)
return 0;
 }
 
-int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
+int *pcpu)
 {
int cpu;
 
-   cpu = acpi_register_lapic(physid, U32_MAX, ACPI_MADT_ENABLED);
+   cpu = acpi_register_lapic(physid, acpi_id, ACPI_MADT_ENABLED);
if (cpu < 0) {
pr_info(PREFIX "Unable to map lapic to logical cpu number\n");
return cpu;
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 3de3b6b..4467a80 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -165,7 +165,7 @@ static int acpi_processor_errata(void)
 
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 int __weak acpi_map_cpu(acpi_handle handle,
-   phys_cpuid_t physid, int *pcpu)
+   phys_cpuid_t physid, u32 acpi_id, int *pcpu)
 {
return -ENODEV;
 }
@@ -203,7 +203,7 @@ static int acpi_processor_hotadd_init(struct acpi_processor 
*pr)
cpu_maps_update_begin();
cpu_hotplug_begin();
 
-   ret = acpi_map_cpu(pr->handle, pr->phys_id, &pr->id);
+   ret = acpi_map_cpu(pr->handle, pr->phys_id, pr->acpi_id, &pr->id);
if (ret)
goto out;
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 5b36974..6ab47e9 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -291,7 +291,8 @@ bool acpi_processor_validate_proc_id(int proc_id);
 
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /* Arch dependent functions for cpu hotplug support */
-int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu);
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
+int *pcpu);
 int acpi_unmap_cpu(int cpu);
 int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
-- 
2.9.3



Re: linux-next: build warning after merge of the tty tree

2017-02-06 Thread Ulrich Hecht
On Mon, Feb 6, 2017 at 9:50 AM, Greg KH  wrote:
> On Mon, Feb 06, 2017 at 03:16:38PM +1100, Stephen Rothwell wrote:
>> Hi Greg,
>>
>> After merging the tty tree, today's linux-next build
>> (arm_multi_v7_defconfig) produced this warning:
>>
>> drivers/tty/serial/sh-sci.c:977:12: warning: 'scif_set_rtrg' defined but not 
>> used [-Wunused-function]
>>  static int scif_set_rtrg(struct uart_port *port, int rx_trig)
>> ^
>>
>> Introduced by commit
>>
>>   a380ed461f66 ("serial: sh-sci: implement FIFO threshold register setting")
>>
>> Forgot to add a call to this new function?
>
> I think this is fixed by a patch I just took into my tree, which isn't
> in linux-next yet.  Right Ulrich?

That is correct, it's called in "[PATCH v4 2/4] serial: sh-sci:
SCIFA/B RX FIFO software timeout".

CU
Uli


[PATCH v3 6/9] xen/pvh: Initialize grant table for PVH guests

2017-02-06 Thread Boris Ostrovsky
Like PV guests, PVH does not have PCI devices and therefore cannot
use MMIO space to store grants. Instead it balloons out memory and
keeps grants there.

Signed-off-by: Boris Ostrovsky 
Reviewed-by: Juergen Gross 
---
 drivers/xen/grant-table.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index bb36b1e..d6786b8 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1146,13 +1146,13 @@ EXPORT_SYMBOL_GPL(gnttab_init);
 
 static int __gnttab_init(void)
 {
+   if (!xen_domain())
+   return -ENODEV;
+
/* Delay grant-table initialization in the PV on HVM case */
-   if (xen_hvm_domain())
+   if (xen_hvm_domain() && !xen_pvh_domain())
return 0;
 
-   if (!xen_pv_domain())
-   return -ENODEV;
-
return gnttab_init();
 }
 /* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
-- 
2.7.4



[PATCH v3 9/9] xen/pvh: Use Xen's emergency_restart op for PVH guests

2017-02-06 Thread Boris Ostrovsky
Using native_machine_emergency_restart (called during reboot) will
lead PVH guests to machine_real_restart()  where we try to use
real_mode_header which is not initialized.

Signed-off-by: Boris Ostrovsky 
Reviewed-by: Juergen Gross 
---
 arch/x86/xen/enlighten.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 6d406f3..ec1d5c4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1924,6 +1924,9 @@ static void __init xen_hvm_guest_init(void)
x86_init.irqs.intr_init = xen_init_IRQ;
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
+
+   if (xen_pvh_domain())
+   machine_ops.emergency_restart = xen_emergency_restart;
 #ifdef CONFIG_KEXEC_CORE
machine_ops.shutdown = xen_hvm_shutdown;
machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
-- 
2.7.4



[PATCH v3 1/9] x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C

2017-02-06 Thread Boris Ostrovsky
The new Xen PVH entry point requires page tables to be setup by the
kernel since it is entered with paging disabled.

Pull the common code out of head_32.S so that mk_early_pgtbl_32() can be
invoked from both the new Xen entry point and the existing startup_32()
code.

Convert resulting common code to C.

Signed-off-by: Boris Ostrovsky 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: m...@codeblueprint.co.uk
Cc: xen-de...@lists.xenproject.org
Link: 
http://lkml.kernel.org/r/1481215471-9639-1-git-send-email-boris.ostrov...@oracle.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/pgtable_32.h |  32 ++
 arch/x86/kernel/head32.c  |  62 +++
 arch/x86/kernel/head_32.S | 121 +++---
 3 files changed, 101 insertions(+), 114 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_32.h 
b/arch/x86/include/asm/pgtable_32.h
index b6c0b40..fbc7336 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -27,6 +27,7 @@ struct vm_area_struct;
 
 extern pgd_t swapper_pg_dir[1024];
 extern pgd_t initial_page_table[1024];
+extern pmd_t initial_pg_pmd[];
 
 static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
@@ -75,4 +76,35 @@ do { \
 #define kern_addr_valid(kaddr) (0)
 #endif
 
+/*
+ * This is how much memory in addition to the memory covered up to
+ * and including _end we need mapped initially.
+ * We need:
+ * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
+ *
+ * Modulo rounding, each megabyte assigned here requires a kilobyte of
+ * memory, which is currently unreclaimed.
+ *
+ * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and smaller than max_low_pfn, otherwise will waste some page table entries
+ */
+#if PTRS_PER_PMD > 1
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+#else
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
+#endif
+
+/*
+ * Number of possible pages in the lowmem region.
+ *
+ * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
+ * gas warning about overflowing shift count when gas has been compiled
+ * with only a host target support using a 32-bit type for internal
+ * representation.
+ */
+#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
+
 #endif /* _ASM_X86_PGTABLE_32_H */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index f16c55b..e5fb436 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void)
 
start_kernel();
 }
+
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond __brk_base.  The variable
+ * _brk_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end.
+ *
+ * In PAE mode initial_page_table is statically defined to contain
+ * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+ * entries). The identity mapping is handled by pointing two PGD entries
+ * to the first kernel PMD. Note the upper half of each PMD or PTE are
+ * always zero at this stage.
+ */
+void __init mk_early_pgtbl_32(void)
+{
+#ifdef __pa
+#undef __pa
+#endif
+#define __pa(x)  ((unsigned long)(x) - PAGE_OFFSET)
+   pte_t pte, *ptep;
+   int i;
+   unsigned long *ptr;
+   /* Enough space to fit pagetables for the low memory linear map */
+   const unsigned long limit = __pa(_end) +
+   (PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT);
+#ifdef CONFIG_X86_PAE
+   pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd);
+#define SET_PL2(pl2, val){ (pl2).pmd = (val); }
+#else
+   pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table);
+#define SET_PL2(pl2, val)   { (pl2).pgd = (val); }
+#endif
+
+   ptep = (pte_t *)__pa(__brk_base);
+   pte.pte = PTE_IDENT_ATTR;
+
+   while ((pte.pte & PTE_PFN_MASK) < limit) {
+
+   SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR);
+   *pl2p = pl2;
+#ifndef CONFIG_X86_PAE
+   /* Kernel PDE entry */
+   *(pl2p +  ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2;
+#endif
+   for (i = 0; i < PTRS_PER_PTE; i++) {
+   *ptep = pte;
+   pte.pte += PAGE_SIZE;
+   ptep++;
+   }
+
+   pl2p++;
+   }
+
+   ptr = (unsigned long *)__pa(&max_pfn_mapped);
+   /* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */
+   *ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
+
+   ptr = (u

[PATCH v3 8/9] xen/pvh: Enable CPU hotplug

2017-02-06 Thread Boris Ostrovsky
PVH guests don't (yet) receive ACPI hotplug interrupts and therefore
need to monitor xenstore for CPU hotplug event.

Signed-off-by: Boris Ostrovsky 
Reviewed-by: Juergen Gross 
---
 drivers/xen/cpu_hotplug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index 5676aef..0bab60a3 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -107,7 +107,7 @@ static int __init setup_vcpu_hotplug_event(void)
.notifier_call = setup_cpu_watcher };
 
 #ifdef CONFIG_X86
-   if (!xen_pv_domain())
+   if (!xen_pv_domain() && !xen_pvh_domain())
 #else
if (!xen_domain())
 #endif
-- 
2.7.4



[PATCH v3 3/9] xen/pvh: Import PVH-related Xen public interfaces

2017-02-06 Thread Boris Ostrovsky
Signed-off-by: Boris Ostrovsky 
Reviewed-by: Juergen Gross 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 include/xen/interface/elfnote.h|  12 ++-
 include/xen/interface/hvm/hvm_vcpu.h   | 143 +
 include/xen/interface/hvm/start_info.h |  98 ++
 3 files changed, 252 insertions(+), 1 deletion(-)
 create mode 100644 include/xen/interface/hvm/hvm_vcpu.h
 create mode 100644 include/xen/interface/hvm/start_info.h

diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h
index f90b034..9e9f9bf 100644
--- a/include/xen/interface/elfnote.h
+++ b/include/xen/interface/elfnote.h
@@ -193,9 +193,19 @@
 #define XEN_ELFNOTE_SUPPORTED_FEATURES 17
 
 /*
+ * Physical entry point into the kernel.
+ *
+ * 32bit entry point into the kernel. When requested to launch the
+ * guest kernel in a HVM container, Xen will use this entry point to
+ * launch the guest in 32bit protected mode with paging disabled.
+ * Ignored otherwise.
+ */
+#define XEN_ELFNOTE_PHYS32_ENTRY 18
+
+/*
  * The number of the highest elfnote defined.
  */
-#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY
 
 #endif /* __XEN_PUBLIC_ELFNOTE_H__ */
 
diff --git a/include/xen/interface/hvm/hvm_vcpu.h 
b/include/xen/interface/hvm/hvm_vcpu.h
new file mode 100644
index 000..32ca83e
--- /dev/null
+++ b/include/xen/interface/hvm/hvm_vcpu.h
@@ -0,0 +1,143 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2015, Roger Pau Monne 
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_VCPU_H__
+#define __XEN_PUBLIC_HVM_HVM_VCPU_H__
+
+#include "../xen.h"
+
+struct vcpu_hvm_x86_32 {
+uint32_t eax;
+uint32_t ecx;
+uint32_t edx;
+uint32_t ebx;
+uint32_t esp;
+uint32_t ebp;
+uint32_t esi;
+uint32_t edi;
+uint32_t eip;
+uint32_t eflags;
+
+uint32_t cr0;
+uint32_t cr3;
+uint32_t cr4;
+
+uint32_t pad1;
+
+/*
+ * EFER should only be used to set the NXE bit (if required)
+ * when starting a vCPU in 32bit mode with paging enabled or
+ * to set the LME/LMA bits in order to start the vCPU in
+ * compatibility mode.
+ */
+uint64_t efer;
+
+uint32_t cs_base;
+uint32_t ds_base;
+uint32_t ss_base;
+uint32_t es_base;
+uint32_t tr_base;
+uint32_t cs_limit;
+uint32_t ds_limit;
+uint32_t ss_limit;
+uint32_t es_limit;
+uint32_t tr_limit;
+uint16_t cs_ar;
+uint16_t ds_ar;
+uint16_t ss_ar;
+uint16_t es_ar;
+uint16_t tr_ar;
+
+uint16_t pad2[3];
+};
+
+/*
+ * The layout of the _ar fields of the segment registers is the
+ * following:
+ *
+ * Bits   [0,3]: type (bits 40-43).
+ * Bit4: s(descriptor type, bit 44).
+ * Bit[5,6]: dpl  (descriptor privilege level, bits 45-46).
+ * Bit7: p(segment-present, bit 47).
+ * Bit8: avl  (available for system software, bit 52).
+ * Bit9: l(64-bit code segment, bit 53).
+ * Bit   10: db   (meaning depends on the segment, bit 54).
+ * Bit   11: g(granularity, bit 55)
+ * Bits [12,15]: unused, must be blank.
+ *
+ * A more complete description of the meaning of this fields can be
+ * obtained from the Intel SDM, Volume 3, section 3.4.5.
+ */
+
+struct vcpu_hvm_x86_64 {
+uint64_t rax;
+uint64_t rcx;
+uint64_t rdx;
+uint64_t rbx;
+uint64_t rsp;
+uint64_t rbp;
+uint64_t rsi;
+uint64_t rdi;
+uint64_t rip;
+uint64_t rflags;
+
+uint64_t cr0;
+uint64_t cr3;
+uint64_t cr4;
+uint64_t efer;
+
+/*
+ * Using VCPU_HVM_MODE_64B implies that the vCPU is launched
+ * directly in long mode, so the cached parts of the segment
+ * registers get set to match that environment.
+ *
+ * If the user wants to launch the vCPU in compatibility mode
+ * the 32-bit structure should be used instead.
+ */
+};
+
+struct vcpu_hvm_context {
+#d

[PATCH v3 2/9] xen/x86: Remove PVH support

2017-02-06 Thread Boris Ostrovsky
We are replacing existing PVH guests with new implementation.

We are keeping xen_pvh_domain() macro (for now set to zero) because
when we introduce new PVH implementation later in this series we will
reuse current PVH-specific code (xen_pvh_gnttab_setup()), and that
code is conditioned by 'if (xen_pvh_domain())'. (We will also need
a noop xen_pvh_domain() for !CONFIG_XEN_PVH).

Signed-off-by: Boris Ostrovsky 
Reviewed-by: Juergen Gross 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 arch/x86/xen/enlighten.c | 140 ++-
 arch/x86/xen/mmu.c   |  21 +-
 arch/x86/xen/setup.c |  37 +--
 arch/x86/xen/smp.c   |  78 --
 arch/x86/xen/smp.h   |   8 ---
 arch/x86/xen/xen-head.S  |  62 ++---
 arch/x86/xen/xen-ops.h   |   1 -
 drivers/xen/events/events_base.c |   1 -
 include/xen/xen.h|  13 +---
 9 files changed, 54 insertions(+), 307 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 51ef952..828f1b2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1138,10 +1138,11 @@ void xen_setup_vcpu_info_placement(void)
xen_vcpu_setup(cpu);
}
 
-   /* xen_vcpu_setup managed to place the vcpu_info within the
-* percpu area for all cpus, so make use of it. Note that for
-* PVH we want to use native IRQ mechanism. */
-   if (have_vcpu_info_placement && !xen_pvh_domain()) {
+   /*
+* xen_vcpu_setup managed to place the vcpu_info within the
+* percpu area for all cpus, so make use of it.
+*/
+   if (have_vcpu_info_placement) {
pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
pv_irq_ops.restore_fl = 
__PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
pv_irq_ops.irq_disable = 
__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
@@ -1413,49 +1414,9 @@ static void __init xen_boot_params_init_edd(void)
  * Set up the GDT and segment registers for -fstack-protector.  Until
  * we do this, we have to be careful not to call any stack-protected
  * function, which is most of the kernel.
- *
- * Note, that it is __ref because the only caller of this after init
- * is PVH which is not going to use xen_load_gdt_boot or other
- * __init functions.
  */
-static void __ref xen_setup_gdt(int cpu)
+static void xen_setup_gdt(int cpu)
 {
-   if (xen_feature(XENFEAT_auto_translated_physmap)) {
-#ifdef CONFIG_X86_64
-   unsigned long dummy;
-
-   load_percpu_segment(cpu); /* We need to access per-cpu area */
-   switch_to_new_gdt(cpu); /* GDT and GS set */
-
-   /* We are switching of the Xen provided GDT to our HVM mode
-* GDT. The new GDT has  __KERNEL_CS with CS.L = 1
-* and we are jumping to reload it.
-*/
-   asm volatile ("pushq %0\n"
- "leaq 1f(%%rip),%0\n"
- "pushq %0\n"
- "lretq\n"
- "1:\n"
- : "=&r" (dummy) : "0" (__KERNEL_CS));
-
-   /*
-* While not needed, we also set the %es, %ds, and %fs
-* to zero. We don't care about %ss as it is NULL.
-* Strictly speaking this is not needed as Xen zeros those
-* out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE)
-*
-* Linux zeros them in cpu_init() and in secondary_startup_64
-* (for BSP).
-*/
-   loadsegment(es, 0);
-   loadsegment(ds, 0);
-   loadsegment(fs, 0);
-#else
-   /* PVH: TODO Implement. */
-   BUG();
-#endif
-   return; /* PVH does not need any PV GDT ops. */
-   }
pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
pv_cpu_ops.load_gdt = xen_load_gdt_boot;
 
@@ -1466,59 +1427,6 @@ static void __ref xen_setup_gdt(int cpu)
pv_cpu_ops.load_gdt = xen_load_gdt;
 }
 
-#ifdef CONFIG_XEN_PVH
-/*
- * A PV guest starts with default flags that are not set for PVH, set them
- * here asap.
- */
-static void xen_pvh_set_cr_flags(int cpu)
-{
-
-   /* Some of these are setup in 'secondary_startup_64'. The others:
-* X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests
-* (which PVH shared codepaths), while X86_CR0_PG is for PVH. */
-   write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | 
X86_CR0_AM);
-
-   if (!cpu)
-   return;
-   /*
-* For BSP, PSE PGE are set in probe_page_size_mask(), for APs
-* set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu().
-   */
-   if (boot_cpu_has(X86_FEATURE_PSE))
-   cr4_set_bits_and_update_boot(X86_CR4_PSE);
-
-   if (bo

[PATCH] mm: Place "not" inside of unlikely() statement in wb_domain_writeout_inc()

2017-02-06 Thread Steven Rostedt
From: "Steven Rostedt (VMware)" 

The likely/unlikely profiler noticed that the unlikely statement in
wb_domain_writeout_inc() is constantly wrong. This is due to the "not" (!)
being outside the unlikely statement. It is likely that dom->period_time
will be set, but unlikely that it won't be. Move the not into the unlikely
statement.

Signed-off-by: Steven Rostedt (VMware) 
---
 mm/page-writeback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 290e8b7..94102c7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -580,7 +580,7 @@ static void wb_domain_writeout_inc(struct wb_domain *dom,
__fprop_inc_percpu_max(&dom->completions, completions,
   max_prop_frac);
/* First event after period switching was turned off? */
-   if (!unlikely(dom->period_time)) {
+   if (unlikely(!dom->period_time)) {
/*
 * We can race with other __bdi_writeout_inc calls here but
 * it does not cause any harm since the resulting time when
-- 
2.9.3



[PATCH v3 7/9] xen/pvh: PVH guests always have PV devices

2017-02-06 Thread Boris Ostrovsky
Signed-off-by: Boris Ostrovsky 
Reviewed-by: Juergen Gross 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 arch/x86/xen/platform-pci-unplug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/xen/platform-pci-unplug.c 
b/arch/x86/xen/platform-pci-unplug.c
index 90d1b83..33a783c 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -73,8 +73,8 @@ bool xen_has_pv_devices(void)
if (!xen_domain())
return false;
 
-   /* PV domains always have them. */
-   if (xen_pv_domain())
+   /* PV and PVH domains always have them. */
+   if (xen_pv_domain() || xen_pvh_domain())
return true;
 
/* And user has xen_platform_pci=0 set in guest config as
-- 
2.7.4



Re: crypto: atmel - Fix authenc compile test warnings

2017-02-06 Thread kbuild test robot
Hi Herbert,

[auto build test WARNING on cryptodev/master]
[also build test WARNING on v4.10-rc7 next-20170206]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Herbert-Xu/crypto-atmel-Fix-authenc-compile-test-warnings/20170206-171201
base:   
https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master
reproduce:
# apt-get install sparse
make ARCH=x86_64 allmodconfig
make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

   include/linux/compiler.h:253:8: sparse: attribute 'no_sanitize_address': 
unknown attribute
>> drivers/crypto/atmel-tdes.c:157:25: sparse: incompatible types in comparison 
>> expression (different type sizes)
   drivers/crypto/atmel-tdes.c:528:25: sparse: incompatible types in comparison 
expression (different type sizes)
   drivers/crypto/atmel-tdes.c:529:25: sparse: incompatible types in comparison 
expression (different type sizes)
   In file included from drivers/crypto/atmel-tdes.c:17:0:
   drivers/crypto/atmel-tdes.c: In function 'atmel_tdes_sg_copy':
   include/linux/kernel.h:753:16: warning: comparison of distinct pointer types 
lacks a cast
 (void) (&min1 == &min2);   \
   ^
   include/linux/kernel.h:756:2: note: in expansion of macro '__min'
 __min(typeof(x), typeof(y),   \
 ^
   drivers/crypto/atmel-tdes.c:157:11: note: in expansion of macro 'min'
  count = min(count, buflen);
  ^~~
   In file included from include/linux/printk.h:6:0,
from include/linux/kernel.h:13,
from drivers/crypto/atmel-tdes.c:17:
   drivers/crypto/atmel-tdes.c: In function 'atmel_tdes_crypt_pdc_stop':
   include/linux/kern_levels.h:4:18: warning: format '%u' expects argument of 
type 'unsigned int', but argument 2 has type 'size_t {aka long unsigned int}' 
[-Wformat=]
#define KERN_SOH "\001"  /* ASCII Start Of Header */
 ^
   include/linux/kern_levels.h:10:18: note: in expansion of macro 'KERN_SOH'
#define KERN_ERR KERN_SOH "3" /* error conditions */
 ^~~~
   include/linux/printk.h:292:9: note: in expansion of macro 'KERN_ERR'
 printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
^~~~
   drivers/crypto/atmel-tdes.c:339:4: note: in expansion of macro 'pr_err'
   pr_err("not all data converted: %u\n", count);
   ^~
   drivers/crypto/atmel-tdes.c: In function 'atmel_tdes_buff_init':
   drivers/crypto/atmel-tdes.c:364:26: warning: format '%d' expects argument of 
type 'int', but argument 3 has type 'size_t {aka long unsigned int}' [-Wformat=]
  dev_err(dd->dev, "dma %d bytes error\n", dd->buflen);
 ^
   drivers/crypto/atmel-tdes.c:372:26: warning: format '%d' expects argument of 
type 'int', but argument 3 has type 'size_t {aka long unsigned int}' [-Wformat=]
  dev_err(dd->dev, "dma %d bytes error\n", dd->buflen);
 ^
   In file included from drivers/crypto/atmel-tdes.c:17:0:
   drivers/crypto/atmel-tdes.c: In function 'atmel_tdes_crypt_start':
   include/linux/kernel.h:753:16: warning: comparison of distinct pointer types 
lacks a cast
 (void) (&min1 == &min2);   \
   ^
   include/linux/kernel.h:756:2: note: in expansion of macro '__min'
 __min(typeof(x), typeof(y),   \
 ^
   drivers/crypto/atmel-tdes.c:528:11: note: in expansion of macro 'min'
  count = min(dd->total, sg_dma_len(dd->in_sg));
  ^~~
   include/linux/kernel.h:753:16: warning: comparison of distinct pointer types 
lacks a cast
 (void) (&min1 == &min2);   \
   ^
   include/linux/kernel.h:756:2: note: in expansion of macro '__min'
 __min(typeof(x), typeof(y),   \
 ^
   drivers/crypto/atmel-tdes.c:529:11: note: in expansion of macro 'min'
  count = min(count, sg_dma_len(dd->out_sg));
  ^~~
   In file included from include/linux/printk.h:6:0,
from include/linux/kernel.h:13,
from drivers/crypto/atmel-tdes.c:17:
   drivers/crypto/atmel-tdes.c: In function 'atmel_tdes_crypt_dma_stop':
   include/linux/kern_levels.h:4:18: warning: format '%u' expects argument of 
type 'unsigned int', but argument 2 has type 'size_t {aka long unsigned int}' 
[-Wformat=]
#define KERN_SOH "\001"  /* ASCII Start Of Header */
 ^
   include/linux/kern_levels.h:10:18: note: in expansion of macro 'KERN_SOH'
#define KERN_ERR KERN_

Re: [PATCH 4/4] refcount: Report failures through CHECK_DATA_CORRUPTION

2017-02-06 Thread Kees Cook
On Mon, Feb 6, 2017 at 12:57 AM, Peter Zijlstra  wrote:
> On Sun, Feb 05, 2017 at 03:33:36PM -0800, Kees Cook wrote:
>> On Sun, Feb 5, 2017 at 7:40 AM, Peter Zijlstra  wrote:
>> > On Fri, Feb 03, 2017 at 03:26:52PM -0800, Kees Cook wrote:
>> >> This converts from WARN_ON() to CHECK_DATA_CORRUPTION() in the
>> >> CONFIG_DEBUG_REFCOUNT case. Additionally moves refcount_t sanity check
>> >> conditionals into regular function flow. Since CHECK_DATA_CORRUPTION()
>> >> is marked __must_check, we override a few cases where the failure has
>> >> already been handled but we want to explicitly report it.
>> >>
>> >> Signed-off-by: Kees Cook 
>> >> ---
>> >>  include/linux/refcount.h | 35 ++-
>> >>  lib/Kconfig.debug|  2 ++
>> >>  2 files changed, 24 insertions(+), 13 deletions(-)
>> >>
>> >> diff --git a/include/linux/refcount.h b/include/linux/refcount.h
>> >> index 5b89cad62237..ef32910c7dd8 100644
>> >> --- a/include/linux/refcount.h
>> >> +++ b/include/linux/refcount.h
>> >> @@ -43,10 +43,10 @@
>> >>  #include 
>> >>
>> >>  #if CONFIG_DEBUG_REFCOUNT
>> >> -#define REFCOUNT_WARN(cond, str) WARN_ON(cond)
>> >> +#define REFCOUNT_CHECK(cond, str) CHECK_DATA_CORRUPTION(cond, str)
>> >
>> > OK, so that goes back to a full WARN() which will make the generated
>> > code gigantic due to the whole printk() trainwreck :/
>>
>> Hrm, perhaps we need three levels? WARN_ON, WARN, and BUG?
>
> Did consider that, didn't really know if that made sense.
>
> Like I wrote, ideally we'd end up using something like the x86 exception
> table with a custom handler. Just no idea how to pull that off without
> doing a full blown arch specific implementation, so I didn't go there
> quite yet.

I haven't spent much time looking at the extable stuff. (Though
coincidentally, I was poking at it for x86's test_nx stuff...) I
thought there was a way to build arch-agnostic extables already?
kernel/extable.c is unconditionally built-in, for example.

> That way refcount_inc() would end up being inlined to something like:
>
> mov0x148(%rdi),%eax
> jmp2f
>   1:lock cmpxchg %edx,0x148(%rdi)
> je 4f
>   2:lea-0x1(%rax),%ecx
> lea0x1(%rax),%edx
> cmp$0xfffd,%ecx
> jbe1b
>   3:ud2
>   4:
>
> _ASM_EXTABLE_HANDLE(3b, 4b, ex_handler_refcount_inc)
>
>
> where:
>
> bool ex_handler_refcount_inc(const struct exception_table_entry *fixup,
>  struct pt_regs *regs, int trapnr)
> {
> regs->ip = ex_fixup_addr(fixup);
>
> if (!regs->ax)
> WARN(1, "refcount_t: increment on 0; use-after-free.\n");
> else
> WARN(1, "refcount_t: saturated; leaking memory.\n");
>
> return true;
> }
>
> and the handler is shared between all instances and can be as big and
> fancy as we'd like.

I'll dig a bit to see what I can build. Can you add the lkdtm tests to
the series, though? That should be fine as-is.

Thanks!

-Kees

-- 
Kees Cook
Pixel Security


Re: [PATCH 1/6] genirq: allow assigning affinity to present but not online CPUs

2017-02-06 Thread Keith Busch
On Sun, Feb 05, 2017 at 05:40:23PM +0100, Christoph Hellwig wrote:
> Hi Joe,
> 
> On Fri, Feb 03, 2017 at 08:58:09PM -0500, Joe Korty wrote:
> > IIRC, some years ago I ran across a customer system where
> > the #cpus_present was twice as big as #cpus_possible.
> > 
> > Hyperthreading was turned off in the BIOS so it was not
> > entirely out of line for the extra cpus to be declared
> > present, even though none of them would ever be available
> > for use.
> 
> This sounds like a system we should quirk around instead of optimizing
> for it.  Unless I totally misunderstand the idea behind cpu_possible
> and cpu_present.

Can we use the online CPUs and create a new hot-cpu notifier to the nvme
driver to free/reallocate as needed? We were doing that before blk-mq. Now
blk-mq can change the number of hardware contexts on a live queue, so we
can reintroduce that behavior to nvme and only allocate what we need.


[PATCH] MAINTAINERS: drop broken reference to i2c/trivial-devices

2017-02-06 Thread Wolfram Sang
Due to RST rework, the reference to i2c/trivial-devices was changed, but the
result is broken. However, let's just drop the whole reference, since it
doesn't make sense in the first place to reference this "global" file
for a single driver.

Fixes: 8c27ceff3604b2 ("docs: fix locations of several documents that got 
moved")
Signed-off-by: Wolfram Sang 
Cc: Mauro Carvalho Chehab 
Cc: Kevin Tsai 
Cc: Jonathan Cameron 
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 023e4a8c3d104d..3c67142f17e913 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2974,7 +2974,6 @@ CAPELLA MICROSYSTEMS LIGHT SENSOR DRIVER
 M: Kevin Tsai 
 S: Maintained
 F: drivers/iio/light/cm*
-F: Documentation/devicetree/bindings/i2c/trivial-admin-guide/devices.rst
 
 CAVIUM I2C DRIVER
 M: Jan Glauber 
-- 
2.10.2



Re: [PATCH 5/6] dmaengine: Add Broadcom SBA RAID driver

2017-02-06 Thread Vinod Koul
On Mon, Feb 06, 2017 at 05:31:15PM +0530, Anup Patel wrote:

> >> +
> >> +/* SBA C_MDATA helper macros */
> >> +#define SBA_C_MDATA_LOAD_VAL(__bnum0)((__bnum0) & 0x3)
> >> +#define SBA_C_MDATA_WRITE_VAL(__bnum0)   ((__bnum0) & 0x3)
> >> +#define SBA_C_MDATA_XOR_VAL(__bnum1, __bnum0)\
> >> + ({  u32 __v = ((__bnum0) & 0x3);\
> >> + __v |= ((__bnum1) & 0x3) << 2;  \
> >> + __v;\
> >> + })
> >> +#define SBA_C_MDATA_PQ_VAL(__dnum, __bnum1, __bnum0) \
> >> + ({  u32 __v = ((__bnum0) & 0x3);\
> >> + __v |= ((__bnum1) & 0x3) << 2;  \
> >> + __v |= ((__dnum) & 0x1f) << 5;  \
> >> + __v;\
> >> + })
> >
> > ah why are we using complex macros, why can't these be simple functions..
> 
> "static inline functions" seemed too complicated here because most of
> these macros are two lines of c-code.

and that's where I have an issue with this. Macros for simple things are fine
but not for a couple of lines of logic!

> 
> Do you still insist on using "static inline functions"?

Yes

> 
> >
> >> +#define SBA_C_MDATA_LS(__c_mdata_val)((__c_mdata_val) & 0xff)
> >> +#define SBA_C_MDATA_MS(__c_mdata_val)(((__c_mdata_val) >> 8) & 
> >> 0x3)
> >> +
> >> +/* Driver helper macros */
> >> +#define to_sba_request(tx)   \
> >> + container_of(tx, struct sba_request, tx)
> >> +#define to_sba_device(dchan) \
> >> + container_of(dchan, struct sba_device, dma_chan)
> >> +
> >> +enum sba_request_state {
> >> + SBA_REQUEST_STATE_FREE = 1,
> >> + SBA_REQUEST_STATE_ALLOCED = 2,
> >> + SBA_REQUEST_STATE_PENDING = 3,
> >> + SBA_REQUEST_STATE_ACTIVE = 4,
> >> + SBA_REQUEST_STATE_COMPLETED = 5,
> >> + SBA_REQUEST_STATE_ABORTED = 6,
> >
> > what's up with a very funny indentation setting, we use 8 chars.
> >
> > Please re-read the Documentation/process/coding-style.rst
> 
> I have double checked this enum. The indentation is fine
> and as-per coding style. Am I missing anything else?

Somehow the initial indent doesn't seem to be 8 chars to me.

> >> +static enum dma_status sba_tx_status(struct dma_chan *dchan,
> >> +  dma_cookie_t cookie,
> >> +  struct dma_tx_state *txstate)
> >> +{
> >> + int mchan_idx;
> >> + enum dma_status ret;
> >> + struct sba_device *sba = to_sba_device(dchan);
> >> +
> >> + ret = dma_cookie_status(dchan, cookie, txstate);
> >> + if (ret == DMA_COMPLETE)
> >> + return ret;
> >> +
> >> + for (mchan_idx = 0; mchan_idx < sba->mchans_count; mchan_idx++)
> >> + mbox_client_peek_data(sba->mchans[mchan_idx]);
> >
> > what is this achieving?
> 
> The mbox_client_peek_data() is a hint to mailbox controller driver
> to check for available messages.
> 
> This gives good performance improvement when some DMA client
> code is polling using tx_status() callback.

Then why do it before and then check status.

-- 
~Vinod


Re: [PATCH 2/5] drm: of: introduce drm_of_find_panel_or_bridge

2017-02-06 Thread Rob Herring
On Mon, Feb 06, 2017 at 11:42:48AM +0100, Philipp Zabel wrote:
> On Fri, 2017-02-03 at 21:36 -0600, Rob Herring wrote:
> > Many drivers have a common pattern of searching the OF graph for either an
> > attached panel or bridge and then finding the DRM struct for the panel
> > or bridge. Also, most drivers need to handle deferred probing when the
> > DRM device is not yet instantiated. Create a common function,
> > drm_of_find_panel_or_bridge, to find the connected node and the
> > associated DRM panel or bridge device.

[...]

> > +int drm_of_find_panel_or_bridge(const struct device_node *np,
> > +   int port, int endpoint,
> > +   struct drm_panel **panel,
> > +   struct drm_bridge **bridge)
> > +{
> > +   int ret = -ENODEV;
> 
> This is only returned if !panel && !bridge. I'd consider this invalid
> usage of this function, so maybe use -EINVAL?

Yes.

> > +   struct device_node *remote;
> > +
> > +   remote = of_graph_get_remote_node(np, port, endpoint);
> > +   if (!remote)
> > +   return -ENODEV;
> > +
> > +   if (bridge)
> > +   *bridge = NULL;
> 
> I would move this ^ ...
> 
> > +   if (panel) {
> > +   *panel = of_drm_find_panel(remote);
> > +   if (*panel) {
> 
> ... here.

Okay.

> > +   ret = 0;
> > +   goto out_put;
> > +   }
> > +   ret = -EPROBE_DEFER;
> > +   }
> > +
> > +   if (bridge) {
> > +   *bridge = of_drm_find_bridge(remote);
> > +   if (*bridge)
> > +   ret = 0;
> > +   else
> > +   ret = -EPROBE_DEFER;
> > +   }
> > +out_put:
> > +   of_node_put(remote);
> > +   return ret;
> > +}

I've ended up re-writing things a bit getting rid of the goto and the 
result looks like this:

int drm_of_find_panel_or_bridge(const struct device_node *np,
int port, int endpoint,
struct drm_panel **panel,
struct drm_bridge **bridge)
{
int ret = -EPROBE_DEFER;
struct device_node *remote;

if (!panel && !bridge)
return -EINVAL;

remote = of_graph_get_remote_node(np, port, endpoint);
if (!remote)
return -ENODEV;

if (panel) {
*panel = of_drm_find_panel(remote);
if (*panel) {
if (bridge)
*bridge = NULL;
ret = 0;
}
}

/* No panel found yet, check for a bridge next. */
if (ret && bridge) {
*bridge = of_drm_find_bridge(remote);
if (*bridge)
ret = 0;
}

of_node_put(remote);
return ret;
}



Re: [PATCH 2/2] ARM: dts: qcom: Add msm8974 CoreSight components

2017-02-06 Thread Mathieu Poirier
Hello Georgi,

Looks good to me, just a cosmetic comment below...

On Fri, Feb 03, 2017 at 08:36:28PM +0200, Georgi Djakov wrote:
> From: "Ivan T. Ivanov" 
> 
> Add initial set of CoreSight components found on Qualcomm
> msm8974 and apq8074 based platforms, including the APQ8074
> Dragonboard board.
> 
> Signed-off-by: Ivan T. Ivanov 
> Signed-off-by: Georgi Djakov 
> ---
>  arch/arm/boot/dts/qcom-msm8974.dtsi | 280 
> +++-
>  1 file changed, 276 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi 
> b/arch/arm/boot/dts/qcom-msm8974.dtsi
> index 4b4c61e2ee35..ab766a36a461 100644
> --- a/arch/arm/boot/dts/qcom-msm8974.dtsi
> +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi
> @@ -68,7 +68,7 @@
>   #size-cells = <0>;
>   interrupts = <1 9 0xf04>;
>  
> - cpu@0 {
> + CPU0: cpu@0 {
>   compatible = "qcom,krait";
>   enable-method = "qcom,kpss-acc-v2";
>   device_type = "cpu";
> @@ -79,7 +79,7 @@
>   cpu-idle-states = <&CPU_SPC>;
>   };
>  
> - cpu@1 {
> + CPU1: cpu@1 {
>   compatible = "qcom,krait";
>   enable-method = "qcom,kpss-acc-v2";
>   device_type = "cpu";
> @@ -90,7 +90,7 @@
>   cpu-idle-states = <&CPU_SPC>;
>   };
>  
> - cpu@2 {
> + CPU2: cpu@2 {
>   compatible = "qcom,krait";
>   enable-method = "qcom,kpss-acc-v2";
>   device_type = "cpu";
> @@ -101,7 +101,7 @@
>   cpu-idle-states = <&CPU_SPC>;
>   };
>  
> - cpu@3 {
> + CPU3: cpu@3 {
>   compatible = "qcom,krait";
>   enable-method = "qcom,kpss-acc-v2";
>   device_type = "cpu";
> @@ -733,6 +733,278 @@
>  
>   status = "disabled";
>   };
> +
> + etr@fc322000 {
> + compatible = "arm,coresight-tmc", "arm,primecell";
> + reg = <0xfc322000 0x1000>;
> +
> + clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc 
> RPM_SMD_QDSS_A_CLK>;
> + clock-names = "apb_pclk", "atclk";
> +
> + port {
> + etr_in: endpoint {
> + slave-mode;
> + remote-endpoint = <&replicator_out0>;
> + };
> + };
> + };
> +
> + tpiu@fc318000 {
> + compatible = "arm,coresight-tpiu", "arm,primecell";
> + reg = <0xfc318000 0x1000>;
> +
> + clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc 
> RPM_SMD_QDSS_A_CLK>;
> + clock-names = "apb_pclk", "atclk";
> +
> + port {
> + tpiu_in: endpoint {
> +  slave-mode;
> +  remote-endpoint = <&replicator_out1>;
> +  };
> + };
> + };
> +
> + replicator@fc31c000 {
> + compatible = "qcom,coresight-replicator1x", 
> "arm,primecell";
> + reg = <0xfc31c000 0x1000>;
> +
> + clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc 
> RPM_SMD_QDSS_A_CLK>;
> + clock-names = "apb_pclk", "atclk";
> +
> + ports {
> + #address-cells = <1>;
> + #size-cells = <0>;
> +
> + port@0 {
> + reg = <0>;
> + replicator_out0: endpoint {
> + remote-endpoint = <&etr_in>;
> + };
> + };
> + port@1 {
> + reg = <1>;
> + replicator_out1: endpoint {
> + remote-endpoint = <&tpiu_in>;
> + };
> + };
> + port@2 {
> + reg = <0>;
> + replicator_in: endpoint {
> + slave-mode;
> + remote-endpoint = <&etf_out>;
> + };
> + };
> + };
> + };
> +
> + etf@fc307000 {
> + compatible = "arm,coresight-tmc", "arm,primecell";
> + reg = <0xfc307000 0x1000>;
> +
> +   

[PATCH -next] bus: qcom-ebi2: remove duplicated include from qcom-ebi2.c

2017-02-06 Thread Wei Yongjun
From: Wei Yongjun 

Remove duplicated include.

Signed-off-by: Wei Yongjun 
---
 drivers/bus/qcom-ebi2.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/bus/qcom-ebi2.c b/drivers/bus/qcom-ebi2.c
index a644424..374fe1d 100644
--- a/drivers/bus/qcom-ebi2.c
+++ b/drivers/bus/qcom-ebi2.c
@@ -17,7 +17,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 





Re: [PATCH v3 1/3] lib/string: introduce ascii2utf16le() helper

2017-02-06 Thread Sergei Shtylyov

Hello!

On 02/06/2017 05:03 PM, Richard Leitner wrote:


For USB string descriptors we need to convert ASCII strings to UTF16-LE.
Therefore make a simple helper function (based on ascii2desc from
drivers/usb/core/hcd.c) for that purpose.

Signed-off-by: Richard Leitner 

[...]

diff --git a/lib/string.c b/lib/string.c
index ed83562..a113e3e 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -952,3 +952,29 @@ char *strreplace(char *s, char old, char new)
return s;
 }
 EXPORT_SYMBOL(strreplace);
+
+/**
+ * ascii2utf16le() - Helper routine for producing UTF-16LE string descriptors
+ * @s: Null-terminated ASCII (actually ISO-8859-1) string
+ * @buf: Buffer for UTF-16LE string
+ * @len: Length (in bytes; may be odd) of UTF-16LE buffer.
+ *
+ * Return: The number of bytes filled in: 2*strlen(s) or @len, whichever is 
less
+ */
+unsigned int ascii2utf16le(char const *s, u8 *buf, unsigned int len)
+{
+   unsigned int n, t = 2 * strlen(s);
+
+   if (len > t)
+   len = t;
+   n = len;
+   while (n--) {
+   t = (unsigned char)*s++;
+   *buf++ = t;
+   if (!n--)
+   break;
+   *buf++ = t >> 8;


   Isn't it always 0?


+   }
+   return len;
+}
+EXPORT_SYMBOL(ascii2utf16le);


MBR, Sergei



[PATCH 2/4] arm64: arm_arch_timer: remove fsl-a008585 parameter

2017-02-06 Thread Mark Rutland
From: Ding Tianhong 

Having a command line option to flip the errata handling for a
particular erratum is a little bit unusual, and it's vastly superior to
pass this in the DT. By common consensus, it's best to kill off the
command line parameter.

Signed-off-by: Ding Tianhong 
[Mark: split patch, reword commit message]
Signed-off-by: Mark Rutland 
Acked-by: Daniel Lezcano 
---
 Documentation/admin-guide/kernel-parameters.txt |  9 -
 drivers/clocksource/arm_arch_timer.c| 14 --
 2 files changed, 23 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index be7c0d9..d8fc55a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -549,15 +549,6 @@
loops can be debugged more effectively on production
systems.
 
-   clocksource.arm_arch_timer.fsl-a008585=
-   [ARM64]
-   Format: 
-   Enable/disable the workaround of Freescale/NXP
-   erratum A-008585.  This can be useful for KVM
-   guests, if the guest device tree doesn't show the
-   erratum.  If unspecified, the workaround is
-   enabled based on the device tree.
-
clearcpuid=BITNUM [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/drivers/clocksource/arm_arch_timer.c 
b/drivers/clocksource/arm_arch_timer.c
index 4c8c3fb..6a9d031 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -101,20 +101,6 @@ static int __init early_evtstrm_cfg(char *buf)
 
 static int fsl_a008585_enable = -1;
 
-static int __init early_fsl_a008585_cfg(char *buf)
-{
-   int ret;
-   bool val;
-
-   ret = strtobool(buf, &val);
-   if (ret)
-   return ret;
-
-   fsl_a008585_enable = val;
-   return 0;
-}
-early_param("clocksource.arm_arch_timer.fsl-a008585", early_fsl_a008585_cfg);
-
 u32 __fsl_a008585_read_cntp_tval_el0(void)
 {
return __fsl_a008585_read_reg(cntp_tval_el0);
-- 
1.9.1



Re: [PATCH 1/2] spi: davinci: Use SPI framework to handle DMA mapping

2017-02-06 Thread Mark Brown
On Mon, Feb 06, 2017 at 03:57:07PM +0100, Fabien Parent wrote:

> This commit updates the davinci SPI driver in order to use the SPI
> framework to handle the DMA mapping of buffers coming from an upper
> layer.

This claims to be using the framework to do DMA mapping but...

> + if (!t->rx_buf) {
> + sg_init_table(&sg_rx, 1);
> + t->rx_dma = dma_map_single(&spi->dev, dummy_buf,
> + t->len, DMA_FROM_DEVICE);
> + if (dma_mapping_error(&spi->dev, !t->rx_dma)) {
> + ret = -EFAULT;
> + goto err_rx_map;
> + }
> + sg_dma_address(&sg_rx) = t->rx_dma;
> + sg_dma_len(&sg_rx) = t->len;

...adds code that does DMA mapping.  That's confusing?


signature.asc
Description: PGP signature


[GIT PULL] arch timer updates for v4.11

2017-02-06 Thread Mark Rutland
Hi Daniel,

These are the arch timer updates for v4.11. I wasn't sure if you wanted
to pull these or pick them, so I'm sending the full series with all
relevant tags folded in.

I'll send the arm64 documentation and the dts updates separately via
arm64 and arm-soc respectively.

Thanks,
Mark.

The following changes since commit 49def1853334396f948dcb4cedb9347abb318df5:

  Linux 4.10-rc4 (2017-01-15 16:21:59 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git 
arch-timer/updates

for you to fetch changes up to 96aace83d26887143287428d0361d67cbde38d12:

  arm64: arch_timer: work around Hisilicon erratum 161010101 (2017-02-06 
16:27:08 +)


Ding Tianhong (4):
  arm64: arch_timer: add dt binding for hisilicon-161010101 erratum
  arm64: arm_arch_timer: remove fsl-a008585 parameter
  arm64: arch_timer: introduce generic errata handling infrastructure
  arm64: arch_timer: work around Hisilicon erratum 161010101

 Documentation/admin-guide/kernel-parameters.txt|   9 --
 .../devicetree/bindings/arm/arch_timer.txt |   6 +
 arch/arm64/include/asm/arch_timer.h|  38 ++
 drivers/clocksource/Kconfig|  14 ++
 drivers/clocksource/arm_arch_timer.c   | 151 +++--
 5 files changed, 143 insertions(+), 75 deletions(-)


[PATCH 3/4] arm64: arch_timer: introduce generic errata handling infrastructure

2017-02-06 Thread Mark Rutland
From: Ding Tianhong 

Currently we have code inline in the arch timer probe path to cater for
Freescale erratum A-008585, complete with ifdeffery. This is a little
ugly, and will get worse as we try to add more errata handling.

This patch refactors the handling of Freescale erratum A-008585. Now the
erratum is described in a generic arch_timer_erratum_workaround
structure, and the probe path can iterate over these to detect errata
and enable workarounds.

This will simplify the addition and maintenance of code handling
Hisilicon erratum 161010101.

Signed-off-by: Ding Tianhong 
[Mark: split patch, correct Kconfig, reword commit message]
Signed-off-by: Mark Rutland 
Acked-by: Daniel Lezcano 
---
 arch/arm64/include/asm/arch_timer.h  | 38 +--
 drivers/clocksource/Kconfig  |  4 ++
 drivers/clocksource/arm_arch_timer.c | 92 
 3 files changed, 80 insertions(+), 54 deletions(-)

diff --git a/arch/arm64/include/asm/arch_timer.h 
b/arch/arm64/include/asm/arch_timer.h
index eaa5bbe..b4b3400 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -29,41 +29,29 @@
 
 #include 
 
-#if IS_ENABLED(CONFIG_FSL_ERRATUM_A008585)
+#if IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND)
 extern struct static_key_false arch_timer_read_ool_enabled;
-#define needs_fsl_a008585_workaround() \
+#define needs_unstable_timer_counter_workaround() \
static_branch_unlikely(&arch_timer_read_ool_enabled)
 #else
-#define needs_fsl_a008585_workaround()  false
+#define needs_unstable_timer_counter_workaround()  false
 #endif
 
-u32 __fsl_a008585_read_cntp_tval_el0(void);
-u32 __fsl_a008585_read_cntv_tval_el0(void);
-u64 __fsl_a008585_read_cntvct_el0(void);
 
-/*
- * The number of retries is an arbitrary value well beyond the highest number
- * of iterations the loop has been observed to take.
- */
-#define __fsl_a008585_read_reg(reg) ({ \
-   u64 _old, _new; \
-   int _retries = 200; \
-   \
-   do {\
-   _old = read_sysreg(reg);\
-   _new = read_sysreg(reg);\
-   _retries--; \
-   } while (unlikely(_old != _new) && _retries);   \
-   \
-   WARN_ON_ONCE(!_retries);\
-   _new;   \
-})
+struct arch_timer_erratum_workaround {
+   const char *id; /* Indicate the Erratum ID */
+   u32 (*read_cntp_tval_el0)(void);
+   u32 (*read_cntv_tval_el0)(void);
+   u64 (*read_cntvct_el0)(void);
+};
+
+extern const struct arch_timer_erratum_workaround 
*timer_unstable_counter_workaround;
 
 #define arch_timer_reg_read_stable(reg)\
 ({ \
u64 _val;   \
-   if (needs_fsl_a008585_workaround()) \
-   _val = __fsl_a008585_read_##reg();  \
+   if (needs_unstable_timer_counter_workaround())  \
+   _val = timer_unstable_counter_workaround->read_##reg();\
else\
_val = read_sysreg(reg);\
_val;   \
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 4866f7a..e132bb3 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -325,10 +325,14 @@ config ARM_ARCH_TIMER_EVTSTREAM
  This must be disabled for hardware validation purposes to detect any
  hardware anomalies of missing events.
 
+config ARM_ARCH_TIMER_OOL_WORKAROUND
+   bool
+
 config FSL_ERRATUM_A008585
bool "Workaround for Freescale/NXP Erratum A-008585"
default y
depends on ARM_ARCH_TIMER && ARM64
+   select ARM_ARCH_TIMER_OOL_WORKAROUND
help
  This option enables a workaround for Freescale/NXP Erratum
  A-008585 ("ARM generic timer may contain an erroneous
diff --git a/drivers/clocksource/arm_arch_timer.c 
b/drivers/clocksource/arm_arch_timer.c
index 6a9d031..2af0739 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -96,27 +96,58 @@ static int __init early_evtstrm_cfg(char *buf)
  */
 
 #ifdef CONFIG_FSL_ERRATUM_A008585
-DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled);
-EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled);
-
-static int fsl_a008585_enable = -1;
-
-u32 __fsl_a008585_read_cntp_tval_el0(void)
+/*
+ * The number of retries is an arbitrary value well beyond the highest number
+ * of iterations the loop has been observed to take.
+ */
+#define __fsl_a008585_read_reg(reg) ({ \
+   u64 _old, _new;

[PATCH 4/4] arm64: arch_timer: work around Hisilicon erratum 161010101

2017-02-06 Thread Mark Rutland
From: Ding Tianhong 

Erratum Hisilicon-161010101 says that the ARM generic timer counter "has
the potential to contain an erroneous value when the timer value
changes". Accesses to TVAL (both read and write) are also affected due
to the implicit counter read. Accesses to CVAL are not affected.

The workaround is to reread the system count registers until the value
of the second read is larger than the first one by less than 32. The
system counter is guaranteed not to return a wrong value twice in
back-to-back reads, and the erroneous value is always larger than the
correct one by 32. Writes to TVAL are replaced with an equivalent write
to CVAL.

Signed-off-by: Ding Tianhong 
[Mark: split patch, fix Kconfig, reword commit message]
Signed-off-by: Mark Rutland 
Acked-by: Daniel Lezcano 
---
 drivers/clocksource/Kconfig  | 10 
 drivers/clocksource/arm_arch_timer.c | 49 
 2 files changed, 59 insertions(+)

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index e132bb3..17ee71c 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -339,6 +339,16 @@ config FSL_ERRATUM_A008585
  value").  The workaround will only be active if the
  fsl,erratum-a008585 property is found in the timer node.
 
+config HISILICON_ERRATUM_161010101
+   bool "Workaround for Hisilicon Erratum 161010101"
+   default y
+   select ARM_ARCH_TIMER_OOL_WORKAROUND
+   depends on ARM_ARCH_TIMER && ARM64
+   help
+ This option enables a workaround for Hisilicon Erratum
+ 161010101. The workaround will be active if the 
hisilicon,erratum-161010101
+ property is found in the timer node.
+
 config ARM_GLOBAL_TIMER
bool "Support for the ARM global timer" if COMPILE_TEST
select CLKSRC_OF if OF
diff --git a/drivers/clocksource/arm_arch_timer.c 
b/drivers/clocksource/arm_arch_timer.c
index 2af0739..7b06aef 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -130,6 +130,47 @@ static u64 notrace fsl_a008585_read_cntvct_el0(void)
 }
 #endif
 
+#ifdef CONFIG_HISILICON_ERRATUM_161010101
+/*
+ * Verify whether the value of the second read is larger than the first by
+ * less than 32 is the only way to confirm the value is correct, so clear the
+ * lower 5 bits to check whether the difference is greater than 32 or not.
+ * Theoretically the erratum should not occur more than twice in succession
+ * when reading the system counter, but it is possible that some interrupts
+ * may lead to more than twice read errors, triggering the warning, so setting
+ * the number of retries far beyond the number of iterations the loop has been
+ * observed to take.
+ */
+#define __hisi_161010101_read_reg(reg) ({  \
+   u64 _old, _new; \
+   int _retries = 50;  \
+   \
+   do {\
+   _old = read_sysreg(reg);\
+   _new = read_sysreg(reg);\
+   _retries--; \
+   } while (unlikely((_new - _old) >> 5) && _retries); \
+   \
+   WARN_ON_ONCE(!_retries);\
+   _new;   \
+})
+
+static u32 notrace hisi_161010101_read_cntp_tval_el0(void)
+{
+   return __hisi_161010101_read_reg(cntp_tval_el0);
+}
+
+static u32 notrace hisi_161010101_read_cntv_tval_el0(void)
+{
+   return __hisi_161010101_read_reg(cntv_tval_el0);
+}
+
+static u64 notrace hisi_161010101_read_cntvct_el0(void)
+{
+   return __hisi_161010101_read_reg(cntvct_el0);
+}
+#endif
+
 #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND
 const struct arch_timer_erratum_workaround *timer_unstable_counter_workaround 
= NULL;
 EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround);
@@ -146,6 +187,14 @@ static u64 notrace fsl_a008585_read_cntvct_el0(void)
.read_cntvct_el0 = fsl_a008585_read_cntvct_el0,
},
 #endif
+#ifdef CONFIG_HISILICON_ERRATUM_161010101
+   {
+   .id = "hisilicon,erratum-161010101",
+   .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0,
+   .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0,
+   .read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
+   },
+#endif
 };
 #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */
 
-- 
1.9.1



[PATCH 1/4] arm64: arch_timer: add dt binding for hisilicon-161010101 erratum

2017-02-06 Thread Mark Rutland
From: Ding Tianhong 

This erratum describes a bug in logic outside the core, so MIDR can't be
used to identify its presence, and reading an SoC-specific revision
register from common arch timer code would be awkward.  So, describe it
in the device tree.

Signed-off-by: Ding Tianhong 
Acked-by: Rob Herring 
Signed-off-by: Mark Rutland 
---
 Documentation/devicetree/bindings/arm/arch_timer.txt | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt 
b/Documentation/devicetree/bindings/arm/arch_timer.txt
index ad440a2..e926aea 100644
--- a/Documentation/devicetree/bindings/arm/arch_timer.txt
+++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
@@ -31,6 +31,12 @@ to deliver its interrupts via SPIs.
   This also affects writes to the tval register, due to the implicit
   counter read.
 
+- hisilicon,erratum-161010101 : A boolean property. Indicates the
+  presence of Hisilicon erratum 161010101, which says that reading the
+  counters is unreliable in some cases, and reads may return a value 32
+  beyond the correct value. This also affects writes to the tval
+  registers, due to the implicit counter read.
+
 ** Optional properties:
 
 - arm,cpu-registers-not-fw-configured : Firmware does not initialize
-- 
1.9.1



Re: [PATCHv4 0/5] Fix compatible mmap() return pointer over 4Gb

2017-02-06 Thread Dmitry Safonov

On 01/30/2017 03:04 PM, Dmitry Safonov wrote:

Changes since v3:
- fixed usage of 64-bit random mask for 32-bit mm->mmap_compat_base,
  during introducing mmap_compat{_legacy,}_base

Changes since v2:
- don't distinguish native and compat tasks by TIF_ADDR32,
  introduced mmap_compat{_legacy,}_base which allows to treat them
  the same
- fixed kbuild errors

Changes since v1:
- Recalculate mmap_base instead of using max possible virtual address
  for compat/native syscall. That will make policy for allocation the
  same in 32-bit binaries and in 32-bit syscalls in 64-bit binaries.
  I need this because sys_mmap() in restored 32-bit process shouldn't
  hit the stack area.
- Fixed mmap() with MAP_32BIT flag in the same usecases
- used in_compat_syscall() helper rather than TS_COMPAT check (Andy noticed)
- introduced find_top() helper as suggested by Andy to simplify code
- fixed test error-handling: it checked the result of sys_mmap() with
  MMAP_FAILED, which is not correct, as it calls the raw syscall - now
  checks return value to be aligned to PAGE_SIZE.

Description from v1 [2]:

A fix for bug in mmap() that I referenced in [1].
Also selftest for it.


Gentle ping. Any thought on this?



[1]: https://marc.info/?l=linux-kernel&m=148311451525315
[2]: https://marc.info/?l=linux-kernel&m=148415888707662

Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: "H. Peter Anvin" 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: x...@kernel.org
Cc: linux...@kvack.org

Dmitry Safonov (5):
  x86/mm: split arch_mmap_rnd() on compat/native versions
  x86/mm: introduce mmap{,_legacy}_base
  x86/mm: fix 32-bit mmap() for 64-bit ELF
  x86/mm: check in_compat_syscall() instead TIF_ADDR32 for
mmap(MAP_32BIT)
  selftests/x86: add test to check compat mmap() return addr

 arch/Kconfig   |   7 +
 arch/x86/Kconfig   |   1 +
 arch/x86/include/asm/elf.h |   4 +-
 arch/x86/include/asm/processor.h   |   3 +-
 arch/x86/kernel/sys_x86_64.c   |  32 +++-
 arch/x86/mm/mmap.c |  89 +++
 include/linux/mm_types.h   |   5 +
 tools/testing/selftests/x86/Makefile   |   2 +-
 tools/testing/selftests/x86/test_compat_mmap.c | 208 +
 9 files changed, 311 insertions(+), 40 deletions(-)
 create mode 100644 tools/testing/selftests/x86/test_compat_mmap.c




--
 Dmitry


[PATCH] x86/kconfig: remove misleading note regarding hibernation and KASLR

2017-02-06 Thread Niklas Cassel
From: Niklas Cassel 

There used to be a restriction with KASLR and hibernation, but this is no
longer true, and since commit 65fe935dd238 ("x86/KASLR, x86/power: Remove
x86 hibernation restrictions") the parameter "kaslr" no longer exists.

Signed-off-by: Niklas Cassel 
---
 arch/x86/Kconfig | 4 
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e0285774bdda..04e5bf5c4666 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1997,10 +1997,6 @@ config RANDOMIZE_BASE
  theoretically possible, but the implementations are further
  limited due to memory layouts.
 
- If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot
- time. To enable it, boot with "kaslr" on the kernel command
- line (which will also disable hibernation).
-
  If unsure, say N.
 
 # Relocation on x86 needs some additional build support
-- 
2.1.4



Re: [PATCH v4 13/15] livepatch: change to a per-task consistency model

2017-02-06 Thread Petr Mladek
On Fri 2017-02-03 14:39:16, Josh Poimboeuf wrote:
> On Thu, Feb 02, 2017 at 12:51:16PM +0100, Petr Mladek wrote:
> > !!! This is the right version. I am sorry again for the confusion. !!!
> >
> > >  static int __klp_disable_patch(struct klp_patch *patch)
> > >  {
> > > - struct klp_object *obj;
> > > + if (klp_transition_patch)
> > > + return -EBUSY;
> > >  
> > >   /* enforce stacking: only the last enabled patch can be disabled */
> > >   if (!list_is_last(&patch->list, &klp_patches) &&
> > >   list_next_entry(patch, list)->enabled)
> > >   return -EBUSY;
> > >  
> > > - pr_notice("disabling patch '%s'\n", patch->mod->name);
> > > + klp_init_transition(patch, KLP_UNPATCHED);
> > >  
> > > - klp_for_each_object(patch, obj) {
> > > - if (obj->patched)
> > > - klp_unpatch_object(obj);
> > > - }
> > > + /*
> > > +  * Enforce the order of the klp_target_state write in
> > > +  * klp_init_transition() and the TIF_PATCH_PENDING writes in
> > > +  * klp_start_transition() to ensure that klp_update_patch_state()
> > > +  * doesn't set a task->patch_state to KLP_UNDEFINED.
> > > +  */
> > > + smp_wmb();
> > 
> > The description is not clear. The klp_target_state manipulation
> > is synchronized by another barrier inside klp_init_transition().
> 
> Yeah.  I should also update the barrier comment in klp_init_transition()
> to clarify that it also does this.
> 
> > A similar barrier is in __klp_enable_patch() and it is correctly
> > described there:
> > 
> >It enforces the order of the func->transition writes in
> >klp_init_transition() and the ops->func_stack writes in
> >klp_patch_object(). The corresponding barrier is in
> >klp_ftrace_handler().
> > 
> > But we do not modify ops->func_stack in __klp_disable_patch().
> > So we need another explanation.
> > 
> > > Huh, I spent a few hours thinking about it. I am almost sure
> > > that it is not needed. But I am not 100% sure. The many times
> > > rewritten summary looks like:
> > 
> > /*
> > >  * Enforce the order of func->transition write in
> > >  * klp_init_transition() against TIF_PATCH_PENDING writes
> > >  * in klp_start_transition(). It makes sure that
> > >  * klp_ftrace_handler() will see func->transition set
> > >  * after the task is migrated by klp_update_patch_state().
> >  *
> >  * The barrier probably is not needed because the task
> >  * must not be migrated when being inside klp_ftrace_handler()
> >  * and there is another full barrier in
> >  * klp_update_patch_state().
> >  * But this is slow path and better be safe than sorry.
> >  */
> >  smp_wmb();
> 
> This mostly makes sense,  but I think the barrier *is* needed for
> ordering func->transition and TIF_PATCH_PENDING writes for the rare case
> where klp_ftrace_handler() is called right after
> klp_update_patch_state(), which could be possible in the idle loop, for
> example.
> 
> CPU0  CPU1
> __klp_disable_patch()
>   klp_init_transition()
> func->transition = true;
>   (...no barrier...)
>   klp_start_transition()
> set TIF_PATCH_PENDING
> 
>   klp_update_patch_state()
> if (test_and_clear(TIF_PATCH_PENDING))
>   task->patch_state = KLP_UNPATCHED;
>   ...
>   klp_ftrace_handler()
> smp_rmb();
> if (unlikely(func->transition)) <--- false 
> (patched)
>   ...
>   klp_ftrace_handler()
> smp_rmb();
> if (unlikely(func->transition)) <--- true 
> (unpatched)

You are right. I was able to find many scenarios where the barrier
was not needed. But it is needed in this one.

The first paragraph should be enough then:

/*
 * Enforce the order of func->transition write in
 * klp_init_transition() against TIF_PATCH_PENDING writes
 * in klp_start_transition(). It makes sure that
 * klp_ftrace_handler() will see func->transition set
 * after the task is migrated by klp_update_patch_state().
 */
 smp_wmb();


> So how about:
> 
>   /*
>* Enforce the order of the func->transition writes in
>* klp_init_transition() and the TIF_PATCH_PENDING writes in
>* klp_start_transition().  In the rare case where klp_ftrace_handler()
>* is called shortly after klp_update_patch_state() switches the task,
>* this ensures the handler sees func->transition is set.
>*/
>   smp_wmb();

Looks good to me.


> > > + klp_start_transition();
> > >   patch->enabled = false;
> > >  
> > >   return 0;
> > > @@ -337,6 +341,9 @@ static int __klp_enable_patch(struct klp_patch *patch)
> > >   struct klp_object *obj;
> > >   int ret;
> > >  
> > > + if (klp_transition_patch)

Re: [PATCHv7 4/8] printk: always use deferred printk when flush printk_safe lines

2017-02-06 Thread Sergey Senozhatsky
On (02/06/17 08:27), Steven Rostedt wrote:
[..]
> > > > just in case, the patch (which I prefer to be ignored)  
> > > 
> > > let's keep printk_safe_flush_line().  
> > 
> > I do not have strong opinion but I would slightly prefer
> > to keep the helper function. The use of printk_deferred()
> > is a bit tricky and it is better to have only one copy.
> > 
> > Steven, could you live with the original patch, please?
> 
> Sure, but make it into a static inline.

well, I'll be surprised if gcc doesn't inline that simple one-liner.

attached. no conflicts, the patch can replace 0004.

===8<===8<===

>From 69bbb0f436a2a89ec41a5831c03490e0a78ce12e Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky 
Date: Tue, 27 Dec 2016 23:16:07 +0900
Subject: [PATCH] printk: always use deferred printk when flush printk_safe
 lines

Always use printk_deferred() in printk_safe_flush_line().
Flushing can be done from NMI or printk_safe contexts (when
we are in panic), so we can't call console drivers, yet still
want to store the messages in the logbuf buffer. Therefore we
use a deferred printk version.

Signed-off-by: Sergey Senozhatsky 
Suggested-by: Petr Mladek 
Signed-off-by: Petr Mladek 
---
 kernel/printk/printk_safe.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index efc89a4e9df5..5214d326d3ba 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -110,17 +110,15 @@ static int printk_safe_log_store(struct printk_safe_seq_buf *s,
return add;
 }
 
-static void printk_safe_flush_line(const char *text, int len)
+static inline void printk_safe_flush_line(const char *text, int len)
 {
/*
-* The buffers are flushed in NMI only on panic.  The messages must
-* go only into the ring buffer at this stage.  Consoles will get
-* explicitly called later when a crashdump is not generated.
+* Avoid any console drivers calls from here, because we may be
+* in NMI or printk_safe context (when in panic). The messages
+* must go only into the ring buffer at this stage.  Consoles will
+* get explicitly called later when a crashdump is not generated.
 */
-   if (in_nmi())
-   printk_deferred("%.*s", len, text);
-   else
-   printk("%.*s", len, text);
+   printk_deferred("%.*s", len, text);
 }
 
 /* printk part of the temporary buffer line by line */
-- 
2.11.0



Re: [PATCH net] ipv6: Fix IPv6 packet loss in scenarios involving roaming + snooping switches

2017-02-06 Thread David Miller
From: Linus Lüssing 
Date: Fri,  3 Feb 2017 08:11:03 +0100

> When for instance a mobile Linux device roams from one access point to
> another with both APs sharing the same broadcast domain and a
> multicast snooping switch in between:
> 
> 1)(c) <~~~> (AP1) <--[SSW]--> (AP2)
> 
> 2)  (AP1) <--[SSW]--> (AP2) <~~~> (c)
> 
> Then currently IPv6 multicast packets will get lost for (c) until an
> MLD Querier sends its next query message. The packet loss occurs
> because upon roaming the Linux host so far stayed silent regarding
> MLD and the snooping switch will therefore be unaware of the
> multicast topology change for a while.
> 
> This patch fixes this by always resending MLD reports when an interface
> change happens, for instance from NO-CARRIER to CARRIER state.
> 
> Signed-off-by: Linus Lüssing 

Looks good to me, applied, thanks.


RE: [PATCH] [net-next] mlxsw: add psample dependency for spectrum

2017-02-06 Thread Yotam Gigi
>-Original Message-
>From: netdev-ow...@vger.kernel.org [mailto:netdev-ow...@vger.kernel.org] On
>Behalf Of Arnd Bergmann
>Sent: Monday, February 06, 2017 6:27 PM
>To: Jiri Pirko ; Ido Schimmel 
>Cc: Arnd Bergmann ; David S. Miller ;
>Vadim Pasternak ; Elad Raz ; Ivan
>Vecera ; net...@vger.kernel.org; linux-kernel@vger.kernel.org
>Subject: [PATCH] [net-next] mlxsw: add psample dependency for spectrum
>
>When PSAMPLE is a loadable module, spectrum must not be built-in:
>
>drivers/net/built-in.o: In function `mlxsw_sp_rx_listener_sample_func':
>spectrum.c:(.text+0xe357e): undefined reference to `psample_sample_packet'
>
>This adds a Kconfig dependency to enforce usable configurations.
>
>Fixes: 98d0f7b9acda ("mlxsw: spectrum: Add packet sample offloading support")
>Signed-off-by: Arnd Bergmann 

Acked-by: Yotam Gigi 

>---
> drivers/net/ethernet/mellanox/mlxsw/Kconfig | 1 +
> 1 file changed, 1 insertion(+)
>
>diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
>b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
>index 76a7574c3c7d..ef23eaedc2ff 100644
>--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
>+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
>@@ -73,6 +73,7 @@ config MLXSW_SWITCHX2
> config MLXSW_SPECTRUM
>   tristate "Mellanox Technologies Spectrum support"
>   depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV &&
>VLAN_8021Q
>+  depends on PSAMPLE || PSAMPLE=n
>   select PARMAN
>   default m
>   ---help---
>--
>2.9.0



Re: [PATCH] ARM: defconfigs: make NF_CT_PROTO_SCTP and NF_CT_PROTO_UDPLITE built-in

2017-02-06 Thread Arnd Bergmann
On Fri, Feb 3, 2017 at 5:46 PM, Arnd Bergmann  wrote:
> The symbols can no longer be used as loadable modules, leading to a harmless 
> Kconfig
> warning:
>
> arch/arm/configs/imote2_defconfig:60:warning: symbol value 'm' invalid for 
> NF_CT_PROTO_UDPLITE
> arch/arm/configs/imote2_defconfig:59:warning: symbol value 'm' invalid for 
> NF_CT_PROTO_SCTP
> arch/arm/configs/ezx_defconfig:68:warning: symbol value 'm' invalid for 
> NF_CT_PROTO_UDPLITE
> arch/arm/configs/ezx_defconfig:67:warning: symbol value 'm' invalid for 
> NF_CT_PROTO_SCTP
>
> Let's make them built-in.
>
> Signed-off-by: Arnd Bergmann 

Added to the fixes branch now, as I see the warnings in kernelci for ARM.

Arnd


Re: [PATCH v3 03/14] mm: use pmd lock instead of racy checks in zap_pmd_range()

2017-02-06 Thread Zi Yan
On 6 Feb 2017, at 10:07, Kirill A. Shutemov wrote:

> On Sun, Feb 05, 2017 at 11:12:41AM -0500, Zi Yan wrote:
>> From: Zi Yan 
>>
>> Originally, zap_pmd_range() checks pmd value without taking pmd lock.
>> This can cause pmd_protnone entry not being freed.
>>
>> Because there are two steps in changing a pmd entry to a pmd_protnone
>> entry. First, the pmd entry is cleared to a pmd_none entry, then,
>> the pmd_none entry is changed into a pmd_protnone entry.
>> The racy check, even with barrier, might only see the pmd_none entry
>> in zap_pmd_range(), thus, the mapping is neither split nor zapped.
>
> That's definitely a good catch.
>
> But I don't agree with the solution. Taking pmd lock on each
> zap_pmd_range() is a significant hit to the scalability of the code path.
> Yes, split ptl lock helps, but it would be nice to avoid the lock in the
> first place.
>
> Can we fix change_huge_pmd() instead? Is there a reason why we cannot
> setup the pmd_protnone() atomically?

If you want to setup the pmd_protnone() atomically, we need a new way of
changing pmds, like pmdp_huge_cmp_exchange_and_clear(). Otherwise, due to
the nature of racy check of pmd in zap_pmd_range(), it is impossible to
eliminate the chance of catching this bug if pmd_protnone() is setup
in two steps: first, clear it, second, set it.

However, if we use pmdp_huge_cmp_exchange_and_clear() to change pmds from now 
on,
instead of current two-step approach, it will eliminate the possibility of
using batched TLB shootdown optimization (introduced by Mel Gorman for base 
page swapping)
when THP is swappable in the future. Maybe other optimizations?

Why do you think holding pmd lock is bad? In zap_pte_range(), pte lock
is also held when each PTE is zapped.

BTW, I am following Naoya's suggestion and going to take pmd lock inside
the loop. So pmd lock is held when each pmd is being checked and it will be 
released
when the pmd entry is zapped, split, or pointed to a page table.
Does it still hurt much on performance?

Thanks.



>
> Mel? Rik?
>
>>
>> Later, in free_pmd_range(), pmd_none_or_clear() will see the
>> pmd_protnone entry and clear it as a pmd_bad entry. Furthermore,
>> since the pmd_protnone entry is not properly freed, the corresponding
>> deposited pte page table is not freed either.
>>
>> This causes memory leak or kernel crashing, if VM_BUG_ON() is enabled.
>>
>> This patch relies on __split_huge_pmd_locked() and
>> __zap_huge_pmd_locked().
>>
>> Signed-off-by: Zi Yan 
>> ---
>>  mm/memory.c | 24 +++-
>>  1 file changed, 11 insertions(+), 13 deletions(-)
>>
>> diff --git a/mm/memory.c b/mm/memory.c
>> index 3929b015faf7..7cfdd5208ef5 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -1233,33 +1233,31 @@ static inline unsigned long zap_pmd_range(struct 
>> mmu_gather *tlb,
>>  struct zap_details *details)
>>  {
>>  pmd_t *pmd;
>> +spinlock_t *ptl;
>>  unsigned long next;
>>
>>  pmd = pmd_offset(pud, addr);
>> +ptl = pmd_lock(vma->vm_mm, pmd);
>>  do {
>>  next = pmd_addr_end(addr, end);
>>  if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
>>  if (next - addr != HPAGE_PMD_SIZE) {
>>  VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
>>  !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
>> -__split_huge_pmd(vma, pmd, addr, false, NULL);
>> -} else if (zap_huge_pmd(tlb, vma, pmd, addr))
>> -goto next;
>> +__split_huge_pmd_locked(vma, pmd, addr, false);
>> +} else if (__zap_huge_pmd_locked(tlb, vma, pmd, addr))
>> +continue;
>>  /* fall through */
>>  }
>> -/*
>> - * Here there can be other concurrent MADV_DONTNEED or
>> - * trans huge page faults running, and if the pmd is
>> - * none or trans huge it can change under us. This is
>> - * because MADV_DONTNEED holds the mmap_sem in read
>> - * mode.
>> - */
>> -if (pmd_none_or_trans_huge_or_clear_bad(pmd))
>> -goto next;
>> +
>> +if (pmd_none_or_clear_bad(pmd))
>> +continue;
>> +spin_unlock(ptl);
>>  next = zap_pte_range(tlb, vma, pmd, addr, next, details);
>> -next:
>>  cond_resched();
>> +spin_lock(ptl);
>>  } while (pmd++, addr = next, addr != end);
>> +spin_unlock(ptl);
>>
>>  return addr;
>>  }
>> -- 
>> 2.11.0
>>
>> --
>> To unsubscribe, send a message with 'unsubscribe linux-mm' in
>> the body to majord...@kvack.org.  For more info on Linux MM,
>> see: http://www.linux-mm.org/ .
>> Don't email: mailto:"d...@kvack.org";> em...@kvack.org 
>
> -- 
>  Kirill A. Shutemov


--
Best Regards
Yan Zi


signature.asc
Description: OpenPGP digital signature


Re: [PATCH v6 3/3] spi: acpi: Initialize modalias from of_compatible

2017-02-06 Thread Mark Brown
On Sun, Feb 05, 2017 at 04:30:14PM +, Dan O'Donovan wrote:
> When using devicetree spi_device.modalias is set to the compatible
> string with the vendor prefix removed. For SPI devices described via
> ACPI the spi_device.modalias string is initialized by acpi_device_hid.
> When using ACPI and DT ids this string ends up something like "PRP0001".

Acked-by: Mark Brown 


signature.asc
Description: PGP signature


Re: [PATCH v1 2/3] dt: Document for the ZTE zx296718 I2S controller

2017-02-06 Thread Mark Brown
On Mon, Feb 06, 2017 at 09:50:19PM +0800, Baoyou Xie wrote:
> This patch documents the devicetree for the ZTE's zx296718
> I2S audio controller.

Oh, this is the binding document.  Me not seeing this is a perfect
example of why you should always use subject lines matching the style
for the subsystem - if you don't it's more likely people will not see
relevant patches.


signature.asc
Description: PGP signature


Re: [PATCH v1 3/3] ASoC: zx-i2s: support zx296718 SoC for ZTE's i2s controller driver

2017-02-06 Thread Mark Brown
On Mon, Feb 06, 2017 at 09:50:20PM +0800, Baoyou Xie wrote:

> +static void zx_i2s_set_clk(struct zx_i2s_info *i2s,
> +unsigned int ch_num, unsigned int sample_rate)
> +{
> + unsigned long val = sample_rate * ch_num * CLK_RAT;
> +
> + clk_set_rate(i2s->dai_wclk, val);
> +
> + if (i2s->dai_pclk)
> + clk_set_rate(i2s->dai_pclk, val);
> +}

There's no error checking here, if we don't care if these calls succeed
we could save some power and not enable the clocks.  This function is
also very small and has exactly one user so it's not clear why it's
split out.

>  static const struct of_device_id zx_i2s_dt_ids[] = {
>   { .compatible = "zte,zx296702-i2s", },
> + { .compatible = "zte,zx296718-i2s", },
>   {}
>  };
>  MODULE_DEVICE_TABLE(of, zx_i2s_dt_ids);

This adds a new DT binding but does not update the binding document.


signature.asc
Description: PGP signature


Re: [PATCH 3/3] net: ethernet: bgmac: driver power manangement

2017-02-06 Thread Jon Mason
On Fri, Feb 3, 2017 at 9:16 PM, Florian Fainelli  wrote:
> On 02/03/2017 01:39 PM, Jon Mason wrote:
>> From: Joey Zhong 
>>
>> Implements suspend/resume, external phy 54810 is assumed
>> to remain powered up during deep-sleep for wake-on-lane.
>
> s/wake-on-lane/Wake-on-LAN, are you positive phy_stop() is not
> suspending the PHY and issuing BMCR_PWRDOWN write?
>
> This also seems incomplete in that, if the device is really configured
> for Wake-on-LAN (through ethtool) you should call
> device_set_wakeup_capable() and then check for device_may_wakeup()
> during suspend or resume to know which part of the suspend/resume
> portion should be done. You could refer to bcmgenet for an example.

After some internal discussion, WOL is not supported by our SVK.  So,
we have no way of testing it.  Given this limitation, I'm removing the
WOL comment until such time as we can actually test the logic.

>>
>> +int bgmac_enet_suspend(struct bgmac *bgmac)
>> +{
>> + netdev_info(bgmac->net_dev, "Suspending\n");
>
> remove that message
>
>> +
>> + if (netif_running(bgmac->net_dev)) {
>> + netif_stop_queue(bgmac->net_dev);
>> +
>> + napi_disable(&bgmac->napi);
>> +
>> + netif_tx_lock(bgmac->net_dev);
>> + netif_device_detach(bgmac->net_dev);
>> + netif_tx_unlock(bgmac->net_dev);
>> +
>> + bgmac_chip_intrs_off(bgmac);
>> + bgmac_chip_reset(bgmac);
>> + bgmac_dma_cleanup(bgmac);
>> + }
>
> Can you change the indentation to test for netif_running() first and
> return 0 in that case?
>
>> +
>> + phy_stop(bgmac->net_dev->phydev);
>> +
>> + return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(bgmac_enet_suspend);
>> +
>> +int bgmac_enet_resume(struct bgmac *bgmac)
>> +{
>> + int rc;
>> +
>> + netdev_info(bgmac->net_dev, "Resuming\n");
>
> Same here, this needs to be removed.

Will do this and above.

Thanks,
Jon

>
>> +
>> + phy_start(bgmac->net_dev->phydev);
>> +
>> + if (netif_running(bgmac->net_dev)) {
>> + rc = bgmac_dma_init(bgmac);
>> + if (rc)
>> + return rc;
>> +
>> + bgmac_chip_init(bgmac);
>> +
>> + napi_enable(&bgmac->napi);
>> +
>> + netif_tx_lock(bgmac->net_dev);
>> + netif_device_attach(bgmac->net_dev);
>> + netif_tx_unlock(bgmac->net_dev);
>> +
>> + netif_start_queue(bgmac->net_dev);
>> + }
> --
> Florian


Re: [RFC 1/1] shiftfs: uid/gid shifting bind mount

2017-02-06 Thread James Bottomley
On Mon, 2017-02-06 at 08:38 +0200, Amir Goldstein wrote:
> On Mon, Feb 6, 2017 at 5:25 AM, J. R. Okajima 
> wrote:
> > James Bottomley:
> > > This allows any subtree to be uid/gid shifted and bound
> > > elsewhere.  It
> > :::
> > 
> > Interesting.
> > But I am afraid that the inconsistency problem of the inode numbers 
> > will happen.
> > 
> 
> Yet another example that overlayfs already is in the process of 
> solving (it is fixed for stat of merged directory inode).
> In fact, fir the case of single layer overlay (as well as shiftfs) 
> the solution is trivial - preserve underlying inode st_ino/d_ino and 
> use the overlayed fs st_dev.

not sure I follow what st_ino is, do you mean  s_root->d_inode->i_ino?
or did you mean s_dev (which is more traditional)?

The problem with this is there's no way to ensure global uniqueness in
a mapping that goes (ino, ino) -> (ino) (or (s_dev, ino) -> (ino)) and
I believe global uniqueness is more important because the i_ino is used
in the hashed lookups.  Secondly you're not guaranteed that s_root
->d_inode->i_ino is unique ... historically a lot of filesystems use a
well known inode number as the root, that's why filehandles
traditionally used something representing the device and the inode
number (we also have s_dev uniqueness problems for tmpfs which is used
in some overlays).

We can certainly construct a filehandle using an export operations
override that is unique and can be used to lookup the underlying object
(based on the underlying device and inode).

James



[PATCH -next] MIPS: sysmips: Remove duplicated include from syscall.c

2017-02-06 Thread Wei Yongjun
From: Wei Yongjun 

Remove duplicated include.

Signed-off-by: Wei Yongjun 
---
 arch/mips/kernel/syscall.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 735733f..c86ddba 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -36,7 +36,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 
 /*





[PATCH v2] irqchip/gicv3: Fix GICR_WAKE & GICD_IGROUPR accesses from non-secure

2017-02-06 Thread Shanker Donthineni
On systems where the GIC supports two security states, accesses to both the
GICR_WAKER and GICD_IGROUPR registers are RAZ/WI from non-secure.
The function gic_enable_redist() to wake/sleep redistributor is not
harmful at all, but it is confusing looking at the code. The current
code checks the single security state based on bit GICD_CTLR.DS which
is absolutely incorrect. The disable security bit GICD_CTLR.DS is RAZ
to non-secure. The GICD_TYPER.SecurityExtn indicates whether the GIC
implementation supports two security states or only one security
state.

Let's introduce a new helper function gic_has_security_extn() to
know GIC security state. Use this function to bypass the code that
is touching the registers GICR_WAKE and GICD_IGROUPR.

Signed-off-by: Shanker Donthineni 
---
Changes since v1:
Edit comments.
Do explicit writes to IGRPMODR if GIC security is disabled.

 drivers/irqchip/irq-gic-v3.c   | 40 +++---
 include/linux/irqchip/arm-gic-v3.h |  1 +
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index c132f29..31b54ab 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -130,12 +130,28 @@ static u64 __maybe_unused gic_read_iar(void)
 }
 #endif
 
+/**
+ * Check whether the GIC implementation supports two security
+ * states or only one security state.
+ * return true if it has two security states else return false.
+ */
+static bool gic_has_security_extn(void)
+{
+   u32 typer = readl_relaxed(gic_data.dist_base + GICD_TYPER);
+
+   return !!(typer & GICD_TYPER_SECURITY_EXTN);
+}
+
 static void gic_enable_redist(bool enable)
 {
void __iomem *rbase;
u32 count = 100;/* 1s! */
u32 val;
 
+   /* With two security states, GICR_WAKER is RAZ/WI to non-secure */
+   if (gic_has_security_extn())
+   return;
+
rbase = gic_data_rdist_rd_base();
 
val = readl_relaxed(rbase + GICR_WAKER);
@@ -399,14 +415,24 @@ static void __init gic_dist_init(void)
 
/*
 * Configure SPIs as non-secure Group-1. This will only matter
-* if the GIC only has a single security state. This will not
-* do the right thing if the kernel is running in secure mode,
-* but that's not the intended use case anyway.
+* if the GIC only has a single security state. This will do
+* the right thing if the kernel is running in secure mode and
+* with assumption all the SPIs are allocated to Linux, but
+* that's not the intended use case anyway.
+*
+* IGRPMODR  IGROUPR  Definition ShortName
+*0 0 Secure Group0  G0S
+*0 1 Non-secure Group1  G1NS
+*1 0 Secure Group1  G1S
+*1 1 Reserved   treated as G1NS
 */
-   for (i = 32; i < gic_data.irq_nr; i += 32)
-   writel_relaxed(~0, base + GICD_IGROUPR + i / 8);
-
-   gic_dist_config(base, gic_data.irq_nr, gic_dist_wait_for_rwp);
+   if (!gic_has_security_extn()) {
+   for (i = 32; i < gic_data.irq_nr; i += 32) {
+   writel_relaxed(0, base + GICD_IGRPMODR + i / 8);
+   writel_relaxed(~0, base + GICD_IGROUPR + i / 8);
+   }
+   gic_dist_config(base, gic_data.irq_nr, gic_dist_wait_for_rwp);
+   }
 
/* Enable distributor with ARE, Group1 */
writel_relaxed(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | 
GICD_CTLR_ENABLE_G1,
diff --git a/include/linux/irqchip/arm-gic-v3.h 
b/include/linux/irqchip/arm-gic-v3.h
index e808f8a..aab00e5 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -70,6 +70,7 @@
 
 #define GICD_TYPER_LPIS (1U << 17)
 #define GICD_TYPER_MBIS (1U << 16)
+#define GICD_TYPER_SECURITY_EXTN   (1U << 10)
 
 #define GICD_TYPER_ID_BITS(typer)  ((((typer) >> 19) & 0x1f) + 1)
 #define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32)
-- 
Qualcomm Datacenter Technologies, Inc. on behalf of the Qualcomm Technologies, 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH] [net-next] mlxsw: add psample dependency for spectrum

2017-02-06 Thread Arnd Bergmann
When PSAMPLE is a loadable module, spectrum must not be built-in:

drivers/net/built-in.o: In function `mlxsw_sp_rx_listener_sample_func':
spectrum.c:(.text+0xe357e): undefined reference to `psample_sample_packet'

This adds a Kconfig dependency to enforce usable configurations.

Fixes: 98d0f7b9acda ("mlxsw: spectrum: Add packet sample offloading support")
Signed-off-by: Arnd Bergmann 
---
 drivers/net/ethernet/mellanox/mlxsw/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig 
b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 76a7574c3c7d..ef23eaedc2ff 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -73,6 +73,7 @@ config MLXSW_SWITCHX2
 config MLXSW_SPECTRUM
tristate "Mellanox Technologies Spectrum support"
depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q
+   depends on PSAMPLE || PSAMPLE=n
select PARMAN
default m
---help---
-- 
2.9.0



Re: [RFC 1/1] shiftfs: uid/gid shifting bind mount

2017-02-06 Thread J. R. Okajima
James Bottomley:
> Yes, I know the problem.  However, I believe most current linux
> filesystems no longer guarantee stable, for the lifetime of the file,
> inode numbers.  The usual docker container root is overlayfs, which,
> similarly doesn't support stable inode numbers.  I see the odd
> complaint about docker with overlayfs having unstable inode numbers,
> but none seems to have any serious repercussions.

I think it is serious.
Reusing the backend fs' inum, as Amir wrote, is a good approach.
Based on this, I'd suggest that you support hardlinks.

bakend_dentry = lookup_one_len()
if (d_inode->i_nlink != 1)
shiftfs_inode = ilookup();
if (!shiftfs_inode) {
shiftfs_inode = new_inode();
shiftfs_inode->i_ino = bakend_dentry->d_inode->i_ino;
}


J. R. Okajima


[PATCH -next] soc: zte: pm_domains: Remove redundant dev_err call in zx2967_pd_probe()

2017-02-06 Thread Wei Yongjun
From: Wei Yongjun 

There is already an error message within devm_ioremap_resource(),
so remove the dev_err() call to avoid a redundant
error message.

Signed-off-by: Wei Yongjun 
---
 drivers/soc/zte/zx2967_pm_domains.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/soc/zte/zx2967_pm_domains.c 
b/drivers/soc/zte/zx2967_pm_domains.c
index 61c8d84..c42aeaa 100644
--- a/drivers/soc/zte/zx2967_pm_domains.c
+++ b/drivers/soc/zte/zx2967_pm_domains.c
@@ -125,10 +125,8 @@ int zx2967_pd_probe(struct platform_device *pdev,
 
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
pcubase = devm_ioremap_resource(&pdev->dev, res);
-   if (IS_ERR(pcubase)) {
-   dev_err(&pdev->dev, "ioremap fail.\n");
+   if (IS_ERR(pcubase))
return PTR_ERR(pcubase);
-   }
 
for (i = 0; i < domain_num; ++i) {
zx_pm_domains[i]->power_on = zx2967_power_on;



<    1   2   3   4   5   6   7   8   9   10   >