Re: [PATCH v2 03/19] powerpc: Mark variables as unused

2018-04-04 Thread Michael Ellerman
LEROY Christophe  writes:

> Mathieu Malaterre  a écrit :
>
>> Add gcc attribute unused for two variables. Fix warnings treated as errors
>> with W=1:
>>
>>   arch/powerpc/kernel/prom_init.c:1388:8: error: variable ‘path’ set  
>> but not used [-Werror=unused-but-set-variable]
>>
>> Suggested-by: Christophe Leroy 
>> Signed-off-by: Mathieu Malaterre 
>> ---
>> v2: move path within ifdef DEBUG_PROM
>>
>>  arch/powerpc/kernel/prom_init.c | 6 +++---
>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/prom_init.c  
>> b/arch/powerpc/kernel/prom_init.c
>> index acf4b2e0530c..4163b11abb6c 100644
>> --- a/arch/powerpc/kernel/prom_init.c
>> +++ b/arch/powerpc/kernel/prom_init.c
>> @@ -603,7 +603,7 @@ static void __init early_cmdline_parse(void)
>>  const char *opt;
>>
>>  char *p;
>> -int l = 0;
>> +int l __maybe_unused = 0;
>>
>>  prom_cmd_line[0] = 0;
>>  p = prom_cmd_line;
>> @@ -1385,7 +1385,7 @@ static void __init reserve_mem(u64 base, u64 size)
>>  static void __init prom_init_mem(void)
>>  {
>>  phandle node;
>> -char *path, type[64];
>> +char *path __maybe_unused, type[64];
>
> You should enclose that in an ifdef DEBUG_PROM instead of hiding the warning

I disagree, the result is horrible:

 static void __init prom_init_mem(void)
 {
phandle node;
-   char *path, type[64];
+#ifdef DEBUG_PROM
+   char *path;
+#endif
+   char type[64];
unsigned int plen;
cell_t *p, *endp;
__be32 val;


The right fix is to move the debug logic into a helper, and put the path
in there, eg. something like (not tested):

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index f9d6befb55a6..b02fa2ccc70b 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1389,6 +1389,18 @@ static void __init reserve_mem(u64 base, u64 size)
mem_reserve_cnt = cnt + 1;
 }
 
+#ifdef DEBUG_PROM
+static void prom_debug_path(phandle node)
+{
+   char *path;
+   path = prom_scratch;
+   memset(path, 0, PROM_SCRATCH_SIZE);
+   call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
+   prom_debug("  node %s :\n", path);
+}
+#else
+static void prom_debug_path(phandle node) { }
+#endif /* DEBUG_PROM */
 /*
  * Initialize memory allocation mechanism, parse "memory" nodes and
  * obtain that way the top of memory and RMO to setup out local allocator
@@ -1441,11 +1453,7 @@ static void __init prom_init_mem(void)
p = regbuf;
endp = p + (plen / sizeof(cell_t));
 
-#ifdef DEBUG_PROM
-   memset(path, 0, PROM_SCRATCH_SIZE);
-   call_prom("package-to-path", 3, 1, node, path, 
PROM_SCRATCH_SIZE-1);
-   prom_debug("  node %s :\n", path);
-#endif /* DEBUG_PROM */
+   prom_debug_path(node);
 
while ((endp - p) >= (rac + rsc)) {
unsigned long base, size;


Although that also begs the question of why the hell do we need path at
all, and not just use prom_scratch directly?

cheers


Re: [PATCH v2 03/19] powerpc: Mark variables as unused

2018-04-04 Thread Michael Ellerman
LEROY Christophe  writes:

> Mathieu Malaterre  a écrit :
>
>> Add gcc attribute unused for two variables. Fix warnings treated as errors
>> with W=1:
>>
>>   arch/powerpc/kernel/prom_init.c:1388:8: error: variable ‘path’ set  
>> but not used [-Werror=unused-but-set-variable]
>>
>> Suggested-by: Christophe Leroy 
>> Signed-off-by: Mathieu Malaterre 
>> ---
>> v2: move path within ifdef DEBUG_PROM
>>
>>  arch/powerpc/kernel/prom_init.c | 6 +++---
>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/prom_init.c  
>> b/arch/powerpc/kernel/prom_init.c
>> index acf4b2e0530c..4163b11abb6c 100644
>> --- a/arch/powerpc/kernel/prom_init.c
>> +++ b/arch/powerpc/kernel/prom_init.c
>> @@ -603,7 +603,7 @@ static void __init early_cmdline_parse(void)
>>  const char *opt;
>>
>>  char *p;
>> -int l = 0;
>> +int l __maybe_unused = 0;
>>
>>  prom_cmd_line[0] = 0;
>>  p = prom_cmd_line;
>> @@ -1385,7 +1385,7 @@ static void __init reserve_mem(u64 base, u64 size)
>>  static void __init prom_init_mem(void)
>>  {
>>  phandle node;
>> -char *path, type[64];
>> +char *path __maybe_unused, type[64];
>
> You should enclose that in an ifdef DEBUG_PROM instead of hiding the warning

I disagree, the result is horrible:

 static void __init prom_init_mem(void)
 {
phandle node;
-   char *path, type[64];
+#ifdef DEBUG_PROM
+   char *path;
+#endif
+   char type[64];
unsigned int plen;
cell_t *p, *endp;
__be32 val;


The right fix is to move the debug logic into a helper, and put the path
in there, eg. something like (not tested):

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index f9d6befb55a6..b02fa2ccc70b 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1389,6 +1389,18 @@ static void __init reserve_mem(u64 base, u64 size)
mem_reserve_cnt = cnt + 1;
 }
 
+#ifdef DEBUG_PROM
+static void prom_debug_path(phandle node)
+{
+   char *path;
+   path = prom_scratch;
+   memset(path, 0, PROM_SCRATCH_SIZE);
+   call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
+   prom_debug("  node %s :\n", path);
+}
+#else
+static void prom_debug_path(phandle node) { }
+#endif /* DEBUG_PROM */
 /*
  * Initialize memory allocation mechanism, parse "memory" nodes and
  * obtain that way the top of memory and RMO to setup out local allocator
@@ -1441,11 +1453,7 @@ static void __init prom_init_mem(void)
p = regbuf;
endp = p + (plen / sizeof(cell_t));
 
-#ifdef DEBUG_PROM
-   memset(path, 0, PROM_SCRATCH_SIZE);
-   call_prom("package-to-path", 3, 1, node, path, 
PROM_SCRATCH_SIZE-1);
-   prom_debug("  node %s :\n", path);
-#endif /* DEBUG_PROM */
+   prom_debug_path(node);
 
while ((endp - p) >= (rac + rsc)) {
unsigned long base, size;


Although that also begs the question of why the hell do we need path at
all, and not just use prom_scratch directly?

cheers


Re: [PATCH] kvm: Add emulation for movups/movupd

2018-04-04 Thread Paolo Bonzini
On 04/04/2018 19:35, Stefan Fritsch wrote:
> On Wednesday, 4 April 2018 19:24:20 CEST Paolo Bonzini wrote:
>> On 04/04/2018 19:10, Konrad Rzeszutek Wilk wrote:
>>> Should there be a corresponding test-case?
>>
>> Good point!  Stefan, could you write one?
> 
> Is there infrastructure for such tests? If yes, can you give me a pointer to 
> it?

Yes, check out x86/emulator.c in
https://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git.  There is
already a movaps test.

Paolo


Re: [PATCH] kvm: Add emulation for movups/movupd

2018-04-04 Thread Paolo Bonzini
On 04/04/2018 19:35, Stefan Fritsch wrote:
> On Wednesday, 4 April 2018 19:24:20 CEST Paolo Bonzini wrote:
>> On 04/04/2018 19:10, Konrad Rzeszutek Wilk wrote:
>>> Should there be a corresponding test-case?
>>
>> Good point!  Stefan, could you write one?
> 
> Is there infrastructure for such tests? If yes, can you give me a pointer to 
> it?

Yes, check out x86/emulator.c in
https://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git.  There is
already a movaps test.

Paolo


Re: [PATCH 2/2] efi: Add embedded peripheral firmware support

2018-04-04 Thread Lukas Wunner
On Wed, Apr 04, 2018 at 01:18:36PM -0400, Peter Jones wrote:
> > On Tue, Apr 03, 2018 at 08:07:11PM +0200, Lukas Wunner wrote:
> > > * Add the EFI Firmware Volume Protocol to include/linux/efi.h:
> > >   
> > > https://www.intel.com/content/dam/doc/reference-guide/efi-firmware-file-volume-specification.pdf
> > > 
> > > * Amend arch/x86/boot/compressed/eboot.c to read the files with the
> > >   GUIDs you're interested in into memory and pass the files to the
> > >   kernel as setup_data payloads.
> 
> To be honest, I'm a bit skeptical about the firmware volume approach.
> Tools like UEFITool[0] and uefi-firmware-parser[1] have existed for
> years, still don't seem to reliably parse firmware images I see in the
> wild, and have a fairly regular need for fixes.  These are tools
> maintained by smart people who are making a real effort, and it still
> looks pretty hard to do a good job that applies across a lot of
> platforms.
> 
> So I'd rather use Hans's existing patches, at least for now, and if
> someone is interested in hacking on making an efi firmware volume parser
> for the kernel, switch them to that when such a thing is ready.

Hello?  As I've written in the above-quoted e-mail the kernel should
read the files using EFI_FIRMWARE_VOLUME_PROTOCOL.ReadFile().

*Not* by parsing the firmware volume!

Parsing the firmware volume is only necessary to find out the GUIDs
of the files you're looking for.  You only do that *once*.

I habitually extract Apple's EFI Firmware Volumes and found the
existing tools always did a sufficiently good job to get the
information I need (such as UEFIExtract, which I've linked to,
and which is part of the UEFITool suite, which you've linked to
once more).


On Wed, Apr 04, 2018 at 10:25:05PM +0200, Hans de Goede wrote:
> Lukas, thank you for your suggestions on this, but I doubt that these
> devices use the Firmware Volume stuff.

If they're using EFI, they're using a Firmware Volume and you should
be able to use the Firmware Volume Protocol to read files from it.

If that protocol wasn't supported, the existing EFI drivers wouldn't
be able to function as they couldn't load files from the volume.


> What you are describing sounds like significantly more work then
> the vendor just embedding the firmware as a char firmware[] in their
> EFI mouse driver.

In such cases, read the EFI mouse driver from the firmware volume and
extract the firmware from the offset you've pre-determined.  That's
never going to change since the devices never receive updates, as
you're saying.  So basically you'll have a struct with GUID, offset,
length, and the name under which the firmware is made available to
Linux drivers.


> That combined with Peter's worries about difficulties parsing the
> Firmware Volume stuff, makes me believe that it is best to just
> stick with my current approach as Peter suggests.

I don't know why you insist on a hackish solution (your own words)
even though a cleaner solution is suggested, but fortunately it's
not for me to decide what goes in and what doesn't. ;-)

Thanks,

Lukas


Re: [PATCH 2/2] efi: Add embedded peripheral firmware support

2018-04-04 Thread Lukas Wunner
On Wed, Apr 04, 2018 at 01:18:36PM -0400, Peter Jones wrote:
> > On Tue, Apr 03, 2018 at 08:07:11PM +0200, Lukas Wunner wrote:
> > > * Add the EFI Firmware Volume Protocol to include/linux/efi.h:
> > >   
> > > https://www.intel.com/content/dam/doc/reference-guide/efi-firmware-file-volume-specification.pdf
> > > 
> > > * Amend arch/x86/boot/compressed/eboot.c to read the files with the
> > >   GUIDs you're interested in into memory and pass the files to the
> > >   kernel as setup_data payloads.
> 
> To be honest, I'm a bit skeptical about the firmware volume approach.
> Tools like UEFITool[0] and uefi-firmware-parser[1] have existed for
> years, still don't seem to reliably parse firmware images I see in the
> wild, and have a fairly regular need for fixes.  These are tools
> maintained by smart people who are making a real effort, and it still
> looks pretty hard to do a good job that applies across a lot of
> platforms.
> 
> So I'd rather use Hans's existing patches, at least for now, and if
> someone is interested in hacking on making an efi firmware volume parser
> for the kernel, switch them to that when such a thing is ready.

Hello?  As I've written in the above-quoted e-mail the kernel should
read the files using EFI_FIRMWARE_VOLUME_PROTOCOL.ReadFile().

*Not* by parsing the firmware volume!

Parsing the firmware volume is only necessary to find out the GUIDs
of the files you're looking for.  You only do that *once*.

I habitually extract Apple's EFI Firmware Volumes and found the
existing tools always did a sufficiently good job to get the
information I need (such as UEFIExtract, which I've linked to,
and which is part of the UEFITool suite, which you've linked to
once more).


On Wed, Apr 04, 2018 at 10:25:05PM +0200, Hans de Goede wrote:
> Lukas, thank you for your suggestions on this, but I doubt that these
> devices use the Firmware Volume stuff.

If they're using EFI, they're using a Firmware Volume and you should
be able to use the Firmware Volume Protocol to read files from it.

If that protocol wasn't supported, the existing EFI drivers wouldn't
be able to function as they couldn't load files from the volume.


> What you are describing sounds like significantly more work then
> the vendor just embedding the firmware as a char firmware[] in their
> EFI mouse driver.

In such cases, read the EFI mouse driver from the firmware volume and
extract the firmware from the offset you've pre-determined.  That's
never going to change since the devices never receive updates, as
you're saying.  So basically you'll have a struct with GUID, offset,
length, and the name under which the firmware is made available to
Linux drivers.


> That combined with Peter's worries about difficulties parsing the
> Firmware Volume stuff, makes me believe that it is best to just
> stick with my current approach as Peter suggests.

I don't know why you insist on a hackish solution (your own words)
even though a cleaner solution is suggested, but fortunately it's
not for me to decide what goes in and what doesn't. ;-)

Thanks,

Lukas


Re: [PATCH] prctl: Deprecate non PR_SET_MM_MAP operations

2018-04-04 Thread Michal Hocko
On Thu 05-04-18 00:29:06, Cyrill Gorcunov wrote:
> On Wed, Apr 04, 2018 at 01:53:08PM -0700, Randy Dunlap wrote:
[...]
> > Yeah, that one's wrong also.  :)
> 
> So, maybe just get rid of any warning message at all?

or simply do
pr_warn("PR_SET_MM_MAP has been removed. Use  instead\n");
-- 
Michal Hocko
SUSE Labs


Re: [PATCH] prctl: Deprecate non PR_SET_MM_MAP operations

2018-04-04 Thread Michal Hocko
On Thu 05-04-18 00:29:06, Cyrill Gorcunov wrote:
> On Wed, Apr 04, 2018 at 01:53:08PM -0700, Randy Dunlap wrote:
[...]
> > Yeah, that one's wrong also.  :)
> 
> So, maybe just get rid of any warning message at all?

or simply do
pr_warn("PR_SET_MM_MAP has been removed. Use  instead\n");
-- 
Michal Hocko
SUSE Labs


Re: [PATCH 1/3] kbuild: Support HOSTLDFLAGS

2018-04-04 Thread Masahiro Yamada
2018-03-29 9:48 GMT+09:00 Laura Abbott :
>
> In addition to HOSTCFLAGS, there's HOSTLDFLAGS. Ensure these get passed to
> calls to build host binaries.
>
> Signed-off-by: Laura Abbott 
> ---
>  scripts/Makefile.host  | 6 +++---
>  tools/build/Makefile.build | 2 +-
>  2 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/scripts/Makefile.host b/scripts/Makefile.host
> index e6dc6ae2d7c4..a3a0e2282a56 100644
> --- a/scripts/Makefile.host
> +++ b/scripts/Makefile.host
> @@ -84,7 +84,7 @@ hostcxx_flags  = -Wp,-MD,$(depfile) $(__hostcxx_flags)
>  # Create executable from a single .c file
>  # host-csingle -> Executable
>  quiet_cmd_host-csingle = HOSTCC  $@
> -  cmd_host-csingle = $(HOSTCC) $(hostc_flags) -o $@ $< \
> +  cmd_host-csingle = $(HOSTCC) $(HOSTLDFLAGS) $(hostc_flags) -o $@ $< \
> $(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F))
>  $(host-csingle): $(obj)/%: $(src)/%.c FORCE
> $(call if_changed_dep,host-csingle)


This hunk is correct, but Robin posted this.
https://patchwork.kernel.org/patch/10243073/

Somehow it fell into a crack, and I missed to pick it up.
I will apply it lately.




> @@ -102,7 +102,7 @@ $(call multi_depend, $(host-cmulti), , -objs)
>  # Create .o file from a single .c file
>  # host-cobjs -> .o
>  quiet_cmd_host-cobjs   = HOSTCC  $@
> -  cmd_host-cobjs   = $(HOSTCC) $(hostc_flags) -c -o $@ $<
> +  cmd_host-cobjs   = $(HOSTCC) $(HOSTLDFLAGS) $(hostc_flags) -c -o $@ $<
>  $(host-cobjs): $(obj)/%.o: $(src)/%.c FORCE
> $(call if_changed_dep,host-cobjs)
>

This does not involve the link stage.
You should not do this.


> @@ -126,7 +126,7 @@ $(host-cxxobjs): $(obj)/%.o: $(src)/%.cc FORCE
>  # Compile .c file, create position independent .o file
>  # host-cshobjs -> .o
>  quiet_cmd_host-cshobjs = HOSTCC  -fPIC $@
> -  cmd_host-cshobjs = $(HOSTCC) $(hostc_flags) -fPIC -c -o $@ $<
> +  cmd_host-cshobjs = $(HOSTCC) $(HOSTLDFLAGS) $(hostc_flags) -fPIC -c -o 
> $@ $<
>  $(host-cshobjs): $(obj)/%.o: $(src)/%.c FORCE
> $(call if_changed_dep,host-cshobjs)
>
> diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
> index cd72016c3cfa..cab55f0d90e1 100644
> --- a/tools/build/Makefile.build
> +++ b/tools/build/Makefile.build
> @@ -64,7 +64,7 @@ quiet_cmd_cc_o_c = CC   $@
>cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
>
>  quiet_cmd_host_cc_o_c = HOSTCC   $@
> -  cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<
> +  cmd_host_cc_o_c = $(HOSTCC) $(HOSTLDFLAGS) $(host_c_flags) -c -o $@ $<
>
>  quiet_cmd_cxx_o_c = CXX  $@
>cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $<



-- 
Best Regards
Masahiro Yamada


Re: [PATCH 1/3] kbuild: Support HOSTLDFLAGS

2018-04-04 Thread Masahiro Yamada
2018-03-29 9:48 GMT+09:00 Laura Abbott :
>
> In addition to HOSTCFLAGS, there's HOSTLDFLAGS. Ensure these get passed to
> calls to build host binaries.
>
> Signed-off-by: Laura Abbott 
> ---
>  scripts/Makefile.host  | 6 +++---
>  tools/build/Makefile.build | 2 +-
>  2 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/scripts/Makefile.host b/scripts/Makefile.host
> index e6dc6ae2d7c4..a3a0e2282a56 100644
> --- a/scripts/Makefile.host
> +++ b/scripts/Makefile.host
> @@ -84,7 +84,7 @@ hostcxx_flags  = -Wp,-MD,$(depfile) $(__hostcxx_flags)
>  # Create executable from a single .c file
>  # host-csingle -> Executable
>  quiet_cmd_host-csingle = HOSTCC  $@
> -  cmd_host-csingle = $(HOSTCC) $(hostc_flags) -o $@ $< \
> +  cmd_host-csingle = $(HOSTCC) $(HOSTLDFLAGS) $(hostc_flags) -o $@ $< \
> $(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F))
>  $(host-csingle): $(obj)/%: $(src)/%.c FORCE
> $(call if_changed_dep,host-csingle)


This hunk is correct, but Robin posted this.
https://patchwork.kernel.org/patch/10243073/

Somehow it fell into a crack, and I missed to pick it up.
I will apply it lately.




> @@ -102,7 +102,7 @@ $(call multi_depend, $(host-cmulti), , -objs)
>  # Create .o file from a single .c file
>  # host-cobjs -> .o
>  quiet_cmd_host-cobjs   = HOSTCC  $@
> -  cmd_host-cobjs   = $(HOSTCC) $(hostc_flags) -c -o $@ $<
> +  cmd_host-cobjs   = $(HOSTCC) $(HOSTLDFLAGS) $(hostc_flags) -c -o $@ $<
>  $(host-cobjs): $(obj)/%.o: $(src)/%.c FORCE
> $(call if_changed_dep,host-cobjs)
>

This does not involve the link stage.
You should not do this.


> @@ -126,7 +126,7 @@ $(host-cxxobjs): $(obj)/%.o: $(src)/%.cc FORCE
>  # Compile .c file, create position independent .o file
>  # host-cshobjs -> .o
>  quiet_cmd_host-cshobjs = HOSTCC  -fPIC $@
> -  cmd_host-cshobjs = $(HOSTCC) $(hostc_flags) -fPIC -c -o $@ $<
> +  cmd_host-cshobjs = $(HOSTCC) $(HOSTLDFLAGS) $(hostc_flags) -fPIC -c -o 
> $@ $<
>  $(host-cshobjs): $(obj)/%.o: $(src)/%.c FORCE
> $(call if_changed_dep,host-cshobjs)
>
> diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
> index cd72016c3cfa..cab55f0d90e1 100644
> --- a/tools/build/Makefile.build
> +++ b/tools/build/Makefile.build
> @@ -64,7 +64,7 @@ quiet_cmd_cc_o_c = CC   $@
>cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
>
>  quiet_cmd_host_cc_o_c = HOSTCC   $@
> -  cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<
> +  cmd_host_cc_o_c = $(HOSTCC) $(HOSTLDFLAGS) $(host_c_flags) -c -o $@ $<
>
>  quiet_cmd_cxx_o_c = CXX  $@
>cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $<



-- 
Best Regards
Masahiro Yamada


Re: [PATCH v2 2/2] perf: riscv: Add Document for Future Porting Guide

2018-04-04 Thread Alan Kao
Hi Alex,

On Tue, Apr 03, 2018 at 07:08:43PM -0700, Alex Solomatnikov wrote:
> Doc fixes:
> 
>

Thanks for these fixes.  I'll edit this patch and send a v3 once I am done
with the PMU patch.

I suppose a "Reviewed-by: Alex Solomatnikov" appending at the end of the
commit will be great, right?

Alan

> diff --git a/Documentation/riscv/pmu.txt b/Documentation/riscv/pmu.txt
> index a3e930e..ae90a5e 100644
> --- a/Documentation/riscv/pmu.txt
> +++ b/Documentation/riscv/pmu.txt
> @@ -20,7 +20,7 @@ the lack of the following general architectural
> performance monitoring features:
>  * Enabling/Disabling counters
>Counters are just free-running all the time in our case.
>  * Interrupt caused by counter overflow
> -  No such design in the spec.
> +  No such feature in the spec.
>  * Interrupt indicator
>It is not possible to have many interrupt ports for all counters, so an
>interrupt indicator is required for software to tell which counter has
> @@ -159,14 +159,14 @@ interrupt for perf, so the details are to be
> completed in the future.
> 
>  They seem symmetric but perf treats them quite differently.  For reading, 
> there
>  is a *read* interface in *struct pmu*, but it serves more than just reading.
> -According to the context, the *read* function not only read the content of 
> the
> -counter (event->count), but also update the left period to the next interrupt
> +According to the context, the *read* function not only reads the content of 
> the
> +counter (event->count), but also updates the left period for the next 
> interrupt
>  (event->hw.period_left).
> 
>  But the core of perf does not need direct write to counters.  Writing 
> counters
> -hides behind the abstraction of 1) *pmu->start*, literally start
> counting so one
> +is hidden behind the abstraction of 1) *pmu->start*, literally start
> counting so one
>  has to set the counter to a good value for the next interrupt; 2)
> inside the IRQ
> -it should set the counter with the same reason.
> +it should set the counter to the same reasonable value.
> 
>  Reading is not a problem in RISC-V but writing would need some effort, since
>  counters are not allowed to be written by S-mode.
> @@ -190,37 +190,37 @@ Three states (event->hw.state) are defined:
>  A normal flow of these state transitions are as follows:
> 
>  * A user launches a perf event, resulting in calling to *event_init*.
> -* When being context-switched in, *add* is called by the perf core, with flag
> -  PERF_EF_START, which mean that the event should be started after it is 
> added.
> -  In this stage, an general event is binded to a physical counter, if any.
> +* When being context-switched in, *add* is called by the perf core, with a 
> flag
> +  PERF_EF_START, which means that the event should be started after
> it is added.
> +  At this stage, a general event is bound to a physical counter, if any.
>The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE,
> because it is now
>stopped, and the (software) event count does not need updating.
>  ** *start* is then called, and the counter is enabled.
> -   With flag PERF_EF_RELOAD, it write the counter to an appropriate
> value (check
> -   previous section for detail).
> -   No writing is made if the flag does not contain PERF_EF_RELOAD.
> -   The state now is reset to none, because it is neither stopped nor update
> -   (the counting already starts)
> -* When being context-switched out, *del* is called.  It then checkout all the
> -  events in the PMU and call *stop* to update their counts.
> +   With flag PERF_EF_RELOAD, it writes an appropriate value to the
> counter (check
> +   the previous section for details).
> +   Nothing is written if the flag does not contain PERF_EF_RELOAD.
> +   The state now is reset to none, because it is neither stopped nor updated
> +   (the counting already started)
> +* When being context-switched out, *del* is called.  It then checks out all 
> the
> +  events in the PMU and calls *stop* to update their counts.
>  ** *stop* is called by *del*
> and the perf core with flag PERF_EF_UPDATE, and it often shares the same
> subroutine as *read* with the same logic.
> The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, again.
> 
> -** Life cycles of these two pairs: *add* and *del* are called repeatedly as
> +** Life cycle of these two pairs: *add* and *del* are called repeatedly as
>tasks switch in-and-out; *start* and *stop* is also called when the perf 
> core
>needs a quick stop-and-start, for instance, when the interrupt
> period is being
>adjusted.
> 
> -Current implementation is sufficient for now and can be easily extend to
> +Current implementation is sufficient for now and can be easily
> extended with new
>  features in the future.
> 
>  A. Related Structures
>  -
> 
> -* struct pmu: include/linux/perf_events.h
> -* struct riscv_pmu: arch/riscv/include/asm/perf_events.h
> +* struct pmu: include/linux/perf_event.h
> +* 

Re: [PATCH v2 2/2] perf: riscv: Add Document for Future Porting Guide

2018-04-04 Thread Alan Kao
Hi Alex,

On Tue, Apr 03, 2018 at 07:08:43PM -0700, Alex Solomatnikov wrote:
> Doc fixes:
> 
>

Thanks for these fixes.  I'll edit this patch and send a v3 once I am done
with the PMU patch.

I suppose a "Reviewed-by: Alex Solomatnikov" appending at the end of the
commit will be great, right?

Alan

> diff --git a/Documentation/riscv/pmu.txt b/Documentation/riscv/pmu.txt
> index a3e930e..ae90a5e 100644
> --- a/Documentation/riscv/pmu.txt
> +++ b/Documentation/riscv/pmu.txt
> @@ -20,7 +20,7 @@ the lack of the following general architectural
> performance monitoring features:
>  * Enabling/Disabling counters
>Counters are just free-running all the time in our case.
>  * Interrupt caused by counter overflow
> -  No such design in the spec.
> +  No such feature in the spec.
>  * Interrupt indicator
>It is not possible to have many interrupt ports for all counters, so an
>interrupt indicator is required for software to tell which counter has
> @@ -159,14 +159,14 @@ interrupt for perf, so the details are to be
> completed in the future.
> 
>  They seem symmetric but perf treats them quite differently.  For reading, 
> there
>  is a *read* interface in *struct pmu*, but it serves more than just reading.
> -According to the context, the *read* function not only read the content of 
> the
> -counter (event->count), but also update the left period to the next interrupt
> +According to the context, the *read* function not only reads the content of 
> the
> +counter (event->count), but also updates the left period for the next 
> interrupt
>  (event->hw.period_left).
> 
>  But the core of perf does not need direct write to counters.  Writing 
> counters
> -hides behind the abstraction of 1) *pmu->start*, literally start
> counting so one
> +is hidden behind the abstraction of 1) *pmu->start*, literally start
> counting so one
>  has to set the counter to a good value for the next interrupt; 2)
> inside the IRQ
> -it should set the counter with the same reason.
> +it should set the counter to the same reasonable value.
> 
>  Reading is not a problem in RISC-V but writing would need some effort, since
>  counters are not allowed to be written by S-mode.
> @@ -190,37 +190,37 @@ Three states (event->hw.state) are defined:
>  A normal flow of these state transitions are as follows:
> 
>  * A user launches a perf event, resulting in calling to *event_init*.
> -* When being context-switched in, *add* is called by the perf core, with flag
> -  PERF_EF_START, which mean that the event should be started after it is 
> added.
> -  In this stage, an general event is binded to a physical counter, if any.
> +* When being context-switched in, *add* is called by the perf core, with a 
> flag
> +  PERF_EF_START, which means that the event should be started after
> it is added.
> +  At this stage, a general event is bound to a physical counter, if any.
>The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE,
> because it is now
>stopped, and the (software) event count does not need updating.
>  ** *start* is then called, and the counter is enabled.
> -   With flag PERF_EF_RELOAD, it write the counter to an appropriate
> value (check
> -   previous section for detail).
> -   No writing is made if the flag does not contain PERF_EF_RELOAD.
> -   The state now is reset to none, because it is neither stopped nor update
> -   (the counting already starts)
> -* When being context-switched out, *del* is called.  It then checkout all the
> -  events in the PMU and call *stop* to update their counts.
> +   With flag PERF_EF_RELOAD, it writes an appropriate value to the
> counter (check
> +   the previous section for details).
> +   Nothing is written if the flag does not contain PERF_EF_RELOAD.
> +   The state now is reset to none, because it is neither stopped nor updated
> +   (the counting already started)
> +* When being context-switched out, *del* is called.  It then checks out all 
> the
> +  events in the PMU and calls *stop* to update their counts.
>  ** *stop* is called by *del*
> and the perf core with flag PERF_EF_UPDATE, and it often shares the same
> subroutine as *read* with the same logic.
> The state changes to PERF_HES_STOPPED and PERF_HES_UPTODATE, again.
> 
> -** Life cycles of these two pairs: *add* and *del* are called repeatedly as
> +** Life cycle of these two pairs: *add* and *del* are called repeatedly as
>tasks switch in-and-out; *start* and *stop* is also called when the perf 
> core
>needs a quick stop-and-start, for instance, when the interrupt
> period is being
>adjusted.
> 
> -Current implementation is sufficient for now and can be easily extend to
> +Current implementation is sufficient for now and can be easily
> extended with new
>  features in the future.
> 
>  A. Related Structures
>  -
> 
> -* struct pmu: include/linux/perf_events.h
> -* struct riscv_pmu: arch/riscv/include/asm/perf_events.h
> +* struct pmu: include/linux/perf_event.h
> +* 

Re: [PATCH 0/2] perf: riscv: Preliminary Perf Event Support on RISC-V

2018-04-04 Thread Alan Kao
On Tue, Apr 03, 2018 at 03:45:17PM -0700, Palmer Dabbelt wrote:
> On Tue, 03 Apr 2018 07:29:02 PDT (-0700), alan...@andestech.com wrote:
> >On Mon, Apr 02, 2018 at 08:15:44PM -0700, Palmer Dabbelt wrote:
> >>On Mon, 02 Apr 2018 05:31:22 PDT (-0700), alan...@andestech.com wrote:
> >>>This implements the baseline PMU for RISC-V platforms.
> >>>
> >>>To ease future PMU portings, a guide is also written, containing
> >>>perf concepts, arch porting practices and some hints.
> >>>
> >>>Changes in v2:
> >>> - Fix the bug reported by Alex, which was caused by not sufficient
> >>>   initialization.  Check https://lkml.org/lkml/2018/3/31/251 for the
> >>>   discussion.
> >>>
> >>>Alan Kao (2):
> >>>  perf: riscv: preliminary RISC-V support
> >>>  perf: riscv: Add Document for Future Porting Guide
> >>>
> >>> Documentation/riscv/pmu.txt | 249 +++
> >>> arch/riscv/Kconfig  |  12 +
> >>> arch/riscv/include/asm/perf_event.h |  76 +-
> >>> arch/riscv/kernel/Makefile  |   1 +
> >>> arch/riscv/kernel/perf_event.c  | 468 
> >>> 
> >>> 5 files changed, 802 insertions(+), 4 deletions(-)
> >>> create mode 100644 Documentation/riscv/pmu.txt
> >>> create mode 100644 arch/riscv/kernel/perf_event.c
> >>
> >>I'm having some trouble pulling this into my tree.  I think you might have
> >>another patch floating around somewhere, as I don't have any
> >>arch/riscv/include/asm/perf_event.h right now.
> >>
> >>Do you mind rebasing this on top of linux-4.16 so I can look properly?
> >>
> >>Thanks!
> >
> >Sorry for the inconvenience, but this patch was based on Alex's patch at
> >https://github.com/riscv/riscv-linux/pull/124/files.  I thought that one
> >had already been picked into your tree.
> >
> >Any ideas?
> 
> Thanks, it applies on top of that.  I'm going to play around with this a
> bit, but it looks generally good.

Note that to make it work better when wraparound occurs, you should change the
value of *.counter_width* into the width of real hardware counters.  This is
because this patch does not handle wraparound checking, so using a wider
bit mask may sometimes report a extremely large number.

Ideally this should be done by adding a Kconfig option called 
"Hifive Unleashed PMU" which automatically sets the width an reuses most of the
baseline codes.  What do you think about this?

Thanks.


Re: [PATCH 0/2] perf: riscv: Preliminary Perf Event Support on RISC-V

2018-04-04 Thread Alan Kao
On Tue, Apr 03, 2018 at 03:45:17PM -0700, Palmer Dabbelt wrote:
> On Tue, 03 Apr 2018 07:29:02 PDT (-0700), alan...@andestech.com wrote:
> >On Mon, Apr 02, 2018 at 08:15:44PM -0700, Palmer Dabbelt wrote:
> >>On Mon, 02 Apr 2018 05:31:22 PDT (-0700), alan...@andestech.com wrote:
> >>>This implements the baseline PMU for RISC-V platforms.
> >>>
> >>>To ease future PMU portings, a guide is also written, containing
> >>>perf concepts, arch porting practices and some hints.
> >>>
> >>>Changes in v2:
> >>> - Fix the bug reported by Alex, which was caused by not sufficient
> >>>   initialization.  Check https://lkml.org/lkml/2018/3/31/251 for the
> >>>   discussion.
> >>>
> >>>Alan Kao (2):
> >>>  perf: riscv: preliminary RISC-V support
> >>>  perf: riscv: Add Document for Future Porting Guide
> >>>
> >>> Documentation/riscv/pmu.txt | 249 +++
> >>> arch/riscv/Kconfig  |  12 +
> >>> arch/riscv/include/asm/perf_event.h |  76 +-
> >>> arch/riscv/kernel/Makefile  |   1 +
> >>> arch/riscv/kernel/perf_event.c  | 468 
> >>> 
> >>> 5 files changed, 802 insertions(+), 4 deletions(-)
> >>> create mode 100644 Documentation/riscv/pmu.txt
> >>> create mode 100644 arch/riscv/kernel/perf_event.c
> >>
> >>I'm having some trouble pulling this into my tree.  I think you might have
> >>another patch floating around somewhere, as I don't have any
> >>arch/riscv/include/asm/perf_event.h right now.
> >>
> >>Do you mind rebasing this on top of linux-4.16 so I can look properly?
> >>
> >>Thanks!
> >
> >Sorry for the inconvenience, but this patch was based on Alex's patch at
> >https://github.com/riscv/riscv-linux/pull/124/files.  I thought that one
> >had already been picked into your tree.
> >
> >Any ideas?
> 
> Thanks, it applies on top of that.  I'm going to play around with this a
> bit, but it looks generally good.

Note that to make it work better when wraparound occurs, you should change the
value of *.counter_width* into the width of real hardware counters.  This is
because this patch does not handle wraparound checking, so using a wider
bit mask may sometimes report a extremely large number.

Ideally this should be done by adding a Kconfig option called 
"Hifive Unleashed PMU" which automatically sets the width an reuses most of the
baseline codes.  What do you think about this?

Thanks.


Re: [PATCH 3/3] kbuild: Allow passing additional HOSTCFLAGS and HOSTLDFLAGS

2018-04-04 Thread Masahiro Yamada
2018-03-29 9:48 GMT+09:00 Laura Abbott :
>
> Similar to AFLAGS_KBUILD, there may be uses (e.g. hardening) for passing in
> additional flags to host programs. Allow these to be passed in from the
> environment.
>
> Signed-off-by: Laura Abbott 
> ---
>  Documentation/kbuild/kbuild.txt | 9 +
>  Makefile| 3 +++
>  2 files changed, 12 insertions(+)
>
> diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
> index ac2363ea05c5..3751a4bc8596 100644
> --- a/Documentation/kbuild/kbuild.txt
> +++ b/Documentation/kbuild/kbuild.txt
> @@ -24,6 +24,15 @@ KAFLAGS
>  --
>  Additional options to the assembler (for built-in and modules).
>
> +AFLAGS_HOSTCFLAGS
> +--
> +Additional options passed to the compiler when building host programs.
> +
> +AFLAGS_HOSTLDFLAGS
> +--
> +Additional options passed to the linker (through the compiler) when buidling
> +host programs.
> +


I am afraid you misunderstood the meaning of 'AFLAGS'.

AFLAGS is not 'Additional flags', but 'Assembler flags'
that are used for compiling *.S files.

AFLAGS for host programs is weird.



I see similar proposals from different people.

I replied like follows:
https://lkml.org/lkml/2018/2/28/178

However, Robin seems busy lately.

I will wait a bit, then
if nobody does this, I may do it.




>  AFLAGS_MODULE
>  --
>  Additional module specific options to use for $(AS).
> diff --git a/Makefile b/Makefile
> index 7ba478ab8c82..2cab3f8d489c 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -367,6 +367,9 @@ HOSTCXXFLAGS := -O2 $(HOST_LFS_CFLAGS)
>  HOSTLDFLAGS  := $(HOST_LFS_LDFLAGS)
>  HOST_LOADLIBES := $(HOST_LFS_LIBS)
>
> +HOSTCFLAGS  += $(AFLAGS_HOSTCFLAGS)
> +HOSTLDFLAGS  += $(AFLAGS_HOSTLDFLAGS)
> +
>  # Make variables (CC, etc...)
>  AS = $(CROSS_COMPILE)as
>  LD = $(CROSS_COMPILE)ld
> --
> 2.16.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Best Regards
Masahiro Yamada


Re: [PATCH 3/3] kbuild: Allow passing additional HOSTCFLAGS and HOSTLDFLAGS

2018-04-04 Thread Masahiro Yamada
2018-03-29 9:48 GMT+09:00 Laura Abbott :
>
> Similar to AFLAGS_KBUILD, there may be uses (e.g. hardening) for passing in
> additional flags to host programs. Allow these to be passed in from the
> environment.
>
> Signed-off-by: Laura Abbott 
> ---
>  Documentation/kbuild/kbuild.txt | 9 +
>  Makefile| 3 +++
>  2 files changed, 12 insertions(+)
>
> diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
> index ac2363ea05c5..3751a4bc8596 100644
> --- a/Documentation/kbuild/kbuild.txt
> +++ b/Documentation/kbuild/kbuild.txt
> @@ -24,6 +24,15 @@ KAFLAGS
>  --
>  Additional options to the assembler (for built-in and modules).
>
> +AFLAGS_HOSTCFLAGS
> +--
> +Additional options passed to the compiler when building host programs.
> +
> +AFLAGS_HOSTLDFLAGS
> +--
> +Additional options passed to the linker (through the compiler) when buidling
> +host programs.
> +


I am afraid you misunderstood the meaning of 'AFLAGS'.

AFLAGS is not 'Additional flags', but 'Assembler flags'
that are used for compiling *.S files.

AFLAGS for host programs is weird.



I see similar proposals from different people.

I replied like follows:
https://lkml.org/lkml/2018/2/28/178

However, Robin seems busy lately.

I will wait a bit, then
if nobody does this, I may do it.




>  AFLAGS_MODULE
>  --
>  Additional module specific options to use for $(AS).
> diff --git a/Makefile b/Makefile
> index 7ba478ab8c82..2cab3f8d489c 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -367,6 +367,9 @@ HOSTCXXFLAGS := -O2 $(HOST_LFS_CFLAGS)
>  HOSTLDFLAGS  := $(HOST_LFS_LDFLAGS)
>  HOST_LOADLIBES := $(HOST_LFS_LIBS)
>
> +HOSTCFLAGS  += $(AFLAGS_HOSTCFLAGS)
> +HOSTLDFLAGS  += $(AFLAGS_HOSTLDFLAGS)
> +
>  # Make variables (CC, etc...)
>  AS = $(CROSS_COMPILE)as
>  LD = $(CROSS_COMPILE)ld
> --
> 2.16.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Best Regards
Masahiro Yamada


Re: [PATCH V2 6/7] thermal/drivers/cpu_cooling: Introduce the cpu idle cooling driver

2018-04-04 Thread Viresh Kumar
On 04-04-18, 10:50, Daniel Lezcano wrote:
> Mmh, that sounds very complex. May be it is simpler to count the number
> of cluster and initialize the idle_cdev for each cluster and then go for
> this loop with the cluster cpumask.

Maybe not sure. I have had such code in the past and it was quite
straight forward to understand :)

You can go with whichever version you like.

-- 
viresh


Re: [PATCH V2 6/7] thermal/drivers/cpu_cooling: Introduce the cpu idle cooling driver

2018-04-04 Thread Viresh Kumar
On 04-04-18, 10:50, Daniel Lezcano wrote:
> Mmh, that sounds very complex. May be it is simpler to count the number
> of cluster and initialize the idle_cdev for each cluster and then go for
> this loop with the cluster cpumask.

Maybe not sure. I have had such code in the past and it was quite
straight forward to understand :)

You can go with whichever version you like.

-- 
viresh


Re: [PATCH v15 0/9] Add io{read|write}64 to io-64-atomic headers

2018-04-04 Thread Michael Ellerman
Logan Gunthorpe  writes:
> On 4/4/2018 4:38 AM, Michael Ellerman wrote:
...
>> eg. It looks like I could take the two powerpc patches on their own for
>> 4.17, and then the rest could go via other trees?
>
> Yup! If you can take the powerpc patches I can keep trying to get the 
> rest in. They are largely independent and shouldn't really change 
> anything without the following patches.

OK, I'll take those then.

>> Is patch 1 stand alone?
>
> Essentially, yes, but patch 5 depends on it seeing it's changing the 
> same area and is trying to avoid creating the same kbuild warnings that 
> patch 1 suppresses.
>
>> The other option is to ask Andrew Morton to take it, as he often carries
>> these cross-tree type series.
>
> Thanks for the tip! I'll copy him when I repost it after the merge window.

Cool. If you want him to take it you should probably explicitly say so
when you repost.

cheers


Re: [PATCH v15 0/9] Add io{read|write}64 to io-64-atomic headers

2018-04-04 Thread Michael Ellerman
Logan Gunthorpe  writes:
> On 4/4/2018 4:38 AM, Michael Ellerman wrote:
...
>> eg. It looks like I could take the two powerpc patches on their own for
>> 4.17, and then the rest could go via other trees?
>
> Yup! If you can take the powerpc patches I can keep trying to get the 
> rest in. They are largely independent and shouldn't really change 
> anything without the following patches.

OK, I'll take those then.

>> Is patch 1 stand alone?
>
> Essentially, yes, but patch 5 depends on it seeing it's changing the 
> same area and is trying to avoid creating the same kbuild warnings that 
> patch 1 suppresses.
>
>> The other option is to ask Andrew Morton to take it, as he often carries
>> these cross-tree type series.
>
> Thanks for the tip! I'll copy him when I repost it after the merge window.

Cool. If you want him to take it you should probably explicitly say so
when you repost.

cheers


Re: [PATCH 2/3] objtool: Support HOSTCFLAGS and HOSTLDFLAGS

2018-04-04 Thread Masahiro Yamada
2018-03-29 9:48 GMT+09:00 Laura Abbott :
> It may be useful to compile host programs with different flags (e.g.
> hardening). Ensure that objtool picks up the appropriate flags.
>
> Signed-off-by: Laura Abbott 
> ---


I saw some similar patches before.

I thought they are fixing this way
https://patchwork.kernel.org/patch/10251259/

but, I am not tacking the progress.




>  tools/objtool/Makefile | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
> index e6acc281dd37..0ff3bcac1ca9 100644
> --- a/tools/objtool/Makefile
> +++ b/tools/objtool/Makefile
> @@ -31,8 +31,9 @@ INCLUDES := -I$(srctree)/tools/include \
> -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
> -I$(srctree)/tools/objtool/arch/$(ARCH)/include
>  WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum 
> -Wno-packed
> -CFLAGS   += -Wall -Werror $(WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
> -LDFLAGS  += -lelf $(LIBSUBCMD)
> +CFLAGS   += -Wall -Werror $(WARNINGS) $(HOSTCFLAGS) -fomit-frame-pointer -O2 
> -g \
> +   $(INCLUDES)
> +LDFLAGS  += -lelf $(LIBSUBCMD) $(HOSTLDFLAGS)
>
>  # Allow old libelf to be used:
>  elfshdr := $(shell echo '\#include ' | $(CC) $(CFLAGS) -x c -E - | 
> grep elf_getshdr)
> --
> 2.16.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Best Regards
Masahiro Yamada


Re: [PATCH 2/3] objtool: Support HOSTCFLAGS and HOSTLDFLAGS

2018-04-04 Thread Masahiro Yamada
2018-03-29 9:48 GMT+09:00 Laura Abbott :
> It may be useful to compile host programs with different flags (e.g.
> hardening). Ensure that objtool picks up the appropriate flags.
>
> Signed-off-by: Laura Abbott 
> ---


I saw some similar patches before.

I thought they are fixing this way
https://patchwork.kernel.org/patch/10251259/

but, I am not tacking the progress.




>  tools/objtool/Makefile | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
> index e6acc281dd37..0ff3bcac1ca9 100644
> --- a/tools/objtool/Makefile
> +++ b/tools/objtool/Makefile
> @@ -31,8 +31,9 @@ INCLUDES := -I$(srctree)/tools/include \
> -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
> -I$(srctree)/tools/objtool/arch/$(ARCH)/include
>  WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum 
> -Wno-packed
> -CFLAGS   += -Wall -Werror $(WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
> -LDFLAGS  += -lelf $(LIBSUBCMD)
> +CFLAGS   += -Wall -Werror $(WARNINGS) $(HOSTCFLAGS) -fomit-frame-pointer -O2 
> -g \
> +   $(INCLUDES)
> +LDFLAGS  += -lelf $(LIBSUBCMD) $(HOSTLDFLAGS)
>
>  # Allow old libelf to be used:
>  elfshdr := $(shell echo '\#include ' | $(CC) $(CFLAGS) -x c -E - | 
> grep elf_getshdr)
> --
> 2.16.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Best Regards
Masahiro Yamada


[PATCH] uapi: fix linux/kfd_ioctl.h userspace compilation errors

2018-04-04 Thread Dmitry V. Levin
Consistently use types provided by  via 
to fix the following linux/kfd_ioctl.h userspace compilation errors:

/usr/include/linux/kfd_ioctl.h:266:2: error: unknown type name 'uint64_t'
  uint64_t tba_addr;  /* to KFD */
/usr/include/linux/kfd_ioctl.h:267:2: error: unknown type name 'uint64_t'
  uint64_t tma_addr;  /* to KFD */
/usr/include/linux/kfd_ioctl.h:268:2: error: unknown type name 'uint32_t'
  uint32_t gpu_id;  /* to KFD */
/usr/include/linux/kfd_ioctl.h:269:2: error: unknown type name 'uint32_t'
  uint32_t pad;

Fixes: d7b9bd2248d79 ("drm/amdkfd: Add support for user-mode trap handlers")
Cc:  # v4.16
Signed-off-by: Dmitry V. Levin 
---
 include/uapi/linux/kfd_ioctl.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index f4cab5b3ba9a..111d73ba2d96 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -263,10 +263,10 @@ struct kfd_ioctl_get_tile_config_args {
 };
 
 struct kfd_ioctl_set_trap_handler_args {
-   uint64_t tba_addr;  /* to KFD */
-   uint64_t tma_addr;  /* to KFD */
-   uint32_t gpu_id;/* to KFD */
-   uint32_t pad;
+   __u64 tba_addr; /* to KFD */
+   __u64 tma_addr; /* to KFD */
+   __u32 gpu_id;   /* to KFD */
+   __u32 pad;
 };
 
 #define AMDKFD_IOCTL_BASE 'K'
-- 
ldv


[PATCH] uapi: fix linux/kfd_ioctl.h userspace compilation errors

2018-04-04 Thread Dmitry V. Levin
Consistently use types provided by  via 
to fix the following linux/kfd_ioctl.h userspace compilation errors:

/usr/include/linux/kfd_ioctl.h:266:2: error: unknown type name 'uint64_t'
  uint64_t tba_addr;  /* to KFD */
/usr/include/linux/kfd_ioctl.h:267:2: error: unknown type name 'uint64_t'
  uint64_t tma_addr;  /* to KFD */
/usr/include/linux/kfd_ioctl.h:268:2: error: unknown type name 'uint32_t'
  uint32_t gpu_id;  /* to KFD */
/usr/include/linux/kfd_ioctl.h:269:2: error: unknown type name 'uint32_t'
  uint32_t pad;

Fixes: d7b9bd2248d79 ("drm/amdkfd: Add support for user-mode trap handlers")
Cc:  # v4.16
Signed-off-by: Dmitry V. Levin 
---
 include/uapi/linux/kfd_ioctl.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index f4cab5b3ba9a..111d73ba2d96 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -263,10 +263,10 @@ struct kfd_ioctl_get_tile_config_args {
 };
 
 struct kfd_ioctl_set_trap_handler_args {
-   uint64_t tba_addr;  /* to KFD */
-   uint64_t tma_addr;  /* to KFD */
-   uint32_t gpu_id;/* to KFD */
-   uint32_t pad;
+   __u64 tba_addr; /* to KFD */
+   __u64 tma_addr; /* to KFD */
+   __u32 gpu_id;   /* to KFD */
+   __u32 pad;
 };
 
 #define AMDKFD_IOCTL_BASE 'K'
-- 
ldv


[PATCH] uapi: fix asm/bootparam.h userspace compilation errors

2018-04-04 Thread Dmitry V. Levin
Consistently use types provided by  to fix the following
asm/bootparam.h userspace compilation errors:

/usr/include/asm/bootparam.h:140:2: error: unknown type name 'u16'
  u16 version;
/usr/include/asm/bootparam.h:141:2: error: unknown type name 'u16'
  u16 compatible_version;
/usr/include/asm/bootparam.h:142:2: error: unknown type name 'u16'
  u16 pm_timer_address;
/usr/include/asm/bootparam.h:143:2: error: unknown type name 'u16'
  u16 num_cpus;
/usr/include/asm/bootparam.h:144:2: error: unknown type name 'u64'
  u64 pci_mmconfig_base;
/usr/include/asm/bootparam.h:145:2: error: unknown type name 'u32'
  u32 tsc_khz;
/usr/include/asm/bootparam.h:146:2: error: unknown type name 'u32'
  u32 apic_khz;
/usr/include/asm/bootparam.h:147:2: error: unknown type name 'u8'
  u8 standard_ioapic;
/usr/include/asm/bootparam.h:148:2: error: unknown type name 'u8'
  u8 cpu_ids[255];

Fixes: 4a362601baa6 ("x86/jailhouse: Add infrastructure for running in non-root 
cell")
Cc:  # v4.16
Signed-off-by: Dmitry V. Levin 
---
 arch/x86/include/uapi/asm/bootparam.h | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/uapi/asm/bootparam.h 
b/arch/x86/include/uapi/asm/bootparam.h
index aebf60357758..a06cbf019744 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -137,15 +137,15 @@ struct boot_e820_entry {
  * setup data structure.
  */
 struct jailhouse_setup_data {
-   u16 version;
-   u16 compatible_version;
-   u16 pm_timer_address;
-   u16 num_cpus;
-   u64 pci_mmconfig_base;
-   u32 tsc_khz;
-   u32 apic_khz;
-   u8  standard_ioapic;
-   u8  cpu_ids[255];
+   __u16   version;
+   __u16   compatible_version;
+   __u16   pm_timer_address;
+   __u16   num_cpus;
+   __u64   pci_mmconfig_base;
+   __u32   tsc_khz;
+   __u32   apic_khz;
+   __u8standard_ioapic;
+   __u8cpu_ids[255];
 } __attribute__((packed));
 
 /* The so-called "zeropage" */
-- 
ldv


[PATCH] uapi: fix asm/bootparam.h userspace compilation errors

2018-04-04 Thread Dmitry V. Levin
Consistently use types provided by  to fix the following
asm/bootparam.h userspace compilation errors:

/usr/include/asm/bootparam.h:140:2: error: unknown type name 'u16'
  u16 version;
/usr/include/asm/bootparam.h:141:2: error: unknown type name 'u16'
  u16 compatible_version;
/usr/include/asm/bootparam.h:142:2: error: unknown type name 'u16'
  u16 pm_timer_address;
/usr/include/asm/bootparam.h:143:2: error: unknown type name 'u16'
  u16 num_cpus;
/usr/include/asm/bootparam.h:144:2: error: unknown type name 'u64'
  u64 pci_mmconfig_base;
/usr/include/asm/bootparam.h:145:2: error: unknown type name 'u32'
  u32 tsc_khz;
/usr/include/asm/bootparam.h:146:2: error: unknown type name 'u32'
  u32 apic_khz;
/usr/include/asm/bootparam.h:147:2: error: unknown type name 'u8'
  u8 standard_ioapic;
/usr/include/asm/bootparam.h:148:2: error: unknown type name 'u8'
  u8 cpu_ids[255];

Fixes: 4a362601baa6 ("x86/jailhouse: Add infrastructure for running in non-root 
cell")
Cc:  # v4.16
Signed-off-by: Dmitry V. Levin 
---
 arch/x86/include/uapi/asm/bootparam.h | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/uapi/asm/bootparam.h 
b/arch/x86/include/uapi/asm/bootparam.h
index aebf60357758..a06cbf019744 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -137,15 +137,15 @@ struct boot_e820_entry {
  * setup data structure.
  */
 struct jailhouse_setup_data {
-   u16 version;
-   u16 compatible_version;
-   u16 pm_timer_address;
-   u16 num_cpus;
-   u64 pci_mmconfig_base;
-   u32 tsc_khz;
-   u32 apic_khz;
-   u8  standard_ioapic;
-   u8  cpu_ids[255];
+   __u16   version;
+   __u16   compatible_version;
+   __u16   pm_timer_address;
+   __u16   num_cpus;
+   __u64   pci_mmconfig_base;
+   __u32   tsc_khz;
+   __u32   apic_khz;
+   __u8standard_ioapic;
+   __u8cpu_ids[255];
 } __attribute__((packed));
 
 /* The so-called "zeropage" */
-- 
ldv


Re: [PATCH v3 2/4] bus: fsl-mc: add restool userspace support

2018-04-04 Thread Stuart Yoder
On Wed, Apr 4, 2018 at 7:42 AM, Andrew Lunn  wrote:
>> I hear you.  It is more complicated this way...having all these individual
>> objects vs just a single "bundle" of them that represents a NIC.  But, that's
>> the way the DPAA2 hardware is, and we're implementing kernel support for
>> the hardware as it is.
>
> Hi Stuart
>
> I see we are not making any progress here.
>
> So what i suggest is you post the kernel code and configuration tool
> concept to netdev for a full review. You want reviews from David
> Miller, Jiri Pirko, Jakub Kicinski, David Ahern, etc.

I know Ioana has other feedback she is addressing so a respin will be coming
soon, and she can include those additional reviewers.

Thanks,
Stuart


Re: [PATCH v3 2/4] bus: fsl-mc: add restool userspace support

2018-04-04 Thread Stuart Yoder
On Wed, Apr 4, 2018 at 7:42 AM, Andrew Lunn  wrote:
>> I hear you.  It is more complicated this way...having all these individual
>> objects vs just a single "bundle" of them that represents a NIC.  But, that's
>> the way the DPAA2 hardware is, and we're implementing kernel support for
>> the hardware as it is.
>
> Hi Stuart
>
> I see we are not making any progress here.
>
> So what i suggest is you post the kernel code and configuration tool
> concept to netdev for a full review. You want reviews from David
> Miller, Jiri Pirko, Jakub Kicinski, David Ahern, etc.

I know Ioana has other feedback she is addressing so a respin will be coming
soon, and she can include those additional reviewers.

Thanks,
Stuart


Re: [RFC PATCH 1/1] vmscan: Support multiple kswapd threads per node

2018-04-04 Thread Buddy Lumpkin

> On Apr 3, 2018, at 2:12 PM, Matthew Wilcox  wrote:
> 
> On Tue, Apr 03, 2018 at 01:49:25PM -0700, Buddy Lumpkin wrote:
>>> Yes, very much this.  If you have a single-threaded workload which is
>>> using the entirety of memory and would like to use even more, then it
>>> makes sense to use as many CPUs as necessary getting memory out of its
>>> way.  If you have N CPUs and N-1 threads happily occupying themselves in
>>> their own reasonably-sized working sets with one monster process trying
>>> to use as much RAM as possible, then I'd be pretty unimpressed to see
>>> the N-1 well-behaved threads preempted by kswapd.
>> 
>> The default value provides one kswapd thread per NUMA node, the same
>> it was without the patch. Also, I would point out that just because you 
>> devote
>> more threads to kswapd, doesn’t mean they are busy. If multiple kswapd 
>> threads
>> are busy, they are almost certainly doing work that would have resulted in
>> direct reclaims, which are often substantially more expensive than a couple
>> extra context switches due to preemption.
> 
> [...]
> 
>> In my previous response to Michal Hocko, I described
>> how I think we could scale watermarks in response to direct reclaims, and
>> launch more kswapd threads when kswapd peaks at 100% CPU usage.
> 
> I think you're missing my point about the workload ... kswapd isn't
> "nice", so it will compete with the N-1 threads which are chugging along
> at 100% CPU inside their working sets.  In this scenario, we _don't_
> want to kick off kswapd at all; we want the monster thread to clean up
> its own mess.  If we have idle CPUs, then yes, absolutely, lets have
> them clean up for the monster, but otherwise, I want my N-1 threads
> doing their own thing.
> 
> Maybe we should renice kswapd anyway ... thoughts?  We don't seem to have
> had a nice'd kswapd since 2.6.12, but maybe we played with that earlier
> and discovered it was a bad idea?
> 


Trying to distinguish between the monster and a high value task that you want
to run as quickly as possible would be challenging. I like your idea of using
renice. It probably makes sense to continue to run the first thread on each node
at a standard nice value, and run each additional task with a positive nice 
value.








Re: [RFC PATCH 1/1] vmscan: Support multiple kswapd threads per node

2018-04-04 Thread Buddy Lumpkin

> On Apr 3, 2018, at 2:12 PM, Matthew Wilcox  wrote:
> 
> On Tue, Apr 03, 2018 at 01:49:25PM -0700, Buddy Lumpkin wrote:
>>> Yes, very much this.  If you have a single-threaded workload which is
>>> using the entirety of memory and would like to use even more, then it
>>> makes sense to use as many CPUs as necessary getting memory out of its
>>> way.  If you have N CPUs and N-1 threads happily occupying themselves in
>>> their own reasonably-sized working sets with one monster process trying
>>> to use as much RAM as possible, then I'd be pretty unimpressed to see
>>> the N-1 well-behaved threads preempted by kswapd.
>> 
>> The default value provides one kswapd thread per NUMA node, the same
>> it was without the patch. Also, I would point out that just because you 
>> devote
>> more threads to kswapd, doesn’t mean they are busy. If multiple kswapd 
>> threads
>> are busy, they are almost certainly doing work that would have resulted in
>> direct reclaims, which are often substantially more expensive than a couple
>> extra context switches due to preemption.
> 
> [...]
> 
>> In my previous response to Michal Hocko, I described
>> how I think we could scale watermarks in response to direct reclaims, and
>> launch more kswapd threads when kswapd peaks at 100% CPU usage.
> 
> I think you're missing my point about the workload ... kswapd isn't
> "nice", so it will compete with the N-1 threads which are chugging along
> at 100% CPU inside their working sets.  In this scenario, we _don't_
> want to kick off kswapd at all; we want the monster thread to clean up
> its own mess.  If we have idle CPUs, then yes, absolutely, lets have
> them clean up for the monster, but otherwise, I want my N-1 threads
> doing their own thing.
> 
> Maybe we should renice kswapd anyway ... thoughts?  We don't seem to have
> had a nice'd kswapd since 2.6.12, but maybe we played with that earlier
> and discovered it was a bad idea?
> 


Trying to distinguish between the monster and a high value task that you want
to run as quickly as possible would be challenging. I like your idea of using
renice. It probably makes sense to continue to run the first thread on each node
at a standard nice value, and run each additional task with a positive nice 
value.








Re: [PATCH v1] kernel/trace:check the val against the available mem

2018-04-04 Thread Joel Fernandes
On Wed, Apr 4, 2018 at 7:58 PM, Matthew Wilcox  wrote:
> On Wed, Apr 04, 2018 at 11:47:30AM -0400, Steven Rostedt wrote:
>> I originally was going to remove the RETRY_MAYFAIL, but adding this
>> check (at the end of the loop though) appears to have OOM consistently
>> kill this task.
>>
>> I still like to keep RETRY_MAYFAIL, because it wont trigger OOM if
>> nothing comes in and tries to do an allocation, but instead will fail
>> nicely with -ENOMEM.
>
> I still don't get why you want RETRY_MAYFAIL.  You know that tries
> *harder* to allocate memory than plain GFP_KERNEL does, right?  And
> that seems like the exact opposite of what you want.

No. We do want it to try harder but not if its already setup for failure.


Re: [PATCH v1] kernel/trace:check the val against the available mem

2018-04-04 Thread Joel Fernandes
On Wed, Apr 4, 2018 at 7:58 PM, Matthew Wilcox  wrote:
> On Wed, Apr 04, 2018 at 11:47:30AM -0400, Steven Rostedt wrote:
>> I originally was going to remove the RETRY_MAYFAIL, but adding this
>> check (at the end of the loop though) appears to have OOM consistently
>> kill this task.
>>
>> I still like to keep RETRY_MAYFAIL, because it wont trigger OOM if
>> nothing comes in and tries to do an allocation, but instead will fail
>> nicely with -ENOMEM.
>
> I still don't get why you want RETRY_MAYFAIL.  You know that tries
> *harder* to allocate memory than plain GFP_KERNEL does, right?  And
> that seems like the exact opposite of what you want.

No. We do want it to try harder but not if its already setup for failure.


KASAN: use-after-free Read in ntfs_read_locked_inode

2018-04-04 Thread syzbot

Hello,

syzbot hit the following crash on upstream commit
3e968c9f1401088abc9a19ae6ff571644d37a355 (Wed Apr 4 21:19:24 2018 +)
Merge tag 'ext4_for_linus' of  
git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
syzbot dashboard link:  
https://syzkaller.appspot.com/bug?extid=19b469021157c136116a


C reproducer: https://syzkaller.appspot.com/x/repro.c?id=5682455868604416
syzkaller reproducer:  
https://syzkaller.appspot.com/x/repro.syz?id=5679121900240896
Raw console output:  
https://syzkaller.appspot.com/x/log.txt?id=5751565918928896
Kernel config:  
https://syzkaller.appspot.com/x/.config?id=9118669095563550941

compiler: gcc (GCC) 7.1.1 20170620

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+19b469021157c1361...@syzkaller.appspotmail.com
It will help syzbot understand when the bug is fixed. See footer for  
details.

If you forward the report, please keep this part and the footer.

ntfs: volume version 3.1.
ntfs: volume version 3.1.
ntfs: volume version 3.1.
ntfs: volume version 3.1.
==
BUG: KASAN: use-after-free in ntfs_read_locked_inode+0x47fe/0x51b0  
fs/ntfs/inode.c:670

Read of size 8 at addr 8801becc42e8 by task syzkaller675411/4496

CPU: 0 PID: 4496 Comm: syzkaller675411 Not tainted 4.16.0+ #15
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x1a7/0x27d lib/dump_stack.c:53
 print_address_description+0x73/0x250 mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report+0x23c/0x360 mm/kasan/report.c:412
 __asan_report_load_n_noabort+0xf/0x20 mm/kasan/report.c:443
 ntfs_read_locked_inode+0x47fe/0x51b0 fs/ntfs/inode.c:670
 ntfs_iget+0x1ab/0x240 fs/ntfs/inode.c:190
 load_and_init_quota fs/ntfs/super.c:1406 [inline]
 load_system_files+0x5f06/0x6c80 fs/ntfs/super.c:2117
 ntfs_fill_super+0x1485/0x2fb0 fs/ntfs/super.c:2908
 mount_bdev+0x2b7/0x370 fs/super.c:1119
 ntfs_mount+0x34/0x40 fs/ntfs/super.c:3065
 mount_fs+0x66/0x2d0 fs/super.c:1222
 vfs_kern_mount.part.26+0xc6/0x4a0 fs/namespace.c:1037
 vfs_kern_mount fs/namespace.c:2514 [inline]
 do_new_mount fs/namespace.c:2517 [inline]
 do_mount+0xea4/0x2b90 fs/namespace.c:2847
 ksys_mount+0xab/0x120 fs/namespace.c:3063
 SYSC_mount fs/namespace.c:3077 [inline]
 SyS_mount+0x39/0x50 fs/namespace.c:3074
 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x44597a
RSP: 002b:7ffe9dfbd7d8 EFLAGS: 0206 ORIG_RAX: 00a5
RAX: ffda RBX: 2a80 RCX: 0044597a
RDX: 2000 RSI: 2100 RDI: 7ffe9dfbd850
RBP: 0003 R08: 20077a00 R09: 000a
R10:  R11: 0206 R12: 0004
R13: ab33 R14:  R15: 

The buggy address belongs to the page:
page:ea0006fb3100 count:0 mapcount:0 mapping: index:0x1
flags: 0x2fffc00()
raw: 02fffc00  0001 
raw: dead0100 dead0200  
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 8801becc4180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 8801becc4200: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff

8801becc4280: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff

  ^
 8801becc4300: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 8801becc4380: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
==


---
This bug is generated by a dumb bot. It may contain errors.
See https://goo.gl/tpsmEJ for details.
Direct all questions to syzkal...@googlegroups.com.

syzbot will keep track of this bug report.
If you forgot to add the Reported-by tag, once the fix for this bug is  
merged

into any tree, please reply to this email with:
#syz fix: exact-commit-title
If you want to test a patch for this bug, please reply with:
#syz test: git://repo/address.git branch
and provide the patch inline or as an attachment.
To mark this as a duplicate of another syzbot report, please reply with:
#syz dup: exact-subject-of-another-report
If it's a one-off invalid bug report, please reply with:
#syz invalid
Note: if the crash happens again, it will cause creation of a new bug  
report.

Note: all commands must start from beginning of the line in the email body.


KASAN: use-after-free Read in ntfs_read_locked_inode

2018-04-04 Thread syzbot

Hello,

syzbot hit the following crash on upstream commit
3e968c9f1401088abc9a19ae6ff571644d37a355 (Wed Apr 4 21:19:24 2018 +)
Merge tag 'ext4_for_linus' of  
git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
syzbot dashboard link:  
https://syzkaller.appspot.com/bug?extid=19b469021157c136116a


C reproducer: https://syzkaller.appspot.com/x/repro.c?id=5682455868604416
syzkaller reproducer:  
https://syzkaller.appspot.com/x/repro.syz?id=5679121900240896
Raw console output:  
https://syzkaller.appspot.com/x/log.txt?id=5751565918928896
Kernel config:  
https://syzkaller.appspot.com/x/.config?id=9118669095563550941

compiler: gcc (GCC) 7.1.1 20170620

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+19b469021157c1361...@syzkaller.appspotmail.com
It will help syzbot understand when the bug is fixed. See footer for  
details.

If you forward the report, please keep this part and the footer.

ntfs: volume version 3.1.
ntfs: volume version 3.1.
ntfs: volume version 3.1.
ntfs: volume version 3.1.
==
BUG: KASAN: use-after-free in ntfs_read_locked_inode+0x47fe/0x51b0  
fs/ntfs/inode.c:670

Read of size 8 at addr 8801becc42e8 by task syzkaller675411/4496

CPU: 0 PID: 4496 Comm: syzkaller675411 Not tainted 4.16.0+ #15
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x1a7/0x27d lib/dump_stack.c:53
 print_address_description+0x73/0x250 mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report+0x23c/0x360 mm/kasan/report.c:412
 __asan_report_load_n_noabort+0xf/0x20 mm/kasan/report.c:443
 ntfs_read_locked_inode+0x47fe/0x51b0 fs/ntfs/inode.c:670
 ntfs_iget+0x1ab/0x240 fs/ntfs/inode.c:190
 load_and_init_quota fs/ntfs/super.c:1406 [inline]
 load_system_files+0x5f06/0x6c80 fs/ntfs/super.c:2117
 ntfs_fill_super+0x1485/0x2fb0 fs/ntfs/super.c:2908
 mount_bdev+0x2b7/0x370 fs/super.c:1119
 ntfs_mount+0x34/0x40 fs/ntfs/super.c:3065
 mount_fs+0x66/0x2d0 fs/super.c:1222
 vfs_kern_mount.part.26+0xc6/0x4a0 fs/namespace.c:1037
 vfs_kern_mount fs/namespace.c:2514 [inline]
 do_new_mount fs/namespace.c:2517 [inline]
 do_mount+0xea4/0x2b90 fs/namespace.c:2847
 ksys_mount+0xab/0x120 fs/namespace.c:3063
 SYSC_mount fs/namespace.c:3077 [inline]
 SyS_mount+0x39/0x50 fs/namespace.c:3074
 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x44597a
RSP: 002b:7ffe9dfbd7d8 EFLAGS: 0206 ORIG_RAX: 00a5
RAX: ffda RBX: 2a80 RCX: 0044597a
RDX: 2000 RSI: 2100 RDI: 7ffe9dfbd850
RBP: 0003 R08: 20077a00 R09: 000a
R10:  R11: 0206 R12: 0004
R13: ab33 R14:  R15: 

The buggy address belongs to the page:
page:ea0006fb3100 count:0 mapcount:0 mapping: index:0x1
flags: 0x2fffc00()
raw: 02fffc00  0001 
raw: dead0100 dead0200  
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 8801becc4180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 8801becc4200: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff

8801becc4280: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff

  ^
 8801becc4300: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 8801becc4380: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
==


---
This bug is generated by a dumb bot. It may contain errors.
See https://goo.gl/tpsmEJ for details.
Direct all questions to syzkal...@googlegroups.com.

syzbot will keep track of this bug report.
If you forgot to add the Reported-by tag, once the fix for this bug is  
merged

into any tree, please reply to this email with:
#syz fix: exact-commit-title
If you want to test a patch for this bug, please reply with:
#syz test: git://repo/address.git branch
and provide the patch inline or as an attachment.
To mark this as a duplicate of another syzbot report, please reply with:
#syz dup: exact-subject-of-another-report
If it's a one-off invalid bug report, please reply with:
#syz invalid
Note: if the crash happens again, it will cause creation of a new bug  
report.

Note: all commands must start from beginning of the line in the email body.


Re: [PATCH] f2fs: enlarge block plug coverage

2018-04-04 Thread Jaegeuk Kim
On 04/04, Chao Yu wrote:
> This patch enlarges block plug coverage in __issue_discard_cmd, in
> order to collect more pending bios before issuing them, to avoid
> being disturbed by previous discard I/O in IO aware discard mode.

Hmm, then we need to wait for huge discard IO for over 10 secs, which
will affect following read/write IOs accordingly. In order to avoid that,
we actually need to limit the discard size.

Thanks,

> 
> Signed-off-by: Chao Yu 
> ---
>  fs/f2fs/segment.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 8f0b5ba46315..4287e208c040 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1208,10 +1208,12 @@ static int __issue_discard_cmd(struct f2fs_sb_info 
> *sbi,
>   pend_list = >pend_list[i];
>  
>   mutex_lock(>cmd_lock);
> +
> + blk_start_plug();
> +
>   if (list_empty(pend_list))
>   goto next;
>   f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, >root));
> - blk_start_plug();
>   list_for_each_entry_safe(dc, tmp, pend_list, list) {
>   f2fs_bug_on(sbi, dc->state != D_PREP);
>  
> @@ -1227,8 +1229,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>   if (++iter >= dpolicy->max_requests)
>   break;
>   }
> - blk_finish_plug();
>  next:
> + blk_finish_plug();
> +
>   mutex_unlock(>cmd_lock);
>  
>   if (iter >= dpolicy->max_requests)
> -- 
> 2.15.0.55.gc2ece9dc4de6


Re: [PATCH] f2fs: enlarge block plug coverage

2018-04-04 Thread Jaegeuk Kim
On 04/04, Chao Yu wrote:
> This patch enlarges block plug coverage in __issue_discard_cmd, in
> order to collect more pending bios before issuing them, to avoid
> being disturbed by previous discard I/O in IO aware discard mode.

Hmm, then we need to wait for huge discard IO for over 10 secs, which
will affect following read/write IOs accordingly. In order to avoid that,
we actually need to limit the discard size.

Thanks,

> 
> Signed-off-by: Chao Yu 
> ---
>  fs/f2fs/segment.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 8f0b5ba46315..4287e208c040 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1208,10 +1208,12 @@ static int __issue_discard_cmd(struct f2fs_sb_info 
> *sbi,
>   pend_list = >pend_list[i];
>  
>   mutex_lock(>cmd_lock);
> +
> + blk_start_plug();
> +
>   if (list_empty(pend_list))
>   goto next;
>   f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, >root));
> - blk_start_plug();
>   list_for_each_entry_safe(dc, tmp, pend_list, list) {
>   f2fs_bug_on(sbi, dc->state != D_PREP);
>  
> @@ -1227,8 +1229,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>   if (++iter >= dpolicy->max_requests)
>   break;
>   }
> - blk_finish_plug();
>  next:
> + blk_finish_plug();
> +
>   mutex_unlock(>cmd_lock);
>  
>   if (iter >= dpolicy->max_requests)
> -- 
> 2.15.0.55.gc2ece9dc4de6


Re: [PATCH 3/4] tracing: Add action comparisons when testing matching hist triggers

2018-04-04 Thread Masami Hiramatsu
On Wed, 04 Apr 2018 10:17:03 -0500
Tom Zanussi  wrote:

> Hi Masami,
> 
> On Wed, 2018-04-04 at 21:33 +0900, Masami Hiramatsu wrote:
> > Hi Tom,
> > 
> > On Mon, 02 Apr 2018 12:09:33 -0500
> > Tom Zanussi  wrote:
> > 
> > > after:
> > > 
> > >   # echo 'wakeup_latency u64 lat; pid_t pid' >> 
> > > /sys/kernel/debug/tracing/synthetic_events
> > >   # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' 
> > > >> /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
> > >   # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if 
> > > next_comm=="cyclictest"' >> 
> > > /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
> > >   # echo 
> > > 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid)
> > >  if next_comm=="cyclictest"' >> 
> > > /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
> > >   # echo 
> > > 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid)
> > >  if next_comm=="cyclictest"' >> 
> > > /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
> > >   # echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> 
> > > /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
> > 
> > I ensured this sequence has no problem.
> > After above sequence, the trigger file shows
> > 
> > ==
> > # cat events/sched/sched_switch/trigger
> > hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global
> >  if next_comm=="cyclictest" [active]
> > hist:keys=next_pid:vals=hitcount:sort=hitcount:size=2048:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid)
> >  if next_comm=="cyclictest" [active]
> > hist:keys=next_pid:vals=hitcount:sort=hitcount:size=2048:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid)
> >  if next_comm=="cyclictest" [active]
> > hist:keys=next_pid:vals=hitcount:sort=hitcount:size=2048 if 
> > next_comm=="cyclictest" [active]
> > ==
> > 
> > So I clear the last one
> > 
> > ==
> > # echo '!hist:keys=next_pid if next_comm=="cyclictest"' >> 
> > events/sched/sched_switch/trigger
> > #
> > ==
> > 
> > OK, it should be removed. I can not see it anymore on the trigger file.
> > 
> > ==
> > # cat events/sched/sched_switch/trigger
> > hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global
> >  if next_comm=="cyclictest" [active]
> > hist:keys=next_pid:vals=hitcount:sort=hitcount:size=2048:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid)
> >  if next_comm=="cyclictest" [active]
> > hist:keys=next_pid:vals=hitcount:sort=hitcount:size=2048:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid)
> >  if next_comm=="cyclictest" [active]
> > ==
> > 
> > But when I missed to remove it again, it is accepted (is not an error?)
> > 
> > ==
> > # echo '!hist:keys=next_pid if next_comm=="cyclictest"' >> 
> > events/sched/sched_switch/trigger
> > #
> > ==
> 
> This is consistent with existing behavior, not only for the hist
> triggers but ftrace in general I think e.g.
> 
> # echo 'traceoff:1 if nr_rq > 1' >> 
> /sys/kernel/debug/tracing/events/block/block_unplug/trigger
> # echo '!traceoff:1 if nr_rq > 1' >> 
> /sys/kernel/debug/tracing/events/block/block_unplug/trigger
> # echo '!traceoff:1 if nr_rq > 1' >> 
> /sys/kernel/debug/tracing/events/block/block_unplug/trigger
> 
> # echo 'try_to_wake_up:enable_event:sched:sched_switch:2' >> set_ftrace_filter
> # echo '!try_to_wake_up:enable_event:sched:sched_switch:2' >> 
> set_ftrace_filter
> # echo '!try_to_wake_up:enable_event:sched:sched_switch:2' >> 
> set_ftrace_filter
> 
> Neither produces an error trying to remove an already-removed trigger.
> Or are you thinking about something else?

Hmm, OK, it is ftrace's behavior.

> > Hmm... anyway, let's clear others too.
> > 
> > ==
> > # echo 
> > '!hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid)
> >  if next_comm=="cyclictest"' >> events/sched/sched_switch/trigger
> > # echo 
> > '!hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid)
> >  if next_comm=="cyclictest"' >> events/sched/sched_switch/trigger
> > # cat events/sched/sched_switch/trigger
> > # Available triggers:
> > # traceon traceoff snapshot stacktrace enable_event disable_event 
> > enable_hist disable_hist hist
> > ==
> > 
> > OK, it is cleared now.
> > 
> > Now I test it again.
> > 
> > ==
> > # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' >> 
> > events/sched/sched_wakeup/trigger
> > sh: write error: Invalid argument
> > ==
> > 
> > Oops, what's the error?
> > 
> > ==
> > # cat events/sched/sched_switch/hist
> > 
> > ERROR: 

Re: [PATCH 3/4] tracing: Add action comparisons when testing matching hist triggers

2018-04-04 Thread Masami Hiramatsu
On Wed, 04 Apr 2018 10:17:03 -0500
Tom Zanussi  wrote:

> Hi Masami,
> 
> On Wed, 2018-04-04 at 21:33 +0900, Masami Hiramatsu wrote:
> > Hi Tom,
> > 
> > On Mon, 02 Apr 2018 12:09:33 -0500
> > Tom Zanussi  wrote:
> > 
> > > after:
> > > 
> > >   # echo 'wakeup_latency u64 lat; pid_t pid' >> 
> > > /sys/kernel/debug/tracing/synthetic_events
> > >   # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' 
> > > >> /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
> > >   # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if 
> > > next_comm=="cyclictest"' >> 
> > > /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
> > >   # echo 
> > > 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid)
> > >  if next_comm=="cyclictest"' >> 
> > > /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
> > >   # echo 
> > > 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid)
> > >  if next_comm=="cyclictest"' >> 
> > > /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
> > >   # echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> 
> > > /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
> > 
> > I ensured this sequence has no problem.
> > After above sequence, the trigger file shows
> > 
> > ==
> > # cat events/sched/sched_switch/trigger
> > hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global
> >  if next_comm=="cyclictest" [active]
> > hist:keys=next_pid:vals=hitcount:sort=hitcount:size=2048:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid)
> >  if next_comm=="cyclictest" [active]
> > hist:keys=next_pid:vals=hitcount:sort=hitcount:size=2048:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid)
> >  if next_comm=="cyclictest" [active]
> > hist:keys=next_pid:vals=hitcount:sort=hitcount:size=2048 if 
> > next_comm=="cyclictest" [active]
> > ==
> > 
> > So I clear the last one
> > 
> > ==
> > # echo '!hist:keys=next_pid if next_comm=="cyclictest"' >> 
> > events/sched/sched_switch/trigger
> > #
> > ==
> > 
> > OK, it should be removed. I can not see it anymore on the trigger file.
> > 
> > ==
> > # cat events/sched/sched_switch/trigger
> > hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global
> >  if next_comm=="cyclictest" [active]
> > hist:keys=next_pid:vals=hitcount:sort=hitcount:size=2048:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid)
> >  if next_comm=="cyclictest" [active]
> > hist:keys=next_pid:vals=hitcount:sort=hitcount:size=2048:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid)
> >  if next_comm=="cyclictest" [active]
> > ==
> > 
> > But when I missed to remove it again, it is accepted (is not an error?)
> > 
> > ==
> > # echo '!hist:keys=next_pid if next_comm=="cyclictest"' >> 
> > events/sched/sched_switch/trigger
> > #
> > ==
> 
> This is consistent with existing behavior, not only for the hist
> triggers but ftrace in general I think e.g.
> 
> # echo 'traceoff:1 if nr_rq > 1' >> 
> /sys/kernel/debug/tracing/events/block/block_unplug/trigger
> # echo '!traceoff:1 if nr_rq > 1' >> 
> /sys/kernel/debug/tracing/events/block/block_unplug/trigger
> # echo '!traceoff:1 if nr_rq > 1' >> 
> /sys/kernel/debug/tracing/events/block/block_unplug/trigger
> 
> # echo 'try_to_wake_up:enable_event:sched:sched_switch:2' >> set_ftrace_filter
> # echo '!try_to_wake_up:enable_event:sched:sched_switch:2' >> 
> set_ftrace_filter
> # echo '!try_to_wake_up:enable_event:sched:sched_switch:2' >> 
> set_ftrace_filter
> 
> Neither produces an error trying to remove an already-removed trigger.
> Or are you thinking about something else?

Hmm, OK, it is ftrace's behavior.

> > Hmm... anyway, let's clear others too.
> > 
> > ==
> > # echo 
> > '!hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid)
> >  if next_comm=="cyclictest"' >> events/sched/sched_switch/trigger
> > # echo 
> > '!hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid)
> >  if next_comm=="cyclictest"' >> events/sched/sched_switch/trigger
> > # cat events/sched/sched_switch/trigger
> > # Available triggers:
> > # traceon traceoff snapshot stacktrace enable_event disable_event 
> > enable_hist disable_hist hist
> > ==
> > 
> > OK, it is cleared now.
> > 
> > Now I test it again.
> > 
> > ==
> > # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' >> 
> > events/sched/sched_wakeup/trigger
> > sh: write error: Invalid argument
> > ==
> > 
> > Oops, what's the error?
> > 
> > ==
> > # cat events/sched/sched_switch/hist
> > 
> > ERROR: Variable already defined: ts0
> >   Last command: 

Re: [PATCH] f2fs: fix to show encrypt flag in FS_IOC_GETFLAGS

2018-04-04 Thread Jaegeuk Kim
On 04/03, Chao Yu wrote:
> On 2018/4/3 4:21, Jaegeuk Kim wrote:
> > On 04/02, Chao Yu wrote:
> >> This patch fixes to show encrypt flag in FS_IOC_GETFLAGS like ext4 does.
> > 
> > Actually, we have to show internal flags owned by f2fs, not generic ones.
> > We may need to define all of them separately?
> 
> Agreed, I wrote a patch, could check that? and in that patch, do we need to
> delete flag definition f2fs don't use?

IMO, we'd better keep the flags. I merged it and could you add encryption part
on top of it?

Thanks,

> 
> Thanks,
> 
> > 
> >>
> >> Signed-off-by: Chao Yu 
> >> ---
> >>  fs/f2fs/file.c | 9 +++--
> >>  1 file changed, 7 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >> index 8068b015ece5..271fadadaa36 100644
> >> --- a/fs/f2fs/file.c
> >> +++ b/fs/f2fs/file.c
> >> @@ -1584,8 +1584,13 @@ static int f2fs_ioc_getflags(struct file *filp, 
> >> unsigned long arg)
> >>  {
> >>struct inode *inode = file_inode(filp);
> >>struct f2fs_inode_info *fi = F2FS_I(inode);
> >> -  unsigned int flags = fi->i_flags &
> >> -  (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL);
> >> +  unsigned int flags = fi->i_flags;
> >> +
> >> +  if (file_is_encrypt(inode))
> >> +  flags |= FS_ENCRYPT_FL;
> >> +
> >> +  flags &= FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL;
> >> +
> >>return put_user(flags, (int __user *)arg);
> >>  }
> >>  
> >> -- 
> >> 2.15.0.55.gc2ece9dc4de6
> > 
> > .
> > 


Re: [PATCH] f2fs: fix to show encrypt flag in FS_IOC_GETFLAGS

2018-04-04 Thread Jaegeuk Kim
On 04/03, Chao Yu wrote:
> On 2018/4/3 4:21, Jaegeuk Kim wrote:
> > On 04/02, Chao Yu wrote:
> >> This patch fixes to show encrypt flag in FS_IOC_GETFLAGS like ext4 does.
> > 
> > Actually, we have to show internal flags owned by f2fs, not generic ones.
> > We may need to define all of them separately?
> 
> Agreed, I wrote a patch, could check that? and in that patch, do we need to
> delete flag definition f2fs don't use?

IMO, we'd better keep the flags. I merged it and could you add encryption part
on top of it?

Thanks,

> 
> Thanks,
> 
> > 
> >>
> >> Signed-off-by: Chao Yu 
> >> ---
> >>  fs/f2fs/file.c | 9 +++--
> >>  1 file changed, 7 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >> index 8068b015ece5..271fadadaa36 100644
> >> --- a/fs/f2fs/file.c
> >> +++ b/fs/f2fs/file.c
> >> @@ -1584,8 +1584,13 @@ static int f2fs_ioc_getflags(struct file *filp, 
> >> unsigned long arg)
> >>  {
> >>struct inode *inode = file_inode(filp);
> >>struct f2fs_inode_info *fi = F2FS_I(inode);
> >> -  unsigned int flags = fi->i_flags &
> >> -  (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL);
> >> +  unsigned int flags = fi->i_flags;
> >> +
> >> +  if (file_is_encrypt(inode))
> >> +  flags |= FS_ENCRYPT_FL;
> >> +
> >> +  flags &= FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL;
> >> +
> >>return put_user(flags, (int __user *)arg);
> >>  }
> >>  
> >> -- 
> >> 2.15.0.55.gc2ece9dc4de6
> > 
> > .
> > 


[GIT PULL] f2fs update for 4.17-rc1

2018-04-04 Thread Jaegeuk Kim
Hi Linus,

Could you please consider this pull request?

Thanks,

The following changes since commit 3664ce2d930983966d2aac0e167f1332988c4e25:

  Merge tag 'powerpc-4.16-4' of 
git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux (2018-02-24 
16:05:50 -0800)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git 
tags/f2fs-for-4.17

for you to fetch changes up to 214c2461a864a46b11856426b80dc7db453043c5:

  f2fs: remain written times to update inode during fsync (2018-04-03 18:52:47 
-0700)


f2fs-for-4.17-rc1

In this round, we've mainly focused on performance tuning and critical bug fixes
occurred in low-end devices. Sheng Yong introduced lost_found feature to keep
missing files during recovery instead of thrashing them. We're preparing coming
fsverity implementation. And, we've got more features to communicate with users
for better performance. In low-end devices, some memory-related issues were
fixed, and subtle race condtions and corner cases were addressed as well.

Enhancement:
 - large nat bitmaps for more free node ids
 - add three block allocation policies to pass down write hints given by user
 - expose extension list to user and introduce hot file extension
 - tune small devices seamlessly for low-end devices
 - set readdir_ra by default
 - give more resources under gc_urgent mode regarding to discard and cleaning
 - introduce fsync_mode to enforce posix or not
 - nowait aio support
 - add lost_found feature to keep dangling inodes
 - reserve bits for future fsverity feature
 - add test_dummy_encryption for FBE

Bug fix:
 - don't use highmem for dentry pages
 - align memory boundary for bitops
 - truncate preallocated blocks in write errors
 - guarantee i_times on fsync call
 - clear CP_TRIMMED_FLAG correctly
 - prevent node chain loop during recovery
 - avoid data race between atomic write and background cleaning
 - avoid unnecessary selinux violation warnings on resgid option
 - GFP_NOFS to avoid deadlock in quota and read paths
 - fix f2fs_skip_inode_update to allow i_size recovery

In addition to them, there are several minor bug fixes and clean-ups.


Chao Yu (16):
  f2fs: restrict inline_xattr_size configuration
  f2fs: fix to check extent cache in f2fs_drop_extent_tree
  f2fs: support large nat bitmap
  f2fs: fix to clear CP_TRIMMED_FLAG
  f2fs: fix to handle looped node chain during recovery
  f2fs: introduce sb_lock to make encrypt pwsalt update exclusive
  f2fs: fix to set KEEP_SIZE bit in f2fs_zero_range
  f2fs: expose extension_list sysfs entry
  f2fs: fix to avoid race in between atomic write and background GC
  f2fs: support hot file extension
  f2fs: wrap sb_rdonly with f2fs_readonly
  f2fs: fix to restore old mount option in ->remount_fs
  f2fs: wrap all options with f2fs_sb_info.mount_opt
  f2fs: remove unneeded set_cold_node()
  f2fs: clean up with F2FS_BLK_ALIGN
  f2fs: don't track new nat entry in nat set

Colin Ian King (1):
  f2fs: remove redundant initialization of pointer 'p'

Eric Biggers (1):
  f2fs: reserve bits for fs-verity

Gao Xiang (1):
  f2fs: flush cp pack except cp pack 2 page at first

Hyunchul Lee (4):
  f2fs: support passing down write hints given by users to block layer
  f2fs: support passing down write hints to block layer with F2FS policy
  f2fs: Add the 'whint_mode' mount option to f2fs documentation
  f2fs: add nowait aio support

Jaegeuk Kim (11):
  f2fs: handle quota for orphan inodes
  f2fs: don't stop GC if GC is contended
  f2fs: add mount option for segment allocation policy
  f2fs: add auto tuning for small devices
  f2fs: set readdir_ra by default
  f2fs: issue discard aggressively in the gc_urgent mode
  f2fs: do gc in greedy mode for whole range if gc_urgent mode is set
  f2fs: avoid selinux denial on CAP_SYS_RESOURCE
  f2fs: align memory boundary for bitops
  f2fs: truncate preallocated blocks in error case
  f2fs: remain written times to update inode during fsync

Junling Zheng (2):
  f2fs: introduce mount option for fsync mode
  f2fs: fix a wrong condition in f2fs_skip_inode_update

Qiuyang Sun (1):
  f2fs: release locks before return in f2fs_ioc_gc_range()

Ritesh Harjani (1):
  f2fs: Set GF_NOFS in read_cache_page_gfp while doing f2fs_quota_read

Sheng Yong (4):
  f2fs: fix potential corruption in area before F2FS_SUPER_OFFSET
  f2fs: clean up f2fs_sb_has_xxx functions
  f2fs: introduce F2FS_FEATURE_LOST_FOUND feature
  f2fs: introduce a new mount option test_dummy_encryption

Tiezhu Yang (1):
  f2fs: remove redundant check of page type when submit bio

Yunlei He (3):
  f2fs: Don't overwrite all types of node to keep node chain
  f2fs: check blkaddr more accuratly before 

[GIT PULL] f2fs update for 4.17-rc1

2018-04-04 Thread Jaegeuk Kim
Hi Linus,

Could you please consider this pull request?

Thanks,

The following changes since commit 3664ce2d930983966d2aac0e167f1332988c4e25:

  Merge tag 'powerpc-4.16-4' of 
git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux (2018-02-24 
16:05:50 -0800)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git 
tags/f2fs-for-4.17

for you to fetch changes up to 214c2461a864a46b11856426b80dc7db453043c5:

  f2fs: remain written times to update inode during fsync (2018-04-03 18:52:47 
-0700)


f2fs-for-4.17-rc1

In this round, we've mainly focused on performance tuning and critical bug fixes
occurred in low-end devices. Sheng Yong introduced lost_found feature to keep
missing files during recovery instead of thrashing them. We're preparing coming
fsverity implementation. And, we've got more features to communicate with users
for better performance. In low-end devices, some memory-related issues were
fixed, and subtle race condtions and corner cases were addressed as well.

Enhancement:
 - large nat bitmaps for more free node ids
 - add three block allocation policies to pass down write hints given by user
 - expose extension list to user and introduce hot file extension
 - tune small devices seamlessly for low-end devices
 - set readdir_ra by default
 - give more resources under gc_urgent mode regarding to discard and cleaning
 - introduce fsync_mode to enforce posix or not
 - nowait aio support
 - add lost_found feature to keep dangling inodes
 - reserve bits for future fsverity feature
 - add test_dummy_encryption for FBE

Bug fix:
 - don't use highmem for dentry pages
 - align memory boundary for bitops
 - truncate preallocated blocks in write errors
 - guarantee i_times on fsync call
 - clear CP_TRIMMED_FLAG correctly
 - prevent node chain loop during recovery
 - avoid data race between atomic write and background cleaning
 - avoid unnecessary selinux violation warnings on resgid option
 - GFP_NOFS to avoid deadlock in quota and read paths
 - fix f2fs_skip_inode_update to allow i_size recovery

In addition to them, there are several minor bug fixes and clean-ups.


Chao Yu (16):
  f2fs: restrict inline_xattr_size configuration
  f2fs: fix to check extent cache in f2fs_drop_extent_tree
  f2fs: support large nat bitmap
  f2fs: fix to clear CP_TRIMMED_FLAG
  f2fs: fix to handle looped node chain during recovery
  f2fs: introduce sb_lock to make encrypt pwsalt update exclusive
  f2fs: fix to set KEEP_SIZE bit in f2fs_zero_range
  f2fs: expose extension_list sysfs entry
  f2fs: fix to avoid race in between atomic write and background GC
  f2fs: support hot file extension
  f2fs: wrap sb_rdonly with f2fs_readonly
  f2fs: fix to restore old mount option in ->remount_fs
  f2fs: wrap all options with f2fs_sb_info.mount_opt
  f2fs: remove unneeded set_cold_node()
  f2fs: clean up with F2FS_BLK_ALIGN
  f2fs: don't track new nat entry in nat set

Colin Ian King (1):
  f2fs: remove redundant initialization of pointer 'p'

Eric Biggers (1):
  f2fs: reserve bits for fs-verity

Gao Xiang (1):
  f2fs: flush cp pack except cp pack 2 page at first

Hyunchul Lee (4):
  f2fs: support passing down write hints given by users to block layer
  f2fs: support passing down write hints to block layer with F2FS policy
  f2fs: Add the 'whint_mode' mount option to f2fs documentation
  f2fs: add nowait aio support

Jaegeuk Kim (11):
  f2fs: handle quota for orphan inodes
  f2fs: don't stop GC if GC is contended
  f2fs: add mount option for segment allocation policy
  f2fs: add auto tuning for small devices
  f2fs: set readdir_ra by default
  f2fs: issue discard aggressively in the gc_urgent mode
  f2fs: do gc in greedy mode for whole range if gc_urgent mode is set
  f2fs: avoid selinux denial on CAP_SYS_RESOURCE
  f2fs: align memory boundary for bitops
  f2fs: truncate preallocated blocks in error case
  f2fs: remain written times to update inode during fsync

Junling Zheng (2):
  f2fs: introduce mount option for fsync mode
  f2fs: fix a wrong condition in f2fs_skip_inode_update

Qiuyang Sun (1):
  f2fs: release locks before return in f2fs_ioc_gc_range()

Ritesh Harjani (1):
  f2fs: Set GF_NOFS in read_cache_page_gfp while doing f2fs_quota_read

Sheng Yong (4):
  f2fs: fix potential corruption in area before F2FS_SUPER_OFFSET
  f2fs: clean up f2fs_sb_has_xxx functions
  f2fs: introduce F2FS_FEATURE_LOST_FOUND feature
  f2fs: introduce a new mount option test_dummy_encryption

Tiezhu Yang (1):
  f2fs: remove redundant check of page type when submit bio

Yunlei He (3):
  f2fs: Don't overwrite all types of node to keep node chain
  f2fs: check blkaddr more accuratly before 

Re: WARNING in up_write

2018-04-04 Thread Matthew Wilcox
On Wed, Apr 04, 2018 at 11:22:00PM -0400, Theodore Y. Ts'o wrote:
> On Wed, Apr 04, 2018 at 12:35:04PM -0700, Matthew Wilcox wrote:
> > On Wed, Apr 04, 2018 at 09:24:05PM +0200, Dmitry Vyukov wrote:
> > > On Tue, Apr 3, 2018 at 4:01 AM, syzbot
> > >  wrote:
> > > > DEBUG_LOCKS_WARN_ON(sem->owner != get_current())
> > > > WARNING: CPU: 1 PID: 4441 at kernel/locking/rwsem.c:133 
> > > > up_write+0x1cc/0x210
> > > > kernel/locking/rwsem.c:133
> > > > Kernel panic - not syncing: panic_on_warn set ...
> > 
> > Message-Id: <1522852646-2196-1-git-send-email-long...@redhat.com>
> >
> 
> We were way ahead of syzbot in this case.  :-)

Not really ... syzbot caught it Monday evening ;-)

Date: Mon, 02 Apr 2018 19:01:01 -0700
From: syzbot 
To: linux-fsde...@vger.kernel.org, linux-kernel@vger.kernel.org,
syzkaller-b...@googlegroups.com, v...@zeniv.linux.org.uk
Subject: WARNING in up_write


Re: WARNING in up_write

2018-04-04 Thread Matthew Wilcox
On Wed, Apr 04, 2018 at 11:22:00PM -0400, Theodore Y. Ts'o wrote:
> On Wed, Apr 04, 2018 at 12:35:04PM -0700, Matthew Wilcox wrote:
> > On Wed, Apr 04, 2018 at 09:24:05PM +0200, Dmitry Vyukov wrote:
> > > On Tue, Apr 3, 2018 at 4:01 AM, syzbot
> > >  wrote:
> > > > DEBUG_LOCKS_WARN_ON(sem->owner != get_current())
> > > > WARNING: CPU: 1 PID: 4441 at kernel/locking/rwsem.c:133 
> > > > up_write+0x1cc/0x210
> > > > kernel/locking/rwsem.c:133
> > > > Kernel panic - not syncing: panic_on_warn set ...
> > 
> > Message-Id: <1522852646-2196-1-git-send-email-long...@redhat.com>
> >
> 
> We were way ahead of syzbot in this case.  :-)

Not really ... syzbot caught it Monday evening ;-)

Date: Mon, 02 Apr 2018 19:01:01 -0700
From: syzbot 
To: linux-fsde...@vger.kernel.org, linux-kernel@vger.kernel.org,
syzkaller-b...@googlegroups.com, v...@zeniv.linux.org.uk
Subject: WARNING in up_write


Re: WARNING in up_write

2018-04-04 Thread Theodore Y. Ts'o
On Wed, Apr 04, 2018 at 12:35:04PM -0700, Matthew Wilcox wrote:
> On Wed, Apr 04, 2018 at 09:24:05PM +0200, Dmitry Vyukov wrote:
> > On Tue, Apr 3, 2018 at 4:01 AM, syzbot
> >  wrote:
> > > DEBUG_LOCKS_WARN_ON(sem->owner != get_current())
> > > WARNING: CPU: 1 PID: 4441 at kernel/locking/rwsem.c:133 
> > > up_write+0x1cc/0x210
> > > kernel/locking/rwsem.c:133
> > > Kernel panic - not syncing: panic_on_warn set ...
> 
> Message-Id: <1522852646-2196-1-git-send-email-long...@redhat.com>
>

We were way ahead of syzbot in this case.  :-)

I reported the problem Tuesday morning:

https://lkml.org/lkml/2018/4/4/814

And within a few hours Waiman had proposed a fix:

https://patchwork.kernel.org/patch/10322639/

Note also that it's not ext4 specific.  It can be trivially reproduced using 
any one of:

kvm-xfstests -c ext4 generic/068
kvm-xfstests -c btrfs generic/068
kvm-xfstests -c xfs generic/068

(Basically, any file system that supports freeze/thaw.)

Cheers,

- Ted


Re: WARNING in up_write

2018-04-04 Thread Theodore Y. Ts'o
On Wed, Apr 04, 2018 at 12:35:04PM -0700, Matthew Wilcox wrote:
> On Wed, Apr 04, 2018 at 09:24:05PM +0200, Dmitry Vyukov wrote:
> > On Tue, Apr 3, 2018 at 4:01 AM, syzbot
> >  wrote:
> > > DEBUG_LOCKS_WARN_ON(sem->owner != get_current())
> > > WARNING: CPU: 1 PID: 4441 at kernel/locking/rwsem.c:133 
> > > up_write+0x1cc/0x210
> > > kernel/locking/rwsem.c:133
> > > Kernel panic - not syncing: panic_on_warn set ...
> 
> Message-Id: <1522852646-2196-1-git-send-email-long...@redhat.com>
>

We were way ahead of syzbot in this case.  :-)

I reported the problem Tuesday morning:

https://lkml.org/lkml/2018/4/4/814

And within a few hours Waiman had proposed a fix:

https://patchwork.kernel.org/patch/10322639/

Note also that it's not ext4 specific.  It can be trivially reproduced using 
any one of:

kvm-xfstests -c ext4 generic/068
kvm-xfstests -c btrfs generic/068
kvm-xfstests -c xfs generic/068

(Basically, any file system that supports freeze/thaw.)

Cheers,

- Ted


Re: [PATCH] locking/rwsem: Add up_write_non_owner() for percpu_up_write()

2018-04-04 Thread Theodore Y. Ts'o
On Wed, Apr 04, 2018 at 10:37:26AM -0400, Waiman Long wrote:
> The commit 8c5db92a705d ("locking/rwsem: Add DEBUG_RWSEMS to look for
> lock/unlock mismatches") causes a warning in ext4 fstests due to the
> fact that the freeze and thaw ioctls, by design, are run in different
> processes.

While true, it's not just ext4 fstests --- it's any file system which
supports freeze/thaw, since it's happening in the vfs layer.  I have
just as easily replicated the problem using "kvm-xfstests -c xfs
generic/068".  Or "kvm-xfstests -c btrfs generic/068".

Cheers,

- Ted


Re: [PATCH] locking/rwsem: Add up_write_non_owner() for percpu_up_write()

2018-04-04 Thread Theodore Y. Ts'o
On Wed, Apr 04, 2018 at 10:37:26AM -0400, Waiman Long wrote:
> The commit 8c5db92a705d ("locking/rwsem: Add DEBUG_RWSEMS to look for
> lock/unlock mismatches") causes a warning in ext4 fstests due to the
> fact that the freeze and thaw ioctls, by design, are run in different
> processes.

While true, it's not just ext4 fstests --- it's any file system which
supports freeze/thaw, since it's happening in the vfs layer.  I have
just as easily replicated the problem using "kvm-xfstests -c xfs
generic/068".  Or "kvm-xfstests -c btrfs generic/068".

Cheers,

- Ted


Re: [PATCH v1] kernel/trace:check the val against the available mem

2018-04-04 Thread Matthew Wilcox
On Wed, Apr 04, 2018 at 11:47:30AM -0400, Steven Rostedt wrote:
> I originally was going to remove the RETRY_MAYFAIL, but adding this
> check (at the end of the loop though) appears to have OOM consistently
> kill this task.
> 
> I still like to keep RETRY_MAYFAIL, because it wont trigger OOM if
> nothing comes in and tries to do an allocation, but instead will fail
> nicely with -ENOMEM.

I still don't get why you want RETRY_MAYFAIL.  You know that tries
*harder* to allocate memory than plain GFP_KERNEL does, right?  And
that seems like the exact opposite of what you want.


Re: [PATCH v1] kernel/trace:check the val against the available mem

2018-04-04 Thread Matthew Wilcox
On Wed, Apr 04, 2018 at 11:47:30AM -0400, Steven Rostedt wrote:
> I originally was going to remove the RETRY_MAYFAIL, but adding this
> check (at the end of the loop though) appears to have OOM consistently
> kill this task.
> 
> I still like to keep RETRY_MAYFAIL, because it wont trigger OOM if
> nothing comes in and tries to do an allocation, but instead will fail
> nicely with -ENOMEM.

I still don't get why you want RETRY_MAYFAIL.  You know that tries
*harder* to allocate memory than plain GFP_KERNEL does, right?  And
that seems like the exact opposite of what you want.


Re: [PATCH] gup: return -EFAULT on access_ok failure

2018-04-04 Thread Linus Torvalds
On Wed, Apr 4, 2018 at 6:53 PM, Michael S. Tsirkin  wrote:
>
> Any feedback on this? As this fixes a bug in vhost, I'll merge
> through the vhost tree unless someone objects.

NAK.

__get_user_pages_fast() returns the number of pages it gets.

It has never returned an error code, and all the other versions of it
(architecture-specific) don't either.

If you ask for one page, and get zero pages, then that's an -EFAULT.
Note that that's an EFAULT regardless of whether that zero page
happened due to kernel addresses or just lack of mapping in user
space.

The documentation is simply wrong if it says anything else. Fix the
docs, and fix the users.

The correct use has always been to check the number of pages returned.

Just looking around, returning an error number looks like it could
seriously confuse some things. You have things like the kvm code that
does the *right* thing:

unsigned long ... npinned ...

npinned = get_user_pages_fast(uaddr, npages, write ?
FOLL_WRITE : 0, pages);
if (npinned != npages) {
 ...

err:
if (npinned > 0)
release_pages(pages, npinned);

and the above code clearly depends on the actual behavior, not on the
documentation.

Any changes in this area would need some *extreme* care, exactly
because of code like the above that clearly depends on the existing
semantics.

In fact, the documentation really seems to be just buggy. The actual
get_user_pages() function itself is expressly being careful *not* to
return an error code, it even has a comment to the effect ("Have to be
a bit careful with return values").

So the "If no pages were pinned, returns -errno" comment is just bogus.

  Linus


Re: [PATCH] gup: return -EFAULT on access_ok failure

2018-04-04 Thread Linus Torvalds
On Wed, Apr 4, 2018 at 6:53 PM, Michael S. Tsirkin  wrote:
>
> Any feedback on this? As this fixes a bug in vhost, I'll merge
> through the vhost tree unless someone objects.

NAK.

__get_user_pages_fast() returns the number of pages it gets.

It has never returned an error code, and all the other versions of it
(architecture-specific) don't either.

If you ask for one page, and get zero pages, then that's an -EFAULT.
Note that that's an EFAULT regardless of whether that zero page
happened due to kernel addresses or just lack of mapping in user
space.

The documentation is simply wrong if it says anything else. Fix the
docs, and fix the users.

The correct use has always been to check the number of pages returned.

Just looking around, returning an error number looks like it could
seriously confuse some things. You have things like the kvm code that
does the *right* thing:

unsigned long ... npinned ...

npinned = get_user_pages_fast(uaddr, npages, write ?
FOLL_WRITE : 0, pages);
if (npinned != npages) {
 ...

err:
if (npinned > 0)
release_pages(pages, npinned);

and the above code clearly depends on the actual behavior, not on the
documentation.

Any changes in this area would need some *extreme* care, exactly
because of code like the above that clearly depends on the existing
semantics.

In fact, the documentation really seems to be just buggy. The actual
get_user_pages() function itself is expressly being careful *not* to
return an error code, it even has a comment to the effect ("Have to be
a bit careful with return values").

So the "If no pages were pinned, returns -errno" comment is just bogus.

  Linus


Re: An actual suggestion (Re: [GIT PULL] Kernel lockdown for secure boot)

2018-04-04 Thread joeyli
Hi David, 

On Wed, Apr 04, 2018 at 05:17:24PM +0100, David Howells wrote:
> Andy Lutomirski  wrote:
> 
> > Since this thread has devolved horribly, I'm going to propose a solution.
> > 
> > 1. Split the "lockdown" state into three levels:  (please don't
> > bikeshed about the names right now.)
> > 
> > LOCKDOWN_NONE: normal behavior
> > 
> > LOCKDOWN_PROTECT_INTEGREITY: kernel tries to keep root from writing to
> > kernel memory
> > 
> > LOCKDOWN_PROTECT_INTEGRITY_AND_SECRECY: kernel tries to keep root from
> > reading or writing kernel memory.
> 
> In theory, it's good idea, but in practice it's not as easy to implement as I
> think you think.
> 
> Let me list here the things that currently get restricted by lockdown:
> 
[...snip]
>  (5) Kexec.
>

About IMA with kernel module signing and kexec(not on x86_64 yet)...

Because IMA can be used to verify the integrity of kernel module or even
the image for kexec. I think that the
IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY must be enabled at runtime
when kernel is locked-down.

Because the root can enroll master key to keyring then IMA trusts the ima key
derived from master key. It causes that the arbitrary signed module can be 
loaded
when the root compromised.

Thanks
Joey Lee


Re: An actual suggestion (Re: [GIT PULL] Kernel lockdown for secure boot)

2018-04-04 Thread joeyli
Hi David, 

On Wed, Apr 04, 2018 at 05:17:24PM +0100, David Howells wrote:
> Andy Lutomirski  wrote:
> 
> > Since this thread has devolved horribly, I'm going to propose a solution.
> > 
> > 1. Split the "lockdown" state into three levels:  (please don't
> > bikeshed about the names right now.)
> > 
> > LOCKDOWN_NONE: normal behavior
> > 
> > LOCKDOWN_PROTECT_INTEGREITY: kernel tries to keep root from writing to
> > kernel memory
> > 
> > LOCKDOWN_PROTECT_INTEGRITY_AND_SECRECY: kernel tries to keep root from
> > reading or writing kernel memory.
> 
> In theory, it's good idea, but in practice it's not as easy to implement as I
> think you think.
> 
> Let me list here the things that currently get restricted by lockdown:
> 
[...snip]
>  (5) Kexec.
>

About IMA with kernel module signing and kexec(not on x86_64 yet)...

Because IMA can be used to verify the integrity of kernel module or even
the image for kexec. I think that the
IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY must be enabled at runtime
when kernel is locked-down.

Because the root can enroll master key to keyring then IMA trusts the ima key
derived from master key. It causes that the arbitrary signed module can be 
loaded
when the root compromised.

Thanks
Joey Lee


Re: [PATCH] mmc: dw_mmc-k3: Fix DDR52 mode by setting required clock divisor

2018-04-04 Thread zhangfei

+ Hisilicon colleague


On 2018年04月05日 08:51, Shawn Lin wrote:

[+ Zhangfei Gao who added support for hi6220]

On 2018/4/4 23:31, Ryan Grachek wrote:
On Tue, Apr 3, 2018 at 6:31 AM, Shawn Lin > wrote:


    On 2018/3/30 2:24, oscardagrach wrote:

    Need at least one line commit body.

    Signed-off-by: oscardagrach >

    ---
   drivers/mmc/host/dw_mmc-k3.c | 10 --
   1 file changed, 8 insertions(+), 2 deletions(-)

    diff --git a/drivers/mmc/host/dw_mmc-k3.c
    b/drivers/mmc/host/dw_mmc-k3.c
    index 89cdb3d533bb..efc546cb4db8 100644
    --- a/drivers/mmc/host/dw_mmc-k3.c
    +++ b/drivers/mmc/host/dw_mmc-k3.c
    @@ -194,8 +194,14 @@ static void dw_mci_hi6220_set_ios(struct
    dw_mci *host, struct mmc_ios *ios)
         int ret;
         unsigned int clock;
   -     clock = (ios->clock <= 2500) ? 2500 : 
ios->clock;

    -
    +       /* CLKDIV must be 1 for DDR52/8-bit mode */
    +       if (ios->bus_width == MMC_BUS_WIDTH_8 &&
    +               ios->timing == MMC_TIMING_MMC_DDR52) {
    +               mci_writel(host, CLKDIV, 0x1);
    +               clock = ios->clock;
    +       } else {
    +               clock = (ios->clock <= 2500) ? 2500 :
    ios->clock;
    +       }


    I undertand DDR52/8-bit need CLKDIV fixed 1, but shouldn't the 
following

    change is more sensible?

    if (ios->bus_width == MMC_BUS_WIDTH_8 && ios->timing ==
    MMC_TIMING_MMC_DDR52)
         clock = ios->clock * 2;
    else
         clock = (ios->clock <= 2500) ? 2500 : ios->clock;


    The reason is ios->clock is 52MHz and you could claim 104MHz from 
the

    clock provider and let dw_mmc core take care of the divder to be 1.
    Otherwise, you just force it to be DDR52/8-bit with a clk rate of 
26MHz.



         ret = clk_set_rate(host->biu_clk, clock);
         if (ret)
                 dev_warn(host->dev, "failed to set rate
    %uHz\n", clock);





For future wise, please use plain mode mail, but not HTML format.


Your feedback is correct. I see the Rockchip dwmmc driver has a similar
implementation. After applying your suggested changes, however, my board
reports "dwmmc_k3 f723d000.dwmmc0: failed to set rate 10400Hz"
during intialization of eMMC. In addition, I do not see CLKDIV being
set to 1. clk_set_rate fails and I wonder if this is out-of-scope of
the driver.

If I set CLKDIV where I did prior, with your changes, the device fails
to set the clock and falls back to 52 MHz (26 MHz) and works fine, but
again, CLKDIV is reported as 0 (even though it is 1.) One thing of
interest to note is when I manually set the clock by doing:
(echo 10400 > /sys/kernel/debug/mmc0/clock) the device reports back
'mmc_host mmc0: Bus speed (slot 0) = 19840Hz (slot req 10400Hz,
  actual 9920HZ div = 1)' which works reliably and clk_set_rate does
not report any error.



When looking closely into the code, at least dw_mci_hi6220_set_ios
goes wrong with the bus_hz, since it should be ciu_clk but not biu_clk.
"b" stands for bus, and "c" stands for card IMHO, however bus_hz
describs the clock to the card, provided by controller. Does the
following patch help?


diff --git a/drivers/mmc/host/dw_mmc-k3.c b/drivers/mmc/host/dw_mmc-k3.c
index 89cdb3d..9e78cf2 100644
--- a/drivers/mmc/host/dw_mmc-k3.c
+++ b/drivers/mmc/host/dw_mmc-k3.c
@@ -194,13 +194,21 @@ static void dw_mci_hi6220_set_ios(struct dw_mci 
*host, struct mmc_ios *ios)

    int ret;
    unsigned int clock;

-   clock = (ios->clock <= 2500) ? 2500 : ios->clock;
+   if (ios->bus_width == MMC_BUS_WIDTH_8 &&
+   ios->timing == MMC_TIMING_MMC_DDR52)
+   clock = ios->clock * 2;
+   else
+   clock = (ios->clock <= 2500) ? 2500 : ios->clock;

-   ret = clk_set_rate(host->biu_clk, clock);
+   ret = clk_set_rate(host->ciu_clk, clock);
    if (ret)
    dev_warn(host->dev, "failed to set rate %uHz\n", clock);

-   host->bus_hz = clk_get_rate(host->biu_clk);
+   clock = clk_get_rate(host->ciu_clk);
+   if (clock != host->bus_hz) {
+   host->bus_hz = clock;
+   host->current_speed = 0;
+   }
 }



I am not sure where to begin debugging these clock issues and welcome
any feedback.






Re: [PATCH] mmc: dw_mmc-k3: Fix DDR52 mode by setting required clock divisor

2018-04-04 Thread zhangfei

+ Hisilicon colleague


On 2018年04月05日 08:51, Shawn Lin wrote:

[+ Zhangfei Gao who added support for hi6220]

On 2018/4/4 23:31, Ryan Grachek wrote:
On Tue, Apr 3, 2018 at 6:31 AM, Shawn Lin > wrote:


    On 2018/3/30 2:24, oscardagrach wrote:

    Need at least one line commit body.

    Signed-off-by: oscardagrach >

    ---
   drivers/mmc/host/dw_mmc-k3.c | 10 --
   1 file changed, 8 insertions(+), 2 deletions(-)

    diff --git a/drivers/mmc/host/dw_mmc-k3.c
    b/drivers/mmc/host/dw_mmc-k3.c
    index 89cdb3d533bb..efc546cb4db8 100644
    --- a/drivers/mmc/host/dw_mmc-k3.c
    +++ b/drivers/mmc/host/dw_mmc-k3.c
    @@ -194,8 +194,14 @@ static void dw_mci_hi6220_set_ios(struct
    dw_mci *host, struct mmc_ios *ios)
         int ret;
         unsigned int clock;
   -     clock = (ios->clock <= 2500) ? 2500 : 
ios->clock;

    -
    +       /* CLKDIV must be 1 for DDR52/8-bit mode */
    +       if (ios->bus_width == MMC_BUS_WIDTH_8 &&
    +               ios->timing == MMC_TIMING_MMC_DDR52) {
    +               mci_writel(host, CLKDIV, 0x1);
    +               clock = ios->clock;
    +       } else {
    +               clock = (ios->clock <= 2500) ? 2500 :
    ios->clock;
    +       }


    I undertand DDR52/8-bit need CLKDIV fixed 1, but shouldn't the 
following

    change is more sensible?

    if (ios->bus_width == MMC_BUS_WIDTH_8 && ios->timing ==
    MMC_TIMING_MMC_DDR52)
         clock = ios->clock * 2;
    else
         clock = (ios->clock <= 2500) ? 2500 : ios->clock;


    The reason is ios->clock is 52MHz and you could claim 104MHz from 
the

    clock provider and let dw_mmc core take care of the divder to be 1.
    Otherwise, you just force it to be DDR52/8-bit with a clk rate of 
26MHz.



         ret = clk_set_rate(host->biu_clk, clock);
         if (ret)
                 dev_warn(host->dev, "failed to set rate
    %uHz\n", clock);





For future wise, please use plain mode mail, but not HTML format.


Your feedback is correct. I see the Rockchip dwmmc driver has a similar
implementation. After applying your suggested changes, however, my board
reports "dwmmc_k3 f723d000.dwmmc0: failed to set rate 10400Hz"
during intialization of eMMC. In addition, I do not see CLKDIV being
set to 1. clk_set_rate fails and I wonder if this is out-of-scope of
the driver.

If I set CLKDIV where I did prior, with your changes, the device fails
to set the clock and falls back to 52 MHz (26 MHz) and works fine, but
again, CLKDIV is reported as 0 (even though it is 1.) One thing of
interest to note is when I manually set the clock by doing:
(echo 10400 > /sys/kernel/debug/mmc0/clock) the device reports back
'mmc_host mmc0: Bus speed (slot 0) = 19840Hz (slot req 10400Hz,
  actual 9920HZ div = 1)' which works reliably and clk_set_rate does
not report any error.



When looking closely into the code, at least dw_mci_hi6220_set_ios
goes wrong with the bus_hz, since it should be ciu_clk but not biu_clk.
"b" stands for bus, and "c" stands for card IMHO, however bus_hz
describs the clock to the card, provided by controller. Does the
following patch help?


diff --git a/drivers/mmc/host/dw_mmc-k3.c b/drivers/mmc/host/dw_mmc-k3.c
index 89cdb3d..9e78cf2 100644
--- a/drivers/mmc/host/dw_mmc-k3.c
+++ b/drivers/mmc/host/dw_mmc-k3.c
@@ -194,13 +194,21 @@ static void dw_mci_hi6220_set_ios(struct dw_mci 
*host, struct mmc_ios *ios)

    int ret;
    unsigned int clock;

-   clock = (ios->clock <= 2500) ? 2500 : ios->clock;
+   if (ios->bus_width == MMC_BUS_WIDTH_8 &&
+   ios->timing == MMC_TIMING_MMC_DDR52)
+   clock = ios->clock * 2;
+   else
+   clock = (ios->clock <= 2500) ? 2500 : ios->clock;

-   ret = clk_set_rate(host->biu_clk, clock);
+   ret = clk_set_rate(host->ciu_clk, clock);
    if (ret)
    dev_warn(host->dev, "failed to set rate %uHz\n", clock);

-   host->bus_hz = clk_get_rate(host->biu_clk);
+   clock = clk_get_rate(host->ciu_clk);
+   if (clock != host->bus_hz) {
+   host->bus_hz = clock;
+   host->current_speed = 0;
+   }
 }



I am not sure where to begin debugging these clock issues and welcome
any feedback.






Re: [rtlwifi-btcoex] Suspicious code in halbtc8821a1ant driver

2018-04-04 Thread Pkshih
On Thu, 2018-04-05 at 01:25 +, Gustavo A. R. Silva wrote:
> Hi all,
> 
> While doing some static analysis I came across the following piece of code at
> drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a1ant.c:1581:
> 
> 1581 static void btc8821a1ant_act_bt_sco_hid_only_busy(struct btc_coexist 
> *btcoexist,
> 1582   u8 wifi_status)
> 1583 {
> 1584 /* tdma and coex table */
> 1585 btc8821a1ant_ps_tdma(btcoexist, NORMAL_EXEC, true, 5);
> 1586 
> 1587 if (BT_8821A_1ANT_WIFI_STATUS_NON_CONNECTED_ASSO_AUTH_SCAN ==
> 1588 wifi_status)
> 1589 btc8821a1ant_coex_table_with_type(btcoexist, 
> NORMAL_EXEC, 1);
> 1590 else
> 1591 btc8821a1ant_coex_table_with_type(btcoexist, 
> NORMAL_EXEC, 1);
> 1592 }
> 
> The issue here is that the code for both branches of the if-else statement is 
> identical.
> 
> The if-else was introduced a year ago in this commit c6821613e653
> 
> I wonder if an argument should be changed in any of the calls to
> btc8821a1ant_coex_table_with_type?
> 
> 

It looks weird. Since we're in spring vacation, I'll check my colleague next 
Monday.

PK



Re: [rtlwifi-btcoex] Suspicious code in halbtc8821a1ant driver

2018-04-04 Thread Pkshih
On Thu, 2018-04-05 at 01:25 +, Gustavo A. R. Silva wrote:
> Hi all,
> 
> While doing some static analysis I came across the following piece of code at
> drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a1ant.c:1581:
> 
> 1581 static void btc8821a1ant_act_bt_sco_hid_only_busy(struct btc_coexist 
> *btcoexist,
> 1582   u8 wifi_status)
> 1583 {
> 1584 /* tdma and coex table */
> 1585 btc8821a1ant_ps_tdma(btcoexist, NORMAL_EXEC, true, 5);
> 1586 
> 1587 if (BT_8821A_1ANT_WIFI_STATUS_NON_CONNECTED_ASSO_AUTH_SCAN ==
> 1588 wifi_status)
> 1589 btc8821a1ant_coex_table_with_type(btcoexist, 
> NORMAL_EXEC, 1);
> 1590 else
> 1591 btc8821a1ant_coex_table_with_type(btcoexist, 
> NORMAL_EXEC, 1);
> 1592 }
> 
> The issue here is that the code for both branches of the if-else statement is 
> identical.
> 
> The if-else was introduced a year ago in this commit c6821613e653
> 
> I wonder if an argument should be changed in any of the calls to
> btc8821a1ant_coex_table_with_type?
> 
> 

It looks weird. Since we're in spring vacation, I'll check my colleague next 
Monday.

PK



Re: [PATCH?] reiserfs: prevent panic: don't allow %-char in journal dev. name

2018-04-04 Thread Randy Dunlap
On 04/04/2018 06:48 PM, Jeff Mahoney wrote:
> On 4/4/18 9:45 PM, Andrew Morton wrote:
>> On Wed, 4 Apr 2018 18:25:16 -0700 Randy Dunlap  wrote:
>>
>>> From: Randy Dunlap 
>>>
>>> If the reiserfs mount option's journal name contains a '%' character,
>>> it can lead to a WARN_ONCE() in lib/vsprintf.c::format_decode(),
>>> saying: "Please remove unsupported %/ in format string."
>>> That's OK until panic_on_warn is set, at which point it's dead, Jim.
>>>
>>> To placate this situation, check the journal name string for a '%'
>>> character and return an error if one is found. Also print a warning
>>> (one that won't panic the kernel) about the invalid journal name (e.g.):
>>>
>>>   reiserfs: journal device name is invalid: %/file0
>>>
>>> (In this example, the caller app specified the journal device name as
>>> "%/file0".)
>>>
>>
>> Well, that is a valid filename and we should support it...
>>
>> Isn't the bug in journal_init_dev()?

OK, thanks.

> Yep.  That's exactly it.
> 
> Acked-by: Jeff Mahoney 
> 
> Thanks,
> 
> -Jeff
> 
>> --- a/fs/reiserfs/journal.c~a
>> +++ a/fs/reiserfs/journal.c
>> @@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super
>>  if (IS_ERR(journal->j_dev_bd)) {
>>  result = PTR_ERR(journal->j_dev_bd);
>>  journal->j_dev_bd = NULL;
>> -reiserfs_warning(super,
>> +reiserfs_warning(super, "sh-457",
>>   "journal_init_dev: Cannot open '%s': %i",
>>   jdev_name, result);
>>  return result;
>> _
>>
>>
> 
> 


-- 
~Randy


Re: [PATCH?] reiserfs: prevent panic: don't allow %-char in journal dev. name

2018-04-04 Thread Randy Dunlap
On 04/04/2018 06:48 PM, Jeff Mahoney wrote:
> On 4/4/18 9:45 PM, Andrew Morton wrote:
>> On Wed, 4 Apr 2018 18:25:16 -0700 Randy Dunlap  wrote:
>>
>>> From: Randy Dunlap 
>>>
>>> If the reiserfs mount option's journal name contains a '%' character,
>>> it can lead to a WARN_ONCE() in lib/vsprintf.c::format_decode(),
>>> saying: "Please remove unsupported %/ in format string."
>>> That's OK until panic_on_warn is set, at which point it's dead, Jim.
>>>
>>> To placate this situation, check the journal name string for a '%'
>>> character and return an error if one is found. Also print a warning
>>> (one that won't panic the kernel) about the invalid journal name (e.g.):
>>>
>>>   reiserfs: journal device name is invalid: %/file0
>>>
>>> (In this example, the caller app specified the journal device name as
>>> "%/file0".)
>>>
>>
>> Well, that is a valid filename and we should support it...
>>
>> Isn't the bug in journal_init_dev()?

OK, thanks.

> Yep.  That's exactly it.
> 
> Acked-by: Jeff Mahoney 
> 
> Thanks,
> 
> -Jeff
> 
>> --- a/fs/reiserfs/journal.c~a
>> +++ a/fs/reiserfs/journal.c
>> @@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super
>>  if (IS_ERR(journal->j_dev_bd)) {
>>  result = PTR_ERR(journal->j_dev_bd);
>>  journal->j_dev_bd = NULL;
>> -reiserfs_warning(super,
>> +reiserfs_warning(super, "sh-457",
>>   "journal_init_dev: Cannot open '%s': %i",
>>   jdev_name, result);
>>  return result;
>> _
>>
>>
> 
> 


-- 
~Randy


RE: [PATCH v30 2/4] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

2018-04-04 Thread Wang, Wei W
On Thursday, April 5, 2018 9:12 AM, Michael S. Tsirkin wrote:
> On Thu, Apr 05, 2018 at 12:30:27AM +, Wang, Wei W wrote:
> > On Wednesday, April 4, 2018 10:08 PM, Michael S. Tsirkin wrote:
> > > On Wed, Apr 04, 2018 at 10:07:51AM +0800, Wei Wang wrote:
> > > > On 04/04/2018 02:47 AM, Michael S. Tsirkin wrote:
> > > > > On Wed, Apr 04, 2018 at 12:10:03AM +0800, Wei Wang wrote:
> > I'm afraid the driver couldn't be aware if the added hints are stale
> > or not,
> 
> 
> No - I mean that driver has code that compares two values and stops
> reporting. Can one of the values be stale?

The driver compares "vb->cmd_id_use != vb->cmd_id_received" to decide if it 
needs to stop reporting hints, and cmd_id_received is what the driver reads 
from host (host notifies the driver to read for the latest value). If host 
sends a new cmd id, it will notify the guest to read again. I'm not sure how 
that could be a stale cmd id (or maybe I misunderstood your point here?)

Best,
Wei


RE: [PATCH v30 2/4] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

2018-04-04 Thread Wang, Wei W
On Thursday, April 5, 2018 9:12 AM, Michael S. Tsirkin wrote:
> On Thu, Apr 05, 2018 at 12:30:27AM +, Wang, Wei W wrote:
> > On Wednesday, April 4, 2018 10:08 PM, Michael S. Tsirkin wrote:
> > > On Wed, Apr 04, 2018 at 10:07:51AM +0800, Wei Wang wrote:
> > > > On 04/04/2018 02:47 AM, Michael S. Tsirkin wrote:
> > > > > On Wed, Apr 04, 2018 at 12:10:03AM +0800, Wei Wang wrote:
> > I'm afraid the driver couldn't be aware if the added hints are stale
> > or not,
> 
> 
> No - I mean that driver has code that compares two values and stops
> reporting. Can one of the values be stale?

The driver compares "vb->cmd_id_use != vb->cmd_id_received" to decide if it 
needs to stop reporting hints, and cmd_id_received is what the driver reads 
from host (host notifies the driver to read for the latest value). If host 
sends a new cmd id, it will notify the guest to read again. I'm not sure how 
that could be a stale cmd id (or maybe I misunderstood your point here?)

Best,
Wei


WARNING: kobject bug in sysfs_warn_dup

2018-04-04 Thread syzbot

Hello,

syzbot hit the following crash on upstream commit
3e968c9f1401088abc9a19ae6ff571644d37a355 (Wed Apr 4 21:19:24 2018 +)
Merge tag 'ext4_for_linus' of  
git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
syzbot dashboard link:  
https://syzkaller.appspot.com/bug?extid=ff87a28e665c163aa7f5


C reproducer: https://syzkaller.appspot.com/x/repro.c?id=5104666266304512
syzkaller reproducer:  
https://syzkaller.appspot.com/x/repro.syz?id=5683447737614336
Raw console output:  
https://syzkaller.appspot.com/x/log.txt?id=5104818200772608
Kernel config:  
https://syzkaller.appspot.com/x/.config?id=9118669095563550941

compiler: gcc (GCC) 7.1.1 20170620

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+ff87a28e665c163aa...@syzkaller.appspotmail.com
It will help syzbot understand when the bug is fixed. See footer for  
details.

If you forward the report, please keep this part and the footer.

R10:  R11: 0286 R12: 0003
R13: 0004 R14:  R15: 
[ cut here ]
kobject_add_internal failed for nodev( with -EEXIST, don't try to register  
things with the same name in the same directory.

sysfs: cannot create duplicate filename '/fs/gfs2/nodev('
WARNING: CPU: 1 PID: 4473 at lib/kobject.c:238  
kobject_add_internal+0x8d4/0xbc0 lib/kobject.c:235

CPU: 0 PID: 4474 Comm: syzkaller533472 Not tainted 4.16.0+ #15
Kernel panic - not syncing: panic_on_warn set ...

Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x1a7/0x27d lib/dump_stack.c:53
 sysfs_warn_dup+0x83/0xa0 fs/sysfs/dir.c:30
 sysfs_create_dir_ns+0x178/0x1d0 fs/sysfs/dir.c:58
 create_dir lib/kobject.c:69 [inline]
 kobject_add_internal+0x335/0xbc0 lib/kobject.c:227
 kobject_add_varg lib/kobject.c:364 [inline]
 kobject_init_and_add+0xf9/0x150 lib/kobject.c:436
 gfs2_sys_fs_add+0x1ff/0x580 fs/gfs2/sys.c:652
 fill_super+0x86f/0x1d70 fs/gfs2/ops_fstype.c:1118
 gfs2_mount+0x587/0x6e0 fs/gfs2/ops_fstype.c:1321
 mount_fs+0x66/0x2d0 fs/super.c:1222
 vfs_kern_mount.part.26+0xc6/0x4a0 fs/namespace.c:1037
 vfs_kern_mount fs/namespace.c:2514 [inline]
 do_new_mount fs/namespace.c:2517 [inline]
 do_mount+0xea4/0x2b90 fs/namespace.c:2847
 ksys_mount+0xab/0x120 fs/namespace.c:3063
 SYSC_mount fs/namespace.c:3077 [inline]
 SyS_mount+0x39/0x50 fs/namespace.c:3074
 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x4432fa
RSP: 002b:7ffda3d84538 EFLAGS: 0286 ORIG_RAX: 00a5
RAX: ffda RBX: 20001a40 RCX: 004432fa
RDX: 20001a00 RSI: 20001a40 RDI: 7ffda3d84550
RBP:  R08: 20001f00 R09: 000a
R10:  R11: 0286 R12: 0003
R13: 0004 R14:  R15: 
CPU: 1 PID: 4473 Comm: syzkaller533472 Not tainted 4.16.0+ #15
[ cut here ]
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x1a7/0x27d lib/dump_stack.c:53
kobject_add_internal failed for nodev( with -EEXIST, don't try to register  
things with the same name in the same directory.

 panic+0x1f8/0x42c kernel/panic.c:183
WARNING: CPU: 0 PID: 4474 at lib/kobject.c:238  
kobject_add_internal+0x8d4/0xbc0 lib/kobject.c:235

Modules linked in:
 __warn+0x1dc/0x200 kernel/panic.c:547
CPU: 0 PID: 4474 Comm: syzkaller533472 Not tainted 4.16.0+ #15
 report_bug+0x1f4/0x2b0 lib/bug.c:186
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

 fixup_bug.part.10+0x37/0x80 arch/x86/kernel/traps.c:178
RIP: 0010:kobject_add_internal+0x8d4/0xbc0 lib/kobject.c:235
 fixup_bug arch/x86/kernel/traps.c:247 [inline]
 do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296
RSP: :8801addaf470 EFLAGS: 00010282
RAX: dc08 RBX: 8801d9661110 RCX: 815b5d2e
RDX:  RSI: 110035bb5e3e RDI: 110035bb5e13
RBP: 8801addaf568 R08: 110035bb5dd5 R09: 0001
R10: 0001 R11:  R12: 110035bb5e94
R13: ffef R14: 8801d39ae348 R15: 110035bb5e98
FS:  01db2880() GS:8801db00() knlGS:
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
 invalid_op+0x1b/0x40 arch/x86/entry/entry_64.S:991
RIP: 0010:kobject_add_internal+0x8d4/0xbc0 lib/kobject.c:235
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 019657b0 CR3: 0001ae0ca000 CR4: 001406f0
RSP: 0018:8801ade37470 EFLAGS: 00010282
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
RAX: dc08 RBX: 8801d9459190 

WARNING: kobject bug in sysfs_warn_dup

2018-04-04 Thread syzbot

Hello,

syzbot hit the following crash on upstream commit
3e968c9f1401088abc9a19ae6ff571644d37a355 (Wed Apr 4 21:19:24 2018 +)
Merge tag 'ext4_for_linus' of  
git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
syzbot dashboard link:  
https://syzkaller.appspot.com/bug?extid=ff87a28e665c163aa7f5


C reproducer: https://syzkaller.appspot.com/x/repro.c?id=5104666266304512
syzkaller reproducer:  
https://syzkaller.appspot.com/x/repro.syz?id=5683447737614336
Raw console output:  
https://syzkaller.appspot.com/x/log.txt?id=5104818200772608
Kernel config:  
https://syzkaller.appspot.com/x/.config?id=9118669095563550941

compiler: gcc (GCC) 7.1.1 20170620

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+ff87a28e665c163aa...@syzkaller.appspotmail.com
It will help syzbot understand when the bug is fixed. See footer for  
details.

If you forward the report, please keep this part and the footer.

R10:  R11: 0286 R12: 0003
R13: 0004 R14:  R15: 
[ cut here ]
kobject_add_internal failed for nodev( with -EEXIST, don't try to register  
things with the same name in the same directory.

sysfs: cannot create duplicate filename '/fs/gfs2/nodev('
WARNING: CPU: 1 PID: 4473 at lib/kobject.c:238  
kobject_add_internal+0x8d4/0xbc0 lib/kobject.c:235

CPU: 0 PID: 4474 Comm: syzkaller533472 Not tainted 4.16.0+ #15
Kernel panic - not syncing: panic_on_warn set ...

Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x1a7/0x27d lib/dump_stack.c:53
 sysfs_warn_dup+0x83/0xa0 fs/sysfs/dir.c:30
 sysfs_create_dir_ns+0x178/0x1d0 fs/sysfs/dir.c:58
 create_dir lib/kobject.c:69 [inline]
 kobject_add_internal+0x335/0xbc0 lib/kobject.c:227
 kobject_add_varg lib/kobject.c:364 [inline]
 kobject_init_and_add+0xf9/0x150 lib/kobject.c:436
 gfs2_sys_fs_add+0x1ff/0x580 fs/gfs2/sys.c:652
 fill_super+0x86f/0x1d70 fs/gfs2/ops_fstype.c:1118
 gfs2_mount+0x587/0x6e0 fs/gfs2/ops_fstype.c:1321
 mount_fs+0x66/0x2d0 fs/super.c:1222
 vfs_kern_mount.part.26+0xc6/0x4a0 fs/namespace.c:1037
 vfs_kern_mount fs/namespace.c:2514 [inline]
 do_new_mount fs/namespace.c:2517 [inline]
 do_mount+0xea4/0x2b90 fs/namespace.c:2847
 ksys_mount+0xab/0x120 fs/namespace.c:3063
 SYSC_mount fs/namespace.c:3077 [inline]
 SyS_mount+0x39/0x50 fs/namespace.c:3074
 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x4432fa
RSP: 002b:7ffda3d84538 EFLAGS: 0286 ORIG_RAX: 00a5
RAX: ffda RBX: 20001a40 RCX: 004432fa
RDX: 20001a00 RSI: 20001a40 RDI: 7ffda3d84550
RBP:  R08: 20001f00 R09: 000a
R10:  R11: 0286 R12: 0003
R13: 0004 R14:  R15: 
CPU: 1 PID: 4473 Comm: syzkaller533472 Not tainted 4.16.0+ #15
[ cut here ]
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x1a7/0x27d lib/dump_stack.c:53
kobject_add_internal failed for nodev( with -EEXIST, don't try to register  
things with the same name in the same directory.

 panic+0x1f8/0x42c kernel/panic.c:183
WARNING: CPU: 0 PID: 4474 at lib/kobject.c:238  
kobject_add_internal+0x8d4/0xbc0 lib/kobject.c:235

Modules linked in:
 __warn+0x1dc/0x200 kernel/panic.c:547
CPU: 0 PID: 4474 Comm: syzkaller533472 Not tainted 4.16.0+ #15
 report_bug+0x1f4/0x2b0 lib/bug.c:186
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

 fixup_bug.part.10+0x37/0x80 arch/x86/kernel/traps.c:178
RIP: 0010:kobject_add_internal+0x8d4/0xbc0 lib/kobject.c:235
 fixup_bug arch/x86/kernel/traps.c:247 [inline]
 do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296
RSP: :8801addaf470 EFLAGS: 00010282
RAX: dc08 RBX: 8801d9661110 RCX: 815b5d2e
RDX:  RSI: 110035bb5e3e RDI: 110035bb5e13
RBP: 8801addaf568 R08: 110035bb5dd5 R09: 0001
R10: 0001 R11:  R12: 110035bb5e94
R13: ffef R14: 8801d39ae348 R15: 110035bb5e98
FS:  01db2880() GS:8801db00() knlGS:
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
 invalid_op+0x1b/0x40 arch/x86/entry/entry_64.S:991
RIP: 0010:kobject_add_internal+0x8d4/0xbc0 lib/kobject.c:235
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 019657b0 CR3: 0001ae0ca000 CR4: 001406f0
RSP: 0018:8801ade37470 EFLAGS: 00010282
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
RAX: dc08 RBX: 8801d9459190 

general protection fault in mount_fs

2018-04-04 Thread syzbot

Hello,

syzbot hit the following crash on upstream commit
3e968c9f1401088abc9a19ae6ff571644d37a355 (Wed Apr 4 21:19:24 2018 +)
Merge tag 'ext4_for_linus' of  
git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
syzbot dashboard link:  
https://syzkaller.appspot.com/bug?extid=01ffaf5d9568dd1609f7


C reproducer: https://syzkaller.appspot.com/x/repro.c?id=6269272955289600
syzkaller reproducer:  
https://syzkaller.appspot.com/x/repro.syz?id=5190169938362368
Raw console output:  
https://syzkaller.appspot.com/x/log.txt?id=5077254979715072
Kernel config:  
https://syzkaller.appspot.com/x/.config?id=9118669095563550941

compiler: gcc (GCC) 7.1.1 20170620

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+01ffaf5d9568dd160...@syzkaller.appspotmail.com
It will help syzbot understand when the bug is fixed. See footer for  
details.

If you forward the report, please keep this part and the footer.

hfsplus: failed to load root directory
hfsplus: failed to load root directory
hfsplus: failed to load root directory
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 1 PID: 5704 Comm: syzkaller335224 Not tainted 4.16.0+ #15
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

RIP: 0010:mount_fs+0x92/0x2d0 fs/super.c:1227
RSP: 0018:8801acc9fad0 EFLAGS: 00010202
RAX: dc00 RBX:  RCX: 81b2ddaa
RDX: 0019 RSI:  RDI: 00c8
RBP: 8801acc9fb00 R08: 110035993e87 R09: ed0035993efc
R10:  R11:  R12: 886b29c0
R13: 8801c6243000 R14: 8801ac942ac0 R15: 
FS:  7f8ca5272700() GS:8801db10() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 004c5198 CR3: 0001af14e000 CR4: 001406e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 vfs_kern_mount.part.26+0xc6/0x4a0 fs/namespace.c:1037
 vfs_kern_mount fs/namespace.c:2514 [inline]
 do_new_mount fs/namespace.c:2517 [inline]
 do_mount+0xea4/0x2b90 fs/namespace.c:2847
 ksys_mount+0xab/0x120 fs/namespace.c:3063
 SYSC_mount fs/namespace.c:3077 [inline]
 SyS_mount+0x39/0x50 fs/namespace.c:3074
 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x44a34a
RSP: 002b:7f8ca5271d38 EFLAGS: 0297 ORIG_RAX: 00a5
RAX: ffda RBX: 2318 RCX: 0044a34a
RDX: 2000 RSI: 2100 RDI: 7f8ca5271d50
RBP: 006e39c4 R08: 2140 R09: 000a
R10:  R11: 0297 R12: 0003
R13: 0004 R14: 006e39c0 R15: 0073756c70736668
Code: 3d 00 f0 ff ff 48 89 c3 0f 87 eb 01 00 00 e8 66 c6 be ff 48 8d bb c8  
00 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00  
0f 85 08 02 00 00 4c 8b b3 c8 00 00 00 4d 85 f6 0f

RIP: mount_fs+0x92/0x2d0 fs/super.c:1227 RSP: 8801acc9fad0
---[ end trace 81f30bb7bca06fe8 ]---
Kernel panic - not syncing: Fatal exception
Dumping ftrace buffer:
   (ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..


---
This bug is generated by a dumb bot. It may contain errors.
See https://goo.gl/tpsmEJ for details.
Direct all questions to syzkal...@googlegroups.com.

syzbot will keep track of this bug report.
If you forgot to add the Reported-by tag, once the fix for this bug is  
merged

into any tree, please reply to this email with:
#syz fix: exact-commit-title
If you want to test a patch for this bug, please reply with:
#syz test: git://repo/address.git branch
and provide the patch inline or as an attachment.
To mark this as a duplicate of another syzbot report, please reply with:
#syz dup: exact-subject-of-another-report
If it's a one-off invalid bug report, please reply with:
#syz invalid
Note: if the crash happens again, it will cause creation of a new bug  
report.

Note: all commands must start from beginning of the line in the email body.


general protection fault in mount_fs

2018-04-04 Thread syzbot

Hello,

syzbot hit the following crash on upstream commit
3e968c9f1401088abc9a19ae6ff571644d37a355 (Wed Apr 4 21:19:24 2018 +)
Merge tag 'ext4_for_linus' of  
git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
syzbot dashboard link:  
https://syzkaller.appspot.com/bug?extid=01ffaf5d9568dd1609f7


C reproducer: https://syzkaller.appspot.com/x/repro.c?id=6269272955289600
syzkaller reproducer:  
https://syzkaller.appspot.com/x/repro.syz?id=5190169938362368
Raw console output:  
https://syzkaller.appspot.com/x/log.txt?id=5077254979715072
Kernel config:  
https://syzkaller.appspot.com/x/.config?id=9118669095563550941

compiler: gcc (GCC) 7.1.1 20170620

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+01ffaf5d9568dd160...@syzkaller.appspotmail.com
It will help syzbot understand when the bug is fixed. See footer for  
details.

If you forward the report, please keep this part and the footer.

hfsplus: failed to load root directory
hfsplus: failed to load root directory
hfsplus: failed to load root directory
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 1 PID: 5704 Comm: syzkaller335224 Not tainted 4.16.0+ #15
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

RIP: 0010:mount_fs+0x92/0x2d0 fs/super.c:1227
RSP: 0018:8801acc9fad0 EFLAGS: 00010202
RAX: dc00 RBX:  RCX: 81b2ddaa
RDX: 0019 RSI:  RDI: 00c8
RBP: 8801acc9fb00 R08: 110035993e87 R09: ed0035993efc
R10:  R11:  R12: 886b29c0
R13: 8801c6243000 R14: 8801ac942ac0 R15: 
FS:  7f8ca5272700() GS:8801db10() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 004c5198 CR3: 0001af14e000 CR4: 001406e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 vfs_kern_mount.part.26+0xc6/0x4a0 fs/namespace.c:1037
 vfs_kern_mount fs/namespace.c:2514 [inline]
 do_new_mount fs/namespace.c:2517 [inline]
 do_mount+0xea4/0x2b90 fs/namespace.c:2847
 ksys_mount+0xab/0x120 fs/namespace.c:3063
 SYSC_mount fs/namespace.c:3077 [inline]
 SyS_mount+0x39/0x50 fs/namespace.c:3074
 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x44a34a
RSP: 002b:7f8ca5271d38 EFLAGS: 0297 ORIG_RAX: 00a5
RAX: ffda RBX: 2318 RCX: 0044a34a
RDX: 2000 RSI: 2100 RDI: 7f8ca5271d50
RBP: 006e39c4 R08: 2140 R09: 000a
R10:  R11: 0297 R12: 0003
R13: 0004 R14: 006e39c0 R15: 0073756c70736668
Code: 3d 00 f0 ff ff 48 89 c3 0f 87 eb 01 00 00 e8 66 c6 be ff 48 8d bb c8  
00 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00  
0f 85 08 02 00 00 4c 8b b3 c8 00 00 00 4d 85 f6 0f

RIP: mount_fs+0x92/0x2d0 fs/super.c:1227 RSP: 8801acc9fad0
---[ end trace 81f30bb7bca06fe8 ]---
Kernel panic - not syncing: Fatal exception
Dumping ftrace buffer:
   (ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..


---
This bug is generated by a dumb bot. It may contain errors.
See https://goo.gl/tpsmEJ for details.
Direct all questions to syzkal...@googlegroups.com.

syzbot will keep track of this bug report.
If you forgot to add the Reported-by tag, once the fix for this bug is  
merged

into any tree, please reply to this email with:
#syz fix: exact-commit-title
If you want to test a patch for this bug, please reply with:
#syz test: git://repo/address.git branch
and provide the patch inline or as an attachment.
To mark this as a duplicate of another syzbot report, please reply with:
#syz dup: exact-subject-of-another-report
If it's a one-off invalid bug report, please reply with:
#syz invalid
Note: if the crash happens again, it will cause creation of a new bug  
report.

Note: all commands must start from beginning of the line in the email body.


Re: [GIT PULL] Staging/IIO driver changes for 4.17-rc1

2018-04-04 Thread Linus Torvalds
On Wed, Apr 4, 2018 at 3:32 AM, Greg KH  wrote:
>
> It is a lot, over 500 changes, but not huge by previous kernel release
> standards.  We deleted more lines than we added again (27k added vs. 91k
> remvoed), thanks to finally being able to delete the IRDA drivers and
> networking code.

Hmm. The irda sysctl tables are still there in the kernel. Overlooked?

Linus


Re: [GIT PULL] Staging/IIO driver changes for 4.17-rc1

2018-04-04 Thread Linus Torvalds
On Wed, Apr 4, 2018 at 3:32 AM, Greg KH  wrote:
>
> It is a lot, over 500 changes, but not huge by previous kernel release
> standards.  We deleted more lines than we added again (27k added vs. 91k
> remvoed), thanks to finally being able to delete the IRDA drivers and
> networking code.

Hmm. The irda sysctl tables are still there in the kernel. Overlooked?

Linus


Re: [PATCH] gup: return -EFAULT on access_ok failure

2018-04-04 Thread Michael S. Tsirkin
On Fri, Mar 30, 2018 at 08:37:45PM +0300, Michael S. Tsirkin wrote:
> get_user_pages_fast is supposed to be a faster drop-in equivalent of
> get_user_pages. As such, callers expect it to return a negative return
> code when passed an invalid address, and never expect it to
> return 0 when passed a positive number of pages, since
> its documentation says:
> 
>  * Returns number of pages pinned. This may be fewer than the number
>  * requested. If nr_pages is 0 or negative, returns 0. If no pages
>  * were pinned, returns -errno.
> 
> Unfortunately this is not what the implementation does: it returns 0 if
> passed a kernel address, confusing callers: for example, the following
> is pretty common but does not appear to do the right thing with a kernel
> address:
> 
> ret = get_user_pages_fast(addr, 1, writeable, );
> if (ret < 0)
> return ret;
> 
> Change get_user_pages_fast to return -EFAULT when supplied a
> kernel address to make it match expectations.
> 
> __get_user_pages_fast does not seem to be used like this, but let's
> change __get_user_pages_fast as well for consistency and to match
> documentation.
> 
> Lightly tested.
> 
> Cc: Kirill A. Shutemov 
> Cc: Andrew Morton 
> Cc: Huang Ying 
> Cc: Jonathan Corbet 
> Cc: Linus Torvalds 
> Cc: Peter Zijlstra 
> Cc: Thomas Gleixner 
> Cc: Thorsten Leemhuis 
> Cc: sta...@vger.kernel.org
> Fixes: 5b65c4677a57 ("mm, x86/mm: Fix performance regression in 
> get_user_pages_fast()")
> Reported-by: syzbot+6304bf97ef436580f...@syzkaller.appspotmail.com
> Signed-off-by: Michael S. Tsirkin 

Any feedback on this? As this fixes a bug in vhost, I'll merge
through the vhost tree unless someone objects.

> ---
>  mm/gup.c | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/gup.c b/mm/gup.c
> index 6afae32..5642521 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1749,6 +1749,9 @@ int __get_user_pages_fast(unsigned long start, int 
> nr_pages, int write,
>   unsigned long flags;
>   int nr = 0;
>  
> + if (nr_pages <= 0)
> + return 0;
> +
>   start &= PAGE_MASK;
>   addr = start;
>   len = (unsigned long) nr_pages << PAGE_SHIFT;
> @@ -1756,7 +1759,7 @@ int __get_user_pages_fast(unsigned long start, int 
> nr_pages, int write,
>  
>   if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
>   (void __user *)start, len)))
> - return 0;
> + return -EFAULT;
>  
>   /*
>* Disable interrupts.  We use the nested form as we can already have
> @@ -1806,9 +1809,12 @@ int get_user_pages_fast(unsigned long start, int 
> nr_pages, int write,
>   len = (unsigned long) nr_pages << PAGE_SHIFT;
>   end = start + len;
>  
> + if (nr_pages <= 0)
> + return 0;
> +
>   if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
>   (void __user *)start, len)))
> - return 0;
> + return -EFAULT;
>  
>   if (gup_fast_permitted(start, nr_pages, write)) {
>   local_irq_disable();
> -- 
> MST


Re: [PATCH] gup: return -EFAULT on access_ok failure

2018-04-04 Thread Michael S. Tsirkin
On Fri, Mar 30, 2018 at 08:37:45PM +0300, Michael S. Tsirkin wrote:
> get_user_pages_fast is supposed to be a faster drop-in equivalent of
> get_user_pages. As such, callers expect it to return a negative return
> code when passed an invalid address, and never expect it to
> return 0 when passed a positive number of pages, since
> its documentation says:
> 
>  * Returns number of pages pinned. This may be fewer than the number
>  * requested. If nr_pages is 0 or negative, returns 0. If no pages
>  * were pinned, returns -errno.
> 
> Unfortunately this is not what the implementation does: it returns 0 if
> passed a kernel address, confusing callers: for example, the following
> is pretty common but does not appear to do the right thing with a kernel
> address:
> 
> ret = get_user_pages_fast(addr, 1, writeable, );
> if (ret < 0)
> return ret;
> 
> Change get_user_pages_fast to return -EFAULT when supplied a
> kernel address to make it match expectations.
> 
> __get_user_pages_fast does not seem to be used like this, but let's
> change __get_user_pages_fast as well for consistency and to match
> documentation.
> 
> Lightly tested.
> 
> Cc: Kirill A. Shutemov 
> Cc: Andrew Morton 
> Cc: Huang Ying 
> Cc: Jonathan Corbet 
> Cc: Linus Torvalds 
> Cc: Peter Zijlstra 
> Cc: Thomas Gleixner 
> Cc: Thorsten Leemhuis 
> Cc: sta...@vger.kernel.org
> Fixes: 5b65c4677a57 ("mm, x86/mm: Fix performance regression in 
> get_user_pages_fast()")
> Reported-by: syzbot+6304bf97ef436580f...@syzkaller.appspotmail.com
> Signed-off-by: Michael S. Tsirkin 

Any feedback on this? As this fixes a bug in vhost, I'll merge
through the vhost tree unless someone objects.

> ---
>  mm/gup.c | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/gup.c b/mm/gup.c
> index 6afae32..5642521 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1749,6 +1749,9 @@ int __get_user_pages_fast(unsigned long start, int 
> nr_pages, int write,
>   unsigned long flags;
>   int nr = 0;
>  
> + if (nr_pages <= 0)
> + return 0;
> +
>   start &= PAGE_MASK;
>   addr = start;
>   len = (unsigned long) nr_pages << PAGE_SHIFT;
> @@ -1756,7 +1759,7 @@ int __get_user_pages_fast(unsigned long start, int 
> nr_pages, int write,
>  
>   if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
>   (void __user *)start, len)))
> - return 0;
> + return -EFAULT;
>  
>   /*
>* Disable interrupts.  We use the nested form as we can already have
> @@ -1806,9 +1809,12 @@ int get_user_pages_fast(unsigned long start, int 
> nr_pages, int write,
>   len = (unsigned long) nr_pages << PAGE_SHIFT;
>   end = start + len;
>  
> + if (nr_pages <= 0)
> + return 0;
> +
>   if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
>   (void __user *)start, len)))
> - return 0;
> + return -EFAULT;
>  
>   if (gup_fast_permitted(start, nr_pages, write)) {
>   local_irq_disable();
> -- 
> MST


[PULL] fwcfg, vhost: features and fixes

2018-04-04 Thread Michael S. Tsirkin
The following changes since commit 0c8efd610b58cb23cefdfa12015799079aef94ae:

  Linux 4.16-rc5 (2018-03-11 17:25:09 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus

for you to fetch changes up to dc32bb678e103afbcfa4d814489af0566307f528:

  vhost: add vsock compat ioctl (2018-03-20 03:17:42 +0200)


fw_cfg, vhost: features fixes

This cleans up the qemu fw cfg device driver.
On top of this, vmcore is dumped there on crash to
help debugging witH kASLR enabled.
Also included are some fixes in vhost.

Signed-off-by: Michael S. Tsirkin 


Marc-André Lureau (10):
  fw_cfg: fix sparse warnings in fw_cfg_sel_endianness()
  fw_cfg: fix sparse warnings with fw_cfg_file
  fw_cfg: fix sparse warning reading FW_CFG_ID
  fw_cfg: fix sparse warnings around FW_CFG_FILE_DIR read
  fw_cfg: remove inline from fw_cfg_read_blob()
  fw_cfg: handle fw_cfg_read_blob() error
  fw_cfg: add a public uapi header
  fw_cfg: add DMA register
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: write vmcoreinfo details

Michael S. Tsirkin (1):
  ptr_ring: fix build

Sonny Rao (2):
  vhost: fix vhost ioctl signature to build with clang
  vhost: add vsock compat ioctl

 MAINTAINERS  |   1 +
 drivers/firmware/qemu_fw_cfg.c   | 291 +++
 drivers/vhost/vhost.c|   2 +-
 drivers/vhost/vhost.h|   4 +-
 drivers/vhost/vsock.c|  11 ++
 include/uapi/linux/qemu_fw_cfg.h |  97 +
 kernel/crash_core.c  |   1 +
 tools/virtio/ringtest/ptr_ring.c |   5 +
 8 files changed, 348 insertions(+), 64 deletions(-)
 create mode 100644 include/uapi/linux/qemu_fw_cfg.h


[PULL] fwcfg, vhost: features and fixes

2018-04-04 Thread Michael S. Tsirkin
The following changes since commit 0c8efd610b58cb23cefdfa12015799079aef94ae:

  Linux 4.16-rc5 (2018-03-11 17:25:09 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus

for you to fetch changes up to dc32bb678e103afbcfa4d814489af0566307f528:

  vhost: add vsock compat ioctl (2018-03-20 03:17:42 +0200)


fw_cfg, vhost: features fixes

This cleans up the qemu fw cfg device driver.
On top of this, vmcore is dumped there on crash to
help debugging witH kASLR enabled.
Also included are some fixes in vhost.

Signed-off-by: Michael S. Tsirkin 


Marc-André Lureau (10):
  fw_cfg: fix sparse warnings in fw_cfg_sel_endianness()
  fw_cfg: fix sparse warnings with fw_cfg_file
  fw_cfg: fix sparse warning reading FW_CFG_ID
  fw_cfg: fix sparse warnings around FW_CFG_FILE_DIR read
  fw_cfg: remove inline from fw_cfg_read_blob()
  fw_cfg: handle fw_cfg_read_blob() error
  fw_cfg: add a public uapi header
  fw_cfg: add DMA register
  crash: export paddr_vmcoreinfo_note()
  fw_cfg: write vmcoreinfo details

Michael S. Tsirkin (1):
  ptr_ring: fix build

Sonny Rao (2):
  vhost: fix vhost ioctl signature to build with clang
  vhost: add vsock compat ioctl

 MAINTAINERS  |   1 +
 drivers/firmware/qemu_fw_cfg.c   | 291 +++
 drivers/vhost/vhost.c|   2 +-
 drivers/vhost/vhost.h|   4 +-
 drivers/vhost/vsock.c|  11 ++
 include/uapi/linux/qemu_fw_cfg.h |  97 +
 kernel/crash_core.c  |   1 +
 tools/virtio/ringtest/ptr_ring.c |   5 +
 8 files changed, 348 insertions(+), 64 deletions(-)
 create mode 100644 include/uapi/linux/qemu_fw_cfg.h


Re: [PATCH?] reiserfs: prevent panic: don't allow %-char in journal dev. name

2018-04-04 Thread Jeff Mahoney
On 4/4/18 9:45 PM, Andrew Morton wrote:
> On Wed, 4 Apr 2018 18:25:16 -0700 Randy Dunlap  wrote:
> 
>> From: Randy Dunlap 
>>
>> If the reiserfs mount option's journal name contains a '%' character,
>> it can lead to a WARN_ONCE() in lib/vsprintf.c::format_decode(),
>> saying: "Please remove unsupported %/ in format string."
>> That's OK until panic_on_warn is set, at which point it's dead, Jim.
>>
>> To placate this situation, check the journal name string for a '%'
>> character and return an error if one is found. Also print a warning
>> (one that won't panic the kernel) about the invalid journal name (e.g.):
>>
>>   reiserfs: journal device name is invalid: %/file0
>>
>> (In this example, the caller app specified the journal device name as
>> "%/file0".)
>>
> 
> Well, that is a valid filename and we should support it...
> 
> Isn't the bug in journal_init_dev()?

Yep.  That's exactly it.

Acked-by: Jeff Mahoney 

Thanks,

-Jeff

> --- a/fs/reiserfs/journal.c~a
> +++ a/fs/reiserfs/journal.c
> @@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super
>   if (IS_ERR(journal->j_dev_bd)) {
>   result = PTR_ERR(journal->j_dev_bd);
>   journal->j_dev_bd = NULL;
> - reiserfs_warning(super,
> + reiserfs_warning(super, "sh-457",
>"journal_init_dev: Cannot open '%s': %i",
>jdev_name, result);
>   return result;
> _
> 
> 


-- 
Jeff Mahoney
SUSE Labs


Re: [PATCH?] reiserfs: prevent panic: don't allow %-char in journal dev. name

2018-04-04 Thread Jeff Mahoney
On 4/4/18 9:45 PM, Andrew Morton wrote:
> On Wed, 4 Apr 2018 18:25:16 -0700 Randy Dunlap  wrote:
> 
>> From: Randy Dunlap 
>>
>> If the reiserfs mount option's journal name contains a '%' character,
>> it can lead to a WARN_ONCE() in lib/vsprintf.c::format_decode(),
>> saying: "Please remove unsupported %/ in format string."
>> That's OK until panic_on_warn is set, at which point it's dead, Jim.
>>
>> To placate this situation, check the journal name string for a '%'
>> character and return an error if one is found. Also print a warning
>> (one that won't panic the kernel) about the invalid journal name (e.g.):
>>
>>   reiserfs: journal device name is invalid: %/file0
>>
>> (In this example, the caller app specified the journal device name as
>> "%/file0".)
>>
> 
> Well, that is a valid filename and we should support it...
> 
> Isn't the bug in journal_init_dev()?

Yep.  That's exactly it.

Acked-by: Jeff Mahoney 

Thanks,

-Jeff

> --- a/fs/reiserfs/journal.c~a
> +++ a/fs/reiserfs/journal.c
> @@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super
>   if (IS_ERR(journal->j_dev_bd)) {
>   result = PTR_ERR(journal->j_dev_bd);
>   journal->j_dev_bd = NULL;
> - reiserfs_warning(super,
> + reiserfs_warning(super, "sh-457",
>"journal_init_dev: Cannot open '%s': %i",
>jdev_name, result);
>   return result;
> _
> 
> 


-- 
Jeff Mahoney
SUSE Labs


Re: An actual suggestion (Re: [GIT PULL] Kernel lockdown for secure boot)

2018-04-04 Thread joeyli
On Wed, Apr 04, 2018 at 11:19:27PM +0100, David Howells wrote:
> Jann Horn  wrote:
> 
> > > Uh, no.  bpf, for example, can be used to modify kernel memory.
> > 
> > I'm pretty sure bpf isn't supposed to be able to modify arbitrary
> > kernel memory. AFAIU if you can use BPF to write to arbitrary kernel
> > memory, that's a bug; with CAP_SYS_ADMIN, you can read from userspace,
> > write to userspace, and read from kernelspace, but you shouldn't be
> > able to write to kernelspace.
> 
> Ah - you may be right.  I seem to have misremembered what Joey Lee wrote in
> his patch description.
>

Sorry for it's my fault to misunderstood the behavoir of bpf with
CAP_SYS_ADMIN.

Joey Lee


Re: An actual suggestion (Re: [GIT PULL] Kernel lockdown for secure boot)

2018-04-04 Thread joeyli
On Wed, Apr 04, 2018 at 11:19:27PM +0100, David Howells wrote:
> Jann Horn  wrote:
> 
> > > Uh, no.  bpf, for example, can be used to modify kernel memory.
> > 
> > I'm pretty sure bpf isn't supposed to be able to modify arbitrary
> > kernel memory. AFAIU if you can use BPF to write to arbitrary kernel
> > memory, that's a bug; with CAP_SYS_ADMIN, you can read from userspace,
> > write to userspace, and read from kernelspace, but you shouldn't be
> > able to write to kernelspace.
> 
> Ah - you may be right.  I seem to have misremembered what Joey Lee wrote in
> his patch description.
>

Sorry for it's my fault to misunderstood the behavoir of bpf with
CAP_SYS_ADMIN.

Joey Lee


[rtlwifi-btcoex] Suspicious code in halbtc8821a1ant driver

2018-04-04 Thread Gustavo A. R. Silva
Hi all,

While doing some static analysis I came across the following piece of code at 
drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a1ant.c:1581:

1581 static void btc8821a1ant_act_bt_sco_hid_only_busy(struct btc_coexist 
*btcoexist,
1582   u8 wifi_status)
1583 {
1584 /* tdma and coex table */
1585 btc8821a1ant_ps_tdma(btcoexist, NORMAL_EXEC, true, 5);
1586 
1587 if (BT_8821A_1ANT_WIFI_STATUS_NON_CONNECTED_ASSO_AUTH_SCAN ==
1588 wifi_status)
1589 btc8821a1ant_coex_table_with_type(btcoexist, NORMAL_EXEC, 
1);
1590 else
1591 btc8821a1ant_coex_table_with_type(btcoexist, NORMAL_EXEC, 
1);
1592 }

The issue here is that the code for both branches of the if-else statement is 
identical.

The if-else was introduced a year ago in this commit c6821613e653

I wonder if an argument should be changed in any of the calls to 
btc8821a1ant_coex_table_with_type?

What do you think?

Thanks
--
Gustavo


[rtlwifi-btcoex] Suspicious code in halbtc8821a1ant driver

2018-04-04 Thread Gustavo A. R. Silva
Hi all,

While doing some static analysis I came across the following piece of code at 
drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a1ant.c:1581:

1581 static void btc8821a1ant_act_bt_sco_hid_only_busy(struct btc_coexist 
*btcoexist,
1582   u8 wifi_status)
1583 {
1584 /* tdma and coex table */
1585 btc8821a1ant_ps_tdma(btcoexist, NORMAL_EXEC, true, 5);
1586 
1587 if (BT_8821A_1ANT_WIFI_STATUS_NON_CONNECTED_ASSO_AUTH_SCAN ==
1588 wifi_status)
1589 btc8821a1ant_coex_table_with_type(btcoexist, NORMAL_EXEC, 
1);
1590 else
1591 btc8821a1ant_coex_table_with_type(btcoexist, NORMAL_EXEC, 
1);
1592 }

The issue here is that the code for both branches of the if-else statement is 
identical.

The if-else was introduced a year ago in this commit c6821613e653

I wonder if an argument should be changed in any of the calls to 
btc8821a1ant_coex_table_with_type?

What do you think?

Thanks
--
Gustavo


Re: An actual suggestion (Re: [GIT PULL] Kernel lockdown for secure boot)

2018-04-04 Thread joeyli
Hi Andy,

On Wed, Apr 04, 2018 at 07:49:12AM -0700, Andy Lutomirski wrote:
> Since this thread has devolved horribly, I'm going to propose a solution.
...
> 6. There's a way to *decrease* the lockdown level below the configured
> value.  (This ability itself may be gated by a config option.)
> Choices include a UEFI protected variable, an authenticated flag
> passed by the bootloader, and even just some special flag in the boot
> handoff protocol.  It would be really quite useful for a user to be
> able to ask their bootloader to reduce the lockdown level for the
> purpose of a particular boot for debugging.  I read the docs on

The "mokutil --disable-validation" done a similar bahvior as above.
Just it lets kernel to ignore the secure boot. 

> mokutil --disable-validation, and it's quite messy.  Let's have a way
> to do this that is mostly independent of the particular firmware in
> use.
>

Why the disabl-validation is messy?   
The mokutil is shim specific but not dependent on particular firmware. 
 
> I can imagine a grub option that decreases lockdown level along with a
> rule that grub will *not* load that option from its config, for
> example.
>

The root can modify the grub config to decrease lockdown level in next
boot without physcial accessing. The mokutil's interactive UI is used
to deal with user to confirm the physcial accessing.

Thanks
Joey Lee


Re: An actual suggestion (Re: [GIT PULL] Kernel lockdown for secure boot)

2018-04-04 Thread joeyli
Hi Andy,

On Wed, Apr 04, 2018 at 07:49:12AM -0700, Andy Lutomirski wrote:
> Since this thread has devolved horribly, I'm going to propose a solution.
...
> 6. There's a way to *decrease* the lockdown level below the configured
> value.  (This ability itself may be gated by a config option.)
> Choices include a UEFI protected variable, an authenticated flag
> passed by the bootloader, and even just some special flag in the boot
> handoff protocol.  It would be really quite useful for a user to be
> able to ask their bootloader to reduce the lockdown level for the
> purpose of a particular boot for debugging.  I read the docs on

The "mokutil --disable-validation" done a similar bahvior as above.
Just it lets kernel to ignore the secure boot. 

> mokutil --disable-validation, and it's quite messy.  Let's have a way
> to do this that is mostly independent of the particular firmware in
> use.
>

Why the disabl-validation is messy?   
The mokutil is shim specific but not dependent on particular firmware. 
 
> I can imagine a grub option that decreases lockdown level along with a
> rule that grub will *not* load that option from its config, for
> example.
>

The root can modify the grub config to decrease lockdown level in next
boot without physcial accessing. The mokutil's interactive UI is used
to deal with user to confirm the physcial accessing.

Thanks
Joey Lee


Re: [PATCH?] reiserfs: prevent panic: don't allow %-char in journal dev. name

2018-04-04 Thread Andrew Morton
On Wed, 4 Apr 2018 18:25:16 -0700 Randy Dunlap  wrote:

> From: Randy Dunlap 
> 
> If the reiserfs mount option's journal name contains a '%' character,
> it can lead to a WARN_ONCE() in lib/vsprintf.c::format_decode(),
> saying: "Please remove unsupported %/ in format string."
> That's OK until panic_on_warn is set, at which point it's dead, Jim.
> 
> To placate this situation, check the journal name string for a '%'
> character and return an error if one is found. Also print a warning
> (one that won't panic the kernel) about the invalid journal name (e.g.):
> 
>   reiserfs: journal device name is invalid: %/file0
> 
> (In this example, the caller app specified the journal device name as
> "%/file0".)
> 

Well, that is a valid filename and we should support it...

Isn't the bug in journal_init_dev()?

--- a/fs/reiserfs/journal.c~a
+++ a/fs/reiserfs/journal.c
@@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super
if (IS_ERR(journal->j_dev_bd)) {
result = PTR_ERR(journal->j_dev_bd);
journal->j_dev_bd = NULL;
-   reiserfs_warning(super,
+   reiserfs_warning(super, "sh-457",
 "journal_init_dev: Cannot open '%s': %i",
 jdev_name, result);
return result;
_



Re: [PATCH?] reiserfs: prevent panic: don't allow %-char in journal dev. name

2018-04-04 Thread Andrew Morton
On Wed, 4 Apr 2018 18:25:16 -0700 Randy Dunlap  wrote:

> From: Randy Dunlap 
> 
> If the reiserfs mount option's journal name contains a '%' character,
> it can lead to a WARN_ONCE() in lib/vsprintf.c::format_decode(),
> saying: "Please remove unsupported %/ in format string."
> That's OK until panic_on_warn is set, at which point it's dead, Jim.
> 
> To placate this situation, check the journal name string for a '%'
> character and return an error if one is found. Also print a warning
> (one that won't panic the kernel) about the invalid journal name (e.g.):
> 
>   reiserfs: journal device name is invalid: %/file0
> 
> (In this example, the caller app specified the journal device name as
> "%/file0".)
> 

Well, that is a valid filename and we should support it...

Isn't the bug in journal_init_dev()?

--- a/fs/reiserfs/journal.c~a
+++ a/fs/reiserfs/journal.c
@@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super
if (IS_ERR(journal->j_dev_bd)) {
result = PTR_ERR(journal->j_dev_bd);
journal->j_dev_bd = NULL;
-   reiserfs_warning(super,
+   reiserfs_warning(super, "sh-457",
 "journal_init_dev: Cannot open '%s': %i",
 jdev_name, result);
return result;
_



Re: linux-next: manual merge of the net-next tree with the pci tree

2018-04-04 Thread Stephen Rothwell
Hi all,

On Tue, 3 Apr 2018 13:14:54 +1000 Stephen Rothwell  
wrote:
>
> Today's linux-next merge of the net-next tree got a conflict in:
> 
>   drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> 
> between commit:
> 
>   2907938d2375 ("net/mlx5e: Use pcie_bandwidth_available() to compute 
> bandwidth")
> 
> from the pci tree and commit:
> 
>   0608d4dbaf4e ("net/mlx5e: Unify slow PCI heuristic")
> 
> from the net-next tree.
> 
> I fixed it up (see below) and can carry the fix as necessary. This
> is now fixed as far as linux-next is concerned, but any non trivial
> conflicts should be mentioned to your upstream maintainer when your tree
> is submitted for merging.  You may also want to consider cooperating
> with the maintainer of the conflicting tree to minimise any particularly
> complex conflicts.
> 
> -- 
> Cheers,
> Stephen Rothwell
> 
> diff --cc drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 884337f88589,0aab3afc6885..
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@@ -3880,16 -4026,50 +4033,20 @@@ void mlx5e_build_default_indir_rqt(u32 
>   indirection_rqt[i] = i % num_channels;
>   }
>   
> - static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw)
>  -static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
>  -{
>  -enum pcie_link_width width;
>  -enum pci_bus_speed speed;
>  -int err = 0;
>  -
>  -err = pcie_get_minimum_link(mdev->pdev, , );
>  -if (err)
>  -return err;
>  -
>  -if (speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN)
>  -return -EINVAL;
>  -
>  -switch (speed) {
>  -case PCIE_SPEED_2_5GT:
>  -*pci_bw = 2500 * width;
>  -break;
>  -case PCIE_SPEED_5_0GT:
>  -*pci_bw = 5000 * width;
>  -break;
>  -case PCIE_SPEED_8_0GT:
>  -*pci_bw = 8000 * width;
>  -break;
>  -default:
>  -return -EINVAL;
>  -}
>  -
>  -return 0;
>  -}
>  -
> + static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
>   {
> - return (link_speed && pci_bw &&
> - (pci_bw < 4) && (pci_bw < link_speed));
> - }
> + u32 link_speed = 0;
> + u32 pci_bw = 0;
>   
> - static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw)
> - {
> - return !(link_speed && pci_bw &&
> -  (pci_bw <= 16000) && (pci_bw < link_speed));
> + mlx5e_get_max_linkspeed(mdev, _speed);
>  -mlx5e_get_pci_bw(mdev, _bw);
> ++pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
> + mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
> +link_speed, pci_bw);
> + 
> + #define MLX5E_SLOW_PCI_RATIO (2)
> + 
> + return link_speed && pci_bw &&
> + link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
>   }
>   
>   void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 
> cq_period_mode)

This is now a conflict between the pci tree and Linus' tree.

-- 
Cheers,
Stephen Rothwell


pgpraz7Cqf_sV.pgp
Description: OpenPGP digital signature


Re: linux-next: manual merge of the net-next tree with the pci tree

2018-04-04 Thread Stephen Rothwell
Hi all,

On Tue, 3 Apr 2018 13:14:54 +1000 Stephen Rothwell  
wrote:
>
> Today's linux-next merge of the net-next tree got a conflict in:
> 
>   drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> 
> between commit:
> 
>   2907938d2375 ("net/mlx5e: Use pcie_bandwidth_available() to compute 
> bandwidth")
> 
> from the pci tree and commit:
> 
>   0608d4dbaf4e ("net/mlx5e: Unify slow PCI heuristic")
> 
> from the net-next tree.
> 
> I fixed it up (see below) and can carry the fix as necessary. This
> is now fixed as far as linux-next is concerned, but any non trivial
> conflicts should be mentioned to your upstream maintainer when your tree
> is submitted for merging.  You may also want to consider cooperating
> with the maintainer of the conflicting tree to minimise any particularly
> complex conflicts.
> 
> -- 
> Cheers,
> Stephen Rothwell
> 
> diff --cc drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 884337f88589,0aab3afc6885..
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@@ -3880,16 -4026,50 +4033,20 @@@ void mlx5e_build_default_indir_rqt(u32 
>   indirection_rqt[i] = i % num_channels;
>   }
>   
> - static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw)
>  -static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
>  -{
>  -enum pcie_link_width width;
>  -enum pci_bus_speed speed;
>  -int err = 0;
>  -
>  -err = pcie_get_minimum_link(mdev->pdev, , );
>  -if (err)
>  -return err;
>  -
>  -if (speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN)
>  -return -EINVAL;
>  -
>  -switch (speed) {
>  -case PCIE_SPEED_2_5GT:
>  -*pci_bw = 2500 * width;
>  -break;
>  -case PCIE_SPEED_5_0GT:
>  -*pci_bw = 5000 * width;
>  -break;
>  -case PCIE_SPEED_8_0GT:
>  -*pci_bw = 8000 * width;
>  -break;
>  -default:
>  -return -EINVAL;
>  -}
>  -
>  -return 0;
>  -}
>  -
> + static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
>   {
> - return (link_speed && pci_bw &&
> - (pci_bw < 4) && (pci_bw < link_speed));
> - }
> + u32 link_speed = 0;
> + u32 pci_bw = 0;
>   
> - static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw)
> - {
> - return !(link_speed && pci_bw &&
> -  (pci_bw <= 16000) && (pci_bw < link_speed));
> + mlx5e_get_max_linkspeed(mdev, _speed);
>  -mlx5e_get_pci_bw(mdev, _bw);
> ++pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
> + mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
> +link_speed, pci_bw);
> + 
> + #define MLX5E_SLOW_PCI_RATIO (2)
> + 
> + return link_speed && pci_bw &&
> + link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
>   }
>   
>   void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 
> cq_period_mode)

This is now a conflict between the pci tree and Linus' tree.

-- 
Cheers,
Stephen Rothwell


pgpraz7Cqf_sV.pgp
Description: OpenPGP digital signature


Re: [PATCH] soc: bcm: raspberrypi-power: Fix use of __packed

2018-04-04 Thread Sasha Levin
Hi Florian Fainelli.

[This is an automated email]

This commit has been processed because it contains a "Fixes:" tag.
fixing commit: a09cd356586d ARM: bcm2835: add rpi power domain driver.

The bot has also determined it's probably a bug fixing patch. (score: 35.5765)

The bot has tested the following trees: v4.15.15, v4.14.32, v4.9.92, 

v4.15.15: Build OK!
v4.14.32: Build OK!
v4.9.92: Build OK!

--
Thanks.
Sasha

Re: [PATCH] soc: bcm: raspberrypi-power: Fix use of __packed

2018-04-04 Thread Sasha Levin
Hi Florian Fainelli.

[This is an automated email]

This commit has been processed because it contains a "Fixes:" tag.
fixing commit: a09cd356586d ARM: bcm2835: add rpi power domain driver.

The bot has also determined it's probably a bug fixing patch. (score: 35.5765)

The bot has tested the following trees: v4.15.15, v4.14.32, v4.9.92, 

v4.15.15: Build OK!
v4.14.32: Build OK!
v4.9.92: Build OK!

--
Thanks.
Sasha

Re: [PATCH] resource: Fix integer overflow at reallocation

2018-04-04 Thread Sasha Levin
Hi Takashi Iwai.

[This is an automated email]

This commit has been processed because it contains a "Fixes:" tag.
fixing commit: 23c570a67448 resource: ability to resize an allocated resource.

The bot has also determined it's probably a bug fixing patch. (score: 99.2157)

The bot has tested the following trees: v4.15.15, v4.14.32, v4.9.92, v4.4.126, 

v4.15.15: Build OK!
v4.14.32: Build OK!
v4.9.92: Build OK!
v4.4.126: Build OK!

--
Thanks.
Sasha

Re: [PATCH] resource: Fix integer overflow at reallocation

2018-04-04 Thread Sasha Levin
Hi Takashi Iwai.

[This is an automated email]

This commit has been processed because it contains a "Fixes:" tag.
fixing commit: 23c570a67448 resource: ability to resize an allocated resource.

The bot has also determined it's probably a bug fixing patch. (score: 99.2157)

The bot has tested the following trees: v4.15.15, v4.14.32, v4.9.92, v4.4.126, 

v4.15.15: Build OK!
v4.14.32: Build OK!
v4.9.92: Build OK!
v4.4.126: Build OK!

--
Thanks.
Sasha

Re: [PATCH net] netns: filter uevents correctly

2018-04-04 Thread Christian Brauner
On Wed, Apr 04, 2018 at 05:38:02PM -0500, Eric W. Biederman wrote:
> Christian Brauner  writes:
> 
> > On Wed, Apr 04, 2018 at 09:48:57PM +0200, Christian Brauner wrote:
> >> commit 07e98962fa77 ("kobject: Send hotplug events in all network 
> >> namespaces")
> >> 
> >> enabled sending hotplug events into all network namespaces back in 2010.
> >> Over time the set of uevents that get sent into all network namespaces has
> >> shrunk. We have now reached the point where hotplug events for all devices
> >> that carry a namespace tag are filtered according to that namespace.
> >> 
> >> Specifically, they are filtered whenever the namespace tag of the kobject
> >> does not match the namespace tag of the netlink socket. One example are
> >> network devices. Uevents for network devices only show up in the network
> >> namespaces these devices are moved to or created in.
> >> 
> >> However, any uevent for a kobject that does not have a namespace tag
> >> associated with it will not be filtered and we will *try* to broadcast it
> >> into all network namespaces.
> >> 
> >> The original patchset was written in 2010 before user namespaces were a
> >> thing. With the introduction of user namespaces sending out uevents became
> >> partially isolated as they were filtered by user namespaces:
> >> 
> >> net/netlink/af_netlink.c:do_one_broadcast()
> >> 
> >> if (!net_eq(sock_net(sk), p->net)) {
> >> if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
> >> return;
> >> 
> >> if (!peernet_has_id(sock_net(sk), p->net))
> >> return;
> >> 
> >> if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
> >>  CAP_NET_BROADCAST))
> >> j   return;
> >> }
> >> 
> >> The file_ns_capable() check will check whether the caller had
> >> CAP_NET_BROADCAST at the time of opening the netlink socket in the user
> >> namespace of interest. This check is fine in general but seems insufficient
> >> to me when paired with uevents. The reason is that devices always belong to
> >> the initial user namespace so uevents for kobjects that do not carry a
> >> namespace tag should never be sent into another user namespace. This has
> >> been the intention all along. But there's one case where this breaks,
> >> namely if a new user namespace is created by root on the host and an
> >> identity mapping is established between root on the host and root in the
> >> new user namespace. Here's a reproducer:
> >> 
> >>  sudo unshare -U --map-root
> >>  udevadm monitor -k
> >>  # Now change to initial user namespace and e.g. do
> >>  modprobe kvm
> >>  # or
> >>  rmmod kvm
> >> 
> >> will allow the non-initial user namespace to retrieve all uevents from the
> >> host. This seems very anecdotal given that in the general case user
> >> namespaces do not see any uevents and also can't really do anything useful
> >> with them.
> >> 
> >> Additionally, it is now possible to send uevents from userspace. As such we
> >> can let a sufficiently privileged (CAP_SYS_ADMIN in the owning user
> >> namespace of the network namespace of the netlink socket) userspace process
> >> make a decision what uevents should be sent.
> >> 
> >> This makes me think that we should simply ensure that uevents for kobjects
> >> that do not carry a namespace tag are *always* filtered by user namespace
> >> in kobj_bcast_filter(). Specifically:
> >> - If the owning user namespace of the uevent socket is not init_user_ns the
> >>   event will always be filtered.
> >> - If the network namespace the uevent socket belongs to was created in the
> >>   initial user namespace but was opened from a non-initial user namespace
> >>   the event will be filtered as well.
> >> Put another way, uevents for kobjects not carrying a namespace tag are now
> >> always only sent to the initial user namespace. The regression potential
> >> for this is near to non-existent since user namespaces can't really do
> >> anything with interesting devices.
> >> 
> >> Signed-off-by: Christian Brauner 
> >
> > That was supposed to be [PATCH net] not [PATCH net-next] which is
> > obviously closed. Sorry about that.
> 
> This does not appear to be a fix.
> This looks like feature work.
> The motivation appears to be that looks wrong let's change it.

Hm, it looked like an oversight an therefore seems like a bug which is
why I thought would be a good candidate for net. Recent patches to the
semantics here just make it more obvious and provide a better argument
to fix it in the current release rather then defer it to the next one.
But I'm happy to leave this for net-next. I don't want to rush things if
this change in semantics is not trivial enough. For the record, I'm
merely fixing/expanding on the current status quo.

David, is it ok to queue this or would you prefer I resend when net-next
reopens?

> 
> So let's please leave this for when net-next opens again so we can
> 

Re: [PATCH net] netns: filter uevents correctly

2018-04-04 Thread Christian Brauner
On Wed, Apr 04, 2018 at 05:38:02PM -0500, Eric W. Biederman wrote:
> Christian Brauner  writes:
> 
> > On Wed, Apr 04, 2018 at 09:48:57PM +0200, Christian Brauner wrote:
> >> commit 07e98962fa77 ("kobject: Send hotplug events in all network 
> >> namespaces")
> >> 
> >> enabled sending hotplug events into all network namespaces back in 2010.
> >> Over time the set of uevents that get sent into all network namespaces has
> >> shrunk. We have now reached the point where hotplug events for all devices
> >> that carry a namespace tag are filtered according to that namespace.
> >> 
> >> Specifically, they are filtered whenever the namespace tag of the kobject
> >> does not match the namespace tag of the netlink socket. One example are
> >> network devices. Uevents for network devices only show up in the network
> >> namespaces these devices are moved to or created in.
> >> 
> >> However, any uevent for a kobject that does not have a namespace tag
> >> associated with it will not be filtered and we will *try* to broadcast it
> >> into all network namespaces.
> >> 
> >> The original patchset was written in 2010 before user namespaces were a
> >> thing. With the introduction of user namespaces sending out uevents became
> >> partially isolated as they were filtered by user namespaces:
> >> 
> >> net/netlink/af_netlink.c:do_one_broadcast()
> >> 
> >> if (!net_eq(sock_net(sk), p->net)) {
> >> if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
> >> return;
> >> 
> >> if (!peernet_has_id(sock_net(sk), p->net))
> >> return;
> >> 
> >> if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
> >>  CAP_NET_BROADCAST))
> >> j   return;
> >> }
> >> 
> >> The file_ns_capable() check will check whether the caller had
> >> CAP_NET_BROADCAST at the time of opening the netlink socket in the user
> >> namespace of interest. This check is fine in general but seems insufficient
> >> to me when paired with uevents. The reason is that devices always belong to
> >> the initial user namespace so uevents for kobjects that do not carry a
> >> namespace tag should never be sent into another user namespace. This has
> >> been the intention all along. But there's one case where this breaks,
> >> namely if a new user namespace is created by root on the host and an
> >> identity mapping is established between root on the host and root in the
> >> new user namespace. Here's a reproducer:
> >> 
> >>  sudo unshare -U --map-root
> >>  udevadm monitor -k
> >>  # Now change to initial user namespace and e.g. do
> >>  modprobe kvm
> >>  # or
> >>  rmmod kvm
> >> 
> >> will allow the non-initial user namespace to retrieve all uevents from the
> >> host. This seems very anecdotal given that in the general case user
> >> namespaces do not see any uevents and also can't really do anything useful
> >> with them.
> >> 
> >> Additionally, it is now possible to send uevents from userspace. As such we
> >> can let a sufficiently privileged (CAP_SYS_ADMIN in the owning user
> >> namespace of the network namespace of the netlink socket) userspace process
> >> make a decision what uevents should be sent.
> >> 
> >> This makes me think that we should simply ensure that uevents for kobjects
> >> that do not carry a namespace tag are *always* filtered by user namespace
> >> in kobj_bcast_filter(). Specifically:
> >> - If the owning user namespace of the uevent socket is not init_user_ns the
> >>   event will always be filtered.
> >> - If the network namespace the uevent socket belongs to was created in the
> >>   initial user namespace but was opened from a non-initial user namespace
> >>   the event will be filtered as well.
> >> Put another way, uevents for kobjects not carrying a namespace tag are now
> >> always only sent to the initial user namespace. The regression potential
> >> for this is near to non-existent since user namespaces can't really do
> >> anything with interesting devices.
> >> 
> >> Signed-off-by: Christian Brauner 
> >
> > That was supposed to be [PATCH net] not [PATCH net-next] which is
> > obviously closed. Sorry about that.
> 
> This does not appear to be a fix.
> This looks like feature work.
> The motivation appears to be that looks wrong let's change it.

Hm, it looked like an oversight an therefore seems like a bug which is
why I thought would be a good candidate for net. Recent patches to the
semantics here just make it more obvious and provide a better argument
to fix it in the current release rather then defer it to the next one.
But I'm happy to leave this for net-next. I don't want to rush things if
this change in semantics is not trivial enough. For the record, I'm
merely fixing/expanding on the current status quo.

David, is it ok to queue this or would you prefer I resend when net-next
reopens?

> 
> So let's please leave this for when net-next opens again so we can
> have time to fully consider a change in semantics.

Sure, if we 

Re: [PATCH] drm/vc4: Fix memory leak during BO teardown

2018-04-04 Thread Sasha Levin
Hi Daniel J Blueman.

[This is an automated email]

This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: all

The bot has also determined it's probably a bug fixing patch. (score: 85.0720)

The bot has tested the following trees: v4.15.15, v4.14.32, v4.9.92, v4.4.126, 

v4.15.15: Build OK!
v4.14.32: Build OK!
v4.9.92: Build OK!
v4.4.126: Failed to apply! Possible dependencies:
463873d57014: ("drm/vc4: Add an API for creating GPU shaders in GEM BOs.")
c826a6e10644: ("drm/vc4: Add a BO cache.")
d5bc60f6ad05: ("drm/vc4: Add create and map BO ioctls.")
c826a6e10644: ("drm/vc4: Add a BO cache.")


Please let us know if you'd like to have this patch included in a stable tree.

--
Thanks.
Sasha

Re: [PATCH] drm/vc4: Fix memory leak during BO teardown

2018-04-04 Thread Sasha Levin
Hi Daniel J Blueman.

[This is an automated email]

This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: all

The bot has also determined it's probably a bug fixing patch. (score: 85.0720)

The bot has tested the following trees: v4.15.15, v4.14.32, v4.9.92, v4.4.126, 

v4.15.15: Build OK!
v4.14.32: Build OK!
v4.9.92: Build OK!
v4.4.126: Failed to apply! Possible dependencies:
463873d57014: ("drm/vc4: Add an API for creating GPU shaders in GEM BOs.")
c826a6e10644: ("drm/vc4: Add a BO cache.")
d5bc60f6ad05: ("drm/vc4: Add create and map BO ioctls.")
c826a6e10644: ("drm/vc4: Add a BO cache.")


Please let us know if you'd like to have this patch included in a stable tree.

--
Thanks.
Sasha

Re: [PATCH] Input: synaptics-rmi4 - Fix an unchecked out of memory error path

2018-04-04 Thread Sasha Levin
Hi Christophe JAILLET.

[This is an automated email]

This commit has been processed by the -stable helper bot and determined
to be a high probability candidate for -stable trees. (score: 7.5278)

The bot has tested the following trees: v4.15.15, v4.14.32, v4.9.92, v4.4.126, 

v4.15.15: Build OK!
v4.14.32: Build OK!
v4.9.92: Build OK!
v4.4.126: Failed to apply! Possible dependencies:
8d99758dee31: ("Input: synaptics-rmi4 - add SPI transport driver")
ff8f83708b3e: ("Input: synaptics-rmi4 - add support for 2D sensors and F11")
fdf51604f104: ("Input: synaptics-rmi4 - add I2C transport driver")
2b6a321da9a2: ("Input: synaptics-rmi4 - add support for Synaptics RMI4 
devices")
2b6a321da9a2: ("Input: synaptics-rmi4 - add support for Synaptics RMI4 
devices")
fdf51604f104: ("Input: synaptics-rmi4 - add I2C transport driver")
2b6a321da9a2: ("Input: synaptics-rmi4 - add support for Synaptics RMI4 
devices")
2b6a321da9a2: ("Input: synaptics-rmi4 - add support for Synaptics RMI4 
devices")


Please let us know if you'd like to have this patch included in a stable tree.

--
Thanks.
Sasha

Re: [PATCH] Input: synaptics-rmi4 - Fix an unchecked out of memory error path

2018-04-04 Thread Sasha Levin
Hi Christophe JAILLET.

[This is an automated email]

This commit has been processed by the -stable helper bot and determined
to be a high probability candidate for -stable trees. (score: 7.5278)

The bot has tested the following trees: v4.15.15, v4.14.32, v4.9.92, v4.4.126, 

v4.15.15: Build OK!
v4.14.32: Build OK!
v4.9.92: Build OK!
v4.4.126: Failed to apply! Possible dependencies:
8d99758dee31: ("Input: synaptics-rmi4 - add SPI transport driver")
ff8f83708b3e: ("Input: synaptics-rmi4 - add support for 2D sensors and F11")
fdf51604f104: ("Input: synaptics-rmi4 - add I2C transport driver")
2b6a321da9a2: ("Input: synaptics-rmi4 - add support for Synaptics RMI4 
devices")
2b6a321da9a2: ("Input: synaptics-rmi4 - add support for Synaptics RMI4 
devices")
fdf51604f104: ("Input: synaptics-rmi4 - add I2C transport driver")
2b6a321da9a2: ("Input: synaptics-rmi4 - add support for Synaptics RMI4 
devices")
2b6a321da9a2: ("Input: synaptics-rmi4 - add support for Synaptics RMI4 
devices")


Please let us know if you'd like to have this patch included in a stable tree.

--
Thanks.
Sasha

Re: [PATCH 02/45] Fix exception_enter() return value

2018-04-04 Thread Sasha Levin
Hi David Howells.

[This is an automated email]

This commit has been processed by the -stable helper bot and determined
to be a high probability candidate for -stable trees. (score: 5.5190)

The bot has tested the following trees: v4.15.15, v4.14.32, v4.9.92, v4.4.126, 

v4.15.15: Build OK!
v4.14.32: Build OK!
v4.9.92: Build OK!
v4.4.126: Failed to apply! Possible dependencies:
2e9d1e150abf: ("x86/entry: Avoid interrupt flag save and restore")


Please let us know if you'd like to have this patch included in a stable tree.

--
Thanks.
Sasha

Re: [PATCH 02/45] Fix exception_enter() return value

2018-04-04 Thread Sasha Levin
Hi David Howells.

[This is an automated email]

This commit has been processed by the -stable helper bot and determined
to be a high probability candidate for -stable trees. (score: 5.5190)

The bot has tested the following trees: v4.15.15, v4.14.32, v4.9.92, v4.4.126, 

v4.15.15: Build OK!
v4.14.32: Build OK!
v4.9.92: Build OK!
v4.4.126: Failed to apply! Possible dependencies:
2e9d1e150abf: ("x86/entry: Avoid interrupt flag save and restore")


Please let us know if you'd like to have this patch included in a stable tree.

--
Thanks.
Sasha

Re: [GIT PULL] Kernel lockdown for secure boot

2018-04-04 Thread Peter Dolding
On Thu, Apr 5, 2018 at 2:26 AM, Matthew Garrett  wrote:
> On Tue, Apr 3, 2018 at 11:56 PM Peter Dolding  wrote:
>> On Wed, Apr 4, 2018 at 11:13 AM, Matthew Garrett  wrote:
>
>> > There are four cases:
>> >
>> > Verified Boot off, lockdown off: Status quo in distro and mainline
> kernels
>> > Verified Boot off, lockdown on: Perception of security improvement
> that's
>> > trivially circumvented (and so bad)
>> > Verified Boot on, lockdown off: Perception of security improvement
> that's
>> > trivially circumvented (and so bad), status quo in mainline kernels
>> > Verified Boot on, lockdown on: Security improvement, status quo in
> distro
>> > kernels
>> >
>> > Of these four options, only two make sense. The most common
> implementation
>> > of Verified Boot on x86 platforms is UEFI Secure Boot,
>
>> Stop right there.   Verified boot does not have to be UEFI secureboot.
>>You could be using a uboot verified boot or
>> https://www.coreboot.org/git-docs/Intel/vboot.html  google vboot.
>> Neither of these provide flags to kernel to say they have been
>> performed.
>
> They can be modified to set the appropriate bit in the bootparams - the
> reason we can't do that in the UEFI case is that Linux can be built as a
> UEFI binary that the firmware execute directly, and so the firmware has no
> way to set that flag.
>
With some of your embedded hardware boot loaders you have exactly the
same problem.   Where you cannot set bootparams instead have to hard
set everything in the kernel image.  This is why there is a option to
embedded initramfs image inside kernel image because some of them will
only load 1 file.

So not using UEFI  you run into the exact same problem.   So lockdown
on or off need to be a kernel build option setting default.   This
could be 3 options Always on, Always off and "Automatic based on boot
verification system status".

https://linux.die.net/man/8/efibootmgr

Also I have a problem here in non broken UEFI implementations -@ |
--append-binary-args that is very simple set the command line passed
into UEFI binary loaded by the firmware with the Linux kernel this
comes bootparams.   Yes using --append-binary-args can be a pain it is
used to tell the Linux kernel where to find the / drive.   So turning
lockdown off by bootparams is down right possible with working UEFI.
There is a lot of EFI out there that does not work properly.

>> Now Verified Boot on, lockdown off.   Insanely this can be required in
>> diagnostic on some embedded platform because EFI secureboot does not
>> have a off switch.These are platforms where they don't boot if
>> they don't have a PK and KEK set installed.  Yes some of these is jtag
>> the PK and KEK set in.
>
>> The fact that this Verified Boot on, lockdown off causes trouble
>> points to a clear problem.   User owns the hardware they should have
>> the right to defeat secureboot if they wish to.
>
> Which is why Shim allows you to disable validation if you prove physical
> user presence.

Good idea until you have a motherboard where the PS2 ports have failed
and does not support usb keyboard so you have no keyboard until after
the kernel has booted so no way to prove physical presence.   Or are
working on something embedded that has no physical user presence
interface in the boot stages these embedded devices can also be UEFI
with secureboot.  Not everything running UEFI has keyboard,
screenanything that you can prove physical user presence with
sometimes you have to pure depend on the signing key.

If I am a person who has made my own PK and has my own KEK in UEFI
system I should have the right to sign kernel with lockdown off by
default.   I may need this for diagnostics on hardware without user
interface and I may need this because the hardware is broken and I
have set PK and KEK set by direct firmware  flash access possibly by
jtag or possibly before critical port on motherboard died.

Of course I am not saying that Microsoft and others cannot have rules
that say if using their KEK that you cannot do this.   But if the
machine is my hardware and I have set my own PK and KEK set I do know
what I am doing and I should be allowed to compromise security if I
wish its my hardware.   I should not have to custom hack to do it.
Of course I am not saying that the setting in Linux kernel
configuration system cannot have a big warning that you should not do
this unless you have no other valid option and I am not saying that
the kernel should not log/report if it see what appears to be a
questionable configuration like dmesg "SECURITY ISSUE:  UEFI
secureboot detected enabled kernel built with lockdown disabled system
at risk of comprise".  Something audit tools could check logs for. .

So a kernel booting secureboot with lockdown disabled in kernel
configuration is perfectly fine to log a message that this is the
case.   Always forcing lockdown on because you see UEFI secureboot
will cause issues.

Broken 

Re: [GIT PULL] Kernel lockdown for secure boot

2018-04-04 Thread Peter Dolding
On Thu, Apr 5, 2018 at 2:26 AM, Matthew Garrett  wrote:
> On Tue, Apr 3, 2018 at 11:56 PM Peter Dolding  wrote:
>> On Wed, Apr 4, 2018 at 11:13 AM, Matthew Garrett  wrote:
>
>> > There are four cases:
>> >
>> > Verified Boot off, lockdown off: Status quo in distro and mainline
> kernels
>> > Verified Boot off, lockdown on: Perception of security improvement
> that's
>> > trivially circumvented (and so bad)
>> > Verified Boot on, lockdown off: Perception of security improvement
> that's
>> > trivially circumvented (and so bad), status quo in mainline kernels
>> > Verified Boot on, lockdown on: Security improvement, status quo in
> distro
>> > kernels
>> >
>> > Of these four options, only two make sense. The most common
> implementation
>> > of Verified Boot on x86 platforms is UEFI Secure Boot,
>
>> Stop right there.   Verified boot does not have to be UEFI secureboot.
>>You could be using a uboot verified boot or
>> https://www.coreboot.org/git-docs/Intel/vboot.html  google vboot.
>> Neither of these provide flags to kernel to say they have been
>> performed.
>
> They can be modified to set the appropriate bit in the bootparams - the
> reason we can't do that in the UEFI case is that Linux can be built as a
> UEFI binary that the firmware execute directly, and so the firmware has no
> way to set that flag.
>
With some of your embedded hardware boot loaders you have exactly the
same problem.   Where you cannot set bootparams instead have to hard
set everything in the kernel image.  This is why there is a option to
embedded initramfs image inside kernel image because some of them will
only load 1 file.

So not using UEFI  you run into the exact same problem.   So lockdown
on or off need to be a kernel build option setting default.   This
could be 3 options Always on, Always off and "Automatic based on boot
verification system status".

https://linux.die.net/man/8/efibootmgr

Also I have a problem here in non broken UEFI implementations -@ |
--append-binary-args that is very simple set the command line passed
into UEFI binary loaded by the firmware with the Linux kernel this
comes bootparams.   Yes using --append-binary-args can be a pain it is
used to tell the Linux kernel where to find the / drive.   So turning
lockdown off by bootparams is down right possible with working UEFI.
There is a lot of EFI out there that does not work properly.

>> Now Verified Boot on, lockdown off.   Insanely this can be required in
>> diagnostic on some embedded platform because EFI secureboot does not
>> have a off switch.These are platforms where they don't boot if
>> they don't have a PK and KEK set installed.  Yes some of these is jtag
>> the PK and KEK set in.
>
>> The fact that this Verified Boot on, lockdown off causes trouble
>> points to a clear problem.   User owns the hardware they should have
>> the right to defeat secureboot if they wish to.
>
> Which is why Shim allows you to disable validation if you prove physical
> user presence.

Good idea until you have a motherboard where the PS2 ports have failed
and does not support usb keyboard so you have no keyboard until after
the kernel has booted so no way to prove physical presence.   Or are
working on something embedded that has no physical user presence
interface in the boot stages these embedded devices can also be UEFI
with secureboot.  Not everything running UEFI has keyboard,
screenanything that you can prove physical user presence with
sometimes you have to pure depend on the signing key.

If I am a person who has made my own PK and has my own KEK in UEFI
system I should have the right to sign kernel with lockdown off by
default.   I may need this for diagnostics on hardware without user
interface and I may need this because the hardware is broken and I
have set PK and KEK set by direct firmware  flash access possibly by
jtag or possibly before critical port on motherboard died.

Of course I am not saying that Microsoft and others cannot have rules
that say if using their KEK that you cannot do this.   But if the
machine is my hardware and I have set my own PK and KEK set I do know
what I am doing and I should be allowed to compromise security if I
wish its my hardware.   I should not have to custom hack to do it.
Of course I am not saying that the setting in Linux kernel
configuration system cannot have a big warning that you should not do
this unless you have no other valid option and I am not saying that
the kernel should not log/report if it see what appears to be a
questionable configuration like dmesg "SECURITY ISSUE:  UEFI
secureboot detected enabled kernel built with lockdown disabled system
at risk of comprise".  Something audit tools could check logs for. .

So a kernel booting secureboot with lockdown disabled in kernel
configuration is perfectly fine to log a message that this is the
case.   Always forcing lockdown on because you see UEFI secureboot
will cause issues.

Broken hardware to get around a failed motherboard with UEFI

Re: [PATCH] ARM64: dts: meson-axg: enable the eMMC controller

2018-04-04 Thread Yixun Lan
Hi Kevin

On 04/04/2018 02:26 AM, kbuild test robot wrote:
> Hi Nan,
> 
> Thank you for the patch! Yet something to improve:
> 
> [auto build test ERROR on next-20180403]
> [cannot apply to robh/for-next v4.16 v4.16-rc7 v4.16-rc6 v4.16]
> [if your patch is applied to the wrong git tree, please drop us a note to 
> help improve the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/Yixun-Lan/ARM64-dts-meson-axg-enable-the-eMMC-controller/20180403-224314
> config: arm64-allyesconfig (attached as .config)
> compiler: aarch64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
> reproduce:
> wget 
> https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
> ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # save the attached .config to linux build tree
> make.cross ARCH=arm64 
> 
> All errors (new ones prefixed by >>):
> 
>>> Error: arch/arm64/boot/dts/amlogic/meson-axg-s400.dts:49.24-25 syntax error
>FATAL ERROR: Unable to parse input tree
> 
> ---
> 0-DAY kernel test infrastructureOpen Source Technology Center
> https://lists.01.org/pipermail/kbuild-all   Intel Corporation
> 

oops, need to add this header (I fail to do a last check when rebase to
the dt64 branch)

+#include 

will send a patch v2

Yixun



Re: [PATCH] ARM64: dts: meson-axg: enable the eMMC controller

2018-04-04 Thread Yixun Lan
Hi Kevin

On 04/04/2018 02:26 AM, kbuild test robot wrote:
> Hi Nan,
> 
> Thank you for the patch! Yet something to improve:
> 
> [auto build test ERROR on next-20180403]
> [cannot apply to robh/for-next v4.16 v4.16-rc7 v4.16-rc6 v4.16]
> [if your patch is applied to the wrong git tree, please drop us a note to 
> help improve the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/Yixun-Lan/ARM64-dts-meson-axg-enable-the-eMMC-controller/20180403-224314
> config: arm64-allyesconfig (attached as .config)
> compiler: aarch64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
> reproduce:
> wget 
> https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
> ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # save the attached .config to linux build tree
> make.cross ARCH=arm64 
> 
> All errors (new ones prefixed by >>):
> 
>>> Error: arch/arm64/boot/dts/amlogic/meson-axg-s400.dts:49.24-25 syntax error
>FATAL ERROR: Unable to parse input tree
> 
> ---
> 0-DAY kernel test infrastructureOpen Source Technology Center
> https://lists.01.org/pipermail/kbuild-all   Intel Corporation
> 

oops, need to add this header (I fail to do a last check when rebase to
the dt64 branch)

+#include 

will send a patch v2

Yixun



  1   2   3   4   5   6   7   8   9   10   >