Re: perf: fuzzer KASAN unwind_get_return_address

2016-11-15 Thread Vince Weaver
On Tue, 15 Nov 2016, Peter Zijlstra wrote:

> On Tue, Nov 15, 2016 at 12:43:56PM -0500, Vince Weaver wrote:
> > 
> > Running on my haswell machine with the imc/uncore patch applied, the 
> > perf_fuzzer next tripped over this issue.
> > 
> > [  202.034495] BAD LUCK: lost 371 message(s) from NMI context!
> > [  202.034496] 
> > ==
> > [  202.048327] BUG: KASAN: stack-out-of-bounds in 
> > unwind_get_return_address+0x35/0x80 at addr 8800cff0bd90
> > [  202.058826] Read of size 8 by task perf_fuzzer/16254
> > [  202.064186] page:ea00033fc2c0 count:1 mapcount:0 mapping:  
> > (null) index:0x0^Ac
> > [  202.073068] flags: 0x1800400(reserved)
> > [  202.077885] page dumped because: kasan: bad access detected
> > [  202.083880] CPU: 4 PID: 16254 Comm: perf_fuzzer Not tainted 4.9.0-rc5+ #5
> > [  202.091204] Hardware name: LENOVO 10AM000AUS/SHARKBAY, BIOS FBKT72AUS 
> > 01/26/2014
> > [  202.099181]  8800cff0b1d8^Ac 816bb796^Ac 8800cff0b270^Ac 
> > 8800cff0bd90^Ac
> > [  202.107896]  8800cff0b260^Ac 812fbe95^Ac 7ffc9d1ab480^Ac 
> > ^Ac
> > [  202.116638]  8125117d^Ac 0092^Ac ^Ac 
> > 8800cff0b7c0^Ac
> > [  202.125339] Call Trace:
> > [  202.127994][] dump_stack+0x63/0x8d
> > [  202.134184]  [] kasan_report_error+0x495/0x4c0
> > [  202.140680]  [] ? perf_output_begin+0x28d/0x4c0
> > [  202.147228]  [] kasan_report+0x39/0x40
> > [  202.152987]  [] ? unwind_get_return_address+0x35/0x80
> > [  202.160094]  [] __asan_load8+0x5e/0x70
> > [  202.165859]  [] unwind_get_return_address+0x35/0x80
> 
> Josh, any ideas?

From what I can tell this maps to:

unsigned long unwind_get_return_address(struct unwind_state *state)
{
unsigned long addr;
unsigned long *addr_p = unwind_get_return_address_ptr(state);

if (unwind_done(state))
return 0;

>>addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
 addr_p);

return __kernel_text_address(addr) ? addr : 0;
}




Re: perf: fuzzer KASAN unwind_get_return_address

2016-11-15 Thread Vince Weaver
On Tue, 15 Nov 2016, Peter Zijlstra wrote:

> On Tue, Nov 15, 2016 at 12:43:56PM -0500, Vince Weaver wrote:
> > 
> > Running on my haswell machine with the imc/uncore patch applied, the 
> > perf_fuzzer next tripped over this issue.
> > 
> > [  202.034495] BAD LUCK: lost 371 message(s) from NMI context!
> > [  202.034496] 
> > ==
> > [  202.048327] BUG: KASAN: stack-out-of-bounds in 
> > unwind_get_return_address+0x35/0x80 at addr 8800cff0bd90
> > [  202.058826] Read of size 8 by task perf_fuzzer/16254
> > [  202.064186] page:ea00033fc2c0 count:1 mapcount:0 mapping:  
> > (null) index:0x0^Ac
> > [  202.073068] flags: 0x1800400(reserved)
> > [  202.077885] page dumped because: kasan: bad access detected
> > [  202.083880] CPU: 4 PID: 16254 Comm: perf_fuzzer Not tainted 4.9.0-rc5+ #5
> > [  202.091204] Hardware name: LENOVO 10AM000AUS/SHARKBAY, BIOS FBKT72AUS 
> > 01/26/2014
> > [  202.099181]  8800cff0b1d8^Ac 816bb796^Ac 8800cff0b270^Ac 
> > 8800cff0bd90^Ac
> > [  202.107896]  8800cff0b260^Ac 812fbe95^Ac 7ffc9d1ab480^Ac 
> > ^Ac
> > [  202.116638]  8125117d^Ac 0092^Ac ^Ac 
> > 8800cff0b7c0^Ac
> > [  202.125339] Call Trace:
> > [  202.127994][] dump_stack+0x63/0x8d
> > [  202.134184]  [] kasan_report_error+0x495/0x4c0
> > [  202.140680]  [] ? perf_output_begin+0x28d/0x4c0
> > [  202.147228]  [] kasan_report+0x39/0x40
> > [  202.152987]  [] ? unwind_get_return_address+0x35/0x80
> > [  202.160094]  [] __asan_load8+0x5e/0x70
> > [  202.165859]  [] unwind_get_return_address+0x35/0x80
> 
> Josh, any ideas?

From what I can tell this maps to:

unsigned long unwind_get_return_address(struct unwind_state *state)
{
unsigned long addr;
unsigned long *addr_p = unwind_get_return_address_ptr(state);

if (unwind_done(state))
return 0;

>>addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
 addr_p);

return __kernel_text_address(addr) ? addr : 0;
}




Re: [PATCH] staging: slicoss: fix different address space warnings

2016-11-15 Thread kbuild test robot
Hi Sergio,

[auto build test ERROR on staging/staging-testing]
[also build test ERROR on v4.9-rc5 next-20161115]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Sergio-Paracuellos/staging-slicoss-fix-different-address-space-warnings/20161116-025006
config: i386-randconfig-x003-201646 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   In file included from drivers/staging/slicoss/slicoss.c:97:0:
   drivers/staging/slicoss/slicoss.c: In function 'slic_upr_request_complete':
>> drivers/staging/slicoss/slicoss.c:1019:5: error: implicit declaration of 
>> function 'ioread64' [-Werror=implicit-function-declaration]
ioread64(>xmit_tcp_segs),
^
   drivers/staging/slicoss/slic.h:560:19: note: in definition of macro 
'UPDATE_STATS_GB'
 (largestat) += ((newstat) - (oldstat));  \
  ^~~
   cc1: some warnings being treated as errors

vim +/ioread64 +1019 drivers/staging/slicoss/slicoss.c

  1013  dev_err(>netdev->dev,
  1014  "SLIC_UPR_STATS command failed 
isr[%x]\n", isr);
  1015  break;
  1016  }
  1017  
  1018  UPDATE_STATS_GB(stst->tcp.xmit_tcp_segs,
> 1019  ioread64(>xmit_tcp_segs),
  1020  old->xmit_tcp_segs);
  1021  
  1022  UPDATE_STATS_GB(stst->tcp.xmit_tcp_bytes,

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH] staging: slicoss: fix different address space warnings

2016-11-15 Thread kbuild test robot
Hi Sergio,

[auto build test ERROR on staging/staging-testing]
[also build test ERROR on v4.9-rc5 next-20161115]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Sergio-Paracuellos/staging-slicoss-fix-different-address-space-warnings/20161116-025006
config: i386-randconfig-x003-201646 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   In file included from drivers/staging/slicoss/slicoss.c:97:0:
   drivers/staging/slicoss/slicoss.c: In function 'slic_upr_request_complete':
>> drivers/staging/slicoss/slicoss.c:1019:5: error: implicit declaration of 
>> function 'ioread64' [-Werror=implicit-function-declaration]
ioread64(>xmit_tcp_segs),
^
   drivers/staging/slicoss/slic.h:560:19: note: in definition of macro 
'UPDATE_STATS_GB'
 (largestat) += ((newstat) - (oldstat));  \
  ^~~
   cc1: some warnings being treated as errors

vim +/ioread64 +1019 drivers/staging/slicoss/slicoss.c

  1013  dev_err(>netdev->dev,
  1014  "SLIC_UPR_STATS command failed 
isr[%x]\n", isr);
  1015  break;
  1016  }
  1017  
  1018  UPDATE_STATS_GB(stst->tcp.xmit_tcp_segs,
> 1019  ioread64(>xmit_tcp_segs),
  1020  old->xmit_tcp_segs);
  1021  
  1022  UPDATE_STATS_GB(stst->tcp.xmit_tcp_bytes,

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: perf: fuzzer KASAN unwind_get_return_address

2016-11-15 Thread Dmitry Vyukov
On Tue, Nov 15, 2016 at 10:57 AM, Peter Zijlstra  wrote:
> On Tue, Nov 15, 2016 at 12:43:56PM -0500, Vince Weaver wrote:
>>
>> Running on my haswell machine with the imc/uncore patch applied, the
>> perf_fuzzer next tripped over this issue.
>>
>> [  202.034495] BAD LUCK: lost 371 message(s) from NMI context!
>> [  202.034496] 
>> ==
>> [  202.048327] BUG: KASAN: stack-out-of-bounds in 
>> unwind_get_return_address+0x35/0x80 at addr 8800cff0bd90
>> [  202.058826] Read of size 8 by task perf_fuzzer/16254
>> [  202.064186] page:ea00033fc2c0 count:1 mapcount:0 mapping:  
>> (null) index:0x0^Ac
>> [  202.073068] flags: 0x1800400(reserved)
>> [  202.077885] page dumped because: kasan: bad access detected
>> [  202.083880] CPU: 4 PID: 16254 Comm: perf_fuzzer Not tainted 4.9.0-rc5+ #5
>> [  202.091204] Hardware name: LENOVO 10AM000AUS/SHARKBAY, BIOS FBKT72AUS 
>> 01/26/2014
>> [  202.099181]  8800cff0b1d8^Ac 816bb796^Ac 8800cff0b270^Ac 
>> 8800cff0bd90^Ac
>> [  202.107896]  8800cff0b260^Ac 812fbe95^Ac 7ffc9d1ab480^Ac 
>> ^Ac
>> [  202.116638]  8125117d^Ac 0092^Ac ^Ac 
>> 8800cff0b7c0^Ac
>> [  202.125339] Call Trace:
>> [  202.127994][] dump_stack+0x63/0x8d
>> [  202.134184]  [] kasan_report_error+0x495/0x4c0
>> [  202.140680]  [] ? perf_output_begin+0x28d/0x4c0
>> [  202.147228]  [] kasan_report+0x39/0x40
>> [  202.152987]  [] ? unwind_get_return_address+0x35/0x80
>> [  202.160094]  [] __asan_load8+0x5e/0x70
>> [  202.165859]  [] unwind_get_return_address+0x35/0x80
>
> Josh, any ideas?

I think this is a false positive due to imprecise unwind that hits a
stack redzone.
We probably need to use READ_ONCE_NOCHECK as in get_wchan.


Re: perf: fuzzer KASAN unwind_get_return_address

2016-11-15 Thread Dmitry Vyukov
On Tue, Nov 15, 2016 at 10:57 AM, Peter Zijlstra  wrote:
> On Tue, Nov 15, 2016 at 12:43:56PM -0500, Vince Weaver wrote:
>>
>> Running on my haswell machine with the imc/uncore patch applied, the
>> perf_fuzzer next tripped over this issue.
>>
>> [  202.034495] BAD LUCK: lost 371 message(s) from NMI context!
>> [  202.034496] 
>> ==
>> [  202.048327] BUG: KASAN: stack-out-of-bounds in 
>> unwind_get_return_address+0x35/0x80 at addr 8800cff0bd90
>> [  202.058826] Read of size 8 by task perf_fuzzer/16254
>> [  202.064186] page:ea00033fc2c0 count:1 mapcount:0 mapping:  
>> (null) index:0x0^Ac
>> [  202.073068] flags: 0x1800400(reserved)
>> [  202.077885] page dumped because: kasan: bad access detected
>> [  202.083880] CPU: 4 PID: 16254 Comm: perf_fuzzer Not tainted 4.9.0-rc5+ #5
>> [  202.091204] Hardware name: LENOVO 10AM000AUS/SHARKBAY, BIOS FBKT72AUS 
>> 01/26/2014
>> [  202.099181]  8800cff0b1d8^Ac 816bb796^Ac 8800cff0b270^Ac 
>> 8800cff0bd90^Ac
>> [  202.107896]  8800cff0b260^Ac 812fbe95^Ac 7ffc9d1ab480^Ac 
>> ^Ac
>> [  202.116638]  8125117d^Ac 0092^Ac ^Ac 
>> 8800cff0b7c0^Ac
>> [  202.125339] Call Trace:
>> [  202.127994][] dump_stack+0x63/0x8d
>> [  202.134184]  [] kasan_report_error+0x495/0x4c0
>> [  202.140680]  [] ? perf_output_begin+0x28d/0x4c0
>> [  202.147228]  [] kasan_report+0x39/0x40
>> [  202.152987]  [] ? unwind_get_return_address+0x35/0x80
>> [  202.160094]  [] __asan_load8+0x5e/0x70
>> [  202.165859]  [] unwind_get_return_address+0x35/0x80
>
> Josh, any ideas?

I think this is a false positive due to imprecise unwind that hits a
stack redzone.
We probably need to use READ_ONCE_NOCHECK as in get_wchan.


Re: [PATCH 02/16] ARM: EXYNOS: use generic API to enable SCU

2016-11-15 Thread Krzysztof Kozlowski
On Mon, Nov 14, 2016 at 10:31:57AM +0530, Pankaj Dubey wrote:
> Now as we have of_scu_enable which takes care of mapping
> scu base from DT, lets use it.
> 
> This patch also fixes build failure in case !SMP caused
> by commit SHA ID: 94210b1abb2 which is already merged in
> krzk/for-next branch
> 
> CC: Krzysztof Kozlowski 
> CC: linux-samsung-...@vger.kernel.org
> Signed-off-by: Pankaj Dubey 
> ---
>  arch/arm/mach-exynos/common.h  |  1 -
>  arch/arm/mach-exynos/platsmp.c | 30 --
>  arch/arm/mach-exynos/pm.c  |  4 ++--
>  arch/arm/mach-exynos/suspend.c | 14 --
>  4 files changed, 10 insertions(+), 39 deletions(-)
> 

Looks correct, for reference:
Reviewed-by: Krzysztof Kozlowski 

However this depends on changes in my next/soc branch (and these changes
were the trigger for this patchset). I can either provide a tag with
Exynos commits or accept one with common SCU code.

Best regards,
Krzysztof


Re: [PATCH 02/16] ARM: EXYNOS: use generic API to enable SCU

2016-11-15 Thread Krzysztof Kozlowski
On Mon, Nov 14, 2016 at 10:31:57AM +0530, Pankaj Dubey wrote:
> Now as we have of_scu_enable which takes care of mapping
> scu base from DT, lets use it.
> 
> This patch also fixes build failure in case !SMP caused
> by commit SHA ID: 94210b1abb2 which is already merged in
> krzk/for-next branch
> 
> CC: Krzysztof Kozlowski 
> CC: linux-samsung-...@vger.kernel.org
> Signed-off-by: Pankaj Dubey 
> ---
>  arch/arm/mach-exynos/common.h  |  1 -
>  arch/arm/mach-exynos/platsmp.c | 30 --
>  arch/arm/mach-exynos/pm.c  |  4 ++--
>  arch/arm/mach-exynos/suspend.c | 14 --
>  4 files changed, 10 insertions(+), 39 deletions(-)
> 

Looks correct, for reference:
Reviewed-by: Krzysztof Kozlowski 

However this depends on changes in my next/soc branch (and these changes
were the trigger for this patchset). I can either provide a tag with
Exynos commits or accept one with common SCU code.

Best regards,
Krzysztof


Re: [PATCH] nvmem: qfprom: Fix to support single byte read/write

2016-11-15 Thread Stephen Boyd
On 11/15, Vivek Gautam wrote:
> @@ -53,7 +53,7 @@ static int qfprom_remove(struct platform_device *pdev)
>  static struct nvmem_config econfig = {
>   .name = "qfprom",
>   .owner = THIS_MODULE,
> - .stride = 4,
> + .stride = 1,

Are we certain that all qfproms support byte accesses?

>   .word_size = 1,
>   .reg_read = qfprom_reg_read,
>   .reg_write = qfprom_reg_write,

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH] nvmem: qfprom: Fix to support single byte read/write

2016-11-15 Thread Stephen Boyd
On 11/15, Vivek Gautam wrote:
> @@ -53,7 +53,7 @@ static int qfprom_remove(struct platform_device *pdev)
>  static struct nvmem_config econfig = {
>   .name = "qfprom",
>   .owner = THIS_MODULE,
> - .stride = 4,
> + .stride = 1,

Are we certain that all qfproms support byte accesses?

>   .word_size = 1,
>   .reg_read = qfprom_reg_read,
>   .reg_write = qfprom_reg_write,

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: perf: fuzzer KASAN unwind_get_return_address

2016-11-15 Thread Peter Zijlstra
On Tue, Nov 15, 2016 at 12:43:56PM -0500, Vince Weaver wrote:
> 
> Running on my haswell machine with the imc/uncore patch applied, the 
> perf_fuzzer next tripped over this issue.
> 
> [  202.034495] BAD LUCK: lost 371 message(s) from NMI context!
> [  202.034496] 
> ==
> [  202.048327] BUG: KASAN: stack-out-of-bounds in 
> unwind_get_return_address+0x35/0x80 at addr 8800cff0bd90
> [  202.058826] Read of size 8 by task perf_fuzzer/16254
> [  202.064186] page:ea00033fc2c0 count:1 mapcount:0 mapping:  
> (null) index:0x0^Ac
> [  202.073068] flags: 0x1800400(reserved)
> [  202.077885] page dumped because: kasan: bad access detected
> [  202.083880] CPU: 4 PID: 16254 Comm: perf_fuzzer Not tainted 4.9.0-rc5+ #5
> [  202.091204] Hardware name: LENOVO 10AM000AUS/SHARKBAY, BIOS FBKT72AUS 
> 01/26/2014
> [  202.099181]  8800cff0b1d8^Ac 816bb796^Ac 8800cff0b270^Ac 
> 8800cff0bd90^Ac
> [  202.107896]  8800cff0b260^Ac 812fbe95^Ac 7ffc9d1ab480^Ac 
> ^Ac
> [  202.116638]  8125117d^Ac 0092^Ac ^Ac 
> 8800cff0b7c0^Ac
> [  202.125339] Call Trace:
> [  202.127994][] dump_stack+0x63/0x8d
> [  202.134184]  [] kasan_report_error+0x495/0x4c0
> [  202.140680]  [] ? perf_output_begin+0x28d/0x4c0
> [  202.147228]  [] kasan_report+0x39/0x40
> [  202.152987]  [] ? unwind_get_return_address+0x35/0x80
> [  202.160094]  [] __asan_load8+0x5e/0x70
> [  202.165859]  [] unwind_get_return_address+0x35/0x80

Josh, any ideas?

> [  202.172817]  [] perf_callchain_kernel+0x22d/0x270
> [  202.179590]  [] ? __asan_load4+0x24/0x80
> [  202.185548]  [] ? arch_perf_update_userpage+0x130/0x130
> [  202.192849]  [] get_perf_callchain+0x24a/0x3e0
> [  202.199339]  [] ? put_callchain_buffers+0x50/0x50
> [  202.206092]  [] ? perf_get_regs_user+0x327/0x380
> [  202.212751]  [] ? lock_release+0x30/0x540
> [  202.218803]  [] perf_callchain+0xc5/0xe0
> [  202.224767]  [] ? __asan_load4+0x24/0x80
> [  202.230696]  [] perf_prepare_sample+0x489/0x630
> [  202.237275]  [] ? lock_release+0x30/0x540
> [  202.243266]  [] ? perf_event_output_forward+0xfc/0x130
> [  202.250472]  [] ? perf_prepare_sample+0x630/0x630
> [  202.257251]  [] perf_event_output+0xae/0x130
> [  202.263564]  [] ? perf_event_output_backward+0x130/0x130
> [  202.270964]  [] ? perf_event_output_backward+0x130/0x130
> [  202.278373]  [] ? perf_event_update_userpage+0x212/0x2b0
> [  202.285772]  [] ? perf_event_task_disable+0xc0/0xc0
> [  202.292744]  [] ? __asan_loadN+0xf/0x20
> [  202.298581]  [] ? setup_pebs_sample_data+0x68d/0x830
> [  202.305622]  [] __intel_pmu_pebs_event+0x221/0x3a0
> [  202.312469]  [] ? lock_acquire+0x3d/0x190
> [  202.318523]  [] ? pebs_update_state+0x150/0x150
> [  202.325060]  [] ? get_stack_info+0x3c/0x150
> [  202.331259]  [] ? __intel_pmu_enable_all+0x77/0xf0
> [  202.338128]  [] ? __asan_load4+0x24/0x80
> [  202.344059]  [] ? intel_pmu_disable_bts+0x60/0x60
> [  202.350823]  [] ? __asan_load4+0x24/0x80
> [  202.356740]  [] ? perf_callchain+0xc5/0xe0
> [  202.362855]  [] ? lock_release+0x30/0x540
> [  202.368855]  [] ? perf_prepare_sample+0x4c1/0x630
> [  202.375619]  [] ? perf_event_output_forward+0xe4/0x130
> [  202.382849]  [] intel_pmu_drain_pebs_nhm+0x3ec/0x530
> [  202.389899]  [] ? __intel_pmu_pebs_event+0x3a0/0x3a0
> [  202.396959]  [] ? perf_event_update_userpage+0x1fa/0x2b0
> [  202.406800]  [] ? perf_event_update_userpage+0x212/0x2b0
> [  202.416486]  [] ? perf_event_task_disable+0xc0/0xc0
> [  202.425720]  [] ? intel_pmu_lbr_read+0x32/0x790
> [  202.434566]  [] ? __perf_event_overflow+0x116/0x280
> [  202.443735]  [] ? intel_bts_interrupt+0x88/0x1b0
> [  202.452538]  [] intel_pmu_handle_irq+0x3ae/0x690
> [  202.461407]  [] ? intel_pmu_save_and_restart+0x80/0x80
> [  202.470877]  [] ? lock_release+0x30/0x540
> [  202.479131]  [] ? native_apic_msr_write+0x2b/0x30
> [  202.488181]  [] ? x2apic_send_IPI_self+0x3c/0x50
> [  202.497066]  [] ? native_sched_clock+0x62/0x140
> [  202.505919]  [] perf_event_nmi_handler+0x2d/0x50
> [  202.514832]  [] nmi_handle+0xb1/0x1d0
> [  202.522697]  [] ? nmi_handle+0x5/0x1d0
> [  202.530610]  [] default_do_nmi+0xe5/0x140
> [  202.538765]  [] do_nmi+0x152/0x1b0
> [  202.546254]  [] end_repeat_nmi+0x1a/0x1e
> [  202.554257]  [] ? __intel_pmu_enable_all+0x77/0xf0
> [  202.563167]  [] ? perf_event_task_tick+0x48b/0x5f0
> [  202.572060]  [] ? perf_event_task_tick+0x48b/0x5f0
> [  202.580864]  [] ? perf_event_task_tick+0x48b/0x5f0
> [  202.589703]  [] scheduler_tick+0xb1/0x150
> [  202.598985]  [] update_process_times+0x47/0x60
> [  202.607433]  [] tick_sched_handle.isra.14+0x33/0x80
> [  202.616314]  [] tick_sched_timer+0x4b/0x90
> [  202.624322]  [] __hrtimer_run_queues+0x21e/0x540
> [  202.632864]  [] ? tick_sched_do_timer+0x50/0x50
> [  202.641337]  [] ? retrigger_next_event+0xa0/0xa0
> [  202.649947]  [] ? 

Re: perf: fuzzer KASAN unwind_get_return_address

2016-11-15 Thread Peter Zijlstra
On Tue, Nov 15, 2016 at 12:43:56PM -0500, Vince Weaver wrote:
> 
> Running on my haswell machine with the imc/uncore patch applied, the 
> perf_fuzzer next tripped over this issue.
> 
> [  202.034495] BAD LUCK: lost 371 message(s) from NMI context!
> [  202.034496] 
> ==
> [  202.048327] BUG: KASAN: stack-out-of-bounds in 
> unwind_get_return_address+0x35/0x80 at addr 8800cff0bd90
> [  202.058826] Read of size 8 by task perf_fuzzer/16254
> [  202.064186] page:ea00033fc2c0 count:1 mapcount:0 mapping:  
> (null) index:0x0^Ac
> [  202.073068] flags: 0x1800400(reserved)
> [  202.077885] page dumped because: kasan: bad access detected
> [  202.083880] CPU: 4 PID: 16254 Comm: perf_fuzzer Not tainted 4.9.0-rc5+ #5
> [  202.091204] Hardware name: LENOVO 10AM000AUS/SHARKBAY, BIOS FBKT72AUS 
> 01/26/2014
> [  202.099181]  8800cff0b1d8^Ac 816bb796^Ac 8800cff0b270^Ac 
> 8800cff0bd90^Ac
> [  202.107896]  8800cff0b260^Ac 812fbe95^Ac 7ffc9d1ab480^Ac 
> ^Ac
> [  202.116638]  8125117d^Ac 0092^Ac ^Ac 
> 8800cff0b7c0^Ac
> [  202.125339] Call Trace:
> [  202.127994][] dump_stack+0x63/0x8d
> [  202.134184]  [] kasan_report_error+0x495/0x4c0
> [  202.140680]  [] ? perf_output_begin+0x28d/0x4c0
> [  202.147228]  [] kasan_report+0x39/0x40
> [  202.152987]  [] ? unwind_get_return_address+0x35/0x80
> [  202.160094]  [] __asan_load8+0x5e/0x70
> [  202.165859]  [] unwind_get_return_address+0x35/0x80

Josh, any ideas?

> [  202.172817]  [] perf_callchain_kernel+0x22d/0x270
> [  202.179590]  [] ? __asan_load4+0x24/0x80
> [  202.185548]  [] ? arch_perf_update_userpage+0x130/0x130
> [  202.192849]  [] get_perf_callchain+0x24a/0x3e0
> [  202.199339]  [] ? put_callchain_buffers+0x50/0x50
> [  202.206092]  [] ? perf_get_regs_user+0x327/0x380
> [  202.212751]  [] ? lock_release+0x30/0x540
> [  202.218803]  [] perf_callchain+0xc5/0xe0
> [  202.224767]  [] ? __asan_load4+0x24/0x80
> [  202.230696]  [] perf_prepare_sample+0x489/0x630
> [  202.237275]  [] ? lock_release+0x30/0x540
> [  202.243266]  [] ? perf_event_output_forward+0xfc/0x130
> [  202.250472]  [] ? perf_prepare_sample+0x630/0x630
> [  202.257251]  [] perf_event_output+0xae/0x130
> [  202.263564]  [] ? perf_event_output_backward+0x130/0x130
> [  202.270964]  [] ? perf_event_output_backward+0x130/0x130
> [  202.278373]  [] ? perf_event_update_userpage+0x212/0x2b0
> [  202.285772]  [] ? perf_event_task_disable+0xc0/0xc0
> [  202.292744]  [] ? __asan_loadN+0xf/0x20
> [  202.298581]  [] ? setup_pebs_sample_data+0x68d/0x830
> [  202.305622]  [] __intel_pmu_pebs_event+0x221/0x3a0
> [  202.312469]  [] ? lock_acquire+0x3d/0x190
> [  202.318523]  [] ? pebs_update_state+0x150/0x150
> [  202.325060]  [] ? get_stack_info+0x3c/0x150
> [  202.331259]  [] ? __intel_pmu_enable_all+0x77/0xf0
> [  202.338128]  [] ? __asan_load4+0x24/0x80
> [  202.344059]  [] ? intel_pmu_disable_bts+0x60/0x60
> [  202.350823]  [] ? __asan_load4+0x24/0x80
> [  202.356740]  [] ? perf_callchain+0xc5/0xe0
> [  202.362855]  [] ? lock_release+0x30/0x540
> [  202.368855]  [] ? perf_prepare_sample+0x4c1/0x630
> [  202.375619]  [] ? perf_event_output_forward+0xe4/0x130
> [  202.382849]  [] intel_pmu_drain_pebs_nhm+0x3ec/0x530
> [  202.389899]  [] ? __intel_pmu_pebs_event+0x3a0/0x3a0
> [  202.396959]  [] ? perf_event_update_userpage+0x1fa/0x2b0
> [  202.406800]  [] ? perf_event_update_userpage+0x212/0x2b0
> [  202.416486]  [] ? perf_event_task_disable+0xc0/0xc0
> [  202.425720]  [] ? intel_pmu_lbr_read+0x32/0x790
> [  202.434566]  [] ? __perf_event_overflow+0x116/0x280
> [  202.443735]  [] ? intel_bts_interrupt+0x88/0x1b0
> [  202.452538]  [] intel_pmu_handle_irq+0x3ae/0x690
> [  202.461407]  [] ? intel_pmu_save_and_restart+0x80/0x80
> [  202.470877]  [] ? lock_release+0x30/0x540
> [  202.479131]  [] ? native_apic_msr_write+0x2b/0x30
> [  202.488181]  [] ? x2apic_send_IPI_self+0x3c/0x50
> [  202.497066]  [] ? native_sched_clock+0x62/0x140
> [  202.505919]  [] perf_event_nmi_handler+0x2d/0x50
> [  202.514832]  [] nmi_handle+0xb1/0x1d0
> [  202.522697]  [] ? nmi_handle+0x5/0x1d0
> [  202.530610]  [] default_do_nmi+0xe5/0x140
> [  202.538765]  [] do_nmi+0x152/0x1b0
> [  202.546254]  [] end_repeat_nmi+0x1a/0x1e
> [  202.554257]  [] ? __intel_pmu_enable_all+0x77/0xf0
> [  202.563167]  [] ? perf_event_task_tick+0x48b/0x5f0
> [  202.572060]  [] ? perf_event_task_tick+0x48b/0x5f0
> [  202.580864]  [] ? perf_event_task_tick+0x48b/0x5f0
> [  202.589703]  [] scheduler_tick+0xb1/0x150
> [  202.598985]  [] update_process_times+0x47/0x60
> [  202.607433]  [] tick_sched_handle.isra.14+0x33/0x80
> [  202.616314]  [] tick_sched_timer+0x4b/0x90
> [  202.624322]  [] __hrtimer_run_queues+0x21e/0x540
> [  202.632864]  [] ? tick_sched_do_timer+0x50/0x50
> [  202.641337]  [] ? retrigger_next_event+0xa0/0xa0
> [  202.649947]  [] ? 

Re: [PATCH V3 1/9] PM / OPP: Reword binding supporting multiple regulators per device

2016-11-15 Thread Stephen Boyd
On 11/15, Viresh Kumar wrote:
> On 14-11-16, 18:13, Stephen Boyd wrote:
> > On 11/14, Rob Herring wrote:
> > > On Fri, Nov 11, 2016 at 08:41:20AM +0530, Viresh Kumar wrote:
> > > > On 10-11-16, 14:51, Stephen Boyd wrote:
> > > > > 
> > > > > No. The supply names (and also clock names/index) should be left
> > > > > up to the consumer of the OPP table. We don't want to encode any
> > > > > sort of details like this between the OPP table and the consumer
> > > > > of it in DT because then it seriously couples the OPP table to
> > > > > the consumer device. "The binding" in this case that needs to be
> > > > > updated is the consumer binding, to indicate that it correlated
> > > > > foo-supply and bar-supply to index 0 and 1 of the OPP table
> > > > > voltages.
> > > > 
> > > > Are you saying that we shall have a property like this then?
> > > > 
> > > > diff --git a/Documentation/devicetree/bindings/opp/opp.txt 
> > > > b/Documentation/devicetree/bindings/opp/opp.txt
> > > > index ee91cbdd95ee..733946df2fb8 100644
> > > > --- a/Documentation/devicetree/bindings/opp/opp.txt
> > > > +++ b/Documentation/devicetree/bindings/opp/opp.txt
> > > > @@ -389,7 +389,10 @@ Example 4: Handling multiple regulators
> > > > compatible = "arm,cortex-a7";
> > > > ...
> > > >  
> > > > -   cpu-supply = <_supply0>, <_supply1>, 
> > > > <_supply2>;
> > > > +   vcc0-supply = <_supply0>;
> > > > +   vcc1-supply = <_supply1>;
> > > > +   vcc2-supply = <_supply2>;
> > > > +   opp-supply-names = "vcc0", "vcc1", "vcc2";
> > > 
> > > Uh, no. You already have the names in the *-supply properties. Yes, they 
> > > are a PIA to retrieve compared to a *-names property, but that is the 
> > > nature of this style of binding.
> 
> It's not just PIA, but impossible AFAICT.
> 
> There are two important pieces of information we need for multiple
> regulator support:
> - Which regulator in the consumer node corresponds to which entry in
>   the OPP table. As Mark mentioned earlier, DT should be able to get
>   us this.

This is also possible from C code though. Or is there some case
where it isn't possible if we're sharing the same table with two
devices? I'm lost on when this would ever happen.

It feels like trying to keep the OPP table agnostic of the
consuming device and the device's binding is more trouble than
it's worth. Especially considering we have opp-shared and *-name
now.

> - The order in which the supplies need to be programmed. We have all
>   agreed to do this in code instead of inferring it from DT and this
>   patch series already does that.

Agreed. Encoding a sequence into DT doesn't sound very feasible.
How is this going to be handled though? I don't see any users of
the code we're reviewing here, so it's hard to grasp how things
will work. It would be really useful if we had some user of the
code included in the patch series to get the big picture.

> 
> I want to solve the first problem here and I don't see how it can be
> solved using such entries:
> 
>   cpus {
>   cpu@0 {
>   compatible = "arm,cortex-a7";
>   ...
> 
> vcc0-supply = <&cpu_supply0>;
> vcc1-supply = <&cpu_supply1>;
> vcc2-supply = <&cpu_supply2>;
>   operating-points-v2 = <&cpu0_opp_table>;
> };
> };
> 
>   cpu0_opp_table: opp_table0 {
>   compatible = "operating-points-v2";
>   opp-shared;
> 
>   opp@10 {
>   opp-hz = /bits/ 64 <10>;
>   opp-microvolt = <97>, /* Supply 0 */
>   <96>, /* Supply 1 */
>   <96>; /* Supply 2 */
>   };
> };
> 
> The code can't figure out which of vcc0, vcc1, vcc2 is added first in
> the CPU node and so we need to get the order somehow. A separate
> binding as I mentioned earlier is probably an (ugly) solution.
> 
> > I think the problem is that Viresh wants the binding to be "self
> > describing" so that the OPP can be used without a driver knowing
> > that a supply corresponds to a particular column in the voltage
> > table.
> 
> Right, and that's what Mark suggested as well.
> 
> > I don't understand that though. Can't we set the supply
> > names from C code somewhere based on the consumer of the OPPs?
> 
> That's what this patch series is doing right now.
> 
> So, are you saying that the way this patchset does it is fine with you
> ?

That's just to handle the ordering of operations? I need to take
a minute and understand what's changing. You may have spent
plenty of time developing/updating, but I haven't spent near
enough time understanding what's going on in these patches to
give a thorough review.

-- 
Qualcomm Innovation 

Re: [PATCH V3 1/9] PM / OPP: Reword binding supporting multiple regulators per device

2016-11-15 Thread Stephen Boyd
On 11/15, Viresh Kumar wrote:
> On 14-11-16, 18:13, Stephen Boyd wrote:
> > On 11/14, Rob Herring wrote:
> > > On Fri, Nov 11, 2016 at 08:41:20AM +0530, Viresh Kumar wrote:
> > > > On 10-11-16, 14:51, Stephen Boyd wrote:
> > > > > 
> > > > > No. The supply names (and also clock names/index) should be left
> > > > > up to the consumer of the OPP table. We don't want to encode any
> > > > > sort of details like this between the OPP table and the consumer
> > > > > of it in DT because then it seriously couples the OPP table to
> > > > > the consumer device. "The binding" in this case that needs to be
> > > > > updated is the consumer binding, to indicate that it correlated
> > > > > foo-supply and bar-supply to index 0 and 1 of the OPP table
> > > > > voltages.
> > > > 
> > > > Are you saying that we shall have a property like this then?
> > > > 
> > > > diff --git a/Documentation/devicetree/bindings/opp/opp.txt 
> > > > b/Documentation/devicetree/bindings/opp/opp.txt
> > > > index ee91cbdd95ee..733946df2fb8 100644
> > > > --- a/Documentation/devicetree/bindings/opp/opp.txt
> > > > +++ b/Documentation/devicetree/bindings/opp/opp.txt
> > > > @@ -389,7 +389,10 @@ Example 4: Handling multiple regulators
> > > > compatible = "arm,cortex-a7";
> > > > ...
> > > >  
> > > > -   cpu-supply = <_supply0>, <_supply1>, 
> > > > <_supply2>;
> > > > +   vcc0-supply = <_supply0>;
> > > > +   vcc1-supply = <_supply1>;
> > > > +   vcc2-supply = <_supply2>;
> > > > +   opp-supply-names = "vcc0", "vcc1", "vcc2";
> > > 
> > > Uh, no. You already have the names in the *-supply properties. Yes, they 
> > > are a PIA to retrieve compared to a *-names property, but that is the 
> > > nature of this style of binding.
> 
> It's not just PIA, but impossible AFAICT.
> 
> There are two important pieces of information we need for multiple
> regulator support:
> - Which regulator in the consumer node corresponds to which entry in
>   the OPP table. As Mark mentioned earlier, DT should be able to get
>   us this.

This is also possible from C code though. Or is there some case
where it isn't possible if we're sharing the same table with two
devices? I'm lost on when this would ever happen.

It feels like trying to keep the OPP table agnostic of the
consuming device and the device's binding is more trouble than
it's worth. Especially considering we have opp-shared and *-name
now.

> - The order in which the supplies need to be programmed. We have all
>   agreed to do this in code instead of inferring it from DT and this
>   patch series already does that.

Agreed. Encoding a sequence into DT doesn't sound very feasible.
How is this going to be handled though? I don't see any users of
the code we're reviewing here, so it's hard to grasp how things
will work. It would be really useful if we had some user of the
code included in the patch series to get the big picture.

> 
> I want to solve the first problem here and I don't see how it can be
> solved using such entries:
> 
>   cpus {
>   cpu@0 {
>   compatible = "arm,cortex-a7";
>   ...
> 
> vcc0-supply = <&cpu_supply0>;
> vcc1-supply = <&cpu_supply1>;
> vcc2-supply = <&cpu_supply2>;
>   operating-points-v2 = <&cpu0_opp_table>;
> };
> };
> 
>   cpu0_opp_table: opp_table0 {
>   compatible = "operating-points-v2";
>   opp-shared;
> 
>   opp@10 {
>   opp-hz = /bits/ 64 <10>;
>   opp-microvolt = <97>, /* Supply 0 */
>   <96>, /* Supply 1 */
>   <96>; /* Supply 2 */
>   };
> };
> 
> The code can't figure out which of vcc0, vcc1, vcc2 is added first in
> the CPU node and so we need to get the order somehow. A separate
> binding as I mentioned earlier is probably an (ugly) solution.
> 
> > I think the problem is that Viresh wants the binding to be "self
> > describing" so that the OPP can be used without a driver knowing
> > that a supply corresponds to a particular column in the voltage
> > table.
> 
> Right, and that's what Mark suggested as well.
> 
> > I don't understand that though. Can't we set the supply
> > names from C code somewhere based on the consumer of the OPPs?
> 
> That's what this patch series is doing right now.
> 
> So, are you saying that the way this patchset does it is fine with you
> ?

That's just to handle the ordering of operations? I need to take
a minute and understand what's changing. You may have spent
plenty of time developing/updating, but I haven't spent near
enough time understanding what's going on in these patches to
give a thorough review.

-- 
Qualcomm Innovation 

Re: [PATCH 07/12] dm: use bvec iterator helpers to implement .get_page and .next_page

2016-11-15 Thread Mike Snitzer
On Tue, Nov 15 2016 at  1:55pm -0500,
Christoph Hellwig  wrote:

> > Hi Alasdair, Mike, Christoph and anyone,
> > 
> > Could you give this one a review?
> 
> It looks nice, but I don't understand the code anywhere near well
> enough to review it.  We'll need someone from the DM to look over it.

I'll try to get to it this week.


Re: [PATCH 07/12] dm: use bvec iterator helpers to implement .get_page and .next_page

2016-11-15 Thread Mike Snitzer
On Tue, Nov 15 2016 at  1:55pm -0500,
Christoph Hellwig  wrote:

> > Hi Alasdair, Mike, Christoph and anyone,
> > 
> > Could you give this one a review?
> 
> It looks nice, but I don't understand the code anywhere near well
> enough to review it.  We'll need someone from the DM to look over it.

I'll try to get to it this week.


Re: [PATCH 07/12] dm: use bvec iterator helpers to implement .get_page and .next_page

2016-11-15 Thread Christoph Hellwig
> Hi Alasdair, Mike, Christoph and anyone,
> 
> Could you give this one a review?

It looks nice, but I don't understand the code anywhere near well
enough to review it.  We'll need someone from the DM to look over it.


[for-next][PATCH 2/6] ftrace: Add more checks for FTRACE_FL_DISABLED in processing ip records

2016-11-15 Thread Steven Rostedt
From: "Steven Rostedt (Red Hat)" 

When a module is first loaded and its function ip records are added to the
ftrace list of functions to modify, they are set to DISABLED, as their text
is still in a read only state. When the module is fully loaded, and can be
updated, the flag is cleared, and if there are any functions that should be
tracing them, it is updated at that moment.

But there are several locations that do record accounting and should ignore
records that are marked as disabled, or they can cause issues.

Alexei already fixed one location, but others need to be addressed.

Cc: sta...@vger.kernel.org
Fixes: b7bb46f2 "ftrace: Add infrastructure for delayed enabling of module 
functions"
Reported-by: Alexei Starovoitov 
Signed-off-by: Steven Rostedt 
---
 kernel/trace/ftrace.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 326498baab83..da87b3cba5b3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1862,6 +1862,10 @@ static int __ftrace_hash_update_ipmodify(struct 
ftrace_ops *ops,
 
/* Update rec->flags */
do_for_each_ftrace_rec(pg, rec) {
+
+   if (rec->flags & FTRACE_FL_DISABLED)
+   continue;
+
/* We need to update only differences of filter_hash */
in_old = !!ftrace_lookup_ip(old_hash, rec->ip);
in_new = !!ftrace_lookup_ip(new_hash, rec->ip);
@@ -1884,6 +1888,10 @@ static int __ftrace_hash_update_ipmodify(struct 
ftrace_ops *ops,
 
/* Roll back what we did above */
do_for_each_ftrace_rec(pg, rec) {
+
+   if (rec->flags & FTRACE_FL_DISABLED)
+   continue;
+
if (rec == end)
goto err_out;
 
@@ -2397,6 +2405,10 @@ void __weak ftrace_replace_code(int enable)
return;
 
do_for_each_ftrace_rec(pg, rec) {
+
+   if (rec->flags & FTRACE_FL_DISABLED)
+   continue;
+
failed = __ftrace_replace_code(rec, enable);
if (failed) {
ftrace_bug(failed, rec);
@@ -3598,6 +3610,10 @@ match_records(struct ftrace_hash *hash, char *func, int 
len, char *mod)
goto out_unlock;
 
do_for_each_ftrace_rec(pg, rec) {
+
+   if (rec->flags & FTRACE_FL_DISABLED)
+   continue;
+
if (ftrace_match_record(rec, _g, mod_match, exclude_mod)) {
ret = enter_record(hash, rec, clear_filter);
if (ret < 0) {
@@ -3793,6 +3809,9 @@ register_ftrace_function_probe(char *glob, struct 
ftrace_probe_ops *ops,
 
do_for_each_ftrace_rec(pg, rec) {
 
+   if (rec->flags & FTRACE_FL_DISABLED)
+   continue;
+
if (!ftrace_match_record(rec, _g, NULL, 0))
continue;
 
@@ -4685,6 +4704,9 @@ ftrace_set_func(unsigned long *array, int *idx, int size, 
char *buffer)
 
do_for_each_ftrace_rec(pg, rec) {
 
+   if (rec->flags & FTRACE_FL_DISABLED)
+   continue;
+
if (ftrace_match_record(rec, _g, NULL, 0)) {
/* if it is in the array */
exists = false;
-- 
2.10.2




Re: [PATCH 07/12] dm: use bvec iterator helpers to implement .get_page and .next_page

2016-11-15 Thread Christoph Hellwig
> Hi Alasdair, Mike, Christoph and anyone,
> 
> Could you give this one a review?

It looks nice, but I don't understand the code anywhere near well
enough to review it.  We'll need someone from the DM to look over it.


[for-next][PATCH 2/6] ftrace: Add more checks for FTRACE_FL_DISABLED in processing ip records

2016-11-15 Thread Steven Rostedt
From: "Steven Rostedt (Red Hat)" 

When a module is first loaded and its function ip records are added to the
ftrace list of functions to modify, they are set to DISABLED, as their text
is still in a read only state. When the module is fully loaded, and can be
updated, the flag is cleared, and if there are any functions that should be
tracing them, it is updated at that moment.

But there are several locations that do record accounting and should ignore
records that are marked as disabled, or they can cause issues.

Alexei already fixed one location, but others need to be addressed.

Cc: sta...@vger.kernel.org
Fixes: b7bb46f2 "ftrace: Add infrastructure for delayed enabling of module 
functions"
Reported-by: Alexei Starovoitov 
Signed-off-by: Steven Rostedt 
---
 kernel/trace/ftrace.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 326498baab83..da87b3cba5b3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1862,6 +1862,10 @@ static int __ftrace_hash_update_ipmodify(struct 
ftrace_ops *ops,
 
/* Update rec->flags */
do_for_each_ftrace_rec(pg, rec) {
+
+   if (rec->flags & FTRACE_FL_DISABLED)
+   continue;
+
/* We need to update only differences of filter_hash */
in_old = !!ftrace_lookup_ip(old_hash, rec->ip);
in_new = !!ftrace_lookup_ip(new_hash, rec->ip);
@@ -1884,6 +1888,10 @@ static int __ftrace_hash_update_ipmodify(struct 
ftrace_ops *ops,
 
/* Roll back what we did above */
do_for_each_ftrace_rec(pg, rec) {
+
+   if (rec->flags & FTRACE_FL_DISABLED)
+   continue;
+
if (rec == end)
goto err_out;
 
@@ -2397,6 +2405,10 @@ void __weak ftrace_replace_code(int enable)
return;
 
do_for_each_ftrace_rec(pg, rec) {
+
+   if (rec->flags & FTRACE_FL_DISABLED)
+   continue;
+
failed = __ftrace_replace_code(rec, enable);
if (failed) {
ftrace_bug(failed, rec);
@@ -3598,6 +3610,10 @@ match_records(struct ftrace_hash *hash, char *func, int 
len, char *mod)
goto out_unlock;
 
do_for_each_ftrace_rec(pg, rec) {
+
+   if (rec->flags & FTRACE_FL_DISABLED)
+   continue;
+
if (ftrace_match_record(rec, _g, mod_match, exclude_mod)) {
ret = enter_record(hash, rec, clear_filter);
if (ret < 0) {
@@ -3793,6 +3809,9 @@ register_ftrace_function_probe(char *glob, struct 
ftrace_probe_ops *ops,
 
do_for_each_ftrace_rec(pg, rec) {
 
+   if (rec->flags & FTRACE_FL_DISABLED)
+   continue;
+
if (!ftrace_match_record(rec, _g, NULL, 0))
continue;
 
@@ -4685,6 +4704,9 @@ ftrace_set_func(unsigned long *array, int *idx, int size, 
char *buffer)
 
do_for_each_ftrace_rec(pg, rec) {
 
+   if (rec->flags & FTRACE_FL_DISABLED)
+   continue;
+
if (ftrace_match_record(rec, _g, NULL, 0)) {
/* if it is in the array */
exists = false;
-- 
2.10.2




[for-next][PATCH 6/6] tracing: Allow wakeup_dl tracer to be used by instances

2016-11-15 Thread Steven Rostedt
From: Zhou Chengming 

Allow wakeup_dl tracer to be used by instances, like wakeup tracer
and wakeup_rt tracer.

Link: 
http://lkml.kernel.org/r/1479093553-31264-1-git-send-email-zhouchengmi...@huawei.com

Signed-off-by: Zhou Chengming 
Signed-off-by: Steven Rostedt 
---
 kernel/trace/trace_sched_wakeup.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/trace/trace_sched_wakeup.c 
b/kernel/trace/trace_sched_wakeup.c
index 9d4399b553a3..1bf2324dc682 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -790,6 +790,7 @@ static struct tracer wakeup_dl_tracer __read_mostly =
 #endif
.open   = wakeup_trace_open,
.close  = wakeup_trace_close,
+   .allow_instances = true,
.use_max_tr = true,
 };
 
-- 
2.10.2




Re: [PATCH v6 0/9] tpm: cleanup/fixes in existing event log support

2016-11-15 Thread Jarkko Sakkinen
On Tue, Nov 15, 2016 at 11:06:10AM -0700, Jason Gunthorpe wrote:
> On Tue, Nov 15, 2016 at 09:40:12AM -0800, Jarkko Sakkinen wrote:
> 
> > I applied fix from Colin. I for OF specific patches in this patch set
> > I do not have means to test the code paths that exercise OF specific
> > functionality. This is what worries me a bit. If I had tested-by from
> > someone running a system that can exercise those code paths, I would
> > be less worried.
> 
> I can probably check it next week on my OF systems that do not use event log

Thanks Jason. I'll append your Tested-by's to the commits when you're
done. Probably doing pull request in the latter part of next week.

/Jarkko


[for-next][PATCH 6/6] tracing: Allow wakeup_dl tracer to be used by instances

2016-11-15 Thread Steven Rostedt
From: Zhou Chengming 

Allow wakeup_dl tracer to be used by instances, like wakeup tracer
and wakeup_rt tracer.

Link: 
http://lkml.kernel.org/r/1479093553-31264-1-git-send-email-zhouchengmi...@huawei.com

Signed-off-by: Zhou Chengming 
Signed-off-by: Steven Rostedt 
---
 kernel/trace/trace_sched_wakeup.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/trace/trace_sched_wakeup.c 
b/kernel/trace/trace_sched_wakeup.c
index 9d4399b553a3..1bf2324dc682 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -790,6 +790,7 @@ static struct tracer wakeup_dl_tracer __read_mostly =
 #endif
.open   = wakeup_trace_open,
.close  = wakeup_trace_close,
+   .allow_instances = true,
.use_max_tr = true,
 };
 
-- 
2.10.2




Re: [PATCH v6 0/9] tpm: cleanup/fixes in existing event log support

2016-11-15 Thread Jarkko Sakkinen
On Tue, Nov 15, 2016 at 11:06:10AM -0700, Jason Gunthorpe wrote:
> On Tue, Nov 15, 2016 at 09:40:12AM -0800, Jarkko Sakkinen wrote:
> 
> > I applied fix from Colin. I for OF specific patches in this patch set
> > I do not have means to test the code paths that exercise OF specific
> > functionality. This is what worries me a bit. If I had tested-by from
> > someone running a system that can exercise those code paths, I would
> > be less worried.
> 
> I can probably check it next week on my OF systems that do not use event log

Thanks Jason. I'll append your Tested-by's to the commits when you're
done. Probably doing pull request in the latter part of next week.

/Jarkko


[for-next][PATCH 4/6] tracing: Optimise comparison filters and fix binary and for 64 bit

2016-11-15 Thread Steven Rostedt
From: "Steven Rostedt (Red Hat)" 

Currently, when the filter logic for comparisons (like greater-than and less-than)
is used, they share the same function and a switch statement is used to
jump to the comparison type to perform. This is done in the extreme hot path
of the tracing code, and it does not take much more space to create a
unique comparison function to perform each type of comparison and remove the
switch statement.

Also, a bug was found where the binary AND (&) operation for 64 bits could fail
if the resulting bits were greater than 32 bits, because the result was
passed into a 32 bit variable. This was fixed when adding the separate
binary AND function.

Signed-off-by: Steven Rostedt 
---
 kernel/trace/trace_events_filter.c | 80 +++---
 1 file changed, 48 insertions(+), 32 deletions(-)

diff --git a/kernel/trace/trace_events_filter.c 
b/kernel/trace/trace_events_filter.c
index e1c7e2cdc240..1ba7a6b86f55 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -145,34 +145,50 @@ struct pred_stack {
 
 /* If not of not match is equal to not of not, then it is a match */
 #define DEFINE_COMPARISON_PRED(type)   \
-static int filter_pred_##type(struct filter_pred *pred, void *event)   \
+static int filter_pred_LT_##type(struct filter_pred *pred, void *event)
\
 {  \
type *addr = (type *)(event + pred->offset);\
type val = (type)pred->val; \
-   int match = 0;  \
-   \
-   switch (pred->op) { \
-   case OP_LT: \
-   match = (*addr < val);  \
-   break;  \
-   case OP_LE: \
-   match = (*addr <= val); \
-   break;  \
-   case OP_GT: \
-   match = (*addr > val);  \
-   break;  \
-   case OP_GE: \
-   match = (*addr >= val); \
-   break;  \
-   case OP_BAND:   \
-   match = (*addr & val);  \
-   break;  \
-   default:\
-   break;  \
-   }   \
-   \
+   int match = (*addr < val);  \
return !!match == !pred->not;   \
-}
+}  \
+static int filter_pred_LE_##type(struct filter_pred *pred, void *event)
\
+{  \
+   type *addr = (type *)(event + pred->offset);\
+   type val = (type)pred->val; \
+   int match = (*addr <= val); \
+   return !!match == !pred->not;   \
+}  \
+static int filter_pred_GT_##type(struct filter_pred *pred, void *event)
\
+{  \
+   type *addr = (type *)(event + pred->offset);\
+   type val = (type)pred->val; \
+   int match = (*addr > val);  \
+   return !!match == !pred->not;   \
+}  \
+static int filter_pred_GE_##type(struct filter_pred *pred, void *event)
\
+{  \
+   type *addr = (type *)(event + pred->offset);\
+   type val = (type)pred->val; \
+   int match = (*addr >= val); \
+   return !!match == !pred->not;   

[for-next][PATCH 3/6] ftrace: Support full glob matching

2016-11-15 Thread Steven Rostedt
From: Masami Hiramatsu 

Use glob_match() to support flexible glob wildcards (*,?)
and character classes ([) for ftrace.
Since the full glob matching is slower than the current
partial matching routines(*pat, pat*, *pat*), this leaves
those routines and just adds MATCH_GLOB for complex glob
expressions.

e.g.

[root@localhost tracing]# echo 'sched*group' > set_ftrace_filter
[root@localhost tracing]# cat set_ftrace_filter
sched_free_group
sched_change_group
sched_create_group
sched_online_group
sched_destroy_group
sched_offline_group
[root@localhost tracing]# echo '[Ss]y[Ss]_*' > set_ftrace_filter
[root@localhost tracing]# head set_ftrace_filter
sys_arch_prctl
sys_rt_sigreturn
sys_ioperm
SyS_iopl
sys_modify_ldt
SyS_mmap
SyS_set_thread_area
SyS_get_thread_area
SyS_set_tid_address
sys_fork


Link: 
http://lkml.kernel.org/r/147566869501.29136.6462645009894738056.stgit@devbox

Acked-by: Namhyung Kim 
Signed-off-by: Masami Hiramatsu 
Signed-off-by: Steven Rostedt 
---
 Documentation/trace/events.txt |  9 +++--
 Documentation/trace/ftrace.txt |  9 +++--
 kernel/trace/Kconfig   |  2 ++
 kernel/trace/ftrace.c  |  4 
 kernel/trace/trace.c   |  2 +-
 kernel/trace/trace.h   |  2 ++
 kernel/trace/trace_events_filter.c | 17 -
 7 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
index 08d74d75150d..2cc08d4a326e 100644
--- a/Documentation/trace/events.txt
+++ b/Documentation/trace/events.txt
@@ -189,16 +189,13 @@ And for string fields they are:
 
 ==, !=, ~
 
-The glob (~) only accepts a wild card character (*) at the start and or
-end of the string. For example:
+The glob (~) accepts a wild card character (*,?) and character classes
+([). For example:
 
   prev_comm ~ "*sh"
   prev_comm ~ "sh*"
   prev_comm ~ "*sh*"
-
-But does not allow for it to be within the string:
-
-  prev_comm ~ "ba*sh"   <-- is invalid
+  prev_comm ~ "ba*sh"
 
 5.2 Setting filters
 ---
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 185c39fea2a0..1bc66c1db0cb 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -2218,16 +2218,13 @@ hrtimer_interrupt
 sys_nanosleep
 
 
-Perhaps this is not enough. The filters also allow simple wild
-cards. Only the following are currently available
+Perhaps this is not enough. The filters also allow glob(7) matching.
 
   *  - will match functions that begin with 
   *  - will match functions that end with 
   ** - will match functions that have  in it
-
-These are the only wild cards which are supported.
-
-  * will not work.
+  * - will match functions that begin with
+   and end with 
 
 Note: It is better to use quotes to enclose the wild cards,
   otherwise the shell may expand the parameters into names
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2a96b063d659..d5038005eb5d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -70,6 +70,7 @@ config FTRACE_NMI_ENTER
 
 config EVENT_TRACING
select CONTEXT_SWITCH_TRACER
+select GLOB
bool
 
 config CONTEXT_SWITCH_TRACER
@@ -133,6 +134,7 @@ config FUNCTION_TRACER
select KALLSYMS
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
+select GLOB
help
  Enable the kernel to trace every kernel function. This is done
  by using a compiler feature to insert a small, 5-byte No-Operation
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index da87b3cba5b3..356bb70d071e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3511,6 +3511,10 @@ static int ftrace_match(char *str, struct ftrace_glob *g)
memcmp(str + slen - g->len, g->search, g->len) == 0)
matched = 1;
break;
+   case MATCH_GLOB:
+   if (glob_match(g->search, str))
+   matched = 1;
+   break;
}
 
return matched;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8696ce6bf2f6..d904516dfdab 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4065,7 +4065,7 @@ static const char readme_msg[] =
"\n  available_filter_functions - list of functions that can be 
filtered on\n"
"  set_ftrace_filter\t- echo function name in here to only trace 
these\n"
"\t\t\t  functions\n"
-   "\t accepts: func_full_name, *func_end, func_begin*, 
*func_middle*\n"
+   "\t accepts: func_full_name or glob-matching-pattern\n"
"\t modules: Can select a group via module\n"
"\t  Format: :mod:\n"
"\t example: echo :mod:ext3 > set_ftrace_filter\n"
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fd24b1f9ac43..4b7918902ab8 100644
--- 

[for-next][PATCH 3/6] ftrace: Support full glob matching

2016-11-15 Thread Steven Rostedt
From: Masami Hiramatsu 

Use glob_match() to support flexible glob wildcards (*,?)
and character classes ([) for ftrace.
Since the full glob matching is slower than the current
partial matching routines(*pat, pat*, *pat*), this leaves
those routines and just adds MATCH_GLOB for complex glob
expressions.

e.g.

[root@localhost tracing]# echo 'sched*group' > set_ftrace_filter
[root@localhost tracing]# cat set_ftrace_filter
sched_free_group
sched_change_group
sched_create_group
sched_online_group
sched_destroy_group
sched_offline_group
[root@localhost tracing]# echo '[Ss]y[Ss]_*' > set_ftrace_filter
[root@localhost tracing]# head set_ftrace_filter
sys_arch_prctl
sys_rt_sigreturn
sys_ioperm
SyS_iopl
sys_modify_ldt
SyS_mmap
SyS_set_thread_area
SyS_get_thread_area
SyS_set_tid_address
sys_fork


Link: 
http://lkml.kernel.org/r/147566869501.29136.6462645009894738056.stgit@devbox

Acked-by: Namhyung Kim 
Signed-off-by: Masami Hiramatsu 
Signed-off-by: Steven Rostedt 
---
 Documentation/trace/events.txt |  9 +++--
 Documentation/trace/ftrace.txt |  9 +++--
 kernel/trace/Kconfig   |  2 ++
 kernel/trace/ftrace.c  |  4 
 kernel/trace/trace.c   |  2 +-
 kernel/trace/trace.h   |  2 ++
 kernel/trace/trace_events_filter.c | 17 -
 7 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
index 08d74d75150d..2cc08d4a326e 100644
--- a/Documentation/trace/events.txt
+++ b/Documentation/trace/events.txt
@@ -189,16 +189,13 @@ And for string fields they are:
 
 ==, !=, ~
 
-The glob (~) only accepts a wild card character (*) at the start and or
-end of the string. For example:
+The glob (~) accepts a wild card character (*,?) and character classes
+([). For example:
 
   prev_comm ~ "*sh"
   prev_comm ~ "sh*"
   prev_comm ~ "*sh*"
-
-But does not allow for it to be within the string:
-
-  prev_comm ~ "ba*sh"   <-- is invalid
+  prev_comm ~ "ba*sh"
 
 5.2 Setting filters
 ---
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 185c39fea2a0..1bc66c1db0cb 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -2218,16 +2218,13 @@ hrtimer_interrupt
 sys_nanosleep
 
 
-Perhaps this is not enough. The filters also allow simple wild
-cards. Only the following are currently available
+Perhaps this is not enough. The filters also allow glob(7) matching.
 
   *  - will match functions that begin with 
   *  - will match functions that end with 
   ** - will match functions that have  in it
-
-These are the only wild cards which are supported.
-
-  * will not work.
+  * - will match functions that begin with
+   and end with 
 
 Note: It is better to use quotes to enclose the wild cards,
   otherwise the shell may expand the parameters into names
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2a96b063d659..d5038005eb5d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -70,6 +70,7 @@ config FTRACE_NMI_ENTER
 
 config EVENT_TRACING
select CONTEXT_SWITCH_TRACER
+select GLOB
bool
 
 config CONTEXT_SWITCH_TRACER
@@ -133,6 +134,7 @@ config FUNCTION_TRACER
select KALLSYMS
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
+select GLOB
help
  Enable the kernel to trace every kernel function. This is done
  by using a compiler feature to insert a small, 5-byte No-Operation
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index da87b3cba5b3..356bb70d071e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3511,6 +3511,10 @@ static int ftrace_match(char *str, struct ftrace_glob *g)
memcmp(str + slen - g->len, g->search, g->len) == 0)
matched = 1;
break;
+   case MATCH_GLOB:
+   if (glob_match(g->search, str))
+   matched = 1;
+   break;
}
 
return matched;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8696ce6bf2f6..d904516dfdab 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4065,7 +4065,7 @@ static const char readme_msg[] =
"\n  available_filter_functions - list of functions that can be 
filtered on\n"
"  set_ftrace_filter\t- echo function name in here to only trace 
these\n"
"\t\t\t  functions\n"
-   "\t accepts: func_full_name, *func_end, func_begin*, 
*func_middle*\n"
+   "\t accepts: func_full_name or glob-matching-pattern\n"
"\t modules: Can select a group via module\n"
"\t  Format: :mod:\n"
"\t example: echo :mod:ext3 > set_ftrace_filter\n"
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fd24b1f9ac43..4b7918902ab8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -15,6 +15,7 @@
 #include 
 

[for-next][PATCH 4/6] tracing: Optimise comparison filters and fix binary and for 64 bit

2016-11-15 Thread Steven Rostedt
From: "Steven Rostedt (Red Hat)" 

Currently, when the filter logic for comparisons (like greater-than and less-than)
is used, they share the same function and a switch statement is used to
jump to the comparison type to perform. This is done in the extreme hot path
of the tracing code, and it does not take much more space to create a
unique comparison function to perform each type of comparison and remove the
switch statement.

Also, a bug was found where the binary AND (&) operation for 64 bits could fail
if the resulting bits were greater than 32 bits, because the result was
passed into a 32 bit variable. This was fixed when adding the separate
binary AND function.

Signed-off-by: Steven Rostedt 
---
 kernel/trace/trace_events_filter.c | 80 +++---
 1 file changed, 48 insertions(+), 32 deletions(-)

diff --git a/kernel/trace/trace_events_filter.c 
b/kernel/trace/trace_events_filter.c
index e1c7e2cdc240..1ba7a6b86f55 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -145,34 +145,50 @@ struct pred_stack {
 
 /* If not of not match is equal to not of not, then it is a match */
 #define DEFINE_COMPARISON_PRED(type)   \
-static int filter_pred_##type(struct filter_pred *pred, void *event)   \
+static int filter_pred_LT_##type(struct filter_pred *pred, void *event)
\
 {  \
type *addr = (type *)(event + pred->offset);\
type val = (type)pred->val; \
-   int match = 0;  \
-   \
-   switch (pred->op) { \
-   case OP_LT: \
-   match = (*addr < val);  \
-   break;  \
-   case OP_LE: \
-   match = (*addr <= val); \
-   break;  \
-   case OP_GT: \
-   match = (*addr > val);  \
-   break;  \
-   case OP_GE: \
-   match = (*addr >= val); \
-   break;  \
-   case OP_BAND:   \
-   match = (*addr & val);  \
-   break;  \
-   default:\
-   break;  \
-   }   \
-   \
+   int match = (*addr < val);  \
return !!match == !pred->not;   \
-}
+}  \
+static int filter_pred_LE_##type(struct filter_pred *pred, void *event)
\
+{  \
+   type *addr = (type *)(event + pred->offset);\
+   type val = (type)pred->val; \
+   int match = (*addr <= val); \
+   return !!match == !pred->not;   \
+}  \
+static int filter_pred_GT_##type(struct filter_pred *pred, void *event)
\
+{  \
+   type *addr = (type *)(event + pred->offset);\
+   type val = (type)pred->val; \
+   int match = (*addr > val);  \
+   return !!match == !pred->not;   \
+}  \
+static int filter_pred_GE_##type(struct filter_pred *pred, void *event)
\
+{  \
+   type *addr = (type *)(event + pred->offset);\
+   type val = (type)pred->val; \
+   int match = (*addr >= val); \
+   return !!match == !pred->not;   \
+}  

[for-next][PATCH 5/6] tracing/filter: Define op as the enum that it is

2016-11-15 Thread Steven Rostedt
From: "Steven Rostedt (Red Hat)" 

The trace_events_filter.c filter logic can be a bit complex. I copy this into
a userspace program where I can debug it a bit easier. One issue is the op
is defined in most places as an int instead of as an enum, and gdb just
gives the value when debugging. Having the actual op name shown in gdb is
more useful.

This has no functionality change, but helps in debugging when the file is
debugged in user space.

Signed-off-by: Steven Rostedt 
---
 kernel/trace/trace_events_filter.c | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/kernel/trace/trace_events_filter.c 
b/kernel/trace/trace_events_filter.c
index 1ba7a6b86f55..59a411ff60c7 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -108,12 +108,12 @@ static char *err_text[] = {
 };
 
 struct opstack_op {
-   int op;
+   enum filter_op_ids op;
struct list_head list;
 };
 
 struct postfix_elt {
-   int op;
+   enum filter_op_ids op;
char *operand;
struct list_head list;
 };
@@ -977,7 +977,7 @@ int filter_assign_type(const char *type)
return FILTER_OTHER;
 }
 
-static bool is_legal_op(struct ftrace_event_field *field, int op)
+static bool is_legal_op(struct ftrace_event_field *field, enum filter_op_ids 
op)
 {
if (is_string_field(field) &&
(op != OP_EQ && op != OP_NE && op != OP_GLOB))
@@ -988,8 +988,8 @@ static bool is_legal_op(struct ftrace_event_field *field, 
int op)
return true;
 }
 
-static filter_pred_fn_t select_comparison_fn(int op, int field_size,
-int field_is_signed)
+static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op,
+   int field_size, int field_is_signed)
 {
filter_pred_fn_t fn = NULL;
 
@@ -1197,7 +1197,8 @@ static inline int append_operand_char(struct 
filter_parse_state *ps, char c)
return 0;
 }
 
-static int filter_opstack_push(struct filter_parse_state *ps, int op)
+static int filter_opstack_push(struct filter_parse_state *ps,
+  enum filter_op_ids op)
 {
struct opstack_op *opstack_op;
 
@@ -1231,7 +1232,7 @@ static int filter_opstack_top(struct filter_parse_state 
*ps)
 static int filter_opstack_pop(struct filter_parse_state *ps)
 {
struct opstack_op *opstack_op;
-   int op;
+   enum filter_op_ids op;
 
if (filter_opstack_empty(ps))
return OP_NONE;
@@ -1276,7 +1277,7 @@ static int postfix_append_operand(struct 
filter_parse_state *ps, char *operand)
return 0;
 }
 
-static int postfix_append_op(struct filter_parse_state *ps, int op)
+static int postfix_append_op(struct filter_parse_state *ps, enum filter_op_ids 
op)
 {
struct postfix_elt *elt;
 
@@ -1306,8 +1307,8 @@ static void postfix_clear(struct filter_parse_state *ps)
 
 static int filter_parse(struct filter_parse_state *ps)
 {
+   enum filter_op_ids op, top_op;
int in_string = 0;
-   int op, top_op;
char ch;
 
while ((ch = infix_next(ps))) {
@@ -1398,7 +1399,8 @@ static int filter_parse(struct filter_parse_state *ps)
 
 static struct filter_pred *create_pred(struct filter_parse_state *ps,
   struct trace_event_call *call,
-  int op, char *operand1, char *operand2)
+  enum filter_op_ids op,
+  char *operand1, char *operand2)
 {
struct ftrace_event_field *field;
static struct filter_pred pred;
-- 
2.10.2




[for-next][PATCH 0/6] tracing: Updates for 4.10

2016-11-15 Thread Steven Rostedt
The first two patches have already been sent to Linus. I'm just including
them here as I based the other patches on top.

  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
for-next

Head SHA1: 8d414bd2f77ce858f6b9d119c63b9ce29cf0b75d


Alexei Starovoitov (1):
  ftrace: Ignore FTRACE_FL_DISABLED while walking dyn_ftrace records

Masami Hiramatsu (1):
  ftrace: Support full glob matching

Steven Rostedt (Red Hat) (3):
  ftrace: Add more checks for FTRACE_FL_DISABLED in processing ip records
  tracing: Optimise comparison filters and fix binary and for 64 bit
  tracing/filter: Define op as the enum that it is

Zhou Chengming (1):
  tracing: Allow wakeup_dl tracer to be used by instances


 Documentation/trace/events.txt |   9 +--
 Documentation/trace/ftrace.txt |   9 +--
 kernel/trace/Kconfig   |   2 +
 kernel/trace/ftrace.c  |  28 -
 kernel/trace/trace.c   |   2 +-
 kernel/trace/trace.h   |   2 +
 kernel/trace/trace_events_filter.c | 119 +++--
 kernel/trace/trace_sched_wakeup.c  |   1 +
 8 files changed, 115 insertions(+), 57 deletions(-)


[for-next][PATCH 5/6] tracing/filter: Define op as the enum that it is

2016-11-15 Thread Steven Rostedt
From: "Steven Rostedt (Red Hat)" 

The trace_events_filter.c filter logic can be a bit complex. I copy this into
a userspace program where I can debug it a bit easier. One issue is the op
is defined in most places as an int instead of as an enum, and gdb just
gives the value when debugging. Having the actual op name shown in gdb is
more useful.

This has no functionality change, but helps in debugging when the file is
debugged in user space.

Signed-off-by: Steven Rostedt 
---
 kernel/trace/trace_events_filter.c | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/kernel/trace/trace_events_filter.c 
b/kernel/trace/trace_events_filter.c
index 1ba7a6b86f55..59a411ff60c7 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -108,12 +108,12 @@ static char *err_text[] = {
 };
 
 struct opstack_op {
-   int op;
+   enum filter_op_ids op;
struct list_head list;
 };
 
 struct postfix_elt {
-   int op;
+   enum filter_op_ids op;
char *operand;
struct list_head list;
 };
@@ -977,7 +977,7 @@ int filter_assign_type(const char *type)
return FILTER_OTHER;
 }
 
-static bool is_legal_op(struct ftrace_event_field *field, int op)
+static bool is_legal_op(struct ftrace_event_field *field, enum filter_op_ids 
op)
 {
if (is_string_field(field) &&
(op != OP_EQ && op != OP_NE && op != OP_GLOB))
@@ -988,8 +988,8 @@ static bool is_legal_op(struct ftrace_event_field *field, 
int op)
return true;
 }
 
-static filter_pred_fn_t select_comparison_fn(int op, int field_size,
-int field_is_signed)
+static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op,
+   int field_size, int field_is_signed)
 {
filter_pred_fn_t fn = NULL;
 
@@ -1197,7 +1197,8 @@ static inline int append_operand_char(struct 
filter_parse_state *ps, char c)
return 0;
 }
 
-static int filter_opstack_push(struct filter_parse_state *ps, int op)
+static int filter_opstack_push(struct filter_parse_state *ps,
+  enum filter_op_ids op)
 {
struct opstack_op *opstack_op;
 
@@ -1231,7 +1232,7 @@ static int filter_opstack_top(struct filter_parse_state 
*ps)
 static int filter_opstack_pop(struct filter_parse_state *ps)
 {
struct opstack_op *opstack_op;
-   int op;
+   enum filter_op_ids op;
 
if (filter_opstack_empty(ps))
return OP_NONE;
@@ -1276,7 +1277,7 @@ static int postfix_append_operand(struct 
filter_parse_state *ps, char *operand)
return 0;
 }
 
-static int postfix_append_op(struct filter_parse_state *ps, int op)
+static int postfix_append_op(struct filter_parse_state *ps, enum filter_op_ids 
op)
 {
struct postfix_elt *elt;
 
@@ -1306,8 +1307,8 @@ static void postfix_clear(struct filter_parse_state *ps)
 
 static int filter_parse(struct filter_parse_state *ps)
 {
+   enum filter_op_ids op, top_op;
int in_string = 0;
-   int op, top_op;
char ch;
 
while ((ch = infix_next(ps))) {
@@ -1398,7 +1399,8 @@ static int filter_parse(struct filter_parse_state *ps)
 
 static struct filter_pred *create_pred(struct filter_parse_state *ps,
   struct trace_event_call *call,
-  int op, char *operand1, char *operand2)
+  enum filter_op_ids op,
+  char *operand1, char *operand2)
 {
struct ftrace_event_field *field;
static struct filter_pred pred;
-- 
2.10.2




[for-next][PATCH 0/6] tracing: Updates for 4.10

2016-11-15 Thread Steven Rostedt
The first two patches have already been sent to Linus. I'm just including
them here as I based the other patches on top.

  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
for-next

Head SHA1: 8d414bd2f77ce858f6b9d119c63b9ce29cf0b75d


Alexei Starovoitov (1):
  ftrace: Ignore FTRACE_FL_DISABLED while walking dyn_ftrace records

Masami Hiramatsu (1):
  ftrace: Support full glob matching

Steven Rostedt (Red Hat) (3):
  ftrace: Add more checks for FTRACE_FL_DISABLED in processing ip records
  tracing: Optimise comparison filters and fix binary and for 64 bit
  tracing/filter: Define op as the enum that it is

Zhou Chengming (1):
  tracing: Allow wakeup_dl tracer to be used by instances


 Documentation/trace/events.txt |   9 +--
 Documentation/trace/ftrace.txt |   9 +--
 kernel/trace/Kconfig   |   2 +
 kernel/trace/ftrace.c  |  28 -
 kernel/trace/trace.c   |   2 +-
 kernel/trace/trace.h   |   2 +
 kernel/trace/trace_events_filter.c | 119 +++--
 kernel/trace/trace_sched_wakeup.c  |   1 +
 8 files changed, 115 insertions(+), 57 deletions(-)


[for-next][PATCH 1/6] ftrace: Ignore FTRACE_FL_DISABLED while walking dyn_ftrace records

2016-11-15 Thread Steven Rostedt
From: Alexei Starovoitov 

ftrace_shutdown() checks for sanity of ftrace records
and if dyn_ftrace->flags is not zero, it will warn.
It can happen that 'flags' are set to FTRACE_FL_DISABLED at this point,
since some module was loaded, but before ftrace_module_enable()
cleared the flags for this module.

In other words the module.c is doing:
ftrace_module_init(mod); // calls ftrace_update_code() that sets 
flags=FTRACE_FL_DISABLED
... // here ftrace_shutdown() is called that warns, since
err = prepare_coming_module(mod); // didn't have a chance to clear 
FTRACE_FL_DISABLED

Fix it by ignoring disabled records.
It's similar to what __ftrace_hash_rec_update() is already doing.

Link: http://lkml.kernel.org/r/1478560460-3818619-1-git-send-email-...@fb.com

Cc: sta...@vger.kernel.org
Fixes: b7bb46f2 "ftrace: Add infrastructure for delayed enabling of module 
functions"
Signed-off-by: Alexei Starovoitov 
Signed-off-by: Steven Rostedt 
---
 kernel/trace/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2050a7652a86..326498baab83 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2763,7 +2763,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int 
command)
struct dyn_ftrace *rec;
 
do_for_each_ftrace_rec(pg, rec) {
-   if (FTRACE_WARN_ON_ONCE(rec->flags))
+   if (FTRACE_WARN_ON_ONCE(rec->flags & 
~FTRACE_FL_DISABLED))
pr_warn("  %pS flags:%lx\n",
(void *)rec->ip, rec->flags);
} while_for_each_ftrace_rec();
-- 
2.10.2




[for-next][PATCH 1/6] ftrace: Ignore FTRACE_FL_DISABLED while walking dyn_ftrace records

2016-11-15 Thread Steven Rostedt
From: Alexei Starovoitov 

ftrace_shutdown() checks for sanity of ftrace records
and if dyn_ftrace->flags is not zero, it will warn.
It can happen that 'flags' are set to FTRACE_FL_DISABLED at this point,
since some module was loaded, but before ftrace_module_enable()
cleared the flags for this module.

In other words the module.c is doing:
ftrace_module_init(mod); // calls ftrace_update_code() that sets 
flags=FTRACE_FL_DISABLED
... // here ftrace_shutdown() is called that warns, since
err = prepare_coming_module(mod); // didn't have a chance to clear 
FTRACE_FL_DISABLED

Fix it by ignoring disabled records.
It's similar to what __ftrace_hash_rec_update() is already doing.

Link: http://lkml.kernel.org/r/1478560460-3818619-1-git-send-email-...@fb.com

Cc: sta...@vger.kernel.org
Fixes: b7ffffbb46f2 "ftrace: Add infrastructure for delayed enabling of module 
functions"
Signed-off-by: Alexei Starovoitov 
Signed-off-by: Steven Rostedt 
---
 kernel/trace/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2050a7652a86..326498baab83 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2763,7 +2763,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int 
command)
struct dyn_ftrace *rec;
 
do_for_each_ftrace_rec(pg, rec) {
-   if (FTRACE_WARN_ON_ONCE(rec->flags))
+   if (FTRACE_WARN_ON_ONCE(rec->flags & 
~FTRACE_FL_DISABLED))
pr_warn("  %pS flags:%lx\n",
(void *)rec->ip, rec->flags);
} while_for_each_ftrace_rec();
-- 
2.10.2




Applied "spi: spi-ti-qspi: reinit of completion variable" to the spi tree

2016-11-15 Thread Mark Brown
The patch

   spi: spi-ti-qspi: reinit of completion variable

has been applied to the spi tree at

   git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git 

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.  

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

>From d06a3507fe7cfd85a296d2c1fe367dd850e9595f Mon Sep 17 00:00:00 2001
From: Prahlad V 
Date: Tue, 15 Nov 2016 23:56:43 +0530
Subject: [PATCH] spi: spi-ti-qspi: reinit of completion variable

completion variable should be reinitialized before reusing.

Signed-off-by: Prahlad V 
Signed-off-by: Mark Brown 
---
 drivers/spi/spi-ti-qspi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index caeac66a3977..ec6fb09e2e17 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -411,6 +411,7 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, 
dma_addr_t dma_dst,
tx->callback = ti_qspi_dma_callback;
tx->callback_param = qspi;
cookie = tx->tx_submit(tx);
+   reinit_completion(&qspi->transfer_complete);
 
ret = dma_submit_error(cookie);
if (ret) {
-- 
2.10.2



Applied "regulator: max77620: remove unused variable" to the regulator tree

2016-11-15 Thread Mark Brown
The patch

   regulator: max77620: remove unused variable

has been applied to the regulator tree at

   git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git 

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.  

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

>From 9a40cb0cb8b55ecfdcd3cec1381bcc46ec488588 Mon Sep 17 00:00:00 2001
From: Venkat Reddy Talla 
Date: Tue, 15 Nov 2016 22:51:20 +0530
Subject: [PATCH] regulator: max77620: remove unused variable

max77620_regulator_pdata structure variable reg_idata
is not used anywhere in the regulator driver, so removing it.

Signed-off-by: Venkat Reddy Talla 
Signed-off-by: Mark Brown 
---
 drivers/regulator/max77620-regulator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/max77620-regulator.c 
b/drivers/regulator/max77620-regulator.c
index a1b49a6d538f..c39a56b41901 100644
--- a/drivers/regulator/max77620-regulator.c
+++ b/drivers/regulator/max77620-regulator.c
@@ -73,7 +73,6 @@ struct max77620_regulator_info {
 };
 
 struct max77620_regulator_pdata {
-   struct regulator_init_data *reg_idata;
int active_fps_src;
int active_fps_pd_slot;
int active_fps_pu_slot;
-- 
2.10.2



Applied "spi: spi-ti-qspi: reinit of completion variable" to the spi tree

2016-11-15 Thread Mark Brown
The patch

   spi: spi-ti-qspi: reinit of completion variable

has been applied to the spi tree at

   git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git 

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.  

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

>From d06a3507fe7cfd85a296d2c1fe367dd850e9595f Mon Sep 17 00:00:00 2001
From: Prahlad V 
Date: Tue, 15 Nov 2016 23:56:43 +0530
Subject: [PATCH] spi: spi-ti-qspi: reinit of completion variable

completion variable should be reinitialized before reusing.

Signed-off-by: Prahlad V 
Signed-off-by: Mark Brown 
---
 drivers/spi/spi-ti-qspi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index caeac66a3977..ec6fb09e2e17 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -411,6 +411,7 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, 
dma_addr_t dma_dst,
tx->callback = ti_qspi_dma_callback;
tx->callback_param = qspi;
cookie = tx->tx_submit(tx);
+   reinit_completion(&qspi->transfer_complete);
 
ret = dma_submit_error(cookie);
if (ret) {
-- 
2.10.2



Applied "regulator: max77620: remove unused variable" to the regulator tree

2016-11-15 Thread Mark Brown
The patch

   regulator: max77620: remove unused variable

has been applied to the regulator tree at

   git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git 

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.  

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

>From 9a40cb0cb8b55ecfdcd3cec1381bcc46ec488588 Mon Sep 17 00:00:00 2001
From: Venkat Reddy Talla 
Date: Tue, 15 Nov 2016 22:51:20 +0530
Subject: [PATCH] regulator: max77620: remove unused variable

max77620_regulator_pdata structure variable reg_idata
is not used anywhere in the regulator driver, so removing it.

Signed-off-by: Venkat Reddy Talla 
Signed-off-by: Mark Brown 
---
 drivers/regulator/max77620-regulator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/max77620-regulator.c 
b/drivers/regulator/max77620-regulator.c
index a1b49a6d538f..c39a56b41901 100644
--- a/drivers/regulator/max77620-regulator.c
+++ b/drivers/regulator/max77620-regulator.c
@@ -73,7 +73,6 @@ struct max77620_regulator_info {
 };
 
 struct max77620_regulator_pdata {
-   struct regulator_init_data *reg_idata;
int active_fps_src;
int active_fps_pd_slot;
int active_fps_pu_slot;
-- 
2.10.2



Re: [PATCH] mm: add ZONE_DEVICE statistics to smaps

2016-11-15 Thread Christoph Hellwig
Hi Dan,

On Mon, Nov 14, 2016 at 07:14:22PM -0800, Dan Williams wrote:
> Wanted to get your opinion on this given your earlier concerns about
> the VM_DAX flag.
> 
> This instead lets an application know how much of a vma is backed by
> ZONE_DEVICE pages, but does not make any indications about the vma
> having DAX semantics or not.  I.e. it is possible that 'device' and
> 'device_huge' are non-zero *and* vma_is_dax() is false.  So, it is
> purely accounting the composition of the present pages in the vma.
> 
> Another option is to have something like 'shared_thp' just to account
> for file backed huge pages that dax can map.  However if ZONE_DEVICE
> is leaking into other use cases I think it makes sense to have it be a
> first class-citizen with respect to accounting alongside
> 'anonymous_thp'.

This counter sounds fine to me, it's a debug tool and not an obvious
abuse candidate like VM_DAX.  But I'll defer to the VM folks for a real
review.


Re: [PATCH] mm: add ZONE_DEVICE statistics to smaps

2016-11-15 Thread Christoph Hellwig
Hi Dan,

On Mon, Nov 14, 2016 at 07:14:22PM -0800, Dan Williams wrote:
> Wanted to get your opinion on this given your earlier concerns about
> the VM_DAX flag.
> 
> This instead lets an application know how much of a vma is backed by
> ZONE_DEVICE pages, but does not make any indications about the vma
> having DAX semantics or not.  I.e. it is possible that 'device' and
> 'device_huge' are non-zero *and* vma_is_dax() is false.  So, it is
> purely accounting the composition of the present pages in the vma.
> 
> Another option is to have something like 'shared_thp' just to account
> for file backed huge pages that dax can map.  However if ZONE_DEVICE
> is leaking into other use cases I think it makes sense to have it be a
> first class-citizen with respect to accounting alongside
> 'anonymous_thp'.

This counter sounds fine to me, it's a debug tool and not an obvious
abuse candidate like VM_DAX.  But I'll defer to the VM folks for a real
review.


Re: [RFC/RFT][PATCH v3 0/5] Functional dependencies between devices

2016-11-15 Thread Lukas Wunner
On Thu, Sep 29, 2016 at 02:51:45AM +0200, Rafael J. Wysocki wrote:
> On Wednesday, September 28, 2016 01:42:20 PM Lukas Wunner wrote:
> > On Wed, Sep 28, 2016 at 02:33:21AM +0200, Rafael J. Wysocki wrote:
> > > I'm only a bit reluctant about advertising the usage of links between
> > > children and parents, because that doesn't look like the right tool for
> > > the purpose (as I said before, I'd prefer to add a device flag causing
> > > the parent driver to be probed before the child one if needed).
> > 
> > That wouldn't cover the unbinding of the child when the parent unbinds
> > though, so it would only be a subset of the functionality offered by
> > device links.
> > 
> > I actually don't know of a use case where driver presence is needed
> > between parent and child.  But the patches look like they should work
> > out of the box in such a scenario, so I was thinking, why forbid it?
> > Someone might just try that because they think it should obviously work,
> > and then they'll find out at runtime that it's forbidden.  That gives
> > us only a score of 5 in Rusty's API rating scheme.
> > 
> > However for consistency, if you do want to forbid it, I think it should
> > be forbidden for all ancestors of the device, not just the parent as v3
> > does it.  (Suspend/resume + shutdown ordering is already handled for
> > hierarchical dependencies, i.e. all ancestors.)
> 
> Well, there is a difference between allowing something to be done and
> documenting it as a good idea. :-)

I'm reworking the documentation and to address your concerns I have
now reformulated this paragraph as follows:

To prevent introduction of dependency loops into the graph, it is
verified upon device link addition that the supplier is not dependent
on the consumer or any children or consumers of the consumer.
(Call to device_is_dependent() from device_link_add().)  If that
constraint is violated, device_link_add() will return %NULL and
a WARNING will be logged.

Notably this also prevents addition of a device link from a parent
device to a child.  However the converse is allowed, i.e. a device link
from a child to a parent.  Since the driver core already guarantees
correct suspend/resume and shutdown ordering between parent and child,
such a device link only makes sense if a driver presence dependency is
needed on top of that.  In that case driver authors should weigh
carefully if a device link is the right tool for the purpose.
A more suitable approach might be to simply use deferred probing or
add a device flag causing the parent driver to be probed before the
child one.

If you'd prefer a different wording just shout.

Thanks,

Lukas


Re: [RFC/RFT][PATCH v3 0/5] Functional dependencies between devices

2016-11-15 Thread Lukas Wunner
On Thu, Sep 29, 2016 at 02:51:45AM +0200, Rafael J. Wysocki wrote:
> On Wednesday, September 28, 2016 01:42:20 PM Lukas Wunner wrote:
> > On Wed, Sep 28, 2016 at 02:33:21AM +0200, Rafael J. Wysocki wrote:
> > > I'm only a bit reluctant about advertising the usage of links between
> > > children and parents, because that doesn't look like the right tool for
> > > the purpose (as I said before, I'd prefer to add a device flag causing
> > > the parent driver to be probed before the child one if needed).
> > 
> > That wouldn't cover the unbinding of the child when the parent unbinds
> > though, so it would only be a subset of the functionality offered by
> > device links.
> > 
> > I actually don't know of a use case where driver presence is needed
> > between parent and child.  But the patches look like they should work
> > out of the box in such a scenario, so I was thinking, why forbid it?
> > Someone might just try that because they think it should obviously work,
> > and then they'll find out at runtime that it's forbidden.  That gives
> > us only a score of 5 in Rusty's API rating scheme.
> > 
> > However for consistency, if you do want to forbid it, I think it should
> > be forbidden for all ancestors of the device, not just the parent as v3
> > does it.  (Suspend/resume + shutdown ordering is already handled for
> > hierarchical dependencies, i.e. all ancestors.)
> 
> Well, there is a difference between allowing something to be done and
> documenting it as a good idea. :-)

I'm reworking the documentation and to address your concerns I have
now reformulated this paragraph as follows:

To prevent introduction of dependency loops into the graph, it is
verified upon device link addition that the supplier is not dependent
on the consumer or any children or consumers of the consumer.
(Call to device_is_dependent() from device_link_add().)  If that
constraint is violated, device_link_add() will return %NULL and
a WARNING will be logged.

Notably this also prevents addition of a device link from a parent
device to a child.  However the converse is allowed, i.e. a device link
from a child to a parent.  Since the driver core already guarantees
correct suspend/resume and shutdown ordering between parent and child,
such a device link only makes sense if a driver presence dependency is
needed on top of that.  In that case driver authors should weigh
carefully if a device link is the right tool for the purpose.
A more suitable approach might be to simply use deferred probing or
add a device flag causing the parent driver to be probed before the
child one.

If you'd prefer a different wording just shout.

Thanks,

Lukas


Re: [PATCH V2 1/2] pinctrl: tegra: Add DT binding for io pads control

2016-11-15 Thread Jon Hunter

On 09/11/16 13:06, Laxman Dewangan wrote:
> NVIDIA Tegra124 and later SoCs support the multi-voltage level and
> low power state of some of its IO pads. The IO pads can work in
> the voltage of the 1.8V and 3.3V of IO voltage from IO power rail
> sources. When IO interfaces are not used then IO pads can be
> configure in low power state to reduce the power consumption from
> that IO pads.
> 
> On Tegra124, the voltage level of IO power rail source is auto
> detected by hardware(SoC) and hence it is only require to configure
> in low power mode if IO pads are not used.
> 
> On T210 onwards, the auto-detection of voltage level from IO power
> rail is removed from SoC and hence SW need to configure the PMC
> register explicitly to set proper voltage in IO pads based on
> IO rail power source voltage.
> 
> Add DT binding document for detailing the DT properties for
> configuring IO pads voltage levels and its power state.
> 
> Signed-off-by: Laxman Dewangan 
> 
> ---
> Changes from V1:
>  The DT binding document is modified to explain the regulator handle
>  for different IOs and how can it be passed from the DT.
> ---
>  .../bindings/pinctrl/nvidia,tegra-io-pad.txt   | 126 
> +
>  1 file changed, 126 insertions(+)
>  create mode 100644 
> Documentation/devicetree/bindings/pinctrl/nvidia,tegra-io-pad.txt
> 
> diff --git 
> a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra-io-pad.txt 
> b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra-io-pad.txt
> new file mode 100644
> index 0000000..6ca961f
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra-io-pad.txt
> @@ -0,0 +1,126 @@
> +NVIDIA Tegra PMC IO pad controller
> +
> +NVIDIA Tegra124 and later SoCs support the multi-voltage level and
> +low power state of some of its IO pads. When IO interface are not
> +used then IO pads can be configure in low power state to reduce
> +the power from that IO pads. The IO pads can work in the voltage
> +of the 1.8V and 3.3V of IO voltage from power rail sources.

The last sentence is a bit unclear and does not sound correct. I am not
sure if you are missing the word 'range' somewhere or if you are trying
to say it must be either 1.8V or 3.3V. Looks like you have the same
sentence on the changelog too.

Cheers
Jon

-- 
nvpublic


Re: [PATCH V2 1/2] pinctrl: tegra: Add DT binding for io pads control

2016-11-15 Thread Jon Hunter

On 09/11/16 13:06, Laxman Dewangan wrote:
> NVIDIA Tegra124 and later SoCs support the multi-voltage level and
> low power state of some of its IO pads. The IO pads can work in
> the voltage of the 1.8V and 3.3V of IO voltage from IO power rail
> sources. When IO interfaces are not used then IO pads can be
> configure in low power state to reduce the power consumption from
> that IO pads.
> 
> On Tegra124, the voltage level of IO power rail source is auto
> detected by hardware(SoC) and hence it is only require to configure
> in low power mode if IO pads are not used.
> 
> On T210 onwards, the auto-detection of voltage level from IO power
> rail is removed from SoC and hence SW need to configure the PMC
> register explicitly to set proper voltage in IO pads based on
> IO rail power source voltage.
> 
> Add DT binding document for detailing the DT properties for
> configuring IO pads voltage levels and its power state.
> 
> Signed-off-by: Laxman Dewangan 
> 
> ---
> Changes from V1:
>  The DT binding document is modified to explain the regulator handle
>  for different IOs and how can it be passed from the DT.
> ---
>  .../bindings/pinctrl/nvidia,tegra-io-pad.txt   | 126 
> +
>  1 file changed, 126 insertions(+)
>  create mode 100644 
> Documentation/devicetree/bindings/pinctrl/nvidia,tegra-io-pad.txt
> 
> diff --git 
> a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra-io-pad.txt 
> b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra-io-pad.txt
> new file mode 100644
> index 0000000..6ca961f
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra-io-pad.txt
> @@ -0,0 +1,126 @@
> +NVIDIA Tegra PMC IO pad controller
> +
> +NVIDIA Tegra124 and later SoCs support the multi-voltage level and
> +low power state of some of its IO pads. When IO interface are not
> +used then IO pads can be configure in low power state to reduce
> +the power from that IO pads. The IO pads can work in the voltage
> +of the 1.8V and 3.3V of IO voltage from power rail sources.

The last sentence is a bit unclear and does not sound correct. I am not
sure if you are missing the word 'range' somewhere or if you are trying
to say it must be either 1.8V or 3.3V. Looks like you have the same
sentence on the changelog too.

Cheers
Jon

-- 
nvpublic


Re: [PATCH 05/29] fscrypt: Let fs select encryption index/tweak

2016-11-15 Thread Eric Biggers
On Sun, Nov 13, 2016 at 10:20:48PM +0100, Richard Weinberger wrote:
> From: David Gstir 
> 
> Avoid re-use of page index as tweak for AES-XTS when multiple parts of
> same page are encrypted. This will happen on multiple (partial) calls of
> fscrypt_encrypt_page on same page.
> page->index is only valid for writeback pages.
> 
> Signed-off-by: David Gstir 
> Signed-off-by: Richard Weinberger 
> ---
>  fs/crypto/crypto.c   | 11 +++
>  fs/ext4/inode.c  |  4 ++--
>  fs/ext4/page-io.c|  3 ++-
>  fs/f2fs/data.c   |  5 +++--
>  include/linux/fscrypto.h |  9 +
>  5 files changed, 19 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
> index f5c5e84ea9db..b6029785714c 100644
> --- a/fs/crypto/crypto.c
> +++ b/fs/crypto/crypto.c
> @@ -218,6 +218,8 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx 
> *ctx, gfp_t gfp_flags)
>   * @plaintext_page:   The page to encrypt. Must be locked.
>   * @plaintext_len:Length of plaintext within page
>   * @plaintext_offset: Offset of plaintext within page
> + * @index:Index for encryption. This is mainly the page index, 
> but
> + *but might be different for multiple calls on same page.

Index reuse (IV reuse) has implications for confidentiality of the encrypted
data.  Really the index *MUST* not be reused unless there is no alternative.
The comment should express this, not just suggest that the index "might" be
different.

>   * @gfp_flags:The gfp flag for memory allocation
>   *
>   * Encrypts plaintext_page using the ctx encryption context. If
> @@ -235,7 +237,7 @@ struct page *fscrypt_encrypt_page(const struct inode 
> *inode,
>   struct page *plaintext_page,
>   unsigned int plaintext_len,
>   unsigned int plaintext_offset,
> - gfp_t gfp_flags)
> + pgoff_t index, gfp_t gfp_flags)

Now that 'index' is no longer necessarily the page offset, perhaps it should
have type 'u64' instead of 'pgoff_t'?

Also, if the intent is just that the 'index' represent the data's offset in
filesystem blocks rather than in pages, then perhaps it should be documented as
such.  (This would be correct for ext4 and f2fs; they just happen to only
support encryption with block_size = PAGE_SIZE currently.)

Eric


Re: [PATCH 05/29] fscrypt: Let fs select encryption index/tweak

2016-11-15 Thread Eric Biggers
On Sun, Nov 13, 2016 at 10:20:48PM +0100, Richard Weinberger wrote:
> From: David Gstir 
> 
> Avoid re-use of page index as tweak for AES-XTS when multiple parts of
> same page are encrypted. This will happen on multiple (partial) calls of
> fscrypt_encrypt_page on same page.
> page->index is only valid for writeback pages.
> 
> Signed-off-by: David Gstir 
> Signed-off-by: Richard Weinberger 
> ---
>  fs/crypto/crypto.c   | 11 +++
>  fs/ext4/inode.c  |  4 ++--
>  fs/ext4/page-io.c|  3 ++-
>  fs/f2fs/data.c   |  5 +++--
>  include/linux/fscrypto.h |  9 +
>  5 files changed, 19 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
> index f5c5e84ea9db..b6029785714c 100644
> --- a/fs/crypto/crypto.c
> +++ b/fs/crypto/crypto.c
> @@ -218,6 +218,8 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx 
> *ctx, gfp_t gfp_flags)
>   * @plaintext_page:   The page to encrypt. Must be locked.
>   * @plaintext_len:Length of plaintext within page
>   * @plaintext_offset: Offset of plaintext within page
> + * @index:Index for encryption. This is mainly the page index, 
> but
> + *but might be different for multiple calls on same page.

Index reuse (IV reuse) has implications for confidentiality of the encrypted
data.  Really the index *MUST* not be reused unless there is no alternative.
The comment should express this, not just suggest that the index "might" be
different.

>   * @gfp_flags:The gfp flag for memory allocation
>   *
>   * Encrypts plaintext_page using the ctx encryption context. If
> @@ -235,7 +237,7 @@ struct page *fscrypt_encrypt_page(const struct inode 
> *inode,
>   struct page *plaintext_page,
>   unsigned int plaintext_len,
>   unsigned int plaintext_offset,
> - gfp_t gfp_flags)
> + pgoff_t index, gfp_t gfp_flags)

Now that 'index' is no longer necessarily the page offset, perhaps it should
have type 'u64' instead of 'pgoff_t'?

Also, if the intent is just that the 'index' represent the data's offset in
filesystem blocks rather than in pages, then perhaps it should be documented as
such.  (This would be correct for ext4 and f2fs; they just happen to only
support encryption with block_size = PAGE_SIZE currently.)

Eric


[PATCH] perf/x86/uncore: remove event_list for snb client uncore IMC

2016-11-15 Thread kan . liang
From: Kan Liang 

A BUG was found by perf_fuzzer after enabling KASAN.
[  205.748005] BUG: KASAN: slab-out-of-bounds in
snb_uncore_imc_event_del+0x6c/0xa0 at addr 8800caa43768
[  205.758324] Read of size 8 by task perf_fuzzer/6618
[  205.763589] CPU: 0 PID: 6618 Comm: perf_fuzzer Not tainted 4.9.0-rc5
 #4
[  205.770721] Hardware name: LENOVO 10AM000AUS/SHARKBAY, BIOS
FBKT72AUS 01/26/2014
[  205.778689]  8800c3c479b8 816bb796 88011ec00600
8800caa43580
[  205.786759]  8800c3c479e0 812fb961 8800c3c47a78
8800caa43580
[  205.794850]  8800caa43580 8800c3c47a68 812fbbd8
8800c3c47a28
[  205.802911] Call Trace:
[  205.805559]  [] dump_stack+0x63/0x8d
[  205.811135]  [] kasan_object_err+0x21/0x70
[  205.817267]  [] kasan_report_error+0x1d8/0x4c0
[  205.823752]  [] ? __lock_is_held+0x75/0xc0
[  205.829868]  [] ?
snb_uncore_imc_read_counter+0x42/0x50
[  205.837198]  [] ?
uncore_perf_event_update+0xe2/0x160
[  205.844337]  [] kasan_report+0x39/0x40
[  205.850085]  [] ?
snb_uncore_imc_event_del+0x6c/0xa0

It's caused by accessing box->event_list.

For client IMC, there are no generic counters. It defines its own fixed
free running counters. So event_list and n_events are unused. They can
be removed safely.

Reported-by: Vince Weaver 
Tested-by: Vince Weaver 
Signed-off-by: Kan Liang 
---
 arch/x86/events/intel/uncore_snb.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/arch/x86/events/intel/uncore_snb.c 
b/arch/x86/events/intel/uncore_snb.c
index 81195cc..a3dcc12 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -490,24 +490,12 @@ static int snb_uncore_imc_event_add(struct perf_event 
*event, int flags)
 
snb_uncore_imc_event_start(event, 0);
 
-   box->n_events++;
-
return 0;
 }
 
 static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
 {
-   struct intel_uncore_box *box = uncore_event_to_box(event);
-   int i;
-
snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-
-   for (i = 0; i < box->n_events; i++) {
-   if (event == box->event_list[i]) {
-   --box->n_events;
-   break;
-   }
-   }
 }
 
 int snb_pci2phy_map_init(int devid)
-- 
2.5.5



[PATCH] perf/x86/uncore: remove event_list for snb client uncore IMC

2016-11-15 Thread kan . liang
From: Kan Liang 

A BUG was found by perf_fuzzer after enabling KASAN.
[  205.748005] BUG: KASAN: slab-out-of-bounds in
snb_uncore_imc_event_del+0x6c/0xa0 at addr 8800caa43768
[  205.758324] Read of size 8 by task perf_fuzzer/6618
[  205.763589] CPU: 0 PID: 6618 Comm: perf_fuzzer Not tainted 4.9.0-rc5
 #4
[  205.770721] Hardware name: LENOVO 10AM000AUS/SHARKBAY, BIOS
FBKT72AUS 01/26/2014
[  205.778689]  8800c3c479b8 816bb796 88011ec00600
8800caa43580
[  205.786759]  8800c3c479e0 812fb961 8800c3c47a78
8800caa43580
[  205.794850]  8800caa43580 8800c3c47a68 812fbbd8
8800c3c47a28
[  205.802911] Call Trace:
[  205.805559]  [] dump_stack+0x63/0x8d
[  205.811135]  [] kasan_object_err+0x21/0x70
[  205.817267]  [] kasan_report_error+0x1d8/0x4c0
[  205.823752]  [] ? __lock_is_held+0x75/0xc0
[  205.829868]  [] ?
snb_uncore_imc_read_counter+0x42/0x50
[  205.837198]  [] ?
uncore_perf_event_update+0xe2/0x160
[  205.844337]  [] kasan_report+0x39/0x40
[  205.850085]  [] ?
snb_uncore_imc_event_del+0x6c/0xa0

It's caused by accessing box->event_list.

For client IMC, there are no generic counters. It defines its own fixed
free running counters. So event_list and n_events are unused. They can
be removed safely.

Reported-by: Vince Weaver 
Tested-by: Vince Weaver 
Signed-off-by: Kan Liang 
---
 arch/x86/events/intel/uncore_snb.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/arch/x86/events/intel/uncore_snb.c 
b/arch/x86/events/intel/uncore_snb.c
index 81195cc..a3dcc12 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -490,24 +490,12 @@ static int snb_uncore_imc_event_add(struct perf_event 
*event, int flags)
 
snb_uncore_imc_event_start(event, 0);
 
-   box->n_events++;
-
return 0;
 }
 
 static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
 {
-   struct intel_uncore_box *box = uncore_event_to_box(event);
-   int i;
-
snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-
-   for (i = 0; i < box->n_events; i++) {
-   if (event == box->event_list[i]) {
-   --box->n_events;
-   break;
-   }
-   }
 }
 
 int snb_pci2phy_map_init(int devid)
-- 
2.5.5



Re: [PATCHv2 5/6] arm64: Use __pa_symbol for _end

2016-11-15 Thread Catalin Marinas
On Mon, Nov 14, 2016 at 10:41:29AM -0800, Laura Abbott wrote:
> On 11/14/2016 10:19 AM, Catalin Marinas wrote:
> > On Thu, Nov 03, 2016 at 03:51:07PM +0000, Mark Rutland wrote:
> >> On Wed, Nov 02, 2016 at 05:56:42PM -0600, Laura Abbott wrote:
> >>> On 11/02/2016 04:52 PM, Mark Rutland wrote:
>  On Wed, Nov 02, 2016 at 03:00:53PM -0600, Laura Abbott wrote:
> >
> > __pa_symbol is technically the marco that should be used for kernel
> > symbols. Switch to this as a pre-requisite for DEBUG_VIRTUAL.
> 
>  Nit: s/marco/macro/
> 
>  I see there are some other uses of __pa() that look like they 
>  could/should be
>  __pa_symbol(), e.g. in mark_rodata_ro().
> 
>  I guess strictly speaking those need to be updated to? Or is there a 
>  reason
>  that we should not?
> >>>
> >>> If the concept of __pa_symbol is okay then yes I think all uses of __pa
> >>> should eventually be converted for consistency and debugging.
> >>
> >> I have no strong feelings either way about __pa_symbol(); I'm not clear on 
> >> what
> >> the purpose of __pa_symbol() is specifically, but I'm happy even if it's 
> >> just
> >> for consistency with other architectures.
> > 
> > At a quick grep, it seems to only be used by mips and x86 and a single
> > place in mm/memblock.c.
> > 
> > Since we haven't seen any issues on arm/arm64 without this macro, can we
> > not just continue to use __pa()?
> 
> Technically yes but if it's introduced it may be confusing why it's being
> used some places but not others.

As it currently stands, your patches introduce the first and only use of
__pa_symbol to arch/arm64. But I don't see the point, unless we replace
all of the other uses.

> Maybe the bounds in the debug virtual check should just be adjusted so
> we don't need __pa_symbol along with a nice fat comment explaining
> why. 

I'm fine with __pa_symbol use entirely from under arch/arm64. But if you
want to use __pa_symbol, I tried to change most (all?) places where
necessary, together with making virt_to_phys() only deal with the kernel
linear mapping. Not sure it looks cleaner, especially the
__va(__pa_symbol()) cases (we could replace the latter with another
macro and proper comment):

-8<--
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index a79b969c26fc..fa6c44ebb51f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -47,7 +47,7 @@
  * If the page is in the bottom half, we have to use the top half. If
  * the page is in the top half, we have to use the bottom half:
  *
- * T = __virt_to_phys(__hyp_idmap_text_start)
+ * T = __pa_symbol(__hyp_idmap_text_start)
  * if (T & BIT(VA_BITS - 1))
  * HYP_VA_MIN = 0  //idmap in upper half
  * else
@@ -271,7 +271,7 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
 }
 
-#define kvm_virt_to_phys(x)__virt_to_phys((unsigned long)(x))
+#define kvm_virt_to_phys(x)__pa_symbol((unsigned long)(x))
 
 void kvm_set_way_flush(struct kvm_vcpu *vcpu);
 void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index eac3dbb7e313..e02f45e5ee1b 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -169,15 +169,22 @@ extern u64kimage_voffset;
  */
 #define __virt_to_phys_nodebug(x) ({   \
phys_addr_t __x = (phys_addr_t)(x); \
-   __x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET :   \
-(__x - kimage_voffset); })
+   VM_BUG_ON(!(__x & BIT(VA_BITS - 1)));   \
+   ((__x & ~PAGE_OFFSET) + PHYS_OFFSET);   \
+})
+
+#define __pa_symbol_nodebug(x) ({  \
+   phys_addr_t __x = (phys_addr_t)(x); \
+   VM_BUG_ON(__x & BIT(VA_BITS - 1));  \
+   (__x - kimage_voffset); \
+})
 
 #ifdef CONFIG_DEBUG_VIRTUAL
 extern unsigned long __virt_to_phys(unsigned long x);
 extern unsigned long __phys_addr_symbol(unsigned long x);
 #else
 #define __virt_to_phys(x)  __virt_to_phys_nodebug(x)
-#define __phys_addr_symbol __pa
+#define __phys_addr_symbol(x)  __pa_symbol_nodebug(x)
 #endif
 
 #define __phys_to_virt(x)  ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
@@ -210,7 +217,7 @@ static inline void *phys_to_virt(phys_addr_t x)
  * Drivers should NOT use these either.
  */
 #define __pa(x)__virt_to_phys((unsigned long)(x))
-#define __pa_symbol(x)  __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))
+#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))
 #define 

Re: [PATCHv2 5/6] arm64: Use __pa_symbol for _end

2016-11-15 Thread Catalin Marinas
On Mon, Nov 14, 2016 at 10:41:29AM -0800, Laura Abbott wrote:
> On 11/14/2016 10:19 AM, Catalin Marinas wrote:
> > On Thu, Nov 03, 2016 at 03:51:07PM +0000, Mark Rutland wrote:
> >> On Wed, Nov 02, 2016 at 05:56:42PM -0600, Laura Abbott wrote:
> >>> On 11/02/2016 04:52 PM, Mark Rutland wrote:
>  On Wed, Nov 02, 2016 at 03:00:53PM -0600, Laura Abbott wrote:
> >
> > __pa_symbol is technically the marco that should be used for kernel
> > symbols. Switch to this as a pre-requisite for DEBUG_VIRTUAL.
> 
>  Nit: s/marco/macro/
> 
>  I see there are some other uses of __pa() that look like they 
>  could/should be
>  __pa_symbol(), e.g. in mark_rodata_ro().
> 
>  I guess strictly speaking those need to be updated too? Or is there a 
>  reason
>  that we should not?
> >>>
> >>> If the concept of __pa_symbol is okay then yes I think all uses of __pa
> >>> should eventually be converted for consistency and debugging.
> >>
> >> I have no strong feelings either way about __pa_symbol(); I'm not clear on 
> >> what
> >> the purpose of __pa_symbol() is specifically, but I'm happy even if it's 
> >> just
> >> for consistency with other architectures.
> > 
> > At a quick grep, it seems to only be used by mips and x86 and a single
> > place in mm/memblock.c.
> > 
> > Since we haven't seen any issues on arm/arm64 without this macro, can we
> > not just continue to use __pa()?
> 
> Technically yes but if it's introduced it may be confusing why it's being
> used some places but not others.

As it currently stands, your patches introduce the first and only use of
__pa_symbol to arch/arm64. But I don't see the point, unless we replace
all of the other uses.

> Maybe the bounds in the debug virtual check should just be adjusted so
> we don't need __pa_symbol along with a nice fat comment explaining
> why. 

I'm fine with __pa_symbol use entirely from under arch/arm64. But if you
want to use __pa_symbol, I tried to change most (all?) places where
necessary, together with making virt_to_phys() only deal with the kernel
linear mapping. Not sure it looks cleaner, especially the
__va(__pa_symbol()) cases (we could replace the latter with another
macro and proper comment):

-8<--
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index a79b969c26fc..fa6c44ebb51f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -47,7 +47,7 @@
  * If the page is in the bottom half, we have to use the top half. If
  * the page is in the top half, we have to use the bottom half:
  *
- * T = __virt_to_phys(__hyp_idmap_text_start)
+ * T = __pa_symbol(__hyp_idmap_text_start)
  * if (T & BIT(VA_BITS - 1))
  * HYP_VA_MIN = 0  //idmap in upper half
  * else
@@ -271,7 +271,7 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
 }
 
-#define kvm_virt_to_phys(x)__virt_to_phys((unsigned long)(x))
+#define kvm_virt_to_phys(x)__pa_symbol((unsigned long)(x))
 
 void kvm_set_way_flush(struct kvm_vcpu *vcpu);
 void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index eac3dbb7e313..e02f45e5ee1b 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -169,15 +169,22 @@ extern u64kimage_voffset;
  */
 #define __virt_to_phys_nodebug(x) ({   \
phys_addr_t __x = (phys_addr_t)(x); \
-   __x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET :   \
-(__x - kimage_voffset); })
+   VM_BUG_ON(!(__x & BIT(VA_BITS - 1)));   \
+   ((__x & ~PAGE_OFFSET) + PHYS_OFFSET);   \
+})
+
+#define __pa_symbol_nodebug(x) ({  \
+   phys_addr_t __x = (phys_addr_t)(x); \
+   VM_BUG_ON(__x & BIT(VA_BITS - 1));  \
+   (__x - kimage_voffset); \
+})
 
 #ifdef CONFIG_DEBUG_VIRTUAL
 extern unsigned long __virt_to_phys(unsigned long x);
 extern unsigned long __phys_addr_symbol(unsigned long x);
 #else
 #define __virt_to_phys(x)  __virt_to_phys_nodebug(x)
-#define __phys_addr_symbol __pa
+#define __phys_addr_symbol(x)  __pa_symbol_nodebug(x)
 #endif
 
 #define __phys_to_virt(x)  ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
@@ -210,7 +217,7 @@ static inline void *phys_to_virt(phys_addr_t x)
  * Drivers should NOT use these either.
  */
 #define __pa(x)__virt_to_phys((unsigned long)(x))
-#define __pa_symbol(x)  __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))
+#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))
 #define 

Re: [PATCH 03/29] fscrypt: Enable partial page encryption

2016-11-15 Thread Eric Biggers
On Sun, Nov 13, 2016 at 10:20:46PM +0100, Richard Weinberger wrote:
> From: David Gstir 
> 
> Not all filesystems work on full pages, thus we should allow them to
> hand partial pages to fscrypt for en/decryption.
> 
> Signed-off-by: David Gstir 
> Signed-off-by: Richard Weinberger 
> ---
>  fs/crypto/crypto.c   | 42 ++
>  fs/ext4/inode.c  |  6 --
>  fs/ext4/page-io.c|  2 +-
>  fs/f2fs/data.c   |  2 ++
>  include/linux/fscrypto.h | 16 +++-
>  5 files changed, 44 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
> index 222a70520565..e170aa05011d 100644
> --- a/fs/crypto/crypto.c
> +++ b/fs/crypto/crypto.c
> @@ -149,6 +149,7 @@ typedef enum {
>  static int do_page_crypto(struct inode *inode,
>   fscrypt_direction_t rw, pgoff_t index,
>   struct page *src_page, struct page *dest_page,
> + unsigned int src_len, unsigned int src_offset,
>   gfp_t gfp_flags)

The naming of 'src_len' and 'src_offset', and 'plaintext_len' and
'plaintext_offset' below, is misleading because the length and offset actually
apply to the destination too.  Shouldn't they be 'len' and 'offset', or 'len'
and 'offs' like fscrypt_decrypt_page()?

I'm also a little concerned that users will mix up the src_len and src_offset
arguments and end up "encrypting" 0 bytes at offset PAGE_SIZE.  Adding a
'BUG_ON(len == 0)' may be appropriate.

>  /**
>   * fscypt_encrypt_page() - Encrypts a page
> - * @inode:  The inode for which the encryption should take place
> - * @plaintext_page: The page to encrypt. Must be locked.
> - * @gfp_flags:  The gfp flag for memory allocation
> + * @inode:The inode for which the encryption should take place
> + * @plaintext_page:   The page to encrypt. Must be locked.
> + * @plaintext_len:Length of plaintext within page
> + * @plaintext_offset: Offset of plaintext within page
> + * @gfp_flags:The gfp flag for memory allocation
>   *
>   * Encrypts plaintext_page using the ctx encryption context. If
>   * the filesystem supports it, encryption is performed in-place, otherwise a
> @@ -229,13 +232,17 @@ static struct page *alloc_bounce_page(struct 
> fscrypt_ctx *ctx, gfp_t gfp_flags)
>   * error value or NULL.
>   */
>  struct page *fscrypt_encrypt_page(struct inode *inode,
> - struct page *plaintext_page, gfp_t gfp_flags)
> + struct page *plaintext_page,
> + unsigned int plaintext_len,
> + unsigned int plaintext_offset,
> + gfp_t gfp_flags)
> +
>  {
>   struct fscrypt_ctx *ctx;
>   struct page *ciphertext_page = plaintext_page;
>   int err;
>  
> - BUG_ON(!PageLocked(plaintext_page));
> + BUG_ON(plaintext_len % FS_CRYPTO_BLOCK_SIZE != 0);

What is going on with PageLocked()?  Is it still a requirement?  If not the
function comment needs to be fixed.

> -int fscrypt_decrypt_page(struct inode *inode, struct page *page)
> +int fscrypt_decrypt_page(struct inode *inode, struct page *page,
> + unsigned int len, unsigned int offs)
>  {
> - BUG_ON(!PageLocked(page));
> -
> - return do_page_crypto(inode, FS_DECRYPT, page->index, page, page,
> + return do_page_crypto(inode, FS_DECRYPT, page->index, page, page, len, 
> offs,
>   GFP_NOFS);
>  }

Same with PageLocked().  Is it still a requirement?  If not the function comment
needs to be fixed.

Eric


Re: [PATCH 03/29] fscrypt: Enable partial page encryption

2016-11-15 Thread Eric Biggers
On Sun, Nov 13, 2016 at 10:20:46PM +0100, Richard Weinberger wrote:
> From: David Gstir 
> 
> Not all filesystems work on full pages, thus we should allow them to
> hand partial pages to fscrypt for en/decryption.
> 
> Signed-off-by: David Gstir 
> Signed-off-by: Richard Weinberger 
> ---
>  fs/crypto/crypto.c   | 42 ++
>  fs/ext4/inode.c  |  6 --
>  fs/ext4/page-io.c|  2 +-
>  fs/f2fs/data.c   |  2 ++
>  include/linux/fscrypto.h | 16 +++-
>  5 files changed, 44 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
> index 222a70520565..e170aa05011d 100644
> --- a/fs/crypto/crypto.c
> +++ b/fs/crypto/crypto.c
> @@ -149,6 +149,7 @@ typedef enum {
>  static int do_page_crypto(struct inode *inode,
>   fscrypt_direction_t rw, pgoff_t index,
>   struct page *src_page, struct page *dest_page,
> + unsigned int src_len, unsigned int src_offset,
>   gfp_t gfp_flags)

The naming of 'src_len' and 'src_offset', and 'plaintext_len' and
'plaintext_offset' below, is misleading because the length and offset actually
apply to the destination too.  Shouldn't they be 'len' and 'offset', or 'len'
and 'offs' like fscrypt_decrypt_page()?

I'm also a little concerned that users will mix up the src_len and src_offset
arguments and end up "encrypting" 0 bytes at offset PAGE_SIZE.  Adding a
'BUG_ON(len == 0)' may be appropriate.

>  /**
>   * fscypt_encrypt_page() - Encrypts a page
> - * @inode:  The inode for which the encryption should take place
> - * @plaintext_page: The page to encrypt. Must be locked.
> - * @gfp_flags:  The gfp flag for memory allocation
> + * @inode:The inode for which the encryption should take place
> + * @plaintext_page:   The page to encrypt. Must be locked.
> + * @plaintext_len:Length of plaintext within page
> + * @plaintext_offset: Offset of plaintext within page
> + * @gfp_flags:The gfp flag for memory allocation
>   *
>   * Encrypts plaintext_page using the ctx encryption context. If
>   * the filesystem supports it, encryption is performed in-place, otherwise a
> @@ -229,13 +232,17 @@ static struct page *alloc_bounce_page(struct 
> fscrypt_ctx *ctx, gfp_t gfp_flags)
>   * error value or NULL.
>   */
>  struct page *fscrypt_encrypt_page(struct inode *inode,
> - struct page *plaintext_page, gfp_t gfp_flags)
> + struct page *plaintext_page,
> + unsigned int plaintext_len,
> + unsigned int plaintext_offset,
> + gfp_t gfp_flags)
> +
>  {
>   struct fscrypt_ctx *ctx;
>   struct page *ciphertext_page = plaintext_page;
>   int err;
>  
> - BUG_ON(!PageLocked(plaintext_page));
> + BUG_ON(plaintext_len % FS_CRYPTO_BLOCK_SIZE != 0);

What is going on with PageLocked()?  Is it still a requirement?  If not the
function comment needs to be fixed.

> -int fscrypt_decrypt_page(struct inode *inode, struct page *page)
> +int fscrypt_decrypt_page(struct inode *inode, struct page *page,
> + unsigned int len, unsigned int offs)
>  {
> - BUG_ON(!PageLocked(page));
> -
> - return do_page_crypto(inode, FS_DECRYPT, page->index, page, page,
> + return do_page_crypto(inode, FS_DECRYPT, page->index, page, page, len, 
> offs,
>   GFP_NOFS);
>  }

Same with PageLocked().  Is it still a requirement?  If not the function comment
needs to be fixed.

Eric


Re: [RFC PATCH v3 13/20] x86: DMA support for memory encryption

2016-11-15 Thread Tom Lendacky
On 11/15/2016 9:16 AM, Michael S. Tsirkin wrote:
> On Wed, Nov 09, 2016 at 06:37:23PM -0600, Tom Lendacky wrote:
>> Since DMA addresses will effectively look like 48-bit addresses when the
>> memory encryption mask is set, SWIOTLB is needed if the DMA mask of the
>> device performing the DMA does not support 48-bits. SWIOTLB will be
>> initialized to create un-encrypted bounce buffers for use by these devices.
>>
>> Signed-off-by: Tom Lendacky 
>> ---
>>  arch/x86/include/asm/dma-mapping.h |5 ++-
>>  arch/x86/include/asm/mem_encrypt.h |5 +++
>>  arch/x86/kernel/pci-dma.c  |   11 ---
>>  arch/x86/kernel/pci-nommu.c|2 +
>>  arch/x86/kernel/pci-swiotlb.c  |8 -
>>  arch/x86/mm/mem_encrypt.c  |   17 +++
>>  include/linux/swiotlb.h|1 +
>>  init/main.c|   13 
>>  lib/swiotlb.c  |   58 
>> +++-
>>  9 files changed, 103 insertions(+), 17 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/dma-mapping.h 
>> b/arch/x86/include/asm/dma-mapping.h
>> index 4446162..c9cdcae 100644
>> --- a/arch/x86/include/asm/dma-mapping.h
>> +++ b/arch/x86/include/asm/dma-mapping.h

..SNIP...

>>  
>> +/*
>> + * If memory encryption is active, the DMA address for an encrypted page may
>> + * be beyond the range of the device. If bounce buffers are required be sure
>> + * that they are not on an encrypted page. This should be called before the
>> + * iotlb area is used.
> 
> Makes sense, but I think at least a dmesg warning here
> might be a good idea.

Good idea.  Should it be a warning when it is first being set up or
a warning the first time the bounce buffers need to be used?  Or maybe
both?

> 
> A boot flag that says "don't enable devices that don't support
> encryption" might be a good idea, too, since most people
> don't read dmesg output and won't notice the message.

I'll look into this. It might be something that can be checked as
part of the device setting its DMA mask or the first time a DMA
API is used if the device doesn't explicitly set its mask.

Thanks,
Tom

> 
> 


Re: [RFC PATCH v3 13/20] x86: DMA support for memory encryption

2016-11-15 Thread Tom Lendacky
On 11/15/2016 9:16 AM, Michael S. Tsirkin wrote:
> On Wed, Nov 09, 2016 at 06:37:23PM -0600, Tom Lendacky wrote:
>> Since DMA addresses will effectively look like 48-bit addresses when the
>> memory encryption mask is set, SWIOTLB is needed if the DMA mask of the
>> device performing the DMA does not support 48-bits. SWIOTLB will be
>> initialized to create un-encrypted bounce buffers for use by these devices.
>>
>> Signed-off-by: Tom Lendacky 
>> ---
>>  arch/x86/include/asm/dma-mapping.h |5 ++-
>>  arch/x86/include/asm/mem_encrypt.h |5 +++
>>  arch/x86/kernel/pci-dma.c  |   11 ---
>>  arch/x86/kernel/pci-nommu.c|2 +
>>  arch/x86/kernel/pci-swiotlb.c  |8 -
>>  arch/x86/mm/mem_encrypt.c  |   17 +++
>>  include/linux/swiotlb.h|1 +
>>  init/main.c|   13 
>>  lib/swiotlb.c  |   58 
>> +++-
>>  9 files changed, 103 insertions(+), 17 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/dma-mapping.h 
>> b/arch/x86/include/asm/dma-mapping.h
>> index 4446162..c9cdcae 100644
>> --- a/arch/x86/include/asm/dma-mapping.h
>> +++ b/arch/x86/include/asm/dma-mapping.h

..SNIP...

>>  
>> +/*
>> + * If memory encryption is active, the DMA address for an encrypted page may
>> + * be beyond the range of the device. If bounce buffers are required be sure
>> + * that they are not on an encrypted page. This should be called before the
>> + * iotlb area is used.
> 
> Makes sense, but I think at least a dmesg warning here
> might be a good idea.

Good idea.  Should it be a warning when it is first being set up or
a warning the first time the bounce buffers need to be used?  Or maybe
both?

> 
> A boot flag that says "don't enable devices that don't support
> encryption" might be a good idea, too, since most people
> don't read dmesg output and won't notice the message.

I'll look into this. It might be something that can be checked as
part of the device setting its DMA mask or the first time a DMA
API is used if the device doesn't explicitly set its mask.

Thanks,
Tom

> 
> 


[PATCH] spi: spi-ti-qspi: reinit of completion variable

2016-11-15 Thread Prahlad V
completion variable should be reinitialized before reusing.

Signed-off-by: Prahlad V 
---
 drivers/spi/spi-ti-qspi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index caeac66..ec6fb09 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -411,6 +411,7 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, 
dma_addr_t dma_dst,
tx->callback = ti_qspi_dma_callback;
tx->callback_param = qspi;
cookie = tx->tx_submit(tx);
+   reinit_completion(&qspi->transfer_complete);
 
ret = dma_submit_error(cookie);
if (ret) {
-- 
2.9.2.729.ga42d7b6



Re: [PATCH] lkdtm: Prevent the compiler from optimising lkdtm_CORRUPT_STACK()

2016-11-15 Thread Greg KH
On Tue, Nov 15, 2016 at 09:27:47AM -0800, Kees Cook wrote:
> On Mon, Nov 14, 2016 at 11:02 PM, Michael Ellerman  
> wrote:
> > At least on powerpc with GCC 6, the compiler is smart enough to optimise
> > lkdtm_CORRUPT_STACK() into an empty function that just returns.
> 
> Sneaky. Thanks!
> 
> > If we print the buffer after we've written to it that prevents the
> > compiler from optimising away data and the memset().
> >
> > Signed-off-by: Michael Ellerman 
> 
> Acked-by: Kees Cook 
> 
> Greg, can you add this to the drivers/misc?

Now added, thanks.

greg k-h


[PATCH] spi: spi-ti-qspi: reinit of completion variable

2016-11-15 Thread Prahlad V
completion variable should be reinitialized before reusing.

Signed-off-by: Prahlad V 
---
 drivers/spi/spi-ti-qspi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index caeac66..ec6fb09 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -411,6 +411,7 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, 
dma_addr_t dma_dst,
tx->callback = ti_qspi_dma_callback;
tx->callback_param = qspi;
cookie = tx->tx_submit(tx);
+   reinit_completion(&qspi->transfer_complete);
 
ret = dma_submit_error(cookie);
if (ret) {
-- 
2.9.2.729.ga42d7b6



Re: [PATCH] lkdtm: Prevent the compiler from optimising lkdtm_CORRUPT_STACK()

2016-11-15 Thread Greg KH
On Tue, Nov 15, 2016 at 09:27:47AM -0800, Kees Cook wrote:
> On Mon, Nov 14, 2016 at 11:02 PM, Michael Ellerman  
> wrote:
> > At least on powerpc with GCC 6, the compiler is smart enough to optimise
> > lkdtm_CORRUPT_STACK() into an empty function that just returns.
> 
> Sneaky. Thanks!
> 
> > If we print the buffer after we've written to it that prevents the
> > compiler from optimising away data and the memset().
> >
> > Signed-off-by: Michael Ellerman 
> 
> Acked-by: Kees Cook 
> 
> Greg, can you add this to the drivers/misc?

Now added, thanks.

greg k-h


[PATCH V2 for-next 03/11] IB/hns: Optimize the logic of allocating memory using APIs

2016-11-15 Thread Salil Mehta
From: "Wei Hu (Xavier)" 

This patch modifies the memory allocation logic in the hns RoCE
driver. It uses kcalloc instead of kmalloc_array followed by
bitmap_zero, and falls back to vzalloc to allocate the memory
when kcalloc fails.

Signed-off-by: Wei Hu (Xavier) 
Signed-off-by: Ping Zhang 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_mr.c |   15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c 
b/drivers/infiniband/hw/hns/hns_roce_mr.c
index fb87883..d3dfb5f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -137,11 +137,12 @@ static int hns_roce_buddy_init(struct hns_roce_buddy 
*buddy, int max_order)
 
for (i = 0; i <= buddy->max_order; ++i) {
s = BITS_TO_LONGS(1 << (buddy->max_order - i));
-   buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL);
-   if (!buddy->bits[i])
-   goto err_out_free;
-
-   bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
+   buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL);
+   if (!buddy->bits[i]) {
+   buddy->bits[i] = vzalloc(s * sizeof(long));
+   if (!buddy->bits[i])
+   goto err_out_free;
+   }
}
 
set_bit(0, buddy->bits[buddy->max_order]);
@@ -151,7 +152,7 @@ static int hns_roce_buddy_init(struct hns_roce_buddy 
*buddy, int max_order)
 
 err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
-   kfree(buddy->bits[i]);
+   kvfree(buddy->bits[i]);
 
 err_out:
kfree(buddy->bits);
@@ -164,7 +165,7 @@ static void hns_roce_buddy_cleanup(struct hns_roce_buddy 
*buddy)
int i;
 
for (i = 0; i <= buddy->max_order; ++i)
-   kfree(buddy->bits[i]);
+   kvfree(buddy->bits[i]);
 
kfree(buddy->bits);
kfree(buddy->num_free);
-- 
1.7.9.5




[PATCH V2 for-next 08/11] IB/hns: Modify query info named port_num when querying RC QP

2016-11-15 Thread Salil Mehta
From: "Wei Hu (Xavier)" 

This patch modifies the output query info qp_attr->port_num
to fix a bug in hip06.

Signed-off-by: Wei Hu (Xavier) 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 509ea75..34b7898 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2857,9 +2857,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct 
ib_qp_attr *qp_attr,
qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12,
  QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
  QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S);
-   qp_attr->port_num = (u8)roce_get_field(context->qpc_bytes_156,
-QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
-QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) + 1;
+   qp_attr->port_num = hr_qp->port + 1;
qp_attr->sq_draining = 0;
qp_attr->max_rd_atomic = roce_get_field(context->qpc_bytes_156,
 QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
-- 
1.7.9.5




[PATCH V2 for-next 03/11] IB/hns: Optimize the logic of allocating memory using APIs

2016-11-15 Thread Salil Mehta
From: "Wei Hu (Xavier)" 

This patch modifies the memory allocation logic in the hns RoCE
driver. It uses kcalloc instead of kmalloc_array followed by
bitmap_zero, and falls back to vzalloc to allocate the memory
when kcalloc fails.

Signed-off-by: Wei Hu (Xavier) 
Signed-off-by: Ping Zhang 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_mr.c |   15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c 
b/drivers/infiniband/hw/hns/hns_roce_mr.c
index fb87883..d3dfb5f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -137,11 +137,12 @@ static int hns_roce_buddy_init(struct hns_roce_buddy 
*buddy, int max_order)
 
for (i = 0; i <= buddy->max_order; ++i) {
s = BITS_TO_LONGS(1 << (buddy->max_order - i));
-   buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL);
-   if (!buddy->bits[i])
-   goto err_out_free;
-
-   bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
+   buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL);
+   if (!buddy->bits[i]) {
+   buddy->bits[i] = vzalloc(s * sizeof(long));
+   if (!buddy->bits[i])
+   goto err_out_free;
+   }
}
 
set_bit(0, buddy->bits[buddy->max_order]);
@@ -151,7 +152,7 @@ static int hns_roce_buddy_init(struct hns_roce_buddy 
*buddy, int max_order)
 
 err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
-   kfree(buddy->bits[i]);
+   kvfree(buddy->bits[i]);
 
 err_out:
kfree(buddy->bits);
@@ -164,7 +165,7 @@ static void hns_roce_buddy_cleanup(struct hns_roce_buddy 
*buddy)
int i;
 
for (i = 0; i <= buddy->max_order; ++i)
-   kfree(buddy->bits[i]);
+   kvfree(buddy->bits[i]);
 
kfree(buddy->bits);
kfree(buddy->num_free);
-- 
1.7.9.5




[PATCH V2 for-next 08/11] IB/hns: Modify query info named port_num when querying RC QP

2016-11-15 Thread Salil Mehta
From: "Wei Hu (Xavier)" 

This patch modifies the output query info qp_attr->port_num
to fix a bug in hip06.

Signed-off-by: Wei Hu (Xavier) 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 509ea75..34b7898 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2857,9 +2857,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct 
ib_qp_attr *qp_attr,
qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12,
  QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
  QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S);
-   qp_attr->port_num = (u8)roce_get_field(context->qpc_bytes_156,
-QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
-QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) + 1;
+   qp_attr->port_num = hr_qp->port + 1;
qp_attr->sq_draining = 0;
qp_attr->max_rd_atomic = roce_get_field(context->qpc_bytes_156,
 QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
-- 
1.7.9.5




Re: [PATCH 02/29] fscrypt: Allow fscrypt_decrypt_page() to function with non-writeback pages

2016-11-15 Thread Eric Biggers
On Sun, Nov 13, 2016 at 10:20:45PM +0100, Richard Weinberger wrote:
>  /**
>   * f2crypt_decrypt_page() - Decrypts a page in-place
> - * @page: The page to decrypt. Must be locked.
> + * @inode: The encrypted inode to decrypt.
> + * @page:  The page to decrypt. Must be locked.

Strictly speaking, it's not the inode itself being decrypted, but rather the
data associated with it.  Could this be better expressed as something like
"The inode to which the page belongs"?

Eric


Re: [PATCH 02/29] fscrypt: Allow fscrypt_decrypt_page() to function with non-writeback pages

2016-11-15 Thread Eric Biggers
On Sun, Nov 13, 2016 at 10:20:45PM +0100, Richard Weinberger wrote:
>  /**
>   * f2crypt_decrypt_page() - Decrypts a page in-place
> - * @page: The page to decrypt. Must be locked.
> + * @inode: The encrypted inode to decrypt.
> + * @page:  The page to decrypt. Must be locked.

Strictly speaking, it's not the inode itself being decrypted, but rather the
data associated with it.  Could this be better expressed as something like
"The inode to which the page belongs"?

Eric


Re: [PATCHSET 0/7] perf sched: Introduce timehist command, again (v1)

2016-11-15 Thread David Ahern
On 11/15/16 8:32 AM, Namhyung Kim wrote:
> So, are you ok with the name 'wait time'?  My thinking is that they
> are all waiting for something - timer, resource or cpu.

sure. 


Re: [PATCHSET 0/7] perf sched: Introduce timehist command, again (v1)

2016-11-15 Thread David Ahern
On 11/15/16 8:32 AM, Namhyung Kim wrote:
> So, are you ok with the name 'wait time'?  My thinking is that they
> are all waiting for something - timer, resource or cpu.

sure. 


Re: [PATCH] kasan: support use-after-scope detection

2016-11-15 Thread Andrey Ryabinin


On 11/15/2016 07:07 PM, Dmitry Vyukov wrote:
> Gcc revision 241896 implements use-after-scope detection.
> Will be available in gcc 7. Support it in KASAN.
> 
> Gcc emits 2 new callbacks to poison/unpoison large stack
> objects when they go in/out of scope.
> Implement the callbacks and add a test.
> 
> Signed-off-by: Dmitry Vyukov 
> Cc: aryabi...@virtuozzo.com
> Cc: gli...@google.com
> Cc: a...@linux-foundation.org
> Cc: kasan-...@googlegroups.com
> Cc: linux...@kvack.org
> Cc: linux-kernel@vger.kernel.org
> 
> ---

Acked-by: Andrey Ryabinin 



Re: [PATCH] kasan: support use-after-scope detection

2016-11-15 Thread Andrey Ryabinin


On 11/15/2016 07:07 PM, Dmitry Vyukov wrote:
> Gcc revision 241896 implements use-after-scope detection.
> Will be available in gcc 7. Support it in KASAN.
> 
> Gcc emits 2 new callbacks to poison/unpoison large stack
> objects when they go in/out of scope.
> Implement the callbacks and add a test.
> 
> Signed-off-by: Dmitry Vyukov 
> Cc: aryabi...@virtuozzo.com
> Cc: gli...@google.com
> Cc: a...@linux-foundation.org
> Cc: kasan-...@googlegroups.com
> Cc: linux...@kvack.org
> Cc: linux-kernel@vger.kernel.org
> 
> ---

Acked-by: Andrey Ryabinin 



Re: [PATCH v2 2/2] arm64: dts: Add ARM PMU node for exynos7

2016-11-15 Thread Krzysztof Kozlowski
On Sat, Nov 12, 2016 at 11:33:18AM -0300, Javier Martinez Canillas wrote:
> Hello Alim,
> 
> On 11/12/2016 07:17 AM, Alim Akhtar wrote:
> > This patch adds ARM Performance Monitor Unit dt node for exynos7.
> > PMU provides various statistics on the operation of the CPU and
> > memory system at runtime, which are very useful when debugging or
> > profiling code. This enables the same.
> > 
> > Signed-off-by: Alim Akhtar 
> > ---
> >  arch/arm64/boot/dts/exynos/exynos7.dtsi |   10 ++
> >  1 file changed, 10 insertions(+)
> > 
> > Changes since v1:
> > * Added "interrupt-affinity" property as per Robin Murphy review comment.
> > 
> > diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi 
> > b/arch/arm64/boot/dts/exynos/exynos7.dtsi
> > index 396ffb9..09e7a05b 100644
> > --- a/arch/arm64/boot/dts/exynos/exynos7.dtsi
> > +++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi
> > @@ -472,6 +472,16 @@
> > status = "disabled";
> > };
> >  
> > +   arm-pmu {
> > +   compatible = "arm,cortex-a57-pmu", "arm,armv8-pmuv3";
> > +   interrupts = ,
> > +,
> > +,
> > +;
> > > +   interrupt-affinity = <&cpu_atlas0>, <&cpu_atlas1>,
> > > +<&cpu_atlas2>, <&cpu_atlas3>;
> > +   };
> > +
> 
> I didn't double check if these are the correct IRQs because I don't have
> an Exynos7 user manual, but the change looks good to me.
> 
> Reviewed-by: Javier Martinez Canillas 

Squashed 1/2 with this and applied. Thanks!

Best regards,
Krzysztof


Re: [PATCH] staging: slicoss: fix different address space warnings

2016-11-15 Thread Greg KH
On Tue, Nov 15, 2016 at 06:19:37PM +0100, Sergio Paracuellos wrote:
> This patch fixes the following sparse warnings in slicoss driver:
> warning: incorrect type in assignment (different address spaces)
> 
> Signed-off-by: Sergio Paracuellos 
> ---
>  drivers/staging/slicoss/slic.h|  7 
>  drivers/staging/slicoss/slicoss.c | 83 
> ++-
>  2 files changed, 55 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/staging/slicoss/slic.h b/drivers/staging/slicoss/slic.h
> index 420546d..f76c0cd 100644
> --- a/drivers/staging/slicoss/slic.h
> +++ b/drivers/staging/slicoss/slic.h
> @@ -540,6 +540,13 @@ static inline void slic_flush_write(struct adapter 
> *adapter)
>   ioread32(adapter->regs + SLIC_REG_HOSTID);
>  }
>  
> +#define IOMEM_GET_FIELDADDR(base, member)\
> +({   \
> + char __iomem *_base = (char __iomem *)base; \
> + _base += offsetof(typeof(*base), member);   \
> + (void __iomem *)_base;  \
> +})

Really?  Why?  Shouldn't you be marking base as __iomem all the time?
That way you don't need this mess of a cast.

thanks,

greg k-h


Re: [PATCH v2 2/2] arm64: dts: Add ARM PMU node for exynos7

2016-11-15 Thread Krzysztof Kozlowski
On Sat, Nov 12, 2016 at 11:33:18AM -0300, Javier Martinez Canillas wrote:
> Hello Alim,
> 
> On 11/12/2016 07:17 AM, Alim Akhtar wrote:
> > This patch adds ARM Performance Monitor Unit dt node for exynos7.
> > PMU provides various statistics on the operation of the CPU and
> > memory system at runtime, which are very useful when debugging or
> > profiling code. This enables the same.
> > 
> > Signed-off-by: Alim Akhtar 
> > ---
> >  arch/arm64/boot/dts/exynos/exynos7.dtsi |   10 ++
> >  1 file changed, 10 insertions(+)
> > 
> > Changes since v1:
> > * Added "interrupt-affinity" property as per Robin Murphy review comment.
> > 
> > diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi 
> > b/arch/arm64/boot/dts/exynos/exynos7.dtsi
> > index 396ffb9..09e7a05b 100644
> > --- a/arch/arm64/boot/dts/exynos/exynos7.dtsi
> > +++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi
> > @@ -472,6 +472,16 @@
> > status = "disabled";
> > };
> >  
> > +   arm-pmu {
> > +   compatible = "arm,cortex-a57-pmu", "arm,armv8-pmuv3";
> > +   interrupts = ,
> > +,
> > +,
> > +;
> > > +   interrupt-affinity = <&cpu_atlas0>, <&cpu_atlas1>,
> > > +<&cpu_atlas2>, <&cpu_atlas3>;
> > +   };
> > +
> 
> I didn't double check if these are the correct IRQs because I don't have
> an Exynos7 user manual, but the change looks good to me.
> 
> Reviewed-by: Javier Martinez Canillas 

Squashed 1/2 with this and applied. Thanks!

Best regards,
Krzysztof


Re: [PATCH] staging: slicoss: fix different address space warnings

2016-11-15 Thread Greg KH
On Tue, Nov 15, 2016 at 06:19:37PM +0100, Sergio Paracuellos wrote:
> This patch fixes the following sparse warnings in slicoss driver:
> warning: incorrect type in assignment (different address spaces)
> 
> Signed-off-by: Sergio Paracuellos 
> ---
>  drivers/staging/slicoss/slic.h|  7 
>  drivers/staging/slicoss/slicoss.c | 83 
> ++-
>  2 files changed, 55 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/staging/slicoss/slic.h b/drivers/staging/slicoss/slic.h
> index 420546d..f76c0cd 100644
> --- a/drivers/staging/slicoss/slic.h
> +++ b/drivers/staging/slicoss/slic.h
> @@ -540,6 +540,13 @@ static inline void slic_flush_write(struct adapter 
> *adapter)
>   ioread32(adapter->regs + SLIC_REG_HOSTID);
>  }
>  
> +#define IOMEM_GET_FIELDADDR(base, member)\
> +({   \
> + char __iomem *_base = (char __iomem *)base; \
> + _base += offsetof(typeof(*base), member);   \
> + (void __iomem *)_base;  \
> +})

Really?  Why?  Shouldn't you be marking base as __iomem all the time?
That way you don't need this mess of a cast.

thanks,

greg k-h


[PATCH V2 for-next 01/11] IB/hns: Add the interface for querying QP1

2016-11-15 Thread Salil Mehta
From: Lijun Ou 

In the old code, only the interface for querying non-specific QPs
was added. This patch mainly adds an interface for querying QP1.

Signed-off-by: Lijun Ou 
Reviewed-by: Wei Hu (Xavier) 
Signed-off-by: Salil Mehta  
---
Change Log

Patch V2: Addressed the comment provided by Anurup M
Link: https://patchwork.kernel.org/patch/9412855/
Patch V1: Initial Submit
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |   83 +++-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h |6 +-
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 71232e5..7485514 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2630,8 +2630,78 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev 
*hr_dev,
return ret;
 }
 
-int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
-int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+int qp_attr_mask,
+struct ib_qp_init_attr *qp_init_attr)
+{
+   struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+   struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+   struct hns_roce_sqp_context context;
+   u32 addr;
+
+   mutex_lock(&hr_qp->mutex);
+
+   if (hr_qp->state == IB_QPS_RESET) {
+   qp_attr->qp_state = IB_QPS_RESET;
+   goto done;
+   }
+
+   addr = ROCEE_QP1C_CFG0_0_REG + 
+   hr_qp->port * sizeof(struct hns_roce_sqp_context);
+   context.qp1c_bytes_4 = roce_read(hr_dev, addr);
+   context.sq_rq_bt_l = roce_read(hr_dev, addr + 1);
+   context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2);
+   context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3);
+   context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4);
+   context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5);
+   context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6);
+   context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7);
+   context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8);
+   context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9);
+
+   hr_qp->state = roce_get_field(context.qp1c_bytes_4,
+ QP1C_BYTES_4_QP_STATE_M,
+ QP1C_BYTES_4_QP_STATE_S);
+   qp_attr->qp_state   = hr_qp->state;
+   qp_attr->path_mtu   = IB_MTU_256;
+   qp_attr->path_mig_state = IB_MIG_ARMED;
+   qp_attr->qkey   = QKEY_VAL;
+   qp_attr->rq_psn = 0;
+   qp_attr->sq_psn = 0;
+   qp_attr->dest_qp_num= 1;
+   qp_attr->qp_access_flags = 6;
+
+   qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20,
+QP1C_BYTES_20_PKEY_IDX_M,
+QP1C_BYTES_20_PKEY_IDX_S);
+   qp_attr->port_num = hr_qp->port + 1;
+   qp_attr->sq_draining = 0;
+   qp_attr->max_rd_atomic = 0;
+   qp_attr->max_dest_rd_atomic = 0;
+   qp_attr->min_rnr_timer = 0;
+   qp_attr->timeout = 0;
+   qp_attr->retry_cnt = 0;
+   qp_attr->rnr_retry = 0;
+   qp_attr->alt_timeout = 0;
+
+done:
+   qp_attr->cur_qp_state = qp_attr->qp_state;
+   qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
+   qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+   qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
+   qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
+   qp_attr->cap.max_inline_data = 0;
+   qp_init_attr->cap = qp_attr->cap;
+   qp_init_attr->create_flags = 0;
+
+   mutex_unlock(&hr_qp->mutex);
+
+   return 0;
+}
+
+static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+   int qp_attr_mask,
+   struct ib_qp_init_attr *qp_init_attr)
 {
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -2767,6 +2837,15 @@ int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct 
ib_qp_attr *qp_attr,
return ret;
 }
 
+int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+   struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+   return hr_qp->doorbell_qpn <= 1 ?
+   hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) :
+   hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr);
+}
 static void hns_roce_v1_destroy_qp_common(struct hns_roce_dev *hr_dev,
  struct hns_roce_qp *hr_qp,
  int is_user)
diff --git 

[PATCH V2 for-next 07/11] IB/hns: Modify the macro for the timeout when cmd process

2016-11-15 Thread Salil Mehta
From: "Wei Hu (Xavier)" 

This patch modified the macro for the timeout when cmd is
processing as follows:
Before modification:
 enum {
HNS_ROCE_CMD_TIME_CLASS_A   = 1,
HNS_ROCE_CMD_TIME_CLASS_B   = 1,
HNS_ROCE_CMD_TIME_CLASS_C   = 1,
 };
After modification:
 #define HNS_ROCE_CMD_TIMEOUT_MSECS 1

Signed-off-by: Wei Hu (Xavier) 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_cmd.h   |7 +--
 drivers/infiniband/hw/hns/hns_roce_cq.c|4 ++--
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |8 
 drivers/infiniband/hw/hns/hns_roce_mr.c|4 ++--
 4 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h 
b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index e3997d3..ed14ad3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -34,6 +34,7 @@
 #define _HNS_ROCE_CMD_H
 
 #define HNS_ROCE_MAILBOX_SIZE  4096
+#define HNS_ROCE_CMD_TIMEOUT_MSECS 1
 
 enum {
/* TPT commands */
@@ -57,12 +58,6 @@ enum {
HNS_ROCE_CMD_QUERY_QP   = 0x22,
 };
 
-enum {
-   HNS_ROCE_CMD_TIME_CLASS_A   = 1,
-   HNS_ROCE_CMD_TIME_CLASS_B   = 1,
-   HNS_ROCE_CMD_TIME_CLASS_C   = 1,
-};
-
 struct hns_roce_cmd_mailbox {
void   *buf;
dma_addr_t  dma;
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c 
b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 5dc8d92..461a273 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -77,7 +77,7 @@ static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev,
 unsigned long cq_num)
 {
return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0,
-   HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIME_CLASS_A);
+   HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
 static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
@@ -176,7 +176,7 @@ static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev,
 {
return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
 mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ,
-HNS_ROCE_CMD_TIME_CLASS_A);
+HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
 static void hns_roce_free_cq(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index b835a55..509ea75 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1871,12 +1871,12 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev 
*hr_dev,
if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP)
return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
 HNS_ROCE_CMD_2RST_QP,
-HNS_ROCE_CMD_TIME_CLASS_A);
+HNS_ROCE_CMD_TIMEOUT_MSECS);
 
if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP)
return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
 HNS_ROCE_CMD_2ERR_QP,
-HNS_ROCE_CMD_TIME_CLASS_A);
+HNS_ROCE_CMD_TIMEOUT_MSECS);
 
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox))
@@ -1886,7 +1886,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev 
*hr_dev,
 
ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
op[cur_state][new_state],
-   HNS_ROCE_CMD_TIME_CLASS_C);
+   HNS_ROCE_CMD_TIMEOUT_MSECS);
 
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
return ret;
@@ -2681,7 +2681,7 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev 
*hr_dev,
 
ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
HNS_ROCE_CMD_QUERY_QP,
-   HNS_ROCE_CMD_TIME_CLASS_A);
+   HNS_ROCE_CMD_TIMEOUT_MSECS);
if (!ret)
memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
else
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c 
b/drivers/infiniband/hw/hns/hns_roce_mr.c
index d3dfb5f..2227962 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -53,7 +53,7 @@ static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
 {
return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
 HNS_ROCE_CMD_SW2HW_MPT,
-HNS_ROCE_CMD_TIME_CLASS_B);
+ 

RE: perf: fuzzer KASAN slab-out-of-bounds in snb_uncore_imc_event_del

2016-11-15 Thread Liang, Kan


> -----Original Message-----
> From: Vince Weaver [mailto:vincent.wea...@maine.edu]
> Sent: Tuesday, November 15, 2016 12:39 PM
> To: Liang, Kan 
> Cc: Peter Zijlstra ; Vince Weaver
> ; linux-kernel@vger.kernel.org; Ingo Molnar
> ; Arnaldo Carvalho de Melo ;
> da...@codemonkey.org.uk; dvyu...@google.com; Stephane Eranian
> 
> Subject: RE: perf: fuzzer KASAN slab-out-of-bounds in
> snb_uncore_imc_event_del
> 
> On Tue, 15 Nov 2016, Liang, Kan wrote:
> 
> > For client IMC, there is no generic counters.
> > Current implementation defines its own fixed free running counters.
> > event_list and n_events are unused.
> > I think we can just remove them.
> >
> > Vince, could you please try the patch as below?
> >
> 
> With this patch I have not been able to trigger the imc/uncore issue.
> 
> Or at least I used to be able to trigger it within 5 minutes, now I go longer
> (maybe 10 minutes) before hitting an unrelated issue.
> 

Thanks a lot for the test.
I will submit the patch then.

Thanks,
Kan






[PATCH V2 for-next 07/11] IB/hns: Modify the macro for the timeout when cmd process

2016-11-15 Thread Salil Mehta
From: "Wei Hu (Xavier)" 

This patch modified the macro for the timeout when cmd is
processing as follows:
Before modification:
 enum {
HNS_ROCE_CMD_TIME_CLASS_A   = 1,
HNS_ROCE_CMD_TIME_CLASS_B   = 1,
HNS_ROCE_CMD_TIME_CLASS_C   = 1,
 };
After modification:
 #define HNS_ROCE_CMD_TIMEOUT_MSECS 1

Signed-off-by: Wei Hu (Xavier) 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_cmd.h   |7 +--
 drivers/infiniband/hw/hns/hns_roce_cq.c|4 ++--
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |8 
 drivers/infiniband/hw/hns/hns_roce_mr.c|4 ++--
 4 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h 
b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index e3997d3..ed14ad3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -34,6 +34,7 @@
 #define _HNS_ROCE_CMD_H
 
 #define HNS_ROCE_MAILBOX_SIZE  4096
+#define HNS_ROCE_CMD_TIMEOUT_MSECS 1
 
 enum {
/* TPT commands */
@@ -57,12 +58,6 @@ enum {
HNS_ROCE_CMD_QUERY_QP   = 0x22,
 };
 
-enum {
-   HNS_ROCE_CMD_TIME_CLASS_A   = 1,
-   HNS_ROCE_CMD_TIME_CLASS_B   = 1,
-   HNS_ROCE_CMD_TIME_CLASS_C   = 1,
-};
-
 struct hns_roce_cmd_mailbox {
void   *buf;
dma_addr_t  dma;
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c 
b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 5dc8d92..461a273 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -77,7 +77,7 @@ static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev,
 unsigned long cq_num)
 {
return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0,
-   HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIME_CLASS_A);
+   HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
 static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
@@ -176,7 +176,7 @@ static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev,
 {
return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
 mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ,
-HNS_ROCE_CMD_TIME_CLASS_A);
+HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
 static void hns_roce_free_cq(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index b835a55..509ea75 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1871,12 +1871,12 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev 
*hr_dev,
if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP)
return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
 HNS_ROCE_CMD_2RST_QP,
-HNS_ROCE_CMD_TIME_CLASS_A);
+HNS_ROCE_CMD_TIMEOUT_MSECS);
 
if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP)
return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
 HNS_ROCE_CMD_2ERR_QP,
-HNS_ROCE_CMD_TIME_CLASS_A);
+HNS_ROCE_CMD_TIMEOUT_MSECS);
 
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox))
@@ -1886,7 +1886,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev 
*hr_dev,
 
ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
op[cur_state][new_state],
-   HNS_ROCE_CMD_TIME_CLASS_C);
+   HNS_ROCE_CMD_TIMEOUT_MSECS);
 
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
return ret;
@@ -2681,7 +2681,7 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev 
*hr_dev,
 
ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
HNS_ROCE_CMD_QUERY_QP,
-   HNS_ROCE_CMD_TIME_CLASS_A);
+   HNS_ROCE_CMD_TIMEOUT_MSECS);
if (!ret)
memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
else
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c 
b/drivers/infiniband/hw/hns/hns_roce_mr.c
index d3dfb5f..2227962 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -53,7 +53,7 @@ static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
 {
return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
 HNS_ROCE_CMD_SW2HW_MPT,
-HNS_ROCE_CMD_TIME_CLASS_B);
+HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
 static int 

RE: perf: fuzzer KASAN slab-out-of-bounds in snb_uncore_imc_event_del

2016-11-15 Thread Liang, Kan


> -----Original Message-----
> From: Vince Weaver [mailto:vincent.wea...@maine.edu]
> Sent: Tuesday, November 15, 2016 12:39 PM
> To: Liang, Kan 
> Cc: Peter Zijlstra ; Vince Weaver
> ; linux-kernel@vger.kernel.org; Ingo Molnar
> ; Arnaldo Carvalho de Melo ;
> da...@codemonkey.org.uk; dvyu...@google.com; Stephane Eranian
> 
> Subject: RE: perf: fuzzer KASAN slab-out-of-bounds in
> snb_uncore_imc_event_del
> 
> On Tue, 15 Nov 2016, Liang, Kan wrote:
> 
> > For client IMC, there is no generic counters.
> > Current implementation defines its own fixed free running counters.
> > event_list and n_events are unused.
> > I think we can just remove them.
> >
> > Vince, could you please try the patch as below?
> >
> 
> With this patch I have not been able to trigger the imc/uncore issue.
> 
> Or at least I used to be able to trigger it within 5 minutes, now I go longer
> (maybe 10 minutes) before hitting an unrelated issue.
> 

Thanks a lot for the test.
I will submit the patch then.

Thanks,
Kan






[PATCH V2 for-next 01/11] IB/hns: Add the interface for querying QP1

2016-11-15 Thread Salil Mehta
From: Lijun Ou 

In the old code, only the interface for querying non-specific QPs was
added. This patch mainly adds an interface for querying QP1.

Signed-off-by: Lijun Ou 
Reviewed-by: Wei Hu (Xavier) 
Signed-off-by: Salil Mehta  
---
Change Log

Patch V2: Addressed the comment provided by Anurup M
Link: https://patchwork.kernel.org/patch/9412855/
Patch V1: Initial Submit
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |   83 +++-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h |6 +-
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 71232e5..7485514 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2630,8 +2630,78 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev 
*hr_dev,
return ret;
 }
 
-int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
-int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+int qp_attr_mask,
+struct ib_qp_init_attr *qp_init_attr)
+{
+   struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+   struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+   struct hns_roce_sqp_context context;
+   u32 addr;
+
+   mutex_lock(&hr_qp->mutex);
+
+   if (hr_qp->state == IB_QPS_RESET) {
+   qp_attr->qp_state = IB_QPS_RESET;
+   goto done;
+   }
+
+   addr = ROCEE_QP1C_CFG0_0_REG + 
+   hr_qp->port * sizeof(struct hns_roce_sqp_context);
+   context.qp1c_bytes_4 = roce_read(hr_dev, addr);
+   context.sq_rq_bt_l = roce_read(hr_dev, addr + 1);
+   context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2);
+   context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3);
+   context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4);
+   context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5);
+   context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6);
+   context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7);
+   context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8);
+   context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9);
+
+   hr_qp->state = roce_get_field(context.qp1c_bytes_4,
+ QP1C_BYTES_4_QP_STATE_M,
+ QP1C_BYTES_4_QP_STATE_S);
+   qp_attr->qp_state   = hr_qp->state;
+   qp_attr->path_mtu   = IB_MTU_256;
+   qp_attr->path_mig_state = IB_MIG_ARMED;
+   qp_attr->qkey   = QKEY_VAL;
+   qp_attr->rq_psn = 0;
+   qp_attr->sq_psn = 0;
+   qp_attr->dest_qp_num= 1;
+   qp_attr->qp_access_flags = 6;
+
+   qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20,
+QP1C_BYTES_20_PKEY_IDX_M,
+QP1C_BYTES_20_PKEY_IDX_S);
+   qp_attr->port_num = hr_qp->port + 1;
+   qp_attr->sq_draining = 0;
+   qp_attr->max_rd_atomic = 0;
+   qp_attr->max_dest_rd_atomic = 0;
+   qp_attr->min_rnr_timer = 0;
+   qp_attr->timeout = 0;
+   qp_attr->retry_cnt = 0;
+   qp_attr->rnr_retry = 0;
+   qp_attr->alt_timeout = 0;
+
+done:
+   qp_attr->cur_qp_state = qp_attr->qp_state;
+   qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
+   qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+   qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
+   qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
+   qp_attr->cap.max_inline_data = 0;
+   qp_init_attr->cap = qp_attr->cap;
+   qp_init_attr->create_flags = 0;
+
+   mutex_unlock(&hr_qp->mutex);
+
+   return 0;
+}
+
+static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+   int qp_attr_mask,
+   struct ib_qp_init_attr *qp_init_attr)
 {
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -2767,6 +2837,15 @@ int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct 
ib_qp_attr *qp_attr,
return ret;
 }
 
+int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+   struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+   return hr_qp->doorbell_qpn <= 1 ?
+   hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) :
+   hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr);
+}
 static void hns_roce_v1_destroy_qp_common(struct hns_roce_dev *hr_dev,
  struct hns_roce_qp *hr_qp,
  int is_user)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 

[PATCH V2 for-next 11/11] IB/hns: Fix for Checkpatch.pl comment style errors

2016-11-15 Thread Salil Mehta
This patch corrects the comment style errors caught by the
checkpatch.pl script.

Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_cmd.c|8 ++--
 drivers/infiniband/hw/hns/hns_roce_device.h |   28 ++---
 drivers/infiniband/hw/hns/hns_roce_eq.c |6 +--
 drivers/infiniband/hw/hns/hns_roce_hem.c|6 +--
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |   58 +--
 drivers/infiniband/hw/hns/hns_roce_main.c   |   28 ++---
 6 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c 
b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 2a0b6c0..8c1f7a6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -216,10 +216,10 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev 
*hr_dev, u64 in_param,
goto out;
 
/*
-   * It is timeout when wait_for_completion_timeout return 0
-   * The return value is the time limit set in advance
-   * how many seconds showing
-   */
+* It is timeout when wait_for_completion_timeout return 0
+* The return value is the time limit set in advance
+* how many seconds showing
+*/
if (!wait_for_completion_timeout(&context->done,
 msecs_to_jiffies(timeout))) {
dev_err(dev, "[cmd]wait_for_completion_timeout timeout\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h 
b/drivers/infiniband/hw/hns/hns_roce_device.h
index 9ef1cc3..e48464d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -201,9 +201,9 @@ struct hns_roce_bitmap {
 /* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */
 /* Every bit repesent to a partner free/used status in bitmap */
 /*
-* Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
-* Bit = 1 represent to idle and available; bit = 0: not available
-*/
+ * Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
+ * Bit = 1 represent to idle and available; bit = 0: not available
+ */
 struct hns_roce_buddy {
/* Members point to every order level bitmap */
unsigned long **bits;
@@ -365,25 +365,25 @@ struct hns_roce_cmdq {
struct mutexhcr_mutex;
struct semaphorepoll_sem;
/*
-   * Event mode: cmd register mutex protection,
-   * ensure to not exceed max_cmds and user use limit region
-   */
+* Event mode: cmd register mutex protection,
+* ensure to not exceed max_cmds and user use limit region
+*/
struct semaphoreevent_sem;
int max_cmds;
spinlock_t  context_lock;
int free_head;
struct hns_roce_cmd_context *context;
/*
-   * Result of get integer part
-   * which max_comds compute according a power of 2
-   */
+* Result of get integer part
+* which max_comds compute according a power of 2
+*/
u16 token_mask;
/*
-   * Process whether use event mode, init default non-zero
-   * After the event queue of cmd event ready,
-   * can switch into event mode
-   * close device, switch into poll mode(non event mode)
-   */
+* Process whether use event mode, init default non-zero
+* After the event queue of cmd event ready,
+* can switch into event mode
+* close device, switch into poll mode(non event mode)
+*/
u8  use_events;
u8  toggle;
 };
diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c 
b/drivers/infiniband/hw/hns/hns_roce_eq.c
index 21e21b0..50f8649 100644
--- a/drivers/infiniband/hw/hns/hns_roce_eq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_eq.c
@@ -371,9 +371,9 @@ static int hns_roce_aeq_ovf_int(struct hns_roce_dev *hr_dev,
int i = 0;
 
/**
-   * AEQ overflow ECC mult bit err CEQ overflow alarm
-   * must clear interrupt, mask irq, clear irq, cancel mask operation
-   */
+* AEQ overflow ECC mult bit err CEQ overflow alarm
+* must clear interrupt, mask irq, clear irq, cancel mask operation
+*/
aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
 
if (roce_get_bit(aeshift_val,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c 
b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 250d8f2..c5104e0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -80,9 +80,9 @@ struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev 
*hr_dev, int npages,
--order;
 
/*
-   * Alloc memory one time. If failed, don't alloc small block
-   * memory, directly return 

RE: [PATCH 15/15] net: usb: lan78xx: Utilize phy_ethtool_nway_reset

2016-11-15 Thread Woojung.Huh
> Signed-off-by: Florian Fainelli 
> ---
>  drivers/net/usb/lan78xx.c | 7 +--
>  1 file changed, 1 insertion(+), 6 deletions(-)
> 
> diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
> index bcd9010c1f27..cf2857fa938f 100644
> --- a/drivers/net/usb/lan78xx.c
> +++ b/drivers/net/usb/lan78xx.c
> @@ -1447,11 +1447,6 @@ static u32 lan78xx_get_link(struct net_device *net)
>   return net->phydev->link;
>  }
> 
> -static int lan78xx_nway_reset(struct net_device *net)
> -{
> - return phy_start_aneg(net->phydev);
> -}
> -
>  static void lan78xx_get_drvinfo(struct net_device *net,
>   struct ethtool_drvinfo *info)
>  {
> @@ -1655,7 +1650,7 @@ static int lan78xx_set_pause(struct net_device
> *net,
> 
>  static const struct ethtool_ops lan78xx_ethtool_ops = {
>   .get_link   = lan78xx_get_link,
> - .nway_reset = lan78xx_nway_reset,
> + .nway_reset = phy_ethtool_nway_reset,
>   .get_drvinfo= lan78xx_get_drvinfo,
>   .get_msglevel   = lan78xx_get_msglevel,
>   .set_msglevel   = lan78xx_set_msglevel,
> --
> 2.9.3
Acked-by: Woojung Huh 


Re: [PATCH 01/29] fscrypt: Add in-place encryption mode

2016-11-15 Thread Eric Biggers
Hi,

On Sun, Nov 13, 2016 at 10:20:44PM +0100, Richard Weinberger wrote:
> From: David Gstir 
> 
> ext4 and f2fs require a bounce page when encrypting pages. However, not
> all filesystems will need that (eg. UBIFS). This is handled via a
> flag on fscrypt_operations where a fs implementation can select in-place
> encryption over using a bounce page (which is the default).
> 
> Signed-off-by: David Gstir 
> Signed-off-by: Richard Weinberger 

The comment for fscrypt_encrypt_page() still says the following:

 * Called on the page write path.  The caller must call
 * fscrypt_restore_control_page() on the returned ciphertext page to
 * release the bounce buffer and the encryption context.

It seems this isn't correct anymore.  It also looks like the fscrypt_context
never gets released in the case where the page is encrypted in-place.

Additionally, after this change the name of the flag FS_WRITE_PATH_FL is
misleading, since it now really indicates the presence of a bounce buffer rather
than the "write path".

Eric


Re: [PATCH 01/29] fscrypt: Add in-place encryption mode

2016-11-15 Thread Eric Biggers
Hi,

On Sun, Nov 13, 2016 at 10:20:44PM +0100, Richard Weinberger wrote:
> From: David Gstir 
> 
> ext4 and f2fs require a bounce page when encrypting pages. However, not
> all filesystems will need that (eg. UBIFS). This is handled via a
> flag on fscrypt_operations where a fs implementation can select in-place
> encryption over using a bounce page (which is the default).
> 
> Signed-off-by: David Gstir 
> Signed-off-by: Richard Weinberger 

The comment for fscrypt_encrypt_page() still says the following:

 * Called on the page write path.  The caller must call
 * fscrypt_restore_control_page() on the returned ciphertext page to
 * release the bounce buffer and the encryption context.

It seems this isn't correct anymore.  It also looks like the fscrypt_context
never gets released in the case where the page is encrypted in-place.

Additionally, after this change the name of the flag FS_WRITE_PATH_FL is
misleading, since it now really indicates the presence of a bounce buffer rather
than the "write path".

Eric


RE: [PATCH 15/15] net: usb: lan78xx: Utilize phy_ethtool_nway_reset

2016-11-15 Thread Woojung.Huh
> Signed-off-by: Florian Fainelli 
> ---
>  drivers/net/usb/lan78xx.c | 7 +--
>  1 file changed, 1 insertion(+), 6 deletions(-)
> 
> diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
> index bcd9010c1f27..cf2857fa938f 100644
> --- a/drivers/net/usb/lan78xx.c
> +++ b/drivers/net/usb/lan78xx.c
> @@ -1447,11 +1447,6 @@ static u32 lan78xx_get_link(struct net_device *net)
>   return net->phydev->link;
>  }
> 
> -static int lan78xx_nway_reset(struct net_device *net)
> -{
> - return phy_start_aneg(net->phydev);
> -}
> -
>  static void lan78xx_get_drvinfo(struct net_device *net,
>   struct ethtool_drvinfo *info)
>  {
> @@ -1655,7 +1650,7 @@ static int lan78xx_set_pause(struct net_device
> *net,
> 
>  static const struct ethtool_ops lan78xx_ethtool_ops = {
>   .get_link   = lan78xx_get_link,
> - .nway_reset = lan78xx_nway_reset,
> + .nway_reset = phy_ethtool_nway_reset,
>   .get_drvinfo= lan78xx_get_drvinfo,
>   .get_msglevel   = lan78xx_get_msglevel,
>   .set_msglevel   = lan78xx_set_msglevel,
> --
> 2.9.3
Acked-by: Woojung Huh 


[PATCH V2 for-next 11/11] IB/hns: Fix for Checkpatch.pl comment style errors

2016-11-15 Thread Salil Mehta
This patch corrects the comment style errors caught by the
checkpatch.pl script.

Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_cmd.c|8 ++--
 drivers/infiniband/hw/hns/hns_roce_device.h |   28 ++---
 drivers/infiniband/hw/hns/hns_roce_eq.c |6 +--
 drivers/infiniband/hw/hns/hns_roce_hem.c|6 +--
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |   58 +--
 drivers/infiniband/hw/hns/hns_roce_main.c   |   28 ++---
 6 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c 
b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 2a0b6c0..8c1f7a6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -216,10 +216,10 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev 
*hr_dev, u64 in_param,
goto out;
 
/*
-   * It is timeout when wait_for_completion_timeout return 0
-   * The return value is the time limit set in advance
-   * how many seconds showing
-   */
+* It is timeout when wait_for_completion_timeout return 0
+* The return value is the time limit set in advance
+* how many seconds showing
+*/
if (!wait_for_completion_timeout(&context->done,
 msecs_to_jiffies(timeout))) {
dev_err(dev, "[cmd]wait_for_completion_timeout timeout\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h 
b/drivers/infiniband/hw/hns/hns_roce_device.h
index 9ef1cc3..e48464d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -201,9 +201,9 @@ struct hns_roce_bitmap {
 /* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */
 /* Every bit repesent to a partner free/used status in bitmap */
 /*
-* Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
-* Bit = 1 represent to idle and available; bit = 0: not available
-*/
+ * Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
+ * Bit = 1 represent to idle and available; bit = 0: not available
+ */
 struct hns_roce_buddy {
/* Members point to every order level bitmap */
unsigned long **bits;
@@ -365,25 +365,25 @@ struct hns_roce_cmdq {
struct mutexhcr_mutex;
struct semaphorepoll_sem;
/*
-   * Event mode: cmd register mutex protection,
-   * ensure to not exceed max_cmds and user use limit region
-   */
+* Event mode: cmd register mutex protection,
+* ensure to not exceed max_cmds and user use limit region
+*/
struct semaphoreevent_sem;
int max_cmds;
spinlock_t  context_lock;
int free_head;
struct hns_roce_cmd_context *context;
/*
-   * Result of get integer part
-   * which max_comds compute according a power of 2
-   */
+* Result of get integer part
+* which max_comds compute according a power of 2
+*/
u16 token_mask;
/*
-   * Process whether use event mode, init default non-zero
-   * After the event queue of cmd event ready,
-   * can switch into event mode
-   * close device, switch into poll mode(non event mode)
-   */
+* Process whether use event mode, init default non-zero
+* After the event queue of cmd event ready,
+* can switch into event mode
+* close device, switch into poll mode(non event mode)
+*/
u8  use_events;
u8  toggle;
 };
diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c 
b/drivers/infiniband/hw/hns/hns_roce_eq.c
index 21e21b0..50f8649 100644
--- a/drivers/infiniband/hw/hns/hns_roce_eq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_eq.c
@@ -371,9 +371,9 @@ static int hns_roce_aeq_ovf_int(struct hns_roce_dev *hr_dev,
int i = 0;
 
/**
-   * AEQ overflow ECC mult bit err CEQ overflow alarm
-   * must clear interrupt, mask irq, clear irq, cancel mask operation
-   */
+* AEQ overflow ECC mult bit err CEQ overflow alarm
+* must clear interrupt, mask irq, clear irq, cancel mask operation
+*/
aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
 
if (roce_get_bit(aeshift_val,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c 
b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 250d8f2..c5104e0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -80,9 +80,9 @@ struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev 
*hr_dev, int npages,
--order;
 
/*
-   * Alloc memory one time. If failed, don't alloc small block
-   * memory, directly return fail.
-   */
+   

Re: [RFC PATCH v3 13/20] x86: DMA support for memory encryption

2016-11-15 Thread Radim Krčmář
2016-11-15 11:02-0600, Tom Lendacky:
> On 11/15/2016 8:39 AM, Radim Krčmář wrote:
>> 2016-11-09 18:37-0600, Tom Lendacky:
>>> Since DMA addresses will effectively look like 48-bit addresses when the
>>> memory encryption mask is set, SWIOTLB is needed if the DMA mask of the
>>> device performing the DMA does not support 48-bits. SWIOTLB will be
>>> initialized to create un-encrypted bounce buffers for use by these devices.
>>>
>>> Signed-off-by: Tom Lendacky 
>>> ---
>>> diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
>>> @@ -64,13 +66,15 @@ static struct dma_map_ops swiotlb_dma_ops = {
>>>   * pci_swiotlb_detect_override - set swiotlb to 1 if necessary
>>>   *
>>>   * This returns non-zero if we are forced to use swiotlb (by the boot
>>> - * option).
>>> + * option). If memory encryption is enabled then swiotlb will be set
>>> + * to 1 so that bounce buffers are allocated and used for devices that
>>> + * do not support the addressing range required for the encryption mask.
>>>   */
>>>  int __init pci_swiotlb_detect_override(void)
>>>  {
>>> int use_swiotlb = swiotlb | swiotlb_force;
>>>  
>>> -   if (swiotlb_force)
>>> +   if (swiotlb_force || sme_me_mask)
>>> swiotlb = 1;
>>>  
>>> return use_swiotlb;
>> 
>> We want to return 1 even if only sme_me_mask is 1, because the return
>> value is used for detection.  The following would be less obscure, IMO:
>> 
>>  if (swiotlb_force || sme_me_mask)
>>  swiotlb = 1;
>> 
>>  return swiotlb;
> 
> If we do that then all DMA would go through the swiotlb bounce buffers.

No, that is decided for example in swiotlb_map_page() and we need to
call pci_swiotlb_init() to register that function.

> By setting swiotlb to 1 we indicate that the bounce buffers will be
> needed for those devices that can't support the addressing range when
> the encryption bit is set (48 bit DMA). But if the device can support
> the addressing range we won't use the bounce buffers.

If we return 0 here, then pci_swiotlb_init() will not be called =>
dma_ops won't be set to swiotlb_dma_ops => we won't use bounce buffers.

>> We setup encrypted swiotlb and then decrypt it, but sometimes set it up
>> decrypted (late_alloc) ... why isn't the swiotlb set up decrypted
>> directly?
> 
> When swiotlb is allocated in swiotlb_init(), it is too early to make
> use of the api to change the page attributes. Because of this,
> the callback to make those changes is needed.

Thanks. (I don't know page table setup enough to see a lesser evil. :])

>>> @@ -541,7 +583,7 @@ static phys_addr_t
>>>  map_single(struct device *hwdev, phys_addr_t phys, size_t size,
>>>enum dma_data_direction dir)
>>>  {
>>> -   dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
>>> +   dma_addr_t start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start);
>> 
>> We have decrypted io_tlb_start before, so shouldn't its physical address
>> be saved without the sme bit?  (Which changes a lot ...)
> 
> I'm not sure what you mean here, can you elaborate a bit more?

The C-bit (sme bit) is a part of the physical address.
If we know that a certain physical page should be accessed as
unencrypted (the bounce buffer) then the C-bit is 0.
I'm wondering why we save the physical address with the C-bit set when
we know that it can't be accessed that way (because we remove it every
time).

The naming is a bit confusing, because physical addresses are actually
virtualized by SME -- maybe we should be calling them SME addresses?


[PATCH V2 for-next 04/11] IB/hns: add self loopback for CM

2016-11-15 Thread Salil Mehta
From: Lijun Ou 

This patch mainly adds self loopback support for CM.

Signed-off-by: Lijun Ou 
Signed-off-by: Peter Chen 
Reviewed-by: Wei Hu (Xavier) 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |   11 +++
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h |2 ++
 2 files changed, 13 insertions(+)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 959d5ca..e080dd6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -32,6 +32,7 @@
 
 #include 
 #include 
+#include <linux/etherdevice.h>
 #include 
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
@@ -72,6 +73,8 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct 
ib_send_wr *wr,
int nreq = 0;
u32 ind = 0;
int ret = 0;
+   u8 *smac;
+   int loopback;
 
if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
ibqp->qp_type != IB_QPT_RC)) {
@@ -129,6 +132,14 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct 
ib_send_wr *wr,
   UD_SEND_WQE_U32_8_DMAC_5_M,
   UD_SEND_WQE_U32_8_DMAC_5_S,
   ah->av.mac[5]);
+
+   smac = (u8 *)hr_dev->dev_addr[qp->port];
+   loopback = ether_addr_equal_unaligned(ah->av.mac,
+ smac) ? 1 : 0;
+   roce_set_bit(ud_sq_wqe->u32_8,
+UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S,
+loopback);
+
roce_set_field(ud_sq_wqe->u32_8,
   UD_SEND_WQE_U32_8_OPERATION_TYPE_M,
   UD_SEND_WQE_U32_8_OPERATION_TYPE_S,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 6004c7f..cf28f1b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -440,6 +440,8 @@ struct hns_roce_ud_send_wqe {
 #define UD_SEND_WQE_U32_8_DMAC_5_M   \
(((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S)
 
+#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22
+
 #define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16
 #define UD_SEND_WQE_U32_8_OPERATION_TYPE_M   \
(((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S)
-- 
1.7.9.5




Re: [RFC PATCH v3 13/20] x86: DMA support for memory encryption

2016-11-15 Thread Radim Krčmář
2016-11-15 11:02-0600, Tom Lendacky:
> On 11/15/2016 8:39 AM, Radim Krčmář wrote:
>> 2016-11-09 18:37-0600, Tom Lendacky:
>>> Since DMA addresses will effectively look like 48-bit addresses when the
>>> memory encryption mask is set, SWIOTLB is needed if the DMA mask of the
>>> device performing the DMA does not support 48-bits. SWIOTLB will be
>>> initialized to create un-encrypted bounce buffers for use by these devices.
>>>
>>> Signed-off-by: Tom Lendacky 
>>> ---
>>> diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
>>> @@ -64,13 +66,15 @@ static struct dma_map_ops swiotlb_dma_ops = {
>>>   * pci_swiotlb_detect_override - set swiotlb to 1 if necessary
>>>   *
>>>   * This returns non-zero if we are forced to use swiotlb (by the boot
>>> - * option).
>>> + * option). If memory encryption is enabled then swiotlb will be set
>>> + * to 1 so that bounce buffers are allocated and used for devices that
>>> + * do not support the addressing range required for the encryption mask.
>>>   */
>>>  int __init pci_swiotlb_detect_override(void)
>>>  {
>>> int use_swiotlb = swiotlb | swiotlb_force;
>>>  
>>> -   if (swiotlb_force)
>>> +   if (swiotlb_force || sme_me_mask)
>>> swiotlb = 1;
>>>  
>>> return use_swiotlb;
>> 
>> We want to return 1 even if only sme_me_mask is 1, because the return
>> value is used for detection.  The following would be less obscure, IMO:
>> 
>>  if (swiotlb_force || sme_me_mask)
>>  swiotlb = 1;
>> 
>>  return swiotlb;
> 
> If we do that then all DMA would go through the swiotlb bounce buffers.

No, that is decided for example in swiotlb_map_page() and we need to
call pci_swiotlb_init() to register that function.

> By setting swiotlb to 1 we indicate that the bounce buffers will be
> needed for those devices that can't support the addressing range when
> the encryption bit is set (48 bit DMA). But if the device can support
> the addressing range we won't use the bounce buffers.

If we return 0 here, then pci_swiotlb_init() will not be called =>
dma_ops won't be set to swiotlb_dma_ops => we won't use bounce buffers.

>> We setup encrypted swiotlb and then decrypt it, but sometimes set it up
>> decrypted (late_alloc) ... why isn't the swiotlb set up decrypted
>> directly?
> 
> When swiotlb is allocated in swiotlb_init(), it is too early to make
> use of the api to change the page attributes. Because of this,
> the callback to make those changes is needed.

Thanks. (I don't know page table setup enough to see a lesser evil. :])

>>> @@ -541,7 +583,7 @@ static phys_addr_t
>>>  map_single(struct device *hwdev, phys_addr_t phys, size_t size,
>>>enum dma_data_direction dir)
>>>  {
>>> -   dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
>>> +   dma_addr_t start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start);
>> 
>> We have decrypted io_tlb_start before, so shouldn't its physical address
>> be saved without the sme bit?  (Which changes a lot ...)
> 
> I'm not sure what you mean here, can you elaborate a bit more?

The C-bit (sme bit) is a part of the physical address.
If we know that a certain physical page should be accessed as
unencrypted (the bounce buffer) then the C-bit is 0.
I'm wondering why we save the physical address with the C-bit set when
we know that it can't be accessed that way (because we remove it every
time).

The naming is a bit confusing, because physical addresses are actually
virtualized by SME -- maybe we should be calling them SME addresses?


[PATCH V2 for-next 04/11] IB/hns: add self loopback for CM

2016-11-15 Thread Salil Mehta
From: Lijun Ou 

This patch mainly adds self loopback support for CM.

Signed-off-by: Lijun Ou 
Signed-off-by: Peter Chen 
Reviewed-by: Wei Hu (Xavier) 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |   11 +++
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h |2 ++
 2 files changed, 13 insertions(+)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 959d5ca..e080dd6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -32,6 +32,7 @@
 
 #include 
 #include 
+#include <linux/etherdevice.h>
 #include 
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
@@ -72,6 +73,8 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct 
ib_send_wr *wr,
int nreq = 0;
u32 ind = 0;
int ret = 0;
+   u8 *smac;
+   int loopback;
 
if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
ibqp->qp_type != IB_QPT_RC)) {
@@ -129,6 +132,14 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct 
ib_send_wr *wr,
   UD_SEND_WQE_U32_8_DMAC_5_M,
   UD_SEND_WQE_U32_8_DMAC_5_S,
   ah->av.mac[5]);
+
+   smac = (u8 *)hr_dev->dev_addr[qp->port];
+   loopback = ether_addr_equal_unaligned(ah->av.mac,
+ smac) ? 1 : 0;
+   roce_set_bit(ud_sq_wqe->u32_8,
+UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S,
+loopback);
+
roce_set_field(ud_sq_wqe->u32_8,
   UD_SEND_WQE_U32_8_OPERATION_TYPE_M,
   UD_SEND_WQE_U32_8_OPERATION_TYPE_S,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 6004c7f..cf28f1b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -440,6 +440,8 @@ struct hns_roce_ud_send_wqe {
 #define UD_SEND_WQE_U32_8_DMAC_5_M   \
(((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S)
 
+#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22
+
 #define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16
 #define UD_SEND_WQE_U32_8_OPERATION_TYPE_M   \
(((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S)
-- 
1.7.9.5




[PATCH V2 for-next 10/11] IB/hns: Implement the add_gid/del_gid and optimize the GIDs management

2016-11-15 Thread Salil Mehta
From: Shaobo Xu 

IB core has implemented the calculation of GIDs and the management
of GID tables, and it is now responsible to supply query function
for GIDs. So the calculation of GIDs and the management of GID
tables in the RoCE driver is redundant.

The patch is to implement the add_gid/del_gid to set the GIDs in
the RoCE driver, remove the redundant calculation and management of
GIDs in the notifier call of the net device and the inet, and
update the query_gid.

Signed-off-by: Shaobo Xu 
Reviewed-by: Wei Hu (Xavier) 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_device.h |2 -
 drivers/infiniband/hw/hns/hns_roce_main.c   |  270 +--
 2 files changed, 48 insertions(+), 224 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h 
b/drivers/infiniband/hw/hns/hns_roce_device.h
index 593a42a..9ef1cc3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -429,8 +429,6 @@ struct hns_roce_ib_iboe {
struct net_device  *netdevs[HNS_ROCE_MAX_PORTS];
struct notifier_block   nb;
struct notifier_block   nb_inet;
-   /* 16 GID is shared by 6 port in v1 engine. */
-   union ib_gid gid_table[HNS_ROCE_MAX_GID_NUM];
u8  phy_port[HNS_ROCE_MAX_PORTS];
 };
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c 
b/drivers/infiniband/hw/hns/hns_roce_main.c
index 6770171..795ef97 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -35,52 +35,13 @@
 #include 
 #include 
 #include 
+#include 
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
 #include "hns_roce_user.h"
 #include "hns_roce_hem.h"
 
 /**
- * hns_roce_addrconf_ifid_eui48 - Get default gid.
- * @eui: eui.
- * @vlan_id:  gid
- * @dev:  net device
- * Description:
- *MAC convert to GID
- *gid[0..7] = fe80   
- *gid[8] = mac[0] ^ 2
- *gid[9] = mac[1]
- *gid[10] = mac[2]
- *gid[11] = ff(VLAN ID high byte (4 MS bits))
- *gid[12] = fe(VLAN ID low byte)
- *gid[13] = mac[3]
- *gid[14] = mac[4]
- *gid[15] = mac[5]
- */
-static void hns_roce_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
-struct net_device *dev)
-{
-   memcpy(eui, dev->dev_addr, 3);
-   memcpy(eui + 5, dev->dev_addr + 3, 3);
-   if (vlan_id < 0x1000) {
-   eui[3] = vlan_id >> 8;
-   eui[4] = vlan_id & 0xff;
-   } else {
-   eui[3] = 0xff;
-   eui[4] = 0xfe;
-   }
-   eui[0] ^= 2;
-}
-
-static void hns_roce_make_default_gid(struct net_device *dev, union ib_gid 
*gid)
-{
-   memset(gid, 0, sizeof(*gid));
-   gid->raw[0] = 0xFE;
-   gid->raw[1] = 0x80;
-   hns_roce_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
-}
-
-/**
  * hns_get_gid_index - Get gid index.
  * @hr_dev: pointer to structure hns_roce_dev.
  * @port:  port, value range: 0 ~ MAX
@@ -96,30 +57,6 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, 
int gid_index)
return gid_index * hr_dev->caps.num_ports + port;
 }
 
-static int hns_roce_set_gid(struct hns_roce_dev *hr_dev, u8 port, int 
gid_index,
-union ib_gid *gid)
-{
-   struct device *dev = &hr_dev->pdev->dev;
-   u8 gid_idx = 0;
-
-   if (gid_index >= hr_dev->caps.gid_table_len[port]) {
-   dev_err(dev, "gid_index %d illegal, port %d gid range: 0~%d\n",
-   gid_index, port, hr_dev->caps.gid_table_len[port] - 1);
-   return -EINVAL;
-   }
-
-   gid_idx = hns_get_gid_index(hr_dev, port, gid_index);
-
-   if (!memcmp(gid, &hr_dev->iboe.gid_table[gid_idx], sizeof(*gid)))
-   return -EINVAL;
-
-   memcpy(&hr_dev->iboe.gid_table[gid_idx], gid, sizeof(*gid));
-
-   hr_dev->hw->set_gid(hr_dev, port, gid_index, gid);
-
-   return 0;
-}
-
 static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
 {
u8 phy_port;
@@ -147,15 +84,44 @@ static void hns_roce_set_mtu(struct hns_roce_dev *hr_dev, 
u8 port, int mtu)
hr_dev->hw->set_mtu(hr_dev, phy_port, tmp);
 }
 
-static void hns_roce_update_gids(struct hns_roce_dev *hr_dev, int port)
+static int hns_roce_add_gid(struct ib_device *device, u8 port_num,
+   unsigned int index, const union ib_gid *gid,
+   const struct ib_gid_attr *attr, void **context)
+{
+   struct hns_roce_dev *hr_dev = to_hr_dev(device);
+   u8 port = port_num - 1;
+   unsigned long flags;
+
+   if (port >= hr_dev->caps.num_ports)
+   return -EINVAL;
+
+   spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+
+   hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid);
+
+   

[PATCH V2 for-next 10/11] IB/hns: Implement the add_gid/del_gid and optimize the GIDs management

2016-11-15 Thread Salil Mehta
From: Shaobo Xu 

IB core has implemented the calculation of GIDs and the management
of GID tables, and it is now responsible to supply query function
for GIDs. So the calculation of GIDs and the management of GID
tables in the RoCE driver is redundant.

The patch is to implement the add_gid/del_gid to set the GIDs in
the RoCE driver, remove the redundant calculation and management of
GIDs in the notifier call of the net device and the inet, and
update the query_gid.

Signed-off-by: Shaobo Xu 
Reviewed-by: Wei Hu (Xavier) 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_device.h |2 -
 drivers/infiniband/hw/hns/hns_roce_main.c   |  270 +--
 2 files changed, 48 insertions(+), 224 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h 
b/drivers/infiniband/hw/hns/hns_roce_device.h
index 593a42a..9ef1cc3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -429,8 +429,6 @@ struct hns_roce_ib_iboe {
struct net_device  *netdevs[HNS_ROCE_MAX_PORTS];
struct notifier_block   nb;
struct notifier_block   nb_inet;
-   /* 16 GID is shared by 6 port in v1 engine. */
-   union ib_gid gid_table[HNS_ROCE_MAX_GID_NUM];
u8  phy_port[HNS_ROCE_MAX_PORTS];
 };
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c 
b/drivers/infiniband/hw/hns/hns_roce_main.c
index 6770171..795ef97 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -35,52 +35,13 @@
 #include 
 #include 
 #include 
+#include 
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
 #include "hns_roce_user.h"
 #include "hns_roce_hem.h"
 
 /**
- * hns_roce_addrconf_ifid_eui48 - Get default gid.
- * @eui: eui.
- * @vlan_id:  gid
- * @dev:  net device
- * Description:
- *MAC convert to GID
- *gid[0..7] = fe80   
- *gid[8] = mac[0] ^ 2
- *gid[9] = mac[1]
- *gid[10] = mac[2]
- *gid[11] = ff(VLAN ID high byte (4 MS bits))
- *gid[12] = fe(VLAN ID low byte)
- *gid[13] = mac[3]
- *gid[14] = mac[4]
- *gid[15] = mac[5]
- */
-static void hns_roce_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
-struct net_device *dev)
-{
-   memcpy(eui, dev->dev_addr, 3);
-   memcpy(eui + 5, dev->dev_addr + 3, 3);
-   if (vlan_id < 0x1000) {
-   eui[3] = vlan_id >> 8;
-   eui[4] = vlan_id & 0xff;
-   } else {
-   eui[3] = 0xff;
-   eui[4] = 0xfe;
-   }
-   eui[0] ^= 2;
-}
-
-static void hns_roce_make_default_gid(struct net_device *dev, union ib_gid 
*gid)
-{
-   memset(gid, 0, sizeof(*gid));
-   gid->raw[0] = 0xFE;
-   gid->raw[1] = 0x80;
-   hns_roce_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
-}
-
-/**
  * hns_get_gid_index - Get gid index.
  * @hr_dev: pointer to structure hns_roce_dev.
  * @port:  port, value range: 0 ~ MAX
@@ -96,30 +57,6 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, 
int gid_index)
return gid_index * hr_dev->caps.num_ports + port;
 }
 
-static int hns_roce_set_gid(struct hns_roce_dev *hr_dev, u8 port, int 
gid_index,
-union ib_gid *gid)
-{
-   struct device *dev = &hr_dev->pdev->dev;
-   u8 gid_idx = 0;
-
-   if (gid_index >= hr_dev->caps.gid_table_len[port]) {
-   dev_err(dev, "gid_index %d illegal, port %d gid range: 0~%d\n",
-   gid_index, port, hr_dev->caps.gid_table_len[port] - 1);
-   return -EINVAL;
-   }
-
-   gid_idx = hns_get_gid_index(hr_dev, port, gid_index);
-
-   if (!memcmp(gid, &hr_dev->iboe.gid_table[gid_idx], sizeof(*gid)))
-   return -EINVAL;
-
-   memcpy(&hr_dev->iboe.gid_table[gid_idx], gid, sizeof(*gid));
-
-   hr_dev->hw->set_gid(hr_dev, port, gid_index, gid);
-
-   return 0;
-}
-
 static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
 {
u8 phy_port;
@@ -147,15 +84,44 @@ static void hns_roce_set_mtu(struct hns_roce_dev *hr_dev, 
u8 port, int mtu)
hr_dev->hw->set_mtu(hr_dev, phy_port, tmp);
 }
 
-static void hns_roce_update_gids(struct hns_roce_dev *hr_dev, int port)
+static int hns_roce_add_gid(struct ib_device *device, u8 port_num,
+   unsigned int index, const union ib_gid *gid,
+   const struct ib_gid_attr *attr, void **context)
+{
+   struct hns_roce_dev *hr_dev = to_hr_dev(device);
+   u8 port = port_num - 1;
+   unsigned long flags;
+
+   if (port >= hr_dev->caps.num_ports)
+   return -EINVAL;
+
+   spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+
+   hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid);
+
+   spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+
+   return 0;
+}
+
+static int 

[PATCH 02/15] net: nb8800: Utilize phy_ethtool_nway_reset

2016-11-15 Thread Florian Fainelli
Signed-off-by: Florian Fainelli 
---
 drivers/net/ethernet/aurora/nb8800.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/aurora/nb8800.c 
b/drivers/net/ethernet/aurora/nb8800.c
index b59aa3541270..07ff6492402a 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -1037,16 +1037,6 @@ static const struct net_device_ops nb8800_netdev_ops = {
.ndo_validate_addr  = eth_validate_addr,
 };
 
-static int nb8800_nway_reset(struct net_device *dev)
-{
-   struct phy_device *phydev = dev->phydev;
-
-   if (!phydev)
-   return -ENODEV;
-
-   return genphy_restart_aneg(phydev);
-}
-
 static void nb8800_get_pauseparam(struct net_device *dev,
  struct ethtool_pauseparam *pp)
 {
@@ -1165,7 +1155,7 @@ static void nb8800_get_ethtool_stats(struct net_device 
*dev,
 }
 
 static const struct ethtool_ops nb8800_ethtool_ops = {
-   .nway_reset = nb8800_nway_reset,
+   .nway_reset = phy_ethtool_nway_reset,
.get_link   = ethtool_op_get_link,
.get_pauseparam = nb8800_get_pauseparam,
.set_pauseparam = nb8800_set_pauseparam,
-- 
2.9.3



[PATCH 02/15] net: nb8800: Utilize phy_ethtool_nway_reset

2016-11-15 Thread Florian Fainelli
Signed-off-by: Florian Fainelli 
---
 drivers/net/ethernet/aurora/nb8800.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/aurora/nb8800.c 
b/drivers/net/ethernet/aurora/nb8800.c
index b59aa3541270..07ff6492402a 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -1037,16 +1037,6 @@ static const struct net_device_ops nb8800_netdev_ops = {
.ndo_validate_addr  = eth_validate_addr,
 };
 
-static int nb8800_nway_reset(struct net_device *dev)
-{
-   struct phy_device *phydev = dev->phydev;
-
-   if (!phydev)
-   return -ENODEV;
-
-   return genphy_restart_aneg(phydev);
-}
-
 static void nb8800_get_pauseparam(struct net_device *dev,
  struct ethtool_pauseparam *pp)
 {
@@ -1165,7 +1155,7 @@ static void nb8800_get_ethtool_stats(struct net_device 
*dev,
 }
 
 static const struct ethtool_ops nb8800_ethtool_ops = {
-   .nway_reset = nb8800_nway_reset,
+   .nway_reset = phy_ethtool_nway_reset,
.get_link   = ethtool_op_get_link,
.get_pauseparam = nb8800_get_pauseparam,
.set_pauseparam = nb8800_set_pauseparam,
-- 
2.9.3



[PATCH 04/15] net: mv643xx_eth: Utilize phy_ethtool_nway_reset

2016-11-15 Thread Florian Fainelli
Signed-off-by: Florian Fainelli 
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 10 +-
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c 
b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 81b08d71c0f8..5f62c3d70df9 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1639,14 +1639,6 @@ static void mv643xx_eth_get_drvinfo(struct net_device 
*dev,
strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
 }
 
-static int mv643xx_eth_nway_reset(struct net_device *dev)
-{
-   if (!dev->phydev)
-   return -EINVAL;
-
-   return genphy_restart_aneg(dev->phydev);
-}
-
 static int
 mv643xx_eth_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
@@ -1770,7 +1762,7 @@ static int mv643xx_eth_get_sset_count(struct net_device 
*dev, int sset)
 
 static const struct ethtool_ops mv643xx_eth_ethtool_ops = {
.get_drvinfo= mv643xx_eth_get_drvinfo,
-   .nway_reset = mv643xx_eth_nway_reset,
+   .nway_reset = phy_ethtool_nway_reset,
.get_link   = ethtool_op_get_link,
.get_coalesce   = mv643xx_eth_get_coalesce,
.set_coalesce   = mv643xx_eth_set_coalesce,
-- 
2.9.3



[PATCH 03/15] net: bcm63xx_enet: Utilize phy_ethtool_nway_reset

2016-11-15 Thread Florian Fainelli
Signed-off-by: Florian Fainelli 
---
 drivers/net/ethernet/broadcom/bcm63xx_enet.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c 
b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 5c7acef1de2e..a43ab90c051e 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -1434,11 +1434,8 @@ static int bcm_enet_nway_reset(struct net_device *dev)
struct bcm_enet_priv *priv;
 
priv = netdev_priv(dev);
-   if (priv->has_phy) {
-   if (!dev->phydev)
-   return -ENODEV;
-   return genphy_restart_aneg(dev->phydev);
-   }
+   if (priv->has_phy)
+   return phy_ethtool_nway_reset(dev);
 
return -EOPNOTSUPP;
 }
-- 
2.9.3



[PATCH V2 for-next 05/11] IB/hns: Modify the condition of notifying hardware loopback

2016-11-15 Thread Salil Mehta
From: Lijun Ou 

This patch modified the condition of notifying hardware loopback.

In hip06, the RoCE Engine has several ports, and each QP is related
to one port. The hardware only supports loopback within the same port,
not across different ports.

So, if a QP is related to port N, and the dmac in the QP context equals
the smac of local port N or loop_idc is 1, we should set the
loopback bit in the QP context to notify the hardware.

Signed-off-by: Wei Hu (Xavier) 
Signed-off-by: Lijun Ou 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |   24 +++-
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index e080dd6..643a2ff 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2244,24 +2244,14 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const 
struct ib_qp_attr *attr,
 QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
 hr_qp->sq_signal_bits);
 
-   for (port = 0; port < hr_dev->caps.num_ports; port++) {
-   smac = (u8 *)hr_dev->dev_addr[port];
-   dev_dbg(dev, "smac: %2x: %2x: %2x: %2x: %2x: %2x\n",
-   smac[0], smac[1], smac[2], smac[3], smac[4],
-   smac[5]);
-   if ((dmac[0] == smac[0]) && (dmac[1] == smac[1]) &&
-   (dmac[2] == smac[2]) && (dmac[3] == smac[3]) &&
-   (dmac[4] == smac[4]) && (dmac[5] == smac[5])) {
-   roce_set_bit(context->qpc_bytes_32,
-   QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S,
-   1);
-   break;
-   }
-   }
-
-   if (hr_dev->loop_idc == 0x1)
+   port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
+   hr_qp->port;
+   smac = (u8 *)hr_dev->dev_addr[port];
+   /* when dmac equals smac or loop_idc is 1, it should loopback */
+   if (ether_addr_equal_unaligned(dmac, smac) ||
+   hr_dev->loop_idc == 0x1)
roce_set_bit(context->qpc_bytes_32,
-   QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
+ QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
 
roce_set_bit(context->qpc_bytes_32,
 QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
-- 
1.7.9.5




[PATCH 04/15] net: mv643xx_eth: Utilize phy_ethtool_nway_reset

2016-11-15 Thread Florian Fainelli
Signed-off-by: Florian Fainelli 
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 10 +-
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c 
b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 81b08d71c0f8..5f62c3d70df9 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1639,14 +1639,6 @@ static void mv643xx_eth_get_drvinfo(struct net_device 
*dev,
strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
 }
 
-static int mv643xx_eth_nway_reset(struct net_device *dev)
-{
-   if (!dev->phydev)
-   return -EINVAL;
-
-   return genphy_restart_aneg(dev->phydev);
-}
-
 static int
 mv643xx_eth_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
@@ -1770,7 +1762,7 @@ static int mv643xx_eth_get_sset_count(struct net_device 
*dev, int sset)
 
 static const struct ethtool_ops mv643xx_eth_ethtool_ops = {
.get_drvinfo= mv643xx_eth_get_drvinfo,
-   .nway_reset = mv643xx_eth_nway_reset,
+   .nway_reset = phy_ethtool_nway_reset,
.get_link   = ethtool_op_get_link,
.get_coalesce   = mv643xx_eth_get_coalesce,
.set_coalesce   = mv643xx_eth_set_coalesce,
-- 
2.9.3



[PATCH 03/15] net: bcm63xx_enet: Utilize phy_ethtool_nway_reset

2016-11-15 Thread Florian Fainelli
Signed-off-by: Florian Fainelli 
---
 drivers/net/ethernet/broadcom/bcm63xx_enet.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c 
b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 5c7acef1de2e..a43ab90c051e 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -1434,11 +1434,8 @@ static int bcm_enet_nway_reset(struct net_device *dev)
struct bcm_enet_priv *priv;
 
priv = netdev_priv(dev);
-   if (priv->has_phy) {
-   if (!dev->phydev)
-   return -ENODEV;
-   return genphy_restart_aneg(dev->phydev);
-   }
+   if (priv->has_phy)
+   return phy_ethtool_nway_reset(dev);
 
return -EOPNOTSUPP;
 }
-- 
2.9.3



[PATCH V2 for-next 05/11] IB/hns: Modify the condition of notifying hardware loopback

2016-11-15 Thread Salil Mehta
From: Lijun Ou 

This patch modified the condition of notifying hardware loopback.

In hip06, the RoCE Engine has several ports, and each QP is related
to one port. The hardware only supports loopback within the same port,
not across different ports.

So, if a QP is related to port N, and the dmac in the QP context equals
the smac of local port N or loop_idc is 1, we should set the
loopback bit in the QP context to notify the hardware.

Signed-off-by: Wei Hu (Xavier) 
Signed-off-by: Lijun Ou 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |   24 +++-
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index e080dd6..643a2ff 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2244,24 +2244,14 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const 
struct ib_qp_attr *attr,
 QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
 hr_qp->sq_signal_bits);
 
-   for (port = 0; port < hr_dev->caps.num_ports; port++) {
-   smac = (u8 *)hr_dev->dev_addr[port];
-   dev_dbg(dev, "smac: %2x: %2x: %2x: %2x: %2x: %2x\n",
-   smac[0], smac[1], smac[2], smac[3], smac[4],
-   smac[5]);
-   if ((dmac[0] == smac[0]) && (dmac[1] == smac[1]) &&
-   (dmac[2] == smac[2]) && (dmac[3] == smac[3]) &&
-   (dmac[4] == smac[4]) && (dmac[5] == smac[5])) {
-   roce_set_bit(context->qpc_bytes_32,
-   QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S,
-   1);
-   break;
-   }
-   }
-
-   if (hr_dev->loop_idc == 0x1)
+   port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
+   hr_qp->port;
+   smac = (u8 *)hr_dev->dev_addr[port];
+   /* when dmac equals smac or loop_idc is 1, it should loopback */
+   if (ether_addr_equal_unaligned(dmac, smac) ||
+   hr_dev->loop_idc == 0x1)
roce_set_bit(context->qpc_bytes_32,
-   QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
+ QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
 
roce_set_bit(context->qpc_bytes_32,
 QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
-- 
1.7.9.5




[PATCH V2 for-next 00/11] Code improvements & fixes for HNS RoCE driver

2016-11-15 Thread Salil Mehta
This patchset introduces some code improvements and fixes
for the identified problems in the HNS RoCE driver.

Lijun Ou (4):
  IB/hns: Add the interface for querying QP1
  IB/hns: add self loopback for CM
  IB/hns: Modify the condition of notifying hardware loopback
  IB/hns: Fix the bug for qp state in hns_roce_v1_m_qp()

Salil Mehta (1):
  IB/hns: Fix for Checkpatch.pl comment style errors

Shaobo Xu (1):
  IB/hns: Implement the add_gid/del_gid and optimize the GIDs
management

Wei Hu (Xavier) (5):
  IB/hns: Add code for refreshing CQ CI using TPTR
  IB/hns: Optimize the logic of allocating memory using APIs
  IB/hns: Modify the macro for the timeout when cmd process
  IB/hns: Modify query info named port_num when querying RC QP
  IB/hns: Change qpn allocation to round-robin mode.

 drivers/infiniband/hw/hns/hns_roce_alloc.c  |   11 +-
 drivers/infiniband/hw/hns/hns_roce_cmd.c|8 +-
 drivers/infiniband/hw/hns/hns_roce_cmd.h|7 +-
 drivers/infiniband/hw/hns/hns_roce_common.h |2 -
 drivers/infiniband/hw/hns/hns_roce_cq.c |   17 +-
 drivers/infiniband/hw/hns/hns_roce_device.h |   45 ++--
 drivers/infiniband/hw/hns/hns_roce_eq.c |6 +-
 drivers/infiniband/hw/hns/hns_roce_hem.c|6 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |  267 +--
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h  |   17 +-
 drivers/infiniband/hw/hns/hns_roce_main.c   |  311 +++
 drivers/infiniband/hw/hns/hns_roce_mr.c |   21 +-
 drivers/infiniband/hw/hns/hns_roce_pd.c |5 +-
 drivers/infiniband/hw/hns/hns_roce_qp.c |2 +-
 14 files changed, 363 insertions(+), 362 deletions(-)

-- 
1.7.9.5




[PATCH V2 for-next 02/11] IB/hns: Add code for refreshing CQ CI using TPTR

2016-11-15 Thread Salil Mehta
From: "Wei Hu (Xavier)" 

This patch added the code for refreshing CQ CI using TPTR in hip06
SoC.

We send a doorbell to the hardware to refresh the CQ CI when the user
successfully polls a cqe. But this will fail if the doorbell has
been blocked. So the hardware reads a special buffer called TPTR
to get the latest CI value when the cq is almost full.

This patch supports the special CI buffer as follows:
a) Alloc the memory for TPTR in the hns_roce_tptr_init function and
   free it in hns_roce_tptr_free function, these two functions will
   be called in probe function and in the remove function.
b) Add the code for computing offset(every cq need 2 bytes) and
   write the dma addr to every cq context to notice hardware in the
   function named hns_roce_v1_write_cqc.
c) Add code for mapping the TPTR buffer to user space in the function named
   hns_roce_mmap. The mapping distinguishes TPTR and UAR of user mode
   by vm_pgoff (0: UAR, 1: TPTR, others: invalid) in hip06.
d) Add the code for refreshing CQ CI using TPTR in the function
   named hns_roce_v1_poll_cq.
e) Add some variable definitions to the related structure.

Signed-off-by: Wei Hu (Xavier) 
Signed-off-by: Dongdong Huang(Donald) 
Signed-off-by: Lijun Ou 
Signed-off-by: Salil Mehta  
---
 drivers/infiniband/hw/hns/hns_roce_common.h |2 -
 drivers/infiniband/hw/hns/hns_roce_cq.c |9 +++
 drivers/infiniband/hw/hns/hns_roce_device.h |6 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |   79 ---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h  |9 +++
 drivers/infiniband/hw/hns/hns_roce_main.c   |   13 -
 6 files changed, 103 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h 
b/drivers/infiniband/hw/hns/hns_roce_common.h
index 2970161..0dcb620 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -253,8 +253,6 @@
 #define ROCEE_VENDOR_ID_REG        0x0
 #define ROCEE_VENDOR_PART_ID_REG   0x4
 
-#define ROCEE_HW_VERSION_REG   0x8
-
 #define ROCEE_SYS_IMAGE_GUID_L_REG 0xC
 #define ROCEE_SYS_IMAGE_GUID_H_REG 0x10
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c 
b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 0973659..5dc8d92 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -349,6 +349,15 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device 
*ib_dev,
goto err_mtt;
}
 
+   /*
+* For the QP created by kernel space, tptr value should be initialized
+* to zero; For the QP created by user space, it will cause synchronous
+* problems if tptr is set to zero here, so we initialize it in user
+* space.
+*/
+   if (!context)
+   *hr_cq->tptr_addr = 0;
+
/* Get created cq handler and carry out event */
hr_cq->comp = hns_roce_ib_cq_comp;
hr_cq->event = hns_roce_ib_cq_event;
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h 
b/drivers/infiniband/hw/hns/hns_roce_device.h
index 3417315..7242b14 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -37,6 +37,8 @@
 
 #define DRV_NAME "hns_roce"
 
+#define HNS_ROCE_HW_VER1   ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
+
 #define MAC_ADDR_OCTET_NUM 6
 #define HNS_ROCE_MAX_MSG_LEN   0x8000
 
@@ -296,7 +298,7 @@ struct hns_roce_cq {
u32 cq_depth;
u32 cons_index;
void __iomem*cq_db_l;
-   void __iomem*tptr_addr;
+   u16 *tptr_addr;
unsigned long   cqn;
u32 vector;
       atomic_t refcount;
@@ -553,6 +555,8 @@ struct hns_roce_dev {
 
int cmd_mod;
int loop_idc;
+   dma_addr_t  tptr_dma_addr; /*only for hw v1*/
+   u32 tptr_size; /*only for hw v1*/
struct hns_roce_hw  *hw;
 };
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 7485514..959d5ca 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -849,6 +849,45 @@ static void hns_roce_bt_free(struct hns_roce_dev *hr_dev)
priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
 }
 
+static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev)
+{
+   struct device *dev = &hr_dev->pdev->dev;
+   struct hns_roce_buf_list *tptr_buf;
+   struct hns_roce_v1_priv *priv;
+
+   priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+   tptr_buf 

<    2   3   4   5   6   7   8   9   10   11   >