[PATCH 5.8 048/124] drm/amd/pm: Removed fixed clock in auto mode DPM

2020-10-12 Thread Greg Kroah-Hartman
From: Sudheesh Mavila 

[ Upstream commit 97cf32996c46d9935cc133d910a75fb687dd6144 ]

SMU10_UMD_PSTATE_PEAK_FCLK value should not be used to set the DPM.

Suggested-by: Evan Quan 
Reviewed-by: Evan Quan 
Signed-off-by: Sudheesh Mavila 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index 9ee8cf8267c88..43f7adff6cb74 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -563,6 +563,8 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
struct smu10_hwmgr *data = hwmgr->backend;
uint32_t min_sclk = hwmgr->display_config->min_core_set_clock;
uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100;
+   uint32_t index_fclk = data->clock_vol_info.vdd_dep_on_fclk->count - 1;
+   uint32_t index_socclk = data->clock_vol_info.vdd_dep_on_socclk->count - 1;
 
if (hwmgr->smu_version < 0x1E3700) {
pr_info("smu firmware version too old, can not set dpm 
level\n");
@@ -676,13 +678,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinFclkByFreq,

hwmgr->display_config->num_display > 3 ?
-   SMU10_UMD_PSTATE_PEAK_FCLK :
+   data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk :
min_mclk,
NULL);
 
smum_send_msg_to_smc_with_parameter(hwmgr,

PPSMC_MSG_SetHardMinSocclkByFreq,
-   SMU10_UMD_PSTATE_MIN_SOCCLK,
+   data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk,
NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinVcn,
@@ -695,11 +697,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxFclkByFreq,
-   SMU10_UMD_PSTATE_PEAK_FCLK,
+   data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk,
NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,

PPSMC_MSG_SetSoftMaxSocclkByFreq,
-   SMU10_UMD_PSTATE_PEAK_SOCCLK,
+   data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk,
NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxVcn,
-- 
2.25.1





[PATCH 5.8 047/124] io_uring: fix potential ABBA deadlock in ->show_fdinfo()

2020-10-12 Thread Greg Kroah-Hartman
From: Jens Axboe 

[ Upstream commit fad8e0de4426a776c9bcb060555e7c09e2d08db6 ]

syzbot reports a potential lock deadlock between the normal IO path and
->show_fdinfo():

==
WARNING: possible circular locking dependency detected
5.9.0-rc6-syzkaller #0 Not tainted
--
syz-executor.2/19710 is trying to acquire lock:
888098ddc450 (sb_writers#4){.+.+}-{0:0}, at: io_write+0x6b5/0xb30 
fs/io_uring.c:3296

but task is already holding lock:
8880a11b8428 (&ctx->uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0xe9a/0x1bd0 fs/io_uring.c:8348

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&ctx->uring_lock){+.+.}-{3:3}:
   __mutex_lock_common kernel/locking/mutex.c:956 [inline]
   __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103
   __io_uring_show_fdinfo fs/io_uring.c:8417 [inline]
   io_uring_show_fdinfo+0x194/0xc70 fs/io_uring.c:8460
   seq_show+0x4a8/0x700 fs/proc/fd.c:65
   seq_read+0x432/0x1070 fs/seq_file.c:208
   do_loop_readv_writev fs/read_write.c:734 [inline]
   do_loop_readv_writev fs/read_write.c:721 [inline]
   do_iter_read+0x48e/0x6e0 fs/read_write.c:955
   vfs_readv+0xe5/0x150 fs/read_write.c:1073
   kernel_readv fs/splice.c:355 [inline]
   default_file_splice_read.constprop.0+0x4e6/0x9e0 fs/splice.c:412
   do_splice_to+0x137/0x170 fs/splice.c:871
   splice_direct_to_actor+0x307/0x980 fs/splice.c:950
   do_splice_direct+0x1b3/0x280 fs/splice.c:1059
   do_sendfile+0x55f/0xd40 fs/read_write.c:1540
   __do_sys_sendfile64 fs/read_write.c:1601 [inline]
   __se_sys_sendfile64 fs/read_write.c:1587 [inline]
   __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1587
   do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
   entry_SYSCALL_64_after_hwframe+0x44/0xa9

-> #1 (&p->lock){+.+.}-{3:3}:
   __mutex_lock_common kernel/locking/mutex.c:956 [inline]
   __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103
   seq_read+0x61/0x1070 fs/seq_file.c:155
   pde_read fs/proc/inode.c:306 [inline]
   proc_reg_read+0x221/0x300 fs/proc/inode.c:318
   do_loop_readv_writev fs/read_write.c:734 [inline]
   do_loop_readv_writev fs/read_write.c:721 [inline]
   do_iter_read+0x48e/0x6e0 fs/read_write.c:955
   vfs_readv+0xe5/0x150 fs/read_write.c:1073
   kernel_readv fs/splice.c:355 [inline]
   default_file_splice_read.constprop.0+0x4e6/0x9e0 fs/splice.c:412
   do_splice_to+0x137/0x170 fs/splice.c:871
   splice_direct_to_actor+0x307/0x980 fs/splice.c:950
   do_splice_direct+0x1b3/0x280 fs/splice.c:1059
   do_sendfile+0x55f/0xd40 fs/read_write.c:1540
   __do_sys_sendfile64 fs/read_write.c:1601 [inline]
   __se_sys_sendfile64 fs/read_write.c:1587 [inline]
   __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1587
   do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
   entry_SYSCALL_64_after_hwframe+0x44/0xa9

-> #0 (sb_writers#4){.+.+}-{0:0}:
   check_prev_add kernel/locking/lockdep.c:2496 [inline]
   check_prevs_add kernel/locking/lockdep.c:2601 [inline]
   validate_chain kernel/locking/lockdep.c:3218 [inline]
   __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4441
   lock_acquire+0x1f3/0xaf0 kernel/locking/lockdep.c:5029
   percpu_down_read include/linux/percpu-rwsem.h:51 [inline]
   __sb_start_write+0x228/0x450 fs/super.c:1672
   io_write+0x6b5/0xb30 fs/io_uring.c:3296
   io_issue_sqe+0x18f/0x5c50 fs/io_uring.c:5719
   __io_queue_sqe+0x280/0x1160 fs/io_uring.c:6175
   io_queue_sqe+0x692/0xfa0 fs/io_uring.c:6254
   io_submit_sqe fs/io_uring.c:6324 [inline]
   io_submit_sqes+0x1761/0x2400 fs/io_uring.c:6521
   __do_sys_io_uring_enter+0xeac/0x1bd0 fs/io_uring.c:8349
   do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
   entry_SYSCALL_64_after_hwframe+0x44/0xa9

other info that might help us debug this:

Chain exists of:
  sb_writers#4 --> &p->lock --> &ctx->uring_lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&ctx->uring_lock);
                               lock(&p->lock);
                               lock(&ctx->uring_lock);
  lock(sb_writers#4);

 *** DEADLOCK ***

1 lock held by syz-executor.2/19710:
 #0: 8880a11b8428 (&ctx->uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0xe9a/0x1bd0 fs/io_uring.c:8348

stack backtrace:
CPU: 0 PID: 19710 Comm: syz-executor.2 Not tainted 5.9.0-rc6-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x198/0x1fd lib/dump_stack.c:118
 check_noncircular+0x324/0x3e0 kernel/locking/lockdep.c:1827
 check_prev_add kernel/locking/lockdep.c:2496 [inline]
 check_prevs_add kernel/locking/lockdep.c:2601 [inline]
 validate_chain 

[PATCH 5.8 075/124] r8169: fix RTL8168f/RTL8411 EPHY config

2020-10-12 Thread Greg Kroah-Hartman
From: Heiner Kallweit 

[ Upstream commit 709a16be0593c08190982cfbdca6df95e6d5823b ]

Bit 2 was mistakenly set instead of bit 3, which is what the vendor driver sets.

Fixes: a7a92cf81589 ("r8169: sync PCIe PHY init with vendor driver 8.047.01")
Signed-off-by: Heiner Kallweit 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/realtek/r8169_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index a5d54fa012213..fe173ea894e2c 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2958,7 +2958,7 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
{ 0x08, 0x0001, 0x0002 },
{ 0x09, 0x0000, 0x0080 },
{ 0x19, 0x0000, 0x0224 },
-   { 0x00, 0x0000, 0x0004 },
+   { 0x00, 0x0000, 0x0008 },
{ 0x0c, 0x3df0, 0x0200 },
};
 
@@ -2975,7 +2975,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
{ 0x06, 0x00c0, 0x0020 },
{ 0x0f, 0x0000, 0x5200 },
{ 0x19, 0x0000, 0x0224 },
-   { 0x00, 0x0000, 0x0004 },
+   { 0x00, 0x0000, 0x0008 },
{ 0x0c, 0x3df0, 0x0200 },
};
 
-- 
2.25.1





[PATCH 5.8 082/124] octeontx2-pf: Fix synchronization issue in mbox

2020-10-12 Thread Greg Kroah-Hartman
From: Hariprasad Kelam 

[ Upstream commit 66a5209b53418111757716d71e52727b782eabd4 ]

The mbox implementation in the octeontx2 driver has three states
for a mbox response: alloc, send and reset. A VF allocates and
sends a message to the PF for processing; the PF ACKs it back and
resets the mbox memory. A synchronization issue can occur when the
current thread is scheduled out after msgs_acked has been
incremented but before the mbox_reset API is called: a different
thread that checks msgs_acked then sees msgs_acked == msgs_sent,
so it allocates and sends a new mbox message to the PF. If
mbox_reset only runs after that, the PF sees '0' in msgs_sent.
This patch fixes the issue by calling mbox_reset before
incrementing msgs_acked for the last message being processed, and
by checking for a valid message size.
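
As an illustration of the ordering, a small user-space model (not kernel
code; names and types are simplified stand-ins) of why the reset has to
precede the final msgs_acked increment:

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

struct mbox_model {
	atomic_int msgs_sent;
	atomic_int msgs_acked;
	char region[64];
};

/* Fixed ordering: clean the shared region for the last message first,
 * then publish the msgs_acked increment.  A peer that polls for
 * msgs_acked == msgs_sent can only proceed once the region is already
 * reset, which is what calling __otx2_mbox_reset() before the
 * increment achieves in the real driver. */
static void ack_message(struct mbox_model *mb, int is_last)
{
	if (is_last)
		memset(mb->region, 0, sizeof(mb->region)); /* reset analogue */
	atomic_fetch_add_explicit(&mb->msgs_acked, 1, memory_order_release);
}

int main(void)
{
	struct mbox_model mb;

	atomic_init(&mb.msgs_sent, 1);
	atomic_init(&mb.msgs_acked, 0);
	memset(mb.region, 0, sizeof(mb.region));

	ack_message(&mb, 1);
	printf("acked %d of %d\n", atomic_load(&mb.msgs_acked),
	       atomic_load(&mb.msgs_sent));
	return 0;
}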

Fixes: d424b6c02 ("octeontx2-pf: Enable SRIOV and added VF mbox handling")
Signed-off-by: Hariprasad Kelam 
Signed-off-by: Geetha sowjanya 
Signed-off-by: Sunil Goutham 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/marvell/octeontx2/af/mbox.c | 12 ++--
 drivers/net/ethernet/marvell/octeontx2/af/mbox.h |  1 +
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 11 ++-
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c |  4 ++--
 4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
index 387e33fa417aa..2718fe201c147 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
@@ -17,7 +17,7 @@
 
 static const u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
 
-void otx2_mbox_reset(struct otx2_mbox *mbox, int devid)
+void __otx2_mbox_reset(struct otx2_mbox *mbox, int devid)
 {
void *hw_mbase = mbox->hwbase + (devid * MBOX_SIZE);
struct otx2_mbox_dev *mdev = &mbox->dev[devid];
@@ -26,13 +26,21 @@ void otx2_mbox_reset(struct otx2_mbox *mbox, int devid)
tx_hdr = hw_mbase + mbox->tx_start;
rx_hdr = hw_mbase + mbox->rx_start;
 
-   spin_lock(&mdev->mbox_lock);
mdev->msg_size = 0;
mdev->rsp_size = 0;
tx_hdr->num_msgs = 0;
tx_hdr->msg_size = 0;
rx_hdr->num_msgs = 0;
rx_hdr->msg_size = 0;
+}
+EXPORT_SYMBOL(__otx2_mbox_reset);
+
+void otx2_mbox_reset(struct otx2_mbox *mbox, int devid)
+{
+   struct otx2_mbox_dev *mdev = &mbox->dev[devid];
+
+   spin_lock(&mdev->mbox_lock);
+   __otx2_mbox_reset(mbox, devid);
spin_unlock(&mdev->mbox_lock);
 }
 EXPORT_SYMBOL(otx2_mbox_reset);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 6dfd0f90cd704..ab433789d2c31 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -93,6 +93,7 @@ struct mbox_msghdr {
 };
 
 void otx2_mbox_reset(struct otx2_mbox *mbox, int devid);
+void __otx2_mbox_reset(struct otx2_mbox *mbox, int devid);
 void otx2_mbox_destroy(struct otx2_mbox *mbox);
 int otx2_mbox_init(struct otx2_mbox *mbox, void __force *hwbase,
   struct pci_dev *pdev, void __force *reg_base,
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 5d620a39ea802..2fb45670aca49 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -370,8 +370,8 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf,
dst_mbox = &pf->mbox;
dst_size = dst_mbox->mbox.tx_size -
ALIGN(sizeof(*mbox_hdr), MBOX_MSG_ALIGN);
-   /* Check if msgs fit into destination area */
-   if (mbox_hdr->msg_size > dst_size)
+   /* Check if msgs fit into destination area and has valid size */
+   if (mbox_hdr->msg_size > dst_size || !mbox_hdr->msg_size)
return -EINVAL;
 
dst_mdev = &dst_mbox->mbox.dev[0];
@@ -526,10 +526,10 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work)
 
 end:
offset = mbox->rx_start + msg->next_msgoff;
+   if (mdev->msgs_acked == (vf_mbox->up_num_msgs - 1))
+   __otx2_mbox_reset(mbox, 0);
mdev->msgs_acked++;
}
-
-   otx2_mbox_reset(mbox, vf_idx);
 }
 
 static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq)
@@ -803,10 +803,11 @@ static void otx2_pfaf_mbox_handler(struct work_struct *work)
msg = (struct mbox_msghdr *)(mdev->mbase + offset);
otx2_process_pfaf_mbox_msg(pf, msg);
offset = mbox->rx_start + msg->next_msgoff;
+   if (mdev->msgs_acked == (af_mbox->num_msgs - 1))
+   __otx2_mbox_reset(mbox, 0);

[PATCH 5.8 109/124] perf: Fix task_function_call() error handling

2020-10-12 Thread Greg Kroah-Hartman
From: Kajol Jain 

[ Upstream commit 6d6b8b9f4fceab7266ca03d194f60ec72bd4b654 ]

The error handling introduced by commit:

  2ed6edd33a21 ("perf: Add cond_resched() to task_function_call()")

loses any return value from smp_call_function_single() that is not
{0, -EINVAL}. This is a problem because it will return -ENXIO when the
target CPU is offline. Worse, in that case it'll turn into an infinite
loop.
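
A tiny user-space model of the retry loop (illustrative only; ipi_call()
is a hypothetical stand-in for smp_call_function_single()):

#include <errno.h>
#include <stdio.h>

/* Stand-in for smp_call_function_single() when the target CPU is
 * offline: it always fails with -ENXIO and never runs the callback. */
static int ipi_call(int *callback_ret)
{
	(void)callback_ret;
	return -ENXIO;
}

static int task_function_call_model(void)
{
	int callback_ret = -EAGAIN, ret;

	for (;;) {
		ret = ipi_call(&callback_ret);
		/* The old code did: ret = !ret ? callback_ret : -EAGAIN;
		 * which turned the permanent -ENXIO into -EAGAIN and spun
		 * forever.  Keeping the real error lets the loop end. */
		if (!ret)
			ret = callback_ret;
		if (ret != -EAGAIN)
			break;
	}
	return ret;
}

int main(void)
{
	printf("ret = %d (-ENXIO = %d)\n", task_function_call_model(), -ENXIO);
	return 0;
}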

Fixes: 2ed6edd33a21 ("perf: Add cond_resched() to task_function_call()")
Reported-by: Srikar Dronamraju 
Signed-off-by: Kajol Jain 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Reviewed-by: Barret Rhoden 
Tested-by: Srikar Dronamraju 
Link: https://lkml.kernel.org/r/20200827064732.20860-1-kj...@linux.ibm.com
Signed-off-by: Sasha Levin 
---
 kernel/events/core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 856d98c36f562..fd8cd00099dae 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -99,7 +99,7 @@ static void remote_function(void *data)
  * retry due to any failures in smp_call_function_single(), such as if the
  * task_cpu() goes offline concurrently.
  *
- * returns @func return value or -ESRCH when the process isn't running
+ * returns @func return value or -ESRCH or -ENXIO when the process isn't running
  */
 static int
 task_function_call(struct task_struct *p, remote_function_f func, void *info)
@@ -115,7 +115,8 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info)
for (;;) {
ret = smp_call_function_single(task_cpu(p), remote_function,
   &data, 1);
-   ret = !ret ? data.ret : -EAGAIN;
+   if (!ret)
+   ret = data.ret;
 
if (ret != -EAGAIN)
break;
-- 
2.25.1





[PATCH 5.8 059/124] platform/x86: fix kconfig dependency warning for LG_LAPTOP

2020-10-12 Thread Greg Kroah-Hartman
From: Necip Fazil Yildiran 

[ Upstream commit 8f0c01e85c4d2e1a233e6f4d7ab16c9f8b2a ]

When LG_LAPTOP is enabled and NEW_LEDS is disabled, it results in the
following Kbuild warning:

WARNING: unmet direct dependencies detected for LEDS_CLASS
  Depends on [n]: NEW_LEDS [=n]
  Selected by [y]:
  - LG_LAPTOP [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y] && ACPI_WMI [=y] && INPUT [=y]

The reason is that LG_LAPTOP selects LEDS_CLASS without depending on or
selecting NEW_LEDS while LEDS_CLASS is subordinate to NEW_LEDS.

Honor the kconfig menu hierarchy to remove kconfig dependency warnings.

Fixes: dbf0c5a6b1f8 ("platform/x86: Add LG Gram laptop special features driver")
Signed-off-by: Necip Fazil Yildiran 
Reviewed-by: Hans de Goede 
Acked-by: mark gross 
Signed-off-by: Andy Shevchenko 
Signed-off-by: Sasha Levin 
---
 drivers/platform/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 0581a54cf562f..e1668a9538c8f 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -1091,6 +1091,7 @@ config LG_LAPTOP
depends on ACPI_WMI
depends on INPUT
select INPUT_SPARSEKMAP
+   select NEW_LEDS
select LEDS_CLASS
help
 This driver adds support for hotkeys as well as control of keyboard
-- 
2.25.1





[PATCH 5.8 046/124] btrfs: move btrfs_scratch_superblocks into btrfs_dev_replace_finishing

2020-10-12 Thread Greg Kroah-Hartman
From: Josef Bacik 

[ Upstream commit 313b085851c13ca08320372a05a7047ea25d3dd4 ]

We need to move the closing of the src_device out of all the device
replace locking, but we definitely want to zero out the superblock
before we commit the last time to make sure the device is properly
removed.  Handle this by pushing btrfs_scratch_superblocks into
btrfs_dev_replace_finishing, and then later on we'll move the src_device
closing and freeing stuff where we need it to be.

Reviewed-by: Nikolay Borisov 
Signed-off-by: Josef Bacik 
Signed-off-by: David Sterba 
Signed-off-by: Sasha Levin 
---
 fs/btrfs/dev-replace.c |  3 +++
 fs/btrfs/volumes.c | 12 +++-
 fs/btrfs/volumes.h |  3 +++
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index eb86e4b88c73a..26c9da82e6a91 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -783,6 +783,9 @@ error:
/* replace the sysfs entry */
btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, src_device);
btrfs_sysfs_update_devid(tgt_device);
+   if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state))
+   btrfs_scratch_superblocks(fs_info, src_device->bdev,
+ src_device->name->str);
btrfs_rm_dev_replace_free_srcdev(src_device);
 
/* write back the superblocks */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 956eb0d6bc584..8b5f666a3ea66 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1999,9 +1999,9 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
return num_devices;
 }
 
-static void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
- struct block_device *bdev,
- const char *device_path)
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
+  struct block_device *bdev,
+  const char *device_path)
 {
struct btrfs_super_block *disk_super;
int copy_num;
@@ -2224,12 +2224,6 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
struct btrfs_fs_info *fs_info = srcdev->fs_info;
struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
 
-   if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
-   /* zero out the old super if it is writable */
-   btrfs_scratch_superblocks(fs_info, srcdev->bdev,
- srcdev->name->str);
-   }
-
btrfs_close_bdev(srcdev);
synchronize_rcu();
btrfs_free_device(srcdev);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 75af2334b2e37..83862e27f5663 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -573,6 +573,9 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
 void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
 bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev);
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
+  struct block_device *bdev,
+  const char *device_path);
 
 int btrfs_bg_type_to_factor(u64 flags);
 const char *btrfs_bg_type_to_raid_name(u64 flags);
-- 
2.25.1





[PATCH 5.8 089/124] net/mlx5e: Add resiliency in Striding RQ mode for packets larger than MTU

2020-10-12 Thread Greg Kroah-Hartman
From: Aya Levin 

[ Upstream commit c3c9402373fe20e2d08c04f437ce4dcd252cffb2 ]

Prior to this fix, in Striding RQ mode the driver was vulnerable when
receiving packets in the range (stride size - headroom, stride size],
where the stride size is mtu+headroom+tailroom rounded up to the
closest power of 2.
Usually, this filtering is performed by the HW, except for a few cases:
- Between 2 VFs over the same PF with different MTUs
- On BlueField, when the host physical function sets a larger MTU than
  the ARM has configured on its representor and uplink representor.

When the HW filtering is not present, packets that are larger than MTU
might harm the RQ's integrity, with the following impacts:
1) Overflow from one WQE to the next, causing memory corruption. In most
cases this is harmless, as the write lands in the headroom of the next
packet, which will be overwritten by build_skb(). In rare cases, under
high stress/load, it is harmful: when the next WQE has not yet been
reposted and still points to an existing SKB head.
2) Each oversize packet overflows into the headroom of the next WQE. On
the last WQE of the WQ, where addresses wrap around, the remainder of
the headroom does not belong to the next WQE but lies outside the
memory region range. This results in a HW CQE error that moves the RQ
into an error state.

Solution:
Add a page buffer at the end of each WQE to absorb the leak. Actually
the maximal overflow size is headroom but since all memory units must be
of the same size, we use page size to comply with UMR WQEs. The increase
in memory consumption is of a single page per RQ. Initialize the mkey
with all MTTs pointing to a default page. When the channels are
activated, UMR WQEs will redirect the RX WQEs to the actual memory from
the RQ's pool, while the overflow MTTs remain mapped to the default page.
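
A worked example of the numbers involved (standalone sketch with assumed
MTU/headroom/tailroom values, not taken from the driver): the stride is
the power of two covering mtu + headroom + tailroom, so frames in
(stride - headroom, stride] still fit the stride but their tail lands in
the next WQE's headroom.

#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int x)
{
	unsigned int p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

int main(void)
{
	/* Assumed example values, not read from any device. */
	unsigned int mtu = 1500, headroom = 256, tailroom = 320;
	unsigned int stride = roundup_pow_of_two(mtu + headroom + tailroom);

	printf("stride = %u\n", stride);
	printf("frames of %u..%u bytes spill into the next stride's headroom\n",
	       stride - headroom + 1, stride);
	return 0;
}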

Fixes: 73281b78a37a ("net/mlx5e: Derive Striding RQ size from MTU")
Signed-off-by: Aya Levin 
Reviewed-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  8 ++-
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 55 +--
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 76b23ba7a4687..cb3857e136d62 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -90,7 +90,12 @@ struct page_pool;
 #define MLX5_MPWRQ_PAGES_PER_WQE   BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
 
 #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
-#define MLX5E_REQUIRED_WQE_MTTS		(ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+/* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between
+ * WQEs, This page will absorb write overflow by the hardware, when
+ * receiving packets larger than MTU. These oversize packets are
+ * dropped by the driver at a later stage.
+ */
+#define MLX5E_REQUIRED_WQE_MTTS		(ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8))
 #define MLX5E_LOG_ALIGNED_MPWQE_PPW	(ilog2(MLX5E_REQUIRED_WQE_MTTS))
 #define MLX5E_REQUIRED_MTTS(wqes)  (wqes * MLX5E_REQUIRED_WQE_MTTS)
 #define MLX5E_MAX_RQ_NUM_MTTS  \
@@ -621,6 +626,7 @@ struct mlx5e_rq {
u32rqn;
struct mlx5_core_dev  *mdev;
struct mlx5_core_mkey  umr_mkey;
+   struct mlx5e_dma_info  wqe_overflow;
 
/* XDP read-mostly */
struct xdp_rxq_infoxdp_rxq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index cccf65fc116ee..b23ad0b6761c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -258,12 +258,17 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 
 static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
 u64 npages, u8 page_shift,
-struct mlx5_core_mkey *umr_mkey)
+struct mlx5_core_mkey *umr_mkey,
+dma_addr_t filler_addr)
 {
-   int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+   struct mlx5_mtt *mtt;
+   int inlen;
void *mkc;
u32 *in;
int err;
+   int i;
+
+   inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages;
 
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
@@ -283,6 +288,18 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
MLX5_SET(mkc, mkc, translations_octword_size,
 MLX5_MTT_OCTW(npages));
MLX5_SET(mkc, mkc, log_page_size, page_shift);
+   MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+MLX5_MTT_OCTW(npages));
+
+   /* Initialize the mkey with all MTTs pointing to a default
+* page (filler_addr). When the channels are activated, UMR
+* WQEs will redirect 

[PATCH 5.8 110/124] mmc: core: don't set limits.discard_granularity as 0

2020-10-12 Thread Greg Kroah-Hartman
From: Coly Li 

[ Upstream commit 4243219141b67d7c2fdb2d8073c17c539b9263eb ]

In mmc_queue_setup_discard() the mmc driver queue's discard_granularity
might be set as 0 (when card->pref_erase > max_discard) while the mmc
device still declares to support discard operation. This is buggy and
triggered the following kernel warning message,

WARNING: CPU: 0 PID: 135 at __blkdev_issue_discard+0x200/0x294
CPU: 0 PID: 135 Comm: f2fs_discard-17 Not tainted 5.9.0-rc6 #1
Hardware name: Google Kevin (DT)
pstate: 0005 (nzcv daif -PAN -UAO BTYPE=--)
pc : __blkdev_issue_discard+0x200/0x294
lr : __blkdev_issue_discard+0x54/0x294
sp : 800011dd3b10
x29: 800011dd3b10 x28:  x27: 800011dd3cc4 x26: 
800011dd3e18 x25: 0004e69b x24: 0c40 x23: 
f1deaaf0 x22: f2849200 x21: 002734d8 x20: 
0008 x19:  x18:  x17: 
 x16:  x15:  x14: 
0394 x13:  x12:  x11: 
 x10: 08b0 x9 : 800011dd3cb0 x8 : 
0004e69b x7 :  x6 : f1926400 x5 : 
f1940800 x4 :  x3 : 0c40 x2 : 
0008 x1 : 002734d8 x0 :  Call trace:
__blkdev_issue_discard+0x200/0x294
__submit_discard_cmd+0x128/0x374
__issue_discard_cmd_orderly+0x188/0x244
__issue_discard_cmd+0x2e8/0x33c
issue_discard_thread+0xe8/0x2f0
kthread+0x11c/0x120
ret_from_fork+0x10/0x1c
---[ end trace e4c8023d33dfe77a ]---

This patch fixes the issue by setting discard_granularity as SECTOR_SIZE
instead of 0 when (card->pref_erase > max_discard) is true. Now
__blkdev_issue_discard() no longer complains about an improper discard
granularity value.

This issue is exposed after commit b35fd7422c2f ("block: check queue's
limits.discard_granularity in __blkdev_issue_discard()"), a "Fixes:" tag
is also added for the commit to make sure people won't miss this patch
after applying the change of __blkdev_issue_discard().

Fixes: e056a1b5b67b ("mmc: queue: let host controllers specify maximum discard 
timeout")
Fixes: b35fd7422c2f ("block: check queue's limits.discard_granularity in 
__blkdev_issue_discard()").
Reported-and-tested-by: Vicente Bergas 
Signed-off-by: Coly Li 
Acked-by: Adrian Hunter 
Cc: Ulf Hansson 
Link: https://lore.kernel.org/r/20201002013852.51968-1-col...@suse.de
Signed-off-by: Ulf Hansson 
Signed-off-by: Sasha Levin 
---
 drivers/mmc/core/queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 4b1eb89b401d9..1ad518821157f 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -190,7 +190,7 @@ static void mmc_queue_setup_discard(struct request_queue *q,
q->limits.discard_granularity = card->pref_erase << 9;
/* granularity must not be greater than max. discard */
if (card->pref_erase > max_discard)
-   q->limits.discard_granularity = 0;
+   q->limits.discard_granularity = SECTOR_SIZE;
if (mmc_can_secure_erase_trim(card))
blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
 }
-- 
2.25.1





[PATCH 5.8 112/124] mm: khugepaged: recalculate min_free_kbytes after memory hotplug as expected by khugepaged

2020-10-12 Thread Greg Kroah-Hartman
From: Vijay Balakrishna 

commit 4aab2be0983031a05cb4a19696c9da5749523426 upstream.

When memory is hotplug added or removed, min_free_kbytes should be
recalculated based on what is expected by khugepaged.  Currently, after
hotplug, min_free_kbytes is reset to a lower default and the higher
default set when THP is enabled is lost.

This change restores min_free_kbytes as expected for THP consumers.

[vij...@linux.microsoft.com: v5]
  Link: 
https://lkml.kernel.org/r/1601398153-5517-1-git-send-email-vij...@linux.microsoft.com

Fixes: f000565adb77 ("thp: set recommended min free kbytes")
Signed-off-by: Vijay Balakrishna 
Signed-off-by: Andrew Morton 
Reviewed-by: Pavel Tatashin 
Acked-by: Michal Hocko 
Cc: Allen Pais 
Cc: Andrea Arcangeli 
Cc: "Kirill A. Shutemov" 
Cc: Oleg Nesterov 
Cc: Song Liu 
Cc: 
Link: 
https://lkml.kernel.org/r/1600305709-2319-2-git-send-email-vij...@linux.microsoft.com
Link: 
https://lkml.kernel.org/r/1600204258-13683-1-git-send-email-vij...@linux.microsoft.com
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 include/linux/khugepaged.h |5 +
 mm/khugepaged.c|   13 +++--
 mm/page_alloc.c|3 +++
 3 files changed, 19 insertions(+), 2 deletions(-)

--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -15,6 +15,7 @@ extern int __khugepaged_enter(struct mm_
 extern void __khugepaged_exit(struct mm_struct *mm);
 extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
  unsigned long vm_flags);
+extern void khugepaged_min_free_kbytes_update(void);
 #ifdef CONFIG_SHMEM
 extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr);
 #else
@@ -85,6 +86,10 @@ static inline void collapse_pte_mapped_t
   unsigned long addr)
 {
 }
+
+static inline void khugepaged_min_free_kbytes_update(void)
+{
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_KHUGEPAGED_H */
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -56,6 +56,9 @@ enum scan_result {
 #define CREATE_TRACE_POINTS
 #include <trace/events/huge_memory.h>
 
+static struct task_struct *khugepaged_thread __read_mostly;
+static DEFINE_MUTEX(khugepaged_mutex);
+
 /* default scan 8*512 pte (or vmas) every 30 second */
 static unsigned int khugepaged_pages_to_scan __read_mostly;
 static unsigned int khugepaged_pages_collapsed;
@@ -2304,8 +2307,6 @@ static void set_recommended_min_free_kby
 
 int start_stop_khugepaged(void)
 {
-   static struct task_struct *khugepaged_thread __read_mostly;
-   static DEFINE_MUTEX(khugepaged_mutex);
int err = 0;
 
mutex_lock(&khugepaged_mutex);
@@ -2332,3 +2333,11 @@ fail:
mutex_unlock(&khugepaged_mutex);
return err;
 }
+
+void khugepaged_min_free_kbytes_update(void)
+{
+   mutex_lock(&khugepaged_mutex);
+   if (khugepaged_enabled() && khugepaged_thread)
+   set_recommended_min_free_kbytes();
+   mutex_unlock(&khugepaged_mutex);
+}
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -69,6 +69,7 @@
 #include 
 #include 
 #include 
+#include <linux/khugepaged.h>
 
 #include 
 #include 
@@ -7884,6 +7885,8 @@ int __meminit init_per_zone_wmark_min(vo
setup_min_slab_ratio();
 #endif
 
+   khugepaged_min_free_kbytes_update();
+
return 0;
 }
 postcore_initcall(init_per_zone_wmark_min)




[PATCH 5.8 124/124] net_sched: commit action insertions together

2020-10-12 Thread Greg Kroah-Hartman
From: Cong Wang 

commit 0fedc63fadf0404a729e73a35349481c8009c02f upstream.

syzbot is able to trigger a failure case inside the loop in
tcf_action_init(), and when this happens we clean up with
tcf_action_destroy(). But, as these actions are already inserted
into the global IDR, another parallel process could free them
before tcf_action_destroy(), then we will trigger a use-after-free.

Fix this by deferring the insertions even later, after the loop,
and committing all the insertions in a separate loop, so we will
never fail in the middle of the insertions any more.

One side effect is that the window between allocation and final
insertion becomes larger; it is now more likely that the loop in
tcf_del_walker() sees the placeholder -EBUSY pointer. So we have
to check for an error pointer in tcf_del_walker().
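
A condensed user-space model of the two-phase pattern (illustrative
only; the names are made up): initialize the whole batch first, then
publish everything in a single committing pass.

#include <stdbool.h>
#include <stdio.h>

#define TCA_ACT_MAX_PRIO_MODEL 4

struct action_model { int index; bool published; };

/* Stands in for the global IDR that other processes can look up. */
static struct action_model *table[TCA_ACT_MAX_PRIO_MODEL];

/* Phase 2: commit every successfully initialized action in one pass,
 * after the whole batch has been validated, so no half-initialized
 * entry is ever visible to a concurrent reader. */
static void insert_many(struct action_model *acts[], int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (!acts[i])
			continue;
		acts[i]->published = true;
		table[i] = acts[i];
	}
}

int main(void)
{
	struct action_model a = { .index = 1 }, b = { .index = 2 };
	struct action_model *batch[TCA_ACT_MAX_PRIO_MODEL] = { &a, &b };

	/* Phase 1 (allocation/validation of each action) happens before
	 * this call; a failure there aborts without touching 'table'. */
	insert_many(batch, TCA_ACT_MAX_PRIO_MODEL);
	printf("published: %d %d\n", a.published, b.published);
	return 0;
}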

Reported-and-tested-by: syzbot+2287853d392e4b423...@syzkaller.appspotmail.com
Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action")
Cc: Vlad Buslov 
Cc: Jamal Hadi Salim 
Cc: Jiri Pirko 
Signed-off-by: Cong Wang 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/sched/act_api.c |   32 +++-
 1 file changed, 23 insertions(+), 9 deletions(-)

--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -307,6 +307,8 @@ static int tcf_del_walker(struct tcf_idr
 
mutex_lock(&idrinfo->lock);
idr_for_each_entry_ul(idr, p, tmp, id) {
+   if (IS_ERR(p))
+   continue;
ret = tcf_idr_release_unsafe(p);
if (ret == ACT_P_DELETED) {
module_put(ops->owner);
@@ -891,14 +893,24 @@ static const struct nla_policy tcf_actio
[TCA_ACT_HW_STATS]  = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
 };
 
-static void tcf_idr_insert(struct tc_action *a)
+static void tcf_idr_insert_many(struct tc_action *actions[])
 {
-   struct tcf_idrinfo *idrinfo = a->idrinfo;
+   int i;
 
-   mutex_lock(&idrinfo->lock);
-   /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
-   WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
-   mutex_unlock(&idrinfo->lock);
+   for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
+   struct tc_action *a = actions[i];
+   struct tcf_idrinfo *idrinfo;
+
+   if (!a)
+   continue;
+   idrinfo = a->idrinfo;
+   mutex_lock(&idrinfo->lock);
+   /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if
+* it is just created, otherwise this is just a nop.
+*/
+   idr_replace(&idrinfo->action_idr, a, a->tcfa_index);
+   mutex_unlock(&idrinfo->lock);
+   }
 }
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
@@ -995,9 +1007,6 @@ struct tc_action *tcf_action_init_1(stru
return ERR_PTR(-EINVAL);
}
 
-   if (err == ACT_P_CREATED)
-   tcf_idr_insert(a);
-
if (!name && tb[TCA_ACT_COOKIE])
tcf_set_action_cookie(&a->act_cookie, cookie);
 
@@ -1053,6 +1062,11 @@ int tcf_action_init(struct net *net, str
actions[i - 1] = act;
}
 
+   /* We have to commit them all together, because if any error happened in
+* between, we could not handle the failure gracefully.
+*/
+   tcf_idr_insert_many(actions);
+
*attr_size = tcf_action_full_attrs_size(sz);
return i - 1;
 




[PATCH 5.8 121/124] net: usb: rtl8150: set random MAC address when set_ethernet_addr() fails

2020-10-12 Thread Greg Kroah-Hartman
From: Anant Thazhemadam 

commit f45a4248ea4cc13ed50618ff066849f9587226b2 upstream.

When get_registers() fails in set_ethernet_addr(), the uninitialized
value of node_id gets copied over as the address.
So, check the return value of get_registers().

If get_registers() executed successfully (i.e., it returns
sizeof(node_id)), copy over the MAC address using ether_addr_copy()
(instead of using memcpy()).

Else, if get_registers() failed instead, a randomly generated MAC
address is set as the MAC address instead.

Reported-by: syzbot+abbc768b560c84d92...@syzkaller.appspotmail.com
Tested-by: syzbot+abbc768b560c84d92...@syzkaller.appspotmail.com
Acked-by: Petko Manolov 
Signed-off-by: Anant Thazhemadam 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/net/usb/rtl8150.c |   16 
 1 file changed, 12 insertions(+), 4 deletions(-)

--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -274,12 +274,20 @@ static int write_mii_word(rtl8150_t * de
return 1;
 }
 
-static inline void set_ethernet_addr(rtl8150_t * dev)
+static void set_ethernet_addr(rtl8150_t *dev)
 {
-   u8 node_id[6];
+   u8 node_id[ETH_ALEN];
+   int ret;
 
-   get_registers(dev, IDR, sizeof(node_id), node_id);
-   memcpy(dev->netdev->dev_addr, node_id, sizeof(node_id));
+   ret = get_registers(dev, IDR, sizeof(node_id), node_id);
+
+   if (ret == sizeof(node_id)) {
+   ether_addr_copy(dev->netdev->dev_addr, node_id);
+   } else {
+   eth_hw_addr_random(dev->netdev);
+   netdev_notice(dev->netdev, "Assigned a random MAC address: %pM\n",
+ dev->netdev->dev_addr);
+   }
 }
 
 static int rtl8150_set_mac_address(struct net_device *netdev, void *p)




[PATCH 5.8 119/124] tty/vt: Do not warn when huge selection requested

2020-10-12 Thread Greg Kroah-Hartman
From: Alexey Kardashevskiy 

commit 44c413d9a51752056d606bf6f312003ac1740fab upstream.

The tty TIOCL_SETSEL ioctl allocates a memory buffer big enough for the
text selection area. The maximum allowed console size is
VC_RESIZE_MAXCOL * VC_RESIZE_MAXROW == 32767*32767 == ~1GB, while the
typical MAX_ORDER allows allocations a lot smaller than that (circa 16MB).

So it is quite possible to trigger huge allocation (and syzkaller just
did that) which is going to fail (which is fine) with a backtrace in
mm/page_alloc.c at WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)) and
this may trigger panic (if panic_on_warn is enabled) and
leak kernel addresses to dmesg.

This passes __GFP_NOWARN to kmalloc_array to avoid unnecessary user-
triggered WARN_ON. Note that the error is not ignored and
the warning is still printed.

Signed-off-by: Alexey Kardashevskiy 
Link: https://lore.kernel.org/r/20200617070444.116704-1-...@ozlabs.ru
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/tty/vt/selection.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/tty/vt/selection.c
+++ b/drivers/tty/vt/selection.c
@@ -193,7 +193,7 @@ static int vc_selection_store_chars(stru
/* Allocate a new buffer before freeing the old one ... */
/* chars can take up to 4 bytes with unicode */
bp = kmalloc_array((vc_sel.end - vc_sel.start) / 2 + 1, unicode ? 4 : 1,
-  GFP_KERNEL);
+  GFP_KERNEL | __GFP_NOWARN);
if (!bp) {
printk(KERN_WARNING "selection: kmalloc() failed\n");
clear_selection();




[PATCH 5.8 085/124] net/mlx5: Avoid possible free of command entry while timeout comp handler

2020-10-12 Thread Greg Kroah-Hartman
From: Eran Ben Elisha 

[ Upstream commit 50b2412b7e7862c5af0cbf4b10d93bc5c712d021 ]

Upon command completion timeout, driver simulates a forced command
completion. In a rare case where real interrupt for that command arrives
simultaneously, it might release the command entry while the forced
handler might still access it.

Fix that by adding an entry refcount to track the current number of
allowed handlers. The command entry is released only when this refcount
is decremented to zero.

Command refcount is always initialized to one. For callback commands,
command completion handler is the symmetric flow to decrement it. For
non-callback commands, it is wait_func().

Before ringing the doorbell, increment the refcount for the real completion
handler. Once the real completion handler is called, it will decrement it.

For callback commands, once the delayed work is scheduled, increment the
refcount. Upon callback command completion handler, we will try to cancel
the timeout callback. In case of success, we need to decrement the callback
refcount as it will never run.

In addition, gather the entry index free and the entry free into a one
flow for all command types release.
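
A user-space sketch of the refcount scheme described above (illustrative
only, not the driver code): the waiter holds the initial reference, the
completion path takes an extra one before the doorbell, and whoever
drops the count to zero frees both the index and the entry.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct cmd_ent {
	atomic_int refcnt;
	int idx;
};

static void ent_get(struct cmd_ent *ent)
{
	atomic_fetch_add(&ent->refcnt, 1);
}

static void ent_put(struct cmd_ent *ent)
{
	if (atomic_fetch_sub(&ent->refcnt, 1) != 1)
		return;
	/* Index release and entry free gathered in one place. */
	printf("freeing entry %d\n", ent->idx);
	free(ent);
}

int main(void)
{
	struct cmd_ent *ent = malloc(sizeof(*ent));

	atomic_init(&ent->refcnt, 1);	/* reference held by the waiter */
	ent->idx = 0;
	ent_get(ent);			/* extra ref for the real completion handler */
	ent_put(ent);			/* real (or forced) completion finishes */
	ent_put(ent);			/* waiter drops its reference last */
	return 0;
}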

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Eran Ben Elisha 
Reviewed-by: Moshe Shemesh 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 109 --
 include/linux/mlx5/driver.h   |   2 +
 2 files changed, 73 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 1d91a0d0ab1d7..c0055f5479ce0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -69,12 +69,10 @@ enum {
MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR= 0x10,
 };
 
-static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
-  struct mlx5_cmd_msg *in,
-  struct mlx5_cmd_msg *out,
-  void *uout, int uout_size,
-  mlx5_cmd_cbk_t cbk,
-  void *context, int page_queue)
+static struct mlx5_cmd_work_ent *
+cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in,
+ struct mlx5_cmd_msg *out, void *uout, int uout_size,
+ mlx5_cmd_cbk_t cbk, void *context, int page_queue)
 {
gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL;
struct mlx5_cmd_work_ent *ent;
@@ -83,6 +81,7 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
if (!ent)
return ERR_PTR(-ENOMEM);
 
+   ent->idx= -EINVAL;
ent->in = in;
ent->out= out;
ent->uout   = uout;
@@ -91,10 +90,16 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
ent->context= context;
ent->cmd= cmd;
ent->page_queue = page_queue;
+   refcount_set(&ent->refcnt, 1);
 
return ent;
 }
 
+static void cmd_free_ent(struct mlx5_cmd_work_ent *ent)
+{
+   kfree(ent);
+}
+
 static u8 alloc_token(struct mlx5_cmd *cmd)
 {
u8 token;
@@ -109,7 +114,7 @@ static u8 alloc_token(struct mlx5_cmd *cmd)
return token;
 }
 
-static int alloc_ent(struct mlx5_cmd *cmd)
+static int cmd_alloc_index(struct mlx5_cmd *cmd)
 {
unsigned long flags;
int ret;
@@ -123,7 +128,7 @@ static int alloc_ent(struct mlx5_cmd *cmd)
return ret < cmd->max_reg_cmds ? ret : -ENOMEM;
 }
 
-static void free_ent(struct mlx5_cmd *cmd, int idx)
+static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
 {
unsigned long flags;
 
@@ -132,6 +137,22 @@ static void free_ent(struct mlx5_cmd *cmd, int idx)
spin_unlock_irqrestore(&cmd->alloc_lock, flags);
 }
 
+static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
+{
+   refcount_inc(&ent->refcnt);
+}
+
+static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
+{
+   if (!refcount_dec_and_test(&ent->refcnt))
+   return;
+
+   if (ent->idx >= 0)
+   cmd_free_index(ent->cmd, ent->idx);
+
+   cmd_free_ent(ent);
+}
+
 static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
 {
return cmd->cmd_buf + (idx << cmd->log_stride);
@@ -219,11 +240,6 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
ent->ret = -ETIMEDOUT;
 }
 
-static void free_cmd(struct mlx5_cmd_work_ent *ent)
-{
-   kfree(ent);
-}
-
 static int verify_signature(struct mlx5_cmd_work_ent *ent)
 {
struct mlx5_cmd_mailbox *next = ent->out->next;
@@ -842,6 +858,7 @@ static void cb_timeout_handler(struct work_struct *work)
   mlx5_command_str(msg_to_opcode(ent->in)),
   msg_to_opcode(ent->in));
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ 

[PATCH 5.8 118/124] net/mlx5e: Fix drivers declaration to support GRE offload

2020-10-12 Thread Greg Kroah-Hartman
From: Aya Levin 

commit 3d093bc2369003b4ce6c3522d9b383e47c40045d upstream.

Declare GRE offload support with respect to the inner protocol. Add a
list of supported inner protocols on which the driver can offload
checksum and GSO. For other protocols, inform the stack to do the needed
operations. There is no noticeable impact on GRE performance.

Fixes: 2729984149e6 ("net/mlx5e: Support TSO and TX checksum offloads for GRE 
tunnels")
Signed-off-by: Aya Levin 
Reviewed-by: Moshe Shemesh 
Reviewed-by: Tariq Toukan 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4323,6 +4323,21 @@ void mlx5e_del_vxlan_port(struct net_dev
mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0);
 }
 
+static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev,
+  struct sk_buff *skb)
+{
+   switch (skb->inner_protocol) {
+   case htons(ETH_P_IP):
+   case htons(ETH_P_IPV6):
+   case htons(ETH_P_TEB):
+   return true;
+   case htons(ETH_P_MPLS_UC):
+   case htons(ETH_P_MPLS_MC):
+   return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre);
+   }
+   return false;
+}
+
 static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 struct sk_buff *skb,
 netdev_features_t features)
@@ -4345,7 +4360,9 @@ static netdev_features_t mlx5e_tunnel_fe
 
switch (proto) {
case IPPROTO_GRE:
-   return features;
+   if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb))
+   return features;
+   break;
case IPPROTO_IPIP:
case IPPROTO_IPV6:
if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP))




[PATCH 5.8 113/124] tcp: fix receive window update in tcp_add_backlog()

2020-10-12 Thread Greg Kroah-Hartman
From: Eric Dumazet 

commit 86bccd0367130f481ca99ba91de1c6a5aa1c78c1 upstream.

We got reports from GKE customers flows being reset by netfilter
conntrack unless nf_conntrack_tcp_be_liberal is set to 1.

Traces seemed to suggest ACK packet being dropped by the
packet capture, or more likely that ACK were received in the
wrong order.

 wscale=7, SYN and SYNACK not shown here.

 This ACK allows the sender to send 1871*128 bytes from seq 51359321 :
 New right edge of the window -> 51359321+1871*128=51598809

 09:17:23.389210 IP A > B: Flags [.], ack 51359321, win 1871, options 
[nop,nop,TS val 10 ecr 999], length 0

 09:17:23.389212 IP B > A: Flags [.], seq 51422681:51424089, ack 1577, win 268, 
options [nop,nop,TS val 999 ecr 10], length 1408
 09:17:23.389214 IP A > B: Flags [.], ack 51422681, win 1376, options 
[nop,nop,TS val 10 ecr 999], length 0
 09:17:23.389253 IP B > A: Flags [.], seq 51424089:51488857, ack 1577, win 268, 
options [nop,nop,TS val 999 ecr 10], length 64768
 09:17:23.389272 IP A > B: Flags [.], ack 51488857, win 859, options 
[nop,nop,TS val 10 ecr 999], length 0
 09:17:23.389275 IP B > A: Flags [.], seq 51488857:51521241, ack 1577, win 268, 
options [nop,nop,TS val 999 ecr 10], length 32384

 Receiver now allows to send 606*128=77568 from seq 51521241 :
 New right edge of the window -> 51521241+606*128=51598809

 09:17:23.389296 IP A > B: Flags [.], ack 51521241, win 606, options 
[nop,nop,TS val 10 ecr 999], length 0

 09:17:23.389308 IP B > A: Flags [.], seq 51521241:51553625, ack 1577, win 268, 
options [nop,nop,TS val 999 ecr 10], length 32384

 It seems the sender exceeds RWIN allowance, since 51611353 > 51598809

 09:17:23.389346 IP B > A: Flags [.], seq 51553625:51611353, ack 1577, win 268, 
options [nop,nop,TS val 999 ecr 10], length 57728
 09:17:23.389356 IP B > A: Flags [.], seq 51611353:51618393, ack 1577, win 268, 
options [nop,nop,TS val 999 ecr 10], length 7040

 09:17:23.389367 IP A > B: Flags [.], ack 51611353, win 0, options [nop,nop,TS 
val 10 ecr 999], length 0

 netfilter conntrack is not happy and sends RST

 09:17:23.389389 IP A > B: Flags [R], seq 92176528, win 0, length 0
 09:17:23.389488 IP B > A: Flags [R], seq 174478967, win 0, length 0

 Now imagine ACK were delivered out of order and tcp_add_backlog() sets window 
based on wrong packet.
 New right edge of the window -> 51521241+859*128=51631193

Normally TCP stack handles OOO packets just fine, but it
turns out tcp_add_backlog() does not. It can update the window
field of the aggregated packet even if the ACK sequence
of the last received packet is too old.
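
For reference, a compact model of the wrap-safe comparison the fix
relies on (illustrative; seq_before() mirrors the kernel's before()
semantics, and the sample values come from the trace above): the tail's
window is only updated when the coalesced packet carries an ACK that is
not older than the one already recorded.

#include <stdint.h>
#include <stdio.h>

/* Wrap-safe comparison, same idea as the kernel's before()/after(). */
static int seq_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	uint32_t tail_ack = 51521241, tail_win = 606;
	/* An older ACK delivered out of order (values from the trace). */
	uint32_t skb_ack = 51488857, skb_win = 859;

	if (!seq_before(skb_ack, tail_ack)) {
		tail_ack = skb_ack;
		tail_win = skb_win;	/* window only advances with a newer ACK */
	}
	printf("ack=%u win=%u\n", tail_ack, tail_win);
	return 0;
}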

Many thanks to Alexandre Ferrieux for independently reporting the issue
and suggesting a fix.

Fixes: 4f693b55c3d2 ("tcp: implement coalescing on backlog queue")
Signed-off-by: Eric Dumazet 
Reported-by: Alexandre Ferrieux 
Acked-by: Soheil Hassas Yeganeh 
Acked-by: Neal Cardwell 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/ipv4/tcp_ipv4.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1787,12 +1787,12 @@ bool tcp_add_backlog(struct sock *sk, st
 
__skb_pull(skb, hdrlen);
if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
-   thtail->window = th->window;
-
TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
 
-   if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
+   if (likely(!before(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))) {
TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+   thtail->window = th->window;
+   }
 
/* We have to update both TCP_SKB_CB(tail)->tcp_flags and
 * thtail->fin, so that the fast path in tcp_rcv_established()




[PATCH 5.8 091/124] net/mlx5e: Fix VLAN cleanup flow

2020-10-12 Thread Greg Kroah-Hartman
From: Aya Levin 

[ Upstream commit 8c7353b6f716436ad0bfda2b5c5524ab2dde5894 ]

Prior to this patch, unloading an interface in promiscuous mode with the
RX VLAN filtering feature turned off resulted in a warning. This is due
to a wrong condition in the VLAN rules cleanup flow, which left the
any-vid rules in the VLAN steering table. These rules prevented
destroying the flow group and the flow table.

The any-vid rules are removed in 2 flows, but neither of them removes
them when both promiscuous mode is set and VLAN filtering is off. Fix
the issue by changing the condition of the VLAN table cleanup flow to
clean also in case of promiscuous mode.

mlx5_core :00:08.0: mlx5_destroy_flow_group:2123:(pid 28729): Flow group 20 
wasn't destroyed, refcount > 1
mlx5_core :00:08.0: mlx5_destroy_flow_group:2123:(pid 28729): Flow group 19 
wasn't destroyed, refcount > 1
mlx5_core :00:08.0: mlx5_destroy_flow_table:2112:(pid 28729): Flow table 
262149 wasn't destroyed, refcount > 1
...
...
[ cut here ]
FW pages counter is 11560 after reclaiming all pages
WARNING: CPU: 1 PID: 28729 at
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c:660
mlx5_reclaim_startup_pages+0x178/0x230 [mlx5_core]
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
Call Trace:
  mlx5_function_teardown+0x2f/0x90 [mlx5_core]
  mlx5_unload_one+0x71/0x110 [mlx5_core]
  remove_one+0x44/0x80 [mlx5_core]
  pci_device_remove+0x3e/0xc0
  device_release_driver_internal+0xfb/0x1c0
  device_release_driver+0x12/0x20
  pci_stop_bus_device+0x68/0x90
  pci_stop_and_remove_bus_device+0x12/0x20
  hv_eject_device_work+0x6f/0x170 [pci_hyperv]
  ? __schedule+0x349/0x790
  process_one_work+0x206/0x400
  worker_thread+0x34/0x3f0
  ? process_one_work+0x400/0x400
  kthread+0x126/0x140
  ? kthread_park+0x90/0x90
  ret_from_fork+0x22/0x30
   ---[ end trace 6283bde8d26170dc ]---

Fixes: 9df30601c843 ("net/mlx5e: Restore vlan filter after seamless reset")
Signed-off-by: Aya Levin 
Reviewed-by: Moshe Shemesh 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 73d3dc07331f1..c5be0cdfaf0fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -415,8 +415,12 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, 
i);
 
-   if (priv->fs.vlan.cvlan_filter_disabled &&
-   !(priv->netdev->flags & IFF_PROMISC))
+   WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state)));
+
+   /* must be called after DESTROY bit is set and
+* set_rx_mode is called and flushed
+*/
+   if (priv->fs.vlan.cvlan_filter_disabled)
mlx5e_del_any_vid_rules(priv);
 }
 
-- 
2.25.1





[PATCH 5.8 117/124] net/tls: race causes kernel panic

2020-10-12 Thread Greg Kroah-Hartman
From: Rohit Maheshwari 

commit 38f7e1c0c43dd25b06513137bb6fd35476f9ec6d upstream.

BUG: kernel NULL pointer dereference, address: 00b8
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x) - not-present page
 PGD 8008b6fef067 P4D 8008b6fef067 PUD 8b6fe6067 PMD 0
 Oops:  [#1] SMP PTI
 CPU: 12 PID: 23871 Comm: kworker/12:80 Kdump: loaded Tainted: G S
 5.9.0-rc3+ #1
 Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.1 03/29/2018
 Workqueue: events tx_work_handler [tls]
 RIP: 0010:tx_work_handler+0x1b/0x70 [tls]
 Code: dc fe ff ff e8 16 d4 a3 f6 66 0f 1f 44 00 00 0f 1f 44 00 00 55 53 48 8b
 6f 58 48 8b bd a0 04 00 00 48 85 ff 74 1c 48 8b 47 28 <48> 8b 90 b8 00 00 00 83
 e2 02 75 0c f0 48 0f ba b0 b8 00 00 00 00
 RSP: 0018:a44ace61fe88 EFLAGS: 00010286
 RAX:  RBX: 91da9e45cc30 RCX: dead0122
 RDX: 0001 RSI: 91da9e45cc38 RDI: 91d95efac200
 RBP: 91da133fd780 R08:  R09: 73746e657665
 R10: 8080808080808080 R11:  R12: 91dad7d30700
 R13: 91dab6561080 R14: 091dad7d3070 R15: 91da9e45cc38
 FS:  () GS:91dad7d0() knlGS:
 CS:  0010 DS:  ES:  CR0: 80050033
 CR2: 00b8 CR3: 000906478003 CR4: 003706e0
 DR0:  DR1:  DR2: 
 DR3:  DR6: fffe0ff0 DR7: 0400
 Call Trace:
  process_one_work+0x1a7/0x370
  worker_thread+0x30/0x370
  ? process_one_work+0x370/0x370
  kthread+0x114/0x130
  ? kthread_park+0x80/0x80
  ret_from_fork+0x22/0x30

tls_sw_release_resources_tx() waits for encrypt_pending, which
can race, so we need changes similar to those in commit
0cada33241d9de205522e3858b18e506ca5cce2c here as well.
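
A simplified user-space model of the change (illustrative only, using a
pthread spinlock in place of the kernel lock): async_notify is set and
the pending counter is sampled under the same lock the completion
handler takes, so the release path cannot miss an in-flight encryption.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct tx_ctx {
	pthread_spinlock_t compl_lock;
	int encrypt_pending;
	bool async_notify;
};

/* Release path: decide whether to wait, with the decision made under
 * the completion lock rather than from an unlocked read. */
static bool must_wait_for_encrypt(struct tx_ctx *ctx)
{
	int pending;

	pthread_spin_lock(&ctx->compl_lock);
	ctx->async_notify = true;
	pending = ctx->encrypt_pending;
	pthread_spin_unlock(&ctx->compl_lock);

	return pending != 0;
}

int main(void)
{
	struct tx_ctx ctx = { .encrypt_pending = 1 };

	pthread_spin_init(&ctx.compl_lock, PTHREAD_PROCESS_PRIVATE);
	printf("wait needed: %d\n", must_wait_for_encrypt(&ctx));
	pthread_spin_destroy(&ctx.compl_lock);
	return 0;
}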

Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for 
performance")
Signed-off-by: Rohit Maheshwari 
Acked-by: Jakub Kicinski 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/tls/tls_sw.c |9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2142,10 +2142,15 @@ void tls_sw_release_resources_tx(struct
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct tls_rec *rec, *tmp;
+   int pending;
 
/* Wait for any pending async encryptions to complete */
-   smp_store_mb(ctx->async_notify, true);
-   if (atomic_read(&ctx->encrypt_pending))
+   spin_lock_bh(&ctx->encrypt_compl_lock);
+   ctx->async_notify = true;
+   pending = atomic_read(&ctx->encrypt_pending);
+   spin_unlock_bh(&ctx->encrypt_compl_lock);
 
+   if (pending)
crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
 
tls_tx_records(sk, -1);




[PATCH 5.8 088/124] net/mlx5: Fix request_irqs error flow

2020-10-12 Thread Greg Kroah-Hartman
From: Maor Gottlieb 

[ Upstream commit 732ebfab7fe96b7ac9a3df3208f14752a4bb6db3 ]

Fix the error flow handling in request_irqs(), which tried to free an
irq that we had failed to request.
It fixes the below trace.

WARNING: CPU: 1 PID: 7587 at kernel/irq/manage.c:1684 free_irq+0x4d/0x60
CPU: 1 PID: 7587 Comm: bash Tainted: GW  OE
4.15.15-1.el7MELLANOXsmp-x86_64 #1
Hardware name: Advantech SKY-6200/SKY-6200, BIOS F2.00 08/06/2020
RIP: 0010:free_irq+0x4d/0x60
RSP: 0018:c9000ef47af0 EFLAGS: 00010282
RAX: 88001476ae00 RBX: 0655 RCX: 
RDX: 88001476ae00 RSI: c9000ef47ab8 RDI: 8800398bb478
RBP: 88001476a838 R08: 88001476ae00 R09: 156d
R10:  R11: 0004 R12: 88001476a838
R13: 0006 R14: 88001476a888 R15: ffe4
FS:  7efeadd32740() GS:88047fc4() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7fc9cc010008 CR3: 0001a2380004 CR4: 007606e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
PKRU: 5554
Call Trace:
 mlx5_irq_table_create+0x38d/0x400 [mlx5_core]
 ? atomic_notifier_chain_register+0x50/0x60
 mlx5_load_one+0x7ee/0x1130 [mlx5_core]
 init_one+0x4c9/0x650 [mlx5_core]
 pci_device_probe+0xb8/0x120
 driver_probe_device+0x2a1/0x470
 ? driver_allows_async_probing+0x30/0x30
 bus_for_each_drv+0x54/0x80
 __device_attach+0xa3/0x100
 pci_bus_add_device+0x4a/0x90
 pci_iov_add_virtfn+0x2dc/0x2f0
 pci_enable_sriov+0x32e/0x420
 mlx5_core_sriov_configure+0x61/0x1b0 [mlx5_core]
 ? kstrtoll+0x22/0x70
 num_vf_store+0x4b/0x70 [mlx5_core]
 kernfs_fop_write+0x102/0x180
 __vfs_write+0x26/0x140
 ? rcu_all_qs+0x5/0x80
 ? _cond_resched+0x15/0x30
 ? __sb_start_write+0x41/0x80
 vfs_write+0xad/0x1a0
 SyS_write+0x42/0x90
 do_syscall_64+0x60/0x110
 entry_SYSCALL_64_after_hwframe+0x3d/0xa2
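
The unwind idiom at the heart of the fix, as a standalone example
(illustrative; acquire() is a made-up stand-in for a failing
request_irq()):

#include <stdio.h>

#define NVEC 4

/* Pretend the third request fails. */
static int acquire(int i)
{
	return i == 2 ? -1 : 0;
}

int main(void)
{
	int i;

	for (i = 0; i < NVEC; i++) {
		if (acquire(i) < 0)
			goto err;
	}
	return 0;

err:
	/* 'while (i--)' releases only indexes i-1..0, i.e. the ones that
	 * were actually acquired.  'for (; i >= 0; i--)' would also have
	 * released index i, which was never acquired. */
	while (i--)
		printf("releasing %d\n", i);
	return 1;
}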

Fixes: 24163189da48 ("net/mlx5: Separate IRQ request/free from EQ life cycle")
Signed-off-by: Maor Gottlieb 
Reviewed-by: Eran Ben Elisha 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 373981a659c7c..6fd9749203944 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -115,7 +115,7 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec)
return 0;
 
 err_request_irq:
-   for (; i >= 0; i--) {
+   while (i--) {
struct mlx5_irq *irq = mlx5_irq_get(dev, i);
int irqn = pci_irq_vector(dev->pdev, i);
 
-- 
2.25.1





[PATCH 5.8 120/124] Input: ati_remote2 - add missing newlines when printing module parameters

2020-10-12 Thread Greg Kroah-Hartman
From: Xiongfeng Wang 

commit 37bd9e803daea816f2dc2c8f6dc264097eb3ebd2 upstream.

When I cat some module parameters by sysfs, it displays as follows. It's
better to add a newline for easy reading.

root@syzkaller:~# cat /sys/module/ati_remote2/parameters/mode_mask
0x1froot@syzkaller:~# cat /sys/module/ati_remote2/parameters/channel_mask
0xroot@syzkaller:~#

Signed-off-by: Xiongfeng Wang 
Link: https://lore.kernel.org/r/20200720092148.9320-1-wangxiongfe...@huawei.com
Signed-off-by: Dmitry Torokhov 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/input/misc/ati_remote2.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/input/misc/ati_remote2.c
+++ b/drivers/input/misc/ati_remote2.c
@@ -68,7 +68,7 @@ static int ati_remote2_get_channel_mask(
 {
pr_debug("%s()\n", __func__);
 
-   return sprintf(buffer, "0x%04x", *(unsigned int *)kp->arg);
+   return sprintf(buffer, "0x%04x\n", *(unsigned int *)kp->arg);
 }
 
 static int ati_remote2_set_mode_mask(const char *val,
@@ -84,7 +84,7 @@ static int ati_remote2_get_mode_mask(cha
 {
pr_debug("%s()\n", __func__);
 
-   return sprintf(buffer, "0x%02x", *(unsigned int *)kp->arg);
+   return sprintf(buffer, "0x%02x\n", *(unsigned int *)kp->arg);
 }
 
 static unsigned int channel_mask = ATI_REMOTE2_MAX_CHANNEL_MASK;




[PATCH 5.8 123/124] net_sched: defer tcf_idr_insert() in tcf_action_init_1()

2020-10-12 Thread Greg Kroah-Hartman
From: Cong Wang 

commit e49d8c22f1261c43a986a7fdbf677ac309682a07 upstream.

All TC actions call tcf_idr_insert() for new action at the end
of their ->init(), so we can actually move it to a central place
in tcf_action_init_1().

And once the action is inserted into the global IDR, another parallel
process could free it immediately as its refcnt is still 1, so we
cannot fail after this; we need to move it after the goto action
validation to avoid handling the failure case after insertion.

This is found during code review, is not directly triggered by syzbot.
And this prepares for the next patch.

Cc: Vlad Buslov 
Cc: Jamal Hadi Salim 
Cc: Jiri Pirko 
Signed-off-by: Cong Wang 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 include/net/act_api.h  |2 --
 net/sched/act_api.c|   38 --
 net/sched/act_bpf.c|4 +---
 net/sched/act_connmark.c   |1 -
 net/sched/act_csum.c   |3 ---
 net/sched/act_ct.c |2 --
 net/sched/act_ctinfo.c |3 ---
 net/sched/act_gact.c   |2 --
 net/sched/act_gate.c   |3 ---
 net/sched/act_ife.c|3 ---
 net/sched/act_ipt.c|2 --
 net/sched/act_mirred.c |2 --
 net/sched/act_mpls.c   |2 --
 net/sched/act_nat.c|3 ---
 net/sched/act_pedit.c  |2 --
 net/sched/act_police.c |2 --
 net/sched/act_sample.c |2 --
 net/sched/act_simple.c |2 --
 net/sched/act_skbedit.c|2 --
 net/sched/act_skbmod.c |2 --
 net/sched/act_tunnel_key.c |3 ---
 net/sched/act_vlan.c   |2 --
 22 files changed, 21 insertions(+), 66 deletions(-)

--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -166,8 +166,6 @@ int tcf_idr_create_from_flags(struct tc_
  struct nlattr *est, struct tc_action **a,
  const struct tc_action_ops *ops, int bind,
  u32 flags);
-void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
-
 void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
 int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tc_action **a, int bind);
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -467,17 +467,6 @@ int tcf_idr_create_from_flags(struct tc_
 }
 EXPORT_SYMBOL(tcf_idr_create_from_flags);
 
-void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
-{
-   struct tcf_idrinfo *idrinfo = tn->idrinfo;
-
-   mutex_lock(&idrinfo->lock);
-   /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
-   WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
-   mutex_unlock(&idrinfo->lock);
-}
-EXPORT_SYMBOL(tcf_idr_insert);
-
 /* Cleanup idr index that was allocated but not initialized. */
 
 void tcf_idr_cleanup(struct tc_action_net *tn, u32 index)
@@ -902,6 +891,16 @@ static const struct nla_policy tcf_actio
[TCA_ACT_HW_STATS]  = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
 };
 
+static void tcf_idr_insert(struct tc_action *a)
+{
+   struct tcf_idrinfo *idrinfo = a->idrinfo;
+
+   mutex_lock(&idrinfo->lock);
+   /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
+   WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
+   mutex_unlock(&idrinfo->lock);
+}
+
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind,
@@ -989,6 +988,16 @@ struct tc_action *tcf_action_init_1(stru
if (err < 0)
goto err_mod;
 
+   if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) &&
+   !rcu_access_pointer(a->goto_chain)) {
+   tcf_action_destroy_1(a, bind);
+   NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain");
+   return ERR_PTR(-EINVAL);
+   }
+
+   if (err == ACT_P_CREATED)
+   tcf_idr_insert(a);
+
if (!name && tb[TCA_ACT_COOKIE])
tcf_set_action_cookie(&a->act_cookie, cookie);
 
@@ -1002,13 +1011,6 @@ struct tc_action *tcf_action_init_1(stru
if (err != ACT_P_CREATED)
module_put(a_o->owner);
 
-   if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) &&
-   !rcu_access_pointer(a->goto_chain)) {
-   tcf_action_destroy_1(a, bind);
-   NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain");
-   return ERR_PTR(-EINVAL);
-   }
-
return a;
 
 err_mod:
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -365,9 +365,7 @@ static int tcf_bpf_init(struct net *net,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
 
-   if (res == ACT_P_CREATED) {
-   tcf_idr_insert(tn, *act);
-   } else {
+   if (res != ACT_P_CREATED) {
/* make sure the program being replaced is no longer 

[PATCH 5.8 111/124] mm: validate inode in mapping_set_error()

2020-10-12 Thread Greg Kroah-Hartman
From: Minchan Kim 

commit 8b7b2eb131d3476062ffd34358785b44be25172f upstream.

The swap address_space doesn't have a host. Thus, the kernel crashes once a
swap write meets an error. Fix it.

Fixes: 735e4ae5ba28 ("vfs: track per-sb writeback errors and report them to 
syncfs")
Signed-off-by: Minchan Kim 
Signed-off-by: Andrew Morton 
Acked-by: Jeff Layton 
Cc: Jan Kara 
Cc: Andres Freund 
Cc: Matthew Wilcox 
Cc: Al Viro 
Cc: Christoph Hellwig 
Cc: Dave Chinner 
Cc: David Howells 
Cc: 
Link: https://lkml.kernel.org/r/2020101650.750063-1-minc...@kernel.org
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 include/linux/pagemap.h |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -54,7 +54,8 @@ static inline void mapping_set_error(str
__filemap_set_wb_err(mapping, error);
 
/* Record it in superblock */
-   errseq_set(&mapping->host->i_sb->s_wb_err, error);
+   if (mapping->host)
+   errseq_set(&mapping->host->i_sb->s_wb_err, error);
 
/* Record it in flags for now, for legacy callers */
if (error == -ENOSPC)




Re: [PATCH v2 2/2] [RFC] CPUFreq: Add support for cpu-perf-dependencies

2020-10-12 Thread Lukasz Luba




On 10/12/20 11:59 AM, Ionela Voinescu wrote:

On Monday 12 Oct 2020 at 11:22:57 (+0100), Lukasz Luba wrote:
[..]

I thought about it and looked for other platforms' DT to see if we can reuse
existing opp information. Unfortunately I don't think it is optimal. The reason
being that cpus having the same opp table does not necessarily mean
that they share a clock wire. It just tells us that they have the same
capabilities (literally just tells us they have the same V/f op points).
Unless I am missing something?

When comparing with ACPI/_PSD it becomes more intuitive that there is no
equivalent way to reveal "perf-dependencies" in DT.


You should be able to by examining the clock tree. But perhaps SCMI
abstracts all that and just presents virtual clocks without parent
clocks available to determine what clocks are shared? Fix SCMI if that's
the case.


True, the SCMI clock does not support discovery of clock tree:
(from 4.6.1 Clock management protocol background)
'The protocol does not cover discovery of the clock tree, which must be
described through firmware tables instead.' [1]

In this situation, would it make sense, instead of this binding from
patch 1/2, to create a binding for an internal firmware/scmi node?

Something like:

firmware {
scmi {
... 
scmi-perf-dep {
compatible = "arm,scmi-perf-dependencies";
cpu-perf-dep0 {
cpu-perf-affinity = <>, <>;
};
cpu-perf-dep1 {
cpu-perf-affinity = <>, <>;
};
cpu-perf-dep2 {
cpu-perf-affinity = <>;
};
};
};
};

The code which is going to parse the binding would be inside the
scmi perf protocol code and used via API by scmi-cpufreq.c.



While SCMI cpufreq would be able to benefit from the functionality that
Nicola is trying to introduce, it's not the only driver, and more
importantly, it's not *going* to be the only driver benefiting from
this.

Currently there is also qcom-cpufreq-hw.c and the future
mediatek-cpufreq-hw.c that is currently under review [1]. They both do
their frequency setting by interacting with HW/FW, and could either take
or update their OPP tables from there. Therefore, if the platform would
require it, they could also expose different controls for frequency
setting and could benefit from additional information about clock
domains (either through opp-shared or the new entries in Nicola's patch),
without driver changes.

Another point to be made is that I strongly believe this is going to be
the norm in the future. Directly setting PLLs and regulator voltages
has been proven unsafe and insecure.

Therefore, I see this as support for a generic cpufreq feature (a
hardware coordination type), rather than support for a specific driver.

[1] https://lkml.org/lkml/2020/9/10/11



Now regarding the 'dependent_cpus' mask.

We could avoid adding a new field 'dependent_cpus' in the policy
struct, but I am not sure about one bit - the Frequency Invariant Engine
(which is also not fixed by just adding a new cpumask).

   ^^
   Let's take it step by step..


We have 3 subsystems to fix:
1. EAS - EM has API function which takes custom cpumask, so no issue,

^^
   keep in mind that EAS it's using the max aggregation method
   that schedutil is using. So if we are to describe the
   functionality correctly, it needs both a cpumask describing
   the frequency domains and an aggregation method.


EAS does not use schedutil's max aggregation; it calculates max_util
internally.

The compute_energy() loops through the CPUs in the domain and
takes the utilization from them via schedutil_cpu_util(cpu_rq(cpu)).
It figures out max_util and then em_cpu_energy() maps it to the next
frequency for the cluster. It just needs proper utilization from the
CPUs, which is taken from the run-queues and is the sum of the utilization
of the tasks on them. This leads to the problem of how we account the
utilization of a task. This is the place where the FIE is involved. EAS assumes the
utilization is calculated properly.
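
A rough standalone sketch of that aggregation (simplified userspace C, not
the actual kernel code; cost_at_util() is only a stand-in for the EM table
lookup):

#include <stddef.h>
#include <stdio.h>

/* Stand-in for the energy-model lookup: a made-up monotonic cost curve. */
static unsigned long cost_at_util(unsigned long max_util)
{
	return 10 + max_util / 4;
}

/* The domain frequency is picked from the max utilization of its CPUs;
 * the resulting cost then scales with the sum of their utilization.
 */
static unsigned long domain_energy(const unsigned long *util, size_t nr_cpus)
{
	unsigned long max_util = 0, sum_util = 0;
	size_t i;

	for (i = 0; i < nr_cpus; i++) {
		if (util[i] > max_util)
			max_util = util[i];
		sum_util += util[i];
	}

	return cost_at_util(max_util) * sum_util;
}

int main(void)
{
	unsigned long util[] = { 100, 250, 40, 40 };	/* per-CPU utilization */

	printf("%lu\n", domain_energy(util, 4));
	return 0;
}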




   fix would be to use it via the scmi-cpufreq.c



2. IPA (for calculating the power of a cluster, not whole thermal needs
   this knowledge about 'dependent cpus') - this can be fixed internally



3. Frequency Invariant Engine (FIE) - currently it relies on schedutil
   filtering and providing max freq of all cpus in the cluster into the
   FIE; this info is then populated to all 'related_cpus' which will
   have this freq (we know, because there are no other freq requests);
   Issues:
3.1. Schedutil is not going to check all cpus in the cluster to take
   max freq, which is then passed into the cpufreq driver and FIE
3.2. FIE 

[PATCH 5.8 115/124] net/core: check length before updating Ethertype in skb_mpls_{push,pop}

2020-10-12 Thread Greg Kroah-Hartman
From: Guillaume Nault 

commit 4296adc3e32f5d544a95061160fe7e127be1b9ff upstream.

Openvswitch allows dropping a packet's Ethernet header, therefore
skb_mpls_push() and skb_mpls_pop() might be called with ethernet=true
and mac_len=0. In that case the pointer passed to skb_mod_eth_type()
doesn't point to an Ethernet header and the new Ethertype is written at
unexpected locations.

Fix this by verifying that mac_len is big enough to contain an Ethernet
header.

Fixes: fa4e0f8855fc ("net/sched: fix corrupted L2 header with MPLS 'push' and 
'pop' actions")
Signed-off-by: Guillaume Nault 
Acked-by: Davide Caratti 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/core/skbuff.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5621,7 +5621,7 @@ int skb_mpls_push(struct sk_buff *skb, _
lse->label_stack_entry = mpls_lse;
skb_postpush_rcsum(skb, lse, MPLS_HLEN);
 
-   if (ethernet)
+   if (ethernet && mac_len >= ETH_HLEN)
skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
skb->protocol = mpls_proto;
 
@@ -5661,7 +5661,7 @@ int skb_mpls_pop(struct sk_buff *skb, __
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
 
-   if (ethernet) {
+   if (ethernet && mac_len >= ETH_HLEN) {
struct ethhdr *hdr;
 
/* use mpls_hdr() to get ethertype to account for VLANs. */




[PATCH 5.8 122/124] net: qrtr: ns: Protect radix_tree_deref_slot() using rcu read locks

2020-10-12 Thread Greg Kroah-Hartman
From: Manivannan Sadhasivam 

commit a7809ff90ce6c48598d3c4ab54eb599bec1e9c42 upstream.

The rcu read locks are needed to avoid a potential race condition while
dereferencing the radix tree from multiple threads. The issue was identified
by syzbot. Below is the crash report:

=============================
WARNING: suspicious RCU usage
5.7.0-syzkaller #0 Not tainted
-----------------------------
include/linux/radix-tree.h:176 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
2 locks held by kworker/u4:1/21:
 #0: 88821b097938 ((wq_completion)qrtr_ns_handler){+.+.}-{0:0}, at: 
spin_unlock_irq include/linux/spinlock.h:403 [inline]
 #0: 88821b097938 ((wq_completion)qrtr_ns_handler){+.+.}-{0:0}, at: 
process_one_work+0x6df/0xfd0 kernel/workqueue.c:2241
 #1: c9dd7d80 ((work_completion)(_ns.work)){+.+.}-{0:0}, at: 
process_one_work+0x71e/0xfd0 kernel/workqueue.c:2243

stack backtrace:
CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.7.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Workqueue: qrtr_ns_handler qrtr_ns_worker
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1e9/0x30e lib/dump_stack.c:118
 radix_tree_deref_slot include/linux/radix-tree.h:176 [inline]
 ctrl_cmd_new_lookup net/qrtr/ns.c:558 [inline]
 qrtr_ns_worker+0x2aff/0x4500 net/qrtr/ns.c:674
 process_one_work+0x76e/0xfd0 kernel/workqueue.c:2268
 worker_thread+0xa7f/0x1450 kernel/workqueue.c:2414
 kthread+0x353/0x380 kernel/kthread.c:268

Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace")
Reported-and-tested-by: syzbot+0f84f6eed90503da7...@syzkaller.appspotmail.com
Signed-off-by: Manivannan Sadhasivam 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/qrtr/ns.c |   34 +-
 1 file changed, 25 insertions(+), 9 deletions(-)

--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -193,12 +193,13 @@ static int announce_servers(struct socka
struct qrtr_server *srv;
struct qrtr_node *node;
void __rcu **slot;
-   int ret;
+   int ret = 0;
 
node = node_get(qrtr_ns.local_node);
if (!node)
return 0;
 
+   rcu_read_lock();
/* Announce the list of servers registered in this node */
radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
srv = radix_tree_deref_slot(slot);
@@ -206,11 +207,14 @@ static int announce_servers(struct socka
ret = service_announce_new(sq, srv);
if (ret < 0) {
pr_err("failed to announce new service\n");
-   return ret;
+   goto err_out;
}
}
 
-   return 0;
+err_out:
+   rcu_read_unlock();
+
+   return ret;
 }
 
 static struct qrtr_server *server_add(unsigned int service,
@@ -335,7 +339,7 @@ static int ctrl_cmd_bye(struct sockaddr_
struct qrtr_node *node;
void __rcu **slot;
struct kvec iv;
-   int ret;
+   int ret = 0;
 
iv.iov_base = &pkt;
iv.iov_len = sizeof(pkt);
@@ -344,11 +348,13 @@ static int ctrl_cmd_bye(struct sockaddr_
if (!node)
return 0;
 
+   rcu_read_lock();
/* Advertise removal of this client to all servers of remote node */
radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
srv = radix_tree_deref_slot(slot);
server_del(node, srv->port);
}
+   rcu_read_unlock();
 
/* Advertise the removal of this client to all local servers */
local_node = node_get(qrtr_ns.local_node);
@@ -359,6 +365,7 @@ static int ctrl_cmd_bye(struct sockaddr_
pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE);
pkt.client.node = cpu_to_le32(from->sq_node);
 
+   rcu_read_lock();
radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) {
srv = radix_tree_deref_slot(slot);
 
@@ -372,11 +379,14 @@ static int ctrl_cmd_bye(struct sockaddr_
ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt));
if (ret < 0) {
pr_err("failed to send bye cmd\n");
-   return ret;
+   goto err_out;
}
}
 
-   return 0;
+err_out:
+   rcu_read_unlock();
+
+   return ret;
 }
 
 static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
@@ -394,7 +404,7 @@ static int ctrl_cmd_del_client(struct so
struct list_head *li;
void __rcu **slot;
struct kvec iv;
-   int ret;
+   int ret = 0;
 
iv.iov_base = &pkt;
iv.iov_len = sizeof(pkt);
@@ -434,6 +444,7 @@ static int ctrl_cmd_del_client(struct so
pkt.client.node = cpu_to_le32(node_id);
pkt.client.port = cpu_to_le32(port);
 
+   rcu_read_lock();
radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) {
srv = 

[PATCH 5.8 106/124] net: mscc: ocelot: extend watermark encoding function

2020-10-12 Thread Greg Kroah-Hartman
From: Maxim Kochetkov 

[ Upstream commit aa92d836d5c40a7e21e563a272ad177f1bfd44dd ]

The ocelot_wm_encode function deals with setting thresholds for pause
frame start and stop. In Ocelot and Felix the register layout is the
same, but for Seville, it isn't. The easiest way to accommodate Seville
hardware configuration is to introduce a function pointer for setting
this up.
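
For reference, the encoding implemented by the new callback below (the
example values are only illustrative):

#include <stdint.h>
#include <stdio.h>

/* Bit 8 selects a x16 unit, bits 7-0 carry the value. */
static uint16_t wm_enc(uint16_t value)
{
	if (value >= (1u << 8))
		return (1u << 8) | (value / 16);

	return value;
}

int main(void)
{
	printf("0x%03x\n", wm_enc(230));	/* 0x0e6: fits in 8 bits, unit 1 */
	printf("0x%03x\n", wm_enc(1000));	/* 0x13e: unit 16, value 62      */
	return 0;
}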

Signed-off-by: Maxim Kochetkov 
Signed-off-by: Vladimir Oltean 
Reviewed-by: Florian Fainelli 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/dsa/ocelot/felix_vsc9959.c | 13 +
 drivers/net/ethernet/mscc/ocelot.c | 16 ++--
 drivers/net/ethernet/mscc/ocelot_vsc7514.c | 13 +
 include/soc/mscc/ocelot.h  |  1 +
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c 
b/drivers/net/dsa/ocelot/felix_vsc9959.c
index a83ecd1c5d6c2..259a612da0030 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1105,8 +1105,21 @@ static int vsc9959_prevalidate_phy_mode(struct ocelot 
*ocelot, int port,
}
 }
 
+/* Watermark encode
+ * Bit 8:   Unit; 0:1, 1:16
+ * Bit 7-0: Value to be multiplied with unit
+ */
+static u16 vsc9959_wm_enc(u16 value)
+{
+   if (value >= BIT(8))
+   return BIT(8) | (value / 16);
+
+   return value;
+}
+
 static const struct ocelot_ops vsc9959_ops = {
.reset  = vsc9959_reset,
+   .wm_enc = vsc9959_wm_enc,
 };
 
 static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
diff --git a/drivers/net/ethernet/mscc/ocelot.c 
b/drivers/net/ethernet/mscc/ocelot.c
index 6e68713c0ac6b..1438839e3f6ea 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -396,18 +396,6 @@ static void ocelot_vlan_init(struct ocelot *ocelot)
}
 }
 
-/* Watermark encode
- * Bit 8:   Unit; 0:1, 1:16
- * Bit 7-0: Value to be multiplied with unit
- */
-static u16 ocelot_wm_enc(u16 value)
-{
-   if (value >= BIT(8))
-   return BIT(8) | (value / 16);
-
-   return value;
-}
-
 void ocelot_adjust_link(struct ocelot *ocelot, int port,
struct phy_device *phydev)
 {
@@ -2037,9 +2025,9 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int 
port, size_t sdu)
/* Tail dropping watermark */
atop_wm = (ocelot->shared_queue_sz - 9 * maxlen) /
   OCELOT_BUFFER_CELL_SZ;
-   ocelot_write_rix(ocelot, ocelot_wm_enc(9 * maxlen),
+   ocelot_write_rix(ocelot, ocelot->ops->wm_enc(9 * maxlen),
 SYS_ATOP, port);
-   ocelot_write(ocelot, ocelot_wm_enc(atop_wm), SYS_ATOP_TOT_CFG);
+   ocelot_write(ocelot, ocelot->ops->wm_enc(atop_wm), SYS_ATOP_TOT_CFG);
 }
 EXPORT_SYMBOL(ocelot_port_set_maxlen);
 
diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c 
b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
index 4a15d2ff8b706..66b58b242f778 100644
--- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c
+++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
@@ -240,8 +240,21 @@ static int ocelot_reset(struct ocelot *ocelot)
return 0;
 }
 
+/* Watermark encode
+ * Bit 8:   Unit; 0:1, 1:16
+ * Bit 7-0: Value to be multiplied with unit
+ */
+static u16 ocelot_wm_enc(u16 value)
+{
+   if (value >= BIT(8))
+   return BIT(8) | (value / 16);
+
+   return value;
+}
+
 static const struct ocelot_ops ocelot_ops = {
.reset  = ocelot_reset,
+   .wm_enc = ocelot_wm_enc,
 };
 
 static const struct vcap_field vsc7514_vcap_is2_keys[] = {
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 4953e9994df34..8e174a24c5757 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -468,6 +468,7 @@ struct ocelot;
 
 struct ocelot_ops {
int (*reset)(struct ocelot *ocelot);
+   u16 (*wm_enc)(u16 value);
 };
 
 struct ocelot_acl_block {
-- 
2.25.1





[PATCH 5.8 019/124] platform/x86: thinkpad_acpi: initialize tp_nvram_state variable

2020-10-12 Thread Greg Kroah-Hartman
From: Tom Rix 

commit 5f38b06db8af3ed6c2fc1b427504ca56fae2eacc upstream.

clang static analysis flags this representative problem
thinkpad_acpi.c:2523:7: warning: Branch condition evaluates
  to a garbage value
if (!oldn->mute ||
^~~

In hotkey_kthread() mute is conditionally set by hotkey_read_nvram()
but unconditionally checked by hotkey_compare_and_issue_event().
So the tp_nvram_state variable s[2] needs to be initialized.

Fixes: 01e88f25985d ("ACPI: thinkpad-acpi: add CMOS NVRAM polling for hot keys 
(v9)")
Signed-off-by: Tom Rix 
Reviewed-by: Hans de Goede 
Acked-by: mark gross 
Signed-off-by: Andy Shevchenko 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/platform/x86/thinkpad_acpi.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -2569,7 +2569,7 @@ static void hotkey_compare_and_issue_eve
  */
 static int hotkey_kthread(void *data)
 {
-   struct tp_nvram_state s[2];
+   struct tp_nvram_state s[2] = { 0 };
u32 poll_mask, event_mask;
unsigned int si, so;
unsigned long t;




Re: [PATCH v2 09/17] mm: Add unsafe_follow_pfn

2020-10-12 Thread Daniel Vetter
On Mon, Oct 12, 2020 at 12:47 PM Marek Szyprowski
 wrote:
>
> Hi Jason,
>
> On 09.10.2020 14:48, Jason Gunthorpe wrote:
> > On Fri, Oct 09, 2020 at 02:37:23PM +0200, Mauro Carvalho Chehab wrote:
> >
> >> I'm not a mm/ expert, but, from what I understood from Daniel's patch
> >> description is that this is unsafe *only if*  __GFP_MOVABLE is used.
> > No, it is unconditionally unsafe. The CMA movable mappings are
> > specific VMAs that will have bad issues here, but there are other
> > types too.
>
> I'm trying to follow this thread, but I really wonder what you mean
> by CMA movable mappings? If a buffer has been allocated from CMA and
> used for DMA, it won't be moved in the memory. It will stay at the same
> physical memory address all the time until freed by the owner. It's just a
> matter of proper usage count tracking to delay freeing if it is still
> used somewhere.

Yup. The problem is that this usage count tracking doesn't exist. And
drivers could at least in theory treat CMA like vram and swap buffers
in and out of it, so just refcounting the userspace vma isn't enough. In
practice, right now, it might be enough for CMA drivers though (but
there's more that's possible here).
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


[PATCH 5.8 107/124] net: mscc: ocelot: divide watermark value by 60 when writing to SYS_ATOP

2020-10-12 Thread Greg Kroah-Hartman
From: Vladimir Oltean 

[ Upstream commit 601e984f23abcaa7cf3eb078c13de4db3cf6a4f0 ]

Tail dropping is enabled for a port when:

1. A source port consumes more packet buffers than the watermark encoded
   in SYS:PORT:ATOP_CFG.ATOP.

AND

2. Total memory use exceeds the consumption watermark encoded in
   SYS:PAUSE_CFG:ATOP_TOT_CFG.

The unit of these watermarks is a 60 byte memory cell. That unit is
programmed properly into ATOP_TOT_CFG, but not into ATOP. Actually when
written into ATOP, it would get truncated and wrap around.

Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support")
Signed-off-by: Vladimir Oltean 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mscc/ocelot.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c 
b/drivers/net/ethernet/mscc/ocelot.c
index 1438839e3f6ea..61bbb7a090042 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -2001,7 +2001,7 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int 
port, size_t sdu)
struct ocelot_port *ocelot_port = ocelot->ports[port];
int maxlen = sdu + ETH_HLEN + ETH_FCS_LEN;
int pause_start, pause_stop;
-   int atop_wm;
+   int atop, atop_tot;
 
if (port == ocelot->npi) {
maxlen += OCELOT_TAG_LEN;
@@ -2022,12 +2022,12 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int 
port, size_t sdu)
ocelot_rmw_rix(ocelot, SYS_PAUSE_CFG_PAUSE_STOP(pause_stop),
   SYS_PAUSE_CFG_PAUSE_STOP_M, SYS_PAUSE_CFG, port);
 
-   /* Tail dropping watermark */
-   atop_wm = (ocelot->shared_queue_sz - 9 * maxlen) /
+   /* Tail dropping watermarks */
+   atop_tot = (ocelot->shared_queue_sz - 9 * maxlen) /
   OCELOT_BUFFER_CELL_SZ;
-   ocelot_write_rix(ocelot, ocelot->ops->wm_enc(9 * maxlen),
-SYS_ATOP, port);
-   ocelot_write(ocelot, ocelot->ops->wm_enc(atop_wm), SYS_ATOP_TOT_CFG);
+   atop = (9 * maxlen) / OCELOT_BUFFER_CELL_SZ;
+   ocelot_write_rix(ocelot, ocelot->ops->wm_enc(atop), SYS_ATOP, port);
+   ocelot_write(ocelot, ocelot->ops->wm_enc(atop_tot), SYS_ATOP_TOT_CFG);
 }
 EXPORT_SYMBOL(ocelot_port_set_maxlen);
 
-- 
2.25.1





[PATCH 5.8 105/124] net: mscc: ocelot: split writes to pause frame enable bit and to thresholds

2020-10-12 Thread Greg Kroah-Hartman
From: Vladimir Oltean 

[ Upstream commit e8e6e73db14273464b374d49ca7242c0994945f3 ]

We don't want ocelot_port_set_maxlen to enable pause frame TX, just to
adjust the pause thresholds.

Move the unconditional enabling of pause TX to ocelot_init_port. There
is no good place to put such a setting because it shouldn't be
unconditional. But at the moment it is, we're not changing that.

Signed-off-by: Vladimir Oltean 
Reviewed-by: Florian Fainelli 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mscc/ocelot.c | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c 
b/drivers/net/ethernet/mscc/ocelot.c
index d0b79cca51840..6e68713c0ac6b 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -2012,6 +2012,7 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int 
port, size_t sdu)
 {
struct ocelot_port *ocelot_port = ocelot->ports[port];
int maxlen = sdu + ETH_HLEN + ETH_FCS_LEN;
+   int pause_start, pause_stop;
int atop_wm;
 
if (port == ocelot->npi) {
@@ -2025,13 +2026,13 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int 
port, size_t sdu)
 
ocelot_port_writel(ocelot_port, maxlen, DEV_MAC_MAXLEN_CFG);
 
-   /* Set Pause WM hysteresis
-* 152 = 6 * maxlen / OCELOT_BUFFER_CELL_SZ
-* 101 = 4 * maxlen / OCELOT_BUFFER_CELL_SZ
-*/
-   ocelot_write_rix(ocelot, SYS_PAUSE_CFG_PAUSE_ENA |
-SYS_PAUSE_CFG_PAUSE_STOP(101) |
-SYS_PAUSE_CFG_PAUSE_START(152), SYS_PAUSE_CFG, port);
+   /* Set Pause watermark hysteresis */
+   pause_start = 6 * maxlen / OCELOT_BUFFER_CELL_SZ;
+   pause_stop = 4 * maxlen / OCELOT_BUFFER_CELL_SZ;
+   ocelot_rmw_rix(ocelot, SYS_PAUSE_CFG_PAUSE_START(pause_start),
+  SYS_PAUSE_CFG_PAUSE_START_M, SYS_PAUSE_CFG, port);
+   ocelot_rmw_rix(ocelot, SYS_PAUSE_CFG_PAUSE_STOP(pause_stop),
+  SYS_PAUSE_CFG_PAUSE_STOP_M, SYS_PAUSE_CFG, port);
 
/* Tail dropping watermark */
atop_wm = (ocelot->shared_queue_sz - 9 * maxlen) /
@@ -2094,6 +2095,10 @@ void ocelot_init_port(struct ocelot *ocelot, int port)
ocelot_port_writel(ocelot_port, 0, DEV_MAC_FC_MAC_HIGH_CFG);
ocelot_port_writel(ocelot_port, 0, DEV_MAC_FC_MAC_LOW_CFG);
 
+   /* Enable transmission of pause frames */
+   ocelot_rmw_rix(ocelot, SYS_PAUSE_CFG_PAUSE_ENA, SYS_PAUSE_CFG_PAUSE_ENA,
+  SYS_PAUSE_CFG, port);
+
/* Drop frames with multicast source address */
ocelot_rmw_gix(ocelot, ANA_PORT_DROP_CFG_DROP_MC_SMAC_ENA,
   ANA_PORT_DROP_CFG_DROP_MC_SMAC_ENA,
-- 
2.25.1





[PATCH 5.8 086/124] net/mlx5: poll cmd EQ in case of command timeout

2020-10-12 Thread Greg Kroah-Hartman
From: Eran Ben Elisha 

[ Upstream commit 1d5558b1f0de81f54ddee05f3793acc5260d107f ]

Once driver detects a command interface command timeout, it warns the
user and returns timeout error to the caller. In such case, the entry of
the command is not evacuated (because only real event interrupt is allowed
to clear command interface entry). If the HW event interrupt
of this entry will never arrive, this entry will be left unused forever.
Command interface entries are limited and eventually we can end up without
the ability to post a new command.

In addition, if the driver does not consume the EQE of the lost interrupt and
rearm the EQ, no new interrupts will arrive for other commands.

Add a resiliency mechanism for manually polling the command EQ in case of
a command timeout. In case the resiliency mechanism finds a non-handled EQE,
it will consume it, and the command interface will be fully functional
again. Once the resiliency flow has finished, wait another 5 seconds for the
command interface to complete for this command entry.

Define mlx5_cmd_eq_recover() to manage the cmd EQ polling resiliency flow.
Add an async EQ spinlock to avoid races between resiliency flows and real
interrupts that might run simultaneously.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Eran Ben Elisha 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 53 ---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  | 40 +-
 .../net/ethernet/mellanox/mlx5/core/lib/eq.h  |  2 +
 3 files changed, 86 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index c0055f5479ce0..37dae95e61d5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -853,11 +853,21 @@ static void cb_timeout_handler(struct work_struct *work)
struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
 cmd);
 
+   mlx5_cmd_eq_recover(dev);
+
+   /* Maybe got handled by eq recover ? */
+   if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) {
+   mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after 
timeout\n", ent->idx,
+  mlx5_command_str(msg_to_opcode(ent->in)), 
msg_to_opcode(ent->in));
+   goto out; /* phew, already handled */
+   }
+
ent->ret = -ETIMEDOUT;
-   mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command 
resource\n",
-  mlx5_command_str(msg_to_opcode(ent->in)),
-  msg_to_opcode(ent->in));
+   mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a 
leak of a command resource\n",
+  ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), 
msg_to_opcode(ent->in));
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+
+out:
cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed 
work */
 }
 
@@ -997,6 +1007,35 @@ static const char *deliv_status_to_str(u8 status)
}
 }
 
+enum {
+   MLX5_CMD_TIMEOUT_RECOVER_MSEC   = 5 * 1000,
+};
+
+static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_work_ent *ent)
+{
+   unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC);
+
+   mlx5_cmd_eq_recover(dev);
+
+   /* Re-wait on the ent->done after executing the recovery flow. If the
+* recovery flow (or any other recovery flow running simultaneously)
+* has recovered an EQE, it should cause the entry to be completed by
+* the command interface.
+*/
+   if (wait_for_completion_timeout(&ent->done, timeout)) {
+   mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after 
timeout\n", ent->idx,
+  mlx5_command_str(msg_to_opcode(ent->in)), 
msg_to_opcode(ent->in));
+   return;
+   }
+
+   mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx,
+  mlx5_command_str(msg_to_opcode(ent->in)), 
msg_to_opcode(ent->in));
+
+   ent->ret = -ETIMEDOUT;
+   mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+}
+
 static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 {
unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
@@ -1008,12 +1047,10 @@ static int wait_func(struct mlx5_core_dev *dev, struct 
mlx5_cmd_work_ent *ent)
ent->ret = -ECANCELED;
goto out_err;
}
-   if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
+   if (cmd->mode == CMD_MODE_POLLING || ent->polling)
wait_for_completion(&ent->done);
-   } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
-   ent->ret = -ETIMEDOUT;
- 

[GIT PULL] libata updates for 5.10-rc1

2020-10-12 Thread Jens Axboe
Hi Linus,

Nothing major in here, just fixes or improvements collected over the
last few months.

Please pull!


The following changes since commit a1b8638ba1320e6684aa98233c15255eb803fac7:

  Linux 5.9-rc7 (2020-09-27 14:38:10 -0700)

are available in the Git repository at:

  git://git.kernel.dk/linux-block.git tags/libata-5.10-2020-10-12

for you to fetch changes up to 45aefe3d2251e4e229d7662052739f96ad1d08d9:

  ata: ahci: mvebu: Make SATA PHY optional for Armada 3720 (2020-10-09 12:47:56 
-0600)


libata-5.10-2020-10-12


Bartlomiej Zolnierkiewicz (1):
  MAINTAINERS: remove LIBATA PATA DRIVERS entry

Gustavo A. R. Silva (1):
  pata_cmd64x: Use fallthrough pseudo-keyword

Liu Shixin (1):
  sata, highbank: simplify the return expression of ahci_highbank_suspend

Mika Westerberg (1):
  ahci: Add Intel Rocket Lake PCH-H RAID PCI IDs

Pali Rohár (1):
  ata: ahci: mvebu: Make SATA PHY optional for Armada 3720

Yuantian Tang (1):
  ahci: qoriq: enable acpi support in qoriq ahci driver

 MAINTAINERS|  9 -
 drivers/ata/ahci.c |  4 
 drivers/ata/ahci.h |  2 ++
 drivers/ata/ahci_mvebu.c   |  2 +-
 drivers/ata/ahci_qoriq.c   | 20 +---
 drivers/ata/libahci_platform.c |  2 +-
 drivers/ata/pata_cmd64x.c  |  2 +-
 drivers/ata/sata_highbank.c|  7 +--
 8 files changed, 27 insertions(+), 21 deletions(-)

-- 
Jens Axboe



[PATCH 5.8 090/124] net/mlx5e: Fix return status when setting unsupported FEC mode

2020-10-12 Thread Greg Kroah-Hartman
From: Aya Levin 

[ Upstream commit 2608a2f831c47dfdf18885a7289be5af97182b05 ]

Verify the configured FEC mode is supported by at least a single link
mode before applying the command. Otherwise fail the command and return
"Operation not supported".
Prior to this patch, the command was successful, yet it falsely set all
link modes to FEC auto mode - like configuring FEC mode to auto. Auto
mode is the default configuration if a link mode doesn't support the
configured FEC mode.

Fixes: b5ede32d3329 ("net/mlx5e: Add support for FEC modes based on 50G per 
lane links")
Signed-off-by: Aya Levin 
Reviewed-by: Eran Ben Elisha 
Reviewed-by: Moshe Shemesh 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 98e909bf3c1ec..3e32264cf6131 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -566,6 +566,9 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 
fec_policy)
if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane)
return -EOPNOTSUPP;
 
+   if (fec_policy && !mlx5e_fec_in_caps(dev, fec_policy))
+   return -EOPNOTSUPP;
+
MLX5_SET(pplm_reg, in, local_port, 1);
err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
if (err)
-- 
2.25.1





[PATCH 5.8 114/124] netlink: fix policy dump leak

2020-10-12 Thread Greg Kroah-Hartman
From: Johannes Berg 

commit a95bc734e60449e7b073ff7ff70c35083b290ae9 upstream.

If userspace doesn't complete the policy dump, we leak the
allocated state. Fix this.

Fixes: d07dcf9aadd6 ("netlink: add infrastructure to expose policies to 
userspace")
Signed-off-by: Johannes Berg 
Reviewed-by: Jakub Kicinski 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 include/net/netlink.h   |3 ++-
 net/netlink/genetlink.c |9 -
 net/netlink/policy.c|   24 ++--
 3 files changed, 20 insertions(+), 16 deletions(-)

--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1936,7 +1936,8 @@ void nla_get_range_signed(const struct n
 int netlink_policy_dump_start(const struct nla_policy *policy,
  unsigned int maxtype,
  unsigned long *state);
-bool netlink_policy_dump_loop(unsigned long *state);
+bool netlink_policy_dump_loop(unsigned long state);
 int netlink_policy_dump_write(struct sk_buff *skb, unsigned long state);
+void netlink_policy_dump_free(unsigned long state);
 
 #endif
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1079,7 +1079,7 @@ static int ctrl_dumppolicy(struct sk_buf
if (err)
return err;
 
-   while (netlink_policy_dump_loop(&cb->args[1])) {
+   while (netlink_policy_dump_loop(cb->args[1])) {
void *hdr;
struct nlattr *nest;
 
@@ -1113,6 +1113,12 @@ nla_put_failure:
return skb->len;
 }
 
+static int ctrl_dumppolicy_done(struct netlink_callback *cb)
+{
+   netlink_policy_dump_free(cb->args[1]);
+   return 0;
+}
+
 static const struct genl_ops genl_ctrl_ops[] = {
{
.cmd= CTRL_CMD_GETFAMILY,
@@ -1123,6 +1129,7 @@ static const struct genl_ops genl_ctrl_o
{
.cmd= CTRL_CMD_GETPOLICY,
.dumpit = ctrl_dumppolicy,
+   .done   = ctrl_dumppolicy_done,
},
 };
 
--- a/net/netlink/policy.c
+++ b/net/netlink/policy.c
@@ -84,7 +84,6 @@ int netlink_policy_dump_start(const stru
unsigned int policy_idx;
int err;
 
-   /* also returns 0 if "*_state" is our ERR_PTR() end marker */
if (*_state)
return 0;
 
@@ -140,21 +139,11 @@ static bool netlink_policy_dump_finished
   !state->policies[state->policy_idx].policy;
 }
 
-bool netlink_policy_dump_loop(unsigned long *_state)
+bool netlink_policy_dump_loop(unsigned long _state)
 {
-   struct nl_policy_dump *state = (void *)*_state;
-
-   if (IS_ERR(state))
-   return false;
-
-   if (netlink_policy_dump_finished(state)) {
-   kfree(state);
-   /* store end marker instead of freed state */
-   *_state = (unsigned long)ERR_PTR(-ENOENT);
-   return false;
-   }
+   struct nl_policy_dump *state = (void *)_state;
 
-   return true;
+   return !netlink_policy_dump_finished(state);
 }
 
 int netlink_policy_dump_write(struct sk_buff *skb, unsigned long _state)
@@ -309,3 +298,10 @@ nla_put_failure:
nla_nest_cancel(skb, policy);
return -ENOBUFS;
 }
+
+void netlink_policy_dump_free(unsigned long _state)
+{
+   struct nl_policy_dump *state = (void *)_state;
+
+   kfree(state);
+}




[PATCH 5.8 084/124] net/mlx5: Fix a race when moving command interface to polling mode

2020-10-12 Thread Greg Kroah-Hartman
From: Eran Ben Elisha 

[ Upstream commit 432161ea26d6d5e5c3f7306d9407d26ed1e1953e ]

As part of driver unload, the driver destroys the commands EQ (via an FW
command). Once the commands EQ is destroyed, FW will not generate EQEs for
any command that the driver sends afterwards; the driver should poll for the
status of later commands.

Driver commands mode metadata is updated before the commands EQ is
actually destroyed. This can lead to a double completion handled by the
driver (polling and interrupt) if a command is executed and completed by
FW after the mode was changed, but before the EQ was destroyed.

Fix that by using the mlx5_cmd_allowed_opcode mechanism to guarantee
that only DESTROY_EQ command can be executed during this time period.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Eran Ben Elisha 
Reviewed-by: Moshe Shemesh 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 31ef9f8420c87..1318d774b18f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -656,8 +656,10 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
 
cleanup_async_eq(dev, &table->pages_eq, "pages");
cleanup_async_eq(dev, &table->async_eq, "async");
+   mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ);
mlx5_cmd_use_polling(dev);
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
+   mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
 }
 
-- 
2.25.1





[PATCH 5.8 108/124] afs: Fix deadlock between writeback and truncate

2020-10-12 Thread Greg Kroah-Hartman
From: David Howells 

[ Upstream commit ec0fa0b659144d9c68204d23f627b6a65fa53e50 ]

The afs filesystem has a lock[*] that it uses to serialise I/O operations
going to the server (vnode->io_lock), as the server will only perform one
modification operation at a time on any given file or directory.  This
prevents the the filesystem from filling up all the call slots to a server
with calls that aren't going to be executed in parallel anyway, thereby
allowing operations on other files to obtain slots.

  [*] Note that this is probably redundant for directories at least since
  i_rwsem is used to serialise directory modifications and
  lookup/reading vs modification.  The server does allow parallel
  non-modification ops, however.

When a file truncation op completes, we truncate the in-memory copy of the
file to match - but we do it whilst still holding the io_lock, the idea
being to prevent races with other operations.

However, if writeback starts in a worker thread simultaneously with
truncation (whilst notify_change() is called with i_rwsem locked, writeback
pays it no heed), it may manage to set PG_writeback bits on the pages that
will get truncated before afs_setattr_success() manages to call
truncate_pagecache().  Truncate will then wait for those pages - whilst
still inside io_lock:

# cat /proc/8837/stack
[<0>] wait_on_page_bit_common+0x184/0x1e7
[<0>] truncate_inode_pages_range+0x37f/0x3eb
[<0>] truncate_pagecache+0x3c/0x53
[<0>] afs_setattr_success+0x4d/0x6e
[<0>] afs_wait_for_operation+0xd8/0x169
[<0>] afs_do_sync_operation+0x16/0x1f
[<0>] afs_setattr+0x1fb/0x25d
[<0>] notify_change+0x2cf/0x3c4
[<0>] do_truncate+0x7f/0xb2
[<0>] do_sys_ftruncate+0xd1/0x104
[<0>] do_syscall_64+0x2d/0x3a
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9

The writeback operation, however, stalls indefinitely because it needs to
get the io_lock to proceed:

# cat /proc/5940/stack
[<0>] afs_get_io_locks+0x58/0x1ae
[<0>] afs_begin_vnode_operation+0xc7/0xd1
[<0>] afs_store_data+0x1b2/0x2a3
[<0>] afs_write_back_from_locked_page+0x418/0x57c
[<0>] afs_writepages_region+0x196/0x224
[<0>] afs_writepages+0x74/0x156
[<0>] do_writepages+0x2d/0x56
[<0>] __writeback_single_inode+0x84/0x207
[<0>] writeback_sb_inodes+0x238/0x3cf
[<0>] __writeback_inodes_wb+0x68/0x9f
[<0>] wb_writeback+0x145/0x26c
[<0>] wb_do_writeback+0x16a/0x194
[<0>] wb_workfn+0x74/0x177
[<0>] process_one_work+0x174/0x264
[<0>] worker_thread+0x117/0x1b9
[<0>] kthread+0xec/0xf1
[<0>] ret_from_fork+0x1f/0x30

and thus deadlock has occurred.

Note that whilst afs_setattr() calls filemap_write_and_wait(), the fact
that the caller is holding i_rwsem doesn't preclude more pages being
dirtied through an mmap'd region.

Fix this by:

 (1) Use the vnode validate_lock to mediate access between afs_setattr()
 and afs_writepages():

 (a) Exclusively lock validate_lock in afs_setattr() around the whole
 RPC operation.

 (b) If WB_SYNC_ALL isn't set on entry to afs_writepages(), trying to
 shared-lock validate_lock and returning immediately if we couldn't
 get it.

 (c) If WB_SYNC_ALL is set, wait for the lock.

 The validate_lock is also used to validate a file and to zap its cache
 if the file was altered by a third party, so it's probably a good fit
 for this.

 (2) Move the truncation outside of the io_lock in setattr, using the same
 hook as is used for local directory editing.

 This requires the old i_size to be retained in the operation record as
 we commit the revised status to the inode members inside the io_lock
 still, but we still need to know if we reduced the file size.

Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and 
fileserver rotation")
Signed-off-by: David Howells 
Signed-off-by: Linus Torvalds 
Signed-off-by: Sasha Levin 
---
 fs/afs/inode.c| 47 ++-
 fs/afs/internal.h |  1 +
 fs/afs/write.c| 11 +++
 3 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 1d13d2e882ada..0fe8844b4bee2 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -810,14 +810,32 @@ void afs_evict_inode(struct inode *inode)
 
 static void afs_setattr_success(struct afs_operation *op)
 {
-   struct inode *inode = &op->file[0].vnode->vfs_inode;
+   struct afs_vnode_param *vp = &op->file[0];
+   struct inode *inode = &vp->vnode->vfs_inode;
+   loff_t old_i_size = i_size_read(inode);
+
+   op->setattr.old_i_size = old_i_size;
+   afs_vnode_commit_status(op, vp);
+   /* inode->i_size has now been changed. */
+
+   if (op->setattr.attr->ia_valid & ATTR_SIZE) {
+   loff_t size = op->setattr.attr->ia_size;
+   if (size > old_i_size)
+   pagecache_isize_extended(inode, old_i_size, size);
+   }
+}
+
+static void 

[PATCH 5.8 087/124] net/mlx5: Add retry mechanism to the command entry index allocation

2020-10-12 Thread Greg Kroah-Hartman
From: Eran Ben Elisha 

[ Upstream commit 410bd754cd73c4a2ac3856d9a03d7b08f9c906bf ]

It is possible that new command entry index allocation will temporarily
fail. The new command holds the semaphore, so it means that a free entry
should be ready soon. Add one second retry mechanism before returning an
error.

Patch "net/mlx5: Avoid possible free of command entry while timeout comp
handler" increase the possibility to bump into this temporarily failure
as it delays the entry index release for non-callback commands.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Eran Ben Elisha 
Reviewed-by: Moshe Shemesh 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 21 ++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 37dae95e61d5f..2b597ac365f84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -883,6 +883,25 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 
opcode)
return cmd->allowed_opcode == opcode;
 }
 
+static int cmd_alloc_index_retry(struct mlx5_cmd *cmd)
+{
+   unsigned long alloc_end = jiffies + msecs_to_jiffies(1000);
+   int idx;
+
+retry:
+   idx = cmd_alloc_index(cmd);
+   if (idx < 0 && time_before(jiffies, alloc_end)) {
+   /* Index allocation can fail on heavy load of commands. This is 
a temporary
+* situation as the current command already holds the 
semaphore, meaning that
+* another command completion is being handled and it is 
expected to release
+* the entry index soon.
+*/
+   cpu_relax();
+   goto retry;
+   }
+   return idx;
+}
+
 static void cmd_work_handler(struct work_struct *work)
 {
struct mlx5_cmd_work_ent *ent = container_of(work, struct 
mlx5_cmd_work_ent, work);
@@ -900,7 +919,7 @@ static void cmd_work_handler(struct work_struct *work)
sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
down(sem);
if (!ent->page_queue) {
-   alloc_ret = cmd_alloc_index(cmd);
+   alloc_ret = cmd_alloc_index_retry(cmd);
if (alloc_ret < 0) {
mlx5_core_err_rl(dev, "failed to allocate command 
entry\n");
if (ent->callback) {
-- 
2.25.1





[PATCH 5.8 116/124] net: bridge: fdb: dont flush ext_learn entries

2020-10-12 Thread Greg Kroah-Hartman
From: Nikolay Aleksandrov 

commit f2f3729fb65c5c2e6db234e6316b71a7bdc4b30b upstream.

When a user-space software manages fdb entries externally it should
set the ext_learn flag which marks the fdb entry as externally managed
and avoids expiring it (they're treated as static fdbs). Unfortunately
on events where fdb entries are flushed (STP down, netlink fdb flush
etc) these fdbs are also deleted automatically by the bridge. That in turn
causes trouble for the managing user-space software (e.g. in MLAG setups
we lose remote fdb entries on port flaps).
These entries are completely externally managed so we should avoid
automatically deleting them, the only exception are offloaded entries
(i.e. BR_FDB_ADDED_BY_EXT_LEARN + BR_FDB_OFFLOADED). They are flushed as
before.

Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from 
user-space")
Signed-off-by: Nikolay Aleksandrov 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/bridge/br_fdb.c |2 ++
 1 file changed, 2 insertions(+)

--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -404,6 +404,8 @@ void br_fdb_delete_by_port(struct net_br
 
if (!do_all)
if (test_bit(BR_FDB_STATIC, &f->flags) ||
+   (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags) &&
+!test_bit(BR_FDB_OFFLOADED, &f->flags)) ||
(vid && f->key.vlan_id != vid))
continue;
 




[PATCH 5.8 104/124] net: mscc: ocelot: rename ocelot_board.c to ocelot_vsc7514.c

2020-10-12 Thread Greg Kroah-Hartman
From: Vladimir Oltean 

[ Upstream commit 589aa6e7c9de322d47eb33a5cee8cc38838319e6 ]

To follow the model of felix and seville where we have one
platform-specific file, rename this file to the actual SoC it serves.

Signed-off-by: Vladimir Oltean 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mscc/Makefile | 2 +-
 drivers/net/ethernet/mscc/{ocelot_board.c => ocelot_vsc7514.c} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename drivers/net/ethernet/mscc/{ocelot_board.c => ocelot_vsc7514.c} (100%)

diff --git a/drivers/net/ethernet/mscc/Makefile 
b/drivers/net/ethernet/mscc/Makefile
index 91b33b55054e1..ad97a5cca6f99 100644
--- a/drivers/net/ethernet/mscc/Makefile
+++ b/drivers/net/ethernet/mscc/Makefile
@@ -2,4 +2,4 @@
 obj-$(CONFIG_MSCC_OCELOT_SWITCH) += mscc_ocelot_common.o
 mscc_ocelot_common-y := ocelot.o ocelot_io.o
 mscc_ocelot_common-y += ocelot_regs.o ocelot_tc.o ocelot_police.o ocelot_ace.o 
ocelot_flower.o ocelot_ptp.o
-obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += ocelot_board.o
+obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += ocelot_vsc7514.o
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c 
b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
similarity index 100%
rename from drivers/net/ethernet/mscc/ocelot_board.c
rename to drivers/net/ethernet/mscc/ocelot_vsc7514.c
-- 
2.25.1





[PATCH 5.8 095/124] net: hinic: fix DEVLINK build errors

2020-10-12 Thread Greg Kroah-Hartman
From: Randy Dunlap 

[ Upstream commit 1f7e877c20517735bceff1535e1b7fa846b2f215 ]

Fix many (lots deleted here) build errors in hinic by selecting NET_DEVLINK.

ld: drivers/net/ethernet/huawei/hinic/hinic_hw_dev.o: in function 
`mgmt_watchdog_timeout_event_handler':
hinic_hw_dev.c:(.text+0x30a): undefined reference to `devlink_health_report'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function 
`hinic_fw_reporter_dump':
hinic_devlink.c:(.text+0x1c): undefined reference to `devlink_fmsg_u32_pair_put'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function 
`hinic_fw_reporter_dump':
hinic_devlink.c:(.text+0x126): undefined reference to 
`devlink_fmsg_binary_pair_put'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function 
`hinic_hw_reporter_dump':
hinic_devlink.c:(.text+0x1ba): undefined reference to 
`devlink_fmsg_string_pair_put'
ld: hinic_devlink.c:(.text+0x227): undefined reference to 
`devlink_fmsg_u8_pair_put'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function 
`hinic_devlink_alloc':
hinic_devlink.c:(.text+0xaee): undefined reference to `devlink_alloc'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function 
`hinic_devlink_free':
hinic_devlink.c:(.text+0xb04): undefined reference to `devlink_free'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function 
`hinic_devlink_register':
hinic_devlink.c:(.text+0xb26): undefined reference to `devlink_register'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function 
`hinic_devlink_unregister':
hinic_devlink.c:(.text+0xb46): undefined reference to `devlink_unregister'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function 
`hinic_health_reporters_create':
hinic_devlink.c:(.text+0xb75): undefined reference to 
`devlink_health_reporter_create'
ld: hinic_devlink.c:(.text+0xb95): undefined reference to 
`devlink_health_reporter_create'
ld: hinic_devlink.c:(.text+0xbac): undefined reference to 
`devlink_health_reporter_destroy'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function 
`hinic_health_reporters_destroy':

Fixes: 51ba902a16e6 ("net-next/hinic: Initialize hw interface")
Signed-off-by: Randy Dunlap 
Cc: Bin Luo 
Cc: "David S. Miller" 
Cc: Jakub Kicinski 
Cc: Aviad Krawczyk 
Cc: Zhao Chen 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/huawei/hinic/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/huawei/hinic/Kconfig 
b/drivers/net/ethernet/huawei/hinic/Kconfig
index 936e2dd3bb135..b47bd5440c5f0 100644
--- a/drivers/net/ethernet/huawei/hinic/Kconfig
+++ b/drivers/net/ethernet/huawei/hinic/Kconfig
@@ -6,6 +6,7 @@
 config HINIC
tristate "Huawei Intelligent PCIE Network Interface Card"
depends on (PCI_MSI && (X86 || ARM64))
+   select NET_DEVLINK
help
  This driver supports HiNIC PCIE Ethernet cards.
  To compile this driver as part of the kernel, choose Y here.
-- 
2.25.1





[PATCH 5.8 103/124] rxrpc: Fix server keyring leak

2020-10-12 Thread Greg Kroah-Hartman
From: David Howells 

[ Upstream commit 38b1dc47a35ba14c3f4472138ea56d014c2d609b ]

If someone calls setsockopt() twice to set a server key keyring, the first
keyring is leaked.

Fix it to return an error instead if the server key keyring is already set.

Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by 
userspace and kernel both")
Signed-off-by: David Howells 
Signed-off-by: Sasha Levin 
---
 net/rxrpc/key.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 64cbbd2f16944..85a9ff8cd236a 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -903,7 +903,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user 
*optval, int optlen)
 
_enter("");
 
-   if (optlen <= 0 || optlen > PAGE_SIZE - 1)
+   if (optlen <= 0 || optlen > PAGE_SIZE - 1 || rx->securities)
return -EINVAL;
 
description = memdup_user_nul(optval, optlen);
-- 
2.25.1





[PATCH] sched/cputime: correct account of irqtime

2020-10-12 Thread Pingfan Liu
__do_softirq() may be interrupted by hardware interrupts. In this case,
irqtime_account_irq() will account the time slice as CPUTIME_SOFTIRQ by
mistake.

By passing irqtime_account_irq() an extra parameter indicating whether the
caller is on the hardirq or softirq path, irqtime_account_irq() can handle
the above case.

Signed-off-by: Pingfan Liu 
Cc: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Juri Lelli 
Cc: Vincent Guittot 
Cc: Dietmar Eggemann 
Cc: Steven Rostedt 
Cc: Ben Segall 
Cc: Mel Gorman 
Cc: Thomas Gleixner 
Cc: Andy Lutomirski 
Cc: Will Deacon 
Cc: "Paul E. McKenney" 
Cc: Frederic Weisbecker 
Cc: Allen Pais 
Cc: Romain Perier 
To: linux-kernel@vger.kernel.org
---
 include/linux/hardirq.h |  4 ++--
 include/linux/vtime.h   | 12 ++--
 kernel/sched/cputime.c  |  4 ++--
 kernel/softirq.c|  6 +++---
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 754f67a..56e7bb5 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -32,7 +32,7 @@ static __always_inline void rcu_irq_enter_check_tick(void)
  */
 #define __irq_enter()  \
do {\
-   account_irq_enter_time(current);\
+   account_irq_enter_time(current, true);  \
preempt_count_add(HARDIRQ_OFFSET);  \
lockdep_hardirq_enter();\
} while (0)
@@ -63,7 +63,7 @@ void irq_enter_rcu(void);
 #define __irq_exit()   \
do {\
lockdep_hardirq_exit(); \
-   account_irq_exit_time(current); \
+   account_irq_exit_time(current, true);   \
preempt_count_sub(HARDIRQ_OFFSET);  \
} while (0)
 
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 2cdeca0..294188ae1 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -98,21 +98,21 @@ static inline void vtime_flush(struct task_struct *tsk) { }
 
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
-extern void irqtime_account_irq(struct task_struct *tsk);
+extern void irqtime_account_irq(struct task_struct *tsk, bool hardirq);
 #else
-static inline void irqtime_account_irq(struct task_struct *tsk) { }
+static inline void irqtime_account_irq(struct task_struct *tsk, bool hardirq) 
{ }
 #endif
 
-static inline void account_irq_enter_time(struct task_struct *tsk)
+static inline void account_irq_enter_time(struct task_struct *tsk, bool 
hardirq)
 {
vtime_account_irq_enter(tsk);
-   irqtime_account_irq(tsk);
+   irqtime_account_irq(tsk, hardirq);
 }
 
-static inline void account_irq_exit_time(struct task_struct *tsk)
+static inline void account_irq_exit_time(struct task_struct *tsk, bool hardirq)
 {
vtime_account_irq_exit(tsk);
-   irqtime_account_irq(tsk);
+   irqtime_account_irq(tsk, hardirq);
 }
 
 #endif /* _LINUX_KERNEL_VTIME_H */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5a55d23..166f1d7 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -47,7 +47,7 @@ static void irqtime_account_delta(struct irqtime *irqtime, 
u64 delta,
  * Called before incrementing preempt_count on {soft,}irq_enter
  * and before decrementing preempt_count on {soft,}irq_exit.
  */
-void irqtime_account_irq(struct task_struct *curr)
+void irqtime_account_irq(struct task_struct *curr, bool hardirq)
 {
struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
s64 delta;
@@ -68,7 +68,7 @@ void irqtime_account_irq(struct task_struct *curr)
 */
if (hardirq_count())
irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
-   else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
+   else if (in_serving_softirq() && curr != this_cpu_ksoftirqd() && 
!hardirq)
irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bf88d7f6..da59ea39 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -270,7 +270,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
current->flags &= ~PF_MEMALLOC;
 
pending = local_softirq_pending();
-   account_irq_enter_time(current);
+   account_irq_enter_time(current, false);
 
__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
in_hardirq = lockdep_softirq_start();
@@ -321,7 +321,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
}
 
lockdep_softirq_end(in_hardirq);
-   account_irq_exit_time(current);
+   account_irq_exit_time(current, false);
__local_bh_enable(SOFTIRQ_OFFSET);
WARN_ON_ONCE(in_interrupt());
current_restore_flags(old_flags, PF_MEMALLOC);
@@ -417,7 +417,7 @@ static inline void __irq_exit_rcu(void)
 #else
lockdep_assert_irqs_disabled();
 #endif
-   

[PATCH 5.8 094/124] net: stmmac: Modify configuration method of EEE timers

2020-10-12 Thread Greg Kroah-Hartman
From: Vineetha G. Jaya Kumaran 

[ Upstream commit 388e201d41fa1ed8f2dce0f0567f56f8e919ffb0 ]

The ethtool manual states that the tx-timer is "the amount of time the
device should stay in idle mode prior to asserting its Tx LPI". The
previous implementation of "ethtool --set-eee tx-timer" set the LPI TW
timer duration instead, which is not correct. Hence, this patch fixes
"ethtool --set-eee tx-timer" to configure the EEE LPI timer.

The LPI TW timer will use the defined default value instead of
"ethtool --set-eee tx-timer", which follows the EEE LS timer implementation.

Changelog V2
* Not removing/modifying the eee_timer.
* The EEE LPI timer can be configured through ethtool and also the eee_timer
  module param.
* The EEE TW timer will be configured with the default value only, and cannot
  be configured through ethtool or the module param. This follows the
  implementation of the EEE LS timer.
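
As a point of reference, a rough userspace sketch of what
"ethtool --set-eee <dev> tx-timer <usecs>" boils down to with this change
(illustrative only; error handling is trimmed, the helper name is made up,
and "sock" is assumed to be an ordinary AF_INET datagram socket):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <linux/ethtool.h>
	#include <linux/sockios.h>

	/* Read-modify-write the EEE settings so only tx_lpi_timer changes. */
	static int set_eee_tx_timer(int sock, const char *ifname, __u32 usecs)
	{
		struct ethtool_eee eee = { .cmd = ETHTOOL_GEEE };
		struct ifreq ifr;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
		ifr.ifr_data = (void *)&eee;

		if (ioctl(sock, SIOCETHTOOL, &ifr) < 0)	/* current EEE state */
			return -1;

		eee.cmd = ETHTOOL_SEEE;
		eee.tx_lpi_timer = usecs;	/* with this patch: the EEE LPI timer */
		return ioctl(sock, SIOCETHTOOL, &ifr);
	}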

Fixes: d765955d2ae0 ("stmmac: add the Energy Efficient Ethernet support")
Signed-off-by: Vineetha G. Jaya Kumaran 
Signed-off-by: Voon Weifeng 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h  |  2 ++
 .../ethernet/stmicro/stmmac/stmmac_ethtool.c  | 12 +-
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 ---
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h 
b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 9c02fc754bf1b..545696971f65e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -203,6 +203,8 @@ struct stmmac_priv {
int eee_enabled;
int eee_active;
int tx_lpi_timer;
+   int tx_lpi_enabled;
+   int eee_tw_timer;
unsigned int mode;
unsigned int chain_mode;
int extend_desc;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index c16d0cc3e9c44..b82c6715f95f3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -652,6 +652,7 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev,
edata->eee_enabled = priv->eee_enabled;
edata->eee_active = priv->eee_active;
edata->tx_lpi_timer = priv->tx_lpi_timer;
+   edata->tx_lpi_enabled = priv->tx_lpi_enabled;
 
return phylink_ethtool_get_eee(priv->phylink, edata);
 }
@@ -665,6 +666,10 @@ static int stmmac_ethtool_op_set_eee(struct net_device 
*dev,
if (!priv->dma_cap.eee)
return -EOPNOTSUPP;
 
+   if (priv->tx_lpi_enabled != edata->tx_lpi_enabled)
+   netdev_warn(priv->dev,
+   "Setting EEE tx-lpi is not supported\n");
+
if (!edata->eee_enabled)
stmmac_disable_eee_mode(priv);
 
@@ -672,7 +677,12 @@ static int stmmac_ethtool_op_set_eee(struct net_device 
*dev,
if (ret)
return ret;
 
-   priv->tx_lpi_timer = edata->tx_lpi_timer;
+   if (edata->eee_enabled &&
+   priv->tx_lpi_timer != edata->tx_lpi_timer) {
+   priv->tx_lpi_timer = edata->tx_lpi_timer;
+   stmmac_eee_init(priv);
+   }
+
return 0;
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 73677c3b33b65..73465e5f5a417 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -94,7 +94,7 @@ static const u32 default_msg_level = (NETIF_MSG_DRV | 
NETIF_MSG_PROBE |
 static int eee_timer = STMMAC_DEFAULT_LPI_TIMER;
 module_param(eee_timer, int, 0644);
 MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
-#define STMMAC_LPI_T(x) (jiffies + msecs_to_jiffies(x))
+#define STMMAC_LPI_T(x) (jiffies + usecs_to_jiffies(x))
 
 /* By default the driver will use the ring mode to manage tx and rx 
descriptors,
  * but allow user to force to use the chain instead of the ring
@@ -370,7 +370,7 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t)
struct stmmac_priv *priv = from_timer(priv, t, eee_ctrl_timer);
 
stmmac_enable_eee_mode(priv);
-   mod_timer(>eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
+   mod_timer(>eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
 }
 
 /**
@@ -383,7 +383,7 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t)
  */
 bool stmmac_eee_init(struct stmmac_priv *priv)
 {
-   int tx_lpi_timer = priv->tx_lpi_timer;
+   int eee_tw_timer = priv->eee_tw_timer;
 
/* Using PCS we cannot dial with the phy registers at this stage
 * so we do not support extra feature like EEE.
@@ -403,7 +403,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
if (priv->eee_enabled) {
netdev_dbg(priv->dev, "disable EEE\n");
del_timer_sync(>eee_ctrl_timer);
-   

[PATCH 5.8 102/124] rxrpc: The server keyring isn't network-namespaced

2020-10-12 Thread Greg Kroah-Hartman
From: David Howells 

[ Upstream commit fea99111244bae44e7d82a973744d27ea1567814 ]

The keyring containing the server's tokens isn't network-namespaced, so it
shouldn't be looked up with a network namespace.  It is expected to be
owned specifically by the server, so namespacing is unnecessary.

Fixes: a58946c158a0 ("keys: Pass the network namespace into request_key 
mechanism")
Signed-off-by: David Howells 
Signed-off-by: Sasha Levin 
---
 net/rxrpc/key.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 32f46edcf7c67..64cbbd2f16944 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -941,7 +941,7 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user 
*optval,
if (IS_ERR(description))
return PTR_ERR(description);
 
-   key = request_key_net(_type_keyring, description, 
sock_net(>sk), NULL);
+   key = request_key(_type_keyring, description, NULL);
if (IS_ERR(key)) {
kfree(description);
_leave(" = %ld", PTR_ERR(key));
-- 
2.25.1





[PATCH 5.8 092/124] net/mlx5e: Fix VLAN create flow

2020-10-12 Thread Greg Kroah-Hartman
From: Aya Levin 

[ Upstream commit d4a16052bccdd695982f89d815ca075825115821 ]

When the interface is attached while in promiscuous mode and with VLAN
filtering turned off, neither configuration is respected and VLAN
filtering is performed.
There are 2 flows which add the any-vid rules during interface attach:
VLAN table creation and set rx mode. Each relies on the other to
add the any-vid rules, so eventually neither of them does.

Fix this by adding any-vid rules on VLAN creation regardless of
promiscuous mode.

Fixes: 9df30601c843 ("net/mlx5e: Restore vlan filter after seamless reset")
Signed-off-by: Aya Levin 
Reviewed-by: Moshe Shemesh 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index c5be0cdfaf0fa..713dc210f710c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -217,6 +217,9 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
break;
}
 
+   if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type))
+   return 0;
+
*rule_p = mlx5_add_flow_rules(ft, spec, _act, , 1);
 
if (IS_ERR(*rule_p)) {
@@ -397,8 +400,7 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, 
i);
 
-   if (priv->fs.vlan.cvlan_filter_disabled &&
-   !(priv->netdev->flags & IFF_PROMISC))
+   if (priv->fs.vlan.cvlan_filter_disabled)
mlx5e_add_any_vid_rules(priv);
 }
 
-- 
2.25.1





[PATCH 5.8 022/124] bpf: Prevent .BTF section elimination

2020-10-12 Thread Greg Kroah-Hartman
From: Tony Ambardar 

commit 65c204398928f9c79f1a29912b410439f7052635 upstream.

Systems with memory or disk constraints often reduce the kernel footprint
by configuring LD_DEAD_CODE_DATA_ELIMINATION. However, this can result in
removal of any BTF information.

Use the KEEP() macro to preserve the BTF data as done with other important
sections, while still allowing for smaller kernels.

Fixes: 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF")
Signed-off-by: Tony Ambardar 
Signed-off-by: Daniel Borkmann 
Acked-by: John Fastabend 
Acked-by: Andrii Nakryiko 
Link: 
https://lore.kernel.org/bpf/a635b5d3e2da044e7b51ec1315e8910fbce0083f.1600417359.git.tony.ambar...@gmail.com
Signed-off-by: Greg Kroah-Hartman 

---
 include/asm-generic/vmlinux.lds.h |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -641,7 +641,7 @@
 #define BTF\
.BTF : AT(ADDR(.BTF) - LOAD_OFFSET) {   \
__start_BTF = .;\
-   *(.BTF) \
+   KEEP(*(.BTF))   \
__stop_BTF = .; \
}
 #else




[PATCH 5.8 083/124] pipe: Fix memory leaks in create_pipe_files()

2020-10-12 Thread Greg Kroah-Hartman
From: Qian Cai 

[ Upstream commit 8a018eb55e3ac033592afbcb476b0ffe64465b12 ]

Calling pipe2() with O_NOTIFICATION_PIPE could result in memory
leaks unless watch_queue_init() is successful.

In case of watch_queue_init() failure in pipe2() we are left
with inode and pipe_inode_info instances that need to be freed.  That
failure exit has been introduced in commit c73be61cede5 ("pipe: Add
general notification queue support") and its handling should've been
identical to nearby treatment of alloc_file_pseudo() failures - it
is dealing with the same situation.  As it is, the mainline kernel
leaks in that case.

Another problem is that CONFIG_WATCH_QUEUE and !CONFIG_WATCH_QUEUE
cases are treated differently (and the former leaks just pipe_inode_info,
the latter - both pipe_inode_info and inode).

Fixed by providing a dummy watch_queue_init() in the !CONFIG_WATCH_QUEUE
case and by having failures of watch_queue_init() handled the same way
we handle alloc_file_pseudo() ones.

Fixes: c73be61cede5 ("pipe: Add general notification queue support")
Signed-off-by: Qian Cai 
Signed-off-by: Al Viro 
Signed-off-by: Sasha Levin 
---
 fs/pipe.c   | 11 +--
 include/linux/watch_queue.h |  6 ++
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 117db82b10af5..0ac197658a2d6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -894,19 +894,18 @@ int create_pipe_files(struct file **res, int flags)
 {
struct inode *inode = get_pipe_inode();
struct file *f;
+   int error;
 
if (!inode)
return -ENFILE;
 
if (flags & O_NOTIFICATION_PIPE) {
-#ifdef CONFIG_WATCH_QUEUE
-   if (watch_queue_init(inode->i_pipe) < 0) {
+   error = watch_queue_init(inode->i_pipe);
+   if (error) {
+   free_pipe_info(inode->i_pipe);
iput(inode);
-   return -ENOMEM;
+   return error;
}
-#else
-   return -ENOPKG;
-#endif
}
 
f = alloc_file_pseudo(inode, pipe_mnt, "",
diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h
index 5e08db2adc319..c994d1b2cdbaa 100644
--- a/include/linux/watch_queue.h
+++ b/include/linux/watch_queue.h
@@ -122,6 +122,12 @@ static inline void remove_watch_list(struct watch_list 
*wlist, u64 id)
  */
 #define watch_sizeof(STRUCT) (sizeof(STRUCT) << WATCH_INFO_LENGTH__SHIFT)
 
+#else
+static inline int watch_queue_init(struct pipe_inode_info *pipe)
+{
+   return -ENOPKG;
+}
+
 #endif
 
 #endif /* _LINUX_WATCH_QUEUE_H */
-- 
2.25.1





[PATCH 5.8 096/124] vhost-vdpa: fix vhost_vdpa_map() on error condition

2020-10-12 Thread Greg Kroah-Hartman
From: Si-Wei Liu 

[ Upstream commit 1477c8aebb94a1db398c12d929a9d27bbd678d8c ]

vhost_vdpa_map() should remove the iotlb entry just added
if the corresponding mapping fails to set up properly.

Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu 
Link: 
https://lore.kernel.org/r/1601701330-16837-2-git-send-email-si-wei@oracle.com
Signed-off-by: Michael S. Tsirkin 
Signed-off-by: Sasha Levin 
---
 drivers/vhost/vdpa.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index a54b60d6623f0..5259f5210b375 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -527,6 +527,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
r = iommu_map(v->domain, iova, pa, size,
  perm_to_iommu_flags(perm));
 
+   if (r)
+   vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+
return r;
 }
 
-- 
2.25.1





[PATCH 5.8 101/124] rxrpc: Fix some missing _bh annotations on locking conn->state_lock

2020-10-12 Thread Greg Kroah-Hartman
From: David Howells 

[ Upstream commit fa1d113a0f96f9ab7e4fe4f8825753ba1e34a9d3 ]

conn->state_lock may be taken in softirq mode, but a previous patch
replaced an outer lock in the response-packet event handling code, and lost
the _bh from that when doing so.

Fix this by applying the _bh annotation to the state_lock locking.
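
For context, a minimal generic sketch of why the _bh variant matters for a
lock that is also taken from softirq (BH) context (illustrative only; this
is not the rxrpc code itself, just the pattern):

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(state_lock);

	/* Softirq (BH) side: runs in BH context. */
	static void softirq_side(void)
	{
		spin_lock(&state_lock);
		/* ... update state ... */
		spin_unlock(&state_lock);
	}

	/* Process-context side: must use the _bh variant.  If a softirq
	 * fires on this CPU while the plain spin_lock() is held, the
	 * softirq spins on the same lock and the CPU deadlocks.
	 */
	static void process_side(void)
	{
		spin_lock_bh(&state_lock);
		/* ... update state ... */
		spin_unlock_bh(&state_lock);
	}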

Fixes: a1399f8bb033 ("rxrpc: Call channels should have separate call number 
spaces")
Signed-off-by: David Howells 
Signed-off-by: Sasha Levin 
---
 net/rxrpc/conn_event.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 447f55ca68860..6e972b4823efa 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -340,18 +340,18 @@ static int rxrpc_process_event(struct rxrpc_connection 
*conn,
return ret;
 
spin_lock(>channel_lock);
-   spin_lock(>state_lock);
+   spin_lock_bh(>state_lock);
 
if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
conn->state = RXRPC_CONN_SERVICE;
-   spin_unlock(>state_lock);
+   spin_unlock_bh(>state_lock);
for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
rxrpc_call_is_secure(
rcu_dereference_protected(
conn->channels[loop].call,

lockdep_is_held(>channel_lock)));
} else {
-   spin_unlock(>state_lock);
+   spin_unlock_bh(>state_lock);
}
 
spin_unlock(>channel_lock);
-- 
2.25.1





[PATCH 5.8 049/124] drm/amd/display: fix return value check for hdcp_work

2020-10-12 Thread Greg Kroah-Hartman
From: Flora Cui 

[ Upstream commit 898c7302f4de1d91065e80fc46552b3ec70894ff ]

max_caps might be 0, thus hdcp_work might be ZERO_SIZE_PTR
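
A minimal sketch of why the plain NULL check is insufficient here
(illustrative only; the helper name is made up, not part of the patch):

	#include <linux/slab.h>

	/* kcalloc(0, size, GFP_KERNEL) does not return NULL; it returns
	 * ZERO_SIZE_PTR, a small non-NULL poison value, so any later
	 * dereference faults.  A plain NULL check therefore lets an
	 * empty allocation slip through, while ZERO_OR_NULL_PTR() covers
	 * both the allocation-failure and the zero-size cases.
	 */
	static void *alloc_per_cap(size_t max_caps, size_t elem_size)
	{
		void *p = kcalloc(max_caps, elem_size, GFP_KERNEL);

		if (ZERO_OR_NULL_PTR(p))	/* NULL or ZERO_SIZE_PTR */
			return NULL;
		return p;
	}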

Signed-off-by: Flora Cui 
Reviewed-by: Feifei Xu 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 949d10ef83040..6dd1f3f8d9903 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -568,7 +568,7 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct 
amdgpu_device *adev, struct
int i = 0;
 
hdcp_work = kcalloc(max_caps, sizeof(*hdcp_work), GFP_KERNEL);
-   if (hdcp_work == NULL)
+   if (ZERO_OR_NULL_PTR(hdcp_work))
return NULL;
 
hdcp_work->srm = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, 
sizeof(*hdcp_work->srm), GFP_KERNEL);
-- 
2.25.1





[PATCH 5.8 058/124] net: stmmac: removed enabling eee in EEE set callback

2020-10-12 Thread Greg Kroah-Hartman
From: Voon Weifeng 

[ Upstream commit 7241c5a697479c7d0c5a96595822cdab750d41ae ]

EEE should only be enabled during stmmac_mac_link_up(), when the
link is up and has been set up properly. set_eee should only do settings
configuration and disabling of EEE.

Without this fix, turning on EEE using ethtool will return
"Operation not supported". This is because the driver is stuck in a loop
waiting for EEE to be advertised before EEE is activated, but the
driver will only configure the EEE advertisement after EEE is
activated.

Ethtool should only return "Operation not supported" if there is no EEE
capability in the MAC controller.

Fixes: 8a7493e58ad6 ("net: stmmac: Fix a race in EEE enable callback")
Signed-off-by: Voon Weifeng 
Acked-by: Mark Gross 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c  | 15 ---
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index eae11c5850251..c16d0cc3e9c44 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -662,23 +662,16 @@ static int stmmac_ethtool_op_set_eee(struct net_device 
*dev,
struct stmmac_priv *priv = netdev_priv(dev);
int ret;
 
-   if (!edata->eee_enabled) {
+   if (!priv->dma_cap.eee)
+   return -EOPNOTSUPP;
+
+   if (!edata->eee_enabled)
stmmac_disable_eee_mode(priv);
-   } else {
-   /* We are asking for enabling the EEE but it is safe
-* to verify all by invoking the eee_init function.
-* In case of failure it will return an error.
-*/
-   edata->eee_enabled = stmmac_eee_init(priv);
-   if (!edata->eee_enabled)
-   return -EOPNOTSUPP;
-   }
 
ret = phylink_ethtool_set_eee(priv->phylink, edata);
if (ret)
return ret;
 
-   priv->eee_enabled = edata->eee_enabled;
priv->tx_lpi_timer = edata->tx_lpi_timer;
return 0;
 }
-- 
2.25.1





[PATCH 5.8 099/124] rxrpc: Fix rxkad token xdr encoding

2020-10-12 Thread Greg Kroah-Hartman
From: Marc Dionne 

[ Upstream commit 56305118e05b2db8d0395bba640ac9a3aee92624 ]

The session key should be encoded with just the 8 data bytes and
no length; ENCODE_DATA precedes it with a 4 byte length, which
confuses some existing tools that try to parse this format.

Add an ENCODE_BYTES macro that does not include a length, and use
it for the key.  Also adjust the expected length.

Note that commit 774521f353e1d ("rxrpc: Fix an assertion in
rxrpc_read()") had fixed a BUG by changing the length rather than
fixing the encoding.  The original length was correct.

Fixes: 99455153d067 ("RxRPC: Parse security index 5 keys (Kerberos 5)")
Signed-off-by: Marc Dionne 
Signed-off-by: David Howells 
Signed-off-by: Sasha Levin 
---
 net/rxrpc/key.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 0c98313dd7a8c..d77e89766406a 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -1073,7 +1073,7 @@ static long rxrpc_read(const struct key *key,
 
switch (token->security_index) {
case RXRPC_SECURITY_RXKAD:
-   toksize += 9 * 4;   /* viceid, kvno, key*2 + len, 
begin,
+   toksize += 8 * 4;   /* viceid, kvno, key*2, begin,
 * end, primary, tktlen */
toksize += RND(token->kad->ticket_len);
break;
@@ -1139,6 +1139,14 @@ static long rxrpc_read(const struct key *key,
memcpy((u8 *)xdr + _l, , 4 - (_l & 3));\
xdr += (_l + 3) >> 2;   \
} while(0)
+#define ENCODE_BYTES(l, s) \
+   do {\
+   u32 _l = (l);   \
+   memcpy(xdr, (s), _l);   \
+   if (_l & 3) \
+   memcpy((u8 *)xdr + _l, , 4 - (_l & 3));\
+   xdr += (_l + 3) >> 2;   \
+   } while(0)
 #define ENCODE64(x)\
do {\
__be64 y = cpu_to_be64(x);  \
@@ -1166,7 +1174,7 @@ static long rxrpc_read(const struct key *key,
case RXRPC_SECURITY_RXKAD:
ENCODE(token->kad->vice_id);
ENCODE(token->kad->kvno);
-   ENCODE_DATA(8, token->kad->session_key);
+   ENCODE_BYTES(8, token->kad->session_key);
ENCODE(token->kad->start);
ENCODE(token->kad->expiry);
ENCODE(token->kad->primary_flag);
-- 
2.25.1





[PATCH 5.8 093/124] net/mlx5e: Fix race condition on nhe->n pointer in neigh update

2020-10-12 Thread Greg Kroah-Hartman
From: Vlad Buslov 

[ Upstream commit 1253935ad801485270194d5651acab04abc97b36 ]

The current neigh update event handler implementation takes a reference to
the neighbour structure, assigns it to nhe->n, tries to schedule a workqueue
task and releases the reference if the task was already enqueued. This can
result in overwriting the existing nhe->n pointer with another neighbour
instance, which causes a double release of the instance (once in the neigh
update handler that failed to enqueue to the workqueue and again in the neigh
update workqueue task that processes the updated nhe->n pointer instead of
the original one):

[ 3376.512806] [ cut here ]
[ 3376.513534] refcount_t: underflow; use-after-free.
[ 3376.521213] Modules linked in: act_skbedit act_mirred act_tunnel_key vxlan 
ip6_udp_tunnel udp_tunnel nfnetlink act_gact cls_flower sch_ingress openvswitch 
nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 mlx5_ib 
mlx5_core mlxfw pci_hyperv_intf ptp pps_core nfsv3 nfs_acl rpcsec_gss_krb5 
auth_rpcgss nfsv4 dns_resolver nfs lockd
 grace fscache ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp rpcrdma 
rdma_ucm ib_umad ib_ipoib ib_iser rdma_cm ib_cm iw_cm rfkill ib_uverbs ib_core 
sunrpc kvm_intel kvm iTCO_wdt iTCO_vendor_support virtio_net irqbypass 
net_failover crc32_pclmul lpc_ich i2c_i801 failover pcspkr i2c_smbus mfd_core 
ghash_clmulni_intel sch_fq_codel drm i2c
_core ip_tables crc32c_intel serio_raw [last unloaded: mlxfw]
[ 3376.529468] CPU: 8 PID: 22756 Comm: kworker/u20:5 Not tainted 5.9.0-rc5+ #6
[ 3376.530399] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 
rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
[ 3376.531975] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core]
[ 3376.532820] RIP: 0010:refcount_warn_saturate+0xd8/0xe0
[ 3376.533589] Code: ff 48 c7 c7 e0 b8 27 82 c6 05 0b b6 09 01 01 e8 94 93 c1 
ff 0f 0b c3 48 c7 c7 88 b8 27 82 c6 05 f7 b5 09 01 01 e8 7e 93 c1 ff <0f> 0b c3 
0f 1f 44 00 00 8b 07 3d 00 00 00 c0 74 12 83 f8 01 74 13
[ 3376.536017] RSP: 0018:c90002a97e30 EFLAGS: 00010286
[ 3376.536793] RAX:  RBX: 8882de30d648 RCX: 
[ 3376.537718] RDX: 8882f5c28f20 RSI: 8882f5c18e40 RDI: 8882f5c18e40
[ 3376.538654] RBP: 8882cdf56c00 R08: c580 R09: 1a4d
[ 3376.539582] R10: 0731 R11: c90002a97ccd R12: 
[ 3376.540519] R13: 8882de30d600 R14: 8882de30d640 R15: 88821e000900
[ 3376.541444] FS:  () GS:8882f5c0() 
knlGS:
[ 3376.542732] CS:  0010 DS:  ES:  CR0: 80050033
[ 3376.543545] CR2: 556e5504b248 CR3: 0002c6f10005 CR4: 00770ee0
[ 3376.544483] DR0:  DR1:  DR2: 
[ 3376.545419] DR3:  DR6: fffe0ff0 DR7: 0400
[ 3376.546344] PKRU: 5554
[ 3376.546911] Call Trace:
[ 3376.547479]  mlx5e_rep_neigh_update.cold+0x33/0xe2 [mlx5_core]
[ 3376.548299]  process_one_work+0x1d8/0x390
[ 3376.548977]  worker_thread+0x4d/0x3e0
[ 3376.549631]  ? rescuer_thread+0x3e0/0x3e0
[ 3376.550295]  kthread+0x118/0x130
[ 3376.550914]  ? kthread_create_worker_on_cpu+0x70/0x70
[ 3376.551675]  ret_from_fork+0x1f/0x30
[ 3376.552312] ---[ end trace d84e8f46d2a77eec ]---

Fix the bug by moving the work_struct to a dedicated dynamically-allocated
structure. This enables every event handler to work on its own private
neighbour pointer and removes the need to handle the case when the task is
already enqueued.
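
A minimal sketch of that pattern (illustrative only; the names below are
hypothetical and not the driver's actual symbols):

	#include <linux/workqueue.h>
	#include <linux/slab.h>
	#include <net/neighbour.h>

	struct neigh_event_work {
		struct work_struct work;
		struct neighbour *n;	/* private reference, taken per event */
	};

	static void neigh_event_handler(struct work_struct *work)
	{
		struct neigh_event_work *ew =
			container_of(work, struct neigh_event_work, work);

		/* ... process ew->n ... */
		neigh_release(ew->n);	/* exactly one release per event */
		kfree(ew);
	}

	/* Called from the (atomic) notifier: allocate a work item per event,
	 * so concurrent events never overwrite each other's pointer.
	 */
	static int queue_neigh_event(struct neighbour *n)
	{
		struct neigh_event_work *ew = kzalloc(sizeof(*ew), GFP_ATOMIC);

		if (!ew)
			return -ENOMEM;
		neigh_hold(n);
		ew->n = n;
		INIT_WORK(&ew->work, neigh_event_handler);
		queue_work(system_wq, &ew->work);
		return 0;
	}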

Fixes: 232c001398ae ("net/mlx5e: Add support to neighbour update flow")
Signed-off-by: Vlad Buslov 
Reviewed-by: Roi Dayan 
Signed-off-by: Saeed Mahameed 
Signed-off-by: Sasha Levin 
---
 .../mellanox/mlx5/core/en/rep/neigh.c | 81 ---
 .../net/ethernet/mellanox/mlx5/core/en_rep.h  |  6 --
 2 files changed, 50 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
index c3d167fa944c7..6a9d783d129b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
@@ -109,11 +109,25 @@ static void mlx5e_rep_neigh_stats_work(struct work_struct 
*work)
rtnl_unlock();
 }
 
+struct neigh_update_work {
+   struct work_struct work;
+   struct neighbour *n;
+   struct mlx5e_neigh_hash_entry *nhe;
+};
+
+static void mlx5e_release_neigh_update_work(struct neigh_update_work 
*update_work)
+{
+   neigh_release(update_work->n);
+   mlx5e_rep_neigh_entry_release(update_work->nhe);
+   kfree(update_work);
+}
+
 static void mlx5e_rep_neigh_update(struct work_struct *work)
 {
-   struct mlx5e_neigh_hash_entry *nhe =
-   container_of(work, struct mlx5e_neigh_hash_entry, 
neigh_update_work);
-   struct neighbour *n = nhe->n;
+   struct neigh_update_work *update_work = container_of(work, 

[PATCH 5.8 100/124] rxrpc: Downgrade the BUG() for unsupported token type in rxrpc_read()

2020-10-12 Thread Greg Kroah-Hartman
From: David Howells 

[ Upstream commit 9a059cd5ca7d9c5c4ca5a6e755cf72f230176b6a ]

If rxrpc_read() (which allows KEYCTL_READ to read a key) sees a token of a
type it doesn't recognise, it can BUG in a couple of places, which is
unnecessary as it can easily get back to userspace.

Fix this to print an error message instead.

Fixes: 99455153d067 ("RxRPC: Parse security index 5 keys (Kerberos 5)")
Signed-off-by: David Howells 
Signed-off-by: Sasha Levin 
---
 net/rxrpc/key.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index d77e89766406a..32f46edcf7c67 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -1108,7 +1108,8 @@ static long rxrpc_read(const struct key *key,
break;
 
default: /* we have a ticket we can't encode */
-   BUG();
+   pr_err("Unsupported key token type (%u)\n",
+  token->security_index);
continue;
}
 
@@ -1224,7 +1225,6 @@ static long rxrpc_read(const struct key *key,
break;
 
default:
-   BUG();
break;
}
 
-- 
2.25.1





Re: [linux-safety] [PATCH] e1000: drop unneeded assignment in e1000_set_itr()

2020-10-12 Thread Lukas Bulwahn



On Sun, 11 Oct 2020, Sudip Mukherjee wrote:

> The variable 'current_itr' is assigned to 0 before jumping to
> 'set_itr_now' but it has not been used after the jump. So, remove the
> unneeded assignment.
> 
> Signed-off-by: Sudip Mukherjee 
> ---
>  drivers/net/ethernet/intel/e1000/e1000_main.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c 
> b/drivers/net/ethernet/intel/e1000/e1000_main.c
> index 5e28cf4fa2cd..042de276e632 100644
> --- a/drivers/net/ethernet/intel/e1000/e1000_main.c
> +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
> @@ -2632,7 +2632,6 @@ static void e1000_set_itr(struct e1000_adapter *adapter)
>  
>   /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
>   if (unlikely(adapter->link_speed != SPEED_1000)) {
> - current_itr = 0;
>   new_itr = 4000;
>   goto set_itr_now;
>   }

Alternatively, you could just inline the max(...) into the switch and 
completely drop the current_itr definition.

But your solution probably does the job: it is a "No functional change" 
commit.

Reviewed-by: Lukas Bulwahn 


Lukas

> -- 
> 2.11.0
> 
> 
> 
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#77): https://lists.elisa.tech/g/linux-safety/message/77
> Mute This Topic: https://lists.elisa.tech/mt/77448709/1714638
> Group Owner: linux-safety+ow...@lists.elisa.tech
> Unsubscribe: https://lists.elisa.tech/g/linux-safety/unsub 
> [lukas.bulw...@gmail.com]
> -=-=-=-=-=-=-=-=-=-=-=-
> 
> 
> 


[PATCH 5.8 057/124] xsk: Do not discard packet when NETDEV_TX_BUSY

2020-10-12 Thread Greg Kroah-Hartman
From: Magnus Karlsson 

[ Upstream commit 642e450b6b5955f2059d0ae372183f7c6323f951 ]

In the skb Tx path, transmission of a packet is performed with
dev_direct_xmit(). When a driver returns NETDEV_TX_BUSY, it
signifies that it was not possible to send the packet right now,
please try later. Unfortunately, the xsk transmit code discarded the
packet and returned EBUSY to the application. Fix this unnecessary
packet loss by not discarding the packet in the Tx ring and returning
EAGAIN. As EAGAIN is returned to the application, it can retry
the send operation later and the packet will then likely be sent, as
the driver will by then likely have the space/resources to send it.

In summary, EAGAIN tells the application that the packet was not
discarded from the Tx ring and that it needs to call send()
again. EBUSY, on the other hand, signifies that the packet was not
sent and discarded from the Tx ring. The application needs to put
the packet on the Tx ring again if it wants it to be sent.
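
A rough sketch of how a userspace AF_XDP sender might now tell the two
cases apart (illustrative only; it assumes xsk_fd is a bound AF_XDP socket
and that Tx descriptor handling happens elsewhere):

	#include <errno.h>
	#include <sys/socket.h>

	/* Ask the kernel to transmit whatever is on the Tx ring. */
	static int xsk_tx_kick(int xsk_fd)
	{
		for (;;) {
			if (sendto(xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0) >= 0)
				return 0;
			if (errno == EAGAIN)
				continue;	/* packet kept in Tx ring: retry (or poll) later */
			if (errno == EBUSY)
				return 1;	/* packet dropped from Tx ring: re-post it */
			if (errno != EINTR)
				return -1;	/* some other error */
		}
	}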

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Reported-by: Arkadiusz Zema 
Suggested-by: Arkadiusz Zema 
Suggested-by: Daniel Borkmann 
Signed-off-by: Magnus Karlsson 
Signed-off-by: Daniel Borkmann 
Reviewed-by: Jesse Brandeburg 
Link: 
https://lore.kernel.org/bpf/1600257625-2353-1-git-send-email-magnus.karls...@gmail.com
Signed-off-by: Sasha Levin 
---
 net/xdp/xsk.c | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3700266229f63..dcce888b8ef54 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -375,15 +375,30 @@ static int xsk_generic_xmit(struct sock *sk)
skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
skb->destructor = xsk_destruct_skb;
 
+   /* Hinder dev_direct_xmit from freeing the packet and
+* therefore completing it in the destructor
+*/
+   refcount_inc(>users);
err = dev_direct_xmit(skb, xs->queue_id);
+   if  (err == NETDEV_TX_BUSY) {
+   /* Tell user-space to retry the send */
+   skb->destructor = sock_wfree;
+   /* Free skb without triggering the perf drop trace */
+   consume_skb(skb);
+   err = -EAGAIN;
+   goto out;
+   }
+
xskq_cons_release(xs->tx);
/* Ignore NET_XMIT_CN as packet might have been sent */
-   if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
+   if (err == NET_XMIT_DROP) {
/* SKB completed but not sent */
+   kfree_skb(skb);
err = -EBUSY;
goto out;
}
 
+   consume_skb(skb);
sent_frame = true;
}
 
-- 
2.25.1





Re: [PATCH v1 1/2] ASoC: qcom: dt-bindings: Modify sc7180 machine bindings

2020-10-12 Thread Mark Brown
On Sat, Oct 10, 2020 at 12:07:54AM +0800, Ajye Huang wrote:
> On Fri, Oct 9, 2020 at 9:52 PM Mark Brown  wrote:
> > On Mon, Sep 28, 2020 at 02:37:43PM +0800, Ajye Huang wrote:

> > > Add compatible "qcom,sc7180-sndcard-rt5682-m98357-2mic"
> > > for 2mic case.

> > This doesn't apply against current code, please check and resend.

> Thank you for your reply,
> This patch depends on  Cheng-Yi's patch series
> https://patchwork.kernel.org/patch/11773221/.

That's "ASoC: qcom: dt-bindings: Add sc7180 machine bindings" for those
playing at home.

>   If I misunderstand what you mean, please correct me,

A version of some SC7180 patches was applied.  However it does seem like
that didn't include any machine driver bindings so it must've been a
different, similar looking series which is presumably waiting for a new
version - please resend based on that new version (ideally these patches
could be picked up as part of that series).

Please include human readable descriptions of things like commits and
issues being discussed in e-mail in your mails, this makes them much
easier for humans to read especially when they have no internet access.
I do frequently catch up on my mail on flights or while otherwise
travelling so this is even more pressing for me than just being about
making things a bit easier to read.


signature.asc
Description: PGP signature


Re: [PATCH v6 1/4] rcu/tree: Make rcu_do_batch count how many callbacks were executed

2020-10-12 Thread Paul E. McKenney
On Sun, Oct 11, 2020 at 09:35:37AM -0700, Joel Fernandes wrote:
> On Fri, Oct 9, 2020 at 4:14 PM Frederic Weisbecker  
> wrote:
> >
> > On Wed, Sep 23, 2020 at 11:22:08AM -0400, Joel Fernandes (Google) wrote:
> > > Currently, rcu_do_batch() depends on the unsegmented callback list's len 
> > > field
> > > to know how many CBs are executed. This fields counts down from 0 as CBs 
> > > are
> > > dequeued.  It is possible that all CBs could not be run because of 
> > > reaching
> > > limits in which case the remaining unexecuted callbacks are requeued in 
> > > the
> > > CPU's segcblist.
> > >
> > > The number of callbacks that were not requeued are then the negative 
> > > count (how
> > > many CBs were run) stored in the rcl->len which has been counting down on 
> > > every
> > > dequeue. This negative count is then added to the per-cpu segmented 
> > > callback
> > > list's to correct its count.
> > >
> > > Such a design works against future efforts to track the length of each 
> > > segment
> > > of the segmented callback list. The reason is because
> > > rcu_segcblist_extract_done_cbs() will be populating the unsegmented 
> > > callback
> > > list's length field (rcl->len) during extraction.
> > > Also, the design of counting down from 0 is confusing and error-prone 
> > > IMHO.
> >
> > Right :)
> 
> :)
> 
> > > This commit therefore explicitly counts have many callbacks were executed 
> > > in
> >
> > s/have/how
> >
> > > rcu_do_batch() itself, and uses that to update the per-CPU segcb list's 
> > > ->len
> > > field, without relying on the negativity of rcl->len.
> > >
> > > Signed-off-by: Joel Fernandes (Google) 
> >
> > Reviewed-by: Frederic Weisbecker 
> 
> Thanks! Paul would be Ok to make the minor fixup s/have/how/ that
> Frederic pointed?

But of course!  I was waiting until Frederic gets them all reviewed,
with an eye to applying and wordsmithing them as a set.

> - Joel
> (Due to COVID issues at home, I'm intermittently working so advance
> apologies for slow replies.)

And I hope that this is going as well as it possibly can!

Thanx, Paul


[PATCH 5.8 098/124] net: mvneta: fix double free of txq->buf

2020-10-12 Thread Greg Kroah-Hartman
From: Tom Rix 

[ Upstream commit f4544e5361da5050ff5c0330ceea095cb5dbdd72 ]

clang static analysis reports this problem:

drivers/net/ethernet/marvell/mvneta.c:3465:2: warning:
  Attempt to free released memory
kfree(txq->buf);
^~~

When mvneta_txq_sw_init() fails to allocate txq->tso_hdrs,
it frees txq->buf without poisoning it.  The error is caught
in the mvneta_setup_txqs() caller, which handles the error
by cleaning up all of the txqs with a call to
mvneta_txq_sw_deinit(), which also frees txq->buf.

Since mvneta_txq_sw_deinit() is a general cleaner, all of the
partial cleaning in mvneta_txq_sw_init()'s error handling
is not needed.

Fixes: 2adb719d74f6 ("net: mvneta: Implement software TSO")
Signed-off-by: Tom Rix 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/marvell/mvneta.c | 13 ++---
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c 
b/drivers/net/ethernet/marvell/mvneta.c
index 7d5d9d34f4e47..69a234e83b8b7 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3372,24 +3372,15 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp,
txq->last_desc = txq->size - 1;
 
txq->buf = kmalloc_array(txq->size, sizeof(*txq->buf), GFP_KERNEL);
-   if (!txq->buf) {
-   dma_free_coherent(pp->dev->dev.parent,
- txq->size * MVNETA_DESC_ALIGNED_SIZE,
- txq->descs, txq->descs_phys);
+   if (!txq->buf)
return -ENOMEM;
-   }
 
/* Allocate DMA buffers for TSO MAC/IP/TCP headers */
txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent,
   txq->size * TSO_HEADER_SIZE,
   >tso_hdrs_phys, GFP_KERNEL);
-   if (!txq->tso_hdrs) {
-   kfree(txq->buf);
-   dma_free_coherent(pp->dev->dev.parent,
- txq->size * MVNETA_DESC_ALIGNED_SIZE,
- txq->descs, txq->descs_phys);
+   if (!txq->tso_hdrs)
return -ENOMEM;
-   }
 
/* Setup XPS mapping */
if (txq_number > 1)
-- 
2.25.1





[PATCH 5.8 056/124] xfrm: clone whole liftime_cur structure in xfrm_do_migrate

2020-10-12 Thread Greg Kroah-Hartman
From: Antony Antony 

[ Upstream commit 8366685b2883e523f91e9816d7be371eb1144749 ]

When we clone the state, only add_time was cloned; it missed values like
bytes and packets.  Now clone all members of the structure.

v1->v3:
 - use memcpy to copy the entire structure

Fixes: 80c9abaabf42 ("[XFRM]: Extension for dynamic update of endpoint 
address(es)")
Signed-off-by: Antony Antony 
Signed-off-by: Steffen Klassert 
Signed-off-by: Sasha Levin 
---
 net/xfrm/xfrm_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 3a2c1f15d31dd..6b431a3830721 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1550,7 +1550,7 @@ static struct xfrm_state *xfrm_state_clone(struct 
xfrm_state *orig,
x->tfcpad = orig->tfcpad;
x->replay_maxdiff = orig->replay_maxdiff;
x->replay_maxage = orig->replay_maxage;
-   x->curlft.add_time = orig->curlft.add_time;
+   memcpy(>curlft, >curlft, sizeof(x->curlft));
x->km.state = orig->km.state;
x->km.seq = orig->km.seq;
x->replay = orig->replay;
-- 
2.25.1





[PATCH 5.8 097/124] vhost-vdpa: fix page pinning leakage in error path

2020-10-12 Thread Greg Kroah-Hartman
From: Si-Wei Liu 

[ Upstream commit 7ed9e3d97c32d969caded2dfb6e67c1a2cc5a0b1 ]

Pinned pages are not properly accounted, particularly when a
mapping error occurs on an IOTLB update. Clean up dangling
pinned pages in the error path. The in-flight pinned pages,
specifically for a memory region that strides across
multiple chunks, would need more than one free page for
bookkeeping and accounting. For simplicity, pin the pages
for all memory in the IOVA range in one go rather than
making multiple pin_user_pages() calls to cover the entire
region. This way it is easier to track and account the
pages already mapped, particularly for clean-up in the
error path.

Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu 
Link: 
https://lore.kernel.org/r/1601701330-16837-3-git-send-email-si-wei@oracle.com
Signed-off-by: Michael S. Tsirkin 
Signed-off-by: Sasha Levin 
---
 drivers/vhost/vdpa.c | 119 ++-
 1 file changed, 71 insertions(+), 48 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 5259f5210b375..e172c2efc663c 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -555,21 +555,19 @@ static int vhost_vdpa_process_iotlb_update(struct 
vhost_vdpa *v,
struct vhost_dev *dev = >vdev;
struct vhost_iotlb *iotlb = dev->iotlb;
struct page **page_list;
-   unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
+   struct vm_area_struct **vmas;
unsigned int gup_flags = FOLL_LONGTERM;
-   unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-   unsigned long locked, lock_limit, pinned, i;
+   unsigned long map_pfn, last_pfn = 0;
+   unsigned long npages, lock_limit;
+   unsigned long i, nmap = 0;
u64 iova = msg->iova;
+   long pinned;
int ret = 0;
 
if (vhost_iotlb_itree_first(iotlb, msg->iova,
msg->iova + msg->size - 1))
return -EEXIST;
 
-   page_list = (struct page **) __get_free_page(GFP_KERNEL);
-   if (!page_list)
-   return -ENOMEM;
-
if (msg->perm & VHOST_ACCESS_WO)
gup_flags |= FOLL_WRITE;
 
@@ -577,61 +575,86 @@ static int vhost_vdpa_process_iotlb_update(struct 
vhost_vdpa *v,
if (!npages)
return -EINVAL;
 
+   page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
+   vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
+ GFP_KERNEL);
+   if (!page_list || !vmas) {
+   ret = -ENOMEM;
+   goto free;
+   }
+
mmap_read_lock(dev->mm);
 
-   locked = atomic64_add_return(npages, >mm->pinned_vm);
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-   if (locked > lock_limit) {
+   if (npages + atomic64_read(>mm->pinned_vm) > lock_limit) {
ret = -ENOMEM;
-   goto out;
+   goto unlock;
}
 
-   cur_base = msg->uaddr & PAGE_MASK;
-   iova &= PAGE_MASK;
+   pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
+   page_list, vmas);
+   if (npages != pinned) {
+   if (pinned < 0) {
+   ret = pinned;
+   } else {
+   unpin_user_pages(page_list, pinned);
+   ret = -ENOMEM;
+   }
+   goto unlock;
+   }
 
-   while (npages) {
-   pinned = min_t(unsigned long, npages, list_size);
-   ret = pin_user_pages(cur_base, pinned,
-gup_flags, page_list, NULL);
-   if (ret != pinned)
-   goto out;
-
-   if (!last_pfn)
-   map_pfn = page_to_pfn(page_list[0]);
-
-   for (i = 0; i < ret; i++) {
-   unsigned long this_pfn = page_to_pfn(page_list[i]);
-   u64 csize;
-
-   if (last_pfn && (this_pfn != last_pfn + 1)) {
-   /* Pin a contiguous chunk of memory */
-   csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-   if (vhost_vdpa_map(v, iova, csize,
-  map_pfn << PAGE_SHIFT,
-  msg->perm))
-   goto out;
-   map_pfn = this_pfn;
-   iova += csize;
+   iova &= PAGE_MASK;
+   map_pfn = page_to_pfn(page_list[0]);
+
+   /* One more iteration to avoid extra vdpa_map() call out of loop. */
+   for (i = 0; i <= npages; i++) {
+   unsigned long this_pfn;
+   u64 csize;
+
+   /* The last chunk may have no valid PFN next to it */
+   this_pfn = i < npages ? page_to_pfn(page_list[i]) : 

[PATCH 5.8 079/124] octeontx2-af: Fix enable/disable of default NPC entries

2020-10-12 Thread Greg Kroah-Hartman
From: Subbaraya Sundeep 

[ Upstream commit e154b5b70368a84a19505a0be9b0096c66562b56 ]

The packet replication feature present in OcteonTX2
is a hardware linked list of a PF and its VF
interfaces so that broadcast packets are sent
to all interfaces present in the list. It is the
driver's job to add and delete a PF/VF interface
to/from the list when the interface is brought
up and down. This patch fixes the
npc_enadis_default_entries() function to handle
broadcast replication properly if the packet replication
feature is present.

Fixes: 40df309e4166 ("octeontx2-af: Support to enable/disable default MCAM 
entries")
Signed-off-by: Subbaraya Sundeep 
Signed-off-by: Geetha sowjanya 
Signed-off-by: Sunil Goutham 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 .../net/ethernet/marvell/octeontx2/af/rvu.h   |  3 ++-
 .../ethernet/marvell/octeontx2/af/rvu_nix.c   |  5 ++--
 .../ethernet/marvell/octeontx2/af/rvu_npc.c   | 26 ++-
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h 
b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index dcf25a0920084..b89dde2c8b089 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -463,6 +463,7 @@ void rvu_nix_freemem(struct rvu *rvu);
 int rvu_get_nixlf_count(struct rvu *rvu);
 void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int npalf);
 int nix_get_nixlf(struct rvu *rvu, u16 pcifunc, int *nixlf, int *nix_blkaddr);
+int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add);
 
 /* NPC APIs */
 int rvu_npc_init(struct rvu *rvu);
@@ -477,7 +478,7 @@ void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 
pcifunc, int nixlf);
 void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
   int nixlf, u64 chan);
-void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc);
+void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable);
 int rvu_npc_update_rxvlan(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c 
b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 36953d4f51c73..3495b3a6828c0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -17,7 +17,6 @@
 #include "npc.h"
 #include "cgx.h"
 
-static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add);
 static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
int type, int chan_id);
 
@@ -2020,7 +2019,7 @@ static int nix_update_mce_list(struct nix_mce_list 
*mce_list,
return 0;
 }
 
-static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add)
+int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add)
 {
int err = 0, idx, next_idx, last_idx;
struct nix_mce_list *mce_list;
@@ -2065,7 +2064,7 @@ static int nix_update_bcast_mce_list(struct rvu *rvu, u16 
pcifunc, bool add)
 
/* Disable MCAM entry in NPC */
if (!mce_list->count) {
-   rvu_npc_disable_bcast_entry(rvu, pcifunc);
+   rvu_npc_enable_bcast_entry(rvu, pcifunc, false);
goto end;
}
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c 
b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 0a214084406a6..fbaf9bcd83f2f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -530,7 +530,7 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 
pcifunc,
  NIX_INTF_RX, , true);
 }
 
-void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc)
+void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable)
 {
struct npc_mcam *mcam = >hw->mcam;
int blkaddr, index;
@@ -543,7 +543,7 @@ void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 
pcifunc)
pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK;
 
index = npc_get_nixlf_mcam_index(mcam, pcifunc, 0, NIXLF_BCAST_ENTRY);
-   npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false);
+   npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
 }
 
 void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
@@ -622,23 +622,35 @@ static void npc_enadis_default_entries(struct rvu *rvu, 
u16 pcifunc,
 nixlf, NIXLF_UCAST_ENTRY);
npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
 
-   /* For PF, ena/dis promisc and bcast MCAM match entries */
-   if (pcifunc & RVU_PFVF_FUNC_MASK)
+   /* For PF, ena/dis promisc and bcast 

Re: [RFC PATCH] checkpatch: add shebang check to EXECUTE_PERMISSIONS

2020-10-12 Thread Ujjwal Kumar
On 12/10/20 11:47 am, Joe Perches wrote:
> On Mon, 2020-10-12 at 11:19 +0530, Ujjwal Kumar wrote:
>> checkpatch.pl checks for invalid EXECUTE_PERMISSIONS on source
>> files. The script leverages filename extensions and its path in
>> the repository to decide whether to allow execute permissions on
>> the file or not.
>>
>> Based on current check conditions, a perl script file having
>> execute permissions, without '.pl' extension in its filename
>> and not belonging to 'scripts/' directory is reported as ERROR
>> which is a false-positive.
>>
>> Adding a shebang check along with current conditions will make
>> the check more generalised and improve checkpatch reports.
>> To do so, without breaking the core design decision of checkpatch,
>> we can fetch the first line from the patch itself and match it for
>> a shebang pattern.
>>
>> There can be cases where the first line is not part of the patch.
> 
> For instance: a patch that only changes permissions
> without changing any of the file content.
> 
>>
>> In that case there may be a false-positive report but in the end we
>> will have fewer false positives as we will be handling some of the
>> unhandled cases.
> 
>> Signed-off-by: Ujjwal Kumar 
>> ---
>> Apologies, I forgot to include linux-kernel@vger.kernel.org so I'm
>> now resending.
>>
>>  scripts/checkpatch.pl | 19 +++
>>  1 file changed, 19 insertions(+)
>>
>> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> []
>> @@ -1795,6 +1795,23 @@ sub get_stat_here {
>>  return $herectx;
>>  }
> 
> First some style trivia:
> 
>> +sub get_shebang {
>> +my ($linenr, $realfile) = @_;
>> +my $rawline = "";
>> +my $shebang = "";
>> +
>> +$rawline = raw_line($linenr, 3);
>> +if (defined $rawline &&
>> +$rawline =~ /^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) {
> 
> alignment to open parenthesis please
> 
>> +if (defined $1 && $1 == 1) {
>> +$shebang = raw_line($linenr, 4);
>> +$shebang = substr $shebang, 1;
> 
> parentheses around substr please.
> 
>> +}
>> +}
>> +
>> +return $shebang;
>> +}
> 
> And some real notes:
> 
> $realfile isn't used in this function so there doesn't
> seem to be a reason to have it as an function argument.
> 
>> +
>>  sub cat_vet {
>>  my ($vet) = @_;
>>  my ($res, $coded);
>> @@ -2680,7 +2697,9 @@ sub process {
>>  # Check for incorrect file permissions
>>  if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) {
> 
> probably better here to use a capture group for the permissions
> 
>   if ($line =~ /^new (?:file )?mode (\d+)$/) {
>   my $mode = substr($1, -3);

This

> 
>>  my $permhere = $here . "FILE: $realfile\n";
>> +my $shebang = get_shebang($linenr, $realfile);
>>  if ($realfile !~ m@scripts/@ &&
> 
> Maybe remove the $realfile directory test as
> there are many source files that are not scripts
> in this directory and its subdirectories.

this

> 
>> +$shebang !~ /^#!\s*(\/\w)+.*/ &&
> 
> unnecessary capture group
> 
> and add
> 
>  $mode =~ /[1357]/ &&

this

> 
>>  $realfile !~ /\.(py|pl|awk|sh)$/) {
> 
> No need for a a capture group here either. (existing defect)

and this.

> 
>>  ERROR("EXECUTE_PERMISSIONS",
>>"do not set execute permissions for 
>> source files\n" . $permhere);
> 
> 
> 

Should these new changes go as a separate patch or can they be
included in the next iteration of this patch?



Thanks
Ujjwal Kumar


[PATCH 5.8 078/124] net: phy: realtek: fix rtl8211e rx/tx delay config

2020-10-12 Thread Greg Kroah-Hartman
From: Willy Liu 

[ Upstream commit bbc4d71d63549bcd003a430de18a72a742d8c91e ]

There are two chip pins named TXDLY and RXDLY which actually add the 2ns
delays to TXC and RXC for TXD/RXD latching. These two pins can be configured
via a 4.7k-ohm resistor to 3.3V (hw setting), but also via a software setting
(extension page 0xa4, register 0x1c, bits 13, 12 and 11).

The configuration register definitions from table 13 official PHY datasheet:
PHYAD[2:0] = PHY Address
AN[1:0] = Auto-Negotiation
Mode = Interface Mode Select
RX Delay = RX Delay
TX Delay = TX Delay
SELRGV = RGMII/GMII Selection

This table describes how to configure these hw pins via external pull-high
or pull-low resistors.

It was a misunderstanding to map it to the register bits below:
8:6 = PHY Address
5:4 = Auto-Negotiation
3 = Interface Mode Select
2 = RX Delay
1 = TX Delay
0 = SELRGV
So I removed these descriptions above and added the related settings as below:
14 = reserved
13 = force Tx RX Delay controlled by bit12 bit11
12 = Tx Delay
11 = Rx Delay
10:0 = Test && debug settings reserved by realtek

Test && debug settings are not recommended to be modified by default.

Fixes: f81dadbcf7fd ("net: phy: realtek: Add rtl8211e rx/tx delays config")
Signed-off-by: Willy Liu 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/phy/realtek.c | 31 ---
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index c7229d022a27b..48ba757046cea 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0+
-/*
- * drivers/net/phy/realtek.c
+/* drivers/net/phy/realtek.c
  *
  * Driver for Realtek PHYs
  *
@@ -32,9 +31,9 @@
 #define RTL8211F_TX_DELAY  BIT(8)
 #define RTL8211F_RX_DELAY  BIT(3)
 
-#define RTL8211E_TX_DELAY  BIT(1)
-#define RTL8211E_RX_DELAY  BIT(2)
-#define RTL8211E_MODE_MII_GMII BIT(3)
+#define RTL8211E_CTRL_DELAYBIT(13)
+#define RTL8211E_TX_DELAY  BIT(12)
+#define RTL8211E_RX_DELAY  BIT(11)
 
 #define RTL8201F_ISR   0x1e
 #define RTL8201F_IER   0x13
@@ -246,16 +245,16 @@ static int rtl8211e_config_init(struct phy_device *phydev)
/* enable TX/RX delay for rgmii-* modes, and disable them for rgmii. */
switch (phydev->interface) {
case PHY_INTERFACE_MODE_RGMII:
-   val = 0;
+   val = RTL8211E_CTRL_DELAY | 0;
break;
case PHY_INTERFACE_MODE_RGMII_ID:
-   val = RTL8211E_TX_DELAY | RTL8211E_RX_DELAY;
+   val = RTL8211E_CTRL_DELAY | RTL8211E_TX_DELAY | 
RTL8211E_RX_DELAY;
break;
case PHY_INTERFACE_MODE_RGMII_RXID:
-   val = RTL8211E_RX_DELAY;
+   val = RTL8211E_CTRL_DELAY | RTL8211E_RX_DELAY;
break;
case PHY_INTERFACE_MODE_RGMII_TXID:
-   val = RTL8211E_TX_DELAY;
+   val = RTL8211E_CTRL_DELAY | RTL8211E_TX_DELAY;
break;
default: /* the rest of the modes imply leaving delays as is. */
return 0;
@@ -263,11 +262,12 @@ static int rtl8211e_config_init(struct phy_device *phydev)
 
/* According to a sample driver there is a 0x1c config register on the
 * 0xa4 extension page (0x7) layout. It can be used to disable/enable
-* the RX/TX delays otherwise controlled by RXDLY/TXDLY pins. It can
-* also be used to customize the whole configuration register:
-* 8:6 = PHY Address, 5:4 = Auto-Negotiation, 3 = Interface Mode Select,
-* 2 = RX Delay, 1 = TX Delay, 0 = SELRGV (see original PHY datasheet
-* for details).
+* the RX/TX delays otherwise controlled by RXDLY/TXDLY pins.
+* The configuration register definition:
+* 14 = reserved
+* 13 = Force Tx RX Delay controlled by bit12 bit11,
+* 12 = RX Delay, 11 = TX Delay
+* 10:0 = Test && debug settings reserved by realtek
 */
oldpage = phy_select_page(phydev, 0x7);
if (oldpage < 0)
@@ -277,7 +277,8 @@ static int rtl8211e_config_init(struct phy_device *phydev)
if (ret)
goto err_restore_page;
 
-   ret = __phy_modify(phydev, 0x1c, RTL8211E_TX_DELAY | RTL8211E_RX_DELAY,
+   ret = __phy_modify(phydev, 0x1c, RTL8211E_CTRL_DELAY
+  | RTL8211E_TX_DELAY | RTL8211E_RX_DELAY,
   val);
 
 err_restore_page:
-- 
2.25.1





[PATCH 5.8 054/124] xfrm: clone XFRMA_REPLAY_ESN_VAL in xfrm_do_migrate

2020-10-12 Thread Greg Kroah-Hartman
From: Antony Antony 

[ Upstream commit 91a46c6d1b4fcbfa4773df9421b8ad3e58088101 ]

XFRMA_REPLAY_ESN_VAL was not cloned completely from the old to the new.
Migrate this attribute during XFRMA_MSG_MIGRATE

v1->v2:
 - move curleft cloning to a separate patch

Fixes: af2f464e326e ("xfrm: Assign esn pointers when cloning a state")
Signed-off-by: Antony Antony 
Signed-off-by: Steffen Klassert 
Signed-off-by: Sasha Levin 
---
 include/net/xfrm.h | 16 ++--
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 51f65d23ebafa..2e32cb10ac16b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1767,21 +1767,17 @@ static inline unsigned int 
xfrm_replay_state_esn_len(struct xfrm_replay_state_es
 static inline int xfrm_replay_clone(struct xfrm_state *x,
 struct xfrm_state *orig)
 {
-   x->replay_esn = kzalloc(xfrm_replay_state_esn_len(orig->replay_esn),
+
+   x->replay_esn = kmemdup(orig->replay_esn,
+   xfrm_replay_state_esn_len(orig->replay_esn),
GFP_KERNEL);
if (!x->replay_esn)
return -ENOMEM;
-
-   x->replay_esn->bmp_len = orig->replay_esn->bmp_len;
-   x->replay_esn->replay_window = orig->replay_esn->replay_window;
-
-   x->preplay_esn = kmemdup(x->replay_esn,
-xfrm_replay_state_esn_len(x->replay_esn),
+   x->preplay_esn = kmemdup(orig->preplay_esn,
+xfrm_replay_state_esn_len(orig->preplay_esn),
 GFP_KERNEL);
-   if (!x->preplay_esn) {
-   kfree(x->replay_esn);
+   if (!x->preplay_esn)
return -ENOMEM;
-   }
 
return 0;
 }
-- 
2.25.1





[PATCH 5.8 055/124] xfrm: clone XFRMA_SEC_CTX in xfrm_do_migrate

2020-10-12 Thread Greg Kroah-Hartman
From: Antony Antony 

[ Upstream commit 7aa05d304785204703a67a6aa7f1db402889a172 ]

XFRMA_SEC_CTX was not cloned from the old to the new.
Migrate this attribute during XFRMA_MSG_MIGRATE

v1->v2:
 - return -ENOMEM on error
v2->v3:
 - fix return type to int

Fixes: 80c9abaabf42 ("[XFRM]: Extension for dynamic update of endpoint 
address(es)")
Signed-off-by: Antony Antony 
Signed-off-by: Steffen Klassert 
Signed-off-by: Sasha Levin 
---
 net/xfrm/xfrm_state.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8ec3a6a12dd34..3a2c1f15d31dd 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1441,6 +1441,30 @@ out:
 EXPORT_SYMBOL(xfrm_state_add);
 
 #ifdef CONFIG_XFRM_MIGRATE
+static inline int clone_security(struct xfrm_state *x, struct xfrm_sec_ctx 
*security)
+{
+   struct xfrm_user_sec_ctx *uctx;
+   int size = sizeof(*uctx) + security->ctx_len;
+   int err;
+
+   uctx = kmalloc(size, GFP_KERNEL);
+   if (!uctx)
+   return -ENOMEM;
+
+   uctx->exttype = XFRMA_SEC_CTX;
+   uctx->len = size;
+   uctx->ctx_doi = security->ctx_doi;
+   uctx->ctx_alg = security->ctx_alg;
+   uctx->ctx_len = security->ctx_len;
+   memcpy(uctx + 1, security->ctx_str, security->ctx_len);
+   err = security_xfrm_state_alloc(x, uctx);
+   kfree(uctx);
+   if (err)
+   return err;
+
+   return 0;
+}
+
 static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
   struct xfrm_encap_tmpl *encap)
 {
@@ -1497,6 +1521,10 @@ static struct xfrm_state *xfrm_state_clone(struct 
xfrm_state *orig,
goto error;
}
 
+   if (orig->security)
+   if (clone_security(x, orig->security))
+   goto error;
+
if (orig->coaddr) {
x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
GFP_KERNEL);
-- 
2.25.1
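
The clone_security() helper above rebuilds a struct xfrm_user_sec_ctx with its
variable-length context string placed directly behind the header, which is why
the data is copied to uctx + 1. A minimal stand-alone sketch of the same
header-plus-trailing-payload pattern (hypothetical names, plain user-space C,
not the xfrm code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical header followed by a variable-length payload. */
struct blob {
	unsigned short len;	/* total size: header plus payload */
	unsigned short type;
};

static struct blob *blob_build(const void *payload, unsigned short payload_len)
{
	/* One allocation holds the header and the payload right behind it. */
	struct blob *b = malloc(sizeof(*b) + payload_len);

	if (!b)
		return NULL;

	b->len = sizeof(*b) + payload_len;
	b->type = 1;
	/* "b + 1" points just past the header, i.e. at the payload area. */
	memcpy(b + 1, payload, payload_len);
	return b;
}

int main(void)
{
	const char ctx[] = "system_u:object_r:ipsec_spd_t:s0";
	struct blob *b = blob_build(ctx, sizeof(ctx));

	if (!b)
		return 1;
	printf("total %u bytes, payload: %s\n", (unsigned)b->len,
	       (const char *)(b + 1));
	free(b);
	return 0;
}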





[PATCH 5.8 077/124] virtio-net: dont disable guest csum when disable LRO

2020-10-12 Thread Greg Kroah-Hartman
From: Tonghao Zhang 

[ Upstream commit 1a03b8a35a957f9f38ecb8a97443b7380bbf6a8b ]

Open vSwitch and the Linux bridge disable LRO on an interface when
that interface is added to them. Currently, disabling LRO also disables
the virtio-net guest csum offload, which hurts forwarding performance.

Fixes: a02e8964eaf9 ("virtio-net: ethtool configurable LRO")
Cc: Michael S. Tsirkin 
Cc: Jason Wang 
Cc: Willem de Bruijn 
Signed-off-by: Tonghao Zhang 
Acked-by: Willem de Bruijn 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/virtio_net.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ba38765dc4905..c34927b1d806e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -63,6 +63,11 @@ static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_CSUM
 };
 
+#define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+   (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+   (1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
+   (1ULL << VIRTIO_NET_F_GUEST_UFO))
+
 struct virtnet_stat_desc {
char desc[ETH_GSTRING_LEN];
size_t offset;
@@ -2547,7 +2552,8 @@ static int virtnet_set_features(struct net_device *dev,
if (features & NETIF_F_LRO)
offloads = vi->guest_offloads_capable;
else
-   offloads = 0;
+   offloads = vi->guest_offloads_capable &
+  ~GUEST_OFFLOAD_LRO_MASK;
 
err = virtnet_set_guest_offloads(vi, offloads);
if (err)
-- 
2.25.1
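
The fix above keeps VIRTIO_NET_F_GUEST_CSUM enabled by masking out only the
LRO-related feature bits instead of clearing the whole offload word. A small
user-space sketch of that masking logic (the bit positions below are invented
for illustration and are not the real virtio feature numbers):

#include <stdint.h>
#include <stdio.h>

#define F_GUEST_CSUM	0	/* illustrative bit positions only */
#define F_GUEST_TSO4	1
#define F_GUEST_TSO6	2
#define F_GUEST_ECN	3
#define F_GUEST_UFO	4

#define LRO_MASK ((1ULL << F_GUEST_TSO4) | (1ULL << F_GUEST_TSO6) | \
		  (1ULL << F_GUEST_ECN)  | (1ULL << F_GUEST_UFO))

int main(void)
{
	uint64_t capable = (1ULL << F_GUEST_CSUM) | LRO_MASK;

	/* Old behaviour: disabling LRO dropped every offload, csum included. */
	uint64_t old_way = 0;

	/* Fixed behaviour: clear only the LRO bits, keep checksum offload. */
	uint64_t new_way = capable & ~LRO_MASK;

	printf("old=0x%llx new=0x%llx csum kept=%d\n",
	       (unsigned long long)old_way, (unsigned long long)new_way,
	       !!(new_way & (1ULL << F_GUEST_CSUM)));
	return 0;
}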





[PATCH 5.8 081/124] octeontx2-pf: Fix the device state on error

2020-10-12 Thread Greg Kroah-Hartman
From: Hariprasad Kelam 

[ Upstream commit 1ea0166da0509e987caa42c30a6a71f2c6ca1875 ]

Currently, when nix_lf_start fails in otx2_open, the transmit
queues that were already started from the link event handler
are not stopped. Since the tx queues remain active, the network
stack keeps trying to send packets, leading to a driver crash
while accessing the device resources.

Fixes: 50fe6c02e ("octeontx2-pf: Register and handle link notifications")
Signed-off-by: Hariprasad Kelam 
Signed-off-by: Geetha sowjanya 
Signed-off-by: Sunil Goutham 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c 
b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 75a8c407e815c..5d620a39ea802 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1560,10 +1560,13 @@ int otx2_open(struct net_device *netdev)
 
err = otx2_rxtx_enable(pf, true);
if (err)
-   goto err_free_cints;
+   goto err_tx_stop_queues;
 
return 0;
 
+err_tx_stop_queues:
+   netif_tx_stop_all_queues(netdev);
+   netif_carrier_off(netdev);
 err_free_cints:
otx2_free_cints(pf, qidx);
vec = pci_irq_vector(pf->pdev,
-- 
2.25.1





[PATCH 5.8 053/124] xfrm: clone XFRMA_SET_MARK in xfrm_do_migrate

2020-10-12 Thread Greg Kroah-Hartman
From: Antony Antony 

[ Upstream commit 545e5c571662b1cd79d9588f9d3b6e36985b8007 ]

XFRMA_SET_MARK and XFRMA_SET_MARK_MASK were not cloned from the old
state to the new one. Migrate these two attributes during XFRMA_MSG_MIGRATE.

Fixes: 9b42c1f179a6 ("xfrm: Extend the output_mark to support input direction 
and masking.")
Signed-off-by: Antony Antony 
Signed-off-by: Steffen Klassert 
Signed-off-by: Sasha Levin 
---
 net/xfrm/xfrm_state.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8be2d926acc21..8ec3a6a12dd34 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1510,6 +1510,7 @@ static struct xfrm_state *xfrm_state_clone(struct 
xfrm_state *orig,
}
 
memcpy(&x->mark, &orig->mark, sizeof(x->mark));
+   memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark));
 
if (xfrm_init_state(x) < 0)
goto error;
-- 
2.25.1





[PATCH 5.8 080/124] octeontx2-pf: Fix TCP/UDP checksum offload for IPv6 frames

2020-10-12 Thread Greg Kroah-Hartman
From: Geetha sowjanya 

[ Upstream commit 89eae5e87b4fa799726a3e8911c90d418cb5d2b1 ]

The TCP/UDP checksum offload feature in Octeontx2
expects L3TYPE to be set irrespective of whether the
IP header checksum is being offloaded or not. Currently,
L3TYPE is not set for IPv6 frames, resulting in packets
being dropped with a checksum error. This patch fixes
the issue.

Fixes: 3ca6c4c88 ("octeontx2-pf: Add packet transmission support")
Signed-off-by: Geetha sowjanya 
Signed-off-by: Sunil Goutham 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c 
b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index b04f5429d72d9..334eab976ee4a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -524,6 +524,7 @@ static void otx2_sqe_add_hdr(struct otx2_nic *pfvf, struct 
otx2_snd_queue *sq,
sqe_hdr->ol3type = NIX_SENDL3TYPE_IP4_CKSUM;
} else if (skb->protocol == htons(ETH_P_IPV6)) {
proto = ipv6_hdr(skb)->nexthdr;
+   sqe_hdr->ol3type = NIX_SENDL3TYPE_IP6;
}
 
if (proto == IPPROTO_TCP)
-- 
2.25.1





[PATCH 5.8 052/124] iommu/vt-d: Fix lockdep splat in iommu_flush_dev_iotlb()

2020-10-12 Thread Greg Kroah-Hartman
From: Lu Baolu 

[ Upstream commit 1a3f2fd7fc4e8f24510830e265de2ffb8e3300d2 ]

Lock(&iommu->lock) without disabling irq causes lockdep warnings.

[   12.703950] 
[   12.703962] WARNING: possible irq lock inversion dependency detected
[   12.703975] 5.9.0-rc6+ #659 Not tainted
[   12.703983] 
[   12.703995] systemd-udevd/284 just changed the state of lock:
[   12.704007] bd6ff4d8 (device_domain_lock){..-.}-{2:2}, at:
   iommu_flush_dev_iotlb.part.57+0x2e/0x90
[   12.704031] but this lock took another, SOFTIRQ-unsafe lock in the past:
[   12.704043]  (&iommu->lock){+.+.}-{2:2}
[   12.704045]

   and interrupts could create inverse lock ordering between
   them.

[   12.704073]
   other info that might help us debug this:
[   12.704085]  Possible interrupt unsafe locking scenario:

[   12.704097]        CPU0                    CPU1
[   12.704106]        ----                    ----
[   12.704115]   lock(&iommu->lock);
[   12.704123]                                local_irq_disable();
[   12.704134]                                lock(device_domain_lock);
[   12.704146]                                lock(&iommu->lock);
[   12.704158]   <Interrupt>
[   12.704164]     lock(device_domain_lock);
[   12.704174]
*** DEADLOCK ***

Signed-off-by: Lu Baolu 
Link: https://lore.kernel.org/r/20200927062428.13713-1-baolu...@linux.intel.com
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/intel/iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index fbe0b0cc56edf..24a84d294fd01 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2617,7 +2617,7 @@ static struct dmar_domain 
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
 
/* Setup the PASID entry for requests without PASID: */
-   spin_lock(&iommu->lock);
+   spin_lock_irqsave(&iommu->lock, flags);
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
@@ -2627,7 +2627,7 @@ static struct dmar_domain 
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
-   spin_unlock(&iommu->lock);
+   spin_unlock_irqrestore(&iommu->lock, flags);
if (ret) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
-- 
2.25.1
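
The splat above is the classic inversion between a lock that is taken from
softirq context and the same lock taken with interrupts enabled; the fix is to
take iommu->lock with the irqsave variant. A kernel-style sketch of the idiom
(illustrative only, not the VT-d driver code):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);	/* stands in for iommu->lock */

static void update_state_safely(int *state, int val)
{
	unsigned long flags;

	/*
	 * spin_lock_irqsave() disables local interrupts while the lock is
	 * held, so an interrupt or softirq handler that also takes this
	 * lock can never preempt us on the same CPU and deadlock.
	 */
	spin_lock_irqsave(&example_lock, flags);
	*state = val;
	spin_unlock_irqrestore(&example_lock, flags);
}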





Re: [PATCH] trace: Return ENOTCONN instead of EBADF

2020-10-12 Thread Steven Rostedt
On Mon, 12 Oct 2020 10:26:42 +0200
Peter Enderborg  wrote:

> When there is no clients listening on event the trace return
> EBADF. The file is not a bad file descriptor and to get the
> userspace able to do a proper error handling it need a different
> error code that separate a bad file descriptor from a empty listening.

I have no problem with this patch, but your description is incorrect. And
before making this change, I want to make sure that what you think is
happening is actually happening.

This has nothing to do with "clients listening". This happens when the ring
buffer is disabled for some reason. The most likely case of this happening
is if someone sets /sys/kernel/tracing/tracing_on to zero.

If this is still something you want applied, please update the change log
to a more accurate scenario.

Thanks,

-- Steve


> 
> Signed-off-by: Peter Enderborg 
> ---
>  kernel/trace/trace.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index d3e5de717df2..6e592bf736df 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -6651,8 +6651,8 @@ tracing_mark_write(struct file *filp, const char __user 
> *ubuf,
>   event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
>   irq_flags, preempt_count());
>   if (unlikely(!event))
> - /* Ring buffer disabled, return as if not open for write */
> - return -EBADF;
> + /* Ring buffer disabled, return as if not connected */
> + return -ENOTCONN;
>  
>   entry = ring_buffer_event_data(event);
>   entry->ip = _THIS_IP_;
> @@ -6731,8 +6731,8 @@ tracing_mark_raw_write(struct file *filp, const char 
> __user *ubuf,
>   event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
>   irq_flags, preempt_count());
>   if (!event)
> - /* Ring buffer disabled, return as if not open for write */
> - return -EBADF;
> + /* Ring buffer disabled, return not connected */
> + return -ENOTCONN;
>  
>   entry = ring_buffer_event_data(event);
>  



[PATCH 5.8 076/124] net: usb: ax88179_178a: fix missing stop entry in driver_info

2020-10-12 Thread Greg Kroah-Hartman
From: Wilken Gottwalt 

[ Upstream commit 9666ea66a74adfe295cb3a8760c76e1ef70f9caf ]

Adds the missing .stop entry in the Belkin driver_info structure.

Fixes: e20bd60bf62a ("net: usb: asix88179_178a: Add support for the Belkin 
B2B128")
Signed-off-by: Wilken Gottwalt 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/usb/ax88179_178a.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index a38e868e44d46..f0ef3706aad96 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1823,6 +1823,7 @@ static const struct driver_info belkin_info = {
.status = ax88179_status,
.link_reset = ax88179_link_reset,
.reset  = ax88179_reset,
+   .stop   = ax88179_stop,
.flags  = FLAG_ETHER | FLAG_FRAMING_AX,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
-- 
2.25.1





[GIT PULL] io_uring updates for 5.10-rc1

2020-10-12 Thread Jens Axboe
Hi Linus,

Here are the io_uring updates for 5.10. This pull request contains:

- Add blkcg accounting for io-wq offload (Dennis)

- A use-after-free fix for io-wq (Hillf)

- Cancelation fixes and improvements

- Use proper files_struct references for offload

- Cleanup of io_uring_get_socket() since that can now go into our own
  header

- SQPOLL fixes and cleanups, and support for sharing the thread

- Improvement to how page accounting is done for registered buffers and
  huge pages, accounting the real pinned state

- Series cleaning up the xarray code (Willy)

- Various cleanups, refactoring, and improvements (Pavel)

- Use raw spinlock for io-wq (Sebastian)

- Add support for ring restrictions (Stefano)

Please pull!


The following changes since commit c8d317aa1887b40b188ec3aaa6e9e524333caed1:

  io_uring: fix async buffered reads when readahead is disabled (2020-09-29 
07:54:00 -0600)

are available in the Git repository at:

  git://git.kernel.dk/linux-block.git tags/io_uring-5.10-2020-10-12

for you to fetch changes up to b2e9685283127f30e7f2b466af0046ff9bd27a86:

  io_uring: keep a pointer ref_node in file_data (2020-10-10 12:49:25 -0600)


io_uring-5.10-2020-10-12


Dennis Zhou (1):
  io_uring: add blkcg accounting to offloaded operations

Hillf Danton (1):
  io-wq: fix use-after-free in io_wq_worker_running

Jens Axboe (29):
  Merge branch 'io_uring-5.9' into for-5.10/io_uring
  io_uring: allow timeout/poll/files killing to take task into account
  io_uring: move dropping of files into separate helper
  io_uring: stash ctx task reference for SQPOLL
  io_uring: unconditionally grab req->task
  io_uring: return cancelation status from poll/timeout/files handlers
  io_uring: enable task/files specific overflow flushing
  io_uring: don't rely on weak ->files references
  io_uring: reference ->nsproxy for file table commands
  io_uring: move io_uring_get_socket() into io_uring.h
  io_uring: io_sq_thread() doesn't need to flush signals
  fs: align IOCB_* flags with RWF_* flags
  io_uring: use private ctx wait queue entries for SQPOLL
  io_uring: move SQPOLL post-wakeup ring need wakeup flag into wake handler
  io_uring: split work handling part of SQPOLL into helper
  io_uring: split SQPOLL data into separate structure
  io_uring: base SQPOLL handling off io_sq_data
  io_uring: enable IORING_SETUP_ATTACH_WQ to attach to SQPOLL thread too
  io_uring: mark io_uring_fops/io_op_defs as __read_mostly
  io_uring: provide IORING_ENTER_SQ_WAIT for SQPOLL SQ ring waits
  io_uring: get rid of req->io/io_async_ctx union
  io_uring: cap SQ submit size for SQPOLL with multiple rings
  io_uring: improve registered buffer accounting for huge pages
  io_uring: process task work in io_uring_register()
  io-wq: kill unused IO_WORKER_F_EXITING
  io_uring: kill callback_head argument for io_req_task_work_add()
  io_uring: batch account ->req_issue and task struct references
  io_uring: no need to call xa_destroy() on empty xarray
  io_uring: fix break condition for __io_uring_register() waiting

Joseph Qi (1):
  io_uring: show sqthread pid and cpu in fdinfo

Matthew Wilcox (Oracle) (3):
  io_uring: Fix use of XArray in __io_uring_files_cancel
  io_uring: Fix XArray usage in io_uring_add_task_file
  io_uring: Convert advanced XArray uses to the normal API

Pavel Begunkov (23):
  io_uring: simplify io_rw_prep_async()
  io_uring: refactor io_req_map_rw()
  io_uring: fix overlapped memcpy in io_req_map_rw()
  io_uring: kill extra user_bufs check
  io_uring: simplify io_alloc_req()
  io_uring: io_kiocb_ppos() style change
  io_uring: remove F_NEED_CLEANUP check in *prep()
  io_uring: set/clear IOCB_NOWAIT into io_read/write
  io_uring: remove nonblock arg from io_{rw}_prep()
  io_uring: decouple issuing and req preparation
  io_uring: move req preps out of io_issue_sqe()
  io_uring: don't io_prep_async_work() linked reqs
  io_uring: clean up ->files grabbing
  io_uring: kill extra check in fixed io_file_get()
  io_uring: simplify io_file_get()
  io_uring: improve submit_state.ios_left accounting
  io_uring: use a separate struct for timeout_remove
  io_uring: remove timeout.list after hrtimer cancel
  io_uring: clean leftovers after splitting issue
  io_uring: don't delay io_init_req() error check
  io_uring: clean file_data access in files_register
  io_uring: refactor *files_register()'s error paths
  io_uring: keep a pointer ref_node in file_data

Sebastian Andrzej Siewior (1):
  io_wq: Make io_wqe::lock a raw_spinlock_t

Stefano Garzarella (3):
  io_uring: use an enumeration for io_uring_register(2) opcodes
  io_uring: add IOURING_REGISTER_RESTRICTIONS 

[PATCH 5.8 018/124] platform/x86: intel-vbtn: Fix SW_TABLET_MODE always reporting 1 on the HP Pavilion 11 x360

2020-10-12 Thread Greg Kroah-Hartman
From: Hans de Goede 

commit d823346876a970522ff9e4d2b323c9b734dcc4de upstream.

Commit cfae58ed681c ("platform/x86: intel-vbtn: Only blacklist
SW_TABLET_MODE on the 9 / "Laptop" chasis-type") restored SW_TABLET_MODE
reporting on the HP stream x360 11 series on which it was previously broken
by commit de9647efeaa9 ("platform/x86: intel-vbtn: Only activate tablet
mode switch on 2-in-1's").

It turns out that enabling SW_TABLET_MODE reporting on devices with a
chassis-type of 10 ("Notebook") causes SW_TABLET_MODE to always report 1
at boot on the HP Pavilion 11 x360, which causes libinput to disable the
kbd and touchpad.

The HP Pavilion 11 x360's ACPI VGBS method sets bit 4 instead of bit 6 when
NOT in tablet mode at boot. Inspecting all the DSDTs in my DSDT collection
shows only one other model, the Medion E1239T ever setting bit 4 and it
always sets this together with bit 6.

So let's treat bit 4 as a second bit which, when set, indicates the device
is not in tablet mode, as we already do for bit 6.

While at it also prefix all VGBS constant defines with "VGBS_".

Fixes: cfae58ed681c ("platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE 
on the 9 / "Laptop" chasis-type")
Signed-off-by: Hans de Goede 
Acked-by: Mark Gross 
Signed-off-by: Andy Shevchenko 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/platform/x86/intel-vbtn.c |   12 
 1 file changed, 8 insertions(+), 4 deletions(-)

--- a/drivers/platform/x86/intel-vbtn.c
+++ b/drivers/platform/x86/intel-vbtn.c
@@ -15,9 +15,13 @@
 #include 
 #include 
 
+/* Returned when NOT in tablet mode on some HP Stream x360 11 models */
+#define VGBS_TABLET_MODE_FLAG_ALT  0x10
 /* When NOT in tablet mode, VGBS returns with the flag 0x40 */
-#define TABLET_MODE_FLAG 0x40
-#define DOCK_MODE_FLAG   0x80
+#define VGBS_TABLET_MODE_FLAG  0x40
+#define VGBS_DOCK_MODE_FLAG0x80
+
+#define VGBS_TABLET_MODE_FLAGS (VGBS_TABLET_MODE_FLAG | 
VGBS_TABLET_MODE_FLAG_ALT)
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("AceLan Kao");
@@ -72,9 +76,9 @@ static void detect_tablet_mode(struct pl
if (ACPI_FAILURE(status))
return;
 
-   m = !(vgbs & TABLET_MODE_FLAG);
+   m = !(vgbs & VGBS_TABLET_MODE_FLAGS);
input_report_switch(priv->input_dev, SW_TABLET_MODE, m);
-   m = (vgbs & DOCK_MODE_FLAG) ? 1 : 0;
+   m = (vgbs & VGBS_DOCK_MODE_FLAG) ? 1 : 0;
input_report_switch(priv->input_dev, SW_DOCK, m);
 }
 




[PATCH 5.8 036/124] cifs: Fix incomplete memory allocation on setxattr path

2020-10-12 Thread Greg Kroah-Hartman
From: Vladimir Zapolskiy 

commit 64b7f674c292207624b3d788eda2dde3dc1415df upstream.

On the setxattr() syscall path, due to an apparent typo, the size of a
dynamically allocated memory chunk for storing a struct smb2_file_full_ea_info
object is computed incorrectly: the first addend is the size of a pointer
instead of the wanted object size. Coincidentally it makes no difference
on 64-bit platforms, however on 32-bit targets the following memcpy()
writes 4 bytes of data outside of the dynamically allocated memory.

  =
  BUG kmalloc-16 (Not tainted): Redzone overwritten
  -

  Disabling lock debugging due to kernel taint
  INFO: 0x79e69a6f-0x9e5cdecf @offset=368. First byte 0x73 instead of 0xcc
  INFO: Slab 0xd36d2454 objects=85 used=51 fp=0xf7d0fc7a flags=0x35000201
  INFO: Object 0x6f171df3 @offset=352 fp=0x

  Redzone 5d4ff02d: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc  

  Object 6f171df3: 00 00 00 00 00 05 06 00 73 6e 72 75 62 00 66 69  
snrub.fi
  Redzone 79e69a6f: 73 68 32 0a  sh2.
  Padding 56254d82: 5a 5a 5a 5a 5a 5a 5a 5a  
  CPU: 0 PID: 8196 Comm: attr Tainted: GB 5.9.0-rc8+ #3
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 
04/01/2014
  Call Trace:
   dump_stack+0x54/0x6e
   print_trailer+0x12c/0x134
   check_bytes_and_report.cold+0x3e/0x69
   check_object+0x18c/0x250
   free_debug_processing+0xfe/0x230
   __slab_free+0x1c0/0x300
   kfree+0x1d3/0x220
   smb2_set_ea+0x27d/0x540
   cifs_xattr_set+0x57f/0x620
   __vfs_setxattr+0x4e/0x60
   __vfs_setxattr_noperm+0x4e/0x100
   __vfs_setxattr_locked+0xae/0xd0
   vfs_setxattr+0x4e/0xe0
   setxattr+0x12c/0x1a0
   path_setxattr+0xa4/0xc0
   __ia32_sys_lsetxattr+0x1d/0x20
   __do_fast_syscall_32+0x40/0x70
   do_fast_syscall_32+0x29/0x60
   do_SYSENTER_32+0x15/0x20
   entry_SYSENTER_32+0x9f/0xf2

Fixes: 5517554e4313 ("cifs: Add support for writing attributes on SMB2+")
Signed-off-by: Vladimir Zapolskiy 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/cifs/smb2ops.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1208,7 +1208,7 @@ smb2_set_ea(const unsigned int xid, stru
rqst[1].rq_iov = si_iov;
rqst[1].rq_nvec = 1;
 
-   len = sizeof(ea) + ea_name_len + ea_value_len + 1;
+   len = sizeof(*ea) + ea_name_len + ea_value_len + 1;
ea = kzalloc(len, GFP_KERNEL);
if (ea == NULL) {
rc = -ENOMEM;
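
The one-character fix above (sizeof(*ea) instead of sizeof(ea)) matters because
sizeof applied to a pointer yields the pointer size, not the size of the object
it points to. A small stand-alone illustration of the pitfall (hypothetical
struct, not the CIFS code):

#include <stdio.h>
#include <stdlib.h>

struct record {			/* hypothetical 8-byte header */
	unsigned int next_offset;
	unsigned char flags;
	unsigned char name_len;
	unsigned short value_len;
};

int main(void)
{
	struct record *rec;
	size_t name_len = 5, value_len = 4;

	/* Buggy: sizeof(rec) is the pointer size (4 on 32-bit, 8 on 64-bit). */
	size_t bad_len = sizeof(rec) + name_len + value_len + 1;

	/* Correct: sizeof(*rec) is the size of the structure itself (8 here). */
	size_t good_len = sizeof(*rec) + name_len + value_len + 1;

	/* On 64-bit the two happen to match, which is why the bug only
	 * corrupted memory on 32-bit targets. */
	printf("sizeof(rec)=%zu sizeof(*rec)=%zu bad=%zu good=%zu\n",
	       sizeof(rec), sizeof(*rec), bad_len, good_len);

	rec = calloc(1, good_len);	/* always allocate with the object size */
	if (!rec)
		return 1;
	free(rec);
	return 0;
}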




[PATCH 5.8 033/124] nvme-tcp: check page by sendpage_ok() before calling kernel_sendpage()

2020-10-12 Thread Greg Kroah-Hartman
From: Coly Li 

commit 7d4194abfc4de13a2663c7fee6891de8360f7a52 upstream.

Currently nvme_tcp_try_send_data() doesn't use kernel_sendpage() to
send slab pages. But pages allocated by __get_free_pages() without
__GFP_COMP, which also have a refcount of 0, are still sent to the
remote end by kernel_sendpage(), which is problematic.

The new introduced helper sendpage_ok() checks both PageSlab tag and
page_count counter, and returns true if the checking page is OK to be
sent by kernel_sendpage().

This patch fixes the page checking issue of nvme_tcp_try_send_data()
with sendpage_ok(). If sendpage_ok() returns true, send this page by
kernel_sendpage(), otherwise use sock_no_sendpage to handle this page.

Signed-off-by: Coly Li 
Cc: Chaitanya Kulkarni 
Cc: Christoph Hellwig 
Cc: Hannes Reinecke 
Cc: Jan Kara 
Cc: Jens Axboe 
Cc: Mikhail Skorzhinskii 
Cc: Philipp Reisner 
Cc: Sagi Grimberg 
Cc: Vlastimil Babka 
Cc: sta...@vger.kernel.org
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/nvme/host/tcp.c |7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -889,12 +889,11 @@ static int nvme_tcp_try_send_data(struct
else
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
 
-   /* can't zcopy slab pages */
-   if (unlikely(PageSlab(page))) {
-   ret = sock_no_sendpage(queue->sock, page, offset, len,
+   if (sendpage_ok(page)) {
+   ret = kernel_sendpage(queue->sock, page, offset, len,
flags);
} else {
-   ret = kernel_sendpage(queue->sock, page, offset, len,
+   ret = sock_no_sendpage(queue->sock, page, offset, len,
flags);
}
if (ret <= 0)




[PATCH 5.8 040/124] i2c: owl: Clear NACK and BUS error bits

2020-10-12 Thread Greg Kroah-Hartman
From: Cristian Ciocaltea 

commit f5b3f433641c543ebe5171285a42aa6adcdb2d22 upstream.

When the NACK and BUS error bits are set by the hardware, the driver is
responsible for clearing them by writing "1" into the corresponding
status registers.

Hence perform the necessary operations in owl_i2c_interrupt().

Fixes: d211e62af466 ("i2c: Add Actions Semiconductor Owl family S900 I2C 
driver")
Reported-by: Manivannan Sadhasivam 
Signed-off-by: Cristian Ciocaltea 
Signed-off-by: Wolfram Sang 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/i2c/busses/i2c-owl.c |6 ++
 1 file changed, 6 insertions(+)

--- a/drivers/i2c/busses/i2c-owl.c
+++ b/drivers/i2c/busses/i2c-owl.c
@@ -176,6 +176,9 @@ static irqreturn_t owl_i2c_interrupt(int
fifostat = readl(i2c_dev->base + OWL_I2C_REG_FIFOSTAT);
if (fifostat & OWL_I2C_FIFOSTAT_RNB) {
i2c_dev->err = -ENXIO;
+   /* Clear NACK error bit by writing "1" */
+   owl_i2c_update_reg(i2c_dev->base + OWL_I2C_REG_FIFOSTAT,
+  OWL_I2C_FIFOSTAT_RNB, true);
goto stop;
}
 
@@ -183,6 +186,9 @@ static irqreturn_t owl_i2c_interrupt(int
stat = readl(i2c_dev->base + OWL_I2C_REG_STAT);
if (stat & OWL_I2C_STAT_BEB) {
i2c_dev->err = -EIO;
+   /* Clear BUS error bit by writing "1" */
+   owl_i2c_update_reg(i2c_dev->base + OWL_I2C_REG_STAT,
+  OWL_I2C_STAT_BEB, true);
goto stop;
}
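
Status registers such as OWL_I2C_REG_FIFOSTAT here are "write one to clear":
the driver must write the error bit back to acknowledge it, otherwise the bit
stays latched. A generic user-space sketch of that W1C pattern against a fake
register (bit layout invented for illustration):

#include <stdint.h>
#include <stdio.h>

#define STAT_NACK	(1u << 1)	/* invented bit positions */
#define STAT_BUS_ERR	(1u << 2)

/* In a real driver these would be readl()/writel() on ioremapped registers. */
static uint32_t fake_stat_reg = STAT_NACK;

static uint32_t reg_read(void) { return fake_stat_reg; }

static void reg_write(uint32_t val)
{
	/* W1C semantics: bits written as 1 are cleared, the rest untouched. */
	fake_stat_reg &= ~val;
}

int main(void)
{
	uint32_t stat = reg_read();

	if (stat & STAT_NACK) {
		/* Acknowledge the error by writing the same bit back. */
		reg_write(STAT_NACK);
		printf("NACK handled, status now 0x%x\n", reg_read());
	}
	return 0;
}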
 




[PATCH 5.8 037/124] i2c: meson: fix clock setting overwrite

2020-10-12 Thread Greg Kroah-Hartman
From: Jerome Brunet 

commit 28683e847e2f20eed22cdd24f185d7783db396d3 upstream.

When the slave address is written in do_start(), SLAVE_ADDR is written
completely. This may overwrite some setting related to the clock rate
or signal filtering.

Fix this by writing only the bits related to the slave address. To avoid
causing unexpected changes, explicitly disable filtering and high/low
clock mode which may have been left over by the bootloader.

Fixes: 30021e3707a7 ("i2c: add support for Amlogic Meson I2C controller")
Signed-off-by: Jerome Brunet 
Signed-off-by: Wolfram Sang 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/i2c/busses/i2c-meson.c |   19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

--- a/drivers/i2c/busses/i2c-meson.c
+++ b/drivers/i2c/busses/i2c-meson.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2014 Beniamino Galvani 
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -38,6 +39,12 @@
 #define REG_CTRL_CLKDIVEXT_SHIFT 28
 #define REG_CTRL_CLKDIVEXT_MASKGENMASK(29, 28)
 
+#define REG_SLV_ADDR   GENMASK(7, 0)
+#define REG_SLV_SDA_FILTER GENMASK(10, 8)
+#define REG_SLV_SCL_FILTER GENMASK(13, 11)
+#define REG_SLV_SCL_LOWGENMASK(27, 16)
+#define REG_SLV_SCL_LOW_EN BIT(28)
+
 #define I2C_TIMEOUT_MS 500
 
 enum {
@@ -147,6 +154,9 @@ static void meson_i2c_set_clk_div(struct
meson_i2c_set_mask(i2c, REG_CTRL, REG_CTRL_CLKDIVEXT_MASK,
   (div >> 10) << REG_CTRL_CLKDIVEXT_SHIFT);
 
+   /* Disable HIGH/LOW mode */
+   meson_i2c_set_mask(i2c, REG_SLAVE_ADDR, REG_SLV_SCL_LOW_EN, 0);
+
dev_dbg(i2c->dev, "%s: clk %lu, freq %u, div %u\n", __func__,
clk_rate, freq, div);
 }
@@ -280,7 +290,10 @@ static void meson_i2c_do_start(struct me
token = (msg->flags & I2C_M_RD) ? TOKEN_SLAVE_ADDR_READ :
TOKEN_SLAVE_ADDR_WRITE;
 
-   writel(msg->addr << 1, i2c->regs + REG_SLAVE_ADDR);
+
+   meson_i2c_set_mask(i2c, REG_SLAVE_ADDR, REG_SLV_ADDR,
+  FIELD_PREP(REG_SLV_ADDR, msg->addr << 1));
+
meson_i2c_add_token(i2c, TOKEN_START);
meson_i2c_add_token(i2c, token);
 }
@@ -461,6 +474,10 @@ static int meson_i2c_probe(struct platfo
return ret;
}
 
+   /* Disable filtering */
+   meson_i2c_set_mask(i2c, REG_SLAVE_ADDR,
+  REG_SLV_SDA_FILTER | REG_SLV_SCL_FILTER, 0);
+
meson_i2c_set_clk_div(i2c, timings.bus_freq_hz);
 
return 0;
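
The key change above replaces a full-register writel() of the slave address
with a masked update so the filter and clock bits in REG_SLAVE_ADDR survive. A
small stand-alone sketch of that read-modify-write pattern, using simplified
stand-ins for the kernel's GENMASK()/FIELD_PREP() helpers:

#include <stdint.h>
#include <stdio.h>

/* Simplified user-space versions of GENMASK()/FIELD_PREP(). */
#define GENMASK32(h, l)		(((~0u) << (l)) & (~0u >> (31 - (h))))
#define FIELD_PREP32(mask, val)	(((val) << __builtin_ctz(mask)) & (mask))

#define REG_SLV_ADDR	GENMASK32(7, 0)	/* address field, bits 7:0 */

static uint32_t reg = 0x0fff0a00;	/* pretend filter/clock bits are set */

static void set_mask(uint32_t mask, uint32_t val)
{
	/* Update only the requested field and preserve everything else. */
	reg = (reg & ~mask) | (val & mask);
}

int main(void)
{
	uint32_t addr = 0x50 << 1;	/* 7-bit address shifted for the R/W bit */

	/* The old code's equivalent of "reg = addr;" would wipe bits 31:8. */
	set_mask(REG_SLV_ADDR, FIELD_PREP32(REG_SLV_ADDR, addr));

	printf("reg = 0x%08x (upper bits preserved)\n", reg);
	return 0;
}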




[PATCH 5.8 044/124] openvswitch: handle DNAT tuple collision

2020-10-12 Thread Greg Kroah-Hartman
From: Dumitru Ceara 

commit 8aa7b526dc0b5dbf40c1b834d76a667ad672a410 upstream.

With multiple DNAT rules it's possible that after destination
translation the resulting tuples collide.

For example, two openvswitch flows:
nw_dst=10.0.0.10,tp_dst=10, actions=ct(commit,table=2,nat(dst=20.0.0.1:20))
nw_dst=10.0.0.20,tp_dst=10, actions=ct(commit,table=2,nat(dst=20.0.0.1:20))

Assuming two TCP clients initiating the following connections:
10.0.0.10:5000->10.0.0.10:10
10.0.0.10:5000->10.0.0.20:10

Both tuples would translate to 10.0.0.10:5000->20.0.0.1:20 causing
nf_conntrack_confirm() to fail because of tuple collision.

Netfilter handles this case by allocating a null binding for SNAT at
egress by default.  Perform the same operation in openvswitch for DNAT
if no explicit SNAT is requested by the user and allocate a null binding
for SNAT for packets in the "original" direction.

Reported-at: https://bugzilla.redhat.com/1877128
Suggested-by: Florian Westphal 
Fixes: 05752523e565 ("openvswitch: Interface with NAT.")
Signed-off-by: Dumitru Ceara 
Signed-off-by: Jakub Kicinski 
Signed-off-by: Greg Kroah-Hartman 

---
 net/openvswitch/conntrack.c |   20 
 1 file changed, 12 insertions(+), 8 deletions(-)

--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -903,15 +903,19 @@ static int ovs_ct_nat(struct net *net, s
}
err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
 
-   if (err == NF_ACCEPT &&
-   ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
-   if (maniptype == NF_NAT_MANIP_SRC)
-   maniptype = NF_NAT_MANIP_DST;
-   else
-   maniptype = NF_NAT_MANIP_SRC;
+   if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
+   if (ct->status & IPS_SRC_NAT) {
+   if (maniptype == NF_NAT_MANIP_SRC)
+   maniptype = NF_NAT_MANIP_DST;
+   else
+   maniptype = NF_NAT_MANIP_SRC;
 
-   err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
-maniptype);
+   err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
+maniptype);
+   } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+   err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL,
+NF_NAT_MANIP_SRC);
+   }
}
 
/* Mark NAT done if successful and update the flow key. */




[PATCH 5.8 041/124] sctp: fix sctp_auth_init_hmacs() error path

2020-10-12 Thread Greg Kroah-Hartman
From: Eric Dumazet 

commit d42ee76ecb6c49d499fc5eb32ca34468d95dbc3e upstream.

After freeing ep->auth_hmacs we have to clear the pointer
or risk use-after-free as reported by syzbot:

BUG: KASAN: use-after-free in sctp_auth_destroy_hmacs net/sctp/auth.c:509 
[inline]
BUG: KASAN: use-after-free in sctp_auth_destroy_hmacs net/sctp/auth.c:501 
[inline]
BUG: KASAN: use-after-free in sctp_auth_free+0x17e/0x1d0 net/sctp/auth.c:1070
Read of size 8 at addr 8880a8ff52c0 by task syz-executor941/6874

CPU: 0 PID: 6874 Comm: syz-executor941 Not tainted 5.9.0-rc8-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x198/0x1fd lib/dump_stack.c:118
 print_address_description.constprop.0.cold+0xae/0x497 mm/kasan/report.c:383
 __kasan_report mm/kasan/report.c:513 [inline]
 kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530
 sctp_auth_destroy_hmacs net/sctp/auth.c:509 [inline]
 sctp_auth_destroy_hmacs net/sctp/auth.c:501 [inline]
 sctp_auth_free+0x17e/0x1d0 net/sctp/auth.c:1070
 sctp_endpoint_destroy+0x95/0x240 net/sctp/endpointola.c:203
 sctp_endpoint_put net/sctp/endpointola.c:236 [inline]
 sctp_endpoint_free+0xd6/0x110 net/sctp/endpointola.c:183
 sctp_destroy_sock+0x9c/0x3c0 net/sctp/socket.c:4981
 sctp_v6_destroy_sock+0x11/0x20 net/sctp/socket.c:9415
 sk_common_release+0x64/0x390 net/core/sock.c:3254
 sctp_close+0x4ce/0x8b0 net/sctp/socket.c:1533
 inet_release+0x12e/0x280 net/ipv4/af_inet.c:431
 inet6_release+0x4c/0x70 net/ipv6/af_inet6.c:475
 __sock_release+0xcd/0x280 net/socket.c:596
 sock_close+0x18/0x20 net/socket.c:1277
 __fput+0x285/0x920 fs/file_table.c:281
 task_work_run+0xdd/0x190 kernel/task_work.c:141
 exit_task_work include/linux/task_work.h:25 [inline]
 do_exit+0xb7d/0x29f0 kernel/exit.c:806
 do_group_exit+0x125/0x310 kernel/exit.c:903
 __do_sys_exit_group kernel/exit.c:914 [inline]
 __se_sys_exit_group kernel/exit.c:912 [inline]
 __x64_sys_exit_group+0x3a/0x50 kernel/exit.c:912
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x43f278
Code: Bad RIP value.
RSP: 002b:7fffe0995c38 EFLAGS: 0246 ORIG_RAX: 00e7
RAX: ffda RBX:  RCX: 0043f278
RDX:  RSI: 003c RDI: 
RBP: 004bf068 R08: 00e7 R09: ffd0
R10: 2000 R11: 0246 R12: 0001
R13: 006d1180 R14:  R15: 

Allocated by task 6874:
 kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48
 kasan_set_track mm/kasan/common.c:56 [inline]
 __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:461
 kmem_cache_alloc_trace+0x174/0x300 mm/slab.c:3554
 kmalloc include/linux/slab.h:554 [inline]
 kmalloc_array include/linux/slab.h:593 [inline]
 kcalloc include/linux/slab.h:605 [inline]
 sctp_auth_init_hmacs+0xdb/0x3b0 net/sctp/auth.c:464
 sctp_auth_init+0x8a/0x4a0 net/sctp/auth.c:1049
 sctp_setsockopt_auth_supported net/sctp/socket.c:4354 [inline]
 sctp_setsockopt+0x477e/0x97f0 net/sctp/socket.c:4631
 __sys_setsockopt+0x2db/0x610 net/socket.c:2132
 __do_sys_setsockopt net/socket.c:2143 [inline]
 __se_sys_setsockopt net/socket.c:2140 [inline]
 __x64_sys_setsockopt+0xba/0x150 net/socket.c:2140
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Freed by task 6874:
 kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48
 kasan_set_track+0x1c/0x30 mm/kasan/common.c:56
 kasan_set_free_info+0x1b/0x30 mm/kasan/generic.c:355
 __kasan_slab_free+0xd8/0x120 mm/kasan/common.c:422
 __cache_free mm/slab.c:3422 [inline]
 kfree+0x10e/0x2b0 mm/slab.c:3760
 sctp_auth_destroy_hmacs net/sctp/auth.c:511 [inline]
 sctp_auth_destroy_hmacs net/sctp/auth.c:501 [inline]
 sctp_auth_init_hmacs net/sctp/auth.c:496 [inline]
 sctp_auth_init_hmacs+0x2b7/0x3b0 net/sctp/auth.c:454
 sctp_auth_init+0x8a/0x4a0 net/sctp/auth.c:1049
 sctp_setsockopt_auth_supported net/sctp/socket.c:4354 [inline]
 sctp_setsockopt+0x477e/0x97f0 net/sctp/socket.c:4631
 __sys_setsockopt+0x2db/0x610 net/socket.c:2132
 __do_sys_setsockopt net/socket.c:2143 [inline]
 __se_sys_setsockopt net/socket.c:2140 [inline]
 __x64_sys_setsockopt+0xba/0x150 net/socket.c:2140
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 1f485649f529 ("[SCTP]: Implement SCTP-AUTH internals")
Signed-off-by: Eric Dumazet 
Cc: Vlad Yasevich 
Cc: Neil Horman 
Cc: Marcelo Ricardo Leitner 
Acked-by: Marcelo Ricardo Leitner 
Signed-off-by: Jakub Kicinski 
Signed-off-by: Greg Kroah-Hartman 

---
 net/sctp/auth.c |1 +
 1 file changed, 1 insertion(+)

--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -494,6 +494,7 @@ int sctp_auth_init_hmacs(struct sctp_end
 out_err:
/* Clean up any successful allocations */
sctp_auth_destroy_hmacs(ep->auth_hmacs);
+   ep->auth_hmacs = NULL;
return -ENOMEM;
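
Setting ep->auth_hmacs to NULL right after freeing it is what keeps the later
sctp_auth_free() from walking an already-freed array. The general pattern, in a
tiny stand-alone form (hypothetical names, not the SCTP code):

#include <stdlib.h>

struct endpoint {
	int **hmacs;		/* hypothetical stand-in for ep->auth_hmacs */
};

static void destroy_hmacs(int **hmacs)
{
	if (!hmacs)
		return;		/* a NULL pointer makes teardown a no-op */
	for (int i = 0; hmacs[i]; i++)
		free(hmacs[i]);
	free(hmacs);
}

static int init_hmacs(struct endpoint *ep)
{
	ep->hmacs = calloc(4, sizeof(*ep->hmacs));
	if (!ep->hmacs)
		return -1;
	/* ... pretend allocating one of the entries failed ... */
	destroy_hmacs(ep->hmacs);
	ep->hmacs = NULL;	/* otherwise a later teardown sees a stale pointer */
	return -1;
}

int main(void)
{
	struct endpoint ep = { 0 };

	init_hmacs(&ep);
	destroy_hmacs(ep.hmacs);	/* safe even after the failed init */
	return 0;
}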

[PATCH 5.8 035/124] espintcp: restore IP CB before handing the packet to xfrm

2020-10-12 Thread Greg Kroah-Hartman
From: Sabrina Dubroca 

commit 4eb2e13415757a2bce5bb0d580d22bbeef1f5346 upstream.

Xiumei reported a bug with espintcp over IPv6 in transport mode,
because xfrm6_transport_finish expects to find IP6CB data (struct
inet6_skb_cb). Currently, espintcp zeroes the CB, but the relevant
part is actually preserved by previous layers (first set up by tcp,
then strparser only zeroes a small part of tcp_skb_tb), so we can just
relocate it to the start of skb->cb.

Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)")
Reported-by: Xiumei Mu 
Signed-off-by: Sabrina Dubroca 
Signed-off-by: Steffen Klassert 
Signed-off-by: Greg Kroah-Hartman 

---
 net/xfrm/espintcp.c |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -29,8 +29,12 @@ static void handle_nonesp(struct espintc
 
 static void handle_esp(struct sk_buff *skb, struct sock *sk)
 {
+   struct tcp_skb_cb *tcp_cb = (struct tcp_skb_cb *)skb->cb;
+
skb_reset_transport_header(skb);
-   memset(skb->cb, 0, sizeof(skb->cb));
+
+   /* restore IP CB, we need at least IP6CB->nhoff */
+   memmove(skb->cb, &tcp_cb->header, sizeof(tcp_cb->header));
 
rcu_read_lock();
skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);




[PATCH 5.8 038/124] i2c: meson: keep peripheral clock enabled

2020-10-12 Thread Greg Kroah-Hartman
From: Jerome Brunet 

commit 79e137b1540165f788394658442284d55a858984 upstream.

SCL rate appears to be different from what is expected. For example,
we get 164kHz on i2c3 of the vim3 when 400kHz is expected. This is
partially due to the peripheral clock being disabled when the clock is
set.

Let's keep the peripheral clock on after probe to fix the problem. This
does not affect the SCL output which is still gated when i2c is idle.

Fixes: 09af1c2fa490 ("i2c: meson: set clock divider in probe instead of setting 
it for each transfer")
Signed-off-by: Jerome Brunet 
Signed-off-by: Wolfram Sang 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/i2c/busses/i2c-meson.c |   10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

--- a/drivers/i2c/busses/i2c-meson.c
+++ b/drivers/i2c/busses/i2c-meson.c
@@ -370,16 +370,12 @@ static int meson_i2c_xfer_messages(struc
struct meson_i2c *i2c = adap->algo_data;
int i, ret = 0;
 
-   clk_enable(i2c->clk);
-
for (i = 0; i < num; i++) {
ret = meson_i2c_xfer_msg(i2c, msgs + i, i == num - 1, atomic);
if (ret)
break;
}
 
-   clk_disable(i2c->clk);
-
return ret ?: i;
 }
 
@@ -448,7 +444,7 @@ static int meson_i2c_probe(struct platfo
return ret;
}
 
-   ret = clk_prepare(i2c->clk);
+   ret = clk_prepare_enable(i2c->clk);
if (ret < 0) {
dev_err(&pdev->dev, "can't prepare clock\n");
return ret;
@@ -470,7 +466,7 @@ static int meson_i2c_probe(struct platfo
 
ret = i2c_add_adapter(&i2c->adap);
if (ret < 0) {
-   clk_unprepare(i2c->clk);
+   clk_disable_unprepare(i2c->clk);
return ret;
}
 
@@ -488,7 +484,7 @@ static int meson_i2c_remove(struct platf
struct meson_i2c *i2c = platform_get_drvdata(pdev);
 
i2c_del_adapter(&i2c->adap);
-   clk_unprepare(i2c->clk);
+   clk_disable_unprepare(i2c->clk);
 
return 0;
 }




[PATCH 5.8 032/124] tcp: use sendpage_ok() to detect misused .sendpage

2020-10-12 Thread Greg Kroah-Hartman
From: Coly Li 

commit cf83a17edeeb36195596d2dae060a7c381db35f1 upstream.

commit a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab
objects") adds the checks for Slab pages, but pages that don't have a
page_count are still missing from the check.

The network layer's sendpage method is not designed to send page_count 0
pages either, therefore both PageSlab() and page_count() should be
checked for the page being sent. This is exactly what sendpage_ok()
does.

This patch uses sendpage_ok() in do_tcp_sendpages() to detect misused
.sendpage, to make the code more robust.

Fixes: a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab objects")
Suggested-by: Eric Dumazet 
Signed-off-by: Coly Li 
Cc: Vasily Averin 
Cc: David S. Miller 
Cc: sta...@vger.kernel.org
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 net/ipv4/tcp.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -970,7 +970,8 @@ ssize_t do_tcp_sendpages(struct sock *sk
long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
if (IS_ENABLED(CONFIG_DEBUG_VM) &&
-   WARN_ONCE(PageSlab(page), "page must not be a Slab one"))
+   WARN_ONCE(!sendpage_ok(page),
+ "page must not be a Slab one and have page_count > 0"))
return -EINVAL;
 
/* Wait for a connection to finish. One exception is TCP Fast Open




[PATCH 5.8 042/124] team: set dev->needed_headroom in team_setup_by_port()

2020-10-12 Thread Greg Kroah-Hartman
From: Eric Dumazet 

commit 89d01748b2354e210b5d4ea47bc25a42a1b42c82 upstream.

Some devices set needed_headroom. If we ignore it, we might
end up crashing in various skb_push() for example in ipgre_header()
since some layers assume enough headroom has been reserved.

Fixes: 1d76efe1577b ("team: add support for non-ethernet devices")
Signed-off-by: Eric Dumazet 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/net/team/team.c |1 +
 1 file changed, 1 insertion(+)

--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2112,6 +2112,7 @@ static void team_setup_by_port(struct ne
dev->header_ops = port_dev->header_ops;
dev->type = port_dev->type;
dev->hard_header_len = port_dev->hard_header_len;
+   dev->needed_headroom = port_dev->needed_headroom;
dev->addr_len = port_dev->addr_len;
dev->mtu = port_dev->mtu;
memcpy(dev->broadcast, port_dev->broadcast, port_dev->addr_len);




[Question] About z3fold page migration

2020-10-12 Thread linmiaohe
Hi Vitaly:

Many thanks for your brilliant z3fold code. I am reading it and have
some questions about it. It would be very kind of you to explain them for me.
1. page->private is used in z3fold but the PagePrivate flag is never set;
should we SetPagePrivate for it?
2. Since the PagePrivate flag is never set, why do we ClearPagePrivate in
free_z3fold_page and z3fold_page_migrate?
3. Should we add the page to the unbuddied list in z3fold_page_putback()
when zhdr->refcount does not reach 0, since we removed it from the unbuddied
list in z3fold_page_isolate? Or when will the page be added back to the
unbuddied list after z3fold_page_putback?

Thanks a lot in advance. I am waiting for your reply. :)


[PATCH 5.8 026/124] nvme-core: put ctrl ref when module ref get fail

2020-10-12 Thread Greg Kroah-Hartman
From: Chaitanya Kulkarni 

commit 4bab69093044ca81f394bd0780be1b71c5a4d308 upstream.

When try_module_get() fails in the nvme_dev_open() it returns without
releasing the ctrl reference which was taken earlier.

Put the ctrl reference which is taken before calling the
try_module_get() in the error return code path.

Fixes: 52a3974feb1a "nvme-core: get/put ctrl and transport module in 
nvme_dev_open/release()"
Signed-off-by: Chaitanya Kulkarni 
Reviewed-by: Logan Gunthorpe 
Signed-off-by: Christoph Hellwig 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/nvme/host/core.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3061,8 +3061,10 @@ static int nvme_dev_open(struct inode *i
}
 
nvme_get_ctrl(ctrl);
-   if (!try_module_get(ctrl->ops->module))
+   if (!try_module_get(ctrl->ops->module)) {
+   nvme_put_ctrl(ctrl);
return -EINVAL;
+   }
 
file->private_data = ctrl;
return 0;
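
The bug pattern fixed above is acquiring one reference, failing to acquire a
second resource, and returning without dropping the first. A compact sketch of
the balanced error path (hypothetical names, not the nvme code):

#include <stdbool.h>
#include <stdio.h>

struct ctrl { int refcount; };

static void get_ctrl(struct ctrl *c) { c->refcount++; }
static void put_ctrl(struct ctrl *c) { c->refcount--; }

/* Pretend the module reference cannot be taken. */
static bool try_module_get_stub(void) { return false; }

static int dev_open(struct ctrl *c)
{
	get_ctrl(c);
	if (!try_module_get_stub()) {
		/* Drop the reference taken above before bailing out. */
		put_ctrl(c);
		return -1;
	}
	return 0;
}

int main(void)
{
	struct ctrl c = { 0 };

	dev_open(&c);
	printf("refcount after failed open: %d\n", c.refcount);	/* stays 0 */
	return 0;
}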




[PATCH 5.8 028/124] RISC-V: Make sure memblock reserves the memory containing DT

2020-10-12 Thread Greg Kroah-Hartman
From: Atish Patra 

commit a78c6f5956a949b496a5b087188dde52483edf51 upstream.

Currently, the memory containing DT is not reserved. Thus, that region
of memory can be reallocated or reused for other purposes. This may result
in a corrupted DT for the nommu virt board in QEMU. We may not face any issue
in kendryte as DT is embedded in the kernel image for that.

Fixes: 6bd33e1ece52 ("riscv: add nommu support")
Cc: sta...@vger.kernel.org
Signed-off-by: Atish Patra 
Signed-off-by: Palmer Dabbelt 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/riscv/mm/init.c |1 +
 1 file changed, 1 insertion(+)

--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -515,6 +515,7 @@ asmlinkage void __init setup_vm(uintptr_
 #else
dtb_early_va = (void *)dtb_pa;
 #endif
+   dtb_early_pa = dtb_pa;
 }
 
 static inline void setup_vm_final(void)




[PATCH 5.8 031/124] net: introduce helper sendpage_ok() in include/linux/net.h

2020-10-12 Thread Greg Kroah-Hartman
From: Coly Li 

commit c381b07941adc2274ce552daf86c94701c5e265a upstream.

The original problem was from the nvme-over-tcp code, which mistakenly
used kernel_sendpage() to send pages allocated by __get_free_pages()
without the __GFP_COMP flag. Such pages don't have a refcount
(page_count is 0) on tail pages; sending them by kernel_sendpage() may
trigger a kernel panic from a corrupted kernel heap, because these pages
are incorrectly freed in the network stack as page_count 0 pages.

This patch introduces a helper sendpage_ok(), it returns true if the
checking page,
- is not slab page: PageSlab(page) is false.
- has page refcount: page_count(page) is not zero

All drivers that want to send a page to a remote end by kernel_sendpage()
may use this helper to check whether the page is OK. If the helper does
not return true, the driver should try another non-sendpage method (e.g.
sock_no_sendpage()) to handle the page.

Signed-off-by: Coly Li 
Cc: Chaitanya Kulkarni 
Cc: Christoph Hellwig 
Cc: Hannes Reinecke 
Cc: Jan Kara 
Cc: Jens Axboe 
Cc: Mikhail Skorzhinskii 
Cc: Philipp Reisner 
Cc: Sagi Grimberg 
Cc: Vlastimil Babka 
Cc: sta...@vger.kernel.org
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 include/linux/net.h |   16 
 1 file changed, 16 insertions(+)

--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -290,6 +291,21 @@ do {   
\
 #define net_get_random_once_wait(buf, nbytes)  \
get_random_once_wait((buf), (nbytes))
 
+/*
+ * E.g. XFS meta- & log-data is in slab pages, or bcache meta
+ * data pages, or other high order pages allocated by
+ * __get_free_pages() without __GFP_COMP, which have a page_count
+ * of 0 and/or have PageSlab() set. We cannot use send_page for
+ * those, as that does get_page(); put_page(); and would cause
+ * either a VM_BUG directly, or __page_cache_release a page that
+ * would actually still be referenced by someone, leading to some
+ * obscure delayed Oops somewhere else.
+ */
+static inline bool sendpage_ok(struct page *page)
+{
+   return !PageSlab(page) && page_count(page) >= 1;
+}
+
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
   size_t num, size_t len);
 int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
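
A driver that streams pages over a socket would typically use the new helper to
pick the safe transmit path, much like the nvme-tcp change earlier in this
series. A kernel-style sketch (illustrative only; it assumes linux/net.h for
sendpage_ok()/kernel_sendpage() and net/sock.h for sock_no_sendpage()):

#include <linux/net.h>
#include <net/sock.h>

/* Choose the safe transmit path for one page. */
static ssize_t send_one_page(struct socket *sock, struct page *page,
			     int offset, size_t len, int flags)
{
	if (sendpage_ok(page))
		/* Not a slab page and page_count > 0: zero-copy send is safe. */
		return kernel_sendpage(sock, page, offset, len, flags);

	/* Fall back to the copying path for slab or refcount-0 pages. */
	return sock_no_sendpage(sock, page, offset, len, flags);
}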




[PATCH 5.8 030/124] mm/khugepaged: fix filemap page_to_pgoff(page) != offset

2020-10-12 Thread Greg Kroah-Hartman
From: Hugh Dickins 

commit 033b5d77551167f8c24ca862ce83d3e0745f9245 upstream.

There have been elusive reports of filemap_fault() hitting its
VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page) on kernels built
with CONFIG_READ_ONLY_THP_FOR_FS=y.

Suren has hit it on a kernel with CONFIG_READ_ONLY_THP_FOR_FS=y and
CONFIG_NUMA is not set: and he has analyzed it down to how khugepaged
without NUMA reuses the same huge page after collapse_file() failed
(whereas NUMA targets its allocation to the respective node each time).
And most of us were usually testing with CONFIG_NUMA=y kernels.

collapse_file(old start)
  new_page = khugepaged_alloc_page(hpage)
  __SetPageLocked(new_page)
  new_page->index = start // hpage->index=old offset
  new_page->mapping = mapping
  xas_store(&xas, new_page)

  filemap_fault
page = find_get_page(mapping, offset)
// if offset falls inside hpage then
// compound_head(page) == hpage
lock_page_maybe_drop_mmap()
  __lock_page(page)

  // collapse fails
  xas_store(&xas, old page)
  new_page->mapping = NULL
  unlock_page(new_page)

collapse_file(new start)
  new_page = khugepaged_alloc_page(hpage)
  __SetPageLocked(new_page)
  new_page->index = start // hpage->index=new offset
  new_page->mapping = mapping // mapping becomes valid again

// since compound_head(page) == hpage
// page_to_pgoff(page) got changed
VM_BUG_ON_PAGE(page_to_pgoff(page) != offset)

An initial patch replaced __SetPageLocked() by lock_page(), which did
fix the race which Suren illustrates above.  But testing showed that it's
not good enough: if the racing task's __lock_page() gets delayed long
after its find_get_page(), then it may follow collapse_file(new start)'s
successful final unlock_page(), and crash on the same VM_BUG_ON_PAGE.

It could be fixed by relaxing filemap_fault()'s VM_BUG_ON_PAGE to a
check and retry (as is done for mapping), with similar relaxations in
find_lock_entry() and pagecache_get_page(): but it's not obvious what
else might get caught out; and khugepaged non-NUMA appears to be unique
in exposing a page to page cache, then revoking, without going through
a full cycle of freeing before reuse.

Instead, non-NUMA khugepaged_prealloc_page() release the old page
if anyone else has a reference to it (1% of cases when I tested).

Although never reported on huge tmpfs, I believe its find_lock_entry()
has been at similar risk; but huge tmpfs does not rely on khugepaged
for its normal working nearly so much as READ_ONLY_THP_FOR_FS does.

Reported-by: Denis Lisov 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206569
Link: 
https://lore.kernel.org/linux-mm/?q=20200219144635.3b7417145de19b65f258c943%40linux-foundation.org
Reported-by: Qian Cai 
Link: https://lore.kernel.org/linux-xfs/?q=20200616013309.GB815%40lca.pw
Reported-and-analyzed-by: Suren Baghdasaryan 
Fixes: 87c460a0bded ("mm/khugepaged: collapse_shmem() without freezing 
new_page")
Signed-off-by: Hugh Dickins 
Cc: sta...@vger.kernel.org # v4.9+
Reviewed-by: Matthew Wilcox (Oracle) 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 mm/khugepaged.c |   12 
 1 file changed, 12 insertions(+)

--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -914,6 +914,18 @@ static struct page *khugepaged_alloc_hug
 
 static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 {
+   /*
+* If the hpage allocated earlier was briefly exposed in page cache
+* before collapse_file() failed, it is possible that racing lookups
+* have not yet completed, and would then be unpleasantly surprised by
+* finding the hpage reused for the same mapping at a different offset.
+* Just release the previous allocation if there is any danger of that.
+*/
+   if (*hpage && page_count(*hpage) > 1) {
+   put_page(*hpage);
+   *hpage = NULL;
+   }
+
if (!*hpage)
*hpage = khugepaged_alloc_hugepage(wait);
 




[PATCH 5.8 007/124] exfat: fix use of uninitialized spinlock on error path

2020-10-12 Thread Greg Kroah-Hartman
From: Namjae Jeon 

commit 8ff006e57ad3a25f909c456d053aa498b6673a39 upstream.

syzbot reported warning message:

Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1d6/0x29e lib/dump_stack.c:118
 register_lock_class+0xf06/0x1520 kernel/locking/lockdep.c:893
 __lock_acquire+0xfd/0x2ae0 kernel/locking/lockdep.c:4320
 lock_acquire+0x148/0x720 kernel/locking/lockdep.c:5029
 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
 _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151
 spin_lock include/linux/spinlock.h:354 [inline]
 exfat_cache_inval_inode+0x30/0x280 fs/exfat/cache.c:226
 exfat_evict_inode+0x124/0x270 fs/exfat/inode.c:660
 evict+0x2bb/0x6d0 fs/inode.c:576
 exfat_fill_super+0x1e07/0x27d0 fs/exfat/super.c:681
 get_tree_bdev+0x3e9/0x5f0 fs/super.c:1342
 vfs_get_tree+0x88/0x270 fs/super.c:1547
 do_new_mount fs/namespace.c:2875 [inline]
 path_mount+0x179d/0x29e0 fs/namespace.c:3192
 do_mount fs/namespace.c:3205 [inline]
 __do_sys_mount fs/namespace.c:3413 [inline]
 __se_sys_mount+0x126/0x180 fs/namespace.c:3390
 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

If exfat_read_root() returns an error, spinlock is used in
exfat_evict_inode() without initialization. This patch combines
exfat_cache_init_inode() with exfat_inode_init_once() to initialize
spinlock by slab constructor.

Fixes: c35b6810c495 ("exfat: add exfat cache")
Cc: sta...@vger.kernel.org # v5.7+
Reported-by: syzbot 
Signed-off-by: Namjae Jeon 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/exfat/cache.c|   11 ---
 fs/exfat/exfat_fs.h |3 ++-
 fs/exfat/inode.c|2 --
 fs/exfat/super.c|5 -
 4 files changed, 6 insertions(+), 15 deletions(-)

--- a/fs/exfat/cache.c
+++ b/fs/exfat/cache.c
@@ -17,7 +17,6 @@
 #include "exfat_raw.h"
 #include "exfat_fs.h"
 
-#define EXFAT_CACHE_VALID  0
 #define EXFAT_MAX_CACHE16
 
 struct exfat_cache {
@@ -61,16 +60,6 @@ void exfat_cache_shutdown(void)
kmem_cache_destroy(exfat_cachep);
 }
 
-void exfat_cache_init_inode(struct inode *inode)
-{
-   struct exfat_inode_info *ei = EXFAT_I(inode);
-
-   spin_lock_init(&ei->cache_lru_lock);
-   ei->nr_caches = 0;
-   ei->cache_valid_id = EXFAT_CACHE_VALID + 1;
-   INIT_LIST_HEAD(&ei->cache_lru);
-}
-
 static inline struct exfat_cache *exfat_cache_alloc(void)
 {
return kmem_cache_alloc(exfat_cachep, GFP_NOFS);
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -250,6 +250,8 @@ struct exfat_sb_info {
struct rcu_head rcu;
 };
 
+#define EXFAT_CACHE_VALID  0
+
 /*
  * EXFAT file system inode in-memory data
  */
@@ -429,7 +431,6 @@ extern const struct dentry_operations ex
 /* cache.c */
 int exfat_cache_init(void);
 void exfat_cache_shutdown(void);
-void exfat_cache_init_inode(struct inode *inode);
 void exfat_cache_inval_inode(struct inode *inode);
 int exfat_get_cluster(struct inode *inode, unsigned int cluster,
unsigned int *fclus, unsigned int *dclus,
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -610,8 +610,6 @@ static int exfat_fill_inode(struct inode
ei->i_crtime = info->crtime;
inode->i_atime = info->atime;
 
-   exfat_cache_init_inode(inode);
-
return 0;
 }
 
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -361,7 +361,6 @@ static int exfat_read_root(struct inode
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
current_time(inode);
exfat_truncate_atime(&inode->i_atime);
-   exfat_cache_init_inode(inode);
return 0;
 }
 
@@ -747,6 +746,10 @@ static void exfat_inode_init_once(void *
 {
struct exfat_inode_info *ei = (struct exfat_inode_info *)foo;
 
+   spin_lock_init(&ei->cache_lru_lock);
+   ei->nr_caches = 0;
+   ei->cache_valid_id = EXFAT_CACHE_VALID + 1;
+   INIT_LIST_HEAD(&ei->cache_lru);
INIT_HLIST_NODE(&ei->i_hash_fat);
inode_init_once(&ei->vfs_inode);
 }
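
Moving the lock setup into the slab constructor guarantees that the spinlock is
valid for every object the cache ever hands out, even on error paths that evict
the inode before it was fully set up. A kernel-style sketch of the constructor
pattern (illustrative names, not the exfat code):

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/init.h>

struct my_inode_info {
	spinlock_t lock;
	struct list_head lru;
};

static struct kmem_cache *my_cachep;

/*
 * Runs once for each object when the slab populates it, so every object
 * returned by kmem_cache_alloc() already has a usable lock and list head.
 */
static void my_inode_init_once(void *foo)
{
	struct my_inode_info *ei = foo;

	spin_lock_init(&ei->lock);
	INIT_LIST_HEAD(&ei->lru);
}

static int __init my_cache_create(void)
{
	my_cachep = kmem_cache_create("my_inode_cache",
				      sizeof(struct my_inode_info), 0,
				      SLAB_RECLAIM_ACCOUNT, my_inode_init_once);
	return my_cachep ? 0 : -ENOMEM;
}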




[PATCH 5.8 000/124] 5.8.15-rc1 review

2020-10-12 Thread Greg Kroah-Hartman
This is the start of the stable review cycle for the 5.8.15 release.
There are 124 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Wed, 14 Oct 2020 13:31:22 +.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:

https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.8.15-rc1.gz
or in the git tree and branch at:

git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
linux-5.8.y
and the diffstat can be found below.

thanks,

greg k-h

-
Pseudo-Shortlog of commits:

Greg Kroah-Hartman 
Linux 5.8.15-rc1

Cong Wang 
net_sched: commit action insertions together

Cong Wang 
net_sched: defer tcf_idr_insert() in tcf_action_init_1()

Manivannan Sadhasivam 
net: qrtr: ns: Protect radix_tree_deref_slot() using rcu read locks

Anant Thazhemadam 
net: usb: rtl8150: set random MAC address when set_ethernet_addr() fails

Xiongfeng Wang 
Input: ati_remote2 - add missing newlines when printing module parameters

Alexey Kardashevskiy 
tty/vt: Do not warn when huge selection requested

Aya Levin 
net/mlx5e: Fix driver's declaration to support GRE offload

Rohit Maheshwari 
net/tls: race causes kernel panic

Nikolay Aleksandrov 
net: bridge: fdb: don't flush ext_learn entries

Guillaume Nault 
net/core: check length before updating Ethertype in skb_mpls_{push,pop}

Johannes Berg 
netlink: fix policy dump leak

Eric Dumazet 
tcp: fix receive window update in tcp_add_backlog()

Vijay Balakrishna 
mm: khugepaged: recalculate min_free_kbytes after memory hotplug as 
expected by khugepaged

Minchan Kim 
mm: validate inode in mapping_set_error()

Coly Li 
mmc: core: don't set limits.discard_granularity as 0

Kajol Jain 
perf: Fix task_function_call() error handling

David Howells 
afs: Fix deadlock between writeback and truncate

Vladimir Oltean 
net: mscc: ocelot: divide watermark value by 60 when writing to SYS_ATOP

Maxim Kochetkov 
net: mscc: ocelot: extend watermark encoding function

Vladimir Oltean 
net: mscc: ocelot: split writes to pause frame enable bit and to thresholds

Vladimir Oltean 
net: mscc: ocelot: rename ocelot_board.c to ocelot_vsc7514.c

David Howells 
rxrpc: Fix server keyring leak

David Howells 
rxrpc: The server keyring isn't network-namespaced

David Howells 
rxrpc: Fix some missing _bh annotations on locking conn->state_lock

David Howells 
rxrpc: Downgrade the BUG() for unsupported token type in rxrpc_read()

Marc Dionne 
rxrpc: Fix rxkad token xdr encoding

Tom Rix 
net: mvneta: fix double free of txq->buf

Si-Wei Liu 
vhost-vdpa: fix page pinning leakage in error path

Si-Wei Liu 
vhost-vdpa: fix vhost_vdpa_map() on error condition

Randy Dunlap 
net: hinic: fix DEVLINK build errors

Vineetha G. Jaya Kumaran 
net: stmmac: Modify configuration method of EEE timers

Vlad Buslov 
net/mlx5e: Fix race condition on nhe->n pointer in neigh update

Aya Levin 
net/mlx5e: Fix VLAN create flow

Aya Levin 
net/mlx5e: Fix VLAN cleanup flow

Aya Levin 
net/mlx5e: Fix return status when setting unsupported FEC mode

Aya Levin 
net/mlx5e: Add resiliency in Striding RQ mode for packets larger than MTU

Maor Gottlieb 
net/mlx5: Fix request_irqs error flow

Eran Ben Elisha 
net/mlx5: Add retry mechanism to the command entry index allocation

Eran Ben Elisha 
net/mlx5: poll cmd EQ in case of command timeout

Eran Ben Elisha 
net/mlx5: Avoid possible free of command entry while timeout comp handler

Eran Ben Elisha 
net/mlx5: Fix a race when moving command interface to polling mode

Qian Cai 
pipe: Fix memory leaks in create_pipe_files()

Hariprasad Kelam 
octeontx2-pf: Fix synchnorization issue in mbox

Hariprasad Kelam 
octeontx2-pf: Fix the device state on error

Geetha sowjanya 
octeontx2-pf: Fix TCP/UDP checksum offload for IPv6 frames

Subbaraya Sundeep 
octeontx2-af: Fix enable/disable of default NPC entries

Willy Liu 
net: phy: realtek: fix rtl8211e rx/tx delay config

Tonghao Zhang 
virtio-net: don't disable guest csum when disable LRO

Wilken Gottwalt 
net: usb: ax88179_178a: fix missing stop entry in driver_info

Heiner Kallweit 
r8169: fix RTL8168f/RTL8411 EPHY config

Ido Schimmel 
mlxsw: spectrum_acl: Fix mlxsw_sp_acl_tcam_group_add()'s error path

Randy Dunlap 
mdio: fix mdio-thunder.c dependency & build error

Eric Dumazet 
bonding: set dev->needed_headroom in bond_setup_by_slave()

Ivan Khoronzhuk 
net: ethernet: cavium: octeon_mgmt: use phy_start and phy_stop

Wong Vee Khee 
net: stmmac: Fix clock handling on remove path

Ronak Doshi 
vmxnet3: fix cksum offload issues for non-udp tunnels

Jacob Keller 
ice: fix memory leak in 

[PATCH 5.8 029/124] gpiolib: Disable compat ->read() code in UML case

2020-10-12 Thread Greg Kroah-Hartman
From: Andy Shevchenko 

commit 47e538d86d5776ac8152146c3ed3d22326243190 upstream.

It appears that UML (arch/um) has no compat.h header and hence
can't compile a recently added piece of code in the GPIO library.

Disable compat ->read() code in UML case to avoid compilation errors.

While at it, use a pattern that is already used elsewhere in the kernel.

Fixes: 5ad284ab3a01 ("gpiolib: Fix line event handling in syscall compatible 
mode")
Reported-by: Geert Uytterhoeven 
Signed-off-by: Andy Shevchenko 
Link: 
https://lore.kernel.org/r/20201005131044.87276-1-andriy.shevche...@linux.intel.com
Signed-off-by: Linus Walleij 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/gpio/gpiolib.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -838,7 +838,7 @@ static __poll_t lineevent_poll(struct fi
 
 static ssize_t lineevent_get_size(void)
 {
-#ifdef __x86_64__
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
/* i386 has no padding after 'id' */
if (in_ia32_syscall()) {
struct compat_gpioeevent_data {
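
For reference, the helper ends up looking roughly like this after the change.
Everything below the quoted hunk is reconstructed from mainline gpiolib.c
rather than taken from this patch, so treat it as a sketch:

  static ssize_t lineevent_get_size(void)
  {
  #if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
      /* i386 has no padding after 'id' */
      if (in_ia32_syscall()) {
          struct compat_gpioeevent_data {
              compat_u64  timestamp;
              u32         id;
          };

          return sizeof(struct compat_gpioeevent_data);
      }
  #endif
      return sizeof(struct gpioevent_data);
  }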




[PATCH 5.8 017/124] Platform: OLPC: Fix memleak in olpc_ec_probe

2020-10-12 Thread Greg Kroah-Hartman
From: Dinghao Liu 

commit 4fd9ac6bd3044734a7028bd993944c3617d1eede upstream.

When devm_regulator_register() fails, ec should be
freed just like when olpc_ec_cmd() fails.

Fixes: 231c0c216172a ("Platform: OLPC: Add a regulator for the DCON")
Signed-off-by: Dinghao Liu 
Signed-off-by: Andy Shevchenko 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/platform/olpc/olpc-ec.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

--- a/drivers/platform/olpc/olpc-ec.c
+++ b/drivers/platform/olpc/olpc-ec.c
@@ -439,7 +439,9 @@ static int olpc_ec_probe(struct platform
);
if (IS_ERR(ec->dcon_rdev)) {
dev_err(&pdev->dev, "failed to register DCON regulator\n");
-   return PTR_ERR(ec->dcon_rdev);
+   err = PTR_ERR(ec->dcon_rdev);
+   kfree(ec);
+   return err;
}
 
ec->dbgfs_dir = olpc_ec_setup_debugfs();
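
Read as straight code, the fixed error path is roughly the sketch below. It
assumes, as the kfree() in the hunk implies, that ec comes from a plain
kzalloc() earlier in olpc_ec_probe(), so the allocation has to be undone by
hand on every failure after that point:

  if (IS_ERR(ec->dcon_rdev)) {
      dev_err(&pdev->dev, "failed to register DCON regulator\n");
      err = PTR_ERR(ec->dcon_rdev);
      kfree(ec);      /* undo the earlier kzalloc() */
      return err;
  }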




[PATCH 5.8 008/124] net: wireless: nl80211: fix out-of-bounds access in nl80211_del_key()

2020-10-12 Thread Greg Kroah-Hartman
From: Anant Thazhemadam 

commit 3dc289f8f139997f4e9d3cfccf8738f20d23e47b upstream.

In nl80211_parse_key(), key.idx is first initialized as -1.
If this value of key.idx remains unmodified and gets returned, and
nl80211_key_allowed() also returns 0, then rdev_del_key() gets called
with key.idx = -1.
This causes an out-of-bounds array access.

Handle this issue by checking the value of key.idx after
nl80211_parse_key() is called, and returning -EINVAL if key.idx < 0.

Cc: sta...@vger.kernel.org
Reported-by: syzbot+b1bb342d1d097516c...@syzkaller.appspotmail.com
Tested-by: syzbot+b1bb342d1d097516c...@syzkaller.appspotmail.com
Signed-off-by: Anant Thazhemadam 
Link: 
https://lore.kernel.org/r/20201007035401.9522-1-anant.thazhema...@gmail.com
Signed-off-by: Johannes Berg 
Signed-off-by: Greg Kroah-Hartman 

---
 net/wireless/nl80211.c |3 +++
 1 file changed, 3 insertions(+)

--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4172,6 +4172,9 @@ static int nl80211_del_key(struct sk_buf
if (err)
return err;
 
+   if (key.idx < 0)
+   return -EINVAL;
+
if (info->attrs[NL80211_ATTR_MAC])
mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
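
A minimal standalone illustration of the failure mode, using hypothetical
structures rather than the real cfg80211 ones: a signed index that is still
-1 reaches memory in front of the array, which is exactly what the added
check rules out.

  #include <stddef.h>

  struct demo_key { unsigned char data[16]; };
  struct demo_dev { struct demo_key keys[6]; };

  struct demo_key *demo_get_key(struct demo_dev *dev, int idx)
  {
      /* Without this check, idx == -1 would read dev->keys[-1]. */
      if (idx < 0 || idx >= 6)
          return NULL;    /* the caller maps this to -EINVAL */
      return &dev->keys[idx];
  }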
 




[PATCH 5.8 027/124] macsec: avoid use-after-free in macsec_handle_frame()

2020-10-12 Thread Greg Kroah-Hartman
From: Eric Dumazet 

commit c7cc9200e9b4a2ac172e990ef1975cd42975dad6 upstream.

De-referencing the skb after the call to gro_cells_receive() is not allowed.
We need to fetch skb->len earlier.

Fixes: 5491e7c6b1a9 ("macsec: enable GRO and RPS on macsec devices")
Signed-off-by: Eric Dumazet 
Cc: Paolo Abeni 
Acked-by: Paolo Abeni 
Signed-off-by: Jakub Kicinski 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/net/macsec.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1077,6 +1077,7 @@ static rx_handler_result_t macsec_handle
struct macsec_rx_sa *rx_sa;
struct macsec_rxh_data *rxd;
struct macsec_dev *macsec;
+   unsigned int len;
sci_t sci;
u32 hdr_pn;
bool cbit;
@@ -1232,9 +1233,10 @@ deliver:
macsec_rxsc_put(rx_sc);
 
skb_orphan(skb);
+   len = skb->len;
ret = gro_cells_receive(&macsec->gro_cells, skb);
if (ret == NET_RX_SUCCESS)
-   count_rx(dev, skb->len);
+   count_rx(dev, len);
else
macsec->secy.netdev->stats.rx_dropped++;
 

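Reassembled from the hunk above, the delivery path now reads roughly as
follows; the general rule is that anything still needed from the skb must be
read before a call that may take ownership of it and free it:

  skb_orphan(skb);
  len = skb->len;         /* snapshot while the skb is still ours */
  ret = gro_cells_receive(&macsec->gro_cells, skb);   /* may free skb */
  if (ret == NET_RX_SUCCESS)
      count_rx(dev, len); /* use the cached length, not skb->len */
  else
      macsec->secy.netdev->stats.rx_dropped++;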



[PATCH 5.8 004/124] Revert "ravb: Fixed to be able to unload modules"

2020-10-12 Thread Greg Kroah-Hartman
From: Geert Uytterhoeven 

commit 77972b55fb9d35d4a6b0abca99abffaa4ec6a85b upstream.

This reverts commit 1838d6c62f57836639bd3d83e7855e0ee4f6defc.

This commit moved the ravb_mdio_init() call (and thus the
of_mdiobus_register() call) from ravb_probe() to ravb_open().  This
causes a regression during system resume (s2idle/s2ram), as
new PHY devices cannot be bound while suspended.

During boot, the Micrel PHY is detected like this:

Micrel KSZ9031 Gigabit PHY e680.ethernet-:00: attached PHY 
driver [Micrel KSZ9031 Gigabit PHY] 
(mii_bus:phy_addr=e680.ethernet-:00, irq=228)
ravb e680.ethernet eth0: Link is Up - 1Gbps/Full - flow control off

During system suspend, (A) defer_all_probes is set to true, and (B)
usermodehelper_disabled is set to UMH_DISABLED, to avoid drivers being
probed while suspended.

  A. If CONFIG_MODULES=n, phy_device_register() calling device_add()
 merely adds the device, but does not probe it yet, as
 really_probe() returns early due to defer_all_probes being set:

   dpm_resume+0x128/0x4f8
 device_resume+0xcc/0x1b0
   dpm_run_callback+0x74/0x340
 ravb_resume+0x190/0x1b8
   ravb_open+0x84/0x770
 of_mdiobus_register+0x1e0/0x468
   of_mdiobus_register_phy+0x1b8/0x250
 of_mdiobus_phy_device_register+0x178/0x1e8
   phy_device_register+0x114/0x1b8
 device_add+0x3d4/0x798
   bus_probe_device+0x98/0xa0
 device_initial_probe+0x10/0x18
   __device_attach+0xe4/0x140
 bus_for_each_drv+0x64/0xc8
   __device_attach_driver+0xb8/0xe0
 driver_probe_device.part.11+0xc4/0xd8
   really_probe+0x32c/0x3b8

 Later, phy_attach_direct() notices no PHY driver has been bound,
 and falls back to the Generic PHY, leading to degraded operation:

   Generic PHY e680.ethernet-:00: attached PHY driver [Generic 
PHY] (mii_bus:phy_addr=e680.ethernet-:00, irq=POLL)
   ravb e680.ethernet eth0: Link is Up - 1Gbps/Full - flow control off

  B. If CONFIG_MODULES=y, request_module() returns early with -EBUSY due
 to UMH_DISABLED, and MDIO initialization fails completely:

   mdio_bus e680.ethernet-:00: error -16 loading PHY driver 
module for ID 0x00221622
   ravb e680.ethernet eth0: failed to initialize MDIO
   PM: dpm_run_callback(): ravb_resume+0x0/0x1b8 returns -16
   PM: Device e680.ethernet failed to resume: error -16

 Ignoring -EBUSY in phy_request_driver_module(), like was done for
 -ENOENT in commit 21e194425abd65b5 ("net: phy: fix issue with loading
 PHY driver w/o initramfs"), would make it fall back to the Generic
 PHY, like in the CONFIG_MODULES=n case.

Signed-off-by: Geert Uytterhoeven 
Cc: sta...@vger.kernel.org
Reviewed-by: Sergei Shtylyov 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/net/ethernet/renesas/ravb_main.c |  110 +++
 1 file changed, 55 insertions(+), 55 deletions(-)

--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1342,51 +1342,6 @@ static inline int ravb_hook_irq(unsigned
return error;
 }
 
-/* MDIO bus init function */
-static int ravb_mdio_init(struct ravb_private *priv)
-{
-   struct platform_device *pdev = priv->pdev;
-   struct device *dev = &pdev->dev;
-   int error;
-
-   /* Bitbang init */
-   priv->mdiobb.ops = &bb_ops;
-
-   /* MII controller setting */
-   priv->mii_bus = alloc_mdio_bitbang(&priv->mdiobb);
-   if (!priv->mii_bus)
-   return -ENOMEM;
-
-   /* Hook up MII support for ethtool */
-   priv->mii_bus->name = "ravb_mii";
-   priv->mii_bus->parent = dev;
-   snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
-pdev->name, pdev->id);
-
-   /* Register MDIO bus */
-   error = of_mdiobus_register(priv->mii_bus, dev->of_node);
-   if (error)
-   goto out_free_bus;
-
-   return 0;
-
-out_free_bus:
-   free_mdio_bitbang(priv->mii_bus);
-   return error;
-}
-
-/* MDIO bus release function */
-static int ravb_mdio_release(struct ravb_private *priv)
-{
-   /* Unregister mdio bus */
-   mdiobus_unregister(priv->mii_bus);
-
-   /* Free bitbang info */
-   free_mdio_bitbang(priv->mii_bus);
-
-   return 0;
-}
-
 /* Network device open function for Ethernet AVB */
 static int ravb_open(struct net_device *ndev)
 {
@@ -1395,13 +1350,6 @@ static int ravb_open(struct net_device *
struct device *dev = &pdev->dev;
int error;
 
-   /* MDIO bus init */
-   error = ravb_mdio_init(priv);
-   if (error) {
-   netdev_err(ndev, "failed to initialize MDIO\n");

[PATCH 5.8 006/124] crypto: arm64: Use x16 with indirect branch to bti_c

2020-10-12 Thread Greg Kroah-Hartman
From: Jeremy Linton 

commit 39e4716caa598a07a98598b2e7cd03055ce25fb9 upstream.

The AES code uses a 'br x7' as part of a function called by
a macro. That branch needs a bti_j as a target. This results
in a panic as seen below. Using x16 (or x17) with an indirect
branch keeps the target bti_c.

  Bad mode in Synchronous Abort handler detected on CPU1, code 0x3403 -- BTI
  CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1
  pstate: 20400c05 (nzCv daif +PAN -UAO BTYPE=j-)
  pc : aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs]
  lr : aesbs_xts_encrypt+0x48/0xe0 [aes_neon_bs]
  sp : 80001052b730

  aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs]
   __xts_crypt+0xb0/0x2dc [aes_neon_bs]
   xts_encrypt+0x28/0x3c [aes_neon_bs]
  crypto_skcipher_encrypt+0x50/0x84
  simd_skcipher_encrypt+0xc8/0xe0
  crypto_skcipher_encrypt+0x50/0x84
  test_skcipher_vec_cfg+0x224/0x5f0
  test_skcipher+0xbc/0x120
  alg_test_skcipher+0xa0/0x1b0
  alg_test+0x3dc/0x47c
  cryptomgr_test+0x38/0x60

Fixes: 0e89640b640d ("crypto: arm64 - Use modern annotations for assembly 
functions")
Cc:  # 5.6.x-
Signed-off-by: Jeremy Linton 
Suggested-by: Dave P Martin 
Reviewed-by: Ard Biesheuvel 
Reviewed-by: Mark Brown 
Link: https://lore.kernel.org/r/20201006163326.2780619-1-jeremy.lin...@arm.com
Signed-off-by: Catalin Marinas 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/arm64/crypto/aes-neonbs-core.S |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -788,7 +788,7 @@ SYM_FUNC_START_LOCAL(__xts_crypt8)
 
 0: mov bskey, x21
mov rounds, x22
-   br  x7
+   br  x16
 SYM_FUNC_END(__xts_crypt8)
 
.macro  __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
@@ -806,7 +806,7 @@ SYM_FUNC_END(__xts_crypt8)
uzp1v30.4s, v30.4s, v25.4s
ld1 {v25.16b}, [x24]
 
-99:adr x7, \do8
+99:adr x16, \do8
bl  __xts_crypt8
 
ldp q16, q17, [sp, #.Lframe_local_offset]




[PATCH 5.8 005/124] bpf: Fix scalar32_min_max_or bounds tracking

2020-10-12 Thread Greg Kroah-Hartman
From: Daniel Borkmann 

commit 5b9fbeb75b6a98955f628e205ac26689bcb1383e upstream.

Simon reported an issue with the current scalar32_min_max_or() implementation.
That is, compared to the other 32 bit subreg tracking functions, the code in
scalar32_min_max_or() stands out in that it uses the 64 bit registers instead
of 32 bit ones. This leads to bounds tracking issues, for example:

  [...]
  8: R0=map_value(id=0,off=0,ks=4,vs=48,imm=0) R10=fp0 fp-8=
  8: (79) r1 = *(u64 *)(r0 +0)
   R0=map_value(id=0,off=0,ks=4,vs=48,imm=0) R10=fp0 fp-8=
  9: R0=map_value(id=0,off=0,ks=4,vs=48,imm=0) R1_w=inv(id=0) R10=fp0 
fp-8=
  9: (b7) r0 = 1
  10: R0_w=inv1 R1_w=inv(id=0) R10=fp0 fp-8=
  10: (18) r2 = 0x60002
  12: R0_w=inv1 R1_w=inv(id=0) R2_w=inv25769803778 R10=fp0 fp-8=
  12: (ad) if r1 < r2 goto pc+1
   R0_w=inv1 R1_w=inv(id=0,umin_value=25769803778) R2_w=inv25769803778 R10=fp0 
fp-8=
  13: R0_w=inv1 R1_w=inv(id=0,umin_value=25769803778) R2_w=inv25769803778 
R10=fp0 fp-8=
  13: (95) exit
  14: R0_w=inv1 R1_w=inv(id=0,umax_value=25769803777,var_off=(0x0; 
0x7)) R2_w=inv25769803778 R10=fp0 fp-8=
  14: (25) if r1 > 0x0 goto pc+1
   R0_w=inv1 R1_w=inv(id=0,umax_value=0,var_off=(0x0; 
0x7fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8=
  15: R0_w=inv1 R1_w=inv(id=0,umax_value=0,var_off=(0x0; 
0x7fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8=
  15: (95) exit
  16: R0_w=inv1 R1_w=inv(id=0,umin_value=1,umax_value=25769803777,var_off=(0x0; 
0x77fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8=
  16: (47) r1 |= 0
  17: R0_w=inv1 R1_w=inv(id=0,umin_value=1,umax_value=32212254719,var_off=(0x1; 
0x7),s32_max_value=1,u32_max_value=1) R2_w=inv25769803778 R10=fp0 
fp-8=
  [...]

The bound tests on the map value force the upper unsigned bound to be
25769803777 in 64 bit (0b111) and the lower one to be 1. When ORed, these are
truncated and thus result in the range [1,1] for the 32 bit reg tracker. This
is incorrect: the only thing we know is that the value must be positive, so
2147483647 (0b111) is the maximum for the subregs. Fix it by using the
{u,s}32_{min,max}_value vars instead. This also makes sense, for example, in
the else branch where we update dst_reg->s32_{min,max}_value: there we need to
use the newly computed dst_reg->u32_{min,max}_value, as we know these are
positive. Previously, the else branch used the 64 bit values umin_value=1 and
umax_value=32212254719, and the latter got truncated to be 1 as the upper
bound there. After the fix the subreg range is now correct:

  [...]
  8: R0=map_value(id=0,off=0,ks=4,vs=48,imm=0) R10=fp0 fp-8=
  8: (79) r1 = *(u64 *)(r0 +0)
   R0=map_value(id=0,off=0,ks=4,vs=48,imm=0) R10=fp0 fp-8=
  9: R0=map_value(id=0,off=0,ks=4,vs=48,imm=0) R1_w=inv(id=0) R10=fp0 
fp-8=
  9: (b7) r0 = 1
  10: R0_w=inv1 R1_w=inv(id=0) R10=fp0 fp-8=
  10: (18) r2 = 0x60002
  12: R0_w=inv1 R1_w=inv(id=0) R2_w=inv25769803778 R10=fp0 fp-8=
  12: (ad) if r1 < r2 goto pc+1
   R0_w=inv1 R1_w=inv(id=0,umin_value=25769803778) R2_w=inv25769803778 R10=fp0 
fp-8=
  13: R0_w=inv1 R1_w=inv(id=0,umin_value=25769803778) R2_w=inv25769803778 
R10=fp0 fp-8=
  13: (95) exit
  14: R0_w=inv1 R1_w=inv(id=0,umax_value=25769803777,var_off=(0x0; 
0x7)) R2_w=inv25769803778 R10=fp0 fp-8=
  14: (25) if r1 > 0x0 goto pc+1
   R0_w=inv1 R1_w=inv(id=0,umax_value=0,var_off=(0x0; 
0x7fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8=
  15: R0_w=inv1 R1_w=inv(id=0,umax_value=0,var_off=(0x0; 
0x7fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8=
  15: (95) exit
  16: R0_w=inv1 R1_w=inv(id=0,umin_value=1,umax_value=25769803777,var_off=(0x0; 
0x77fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8=
  16: (47) r1 |= 0
  17: R0_w=inv1 R1_w=inv(id=0,umin_value=1,umax_value=32212254719,var_off=(0x0; 
0x77fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8=
  [...]
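
The positivity noted above (an OR of two non-negative 32 bit values stays
non-negative, since OR can only set bits that are already set in one of the
inputs) can be checked in isolation. A small standalone illustration in plain
C, separate from the verifier code:

  #include <stdio.h>

  int main(void)
  {
      unsigned int a = 0x7fffffffu;   /* 2147483647, largest non-negative s32 */
      unsigned int b = 0x12345678u;
      unsigned int r = a | b;

      /* OR only sets bits already set in a or b, so bit 31 stays clear. */
      printf("0x%08x | 0x%08x = 0x%08x, top bit = %u\n", a, b, r, r >> 31);
      return 0;
  }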

Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking")
Reported-by: Simon Scannell 
Signed-off-by: Daniel Borkmann 
Reviewed-by: John Fastabend 
Acked-by: Alexei Starovoitov 
Signed-off-by: Greg Kroah-Hartman 

---
 kernel/bpf/verifier.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5490,8 +5490,8 @@ static void scalar32_min_max_or(struct b
bool src_known = tnum_subreg_is_const(src_reg->var_off);
bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
struct tnum var32_off = tnum_subreg(dst_reg->var_off);
-   s32 smin_val = src_reg->smin_value;
-   u32 umin_val = src_reg->umin_value;
+   s32 smin_val = src_reg->s32_min_value;
+   u32 umin_val = src_reg->u32_min_value;
