[PATCH 5.8 048/124] drm/amd/pm: Removed fixed clock in auto mode DPM
From: Sudheesh Mavila [ Upstream commit 97cf32996c46d9935cc133d910a75fb687dd6144 ] SMU10_UMD_PSTATE_PEAK_FCLK value should not be used to set the DPM. Suggested-by: Evan Quan Reviewed-by: Evan Quan Signed-off-by: Sudheesh Mavila Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index 9ee8cf8267c88..43f7adff6cb74 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -563,6 +563,8 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, struct smu10_hwmgr *data = hwmgr->backend; uint32_t min_sclk = hwmgr->display_config->min_core_set_clock; uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100; + uint32_t index_fclk = data->clock_vol_info.vdd_dep_on_fclk->count - 1; + uint32_t index_socclk = data->clock_vol_info.vdd_dep_on_socclk->count - 1; if (hwmgr->smu_version < 0x1E3700) { pr_info("smu firmware version too old, can not set dpm level\n"); @@ -676,13 +678,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, hwmgr->display_config->num_display > 3 ? - SMU10_UMD_PSTATE_PEAK_FCLK : + data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk : min_mclk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinSocclkByFreq, - SMU10_UMD_PSTATE_MIN_SOCCLK, + data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinVcn, @@ -695,11 +697,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, - SMU10_UMD_PSTATE_PEAK_FCLK, + data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxSocclkByFreq, - SMU10_UMD_PSTATE_PEAK_SOCCLK, + data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxVcn, -- 2.25.1
[PATCH 5.8 047/124] io_uring: fix potential ABBA deadlock in ->show_fdinfo()
From: Jens Axboe [ Upstream commit fad8e0de4426a776c9bcb060555e7c09e2d08db6 ] syzbot reports a potential lock deadlock between the normal IO path and ->show_fdinfo(): == WARNING: possible circular locking dependency detected 5.9.0-rc6-syzkaller #0 Not tainted -- syz-executor.2/19710 is trying to acquire lock: 888098ddc450 (sb_writers#4){.+.+}-{0:0}, at: io_write+0x6b5/0xb30 fs/io_uring.c:3296 but task is already holding lock: 8880a11b8428 (>uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0xe9a/0x1bd0 fs/io_uring.c:8348 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (>uring_lock){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 __io_uring_show_fdinfo fs/io_uring.c:8417 [inline] io_uring_show_fdinfo+0x194/0xc70 fs/io_uring.c:8460 seq_show+0x4a8/0x700 fs/proc/fd.c:65 seq_read+0x432/0x1070 fs/seq_file.c:208 do_loop_readv_writev fs/read_write.c:734 [inline] do_loop_readv_writev fs/read_write.c:721 [inline] do_iter_read+0x48e/0x6e0 fs/read_write.c:955 vfs_readv+0xe5/0x150 fs/read_write.c:1073 kernel_readv fs/splice.c:355 [inline] default_file_splice_read.constprop.0+0x4e6/0x9e0 fs/splice.c:412 do_splice_to+0x137/0x170 fs/splice.c:871 splice_direct_to_actor+0x307/0x980 fs/splice.c:950 do_splice_direct+0x1b3/0x280 fs/splice.c:1059 do_sendfile+0x55f/0xd40 fs/read_write.c:1540 __do_sys_sendfile64 fs/read_write.c:1601 [inline] __se_sys_sendfile64 fs/read_write.c:1587 [inline] __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1587 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #1 (>lock){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 seq_read+0x61/0x1070 fs/seq_file.c:155 pde_read fs/proc/inode.c:306 [inline] proc_reg_read+0x221/0x300 fs/proc/inode.c:318 do_loop_readv_writev fs/read_write.c:734 [inline] do_loop_readv_writev fs/read_write.c:721 [inline] do_iter_read+0x48e/0x6e0 fs/read_write.c:955 vfs_readv+0xe5/0x150 fs/read_write.c:1073 kernel_readv fs/splice.c:355 [inline] default_file_splice_read.constprop.0+0x4e6/0x9e0 fs/splice.c:412 do_splice_to+0x137/0x170 fs/splice.c:871 splice_direct_to_actor+0x307/0x980 fs/splice.c:950 do_splice_direct+0x1b3/0x280 fs/splice.c:1059 do_sendfile+0x55f/0xd40 fs/read_write.c:1540 __do_sys_sendfile64 fs/read_write.c:1601 [inline] __se_sys_sendfile64 fs/read_write.c:1587 [inline] __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1587 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #0 (sb_writers#4){.+.+}-{0:0}: check_prev_add kernel/locking/lockdep.c:2496 [inline] check_prevs_add kernel/locking/lockdep.c:2601 [inline] validate_chain kernel/locking/lockdep.c:3218 [inline] __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4441 lock_acquire+0x1f3/0xaf0 kernel/locking/lockdep.c:5029 percpu_down_read include/linux/percpu-rwsem.h:51 [inline] __sb_start_write+0x228/0x450 fs/super.c:1672 io_write+0x6b5/0xb30 fs/io_uring.c:3296 io_issue_sqe+0x18f/0x5c50 fs/io_uring.c:5719 __io_queue_sqe+0x280/0x1160 fs/io_uring.c:6175 io_queue_sqe+0x692/0xfa0 fs/io_uring.c:6254 io_submit_sqe fs/io_uring.c:6324 [inline] io_submit_sqes+0x1761/0x2400 fs/io_uring.c:6521 __do_sys_io_uring_enter+0xeac/0x1bd0 fs/io_uring.c:8349 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 other info that might help us debug this: Chain exists of: sb_writers#4 --> >lock --> >uring_lock Possible unsafe locking scenario: CPU0CPU1 lock(>uring_lock); lock(>lock); lock(>uring_lock); lock(sb_writers#4); *** DEADLOCK *** 1 lock held by syz-executor.2/19710: #0: 8880a11b8428 (>uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0xe9a/0x1bd0 fs/io_uring.c:8348 stack backtrace: CPU: 0 PID: 19710 Comm: syz-executor.2 Not tainted 5.9.0-rc6-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 check_noncircular+0x324/0x3e0 kernel/locking/lockdep.c:1827 check_prev_add kernel/locking/lockdep.c:2496 [inline] check_prevs_add kernel/locking/lockdep.c:2601 [inline] validate_chain
[PATCH 5.8 075/124] r8169: fix RTL8168f/RTL8411 EPHY config
From: Heiner Kallweit [ Upstream commit 709a16be0593c08190982cfbdca6df95e6d5823b ] Mistakenly bit 2 was set instead of bit 3 as in the vendor driver. Fixes: a7a92cf81589 ("r8169: sync PCIe PHY init with vendor driver 8.047.01") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index a5d54fa012213..fe173ea894e2c 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2958,7 +2958,7 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp) { 0x08, 0x0001, 0x0002 }, { 0x09, 0x, 0x0080 }, { 0x19, 0x, 0x0224 }, - { 0x00, 0x, 0x0004 }, + { 0x00, 0x, 0x0008 }, { 0x0c, 0x3df0, 0x0200 }, }; @@ -2975,7 +2975,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp) { 0x06, 0x00c0, 0x0020 }, { 0x0f, 0x, 0x5200 }, { 0x19, 0x, 0x0224 }, - { 0x00, 0x, 0x0004 }, + { 0x00, 0x, 0x0008 }, { 0x0c, 0x3df0, 0x0200 }, }; -- 2.25.1
[PATCH 5.8 082/124] octeontx2-pf: Fix synchnorization issue in mbox
From: Hariprasad Kelam [ Upstream commit 66a5209b53418111757716d71e52727b782eabd4 ] Mbox implementation in octeontx2 driver has three states alloc, send and reset in mbox response. VF allocate and sends message to PF for processing, PF ACKs them back and reset the mbox memory. In some case we see synchronization issue where after msgs_acked is incremented and before mbox_reset API is called, if current execution is scheduled out and a different thread is scheduled in which checks for msgs_acked. Since the new thread sees msgs_acked == msgs_sent it will try to allocate a new message and to send a new mbox message to PF.Now if mbox_reset is scheduled in, PF will see '0' in msgs_send. This patch fixes the issue by calling mbox_reset before incrementing msgs_acked flag for last processing message and checks for valid message size. Fixes: d424b6c02 ("octeontx2-pf: Enable SRIOV and added VF mbox handling") Signed-off-by: Hariprasad Kelam Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/mbox.c | 12 ++-- drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 1 + drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 11 ++- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 4 ++-- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 387e33fa417aa..2718fe201c147 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -17,7 +17,7 @@ static const u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); -void otx2_mbox_reset(struct otx2_mbox *mbox, int devid) +void __otx2_mbox_reset(struct otx2_mbox *mbox, int devid) { void *hw_mbase = mbox->hwbase + (devid * MBOX_SIZE); struct otx2_mbox_dev *mdev = >dev[devid]; @@ -26,13 +26,21 @@ void otx2_mbox_reset(struct otx2_mbox *mbox, int devid) tx_hdr = hw_mbase + mbox->tx_start; rx_hdr = hw_mbase + mbox->rx_start; - spin_lock(>mbox_lock); mdev->msg_size = 0; mdev->rsp_size = 0; tx_hdr->num_msgs = 0; tx_hdr->msg_size = 0; rx_hdr->num_msgs = 0; rx_hdr->msg_size = 0; +} +EXPORT_SYMBOL(__otx2_mbox_reset); + +void otx2_mbox_reset(struct otx2_mbox *mbox, int devid) +{ + struct otx2_mbox_dev *mdev = >dev[devid]; + + spin_lock(>mbox_lock); + __otx2_mbox_reset(mbox, devid); spin_unlock(>mbox_lock); } EXPORT_SYMBOL(otx2_mbox_reset); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 6dfd0f90cd704..ab433789d2c31 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -93,6 +93,7 @@ struct mbox_msghdr { }; void otx2_mbox_reset(struct otx2_mbox *mbox, int devid); +void __otx2_mbox_reset(struct otx2_mbox *mbox, int devid); void otx2_mbox_destroy(struct otx2_mbox *mbox); int otx2_mbox_init(struct otx2_mbox *mbox, void __force *hwbase, struct pci_dev *pdev, void __force *reg_base, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 5d620a39ea802..2fb45670aca49 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -370,8 +370,8 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf, dst_mbox = >mbox; dst_size = dst_mbox->mbox.tx_size - ALIGN(sizeof(*mbox_hdr), MBOX_MSG_ALIGN); - /* Check if msgs fit into destination area */ - if (mbox_hdr->msg_size > dst_size) + /* Check if msgs fit into destination area and has valid size */ + if (mbox_hdr->msg_size > dst_size || !mbox_hdr->msg_size) return -EINVAL; dst_mdev = _mbox->mbox.dev[0]; @@ -526,10 +526,10 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work) end: offset = mbox->rx_start + msg->next_msgoff; + if (mdev->msgs_acked == (vf_mbox->up_num_msgs - 1)) + __otx2_mbox_reset(mbox, 0); mdev->msgs_acked++; } - - otx2_mbox_reset(mbox, vf_idx); } static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) @@ -803,10 +803,11 @@ static void otx2_pfaf_mbox_handler(struct work_struct *work) msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2_process_pfaf_mbox_msg(pf, msg); offset = mbox->rx_start + msg->next_msgoff; + if (mdev->msgs_acked == (af_mbox->num_msgs - 1)) + __otx2_mbox_reset(mbox, 0);
[PATCH 5.8 109/124] perf: Fix task_function_call() error handling
From: Kajol Jain [ Upstream commit 6d6b8b9f4fceab7266ca03d194f60ec72bd4b654 ] The error handling introduced by commit: 2ed6edd33a21 ("perf: Add cond_resched() to task_function_call()") looses any return value from smp_call_function_single() that is not {0, -EINVAL}. This is a problem because it will return -EXNIO when the target CPU is offline. Worse, in that case it'll turn into an infinite loop. Fixes: 2ed6edd33a21 ("perf: Add cond_resched() to task_function_call()") Reported-by: Srikar Dronamraju Signed-off-by: Kajol Jain Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Barret Rhoden Tested-by: Srikar Dronamraju Link: https://lkml.kernel.org/r/20200827064732.20860-1-kj...@linux.ibm.com Signed-off-by: Sasha Levin --- kernel/events/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 856d98c36f562..fd8cd00099dae 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -99,7 +99,7 @@ static void remote_function(void *data) * retry due to any failures in smp_call_function_single(), such as if the * task_cpu() goes offline concurrently. * - * returns @func return value or -ESRCH when the process isn't running + * returns @func return value or -ESRCH or -ENXIO when the process isn't running */ static int task_function_call(struct task_struct *p, remote_function_f func, void *info) @@ -115,7 +115,8 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info) for (;;) { ret = smp_call_function_single(task_cpu(p), remote_function, , 1); - ret = !ret ? data.ret : -EAGAIN; + if (!ret) + ret = data.ret; if (ret != -EAGAIN) break; -- 2.25.1
[PATCH 5.8 059/124] platform/x86: fix kconfig dependency warning for LG_LAPTOP
From: Necip Fazil Yildiran [ Upstream commit 8f0c01e85c4d2e1a233e6f4d7ab16c9f8b2a ] When LG_LAPTOP is enabled and NEW_LEDS is disabled, it results in the following Kbuild warning: WARNING: unmet direct dependencies detected for LEDS_CLASS Depends on [n]: NEW_LEDS [=n] Selected by [y]: - LG_LAPTOP [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y] && ACPI_WMI [=y] && INPUT [=y] The reason is that LG_LAPTOP selects LEDS_CLASS without depending on or selecting NEW_LEDS while LEDS_CLASS is subordinate to NEW_LEDS. Honor the kconfig menu hierarchy to remove kconfig dependency warnings. Fixes: dbf0c5a6b1f8 ("platform/x86: Add LG Gram laptop special features driver") Signed-off-by: Necip Fazil Yildiran Reviewed-by: Hans de Goede Acked-by: mark gross Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 0581a54cf562f..e1668a9538c8f 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1091,6 +1091,7 @@ config LG_LAPTOP depends on ACPI_WMI depends on INPUT select INPUT_SPARSEKMAP + select NEW_LEDS select LEDS_CLASS help This driver adds support for hotkeys as well as control of keyboard -- 2.25.1
[PATCH 5.8 046/124] btrfs: move btrfs_scratch_superblocks into btrfs_dev_replace_finishing
From: Josef Bacik [ Upstream commit 313b085851c13ca08320372a05a7047ea25d3dd4 ] We need to move the closing of the src_device out of all the device replace locking, but we definitely want to zero out the superblock before we commit the last time to make sure the device is properly removed. Handle this by pushing btrfs_scratch_superblocks into btrfs_dev_replace_finishing, and then later on we'll move the src_device closing and freeing stuff where we need it to be. Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/dev-replace.c | 3 +++ fs/btrfs/volumes.c | 12 +++- fs/btrfs/volumes.h | 3 +++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index eb86e4b88c73a..26c9da82e6a91 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -783,6 +783,9 @@ error: /* replace the sysfs entry */ btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, src_device); btrfs_sysfs_update_devid(tgt_device); + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, _device->dev_state)) + btrfs_scratch_superblocks(fs_info, src_device->bdev, + src_device->name->str); btrfs_rm_dev_replace_free_srcdev(src_device); /* write back the superblocks */ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 956eb0d6bc584..8b5f666a3ea66 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1999,9 +1999,9 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info) return num_devices; } -static void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, - struct block_device *bdev, - const char *device_path) +void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, + struct block_device *bdev, + const char *device_path) { struct btrfs_super_block *disk_super; int copy_num; @@ -2224,12 +2224,6 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev) struct btrfs_fs_info *fs_info = srcdev->fs_info; struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; - if (test_bit(BTRFS_DEV_STATE_WRITEABLE, >dev_state)) { - /* zero out the old super if it is writable */ - btrfs_scratch_superblocks(fs_info, srcdev->bdev, - srcdev->name->str); - } - btrfs_close_bdev(srcdev); synchronize_rcu(); btrfs_free_device(srcdev); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 75af2334b2e37..83862e27f5663 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -573,6 +573,9 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, struct btrfs_device *failing_dev); +void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, + struct block_device *bdev, + const char *device_path); int btrfs_bg_type_to_factor(u64 flags); const char *btrfs_bg_type_to_raid_name(u64 flags); -- 2.25.1
[PATCH 5.8 089/124] net/mlx5e: Add resiliency in Striding RQ mode for packets larger than MTU
From: Aya Levin [ Upstream commit c3c9402373fe20e2d08c04f437ce4dcd252cffb2 ] Prior to this fix, in Striding RQ mode the driver was vulnerable when receiving packets in the range (stride size - headroom, stride size]. Where stride size is calculated by mtu+headroom+tailroom aligned to the closest power of 2. Usually, this filtering is performed by the HW, except for a few cases: - Between 2 VFs over the same PF with different MTUs - On bluefield, when the host physical function sets a larger MTU than the ARM has configured on its representor and uplink representor. When the HW filtering is not present, packets that are larger than MTU might be harmful for the RQ's integrity, in the following impacts: 1) Overflow from one WQE to the next, causing a memory corruption that in most cases is unharmful: as the write happens to the headroom of next packet, which will be overwritten by build_skb(). In very rare cases, high stress/load, this is harmful. When the next WQE is not yet reposted and points to existing SKB head. 2) Each oversize packet overflows to the headroom of the next WQE. On the last WQE of the WQ, where addresses wrap-around, the address of the remainder headroom does not belong to the next WQE, but it is out of the memory region range. This results in a HW CQE error that moves the RQ into an error state. Solution: Add a page buffer at the end of each WQE to absorb the leak. Actually the maximal overflow size is headroom but since all memory units must be of the same size, we use page size to comply with UMR WQEs. The increase in memory consumption is of a single page per RQ. Initialize the mkey with all MTTs pointing to a default page. When the channels are activated, UMR WQEs will redirect the RX WQEs to the actual memory from the RQ's pool, while the overflow MTTs remain mapped to the default page. Fixes: 73281b78a37a ("net/mlx5e: Derive Striding RQ size from MTU") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 8 ++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 55 +-- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 76b23ba7a4687..cb3857e136d62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -90,7 +90,12 @@ struct page_pool; #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) -#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8)) +/* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between + * WQEs, This page will absorb write overflow by the hardware, when + * receiving packets larger than MTU. These oversize packets are + * dropped by the driver at a later stage. + */ +#define MLX5E_REQUIRED_WQE_MTTS(ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8)) #define MLX5E_LOG_ALIGNED_MPWQE_PPW(ilog2(MLX5E_REQUIRED_WQE_MTTS)) #define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) #define MLX5E_MAX_RQ_NUM_MTTS \ @@ -621,6 +626,7 @@ struct mlx5e_rq { u32rqn; struct mlx5_core_dev *mdev; struct mlx5_core_mkey umr_mkey; + struct mlx5e_dma_info wqe_overflow; /* XDP read-mostly */ struct xdp_rxq_infoxdp_rxq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cccf65fc116ee..b23ad0b6761c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -258,12 +258,17 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, u64 npages, u8 page_shift, -struct mlx5_core_mkey *umr_mkey) +struct mlx5_core_mkey *umr_mkey, +dma_addr_t filler_addr) { - int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + struct mlx5_mtt *mtt; + int inlen; void *mkc; u32 *in; int err; + int i; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages; in = kvzalloc(inlen, GFP_KERNEL); if (!in) @@ -283,6 +288,18 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, MLX5_SET(mkc, mkc, translations_octword_size, MLX5_MTT_OCTW(npages)); MLX5_SET(mkc, mkc, log_page_size, page_shift); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, +MLX5_MTT_OCTW(npages)); + + /* Initialize the mkey with all MTTs pointing to a default +* page (filler_addr). When the channels are activated, UMR +* WQEs will redirect
[PATCH 5.8 110/124] mmc: core: dont set limits.discard_granularity as 0
From: Coly Li [ Upstream commit 4243219141b67d7c2fdb2d8073c17c539b9263eb ] In mmc_queue_setup_discard() the mmc driver queue's discard_granularity might be set as 0 (when card->pref_erase > max_discard) while the mmc device still declares to support discard operation. This is buggy and triggered the following kernel warning message, WARNING: CPU: 0 PID: 135 at __blkdev_issue_discard+0x200/0x294 CPU: 0 PID: 135 Comm: f2fs_discard-17 Not tainted 5.9.0-rc6 #1 Hardware name: Google Kevin (DT) pstate: 0005 (nzcv daif -PAN -UAO BTYPE=--) pc : __blkdev_issue_discard+0x200/0x294 lr : __blkdev_issue_discard+0x54/0x294 sp : 800011dd3b10 x29: 800011dd3b10 x28: x27: 800011dd3cc4 x26: 800011dd3e18 x25: 0004e69b x24: 0c40 x23: f1deaaf0 x22: f2849200 x21: 002734d8 x20: 0008 x19: x18: x17: x16: x15: x14: 0394 x13: x12: x11: x10: 08b0 x9 : 800011dd3cb0 x8 : 0004e69b x7 : x6 : f1926400 x5 : f1940800 x4 : x3 : 0c40 x2 : 0008 x1 : 002734d8 x0 : Call trace: __blkdev_issue_discard+0x200/0x294 __submit_discard_cmd+0x128/0x374 __issue_discard_cmd_orderly+0x188/0x244 __issue_discard_cmd+0x2e8/0x33c issue_discard_thread+0xe8/0x2f0 kthread+0x11c/0x120 ret_from_fork+0x10/0x1c ---[ end trace e4c8023d33dfe77a ]--- This patch fixes the issue by setting discard_granularity as SECTOR_SIZE instead of 0 when (card->pref_erase > max_discard) is true. Now no more complain from __blkdev_issue_discard() for the improper value of discard granularity. This issue is exposed after commit b35fd7422c2f ("block: check queue's limits.discard_granularity in __blkdev_issue_discard()"), a "Fixes:" tag is also added for the commit to make sure people won't miss this patch after applying the change of __blkdev_issue_discard(). Fixes: e056a1b5b67b ("mmc: queue: let host controllers specify maximum discard timeout") Fixes: b35fd7422c2f ("block: check queue's limits.discard_granularity in __blkdev_issue_discard()"). Reported-and-tested-by: Vicente Bergas Signed-off-by: Coly Li Acked-by: Adrian Hunter Cc: Ulf Hansson Link: https://lore.kernel.org/r/20201002013852.51968-1-col...@suse.de Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/core/queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 4b1eb89b401d9..1ad518821157f 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -190,7 +190,7 @@ static void mmc_queue_setup_discard(struct request_queue *q, q->limits.discard_granularity = card->pref_erase << 9; /* granularity must not be greater than max. discard */ if (card->pref_erase > max_discard) - q->limits.discard_granularity = 0; + q->limits.discard_granularity = SECTOR_SIZE; if (mmc_can_secure_erase_trim(card)) blk_queue_flag_set(QUEUE_FLAG_SECERASE, q); } -- 2.25.1
[PATCH 5.8 112/124] mm: khugepaged: recalculate min_free_kbytes after memory hotplug as expected by khugepaged
From: Vijay Balakrishna commit 4aab2be0983031a05cb4a19696c9da5749523426 upstream. When memory is hotplug added or removed the min_free_kbytes should be recalculated based on what is expected by khugepaged. Currently after hotplug, min_free_kbytes will be set to a lower default and higher default set when THP enabled is lost. This change restores min_free_kbytes as expected for THP consumers. [vij...@linux.microsoft.com: v5] Link: https://lkml.kernel.org/r/1601398153-5517-1-git-send-email-vij...@linux.microsoft.com Fixes: f000565adb77 ("thp: set recommended min free kbytes") Signed-off-by: Vijay Balakrishna Signed-off-by: Andrew Morton Reviewed-by: Pavel Tatashin Acked-by: Michal Hocko Cc: Allen Pais Cc: Andrea Arcangeli Cc: "Kirill A. Shutemov" Cc: Oleg Nesterov Cc: Song Liu Cc: Link: https://lkml.kernel.org/r/1600305709-2319-2-git-send-email-vij...@linux.microsoft.com Link: https://lkml.kernel.org/r/1600204258-13683-1-git-send-email-vij...@linux.microsoft.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/khugepaged.h |5 + mm/khugepaged.c| 13 +++-- mm/page_alloc.c|3 +++ 3 files changed, 19 insertions(+), 2 deletions(-) --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -15,6 +15,7 @@ extern int __khugepaged_enter(struct mm_ extern void __khugepaged_exit(struct mm_struct *mm); extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma, unsigned long vm_flags); +extern void khugepaged_min_free_kbytes_update(void); #ifdef CONFIG_SHMEM extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr); #else @@ -85,6 +86,10 @@ static inline void collapse_pte_mapped_t unsigned long addr) { } + +static inline void khugepaged_min_free_kbytes_update(void) +{ +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_KHUGEPAGED_H */ --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -56,6 +56,9 @@ enum scan_result { #define CREATE_TRACE_POINTS #include +static struct task_struct *khugepaged_thread __read_mostly; +static DEFINE_MUTEX(khugepaged_mutex); + /* default scan 8*512 pte (or vmas) every 30 second */ static unsigned int khugepaged_pages_to_scan __read_mostly; static unsigned int khugepaged_pages_collapsed; @@ -2304,8 +2307,6 @@ static void set_recommended_min_free_kby int start_stop_khugepaged(void) { - static struct task_struct *khugepaged_thread __read_mostly; - static DEFINE_MUTEX(khugepaged_mutex); int err = 0; mutex_lock(_mutex); @@ -2332,3 +2333,11 @@ fail: mutex_unlock(_mutex); return err; } + +void khugepaged_min_free_kbytes_update(void) +{ + mutex_lock(_mutex); + if (khugepaged_enabled() && khugepaged_thread) + set_recommended_min_free_kbytes(); + mutex_unlock(_mutex); +} --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -69,6 +69,7 @@ #include #include #include +#include #include #include @@ -7884,6 +7885,8 @@ int __meminit init_per_zone_wmark_min(vo setup_min_slab_ratio(); #endif + khugepaged_min_free_kbytes_update(); + return 0; } postcore_initcall(init_per_zone_wmark_min)
[PATCH 5.8 124/124] net_sched: commit action insertions together
From: Cong Wang commit 0fedc63fadf0404a729e73a35349481c8009c02f upstream. syzbot is able to trigger a failure case inside the loop in tcf_action_init(), and when this happens we clean up with tcf_action_destroy(). But, as these actions are already inserted into the global IDR, other parallel process could free them before tcf_action_destroy(), then we will trigger a use-after-free. Fix this by deferring the insertions even later, after the loop, and committing all the insertions in a separate loop, so we will never fail in the middle of the insertions any more. One side effect is that the window between alloction and final insertion becomes larger, now it is more likely that the loop in tcf_del_walker() sees the placeholder -EBUSY pointer. So we have to check for error pointer in tcf_del_walker(). Reported-and-tested-by: syzbot+2287853d392e4b423...@syzkaller.appspotmail.com Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action") Cc: Vlad Buslov Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_api.c | 32 +++- 1 file changed, 23 insertions(+), 9 deletions(-) --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -307,6 +307,8 @@ static int tcf_del_walker(struct tcf_idr mutex_lock(>lock); idr_for_each_entry_ul(idr, p, tmp, id) { + if (IS_ERR(p)) + continue; ret = tcf_idr_release_unsafe(p); if (ret == ACT_P_DELETED) { module_put(ops->owner); @@ -891,14 +893,24 @@ static const struct nla_policy tcf_actio [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; -static void tcf_idr_insert(struct tc_action *a) +static void tcf_idr_insert_many(struct tc_action *actions[]) { - struct tcf_idrinfo *idrinfo = a->idrinfo; + int i; - mutex_lock(>lock); - /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ - WARN_ON(!IS_ERR(idr_replace(>action_idr, a, a->tcfa_index))); - mutex_unlock(>lock); + for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { + struct tc_action *a = actions[i]; + struct tcf_idrinfo *idrinfo; + + if (!a) + continue; + idrinfo = a->idrinfo; + mutex_lock(>lock); + /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if +* it is just created, otherwise this is just a nop. +*/ + idr_replace(>action_idr, a, a->tcfa_index); + mutex_unlock(>lock); + } } struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, @@ -995,9 +1007,6 @@ struct tc_action *tcf_action_init_1(stru return ERR_PTR(-EINVAL); } - if (err == ACT_P_CREATED) - tcf_idr_insert(a); - if (!name && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(>act_cookie, cookie); @@ -1053,6 +1062,11 @@ int tcf_action_init(struct net *net, str actions[i - 1] = act; } + /* We have to commit them all together, because if any error happened in +* between, we could not handle the failure gracefully. +*/ + tcf_idr_insert_many(actions); + *attr_size = tcf_action_full_attrs_size(sz); return i - 1;
[PATCH 5.8 121/124] net: usb: rtl8150: set random MAC address when set_ethernet_addr() fails
From: Anant Thazhemadam commit f45a4248ea4cc13ed50618ff066849f9587226b2 upstream. When get_registers() fails in set_ethernet_addr(),the uninitialized value of node_id gets copied over as the address. So, check the return value of get_registers(). If get_registers() executed successfully (i.e., it returns sizeof(node_id)), copy over the MAC address using ether_addr_copy() (instead of using memcpy()). Else, if get_registers() failed instead, a randomly generated MAC address is set as the MAC address instead. Reported-by: syzbot+abbc768b560c84d92...@syzkaller.appspotmail.com Tested-by: syzbot+abbc768b560c84d92...@syzkaller.appspotmail.com Acked-by: Petko Manolov Signed-off-by: Anant Thazhemadam Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/rtl8150.c | 16 1 file changed, 12 insertions(+), 4 deletions(-) --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -274,12 +274,20 @@ static int write_mii_word(rtl8150_t * de return 1; } -static inline void set_ethernet_addr(rtl8150_t * dev) +static void set_ethernet_addr(rtl8150_t *dev) { - u8 node_id[6]; + u8 node_id[ETH_ALEN]; + int ret; - get_registers(dev, IDR, sizeof(node_id), node_id); - memcpy(dev->netdev->dev_addr, node_id, sizeof(node_id)); + ret = get_registers(dev, IDR, sizeof(node_id), node_id); + + if (ret == sizeof(node_id)) { + ether_addr_copy(dev->netdev->dev_addr, node_id); + } else { + eth_hw_addr_random(dev->netdev); + netdev_notice(dev->netdev, "Assigned a random MAC address: %pM\n", + dev->netdev->dev_addr); + } } static int rtl8150_set_mac_address(struct net_device *netdev, void *p)
[PATCH 5.8 119/124] tty/vt: Do not warn when huge selection requested
From: Alexey Kardashevskiy commit 44c413d9a51752056d606bf6f312003ac1740fab upstream. The tty TIOCL_SETSEL ioctl allocates a memory buffer big enough for text selection area. The maximum allowed console size is VC_RESIZE_MAXCOL * VC_RESIZE_MAXROW == 32767*32767 == ~1GB and typical MAX_ORDER is set to allow allocations lot less than than (circa 16MB). So it is quite possible to trigger huge allocation (and syzkaller just did that) which is going to fail (which is fine) with a backtrace in mm/page_alloc.c at WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)) and this may trigger panic (if panic_on_warn is enabled) and leak kernel addresses to dmesg. This passes __GFP_NOWARN to kmalloc_array to avoid unnecessary user- triggered WARN_ON. Note that the error is not ignored and the warning is still printed. Signed-off-by: Alexey Kardashevskiy Link: https://lore.kernel.org/r/20200617070444.116704-1-...@ozlabs.ru Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -193,7 +193,7 @@ static int vc_selection_store_chars(stru /* Allocate a new buffer before freeing the old one ... */ /* chars can take up to 4 bytes with unicode */ bp = kmalloc_array((vc_sel.end - vc_sel.start) / 2 + 1, unicode ? 4 : 1, - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!bp) { printk(KERN_WARNING "selection: kmalloc() failed\n"); clear_selection();
[PATCH 5.8 085/124] net/mlx5: Avoid possible free of command entry while timeout comp handler
From: Eran Ben Elisha [ Upstream commit 50b2412b7e7862c5af0cbf4b10d93bc5c712d021 ] Upon command completion timeout, driver simulates a forced command completion. In a rare case where real interrupt for that command arrives simultaneously, it might release the command entry while the forced handler might still access it. Fix that by adding an entry refcount, to track current amount of allowed handlers. Command entry to be released only when this refcount is decremented to zero. Command refcount is always initialized to one. For callback commands, command completion handler is the symmetric flow to decrement it. For non-callback commands, it is wait_func(). Before ringing the doorbell, increment the refcount for the real completion handler. Once the real completion handler is called, it will decrement it. For callback commands, once the delayed work is scheduled, increment the refcount. Upon callback command completion handler, we will try to cancel the timeout callback. In case of success, we need to decrement the callback refcount as it will never run. In addition, gather the entry index free and the entry free into a one flow for all command types release. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 109 -- include/linux/mlx5/driver.h | 2 + 2 files changed, 73 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 1d91a0d0ab1d7..c0055f5479ce0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -69,12 +69,10 @@ enum { MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR= 0x10, }; -static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, - struct mlx5_cmd_msg *in, - struct mlx5_cmd_msg *out, - void *uout, int uout_size, - mlx5_cmd_cbk_t cbk, - void *context, int page_queue) +static struct mlx5_cmd_work_ent * +cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, void *uout, int uout_size, + mlx5_cmd_cbk_t cbk, void *context, int page_queue) { gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL; struct mlx5_cmd_work_ent *ent; @@ -83,6 +81,7 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, if (!ent) return ERR_PTR(-ENOMEM); + ent->idx= -EINVAL; ent->in = in; ent->out= out; ent->uout = uout; @@ -91,10 +90,16 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, ent->context= context; ent->cmd= cmd; ent->page_queue = page_queue; + refcount_set(>refcnt, 1); return ent; } +static void cmd_free_ent(struct mlx5_cmd_work_ent *ent) +{ + kfree(ent); +} + static u8 alloc_token(struct mlx5_cmd *cmd) { u8 token; @@ -109,7 +114,7 @@ static u8 alloc_token(struct mlx5_cmd *cmd) return token; } -static int alloc_ent(struct mlx5_cmd *cmd) +static int cmd_alloc_index(struct mlx5_cmd *cmd) { unsigned long flags; int ret; @@ -123,7 +128,7 @@ static int alloc_ent(struct mlx5_cmd *cmd) return ret < cmd->max_reg_cmds ? ret : -ENOMEM; } -static void free_ent(struct mlx5_cmd *cmd, int idx) +static void cmd_free_index(struct mlx5_cmd *cmd, int idx) { unsigned long flags; @@ -132,6 +137,22 @@ static void free_ent(struct mlx5_cmd *cmd, int idx) spin_unlock_irqrestore(>alloc_lock, flags); } +static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) +{ + refcount_inc(>refcnt); +} + +static void cmd_ent_put(struct mlx5_cmd_work_ent *ent) +{ + if (!refcount_dec_and_test(>refcnt)) + return; + + if (ent->idx >= 0) + cmd_free_index(ent->cmd, ent->idx); + + cmd_free_ent(ent); +} + static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) { return cmd->cmd_buf + (idx << cmd->log_stride); @@ -219,11 +240,6 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent) ent->ret = -ETIMEDOUT; } -static void free_cmd(struct mlx5_cmd_work_ent *ent) -{ - kfree(ent); -} - static int verify_signature(struct mlx5_cmd_work_ent *ent) { struct mlx5_cmd_mailbox *next = ent->out->next; @@ -842,6 +858,7 @@ static void cb_timeout_handler(struct work_struct *work) mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); +
[PATCH 5.8 118/124] net/mlx5e: Fix drivers declaration to support GRE offload
From: Aya Levin commit 3d093bc2369003b4ce6c3522d9b383e47c40045d upstream. Declare GRE offload support with respect to the inner protocol. Add a list of supported inner protocols on which the driver can offload checksum and GSO. For other protocols, inform the stack to do the needed operations. There is no noticeable impact on GRE performance. Fixes: 2729984149e6 ("net/mlx5e: Support TSO and TX checksum offloads for GRE tunnels") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++- 1 file changed, 18 insertions(+), 1 deletion(-) --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4323,6 +4323,21 @@ void mlx5e_del_vxlan_port(struct net_dev mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0); } +static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev, + struct sk_buff *skb) +{ + switch (skb->inner_protocol) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + case htons(ETH_P_TEB): + return true; + case htons(ETH_P_MPLS_UC): + case htons(ETH_P_MPLS_MC): + return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre); + } + return false; +} + static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, struct sk_buff *skb, netdev_features_t features) @@ -4345,7 +4360,9 @@ static netdev_features_t mlx5e_tunnel_fe switch (proto) { case IPPROTO_GRE: - return features; + if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb)) + return features; + break; case IPPROTO_IPIP: case IPPROTO_IPV6: if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP))
[PATCH 5.8 113/124] tcp: fix receive window update in tcp_add_backlog()
From: Eric Dumazet commit 86bccd0367130f481ca99ba91de1c6a5aa1c78c1 upstream. We got reports from GKE customers flows being reset by netfilter conntrack unless nf_conntrack_tcp_be_liberal is set to 1. Traces seemed to suggest ACK packet being dropped by the packet capture, or more likely that ACK were received in the wrong order. wscale=7, SYN and SYNACK not shown here. This ACK allows the sender to send 1871*128 bytes from seq 51359321 : New right edge of the window -> 51359321+1871*128=51598809 09:17:23.389210 IP A > B: Flags [.], ack 51359321, win 1871, options [nop,nop,TS val 10 ecr 999], length 0 09:17:23.389212 IP B > A: Flags [.], seq 51422681:51424089, ack 1577, win 268, options [nop,nop,TS val 999 ecr 10], length 1408 09:17:23.389214 IP A > B: Flags [.], ack 51422681, win 1376, options [nop,nop,TS val 10 ecr 999], length 0 09:17:23.389253 IP B > A: Flags [.], seq 51424089:51488857, ack 1577, win 268, options [nop,nop,TS val 999 ecr 10], length 64768 09:17:23.389272 IP A > B: Flags [.], ack 51488857, win 859, options [nop,nop,TS val 10 ecr 999], length 0 09:17:23.389275 IP B > A: Flags [.], seq 51488857:51521241, ack 1577, win 268, options [nop,nop,TS val 999 ecr 10], length 32384 Receiver now allows to send 606*128=77568 from seq 51521241 : New right edge of the window -> 51521241+606*128=51598809 09:17:23.389296 IP A > B: Flags [.], ack 51521241, win 606, options [nop,nop,TS val 10 ecr 999], length 0 09:17:23.389308 IP B > A: Flags [.], seq 51521241:51553625, ack 1577, win 268, options [nop,nop,TS val 999 ecr 10], length 32384 It seems the sender exceeds RWIN allowance, since 51611353 > 51598809 09:17:23.389346 IP B > A: Flags [.], seq 51553625:51611353, ack 1577, win 268, options [nop,nop,TS val 999 ecr 10], length 57728 09:17:23.389356 IP B > A: Flags [.], seq 51611353:51618393, ack 1577, win 268, options [nop,nop,TS val 999 ecr 10], length 7040 09:17:23.389367 IP A > B: Flags [.], ack 51611353, win 0, options [nop,nop,TS val 10 ecr 999], length 0 netfilter conntrack is not happy and sends RST 09:17:23.389389 IP A > B: Flags [R], seq 92176528, win 0, length 0 09:17:23.389488 IP B > A: Flags [R], seq 174478967, win 0, length 0 Now imagine ACK were delivered out of order and tcp_add_backlog() sets window based on wrong packet. New right edge of the window -> 51521241+859*128=51631193 Normally TCP stack handles OOO packets just fine, but it turns out tcp_add_backlog() does not. It can update the window field of the aggregated packet even if the ACK sequence of the last received packet is too old. Many thanks to Alexandre Ferrieux for independently reporting the issue and suggesting a fix. Fixes: 4f693b55c3d2 ("tcp: implement coalescing on backlog queue") Signed-off-by: Eric Dumazet Reported-by: Alexandre Ferrieux Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c |6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1787,12 +1787,12 @@ bool tcp_add_backlog(struct sock *sk, st __skb_pull(skb, hdrlen); if (skb_try_coalesce(tail, skb, , )) { - thtail->window = th->window; - TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq; - if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq)) + if (likely(!before(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))) { TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq; + thtail->window = th->window; + } /* We have to update both TCP_SKB_CB(tail)->tcp_flags and * thtail->fin, so that the fast path in tcp_rcv_established()
[PATCH 5.8 091/124] net/mlx5e: Fix VLAN cleanup flow
From: Aya Levin [ Upstream commit 8c7353b6f716436ad0bfda2b5c5524ab2dde5894 ] Prior to this patch unloading an interface in promiscuous mode with RX VLAN filtering feature turned off - resulted in a warning. This is due to a wrong condition in the VLAN rules cleanup flow, which left the any-vid rules in the VLAN steering table. These rules prevented destroying the flow group and the flow table. The any-vid rules are removed in 2 flows, but none of them remove it in case both promiscuous is set and VLAN filtering is off. Fix the issue by changing the condition of the VLAN table cleanup flow to clean also in case of promiscuous mode. mlx5_core :00:08.0: mlx5_destroy_flow_group:2123:(pid 28729): Flow group 20 wasn't destroyed, refcount > 1 mlx5_core :00:08.0: mlx5_destroy_flow_group:2123:(pid 28729): Flow group 19 wasn't destroyed, refcount > 1 mlx5_core :00:08.0: mlx5_destroy_flow_table:2112:(pid 28729): Flow table 262149 wasn't destroyed, refcount > 1 ... ... [ cut here ] FW pages counter is 11560 after reclaiming all pages WARNING: CPU: 1 PID: 28729 at drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c:660 mlx5_reclaim_startup_pages+0x178/0x230 [mlx5_core] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Call Trace: mlx5_function_teardown+0x2f/0x90 [mlx5_core] mlx5_unload_one+0x71/0x110 [mlx5_core] remove_one+0x44/0x80 [mlx5_core] pci_device_remove+0x3e/0xc0 device_release_driver_internal+0xfb/0x1c0 device_release_driver+0x12/0x20 pci_stop_bus_device+0x68/0x90 pci_stop_and_remove_bus_device+0x12/0x20 hv_eject_device_work+0x6f/0x170 [pci_hyperv] ? __schedule+0x349/0x790 process_one_work+0x206/0x400 worker_thread+0x34/0x3f0 ? process_one_work+0x400/0x400 kthread+0x126/0x140 ? kthread_park+0x90/0x90 ret_from_fork+0x22/0x30 ---[ end trace 6283bde8d26170dc ]--- Fixes: 9df30601c843 ("net/mlx5e: Restore vlan filter after seamless reset") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 73d3dc07331f1..c5be0cdfaf0fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -415,8 +415,12 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); - if (priv->fs.vlan.cvlan_filter_disabled && - !(priv->netdev->flags & IFF_PROMISC)) + WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, >state))); + + /* must be called after DESTROY bit is set and +* set_rx_mode is called and flushed +*/ + if (priv->fs.vlan.cvlan_filter_disabled) mlx5e_del_any_vid_rules(priv); } -- 2.25.1
[PATCH 5.8 117/124] net/tls: race causes kernel panic
From: Rohit Maheshwari commit 38f7e1c0c43dd25b06513137bb6fd35476f9ec6d upstream. BUG: kernel NULL pointer dereference, address: 00b8 #PF: supervisor read access in kernel mode #PF: error_code(0x) - not-present page PGD 8008b6fef067 P4D 8008b6fef067 PUD 8b6fe6067 PMD 0 Oops: [#1] SMP PTI CPU: 12 PID: 23871 Comm: kworker/12:80 Kdump: loaded Tainted: G S 5.9.0-rc3+ #1 Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.1 03/29/2018 Workqueue: events tx_work_handler [tls] RIP: 0010:tx_work_handler+0x1b/0x70 [tls] Code: dc fe ff ff e8 16 d4 a3 f6 66 0f 1f 44 00 00 0f 1f 44 00 00 55 53 48 8b 6f 58 48 8b bd a0 04 00 00 48 85 ff 74 1c 48 8b 47 28 <48> 8b 90 b8 00 00 00 83 e2 02 75 0c f0 48 0f ba b0 b8 00 00 00 00 RSP: 0018:a44ace61fe88 EFLAGS: 00010286 RAX: RBX: 91da9e45cc30 RCX: dead0122 RDX: 0001 RSI: 91da9e45cc38 RDI: 91d95efac200 RBP: 91da133fd780 R08: R09: 73746e657665 R10: 8080808080808080 R11: R12: 91dad7d30700 R13: 91dab6561080 R14: 091dad7d3070 R15: 91da9e45cc38 FS: () GS:91dad7d0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 00b8 CR3: 000906478003 CR4: 003706e0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: process_one_work+0x1a7/0x370 worker_thread+0x30/0x370 ? process_one_work+0x370/0x370 kthread+0x114/0x130 ? kthread_park+0x80/0x80 ret_from_fork+0x22/0x30 tls_sw_release_resources_tx() waits for encrypt_pending, which can have race, so we need similar changes as in commit 0cada33241d9de205522e3858b18e506ca5cce2c here as well. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Signed-off-by: Rohit Maheshwari Acked-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c |9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2142,10 +2142,15 @@ void tls_sw_release_resources_tx(struct struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; + int pending; /* Wait for any pending async encryptions to complete */ - smp_store_mb(ctx->async_notify, true); - if (atomic_read(>encrypt_pending)) + spin_lock_bh(>encrypt_compl_lock); + ctx->async_notify = true; + pending = atomic_read(>encrypt_pending); + spin_unlock_bh(>encrypt_compl_lock); + + if (pending) crypto_wait_req(-EINPROGRESS, >async_wait); tls_tx_records(sk, -1);
[PATCH 5.8 088/124] net/mlx5: Fix request_irqs error flow
From: Maor Gottlieb [ Upstream commit 732ebfab7fe96b7ac9a3df3208f14752a4bb6db3 ] Fix error flow handling in request_irqs which try to free irq that we failed to request. It fixes the below trace. WARNING: CPU: 1 PID: 7587 at kernel/irq/manage.c:1684 free_irq+0x4d/0x60 CPU: 1 PID: 7587 Comm: bash Tainted: GW OE 4.15.15-1.el7MELLANOXsmp-x86_64 #1 Hardware name: Advantech SKY-6200/SKY-6200, BIOS F2.00 08/06/2020 RIP: 0010:free_irq+0x4d/0x60 RSP: 0018:c9000ef47af0 EFLAGS: 00010282 RAX: 88001476ae00 RBX: 0655 RCX: RDX: 88001476ae00 RSI: c9000ef47ab8 RDI: 8800398bb478 RBP: 88001476a838 R08: 88001476ae00 R09: 156d R10: R11: 0004 R12: 88001476a838 R13: 0006 R14: 88001476a888 R15: ffe4 FS: 7efeadd32740() GS:88047fc4() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7fc9cc010008 CR3: 0001a2380004 CR4: 007606e0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 PKRU: 5554 Call Trace: mlx5_irq_table_create+0x38d/0x400 [mlx5_core] ? atomic_notifier_chain_register+0x50/0x60 mlx5_load_one+0x7ee/0x1130 [mlx5_core] init_one+0x4c9/0x650 [mlx5_core] pci_device_probe+0xb8/0x120 driver_probe_device+0x2a1/0x470 ? driver_allows_async_probing+0x30/0x30 bus_for_each_drv+0x54/0x80 __device_attach+0xa3/0x100 pci_bus_add_device+0x4a/0x90 pci_iov_add_virtfn+0x2dc/0x2f0 pci_enable_sriov+0x32e/0x420 mlx5_core_sriov_configure+0x61/0x1b0 [mlx5_core] ? kstrtoll+0x22/0x70 num_vf_store+0x4b/0x70 [mlx5_core] kernfs_fop_write+0x102/0x180 __vfs_write+0x26/0x140 ? rcu_all_qs+0x5/0x80 ? _cond_resched+0x15/0x30 ? __sb_start_write+0x41/0x80 vfs_write+0xad/0x1a0 SyS_write+0x42/0x90 do_syscall_64+0x60/0x110 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Fixes: 24163189da48 ("net/mlx5: Separate IRQ request/free from EQ life cycle") Signed-off-by: Maor Gottlieb Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 373981a659c7c..6fd9749203944 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -115,7 +115,7 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec) return 0; err_request_irq: - for (; i >= 0; i--) { + while (i--) { struct mlx5_irq *irq = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); -- 2.25.1
[PATCH 5.8 120/124] Input: ati_remote2 - add missing newlines when printing module parameters
From: Xiongfeng Wang commit 37bd9e803daea816f2dc2c8f6dc264097eb3ebd2 upstream. When I cat some module parameters by sysfs, it displays as follows. It's better to add a newline for easy reading. root@syzkaller:~# cat /sys/module/ati_remote2/parameters/mode_mask 0x1froot@syzkaller:~# cat /sys/module/ati_remote2/parameters/channel_mask 0xroot@syzkaller:~# Signed-off-by: Xiongfeng Wang Link: https://lore.kernel.org/r/20200720092148.9320-1-wangxiongfe...@huawei.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/ati_remote2.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/drivers/input/misc/ati_remote2.c +++ b/drivers/input/misc/ati_remote2.c @@ -68,7 +68,7 @@ static int ati_remote2_get_channel_mask( { pr_debug("%s()\n", __func__); - return sprintf(buffer, "0x%04x", *(unsigned int *)kp->arg); + return sprintf(buffer, "0x%04x\n", *(unsigned int *)kp->arg); } static int ati_remote2_set_mode_mask(const char *val, @@ -84,7 +84,7 @@ static int ati_remote2_get_mode_mask(cha { pr_debug("%s()\n", __func__); - return sprintf(buffer, "0x%02x", *(unsigned int *)kp->arg); + return sprintf(buffer, "0x%02x\n", *(unsigned int *)kp->arg); } static unsigned int channel_mask = ATI_REMOTE2_MAX_CHANNEL_MASK;
[PATCH 5.8 123/124] net_sched: defer tcf_idr_insert() in tcf_action_init_1()
From: Cong Wang commit e49d8c22f1261c43a986a7fdbf677ac309682a07 upstream. All TC actions call tcf_idr_insert() for new action at the end of their ->init(), so we can actually move it to a central place in tcf_action_init_1(). And once the action is inserted into the global IDR, other parallel process could free it immediately as its refcnt is still 1, so we can not fail after this, we need to move it after the goto action validation to avoid handling the failure case after insertion. This is found during code review, is not directly triggered by syzbot. And this prepares for the next patch. Cc: Vlad Buslov Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/act_api.h |2 -- net/sched/act_api.c| 38 -- net/sched/act_bpf.c|4 +--- net/sched/act_connmark.c |1 - net/sched/act_csum.c |3 --- net/sched/act_ct.c |2 -- net/sched/act_ctinfo.c |3 --- net/sched/act_gact.c |2 -- net/sched/act_gate.c |3 --- net/sched/act_ife.c|3 --- net/sched/act_ipt.c|2 -- net/sched/act_mirred.c |2 -- net/sched/act_mpls.c |2 -- net/sched/act_nat.c|3 --- net/sched/act_pedit.c |2 -- net/sched/act_police.c |2 -- net/sched/act_sample.c |2 -- net/sched/act_simple.c |2 -- net/sched/act_skbedit.c|2 -- net/sched/act_skbmod.c |2 -- net/sched/act_tunnel_key.c |3 --- net/sched/act_vlan.c |2 -- 22 files changed, 21 insertions(+), 66 deletions(-) --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -166,8 +166,6 @@ int tcf_idr_create_from_flags(struct tc_ struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, u32 flags); -void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); - void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind); --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -467,17 +467,6 @@ int tcf_idr_create_from_flags(struct tc_ } EXPORT_SYMBOL(tcf_idr_create_from_flags); -void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a) -{ - struct tcf_idrinfo *idrinfo = tn->idrinfo; - - mutex_lock(>lock); - /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ - WARN_ON(!IS_ERR(idr_replace(>action_idr, a, a->tcfa_index))); - mutex_unlock(>lock); -} -EXPORT_SYMBOL(tcf_idr_insert); - /* Cleanup idr index that was allocated but not initialized. */ void tcf_idr_cleanup(struct tc_action_net *tn, u32 index) @@ -902,6 +891,16 @@ static const struct nla_policy tcf_actio [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; +static void tcf_idr_insert(struct tc_action *a) +{ + struct tcf_idrinfo *idrinfo = a->idrinfo; + + mutex_lock(>lock); + /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ + WARN_ON(!IS_ERR(idr_replace(>action_idr, a, a->tcfa_index))); + mutex_unlock(>lock); +} + struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, @@ -989,6 +988,16 @@ struct tc_action *tcf_action_init_1(stru if (err < 0) goto err_mod; + if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && + !rcu_access_pointer(a->goto_chain)) { + tcf_action_destroy_1(a, bind); + NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); + return ERR_PTR(-EINVAL); + } + + if (err == ACT_P_CREATED) + tcf_idr_insert(a); + if (!name && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(>act_cookie, cookie); @@ -1002,13 +1011,6 @@ struct tc_action *tcf_action_init_1(stru if (err != ACT_P_CREATED) module_put(a_o->owner); - if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && - !rcu_access_pointer(a->goto_chain)) { - tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); - return ERR_PTR(-EINVAL); - } - return a; err_mod: --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -365,9 +365,7 @@ static int tcf_bpf_init(struct net *net, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (res == ACT_P_CREATED) { - tcf_idr_insert(tn, *act); - } else { + if (res != ACT_P_CREATED) { /* make sure the program being replaced is no longer
[PATCH 5.8 111/124] mm: validate inode in mapping_set_error()
From: Minchan Kim commit 8b7b2eb131d3476062ffd34358785b44be25172f upstream. The swap address_space doesn't have host. Thus, it makes kernel crash once swap write meets error. Fix it. Fixes: 735e4ae5ba28 ("vfs: track per-sb writeback errors and report them to syncfs") Signed-off-by: Minchan Kim Signed-off-by: Andrew Morton Acked-by: Jeff Layton Cc: Jan Kara Cc: Andres Freund Cc: Matthew Wilcox Cc: Al Viro Cc: Christoph Hellwig Cc: Dave Chinner Cc: David Howells Cc: Link: https://lkml.kernel.org/r/2020101650.750063-1-minc...@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/pagemap.h |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -54,7 +54,8 @@ static inline void mapping_set_error(str __filemap_set_wb_err(mapping, error); /* Record it in superblock */ - errseq_set(>host->i_sb->s_wb_err, error); + if (mapping->host) + errseq_set(>host->i_sb->s_wb_err, error); /* Record it in flags for now, for legacy callers */ if (error == -ENOSPC)
Re: [PATCH v2 2/2] [RFC] CPUFreq: Add support for cpu-perf-dependencies
On 10/12/20 11:59 AM, Ionela Voinescu wrote: On Monday 12 Oct 2020 at 11:22:57 (+0100), Lukasz Luba wrote: [..] I thought about it and looked for other platforms' DT to see if can reuse existing opp information. Unfortunately I don't think it is optimal. The reason being that, because cpus have the same opp table it does not necessarily mean that they share a clock wire. It just tells us that they have the same capabilities (literally just tells us they have the same V/f op points). Unless I am missing something? When comparing with ACPI/_PSD it becomes more intuitive that there is no equivalent way to reveal "perf-dependencies" in DT. You should be able to by examining the clock tree. But perhaps SCMI abstracts all that and just presents virtual clocks without parent clocks available to determine what clocks are shared? Fix SCMI if that's the case. True, the SCMI clock does not support discovery of clock tree: (from 4.6.1 Clock management protocol background) 'The protocol does not cover discovery of the clock tree, which must be described through firmware tables instead.' [1] In this situation, would it make sense, instead of this binding from patch 1/2, create a binding for internal firmware/scmi node? Something like: firmware { scmi { ... scmi-perf-dep { compatible = "arm,scmi-perf-dependencies"; cpu-perf-dep0 { cpu-perf-affinity = <>, <>; }; cpu-perf-dep1 { cpu-perf-affinity = <>, <>; }; cpu-perf-dep2 { cpu-perf-affinity = <>; }; }; }; }; The code which is going to parse the binding would be inside the scmi perf protocol code and used via API by scmi-cpufreq.c. While SCMI cpufreq would be able to benefit from the functionality that Nicola is trying to introduce, it's not the only driver, and more importantly, it's not *going* to be the only driver benefiting from this. Currently there is also qcom-cpufreq-hw.c and the future mediatek-cpufreq-hw.c that is currently under review [1]. They both do their frequency setting by interacting with HW/FW, and could either take or update their OPP tables from there. Therefore, if the platform would require it, they could also expose different controls for frequency setting and could benefit from additional information about clock domains (either through opp-shared or the new entries in Nicola's patch), without driver changes. Another point to be made is that I strongly believe this is going to be the norm in the future. Directly setting PLLs and regulator voltages has been proven unsafe and unsecure. Therefore, I see this as support for a generic cpufreq feature (a hardware coordination type), rather than support for a specific driver. [1] https://lkml.org/lkml/2020/9/10/11 Now regarding the 'dependent_cpus' mask. We could avoid adding a new field 'dependent_cpus' in policy struct, but I am not sure of one bit - Frequency Invariant Engine, (which is also not fixed by just adding a new cpumask). ^^ Let's take it step by step.. We have 3 subsystems to fix: 1. EAS - EM has API function which takes custom cpumask, so no issue, ^^ keep in mind that EAS it's using the max aggregation method that schedutil is using. So if we are to describe the functionality correctly, it needs both a cpumask describing the frequency domains and an aggregation method. EAS does not use schedutil max agregation, it calculates max_util internally. The compute_energy() loops through the CPUs in the domain and takes the utilization from them via schedutil_cpu_util(cpu_rq(cpu)). It figures out max_util and then em_cpu_energy() maps it to next frequency for the cluster. It just needs proper utilization from CPUs, which is taken from run-queues, which is a sum of utilization of tasks being there. This leads to problem how we account utilization of a task. This is the place where the FIE is involved. EAS assumes the utilization is calculated properly. fix would be to use it via the scmi-cpufreq.c 2. IPA (for calculating the power of a cluster, not whole thermal needs this knowledge about 'dependent cpus') - this can be fixed internally 3. Frequency Invariant Engine (FIE) - currently it relies on schedutil filtering and providing max freq of all cpus in the cluster into the FIE; this info is then populated to all 'related_cpus' which will have this freq (we know, because there is no other freq requests); Issues: 3.1. Schedutil is not going to check all cpus in the cluster to take max freq, which is then passed into the cpufreq driver and FIE 3.2. FIE
[PATCH 5.8 115/124] net/core: check length before updating Ethertype in skb_mpls_{push,pop}
From: Guillaume Nault commit 4296adc3e32f5d544a95061160fe7e127be1b9ff upstream. Openvswitch allows to drop a packet's Ethernet header, therefore skb_mpls_push() and skb_mpls_pop() might be called with ethernet=true and mac_len=0. In that case the pointer passed to skb_mod_eth_type() doesn't point to an Ethernet header and the new Ethertype is written at unexpected locations. Fix this by verifying that mac_len is big enough to contain an Ethernet header. Fixes: fa4e0f8855fc ("net/sched: fix corrupted L2 header with MPLS 'push' and 'pop' actions") Signed-off-by: Guillaume Nault Acked-by: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5621,7 +5621,7 @@ int skb_mpls_push(struct sk_buff *skb, _ lse->label_stack_entry = mpls_lse; skb_postpush_rcsum(skb, lse, MPLS_HLEN); - if (ethernet) + if (ethernet && mac_len >= ETH_HLEN) skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto); skb->protocol = mpls_proto; @@ -5661,7 +5661,7 @@ int skb_mpls_pop(struct sk_buff *skb, __ skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); - if (ethernet) { + if (ethernet && mac_len >= ETH_HLEN) { struct ethhdr *hdr; /* use mpls_hdr() to get ethertype to account for VLANs. */
[PATCH 5.8 122/124] net: qrtr: ns: Protect radix_tree_deref_slot() using rcu read locks
From: Manivannan Sadhasivam commit a7809ff90ce6c48598d3c4ab54eb599bec1e9c42 upstream. The rcu read locks are needed to avoid potential race condition while dereferencing radix tree from multiple threads. The issue was identified by syzbot. Below is the crash report: = WARNING: suspicious RCU usage 5.7.0-syzkaller #0 Not tainted - include/linux/radix-tree.h:176 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by kworker/u4:1/21: #0: 88821b097938 ((wq_completion)qrtr_ns_handler){+.+.}-{0:0}, at: spin_unlock_irq include/linux/spinlock.h:403 [inline] #0: 88821b097938 ((wq_completion)qrtr_ns_handler){+.+.}-{0:0}, at: process_one_work+0x6df/0xfd0 kernel/workqueue.c:2241 #1: c9dd7d80 ((work_completion)(_ns.work)){+.+.}-{0:0}, at: process_one_work+0x71e/0xfd0 kernel/workqueue.c:2243 stack backtrace: CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.7.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: qrtr_ns_handler qrtr_ns_worker Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1e9/0x30e lib/dump_stack.c:118 radix_tree_deref_slot include/linux/radix-tree.h:176 [inline] ctrl_cmd_new_lookup net/qrtr/ns.c:558 [inline] qrtr_ns_worker+0x2aff/0x4500 net/qrtr/ns.c:674 process_one_work+0x76e/0xfd0 kernel/workqueue.c:2268 worker_thread+0xa7f/0x1450 kernel/workqueue.c:2414 kthread+0x353/0x380 kernel/kthread.c:268 Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Reported-and-tested-by: syzbot+0f84f6eed90503da7...@syzkaller.appspotmail.com Signed-off-by: Manivannan Sadhasivam Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/qrtr/ns.c | 34 +- 1 file changed, 25 insertions(+), 9 deletions(-) --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -193,12 +193,13 @@ static int announce_servers(struct socka struct qrtr_server *srv; struct qrtr_node *node; void __rcu **slot; - int ret; + int ret = 0; node = node_get(qrtr_ns.local_node); if (!node) return 0; + rcu_read_lock(); /* Announce the list of servers registered in this node */ radix_tree_for_each_slot(slot, >servers, , 0) { srv = radix_tree_deref_slot(slot); @@ -206,11 +207,14 @@ static int announce_servers(struct socka ret = service_announce_new(sq, srv); if (ret < 0) { pr_err("failed to announce new service\n"); - return ret; + goto err_out; } } - return 0; +err_out: + rcu_read_unlock(); + + return ret; } static struct qrtr_server *server_add(unsigned int service, @@ -335,7 +339,7 @@ static int ctrl_cmd_bye(struct sockaddr_ struct qrtr_node *node; void __rcu **slot; struct kvec iv; - int ret; + int ret = 0; iv.iov_base = iv.iov_len = sizeof(pkt); @@ -344,11 +348,13 @@ static int ctrl_cmd_bye(struct sockaddr_ if (!node) return 0; + rcu_read_lock(); /* Advertise removal of this client to all servers of remote node */ radix_tree_for_each_slot(slot, >servers, , 0) { srv = radix_tree_deref_slot(slot); server_del(node, srv->port); } + rcu_read_unlock(); /* Advertise the removal of this client to all local servers */ local_node = node_get(qrtr_ns.local_node); @@ -359,6 +365,7 @@ static int ctrl_cmd_bye(struct sockaddr_ pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE); pkt.client.node = cpu_to_le32(from->sq_node); + rcu_read_lock(); radix_tree_for_each_slot(slot, _node->servers, , 0) { srv = radix_tree_deref_slot(slot); @@ -372,11 +379,14 @@ static int ctrl_cmd_bye(struct sockaddr_ ret = kernel_sendmsg(qrtr_ns.sock, , , 1, sizeof(pkt)); if (ret < 0) { pr_err("failed to send bye cmd\n"); - return ret; + goto err_out; } } - return 0; +err_out: + rcu_read_unlock(); + + return ret; } static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, @@ -394,7 +404,7 @@ static int ctrl_cmd_del_client(struct so struct list_head *li; void __rcu **slot; struct kvec iv; - int ret; + int ret = 0; iv.iov_base = iv.iov_len = sizeof(pkt); @@ -434,6 +444,7 @@ static int ctrl_cmd_del_client(struct so pkt.client.node = cpu_to_le32(node_id); pkt.client.port = cpu_to_le32(port); + rcu_read_lock(); radix_tree_for_each_slot(slot, _node->servers, , 0) { srv =
[PATCH 5.8 106/124] net: mscc: ocelot: extend watermark encoding function
From: Maxim Kochetkov [ Upstream commit aa92d836d5c40a7e21e563a272ad177f1bfd44dd ] The ocelot_wm_encode function deals with setting thresholds for pause frame start and stop. In Ocelot and Felix the register layout is the same, but for Seville, it isn't. The easiest way to accommodate Seville hardware configuration is to introduce a function pointer for setting this up. Signed-off-by: Maxim Kochetkov Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/ocelot/felix_vsc9959.c | 13 + drivers/net/ethernet/mscc/ocelot.c | 16 ++-- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 13 + include/soc/mscc/ocelot.h | 1 + 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index a83ecd1c5d6c2..259a612da0030 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1105,8 +1105,21 @@ static int vsc9959_prevalidate_phy_mode(struct ocelot *ocelot, int port, } } +/* Watermark encode + * Bit 8: Unit; 0:1, 1:16 + * Bit 7-0: Value to be multiplied with unit + */ +static u16 vsc9959_wm_enc(u16 value) +{ + if (value >= BIT(8)) + return BIT(8) | (value / 16); + + return value; +} + static const struct ocelot_ops vsc9959_ops = { .reset = vsc9959_reset, + .wm_enc = vsc9959_wm_enc, }; static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 6e68713c0ac6b..1438839e3f6ea 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -396,18 +396,6 @@ static void ocelot_vlan_init(struct ocelot *ocelot) } } -/* Watermark encode - * Bit 8: Unit; 0:1, 1:16 - * Bit 7-0: Value to be multiplied with unit - */ -static u16 ocelot_wm_enc(u16 value) -{ - if (value >= BIT(8)) - return BIT(8) | (value / 16); - - return value; -} - void ocelot_adjust_link(struct ocelot *ocelot, int port, struct phy_device *phydev) { @@ -2037,9 +2025,9 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu) /* Tail dropping watermark */ atop_wm = (ocelot->shared_queue_sz - 9 * maxlen) / OCELOT_BUFFER_CELL_SZ; - ocelot_write_rix(ocelot, ocelot_wm_enc(9 * maxlen), + ocelot_write_rix(ocelot, ocelot->ops->wm_enc(9 * maxlen), SYS_ATOP, port); - ocelot_write(ocelot, ocelot_wm_enc(atop_wm), SYS_ATOP_TOT_CFG); + ocelot_write(ocelot, ocelot->ops->wm_enc(atop_wm), SYS_ATOP_TOT_CFG); } EXPORT_SYMBOL(ocelot_port_set_maxlen); diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 4a15d2ff8b706..66b58b242f778 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -240,8 +240,21 @@ static int ocelot_reset(struct ocelot *ocelot) return 0; } +/* Watermark encode + * Bit 8: Unit; 0:1, 1:16 + * Bit 7-0: Value to be multiplied with unit + */ +static u16 ocelot_wm_enc(u16 value) +{ + if (value >= BIT(8)) + return BIT(8) | (value / 16); + + return value; +} + static const struct ocelot_ops ocelot_ops = { .reset = ocelot_reset, + .wm_enc = ocelot_wm_enc, }; static const struct vcap_field vsc7514_vcap_is2_keys[] = { diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 4953e9994df34..8e174a24c5757 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -468,6 +468,7 @@ struct ocelot; struct ocelot_ops { int (*reset)(struct ocelot *ocelot); + u16 (*wm_enc)(u16 value); }; struct ocelot_acl_block { -- 2.25.1
[PATCH 5.8 019/124] platform/x86: thinkpad_acpi: initialize tp_nvram_state variable
From: Tom Rix commit 5f38b06db8af3ed6c2fc1b427504ca56fae2eacc upstream. clang static analysis flags this represenative problem thinkpad_acpi.c:2523:7: warning: Branch condition evaluates to a garbage value if (!oldn->mute || ^~~ In hotkey_kthread() mute is conditionally set by hotkey_read_nvram() but unconditionally checked by hotkey_compare_and_issue_event(). So the tp_nvram_state variable s[2] needs to be initialized. Fixes: 01e88f25985d ("ACPI: thinkpad-acpi: add CMOS NVRAM polling for hot keys (v9)") Signed-off-by: Tom Rix Reviewed-by: Hans de Goede Acked-by: mark gross Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/thinkpad_acpi.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2569,7 +2569,7 @@ static void hotkey_compare_and_issue_eve */ static int hotkey_kthread(void *data) { - struct tp_nvram_state s[2]; + struct tp_nvram_state s[2] = { 0 }; u32 poll_mask, event_mask; unsigned int si, so; unsigned long t;
Re: [PATCH v2 09/17] mm: Add unsafe_follow_pfn
On Mon, Oct 12, 2020 at 12:47 PM Marek Szyprowski wrote: > > Hi Jason, > > On 09.10.2020 14:48, Jason Gunthorpe wrote: > > On Fri, Oct 09, 2020 at 02:37:23PM +0200, Mauro Carvalho Chehab wrote: > > > >> I'm not a mm/ expert, but, from what I understood from Daniel's patch > >> description is that this is unsafe *only if* __GFP_MOVABLE is used. > > No, it is unconditionally unsafe. The CMA movable mappings are > > specific VMAs that will have bad issues here, but there are other > > types too. > > I'm trying to follow this thread, but I really wonder what do you mean > by CMA movable mappings? If a buffer has been allocated from CMA and > used for DMA, it won't be moved in the memory. It will stay at the same > physical memory address all the time until freed by the owner. It just a > matter of proper usage count tracking to delay freeing if it is still > used somewhere. Yup. The problem is that this usage count tracking doesn't exist. And drivers could at least in theory treat CMA like vram and swap buffers in of it, so just refcounting the userspace vma isn't enough. In practice, right now, it might be enough for CMA drivers though (but there's more that's possible here). -Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
[PATCH 5.8 107/124] net: mscc: ocelot: divide watermark value by 60 when writing to SYS_ATOP
From: Vladimir Oltean [ Upstream commit 601e984f23abcaa7cf3eb078c13de4db3cf6a4f0 ] Tail dropping is enabled for a port when: 1. A source port consumes more packet buffers than the watermark encoded in SYS:PORT:ATOP_CFG.ATOP. AND 2. Total memory use exceeds the consumption watermark encoded in SYS:PAUSE_CFG:ATOP_TOT_CFG. The unit of these watermarks is a 60 byte memory cell. That unit is programmed properly into ATOP_TOT_CFG, but not into ATOP. Actually when written into ATOP, it would get truncated and wrap around. Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 1438839e3f6ea..61bbb7a090042 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2001,7 +2001,7 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu) struct ocelot_port *ocelot_port = ocelot->ports[port]; int maxlen = sdu + ETH_HLEN + ETH_FCS_LEN; int pause_start, pause_stop; - int atop_wm; + int atop, atop_tot; if (port == ocelot->npi) { maxlen += OCELOT_TAG_LEN; @@ -2022,12 +2022,12 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu) ocelot_rmw_rix(ocelot, SYS_PAUSE_CFG_PAUSE_STOP(pause_stop), SYS_PAUSE_CFG_PAUSE_STOP_M, SYS_PAUSE_CFG, port); - /* Tail dropping watermark */ - atop_wm = (ocelot->shared_queue_sz - 9 * maxlen) / + /* Tail dropping watermarks */ + atop_tot = (ocelot->shared_queue_sz - 9 * maxlen) / OCELOT_BUFFER_CELL_SZ; - ocelot_write_rix(ocelot, ocelot->ops->wm_enc(9 * maxlen), -SYS_ATOP, port); - ocelot_write(ocelot, ocelot->ops->wm_enc(atop_wm), SYS_ATOP_TOT_CFG); + atop = (9 * maxlen) / OCELOT_BUFFER_CELL_SZ; + ocelot_write_rix(ocelot, ocelot->ops->wm_enc(atop), SYS_ATOP, port); + ocelot_write(ocelot, ocelot->ops->wm_enc(atop_tot), SYS_ATOP_TOT_CFG); } EXPORT_SYMBOL(ocelot_port_set_maxlen); -- 2.25.1
[PATCH 5.8 105/124] net: mscc: ocelot: split writes to pause frame enable bit and to thresholds
From: Vladimir Oltean [ Upstream commit e8e6e73db14273464b374d49ca7242c0994945f3 ] We don't want ocelot_port_set_maxlen to enable pause frame TX, just to adjust the pause thresholds. Move the unconditional enabling of pause TX to ocelot_init_port. There is no good place to put such setting because it shouldn't be unconditional. But at the moment it is, we're not changing that. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot.c | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index d0b79cca51840..6e68713c0ac6b 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2012,6 +2012,7 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu) { struct ocelot_port *ocelot_port = ocelot->ports[port]; int maxlen = sdu + ETH_HLEN + ETH_FCS_LEN; + int pause_start, pause_stop; int atop_wm; if (port == ocelot->npi) { @@ -2025,13 +2026,13 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu) ocelot_port_writel(ocelot_port, maxlen, DEV_MAC_MAXLEN_CFG); - /* Set Pause WM hysteresis -* 152 = 6 * maxlen / OCELOT_BUFFER_CELL_SZ -* 101 = 4 * maxlen / OCELOT_BUFFER_CELL_SZ -*/ - ocelot_write_rix(ocelot, SYS_PAUSE_CFG_PAUSE_ENA | -SYS_PAUSE_CFG_PAUSE_STOP(101) | -SYS_PAUSE_CFG_PAUSE_START(152), SYS_PAUSE_CFG, port); + /* Set Pause watermark hysteresis */ + pause_start = 6 * maxlen / OCELOT_BUFFER_CELL_SZ; + pause_stop = 4 * maxlen / OCELOT_BUFFER_CELL_SZ; + ocelot_rmw_rix(ocelot, SYS_PAUSE_CFG_PAUSE_START(pause_start), + SYS_PAUSE_CFG_PAUSE_START_M, SYS_PAUSE_CFG, port); + ocelot_rmw_rix(ocelot, SYS_PAUSE_CFG_PAUSE_STOP(pause_stop), + SYS_PAUSE_CFG_PAUSE_STOP_M, SYS_PAUSE_CFG, port); /* Tail dropping watermark */ atop_wm = (ocelot->shared_queue_sz - 9 * maxlen) / @@ -2094,6 +2095,10 @@ void ocelot_init_port(struct ocelot *ocelot, int port) ocelot_port_writel(ocelot_port, 0, DEV_MAC_FC_MAC_HIGH_CFG); ocelot_port_writel(ocelot_port, 0, DEV_MAC_FC_MAC_LOW_CFG); + /* Enable transmission of pause frames */ + ocelot_rmw_rix(ocelot, SYS_PAUSE_CFG_PAUSE_ENA, SYS_PAUSE_CFG_PAUSE_ENA, + SYS_PAUSE_CFG, port); + /* Drop frames with multicast source address */ ocelot_rmw_gix(ocelot, ANA_PORT_DROP_CFG_DROP_MC_SMAC_ENA, ANA_PORT_DROP_CFG_DROP_MC_SMAC_ENA, -- 2.25.1
[PATCH 5.8 086/124] net/mlx5: poll cmd EQ in case of command timeout
From: Eran Ben Elisha [ Upstream commit 1d5558b1f0de81f54ddee05f3793acc5260d107f ] Once driver detects a command interface command timeout, it warns the user and returns timeout error to the caller. In such case, the entry of the command is not evacuated (because only real event interrupt is allowed to clear command interface entry). If the HW event interrupt of this entry will never arrive, this entry will be left unused forever. Command interface entries are limited and eventually we can end up without the ability to post a new command. In addition, if driver will not consume the EQE of the lost interrupt and rearm the EQ, no new interrupts will arrive for other commands. Add a resiliency mechanism for manually polling the command EQ in case of a command timeout. In case resiliency mechanism will find non-handled EQE, it will consume it, and the command interface will be fully functional again. Once the resiliency flow finished, wait another 5 seconds for the command interface to complete for this command entry. Define mlx5_cmd_eq_recover() to manage the cmd EQ polling resiliency flow. Add an async EQ spinlock to avoid races between resiliency flows and real interrupts that might run simultaneously. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 53 --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 40 +- .../net/ethernet/mellanox/mlx5/core/lib/eq.h | 2 + 3 files changed, 86 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index c0055f5479ce0..37dae95e61d5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -853,11 +853,21 @@ static void cb_timeout_handler(struct work_struct *work) struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd); + mlx5_cmd_eq_recover(dev); + + /* Maybe got handled by eq recover ? */ + if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, >state)) { + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx, + mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + goto out; /* phew, already handled */ + } + ent->ret = -ETIMEDOUT; - mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", - mlx5_command_str(msg_to_opcode(ent->in)), - msg_to_opcode(ent->in)); + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n", + ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + +out: cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */ } @@ -997,6 +1007,35 @@ static const char *deliv_status_to_str(u8 status) } } +enum { + MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000, +}; + +static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev, + struct mlx5_cmd_work_ent *ent) +{ + unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC); + + mlx5_cmd_eq_recover(dev); + + /* Re-wait on the ent->done after executing the recovery flow. If the +* recovery flow (or any other recovery flow running simultaneously) +* has recovered an EQE, it should cause the entry to be completed by +* the command interface. +*/ + if (wait_for_completion_timeout(>done, timeout)) { + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx, + mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + return; + } + + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx, + mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + + ent->ret = -ETIMEDOUT; + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); +} + static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) { unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); @@ -1008,12 +1047,10 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) ent->ret = -ECANCELED; goto out_err; } - if (cmd->mode == CMD_MODE_POLLING || ent->polling) { + if (cmd->mode == CMD_MODE_POLLING || ent->polling) wait_for_completion(>done); - } else if (!wait_for_completion_timeout(>done, timeout)) { - ent->ret = -ETIMEDOUT; -
[GIT PULL] libata updates for 5.10-rc1
Hi Linus, Nothing major in here, just fixes or improvements collected over the last few months. Please pull! The following changes since commit a1b8638ba1320e6684aa98233c15255eb803fac7: Linux 5.9-rc7 (2020-09-27 14:38:10 -0700) are available in the Git repository at: git://git.kernel.dk/linux-block.git tags/libata-5.10-2020-10-12 for you to fetch changes up to 45aefe3d2251e4e229d7662052739f96ad1d08d9: ata: ahci: mvebu: Make SATA PHY optional for Armada 3720 (2020-10-09 12:47:56 -0600) libata-5.10-2020-10-12 Bartlomiej Zolnierkiewicz (1): MAINTAINERS: remove LIBATA PATA DRIVERS entry Gustavo A. R. Silva (1): pata_cmd64x: Use fallthrough pseudo-keyword Liu Shixin (1): sata, highbank: simplify the return expression of ahci_highbank_suspend Mika Westerberg (1): ahci: Add Intel Rocket Lake PCH-H RAID PCI IDs Pali Rohár (1): ata: ahci: mvebu: Make SATA PHY optional for Armada 3720 Yuantian Tang (1): ahci: qoriq: enable acpi support in qoriq ahci driver MAINTAINERS| 9 - drivers/ata/ahci.c | 4 drivers/ata/ahci.h | 2 ++ drivers/ata/ahci_mvebu.c | 2 +- drivers/ata/ahci_qoriq.c | 20 +--- drivers/ata/libahci_platform.c | 2 +- drivers/ata/pata_cmd64x.c | 2 +- drivers/ata/sata_highbank.c| 7 +-- 8 files changed, 27 insertions(+), 21 deletions(-) -- Jens Axboe
[PATCH 5.8 090/124] net/mlx5e: Fix return status when setting unsupported FEC mode
From: Aya Levin [ Upstream commit 2608a2f831c47dfdf18885a7289be5af97182b05 ] Verify the configured FEC mode is supported by at least a single link mode before applying the command. Otherwise fail the command and return "Operation not supported". Prior to this patch, the command was successful, yet it falsely set all link modes to FEC auto mode - like configuring FEC mode to auto. Auto mode is the default configuration if a link mode doesn't support the configured FEC mode. Fixes: b5ede32d3329 ("net/mlx5e: Add support for FEC modes based on 50G per lane links") Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 98e909bf3c1ec..3e32264cf6131 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -566,6 +566,9 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane) return -EOPNOTSUPP; + if (fec_policy && !mlx5e_fec_in_caps(dev, fec_policy)) + return -EOPNOTSUPP; + MLX5_SET(pplm_reg, in, local_port, 1); err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); if (err) -- 2.25.1
[PATCH 5.8 114/124] netlink: fix policy dump leak
From: Johannes Berg commit a95bc734e60449e7b073ff7ff70c35083b290ae9 upstream. If userspace doesn't complete the policy dump, we leak the allocated state. Fix this. Fixes: d07dcf9aadd6 ("netlink: add infrastructure to expose policies to userspace") Signed-off-by: Johannes Berg Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/netlink.h |3 ++- net/netlink/genetlink.c |9 - net/netlink/policy.c| 24 ++-- 3 files changed, 20 insertions(+), 16 deletions(-) --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1936,7 +1936,8 @@ void nla_get_range_signed(const struct n int netlink_policy_dump_start(const struct nla_policy *policy, unsigned int maxtype, unsigned long *state); -bool netlink_policy_dump_loop(unsigned long *state); +bool netlink_policy_dump_loop(unsigned long state); int netlink_policy_dump_write(struct sk_buff *skb, unsigned long state); +void netlink_policy_dump_free(unsigned long state); #endif --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1079,7 +1079,7 @@ static int ctrl_dumppolicy(struct sk_buf if (err) return err; - while (netlink_policy_dump_loop(>args[1])) { + while (netlink_policy_dump_loop(cb->args[1])) { void *hdr; struct nlattr *nest; @@ -1113,6 +1113,12 @@ nla_put_failure: return skb->len; } +static int ctrl_dumppolicy_done(struct netlink_callback *cb) +{ + netlink_policy_dump_free(cb->args[1]); + return 0; +} + static const struct genl_ops genl_ctrl_ops[] = { { .cmd= CTRL_CMD_GETFAMILY, @@ -1123,6 +1129,7 @@ static const struct genl_ops genl_ctrl_o { .cmd= CTRL_CMD_GETPOLICY, .dumpit = ctrl_dumppolicy, + .done = ctrl_dumppolicy_done, }, }; --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -84,7 +84,6 @@ int netlink_policy_dump_start(const stru unsigned int policy_idx; int err; - /* also returns 0 if "*_state" is our ERR_PTR() end marker */ if (*_state) return 0; @@ -140,21 +139,11 @@ static bool netlink_policy_dump_finished !state->policies[state->policy_idx].policy; } -bool netlink_policy_dump_loop(unsigned long *_state) +bool netlink_policy_dump_loop(unsigned long _state) { - struct nl_policy_dump *state = (void *)*_state; - - if (IS_ERR(state)) - return false; - - if (netlink_policy_dump_finished(state)) { - kfree(state); - /* store end marker instead of freed state */ - *_state = (unsigned long)ERR_PTR(-ENOENT); - return false; - } + struct nl_policy_dump *state = (void *)_state; - return true; + return !netlink_policy_dump_finished(state); } int netlink_policy_dump_write(struct sk_buff *skb, unsigned long _state) @@ -309,3 +298,10 @@ nla_put_failure: nla_nest_cancel(skb, policy); return -ENOBUFS; } + +void netlink_policy_dump_free(unsigned long _state) +{ + struct nl_policy_dump *state = (void *)_state; + + kfree(state); +}
[PATCH 5.8 084/124] net/mlx5: Fix a race when moving command interface to polling mode
From: Eran Ben Elisha [ Upstream commit 432161ea26d6d5e5c3f7306d9407d26ed1e1953e ] As part of driver unload, it destroys the commands EQ (via FW command). As the commands EQ is destroyed, FW will not generate EQEs for any command that driver sends afterwards. Driver should poll for later commands status. Driver commands mode metadata is updated before the commands EQ is actually destroyed. This can lead for double completion handle by the driver (polling and interrupt), if a command is executed and completed by FW after the mode was changed, but before the EQ was destroyed. Fix that by using the mlx5_cmd_allowed_opcode mechanism to guarantee that only DESTROY_EQ command can be executed during this time period. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 31ef9f8420c87..1318d774b18f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -656,8 +656,10 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) cleanup_async_eq(dev, >pages_eq, "pages"); cleanup_async_eq(dev, >async_eq, "async"); + mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ); mlx5_cmd_use_polling(dev); cleanup_async_eq(dev, >cmd_eq, "cmd"); + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); mlx5_eq_notifier_unregister(dev, >cq_err_nb); } -- 2.25.1
[PATCH 5.8 108/124] afs: Fix deadlock between writeback and truncate
From: David Howells [ Upstream commit ec0fa0b659144d9c68204d23f627b6a65fa53e50 ] The afs filesystem has a lock[*] that it uses to serialise I/O operations going to the server (vnode->io_lock), as the server will only perform one modification operation at a time on any given file or directory. This prevents the the filesystem from filling up all the call slots to a server with calls that aren't going to be executed in parallel anyway, thereby allowing operations on other files to obtain slots. [*] Note that is probably redundant for directories at least since i_rwsem is used to serialise directory modifications and lookup/reading vs modification. The server does allow parallel non-modification ops, however. When a file truncation op completes, we truncate the in-memory copy of the file to match - but we do it whilst still holding the io_lock, the idea being to prevent races with other operations. However, if writeback starts in a worker thread simultaneously with truncation (whilst notify_change() is called with i_rwsem locked, writeback pays it no heed), it may manage to set PG_writeback bits on the pages that will get truncated before afs_setattr_success() manages to call truncate_pagecache(). Truncate will then wait for those pages - whilst still inside io_lock: # cat /proc/8837/stack [<0>] wait_on_page_bit_common+0x184/0x1e7 [<0>] truncate_inode_pages_range+0x37f/0x3eb [<0>] truncate_pagecache+0x3c/0x53 [<0>] afs_setattr_success+0x4d/0x6e [<0>] afs_wait_for_operation+0xd8/0x169 [<0>] afs_do_sync_operation+0x16/0x1f [<0>] afs_setattr+0x1fb/0x25d [<0>] notify_change+0x2cf/0x3c4 [<0>] do_truncate+0x7f/0xb2 [<0>] do_sys_ftruncate+0xd1/0x104 [<0>] do_syscall_64+0x2d/0x3a [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 The writeback operation, however, stalls indefinitely because it needs to get the io_lock to proceed: # cat /proc/5940/stack [<0>] afs_get_io_locks+0x58/0x1ae [<0>] afs_begin_vnode_operation+0xc7/0xd1 [<0>] afs_store_data+0x1b2/0x2a3 [<0>] afs_write_back_from_locked_page+0x418/0x57c [<0>] afs_writepages_region+0x196/0x224 [<0>] afs_writepages+0x74/0x156 [<0>] do_writepages+0x2d/0x56 [<0>] __writeback_single_inode+0x84/0x207 [<0>] writeback_sb_inodes+0x238/0x3cf [<0>] __writeback_inodes_wb+0x68/0x9f [<0>] wb_writeback+0x145/0x26c [<0>] wb_do_writeback+0x16a/0x194 [<0>] wb_workfn+0x74/0x177 [<0>] process_one_work+0x174/0x264 [<0>] worker_thread+0x117/0x1b9 [<0>] kthread+0xec/0xf1 [<0>] ret_from_fork+0x1f/0x30 and thus deadlock has occurred. Note that whilst afs_setattr() calls filemap_write_and_wait(), the fact that the caller is holding i_rwsem doesn't preclude more pages being dirtied through an mmap'd region. Fix this by: (1) Use the vnode validate_lock to mediate access between afs_setattr() and afs_writepages(): (a) Exclusively lock validate_lock in afs_setattr() around the whole RPC operation. (b) If WB_SYNC_ALL isn't set on entry to afs_writepages(), trying to shared-lock validate_lock and returning immediately if we couldn't get it. (c) If WB_SYNC_ALL is set, wait for the lock. The validate_lock is also used to validate a file and to zap its cache if the file was altered by a third party, so it's probably a good fit for this. (2) Move the truncation outside of the io_lock in setattr, using the same hook as is used for local directory editing. This requires the old i_size to be retained in the operation record as we commit the revised status to the inode members inside the io_lock still, but we still need to know if we reduced the file size. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: David Howells Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/afs/inode.c| 47 ++- fs/afs/internal.h | 1 + fs/afs/write.c| 11 +++ 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 1d13d2e882ada..0fe8844b4bee2 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -810,14 +810,32 @@ void afs_evict_inode(struct inode *inode) static void afs_setattr_success(struct afs_operation *op) { - struct inode *inode = >file[0].vnode->vfs_inode; + struct afs_vnode_param *vp = >file[0]; + struct inode *inode = >vnode->vfs_inode; + loff_t old_i_size = i_size_read(inode); + + op->setattr.old_i_size = old_i_size; + afs_vnode_commit_status(op, vp); + /* inode->i_size has now been changed. */ + + if (op->setattr.attr->ia_valid & ATTR_SIZE) { + loff_t size = op->setattr.attr->ia_size; + if (size > old_i_size) + pagecache_isize_extended(inode, old_i_size, size); + } +} + +static void
[PATCH 5.8 087/124] net/mlx5: Add retry mechanism to the command entry index allocation
From: Eran Ben Elisha [ Upstream commit 410bd754cd73c4a2ac3856d9a03d7b08f9c906bf ] It is possible that new command entry index allocation will temporarily fail. The new command holds the semaphore, so it means that a free entry should be ready soon. Add one second retry mechanism before returning an error. Patch "net/mlx5: Avoid possible free of command entry while timeout comp handler" increase the possibility to bump into this temporarily failure as it delays the entry index release for non-callback commands. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 21 ++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 37dae95e61d5f..2b597ac365f84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -883,6 +883,25 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) return cmd->allowed_opcode == opcode; } +static int cmd_alloc_index_retry(struct mlx5_cmd *cmd) +{ + unsigned long alloc_end = jiffies + msecs_to_jiffies(1000); + int idx; + +retry: + idx = cmd_alloc_index(cmd); + if (idx < 0 && time_before(jiffies, alloc_end)) { + /* Index allocation can fail on heavy load of commands. This is a temporary +* situation as the current command already holds the semaphore, meaning that +* another command completion is being handled and it is expected to release +* the entry index soon. +*/ + cpu_relax(); + goto retry; + } + return idx; +} + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); @@ -900,7 +919,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? >pages_sem : >sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index(cmd); + alloc_ret = cmd_alloc_index_retry(cmd); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { -- 2.25.1
[PATCH 5.8 116/124] net: bridge: fdb: dont flush ext_learn entries
From: Nikolay Aleksandrov commit f2f3729fb65c5c2e6db234e6316b71a7bdc4b30b upstream. When a user-space software manages fdb entries externally it should set the ext_learn flag which marks the fdb entry as externally managed and avoids expiring it (they're treated as static fdbs). Unfortunately on events where fdb entries are flushed (STP down, netlink fdb flush etc) these fdbs are also deleted automatically by the bridge. That in turn causes trouble for the managing user-space software (e.g. in MLAG setups we lose remote fdb entries on port flaps). These entries are completely externally managed so we should avoid automatically deleting them, the only exception are offloaded entries (i.e. BR_FDB_ADDED_BY_EXT_LEARN + BR_FDB_OFFLOADED). They are flushed as before. Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_fdb.c |2 ++ 1 file changed, 2 insertions(+) --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -404,6 +404,8 @@ void br_fdb_delete_by_port(struct net_br if (!do_all) if (test_bit(BR_FDB_STATIC, >flags) || + (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, >flags) && +!test_bit(BR_FDB_OFFLOADED, >flags)) || (vid && f->key.vlan_id != vid)) continue;
[PATCH 5.8 104/124] net: mscc: ocelot: rename ocelot_board.c to ocelot_vsc7514.c
From: Vladimir Oltean [ Upstream commit 589aa6e7c9de322d47eb33a5cee8cc38838319e6 ] To follow the model of felix and seville where we have one platform-specific file, rename this file to the actual SoC it serves. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/Makefile | 2 +- drivers/net/ethernet/mscc/{ocelot_board.c => ocelot_vsc7514.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename drivers/net/ethernet/mscc/{ocelot_board.c => ocelot_vsc7514.c} (100%) diff --git a/drivers/net/ethernet/mscc/Makefile b/drivers/net/ethernet/mscc/Makefile index 91b33b55054e1..ad97a5cca6f99 100644 --- a/drivers/net/ethernet/mscc/Makefile +++ b/drivers/net/ethernet/mscc/Makefile @@ -2,4 +2,4 @@ obj-$(CONFIG_MSCC_OCELOT_SWITCH) += mscc_ocelot_common.o mscc_ocelot_common-y := ocelot.o ocelot_io.o mscc_ocelot_common-y += ocelot_regs.o ocelot_tc.o ocelot_police.o ocelot_ace.o ocelot_flower.o ocelot_ptp.o -obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += ocelot_board.o +obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += ocelot_vsc7514.o diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c similarity index 100% rename from drivers/net/ethernet/mscc/ocelot_board.c rename to drivers/net/ethernet/mscc/ocelot_vsc7514.c -- 2.25.1
[PATCH 5.8 095/124] net: hinic: fix DEVLINK build errors
From: Randy Dunlap [ Upstream commit 1f7e877c20517735bceff1535e1b7fa846b2f215 ] Fix many (lots deleted here) build errors in hinic by selecting NET_DEVLINK. ld: drivers/net/ethernet/huawei/hinic/hinic_hw_dev.o: in function `mgmt_watchdog_timeout_event_handler': hinic_hw_dev.c:(.text+0x30a): undefined reference to `devlink_health_report' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_fw_reporter_dump': hinic_devlink.c:(.text+0x1c): undefined reference to `devlink_fmsg_u32_pair_put' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_fw_reporter_dump': hinic_devlink.c:(.text+0x126): undefined reference to `devlink_fmsg_binary_pair_put' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_hw_reporter_dump': hinic_devlink.c:(.text+0x1ba): undefined reference to `devlink_fmsg_string_pair_put' ld: hinic_devlink.c:(.text+0x227): undefined reference to `devlink_fmsg_u8_pair_put' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_devlink_alloc': hinic_devlink.c:(.text+0xaee): undefined reference to `devlink_alloc' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_devlink_free': hinic_devlink.c:(.text+0xb04): undefined reference to `devlink_free' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_devlink_register': hinic_devlink.c:(.text+0xb26): undefined reference to `devlink_register' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_devlink_unregister': hinic_devlink.c:(.text+0xb46): undefined reference to `devlink_unregister' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_health_reporters_create': hinic_devlink.c:(.text+0xb75): undefined reference to `devlink_health_reporter_create' ld: hinic_devlink.c:(.text+0xb95): undefined reference to `devlink_health_reporter_create' ld: hinic_devlink.c:(.text+0xbac): undefined reference to `devlink_health_reporter_destroy' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_health_reporters_destroy': Fixes: 51ba902a16e6 ("net-next/hinic: Initialize hw interface") Signed-off-by: Randy Dunlap Cc: Bin Luo Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Aviad Krawczyk Cc: Zhao Chen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/huawei/hinic/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/huawei/hinic/Kconfig b/drivers/net/ethernet/huawei/hinic/Kconfig index 936e2dd3bb135..b47bd5440c5f0 100644 --- a/drivers/net/ethernet/huawei/hinic/Kconfig +++ b/drivers/net/ethernet/huawei/hinic/Kconfig @@ -6,6 +6,7 @@ config HINIC tristate "Huawei Intelligent PCIE Network Interface Card" depends on (PCI_MSI && (X86 || ARM64)) + select NET_DEVLINK help This driver supports HiNIC PCIE Ethernet cards. To compile this driver as part of the kernel, choose Y here. -- 2.25.1
[PATCH 5.8 103/124] rxrpc: Fix server keyring leak
From: David Howells [ Upstream commit 38b1dc47a35ba14c3f4472138ea56d014c2d609b ] If someone calls setsockopt() twice to set a server key keyring, the first keyring is leaked. Fix it to return an error instead if the server key keyring is already set. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 64cbbd2f16944..85a9ff8cd236a 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -903,7 +903,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen) _enter(""); - if (optlen <= 0 || optlen > PAGE_SIZE - 1) + if (optlen <= 0 || optlen > PAGE_SIZE - 1 || rx->securities) return -EINVAL; description = memdup_user_nul(optval, optlen); -- 2.25.1
[PATCH] sched/cputime: correct account of irqtime
__do_softirq() may be interrupted by hardware interrupts. In this case, irqtime_account_irq() will account the time slice as CPUTIME_SOFTIRQ by mistake. By passing irqtime_account_irq() an extra param about either hardirq or softirq, irqtime_account_irq() can handle the above case. Signed-off-by: Pingfan Liu Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Will Deacon Cc: "Paul E. McKenney" Cc: Frederic Weisbecker Cc: Allen Pais Cc: Romain Perier To: linux-kernel@vger.kernel.org --- include/linux/hardirq.h | 4 ++-- include/linux/vtime.h | 12 ++-- kernel/sched/cputime.c | 4 ++-- kernel/softirq.c| 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 754f67a..56e7bb5 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -32,7 +32,7 @@ static __always_inline void rcu_irq_enter_check_tick(void) */ #define __irq_enter() \ do {\ - account_irq_enter_time(current);\ + account_irq_enter_time(current, true); \ preempt_count_add(HARDIRQ_OFFSET); \ lockdep_hardirq_enter();\ } while (0) @@ -63,7 +63,7 @@ void irq_enter_rcu(void); #define __irq_exit() \ do {\ lockdep_hardirq_exit(); \ - account_irq_exit_time(current); \ + account_irq_exit_time(current, true); \ preempt_count_sub(HARDIRQ_OFFSET); \ } while (0) diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 2cdeca0..294188ae1 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -98,21 +98,21 @@ static inline void vtime_flush(struct task_struct *tsk) { } #ifdef CONFIG_IRQ_TIME_ACCOUNTING -extern void irqtime_account_irq(struct task_struct *tsk); +extern void irqtime_account_irq(struct task_struct *tsk, bool hardirq); #else -static inline void irqtime_account_irq(struct task_struct *tsk) { } +static inline void irqtime_account_irq(struct task_struct *tsk, bool hardirq) { } #endif -static inline void account_irq_enter_time(struct task_struct *tsk) +static inline void account_irq_enter_time(struct task_struct *tsk, bool hardirq) { vtime_account_irq_enter(tsk); - irqtime_account_irq(tsk); + irqtime_account_irq(tsk, hardirq); } -static inline void account_irq_exit_time(struct task_struct *tsk) +static inline void account_irq_exit_time(struct task_struct *tsk, bool hardirq) { vtime_account_irq_exit(tsk); - irqtime_account_irq(tsk); + irqtime_account_irq(tsk, hardirq); } #endif /* _LINUX_KERNEL_VTIME_H */ diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5a55d23..166f1d7 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -47,7 +47,7 @@ static void irqtime_account_delta(struct irqtime *irqtime, u64 delta, * Called before incrementing preempt_count on {soft,}irq_enter * and before decrementing preempt_count on {soft,}irq_exit. */ -void irqtime_account_irq(struct task_struct *curr) +void irqtime_account_irq(struct task_struct *curr, bool hardirq) { struct irqtime *irqtime = this_cpu_ptr(_irqtime); s64 delta; @@ -68,7 +68,7 @@ void irqtime_account_irq(struct task_struct *curr) */ if (hardirq_count()) irqtime_account_delta(irqtime, delta, CPUTIME_IRQ); - else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) + else if (in_serving_softirq() && curr != this_cpu_ksoftirqd() && !hardirq) irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ); } EXPORT_SYMBOL_GPL(irqtime_account_irq); diff --git a/kernel/softirq.c b/kernel/softirq.c index bf88d7f6..da59ea39 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -270,7 +270,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) current->flags &= ~PF_MEMALLOC; pending = local_softirq_pending(); - account_irq_enter_time(current); + account_irq_enter_time(current, false); __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); in_hardirq = lockdep_softirq_start(); @@ -321,7 +321,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) } lockdep_softirq_end(in_hardirq); - account_irq_exit_time(current); + account_irq_exit_time(current, false); __local_bh_enable(SOFTIRQ_OFFSET); WARN_ON_ONCE(in_interrupt()); current_restore_flags(old_flags, PF_MEMALLOC); @@ -417,7 +417,7 @@ static inline void __irq_exit_rcu(void) #else lockdep_assert_irqs_disabled(); #endif -
[PATCH 5.8 094/124] net: stmmac: Modify configuration method of EEE timers
From: Vineetha G. Jaya Kumaran [ Upstream commit 388e201d41fa1ed8f2dce0f0567f56f8e919ffb0 ] Ethtool manual stated that the tx-timer is the "the amount of time the device should stay in idle mode prior to asserting its Tx LPI". The previous implementation for "ethtool --set-eee tx-timer" sets the LPI TW timer duration which is not correct. Hence, this patch fixes the "ethtool --set-eee tx-timer" to configure the EEE LPI timer. The LPI TW Timer will be using the defined default value instead of "ethtool --set-eee tx-timer" which follows the EEE LS timer implementation. Changelog V2 *Not removing/modifying the eee_timer. *EEE LPI timer can be configured through ethtool and also the eee_timer module param. *EEE TW Timer will be configured with default value only, not able to be configured through ethtool or module param. This follows the implementation of the EEE LS Timer. Fixes: d765955d2ae0 ("stmmac: add the Energy Efficient Ethernet support") Signed-off-by: Vineetha G. Jaya Kumaran Signed-off-by: Voon Weifeng Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 ++ .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 12 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 --- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 9c02fc754bf1b..545696971f65e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -203,6 +203,8 @@ struct stmmac_priv { int eee_enabled; int eee_active; int tx_lpi_timer; + int tx_lpi_enabled; + int eee_tw_timer; unsigned int mode; unsigned int chain_mode; int extend_desc; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index c16d0cc3e9c44..b82c6715f95f3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -652,6 +652,7 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev, edata->eee_enabled = priv->eee_enabled; edata->eee_active = priv->eee_active; edata->tx_lpi_timer = priv->tx_lpi_timer; + edata->tx_lpi_enabled = priv->tx_lpi_enabled; return phylink_ethtool_get_eee(priv->phylink, edata); } @@ -665,6 +666,10 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, if (!priv->dma_cap.eee) return -EOPNOTSUPP; + if (priv->tx_lpi_enabled != edata->tx_lpi_enabled) + netdev_warn(priv->dev, + "Setting EEE tx-lpi is not supported\n"); + if (!edata->eee_enabled) stmmac_disable_eee_mode(priv); @@ -672,7 +677,12 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, if (ret) return ret; - priv->tx_lpi_timer = edata->tx_lpi_timer; + if (edata->eee_enabled && + priv->tx_lpi_timer != edata->tx_lpi_timer) { + priv->tx_lpi_timer = edata->tx_lpi_timer; + stmmac_eee_init(priv); + } + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 73677c3b33b65..73465e5f5a417 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -94,7 +94,7 @@ static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE | static int eee_timer = STMMAC_DEFAULT_LPI_TIMER; module_param(eee_timer, int, 0644); MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec"); -#define STMMAC_LPI_T(x) (jiffies + msecs_to_jiffies(x)) +#define STMMAC_LPI_T(x) (jiffies + usecs_to_jiffies(x)) /* By default the driver will use the ring mode to manage tx and rx descriptors, * but allow user to force to use the chain instead of the ring @@ -370,7 +370,7 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) struct stmmac_priv *priv = from_timer(priv, t, eee_ctrl_timer); stmmac_enable_eee_mode(priv); - mod_timer(>eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); + mod_timer(>eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); } /** @@ -383,7 +383,7 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) */ bool stmmac_eee_init(struct stmmac_priv *priv) { - int tx_lpi_timer = priv->tx_lpi_timer; + int eee_tw_timer = priv->eee_tw_timer; /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. @@ -403,7 +403,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv) if (priv->eee_enabled) { netdev_dbg(priv->dev, "disable EEE\n"); del_timer_sync(>eee_ctrl_timer); -
[PATCH 5.8 102/124] rxrpc: The server keyring isnt network-namespaced
From: David Howells [ Upstream commit fea99111244bae44e7d82a973744d27ea1567814 ] The keyring containing the server's tokens isn't network-namespaced, so it shouldn't be looked up with a network namespace. It is expected to be owned specifically by the server, so namespacing is unnecessary. Fixes: a58946c158a0 ("keys: Pass the network namespace into request_key mechanism") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 32f46edcf7c67..64cbbd2f16944 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -941,7 +941,7 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval, if (IS_ERR(description)) return PTR_ERR(description); - key = request_key_net(_type_keyring, description, sock_net(>sk), NULL); + key = request_key(_type_keyring, description, NULL); if (IS_ERR(key)) { kfree(description); _leave(" = %ld", PTR_ERR(key)); -- 2.25.1
[PATCH 5.8 092/124] net/mlx5e: Fix VLAN create flow
From: Aya Levin [ Upstream commit d4a16052bccdd695982f89d815ca075825115821 ] When interface is attached while in promiscuous mode and with VLAN filtering turned off, both configurations are not respected and VLAN filtering is performed. There are 2 flows which add the any-vid rules during interface attach: VLAN creation table and set rx mode. Each is relaying on the other to add any-vid rules, eventually non of them does. Fix this by adding any-vid rules on VLAN creation regardless of promiscuous mode. Fixes: 9df30601c843 ("net/mlx5e: Restore vlan filter after seamless reset") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index c5be0cdfaf0fa..713dc210f710c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -217,6 +217,9 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, break; } + if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type)) + return 0; + *rule_p = mlx5_add_flow_rules(ft, spec, _act, , 1); if (IS_ERR(*rule_p)) { @@ -397,8 +400,7 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); - if (priv->fs.vlan.cvlan_filter_disabled && - !(priv->netdev->flags & IFF_PROMISC)) + if (priv->fs.vlan.cvlan_filter_disabled) mlx5e_add_any_vid_rules(priv); } -- 2.25.1
[PATCH 5.8 022/124] bpf: Prevent .BTF section elimination
From: Tony Ambardar commit 65c204398928f9c79f1a29912b410439f7052635 upstream. Systems with memory or disk constraints often reduce the kernel footprint by configuring LD_DEAD_CODE_DATA_ELIMINATION. However, this can result in removal of any BTF information. Use the KEEP() macro to preserve the BTF data as done with other important sections, while still allowing for smaller kernels. Fixes: 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF") Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/a635b5d3e2da044e7b51ec1315e8910fbce0083f.1600417359.git.tony.ambar...@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/vmlinux.lds.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -641,7 +641,7 @@ #define BTF\ .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \ __start_BTF = .;\ - *(.BTF) \ + KEEP(*(.BTF)) \ __stop_BTF = .; \ } #else
[PATCH 5.8 083/124] pipe: Fix memory leaks in create_pipe_files()
From: Qian Cai [ Upstream commit 8a018eb55e3ac033592afbcb476b0ffe64465b12 ] Calling pipe2() with O_NOTIFICATION_PIPE could results in memory leaks unless watch_queue_init() is successful. In case of watch_queue_init() failure in pipe2() we are left with inode and pipe_inode_info instances that need to be freed. That failure exit has been introduced in commit c73be61cede5 ("pipe: Add general notification queue support") and its handling should've been identical to nearby treatment of alloc_file_pseudo() failures - it is dealing with the same situation. As it is, the mainline kernel leaks in that case. Another problem is that CONFIG_WATCH_QUEUE and !CONFIG_WATCH_QUEUE cases are treated differently (and the former leaks just pipe_inode_info, the latter - both pipe_inode_info and inode). Fixed by providing a dummy wacth_queue_init() in !CONFIG_WATCH_QUEUE case and by having failures of wacth_queue_init() handled the same way we handle alloc_file_pseudo() ones. Fixes: c73be61cede5 ("pipe: Add general notification queue support") Signed-off-by: Qian Cai Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/pipe.c | 11 +-- include/linux/watch_queue.h | 6 ++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 117db82b10af5..0ac197658a2d6 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -894,19 +894,18 @@ int create_pipe_files(struct file **res, int flags) { struct inode *inode = get_pipe_inode(); struct file *f; + int error; if (!inode) return -ENFILE; if (flags & O_NOTIFICATION_PIPE) { -#ifdef CONFIG_WATCH_QUEUE - if (watch_queue_init(inode->i_pipe) < 0) { + error = watch_queue_init(inode->i_pipe); + if (error) { + free_pipe_info(inode->i_pipe); iput(inode); - return -ENOMEM; + return error; } -#else - return -ENOPKG; -#endif } f = alloc_file_pseudo(inode, pipe_mnt, "", diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index 5e08db2adc319..c994d1b2cdbaa 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -122,6 +122,12 @@ static inline void remove_watch_list(struct watch_list *wlist, u64 id) */ #define watch_sizeof(STRUCT) (sizeof(STRUCT) << WATCH_INFO_LENGTH__SHIFT) +#else +static inline int watch_queue_init(struct pipe_inode_info *pipe) +{ + return -ENOPKG; +} + #endif #endif /* _LINUX_WATCH_QUEUE_H */ -- 2.25.1
[PATCH 5.8 096/124] vhost-vdpa: fix vhost_vdpa_map() on error condition
From: Si-Wei Liu [ Upstream commit 1477c8aebb94a1db398c12d929a9d27bbd678d8c ] vhost_vdpa_map() should remove the iotlb entry just added if the corresponding mapping fails to set up properly. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Si-Wei Liu Link: https://lore.kernel.org/r/1601701330-16837-2-git-send-email-si-wei@oracle.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vdpa.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index a54b60d6623f0..5259f5210b375 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -527,6 +527,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, r = iommu_map(v->domain, iova, pa, size, perm_to_iommu_flags(perm)); + if (r) + vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); + return r; } -- 2.25.1
[PATCH 5.8 101/124] rxrpc: Fix some missing _bh annotations on locking conn->state_lock
From: David Howells [ Upstream commit fa1d113a0f96f9ab7e4fe4f8825753ba1e34a9d3 ] conn->state_lock may be taken in softirq mode, but a previous patch replaced an outer lock in the response-packet event handling code, and lost the _bh from that when doing so. Fix this by applying the _bh annotation to the state_lock locking. Fixes: a1399f8bb033 ("rxrpc: Call channels should have separate call number spaces") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/conn_event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 447f55ca68860..6e972b4823efa 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -340,18 +340,18 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return ret; spin_lock(>channel_lock); - spin_lock(>state_lock); + spin_lock_bh(>state_lock); if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { conn->state = RXRPC_CONN_SERVICE; - spin_unlock(>state_lock); + spin_unlock_bh(>state_lock); for (loop = 0; loop < RXRPC_MAXCALLS; loop++) rxrpc_call_is_secure( rcu_dereference_protected( conn->channels[loop].call, lockdep_is_held(>channel_lock))); } else { - spin_unlock(>state_lock); + spin_unlock_bh(>state_lock); } spin_unlock(>channel_lock); -- 2.25.1
[PATCH 5.8 049/124] drm/amd/display: fix return value check for hdcp_work
From: Flora Cui [ Upstream commit 898c7302f4de1d91065e80fc46552b3ec70894ff ] max_caps might be 0, thus hdcp_work might be ZERO_SIZE_PTR Signed-off-by: Flora Cui Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 949d10ef83040..6dd1f3f8d9903 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -568,7 +568,7 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct int i = 0; hdcp_work = kcalloc(max_caps, sizeof(*hdcp_work), GFP_KERNEL); - if (hdcp_work == NULL) + if (ZERO_OR_NULL_PTR(hdcp_work)) return NULL; hdcp_work->srm = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, sizeof(*hdcp_work->srm), GFP_KERNEL); -- 2.25.1
[PATCH 5.8 058/124] net: stmmac: removed enabling eee in EEE set callback
From: Voon Weifeng [ Upstream commit 7241c5a697479c7d0c5a96595822cdab750d41ae ] EEE should be only be enabled during stmmac_mac_link_up() when the link are up and being set up properly. set_eee should only do settings configuration and disabling the eee. Without this fix, turning on EEE using ethtool will return "Operation not supported". This is due to the driver is in a dead loop waiting for eee to be advertised in the for eee to be activated but the driver will only configure the EEE advertisement after the eee is activated. Ethtool should only return "Operation not supported" if there is no EEE capbility in the MAC controller. Fixes: 8a7493e58ad6 ("net: stmmac: Fix a race in EEE enable callback") Signed-off-by: Voon Weifeng Acked-by: Mark Gross Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 15 --- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index eae11c5850251..c16d0cc3e9c44 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -662,23 +662,16 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, struct stmmac_priv *priv = netdev_priv(dev); int ret; - if (!edata->eee_enabled) { + if (!priv->dma_cap.eee) + return -EOPNOTSUPP; + + if (!edata->eee_enabled) stmmac_disable_eee_mode(priv); - } else { - /* We are asking for enabling the EEE but it is safe -* to verify all by invoking the eee_init function. -* In case of failure it will return an error. -*/ - edata->eee_enabled = stmmac_eee_init(priv); - if (!edata->eee_enabled) - return -EOPNOTSUPP; - } ret = phylink_ethtool_set_eee(priv->phylink, edata); if (ret) return ret; - priv->eee_enabled = edata->eee_enabled; priv->tx_lpi_timer = edata->tx_lpi_timer; return 0; } -- 2.25.1
[PATCH 5.8 099/124] rxrpc: Fix rxkad token xdr encoding
From: Marc Dionne [ Upstream commit 56305118e05b2db8d0395bba640ac9a3aee92624 ] The session key should be encoded with just the 8 data bytes and no length; ENCODE_DATA precedes it with a 4 byte length, which confuses some existing tools that try to parse this format. Add an ENCODE_BYTES macro that does not include a length, and use it for the key. Also adjust the expected length. Note that commit 774521f353e1d ("rxrpc: Fix an assertion in rxrpc_read()") had fixed a BUG by changing the length rather than fixing the encoding. The original length was correct. Fixes: 99455153d067 ("RxRPC: Parse security index 5 keys (Kerberos 5)") Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/key.c | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 0c98313dd7a8c..d77e89766406a 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -1073,7 +1073,7 @@ static long rxrpc_read(const struct key *key, switch (token->security_index) { case RXRPC_SECURITY_RXKAD: - toksize += 9 * 4; /* viceid, kvno, key*2 + len, begin, + toksize += 8 * 4; /* viceid, kvno, key*2, begin, * end, primary, tktlen */ toksize += RND(token->kad->ticket_len); break; @@ -1139,6 +1139,14 @@ static long rxrpc_read(const struct key *key, memcpy((u8 *)xdr + _l, , 4 - (_l & 3));\ xdr += (_l + 3) >> 2; \ } while(0) +#define ENCODE_BYTES(l, s) \ + do {\ + u32 _l = (l); \ + memcpy(xdr, (s), _l); \ + if (_l & 3) \ + memcpy((u8 *)xdr + _l, , 4 - (_l & 3));\ + xdr += (_l + 3) >> 2; \ + } while(0) #define ENCODE64(x)\ do {\ __be64 y = cpu_to_be64(x); \ @@ -1166,7 +1174,7 @@ static long rxrpc_read(const struct key *key, case RXRPC_SECURITY_RXKAD: ENCODE(token->kad->vice_id); ENCODE(token->kad->kvno); - ENCODE_DATA(8, token->kad->session_key); + ENCODE_BYTES(8, token->kad->session_key); ENCODE(token->kad->start); ENCODE(token->kad->expiry); ENCODE(token->kad->primary_flag); -- 2.25.1
[PATCH 5.8 093/124] net/mlx5e: Fix race condition on nhe->n pointer in neigh update
From: Vlad Buslov [ Upstream commit 1253935ad801485270194d5651acab04abc97b36 ] Current neigh update event handler implementation takes reference to neighbour structure, assigns it to nhe->n, tries to schedule workqueue task and releases the reference if task was already enqueued. This results potentially overwriting existing nhe->n pointer with another neighbour instance, which causes double release of the instance (once in neigh update handler that failed to enqueue to workqueue and another one in neigh update workqueue task that processes updated nhe->n pointer instead of original one): [ 3376.512806] [ cut here ] [ 3376.513534] refcount_t: underflow; use-after-free. [ 3376.521213] Modules linked in: act_skbedit act_mirred act_tunnel_key vxlan ip6_udp_tunnel udp_tunnel nfnetlink act_gact cls_flower sch_ingress openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 mlx5_ib mlx5_core mlxfw pci_hyperv_intf ptp pps_core nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp rpcrdma rdma_ucm ib_umad ib_ipoib ib_iser rdma_cm ib_cm iw_cm rfkill ib_uverbs ib_core sunrpc kvm_intel kvm iTCO_wdt iTCO_vendor_support virtio_net irqbypass net_failover crc32_pclmul lpc_ich i2c_i801 failover pcspkr i2c_smbus mfd_core ghash_clmulni_intel sch_fq_codel drm i2c _core ip_tables crc32c_intel serio_raw [last unloaded: mlxfw] [ 3376.529468] CPU: 8 PID: 22756 Comm: kworker/u20:5 Not tainted 5.9.0-rc5+ #6 [ 3376.530399] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 3376.531975] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core] [ 3376.532820] RIP: 0010:refcount_warn_saturate+0xd8/0xe0 [ 3376.533589] Code: ff 48 c7 c7 e0 b8 27 82 c6 05 0b b6 09 01 01 e8 94 93 c1 ff 0f 0b c3 48 c7 c7 88 b8 27 82 c6 05 f7 b5 09 01 01 e8 7e 93 c1 ff <0f> 0b c3 0f 1f 44 00 00 8b 07 3d 00 00 00 c0 74 12 83 f8 01 74 13 [ 3376.536017] RSP: 0018:c90002a97e30 EFLAGS: 00010286 [ 3376.536793] RAX: RBX: 8882de30d648 RCX: [ 3376.537718] RDX: 8882f5c28f20 RSI: 8882f5c18e40 RDI: 8882f5c18e40 [ 3376.538654] RBP: 8882cdf56c00 R08: c580 R09: 1a4d [ 3376.539582] R10: 0731 R11: c90002a97ccd R12: [ 3376.540519] R13: 8882de30d600 R14: 8882de30d640 R15: 88821e000900 [ 3376.541444] FS: () GS:8882f5c0() knlGS: [ 3376.542732] CS: 0010 DS: ES: CR0: 80050033 [ 3376.543545] CR2: 556e5504b248 CR3: 0002c6f10005 CR4: 00770ee0 [ 3376.544483] DR0: DR1: DR2: [ 3376.545419] DR3: DR6: fffe0ff0 DR7: 0400 [ 3376.546344] PKRU: 5554 [ 3376.546911] Call Trace: [ 3376.547479] mlx5e_rep_neigh_update.cold+0x33/0xe2 [mlx5_core] [ 3376.548299] process_one_work+0x1d8/0x390 [ 3376.548977] worker_thread+0x4d/0x3e0 [ 3376.549631] ? rescuer_thread+0x3e0/0x3e0 [ 3376.550295] kthread+0x118/0x130 [ 3376.550914] ? kthread_create_worker_on_cpu+0x70/0x70 [ 3376.551675] ret_from_fork+0x1f/0x30 [ 3376.552312] ---[ end trace d84e8f46d2a77eec ]--- Fix the bug by moving work_struct to dedicated dynamically-allocated structure. This enabled every event handler to work on its own private neighbour pointer and removes the need for handling the case when task is already enqueued. Fixes: 232c001398ae ("net/mlx5e: Add support to neighbour update flow") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/en/rep/neigh.c | 81 --- .../net/ethernet/mellanox/mlx5/core/en_rep.h | 6 -- 2 files changed, 50 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c index c3d167fa944c7..6a9d783d129b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -109,11 +109,25 @@ static void mlx5e_rep_neigh_stats_work(struct work_struct *work) rtnl_unlock(); } +struct neigh_update_work { + struct work_struct work; + struct neighbour *n; + struct mlx5e_neigh_hash_entry *nhe; +}; + +static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work) +{ + neigh_release(update_work->n); + mlx5e_rep_neigh_entry_release(update_work->nhe); + kfree(update_work); +} + static void mlx5e_rep_neigh_update(struct work_struct *work) { - struct mlx5e_neigh_hash_entry *nhe = - container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work); - struct neighbour *n = nhe->n; + struct neigh_update_work *update_work = container_of(work,
[PATCH 5.8 100/124] rxrpc: Downgrade the BUG() for unsupported token type in rxrpc_read()
From: David Howells [ Upstream commit 9a059cd5ca7d9c5c4ca5a6e755cf72f230176b6a ] If rxrpc_read() (which allows KEYCTL_READ to read a key), sees a token of a type it doesn't recognise, it can BUG in a couple of places, which is unnecessary as it can easily get back to userspace. Fix this to print an error message instead. Fixes: 99455153d067 ("RxRPC: Parse security index 5 keys (Kerberos 5)") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/key.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index d77e89766406a..32f46edcf7c67 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -1108,7 +1108,8 @@ static long rxrpc_read(const struct key *key, break; default: /* we have a ticket we can't encode */ - BUG(); + pr_err("Unsupported key token type (%u)\n", + token->security_index); continue; } @@ -1224,7 +1225,6 @@ static long rxrpc_read(const struct key *key, break; default: - BUG(); break; } -- 2.25.1
Re: [linux-safety] [PATCH] e1000: drop unneeded assignment in e1000_set_itr()
On Sun, 11 Oct 2020, Sudip Mukherjee wrote: > The variable 'current_itr' is assigned to 0 before jumping to > 'set_itr_now' but it has not been used after the jump. So, remove the > unneeded assignement. > > Signed-off-by: Sudip Mukherjee > --- > drivers/net/ethernet/intel/e1000/e1000_main.c | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c > b/drivers/net/ethernet/intel/e1000/e1000_main.c > index 5e28cf4fa2cd..042de276e632 100644 > --- a/drivers/net/ethernet/intel/e1000/e1000_main.c > +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c > @@ -2632,7 +2632,6 @@ static void e1000_set_itr(struct e1000_adapter *adapter) > > /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ > if (unlikely(adapter->link_speed != SPEED_1000)) { > - current_itr = 0; > new_itr = 4000; > goto set_itr_now; > } Alternatively, you could just inline the max(...) into the switch and completely drop the current_itr definition. But your solution probably does the job: it is a "No functional change" commit. Reviewed-by: Lukas Bulwahn Lukas > -- > 2.11.0 > > > > -=-=-=-=-=-=-=-=-=-=-=- > Links: You receive all messages sent to this group. > View/Reply Online (#77): https://lists.elisa.tech/g/linux-safety/message/77 > Mute This Topic: https://lists.elisa.tech/mt/77448709/1714638 > Group Owner: linux-safety+ow...@lists.elisa.tech > Unsubscribe: https://lists.elisa.tech/g/linux-safety/unsub > [lukas.bulw...@gmail.com] > -=-=-=-=-=-=-=-=-=-=-=- > > >
[PATCH 5.8 057/124] xsk: Do not discard packet when NETDEV_TX_BUSY
From: Magnus Karlsson [ Upstream commit 642e450b6b5955f2059d0ae372183f7c6323f951 ] In the skb Tx path, transmission of a packet is performed with dev_direct_xmit(). When NETDEV_TX_BUSY is set in the drivers, it signifies that it was not possible to send the packet right now, please try later. Unfortunately, the xsk transmit code discarded the packet and returned EBUSY to the application. Fix this unnecessary packet loss, by not discarding the packet in the Tx ring and return EAGAIN. As EAGAIN is returned to the application, it can then retry the send operation later and the packet will then likely be sent as the driver will then likely have space/resources to send the packet. In summary, EAGAIN tells the application that the packet was not discarded from the Tx ring and that it needs to call send() again. EBUSY, on the other hand, signifies that the packet was not sent and discarded from the Tx ring. The application needs to put the packet on the Tx ring again if it wants it to be sent. Fixes: 35fcde7f8deb ("xsk: support for Tx") Reported-by: Arkadiusz Zema Suggested-by: Arkadiusz Zema Suggested-by: Daniel Borkmann Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/bpf/1600257625-2353-1-git-send-email-magnus.karls...@gmail.com Signed-off-by: Sasha Levin --- net/xdp/xsk.c | 17 - 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 3700266229f63..dcce888b8ef54 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -375,15 +375,30 @@ static int xsk_generic_xmit(struct sock *sk) skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; skb->destructor = xsk_destruct_skb; + /* Hinder dev_direct_xmit from freeing the packet and +* therefore completing it in the destructor +*/ + refcount_inc(>users); err = dev_direct_xmit(skb, xs->queue_id); + if (err == NETDEV_TX_BUSY) { + /* Tell user-space to retry the send */ + skb->destructor = sock_wfree; + /* Free skb without triggering the perf drop trace */ + consume_skb(skb); + err = -EAGAIN; + goto out; + } + xskq_cons_release(xs->tx); /* Ignore NET_XMIT_CN as packet might have been sent */ - if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) { + if (err == NET_XMIT_DROP) { /* SKB completed but not sent */ + kfree_skb(skb); err = -EBUSY; goto out; } + consume_skb(skb); sent_frame = true; } -- 2.25.1
Re: [PATCH v1 1/2] ASoC: qcom: dt-bindings: Modify sc7180 machine bindings
On Sat, Oct 10, 2020 at 12:07:54AM +0800, Ajye Huang wrote: > On Fri, Oct 9, 2020 at 9:52 PM Mark Brown wrote: > > On Mon, Sep 28, 2020 at 02:37:43PM +0800, Ajye Huang wrote: > > > Add compatible "qcom,sc7180-sndcard-rt5682-m98357-2mic" > > > for 2mic case. > > This doesn't apply against current code, please check and resend. > Thank you for your reply, > This patch depends on Cheng-Yi's patch series > https://patchwork.kernel.org/patch/11773221/. That's "ASoC: qcom: dt-bindings: Add sc7180 machine bindings" for those playing at home. > If I misunderstand what you mean, please correct me, A version of some SC7180 patches was applied. However it does seem like that didn't include any machine driver bindings so it must've been a different, similar looking series which is presumably waiting for a new version - please resend based on that new version (ideally these patches could be picked up as part of that series). Please include human readable descriptions of things like commits and issues being discussed in e-mail in your mails, this makes them much easier for humans to read especially when they have no internet access. I do frequently catch up on my mail on flights or while otherwise travelling so this is even more pressing for me than just being about making things a bit easier to read. signature.asc Description: PGP signature
Re: [PATCH v6 1/4] rcu/tree: Make rcu_do_batch count how many callbacks were executed
On Sun, Oct 11, 2020 at 09:35:37AM -0700, Joel Fernandes wrote: > On Fri, Oct 9, 2020 at 4:14 PM Frederic Weisbecker > wrote: > > > > On Wed, Sep 23, 2020 at 11:22:08AM -0400, Joel Fernandes (Google) wrote: > > > Currently, rcu_do_batch() depends on the unsegmented callback list's len > > > field > > > to know how many CBs are executed. This fields counts down from 0 as CBs > > > are > > > dequeued. It is possible that all CBs could not be run because of > > > reaching > > > limits in which case the remaining unexecuted callbacks are requeued in > > > the > > > CPU's segcblist. > > > > > > The number of callbacks that were not requeued are then the negative > > > count (how > > > many CBs were run) stored in the rcl->len which has been counting down on > > > every > > > dequeue. This negative count is then added to the per-cpu segmented > > > callback > > > list's to correct its count. > > > > > > Such a design works against future efforts to track the length of each > > > segment > > > of the segmented callback list. The reason is because > > > rcu_segcblist_extract_done_cbs() will be populating the unsegmented > > > callback > > > list's length field (rcl->len) during extraction. > > > Also, the design of counting down from 0 is confusing and error-prone > > > IMHO. > > > > Right :) > > :) > > > > This commit therefore explicitly counts have many callbacks were executed > > > in > > > > s/have/how > > > > > rcu_do_batch() itself, and uses that to update the per-CPU segcb list's > > > ->len > > > field, without relying on the negativity of rcl->len. > > > > > > Signed-off-by: Joel Fernandes (Google) > > > > Reviewed-by: Frederic Weisbecker > > Thanks! Paul would be Ok to make the minor fixup s/have/how/ that > Frederic pointed? But of course! I was waiting until Frederic gets them all reviewed, with an eye to applying and wordsmithing them as a set. > - Joel > (Due to COVID issues at home, I'm intermittently working so advance > apologies for slow replies.) And I hope that this is going as well as it possibly can! Thanx, Paul
[PATCH 5.8 098/124] net: mvneta: fix double free of txq->buf
From: Tom Rix [ Upstream commit f4544e5361da5050ff5c0330ceea095cb5dbdd72 ] clang static analysis reports this problem: drivers/net/ethernet/marvell/mvneta.c:3465:2: warning: Attempt to free released memory kfree(txq->buf); ^~~ When mvneta_txq_sw_init() fails to alloc txq->tso_hdrs, it frees without poisoning txq->buf. The error is caught in the mvneta_setup_txqs() caller which handles the error by cleaning up all of the txqs with a call to mvneta_txq_sw_deinit which also frees txq->buf. Since mvneta_txq_sw_deinit is a general cleaner, all of the partial cleaning in mvneta_txq_sw_deinit()'s error handling is not needed. Fixes: 2adb719d74f6 ("net: mvneta: Implement software TSO") Signed-off-by: Tom Rix Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvneta.c | 13 ++--- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 7d5d9d34f4e47..69a234e83b8b7 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3372,24 +3372,15 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp, txq->last_desc = txq->size - 1; txq->buf = kmalloc_array(txq->size, sizeof(*txq->buf), GFP_KERNEL); - if (!txq->buf) { - dma_free_coherent(pp->dev->dev.parent, - txq->size * MVNETA_DESC_ALIGNED_SIZE, - txq->descs, txq->descs_phys); + if (!txq->buf) return -ENOMEM; - } /* Allocate DMA buffers for TSO MAC/IP/TCP headers */ txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent, txq->size * TSO_HEADER_SIZE, >tso_hdrs_phys, GFP_KERNEL); - if (!txq->tso_hdrs) { - kfree(txq->buf); - dma_free_coherent(pp->dev->dev.parent, - txq->size * MVNETA_DESC_ALIGNED_SIZE, - txq->descs, txq->descs_phys); + if (!txq->tso_hdrs) return -ENOMEM; - } /* Setup XPS mapping */ if (txq_number > 1) -- 2.25.1
[PATCH 5.8 056/124] xfrm: clone whole liftime_cur structure in xfrm_do_migrate
From: Antony Antony [ Upstream commit 8366685b2883e523f91e9816d7be371eb1144749 ] When we clone state only add_time was cloned. It missed values like bytes, packets. Now clone the all members of the structure. v1->v3: - use memcpy to copy the entire structure Fixes: 80c9abaabf42 ("[XFRM]: Extension for dynamic update of endpoint address(es)") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3a2c1f15d31dd..6b431a3830721 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1550,7 +1550,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->tfcpad = orig->tfcpad; x->replay_maxdiff = orig->replay_maxdiff; x->replay_maxage = orig->replay_maxage; - x->curlft.add_time = orig->curlft.add_time; + memcpy(>curlft, >curlft, sizeof(x->curlft)); x->km.state = orig->km.state; x->km.seq = orig->km.seq; x->replay = orig->replay; -- 2.25.1
[PATCH 5.8 097/124] vhost-vdpa: fix page pinning leakage in error path
From: Si-Wei Liu [ Upstream commit 7ed9e3d97c32d969caded2dfb6e67c1a2cc5a0b1 ] Pinned pages are not properly accounted particularly when mapping error occurs on IOTLB update. Clean up dangling pinned pages for the error path. As the inflight pinned pages, specifically for memory region that strides across multiple chunks, would need more than one free page for book keeping and accounting. For simplicity, pin pages for all memory in the IOVA range in one go rather than have multiple pin_user_pages calls to make up the entire region. This way it's easier to track and account the pages already mapped, particularly for clean-up in the error path. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Si-Wei Liu Link: https://lore.kernel.org/r/1601701330-16837-3-git-send-email-si-wei@oracle.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vdpa.c | 119 ++- 1 file changed, 71 insertions(+), 48 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 5259f5210b375..e172c2efc663c 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -555,21 +555,19 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, struct vhost_dev *dev = >vdev; struct vhost_iotlb *iotlb = dev->iotlb; struct page **page_list; - unsigned long list_size = PAGE_SIZE / sizeof(struct page *); + struct vm_area_struct **vmas; unsigned int gup_flags = FOLL_LONGTERM; - unsigned long npages, cur_base, map_pfn, last_pfn = 0; - unsigned long locked, lock_limit, pinned, i; + unsigned long map_pfn, last_pfn = 0; + unsigned long npages, lock_limit; + unsigned long i, nmap = 0; u64 iova = msg->iova; + long pinned; int ret = 0; if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; - page_list = (struct page **) __get_free_page(GFP_KERNEL); - if (!page_list) - return -ENOMEM; - if (msg->perm & VHOST_ACCESS_WO) gup_flags |= FOLL_WRITE; @@ -577,61 +575,86 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, if (!npages) return -EINVAL; + page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); + vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *), + GFP_KERNEL); + if (!page_list || !vmas) { + ret = -ENOMEM; + goto free; + } + mmap_read_lock(dev->mm); - locked = atomic64_add_return(npages, >mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (locked > lock_limit) { + if (npages + atomic64_read(>mm->pinned_vm) > lock_limit) { ret = -ENOMEM; - goto out; + goto unlock; } - cur_base = msg->uaddr & PAGE_MASK; - iova &= PAGE_MASK; + pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags, + page_list, vmas); + if (npages != pinned) { + if (pinned < 0) { + ret = pinned; + } else { + unpin_user_pages(page_list, pinned); + ret = -ENOMEM; + } + goto unlock; + } - while (npages) { - pinned = min_t(unsigned long, npages, list_size); - ret = pin_user_pages(cur_base, pinned, -gup_flags, page_list, NULL); - if (ret != pinned) - goto out; - - if (!last_pfn) - map_pfn = page_to_pfn(page_list[0]); - - for (i = 0; i < ret; i++) { - unsigned long this_pfn = page_to_pfn(page_list[i]); - u64 csize; - - if (last_pfn && (this_pfn != last_pfn + 1)) { - /* Pin a contiguous chunk of memory */ - csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; - if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) - goto out; - map_pfn = this_pfn; - iova += csize; + iova &= PAGE_MASK; + map_pfn = page_to_pfn(page_list[0]); + + /* One more iteration to avoid extra vdpa_map() call out of loop. */ + for (i = 0; i <= npages; i++) { + unsigned long this_pfn; + u64 csize; + + /* The last chunk may have no valid PFN next to it */ + this_pfn = i < npages ? page_to_pfn(page_list[i]) :
[PATCH 5.8 079/124] octeontx2-af: Fix enable/disable of default NPC entries
From: Subbaraya Sundeep [ Upstream commit e154b5b70368a84a19505a0be9b0096c66562b56 ] Packet replication feature present in Octeontx2 is a hardware linked list of PF and its VF interfaces so that broadcast packets are sent to all interfaces present in the list. It is driver job to add and delete a PF/VF interface to/from the list when the interface is brought up and down. This patch fixes the npc_enadis_default_entries function to handle broadcast replication properly if packet replication feature is present. Fixes: 40df309e4166 ("octeontx2-af: Support to enable/disable default MCAM entries") Signed-off-by: Subbaraya Sundeep Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/marvell/octeontx2/af/rvu.h | 3 ++- .../ethernet/marvell/octeontx2/af/rvu_nix.c | 5 ++-- .../ethernet/marvell/octeontx2/af/rvu_npc.c | 26 ++- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index dcf25a0920084..b89dde2c8b089 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -463,6 +463,7 @@ void rvu_nix_freemem(struct rvu *rvu); int rvu_get_nixlf_count(struct rvu *rvu); void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int npalf); int nix_get_nixlf(struct rvu *rvu, u16 pcifunc, int *nixlf, int *nix_blkaddr); +int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add); /* NPC APIs */ int rvu_npc_init(struct rvu *rvu); @@ -477,7 +478,7 @@ void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan); -void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc); +void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable); int rvu_npc_update_rxvlan(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 36953d4f51c73..3495b3a6828c0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -17,7 +17,6 @@ #include "npc.h" #include "cgx.h" -static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add); static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, int type, int chan_id); @@ -2020,7 +2019,7 @@ static int nix_update_mce_list(struct nix_mce_list *mce_list, return 0; } -static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add) +int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add) { int err = 0, idx, next_idx, last_idx; struct nix_mce_list *mce_list; @@ -2065,7 +2064,7 @@ static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add) /* Disable MCAM entry in NPC */ if (!mce_list->count) { - rvu_npc_disable_bcast_entry(rvu, pcifunc); + rvu_npc_enable_bcast_entry(rvu, pcifunc, false); goto end; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 0a214084406a6..fbaf9bcd83f2f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -530,7 +530,7 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, NIX_INTF_RX, , true); } -void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc) +void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable) { struct npc_mcam *mcam = >hw->mcam; int blkaddr, index; @@ -543,7 +543,7 @@ void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc) pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK; index = npc_get_nixlf_mcam_index(mcam, pcifunc, 0, NIXLF_BCAST_ENTRY); - npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false); + npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, @@ -622,23 +622,35 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc, nixlf, NIXLF_UCAST_ENTRY); npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); - /* For PF, ena/dis promisc and bcast MCAM match entries */ - if (pcifunc & RVU_PFVF_FUNC_MASK) + /* For PF, ena/dis promisc and bcast
Re: [RFC PATCH] checkpatch: add shebang check to EXECUTE_PERMISSIONS
On 12/10/20 11:47 am, Joe Perches wrote: > On Mon, 2020-10-12 at 11:19 +0530, Ujjwal Kumar wrote: >> checkpatch.pl checks for invalid EXECUTE_PERMISSIONS on source >> files. The script leverages filename extensions and its path in >> the repository to decide whether to allow execute permissions on >> the file or not. >> >> Based on current check conditions, a perl script file having >> execute permissions, without '.pl' extension in its filename >> and not belonging to 'scripts/' directory is reported as ERROR >> which is a false-positive. >> >> Adding a shebang check along with current conditions will make >> the check more generalised and improve checkpatch reports. >> To do so, without breaking the core design decision of checkpatch, >> we can fetch the first line from the patch itself and match it for >> a shebang pattern. >> >> There can be cases where the first line is not part of the patch. > > For instance: a patch that only changes permissions > without changing any of the file content. > >> >> In that case there may be a false-positive report but in the end we >> will have less false-positives as we will be handling some of the >> unhandled cases. > >> Signed-off-by: Ujjwal Kumar >> --- >> Apologies, I forgot to include linux-kernel@vger.kernel.org so I'm >> now resending. >> >> scripts/checkpatch.pl | 19 +++ >> 1 file changed, 19 insertions(+) >> >> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl > [] >> @@ -1795,6 +1795,23 @@ sub get_stat_here { >> return $herectx; >> } > > First some style trivia: > >> +sub get_shebang { >> +my ($linenr, $realfile) = @_; >> +my $rawline = ""; >> +my $shebang = ""; >> + >> +$rawline = raw_line($linenr, 3); >> +if (defined $rawline && >> +$rawline =~ /^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) { > > alignment to open parenthesis please > >> +if (defined $1 && $1 == 1) { >> +$shebang = raw_line($linenr, 4); >> +$shebang = substr $shebang, 1; > > parentheses around substr please. > >> +} >> +} >> + >> +return $shebang; >> +} > > And some real notes: > > $realfile isn't used in this function so there doesn't > seem to be a reason to have it as an function argument. > >> + >> sub cat_vet { >> my ($vet) = @_; >> my ($res, $coded); >> @@ -2680,7 +2697,9 @@ sub process { >> # Check for incorrect file permissions >> if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) { > > probably better here to use a capture group for the permissions > > if ($line =~ /^new (?:file )?mode (\d+)$/) { > my $mode = substr($1, -3); This > >> my $permhere = $here . "FILE: $realfile\n"; >> +my $shebang = get_shebang($linenr, $realfile); >> if ($realfile !~ m@scripts/@ && > > Maybe remove the $realfile directory test as > there are many source files that are not scripts > in this directory and its subdirectories. this > >> +$shebang !~ /^#!\s*(\/\w)+.*/ && > > unnecessary capture group > > and add > > $mode =~ /[1357]/ && this > >> $realfile !~ /\.(py|pl|awk|sh)$/) { > > No need for a a capture group here either. (existing defect) and this. > >> ERROR("EXECUTE_PERMISSIONS", >>"do not set execute permissions for >> source files\n" . $permhere); > > > Should these new changes go as a separate patch or can they be included in the next iteration of this patch? Thanks Ujjwal Kumar
[PATCH 5.8 078/124] net: phy: realtek: fix rtl8211e rx/tx delay config
From: Willy Liu [ Upstream commit bbc4d71d63549bcd003a430de18a72a742d8c91e ] There are two chip pins named TXDLY and RXDLY which actually adds the 2ns delays to TXC and RXC for TXD/RXD latching. These two pins can config via 4.7k-ohm resistor to 3.3V hw setting, but also config via software setting (extension page 0xa4 register 0x1c bit13 12 and 11). The configuration register definitions from table 13 official PHY datasheet: PHYAD[2:0] = PHY Address AN[1:0] = Auto-Negotiation Mode = Interface Mode Select RX Delay = RX Delay TX Delay = TX Delay SELRGV = RGMII/GMII Selection This table describes how to config these hw pins via external pull-high or pull- low resistor. It is a misunderstanding that mapping it as register bits below: 8:6 = PHY Address 5:4 = Auto-Negotiation 3 = Interface Mode Select 2 = RX Delay 1 = TX Delay 0 = SELRGV So I removed these descriptions above and add related settings as below: 14 = reserved 13 = force Tx RX Delay controlled by bit12 bit11 12 = Tx Delay 11 = Rx Delay 10:0 = Test && debug settings reserved by realtek Test && debug settings are not recommend to modify by default. Fixes: f81dadbcf7fd ("net: phy: realtek: Add rtl8211e rx/tx delays config") Signed-off-by: Willy Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/realtek.c | 31 --- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index c7229d022a27b..48ba757046cea 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ -/* - * drivers/net/phy/realtek.c +/* drivers/net/phy/realtek.c * * Driver for Realtek PHYs * @@ -32,9 +31,9 @@ #define RTL8211F_TX_DELAY BIT(8) #define RTL8211F_RX_DELAY BIT(3) -#define RTL8211E_TX_DELAY BIT(1) -#define RTL8211E_RX_DELAY BIT(2) -#define RTL8211E_MODE_MII_GMII BIT(3) +#define RTL8211E_CTRL_DELAYBIT(13) +#define RTL8211E_TX_DELAY BIT(12) +#define RTL8211E_RX_DELAY BIT(11) #define RTL8201F_ISR 0x1e #define RTL8201F_IER 0x13 @@ -246,16 +245,16 @@ static int rtl8211e_config_init(struct phy_device *phydev) /* enable TX/RX delay for rgmii-* modes, and disable them for rgmii. */ switch (phydev->interface) { case PHY_INTERFACE_MODE_RGMII: - val = 0; + val = RTL8211E_CTRL_DELAY | 0; break; case PHY_INTERFACE_MODE_RGMII_ID: - val = RTL8211E_TX_DELAY | RTL8211E_RX_DELAY; + val = RTL8211E_CTRL_DELAY | RTL8211E_TX_DELAY | RTL8211E_RX_DELAY; break; case PHY_INTERFACE_MODE_RGMII_RXID: - val = RTL8211E_RX_DELAY; + val = RTL8211E_CTRL_DELAY | RTL8211E_RX_DELAY; break; case PHY_INTERFACE_MODE_RGMII_TXID: - val = RTL8211E_TX_DELAY; + val = RTL8211E_CTRL_DELAY | RTL8211E_TX_DELAY; break; default: /* the rest of the modes imply leaving delays as is. */ return 0; @@ -263,11 +262,12 @@ static int rtl8211e_config_init(struct phy_device *phydev) /* According to a sample driver there is a 0x1c config register on the * 0xa4 extension page (0x7) layout. It can be used to disable/enable -* the RX/TX delays otherwise controlled by RXDLY/TXDLY pins. It can -* also be used to customize the whole configuration register: -* 8:6 = PHY Address, 5:4 = Auto-Negotiation, 3 = Interface Mode Select, -* 2 = RX Delay, 1 = TX Delay, 0 = SELRGV (see original PHY datasheet -* for details). +* the RX/TX delays otherwise controlled by RXDLY/TXDLY pins. +* The configuration register definition: +* 14 = reserved +* 13 = Force Tx RX Delay controlled by bit12 bit11, +* 12 = RX Delay, 11 = TX Delay +* 10:0 = Test && debug settings reserved by realtek */ oldpage = phy_select_page(phydev, 0x7); if (oldpage < 0) @@ -277,7 +277,8 @@ static int rtl8211e_config_init(struct phy_device *phydev) if (ret) goto err_restore_page; - ret = __phy_modify(phydev, 0x1c, RTL8211E_TX_DELAY | RTL8211E_RX_DELAY, + ret = __phy_modify(phydev, 0x1c, RTL8211E_CTRL_DELAY + | RTL8211E_TX_DELAY | RTL8211E_RX_DELAY, val); err_restore_page: -- 2.25.1
Dear. beloved one I need Your Urgent respond,
Greeting, Please forgive me for stressing you with my predicaments and I sorry to approach you through this media it is because it serves the fastest means of communication. I came across your E-mail from my personal search and I decided to contact you believing you will be honest to fulfill my final wish before I die. I am Mrs. Elizabeth Edward, 63 years, from USA, I am childless and I am suffering from a pro-long critical cancer, my doctors confirmed I may not live beyond two months from now as my ill health has defiled all forms of medical treatment. Since my days are numbered, I’ve decided willingly to fulfill my long-time promise to donate you the sum ($5.000.000.00) million dollars I inherited from my late husband Mr. Edward Herbart, foreign bank account over years. I need a very honest person who can assist in transfer of this money to his or her account and use the funds for charities work of God while you use 50% for yourself. I want you to know there are no risk involved, it is 100% hitch free & safe. If you will be interesting to assist in getting this fund into your account for charity project to fulfill my promise before I die please let me know immediately. I will appreciate your utmost confidentiality as I wait for your reply. Best Regards Mrs. Elizabeth Edward,
[PATCH 5.8 054/124] xfrm: clone XFRMA_REPLAY_ESN_VAL in xfrm_do_migrate
From: Antony Antony [ Upstream commit 91a46c6d1b4fcbfa4773df9421b8ad3e58088101 ] XFRMA_REPLAY_ESN_VAL was not cloned completely from the old to the new. Migrate this attribute during XFRMA_MSG_MIGRATE v1->v2: - move curleft cloning to a separate patch Fixes: af2f464e326e ("xfrm: Assign esn pointers when cloning a state") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- include/net/xfrm.h | 16 ++-- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 51f65d23ebafa..2e32cb10ac16b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1767,21 +1767,17 @@ static inline unsigned int xfrm_replay_state_esn_len(struct xfrm_replay_state_es static inline int xfrm_replay_clone(struct xfrm_state *x, struct xfrm_state *orig) { - x->replay_esn = kzalloc(xfrm_replay_state_esn_len(orig->replay_esn), + + x->replay_esn = kmemdup(orig->replay_esn, + xfrm_replay_state_esn_len(orig->replay_esn), GFP_KERNEL); if (!x->replay_esn) return -ENOMEM; - - x->replay_esn->bmp_len = orig->replay_esn->bmp_len; - x->replay_esn->replay_window = orig->replay_esn->replay_window; - - x->preplay_esn = kmemdup(x->replay_esn, -xfrm_replay_state_esn_len(x->replay_esn), + x->preplay_esn = kmemdup(orig->preplay_esn, +xfrm_replay_state_esn_len(orig->preplay_esn), GFP_KERNEL); - if (!x->preplay_esn) { - kfree(x->replay_esn); + if (!x->preplay_esn) return -ENOMEM; - } return 0; } -- 2.25.1
[PATCH 5.8 055/124] xfrm: clone XFRMA_SEC_CTX in xfrm_do_migrate
From: Antony Antony [ Upstream commit 7aa05d304785204703a67a6aa7f1db402889a172 ] XFRMA_SEC_CTX was not cloned from the old to the new. Migrate this attribute during XFRMA_MSG_MIGRATE v1->v2: - return -ENOMEM on error v2->v3: - fix return type to int Fixes: 80c9abaabf42 ("[XFRM]: Extension for dynamic update of endpoint address(es)") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_state.c | 28 1 file changed, 28 insertions(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8ec3a6a12dd34..3a2c1f15d31dd 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1441,6 +1441,30 @@ out: EXPORT_SYMBOL(xfrm_state_add); #ifdef CONFIG_XFRM_MIGRATE +static inline int clone_security(struct xfrm_state *x, struct xfrm_sec_ctx *security) +{ + struct xfrm_user_sec_ctx *uctx; + int size = sizeof(*uctx) + security->ctx_len; + int err; + + uctx = kmalloc(size, GFP_KERNEL); + if (!uctx) + return -ENOMEM; + + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = size; + uctx->ctx_doi = security->ctx_doi; + uctx->ctx_alg = security->ctx_alg; + uctx->ctx_len = security->ctx_len; + memcpy(uctx + 1, security->ctx_str, security->ctx_len); + err = security_xfrm_state_alloc(x, uctx); + kfree(uctx); + if (err) + return err; + + return 0; +} + static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, struct xfrm_encap_tmpl *encap) { @@ -1497,6 +1521,10 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, goto error; } + if (orig->security) + if (clone_security(x, orig->security)) + goto error; + if (orig->coaddr) { x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr), GFP_KERNEL); -- 2.25.1
[PATCH 5.8 077/124] virtio-net: dont disable guest csum when disable LRO
From: Tonghao Zhang [ Upstream commit 1a03b8a35a957f9f38ecb8a97443b7380bbf6a8b ] Open vSwitch and Linux bridge will disable LRO of the interface when this interface added to them. Now when disable the LRO, the virtio-net csum is disable too. That drops the forwarding performance. Fixes: a02e8964eaf9 ("virtio-net: ethtool configurable LRO") Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Willem de Bruijn Signed-off-by: Tonghao Zhang Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/virtio_net.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ba38765dc4905..c34927b1d806e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -63,6 +63,11 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_CSUM }; +#define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ + (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ + (1ULL << VIRTIO_NET_F_GUEST_UFO)) + struct virtnet_stat_desc { char desc[ETH_GSTRING_LEN]; size_t offset; @@ -2547,7 +2552,8 @@ static int virtnet_set_features(struct net_device *dev, if (features & NETIF_F_LRO) offloads = vi->guest_offloads_capable; else - offloads = 0; + offloads = vi->guest_offloads_capable & + ~GUEST_OFFLOAD_LRO_MASK; err = virtnet_set_guest_offloads(vi, offloads); if (err) -- 2.25.1
[PATCH 5.8 081/124] octeontx2-pf: Fix the device state on error
From: Hariprasad Kelam [ Upstream commit 1ea0166da0509e987caa42c30a6a71f2c6ca1875 ] Currently in otx2_open on failure of nix_lf_start transmit queues are not stopped which are already started in link_event. Since the tx queues are not stopped network stack still try's to send the packets leading to driver crash while access the device resources. Fixes: 50fe6c02e ("octeontx2-pf: Register and handle link notifications") Signed-off-by: Hariprasad Kelam Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 75a8c407e815c..5d620a39ea802 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1560,10 +1560,13 @@ int otx2_open(struct net_device *netdev) err = otx2_rxtx_enable(pf, true); if (err) - goto err_free_cints; + goto err_tx_stop_queues; return 0; +err_tx_stop_queues: + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); err_free_cints: otx2_free_cints(pf, qidx); vec = pci_irq_vector(pf->pdev, -- 2.25.1
[PATCH 5.8 053/124] xfrm: clone XFRMA_SET_MARK in xfrm_do_migrate
From: Antony Antony [ Upstream commit 545e5c571662b1cd79d9588f9d3b6e36985b8007 ] XFRMA_SET_MARK and XFRMA_SET_MARK_MASK was not cloned from the old to the new. Migrate these two attributes during XFRMA_MSG_MIGRATE Fixes: 9b42c1f179a6 ("xfrm: Extend the output_mark to support input direction and masking.") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8be2d926acc21..8ec3a6a12dd34 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1510,6 +1510,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, } memcpy(>mark, >mark, sizeof(x->mark)); + memcpy(>props.smark, >props.smark, sizeof(x->props.smark)); if (xfrm_init_state(x) < 0) goto error; -- 2.25.1
[PATCH 5.8 080/124] octeontx2-pf: Fix TCP/UDP checksum offload for IPv6 frames
From: Geetha sowjanya [ Upstream commit 89eae5e87b4fa799726a3e8911c90d418cb5d2b1 ] For TCP/UDP checksum offload feature in Octeontx2 expects L3TYPE to be set irrespective of IP header checksum is being offloaded or not. Currently for IPv6 frames L3TYPE is not being set resulting in packet drop with checksum error. This patch fixes this issue. Fixes: 3ca6c4c88 ("octeontx2-pf: Add packet transmission support") Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index b04f5429d72d9..334eab976ee4a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -524,6 +524,7 @@ static void otx2_sqe_add_hdr(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, sqe_hdr->ol3type = NIX_SENDL3TYPE_IP4_CKSUM; } else if (skb->protocol == htons(ETH_P_IPV6)) { proto = ipv6_hdr(skb)->nexthdr; + sqe_hdr->ol3type = NIX_SENDL3TYPE_IP6; } if (proto == IPPROTO_TCP) -- 2.25.1
[PATCH 5.8 052/124] iommu/vt-d: Fix lockdep splat in iommu_flush_dev_iotlb()
From: Lu Baolu [ Upstream commit 1a3f2fd7fc4e8f24510830e265de2ffb8e3300d2 ] Lock(>lock) without disabling irq causes lockdep warnings. [ 12.703950] [ 12.703962] WARNING: possible irq lock inversion dependency detected [ 12.703975] 5.9.0-rc6+ #659 Not tainted [ 12.703983] [ 12.703995] systemd-udevd/284 just changed the state of lock: [ 12.704007] bd6ff4d8 (device_domain_lock){..-.}-{2:2}, at: iommu_flush_dev_iotlb.part.57+0x2e/0x90 [ 12.704031] but this lock took another, SOFTIRQ-unsafe lock in the past: [ 12.704043] (>lock){+.+.}-{2:2} [ 12.704045] and interrupts could create inverse lock ordering between them. [ 12.704073] other info that might help us debug this: [ 12.704085] Possible interrupt unsafe locking scenario: [ 12.704097]CPU0CPU1 [ 12.704106] [ 12.704115] lock(>lock); [ 12.704123]local_irq_disable(); [ 12.704134]lock(device_domain_lock); [ 12.704146]lock(>lock); [ 12.704158] [ 12.704164] lock(device_domain_lock); [ 12.704174] *** DEADLOCK *** Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20200927062428.13713-1-baolu...@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index fbe0b0cc56edf..24a84d294fd01 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2617,7 +2617,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, } /* Setup the PASID entry for requests without PASID: */ - spin_lock(>lock); + spin_lock_irqsave(>lock, flags); if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, dev, PASID_RID2PASID); @@ -2627,7 +2627,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, else ret = intel_pasid_setup_second_level(iommu, domain, dev, PASID_RID2PASID); - spin_unlock(>lock); + spin_unlock_irqrestore(>lock, flags); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); dmar_remove_one_dev_info(dev); -- 2.25.1
Re: [PATCH] trace: Return ENOTCONN instead of EBADF
On Mon, 12 Oct 2020 10:26:42 +0200 Peter Enderborg wrote: > When there is no clients listening on event the trace return > EBADF. The file is not a bad file descriptor and to get the > userspace able to do a proper error handling it need a different > error code that separate a bad file descriptor from a empty listening. I have no problem with this patch, but your description is incorrect. And before making this change, I want to make sure that what you think is happening is actually happening. This has nothing to do with "clients listening". This happens when the ring buffer is disabled for some reason. The most likely case of this happening is if someone sets /sys/kernel/tracing/tracing_on to zero. If this is still something you want applied, please update the change log to a more accurate scenario. Thanks, -- Steve > > Signed-off-by: Peter Enderborg > --- > kernel/trace/trace.c | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c > index d3e5de717df2..6e592bf736df 100644 > --- a/kernel/trace/trace.c > +++ b/kernel/trace/trace.c > @@ -6651,8 +6651,8 @@ tracing_mark_write(struct file *filp, const char __user > *ubuf, > event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, > irq_flags, preempt_count()); > if (unlikely(!event)) > - /* Ring buffer disabled, return as if not open for write */ > - return -EBADF; > + /* Ring buffer disabled, return as if not connected */ > + return -ENOTCONN; > > entry = ring_buffer_event_data(event); > entry->ip = _THIS_IP_; > @@ -6731,8 +6731,8 @@ tracing_mark_raw_write(struct file *filp, const char > __user *ubuf, > event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, > irq_flags, preempt_count()); > if (!event) > - /* Ring buffer disabled, return as if not open for write */ > - return -EBADF; > + /* Ring buffer disabled, return not connected */ > + return -ENOTCONN; > > entry = ring_buffer_event_data(event); >
[PATCH 5.8 076/124] net: usb: ax88179_178a: fix missing stop entry in driver_info
From: Wilken Gottwalt [ Upstream commit 9666ea66a74adfe295cb3a8760c76e1ef70f9caf ] Adds the missing .stop entry in the Belkin driver_info structure. Fixes: e20bd60bf62a ("net: usb: asix88179_178a: Add support for the Belkin B2B128") Signed-off-by: Wilken Gottwalt Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/ax88179_178a.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index a38e868e44d46..f0ef3706aad96 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1823,6 +1823,7 @@ static const struct driver_info belkin_info = { .status = ax88179_status, .link_reset = ax88179_link_reset, .reset = ax88179_reset, + .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, -- 2.25.1
[GIT PULL] io_uring updates for 5.10-rc1
Hi Linus, Here are the io_uring updates for 5.10. This pull request contains: - Add blkcg accounting for io-wq offload (Dennis) - A use-after-free fix for io-wq (Hillf) - Cancelation fixes and improvements - Use proper files_struct references for offload - Cleanup of io_uring_get_socket() since that can now go into our own header - SQPOLL fixes and cleanups, and support for sharing the thread - Improvement to how page accounting is done for registered buffers and huge pages, accounting the real pinned state - Series cleaning up the xarray code (Willy) - Various cleanups, refactoring, and improvements (Pavel) - Use raw spinlock for io-wq (Sebastian) - Add support for ring restrictions (Stefano) Please pull! The following changes since commit c8d317aa1887b40b188ec3aaa6e9e524333caed1: io_uring: fix async buffered reads when readahead is disabled (2020-09-29 07:54:00 -0600) are available in the Git repository at: git://git.kernel.dk/linux-block.git tags/io_uring-5.10-2020-10-12 for you to fetch changes up to b2e9685283127f30e7f2b466af0046ff9bd27a86: io_uring: keep a pointer ref_node in file_data (2020-10-10 12:49:25 -0600) io_uring-5.10-2020-10-12 Dennis Zhou (1): io_uring: add blkcg accounting to offloaded operations Hillf Danton (1): io-wq: fix use-after-free in io_wq_worker_running Jens Axboe (29): Merge branch 'io_uring-5.9' into for-5.10/io_uring io_uring: allow timeout/poll/files killing to take task into account io_uring: move dropping of files into separate helper io_uring: stash ctx task reference for SQPOLL io_uring: unconditionally grab req->task io_uring: return cancelation status from poll/timeout/files handlers io_uring: enable task/files specific overflow flushing io_uring: don't rely on weak ->files references io_uring: reference ->nsproxy for file table commands io_uring: move io_uring_get_socket() into io_uring.h io_uring: io_sq_thread() doesn't need to flush signals fs: align IOCB_* flags with RWF_* flags io_uring: use private ctx wait queue entries for SQPOLL io_uring: move SQPOLL post-wakeup ring need wakeup flag into wake handler io_uring: split work handling part of SQPOLL into helper io_uring: split SQPOLL data into separate structure io_uring: base SQPOLL handling off io_sq_data io_uring: enable IORING_SETUP_ATTACH_WQ to attach to SQPOLL thread too io_uring: mark io_uring_fops/io_op_defs as __read_mostly io_uring: provide IORING_ENTER_SQ_WAIT for SQPOLL SQ ring waits io_uring: get rid of req->io/io_async_ctx union io_uring: cap SQ submit size for SQPOLL with multiple rings io_uring: improve registered buffer accounting for huge pages io_uring: process task work in io_uring_register() io-wq: kill unused IO_WORKER_F_EXITING io_uring: kill callback_head argument for io_req_task_work_add() io_uring: batch account ->req_issue and task struct references io_uring: no need to call xa_destroy() on empty xarray io_uring: fix break condition for __io_uring_register() waiting Joseph Qi (1): io_uring: show sqthread pid and cpu in fdinfo Matthew Wilcox (Oracle) (3): io_uring: Fix use of XArray in __io_uring_files_cancel io_uring: Fix XArray usage in io_uring_add_task_file io_uring: Convert advanced XArray uses to the normal API Pavel Begunkov (23): io_uring: simplify io_rw_prep_async() io_uring: refactor io_req_map_rw() io_uring: fix overlapped memcpy in io_req_map_rw() io_uring: kill extra user_bufs check io_uring: simplify io_alloc_req() io_uring: io_kiocb_ppos() style change io_uring: remove F_NEED_CLEANUP check in *prep() io_uring: set/clear IOCB_NOWAIT into io_read/write io_uring: remove nonblock arg from io_{rw}_prep() io_uring: decouple issuing and req preparation io_uring: move req preps out of io_issue_sqe() io_uring: don't io_prep_async_work() linked reqs io_uring: clean up ->files grabbing io_uring: kill extra check in fixed io_file_get() io_uring: simplify io_file_get() io_uring: improve submit_state.ios_left accounting io_uring: use a separate struct for timeout_remove io_uring: remove timeout.list after hrtimer cancel io_uring: clean leftovers after splitting issue io_uring: don't delay io_init_req() error check io_uring: clean file_data access in files_register io_uring: refactor *files_register()'s error paths io_uring: keep a pointer ref_node in file_data Sebastian Andrzej Siewior (1): io_wq: Make io_wqe::lock a raw_spinlock_t Stefano Garzarella (3): io_uring: use an enumeration for io_uring_register(2) opcodes io_uring: add IOURING_REGISTER_RESTRICTIONS
[PATCH 5.8 018/124] platform/x86: intel-vbtn: Fix SW_TABLET_MODE always reporting 1 on the HP Pavilion 11 x360
From: Hans de Goede commit d823346876a970522ff9e4d2b323c9b734dcc4de upstream. Commit cfae58ed681c ("platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE on the 9 / "Laptop" chasis-type") restored SW_TABLET_MODE reporting on the HP stream x360 11 series on which it was previously broken by commit de9647efeaa9 ("platform/x86: intel-vbtn: Only activate tablet mode switch on 2-in-1's"). It turns out that enabling SW_TABLET_MODE reporting on devices with a chassis-type of 10 ("Notebook") causes SW_TABLET_MODE to always report 1 at boot on the HP Pavilion 11 x360, which causes libinput to disable the kbd and touchpad. The HP Pavilion 11 x360's ACPI VGBS method sets bit 4 instead of bit 6 when NOT in tablet mode at boot. Inspecting all the DSDTs in my DSDT collection shows only one other model, the Medion E1239T ever setting bit 4 and it always sets this together with bit 6. So lets treat bit 4 as a second bit which when set indicates the device not being in tablet-mode, as we already do for bit 6. While at it also prefix all VGBS constant defines with "VGBS_". Fixes: cfae58ed681c ("platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE on the 9 / "Laptop" chasis-type") Signed-off-by: Hans de Goede Acked-by: Mark Gross Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel-vbtn.c | 12 1 file changed, 8 insertions(+), 4 deletions(-) --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -15,9 +15,13 @@ #include #include +/* Returned when NOT in tablet mode on some HP Stream x360 11 models */ +#define VGBS_TABLET_MODE_FLAG_ALT 0x10 /* When NOT in tablet mode, VGBS returns with the flag 0x40 */ -#define TABLET_MODE_FLAG 0x40 -#define DOCK_MODE_FLAG 0x80 +#define VGBS_TABLET_MODE_FLAG 0x40 +#define VGBS_DOCK_MODE_FLAG0x80 + +#define VGBS_TABLET_MODE_FLAGS (VGBS_TABLET_MODE_FLAG | VGBS_TABLET_MODE_FLAG_ALT) MODULE_LICENSE("GPL"); MODULE_AUTHOR("AceLan Kao"); @@ -72,9 +76,9 @@ static void detect_tablet_mode(struct pl if (ACPI_FAILURE(status)) return; - m = !(vgbs & TABLET_MODE_FLAG); + m = !(vgbs & VGBS_TABLET_MODE_FLAGS); input_report_switch(priv->input_dev, SW_TABLET_MODE, m); - m = (vgbs & DOCK_MODE_FLAG) ? 1 : 0; + m = (vgbs & VGBS_DOCK_MODE_FLAG) ? 1 : 0; input_report_switch(priv->input_dev, SW_DOCK, m); }
[PATCH 5.8 036/124] cifs: Fix incomplete memory allocation on setxattr path
From: Vladimir Zapolskiy commit 64b7f674c292207624b3d788eda2dde3dc1415df upstream. On setxattr() syscall path due to an apprent typo the size of a dynamically allocated memory chunk for storing struct smb2_file_full_ea_info object is computed incorrectly, to be more precise the first addend is the size of a pointer instead of the wanted object size. Coincidentally it makes no difference on 64-bit platforms, however on 32-bit targets the following memcpy() writes 4 bytes of data outside of the dynamically allocated memory. = BUG kmalloc-16 (Not tainted): Redzone overwritten - Disabling lock debugging due to kernel taint INFO: 0x79e69a6f-0x9e5cdecf @offset=368. First byte 0x73 instead of 0xcc INFO: Slab 0xd36d2454 objects=85 used=51 fp=0xf7d0fc7a flags=0x35000201 INFO: Object 0x6f171df3 @offset=352 fp=0x Redzone 5d4ff02d: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc Object 6f171df3: 00 00 00 00 00 05 06 00 73 6e 72 75 62 00 66 69 snrub.fi Redzone 79e69a6f: 73 68 32 0a sh2. Padding 56254d82: 5a 5a 5a 5a 5a 5a 5a 5a CPU: 0 PID: 8196 Comm: attr Tainted: GB 5.9.0-rc8+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014 Call Trace: dump_stack+0x54/0x6e print_trailer+0x12c/0x134 check_bytes_and_report.cold+0x3e/0x69 check_object+0x18c/0x250 free_debug_processing+0xfe/0x230 __slab_free+0x1c0/0x300 kfree+0x1d3/0x220 smb2_set_ea+0x27d/0x540 cifs_xattr_set+0x57f/0x620 __vfs_setxattr+0x4e/0x60 __vfs_setxattr_noperm+0x4e/0x100 __vfs_setxattr_locked+0xae/0xd0 vfs_setxattr+0x4e/0xe0 setxattr+0x12c/0x1a0 path_setxattr+0xa4/0xc0 __ia32_sys_lsetxattr+0x1d/0x20 __do_fast_syscall_32+0x40/0x70 do_fast_syscall_32+0x29/0x60 do_SYSENTER_32+0x15/0x20 entry_SYSENTER_32+0x9f/0xf2 Fixes: 5517554e4313 ("cifs: Add support for writing attributes on SMB2+") Signed-off-by: Vladimir Zapolskiy Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1208,7 +1208,7 @@ smb2_set_ea(const unsigned int xid, stru rqst[1].rq_iov = si_iov; rqst[1].rq_nvec = 1; - len = sizeof(ea) + ea_name_len + ea_value_len + 1; + len = sizeof(*ea) + ea_name_len + ea_value_len + 1; ea = kzalloc(len, GFP_KERNEL); if (ea == NULL) { rc = -ENOMEM;
[PATCH 5.8 033/124] nvme-tcp: check page by sendpage_ok() before calling kernel_sendpage()
From: Coly Li commit 7d4194abfc4de13a2663c7fee6891de8360f7a52 upstream. Currently nvme_tcp_try_send_data() doesn't use kernel_sendpage() to send slab pages. But for pages allocated by __get_free_pages() without __GFP_COMP, which also have refcount as 0, they are still sent by kernel_sendpage() to remote end, this is problematic. The new introduced helper sendpage_ok() checks both PageSlab tag and page_count counter, and returns true if the checking page is OK to be sent by kernel_sendpage(). This patch fixes the page checking issue of nvme_tcp_try_send_data() with sendpage_ok(). If sendpage_ok() returns true, send this page by kernel_sendpage(), otherwise use sock_no_sendpage to handle this page. Signed-off-by: Coly Li Cc: Chaitanya Kulkarni Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Jan Kara Cc: Jens Axboe Cc: Mikhail Skorzhinskii Cc: Philipp Reisner Cc: Sagi Grimberg Cc: Vlastimil Babka Cc: sta...@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/tcp.c |7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -889,12 +889,11 @@ static int nvme_tcp_try_send_data(struct else flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; - /* can't zcopy slab pages */ - if (unlikely(PageSlab(page))) { - ret = sock_no_sendpage(queue->sock, page, offset, len, + if (sendpage_ok(page)) { + ret = kernel_sendpage(queue->sock, page, offset, len, flags); } else { - ret = kernel_sendpage(queue->sock, page, offset, len, + ret = sock_no_sendpage(queue->sock, page, offset, len, flags); } if (ret <= 0)
[PATCH 5.8 040/124] i2c: owl: Clear NACK and BUS error bits
From: Cristian Ciocaltea commit f5b3f433641c543ebe5171285a42aa6adcdb2d22 upstream. When the NACK and BUS error bits are set by the hardware, the driver is responsible for clearing them by writing "1" into the corresponding status registers. Hence perform the necessary operations in owl_i2c_interrupt(). Fixes: d211e62af466 ("i2c: Add Actions Semiconductor Owl family S900 I2C driver") Reported-by: Manivannan Sadhasivam Signed-off-by: Cristian Ciocaltea Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-owl.c |6 ++ 1 file changed, 6 insertions(+) --- a/drivers/i2c/busses/i2c-owl.c +++ b/drivers/i2c/busses/i2c-owl.c @@ -176,6 +176,9 @@ static irqreturn_t owl_i2c_interrupt(int fifostat = readl(i2c_dev->base + OWL_I2C_REG_FIFOSTAT); if (fifostat & OWL_I2C_FIFOSTAT_RNB) { i2c_dev->err = -ENXIO; + /* Clear NACK error bit by writing "1" */ + owl_i2c_update_reg(i2c_dev->base + OWL_I2C_REG_FIFOSTAT, + OWL_I2C_FIFOSTAT_RNB, true); goto stop; } @@ -183,6 +186,9 @@ static irqreturn_t owl_i2c_interrupt(int stat = readl(i2c_dev->base + OWL_I2C_REG_STAT); if (stat & OWL_I2C_STAT_BEB) { i2c_dev->err = -EIO; + /* Clear BUS error bit by writing "1" */ + owl_i2c_update_reg(i2c_dev->base + OWL_I2C_REG_STAT, + OWL_I2C_STAT_BEB, true); goto stop; }
[PATCH 5.8 037/124] i2c: meson: fix clock setting overwrite
From: Jerome Brunet commit 28683e847e2f20eed22cdd24f185d7783db396d3 upstream. When the slave address is written in do_start(), SLAVE_ADDR is written completely. This may overwrite some setting related to the clock rate or signal filtering. Fix this by writing only the bits related to slave address. To avoid causing unexpected changed, explicitly disable filtering or high/low clock mode which may have been left over by the bootloader. Fixes: 30021e3707a7 ("i2c: add support for Amlogic Meson I2C controller") Signed-off-by: Jerome Brunet Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-meson.c | 19 ++- 1 file changed, 18 insertions(+), 1 deletion(-) --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c @@ -5,6 +5,7 @@ * Copyright (C) 2014 Beniamino Galvani */ +#include #include #include #include @@ -38,6 +39,12 @@ #define REG_CTRL_CLKDIVEXT_SHIFT 28 #define REG_CTRL_CLKDIVEXT_MASKGENMASK(29, 28) +#define REG_SLV_ADDR GENMASK(7, 0) +#define REG_SLV_SDA_FILTER GENMASK(10, 8) +#define REG_SLV_SCL_FILTER GENMASK(13, 11) +#define REG_SLV_SCL_LOWGENMASK(27, 16) +#define REG_SLV_SCL_LOW_EN BIT(28) + #define I2C_TIMEOUT_MS 500 enum { @@ -147,6 +154,9 @@ static void meson_i2c_set_clk_div(struct meson_i2c_set_mask(i2c, REG_CTRL, REG_CTRL_CLKDIVEXT_MASK, (div >> 10) << REG_CTRL_CLKDIVEXT_SHIFT); + /* Disable HIGH/LOW mode */ + meson_i2c_set_mask(i2c, REG_SLAVE_ADDR, REG_SLV_SCL_LOW_EN, 0); + dev_dbg(i2c->dev, "%s: clk %lu, freq %u, div %u\n", __func__, clk_rate, freq, div); } @@ -280,7 +290,10 @@ static void meson_i2c_do_start(struct me token = (msg->flags & I2C_M_RD) ? TOKEN_SLAVE_ADDR_READ : TOKEN_SLAVE_ADDR_WRITE; - writel(msg->addr << 1, i2c->regs + REG_SLAVE_ADDR); + + meson_i2c_set_mask(i2c, REG_SLAVE_ADDR, REG_SLV_ADDR, + FIELD_PREP(REG_SLV_ADDR, msg->addr << 1)); + meson_i2c_add_token(i2c, TOKEN_START); meson_i2c_add_token(i2c, token); } @@ -461,6 +474,10 @@ static int meson_i2c_probe(struct platfo return ret; } + /* Disable filtering */ + meson_i2c_set_mask(i2c, REG_SLAVE_ADDR, + REG_SLV_SDA_FILTER | REG_SLV_SCL_FILTER, 0); + meson_i2c_set_clk_div(i2c, timings.bus_freq_hz); return 0;
[PATCH 5.8 044/124] openvswitch: handle DNAT tuple collision
From: Dumitru Ceara commit 8aa7b526dc0b5dbf40c1b834d76a667ad672a410 upstream. With multiple DNAT rules it's possible that after destination translation the resulting tuples collide. For example, two openvswitch flows: nw_dst=10.0.0.10,tp_dst=10, actions=ct(commit,table=2,nat(dst=20.0.0.1:20)) nw_dst=10.0.0.20,tp_dst=10, actions=ct(commit,table=2,nat(dst=20.0.0.1:20)) Assuming two TCP clients initiating the following connections: 10.0.0.10:5000->10.0.0.10:10 10.0.0.10:5000->10.0.0.20:10 Both tuples would translate to 10.0.0.10:5000->20.0.0.1:20 causing nf_conntrack_confirm() to fail because of tuple collision. Netfilter handles this case by allocating a null binding for SNAT at egress by default. Perform the same operation in openvswitch for DNAT if no explicit SNAT is requested by the user and allocate a null binding for SNAT for packets in the "original" direction. Reported-at: https://bugzilla.redhat.com/1877128 Suggested-by: Florian Westphal Fixes: 05752523e565 ("openvswitch: Interface with NAT.") Signed-off-by: Dumitru Ceara Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/conntrack.c | 20 1 file changed, 12 insertions(+), 8 deletions(-) --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -903,15 +903,19 @@ static int ovs_ct_nat(struct net *net, s } err = ovs_ct_nat_execute(skb, ct, ctinfo, >range, maniptype); - if (err == NF_ACCEPT && - ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { - if (maniptype == NF_NAT_MANIP_SRC) - maniptype = NF_NAT_MANIP_DST; - else - maniptype = NF_NAT_MANIP_SRC; + if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { + if (ct->status & IPS_SRC_NAT) { + if (maniptype == NF_NAT_MANIP_SRC) + maniptype = NF_NAT_MANIP_DST; + else + maniptype = NF_NAT_MANIP_SRC; - err = ovs_ct_nat_execute(skb, ct, ctinfo, >range, -maniptype); + err = ovs_ct_nat_execute(skb, ct, ctinfo, >range, +maniptype); + } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { + err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL, +NF_NAT_MANIP_SRC); + } } /* Mark NAT done if successful and update the flow key. */
[PATCH 5.8 041/124] sctp: fix sctp_auth_init_hmacs() error path
From: Eric Dumazet commit d42ee76ecb6c49d499fc5eb32ca34468d95dbc3e upstream. After freeing ep->auth_hmacs we have to clear the pointer or risk use-after-free as reported by syzbot: BUG: KASAN: use-after-free in sctp_auth_destroy_hmacs net/sctp/auth.c:509 [inline] BUG: KASAN: use-after-free in sctp_auth_destroy_hmacs net/sctp/auth.c:501 [inline] BUG: KASAN: use-after-free in sctp_auth_free+0x17e/0x1d0 net/sctp/auth.c:1070 Read of size 8 at addr 8880a8ff52c0 by task syz-executor941/6874 CPU: 0 PID: 6874 Comm: syz-executor941 Not tainted 5.9.0-rc8-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xae/0x497 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 sctp_auth_destroy_hmacs net/sctp/auth.c:509 [inline] sctp_auth_destroy_hmacs net/sctp/auth.c:501 [inline] sctp_auth_free+0x17e/0x1d0 net/sctp/auth.c:1070 sctp_endpoint_destroy+0x95/0x240 net/sctp/endpointola.c:203 sctp_endpoint_put net/sctp/endpointola.c:236 [inline] sctp_endpoint_free+0xd6/0x110 net/sctp/endpointola.c:183 sctp_destroy_sock+0x9c/0x3c0 net/sctp/socket.c:4981 sctp_v6_destroy_sock+0x11/0x20 net/sctp/socket.c:9415 sk_common_release+0x64/0x390 net/core/sock.c:3254 sctp_close+0x4ce/0x8b0 net/sctp/socket.c:1533 inet_release+0x12e/0x280 net/ipv4/af_inet.c:431 inet6_release+0x4c/0x70 net/ipv6/af_inet6.c:475 __sock_release+0xcd/0x280 net/socket.c:596 sock_close+0x18/0x20 net/socket.c:1277 __fput+0x285/0x920 fs/file_table.c:281 task_work_run+0xdd/0x190 kernel/task_work.c:141 exit_task_work include/linux/task_work.h:25 [inline] do_exit+0xb7d/0x29f0 kernel/exit.c:806 do_group_exit+0x125/0x310 kernel/exit.c:903 __do_sys_exit_group kernel/exit.c:914 [inline] __se_sys_exit_group kernel/exit.c:912 [inline] __x64_sys_exit_group+0x3a/0x50 kernel/exit.c:912 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x43f278 Code: Bad RIP value. RSP: 002b:7fffe0995c38 EFLAGS: 0246 ORIG_RAX: 00e7 RAX: ffda RBX: RCX: 0043f278 RDX: RSI: 003c RDI: RBP: 004bf068 R08: 00e7 R09: ffd0 R10: 2000 R11: 0246 R12: 0001 R13: 006d1180 R14: R15: Allocated by task 6874: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48 kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:461 kmem_cache_alloc_trace+0x174/0x300 mm/slab.c:3554 kmalloc include/linux/slab.h:554 [inline] kmalloc_array include/linux/slab.h:593 [inline] kcalloc include/linux/slab.h:605 [inline] sctp_auth_init_hmacs+0xdb/0x3b0 net/sctp/auth.c:464 sctp_auth_init+0x8a/0x4a0 net/sctp/auth.c:1049 sctp_setsockopt_auth_supported net/sctp/socket.c:4354 [inline] sctp_setsockopt+0x477e/0x97f0 net/sctp/socket.c:4631 __sys_setsockopt+0x2db/0x610 net/socket.c:2132 __do_sys_setsockopt net/socket.c:2143 [inline] __se_sys_setsockopt net/socket.c:2140 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2140 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 6874: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48 kasan_set_track+0x1c/0x30 mm/kasan/common.c:56 kasan_set_free_info+0x1b/0x30 mm/kasan/generic.c:355 __kasan_slab_free+0xd8/0x120 mm/kasan/common.c:422 __cache_free mm/slab.c:3422 [inline] kfree+0x10e/0x2b0 mm/slab.c:3760 sctp_auth_destroy_hmacs net/sctp/auth.c:511 [inline] sctp_auth_destroy_hmacs net/sctp/auth.c:501 [inline] sctp_auth_init_hmacs net/sctp/auth.c:496 [inline] sctp_auth_init_hmacs+0x2b7/0x3b0 net/sctp/auth.c:454 sctp_auth_init+0x8a/0x4a0 net/sctp/auth.c:1049 sctp_setsockopt_auth_supported net/sctp/socket.c:4354 [inline] sctp_setsockopt+0x477e/0x97f0 net/sctp/socket.c:4631 __sys_setsockopt+0x2db/0x610 net/socket.c:2132 __do_sys_setsockopt net/socket.c:2143 [inline] __se_sys_setsockopt net/socket.c:2140 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2140 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 1f485649f529 ("[SCTP]: Implement SCTP-AUTH internals") Signed-off-by: Eric Dumazet Cc: Vlad Yasevich Cc: Neil Horman Cc: Marcelo Ricardo Leitner Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sctp/auth.c |1 + 1 file changed, 1 insertion(+) --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -494,6 +494,7 @@ int sctp_auth_init_hmacs(struct sctp_end out_err: /* Clean up any successful allocations */ sctp_auth_destroy_hmacs(ep->auth_hmacs); + ep->auth_hmacs = NULL; return
[PATCH 5.8 035/124] espintcp: restore IP CB before handing the packet to xfrm
From: Sabrina Dubroca commit 4eb2e13415757a2bce5bb0d580d22bbeef1f5346 upstream. Xiumei reported a bug with espintcp over IPv6 in transport mode, because xfrm6_transport_finish expects to find IP6CB data (struct inet6_skb_cb). Currently, espintcp zeroes the CB, but the relevant part is actually preserved by previous layers (first set up by tcp, then strparser only zeroes a small part of tcp_skb_tb), so we can just relocate it to the start of skb->cb. Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Reported-by: Xiumei Mu Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/espintcp.c |6 +- 1 file changed, 5 insertions(+), 1 deletion(-) --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -29,8 +29,12 @@ static void handle_nonesp(struct espintc static void handle_esp(struct sk_buff *skb, struct sock *sk) { + struct tcp_skb_cb *tcp_cb = (struct tcp_skb_cb *)skb->cb; + skb_reset_transport_header(skb); - memset(skb->cb, 0, sizeof(skb->cb)); + + /* restore IP CB, we need at least IP6CB->nhoff */ + memmove(skb->cb, _cb->header, sizeof(tcp_cb->header)); rcu_read_lock(); skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
[PATCH 5.8 038/124] i2c: meson: keep peripheral clock enabled
From: Jerome Brunet commit 79e137b1540165f788394658442284d55a858984 upstream. SCL rate appears to be different than what is expected. For example, We get 164kHz on i2c3 of the vim3 when 400kHz is expected. This is partially due to the peripheral clock being disabled when the clock is set. Let's keep the peripheral clock on after probe to fix the problem. This does not affect the SCL output which is still gated when i2c is idle. Fixes: 09af1c2fa490 ("i2c: meson: set clock divider in probe instead of setting it for each transfer") Signed-off-by: Jerome Brunet Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-meson.c | 10 +++--- 1 file changed, 3 insertions(+), 7 deletions(-) --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c @@ -370,16 +370,12 @@ static int meson_i2c_xfer_messages(struc struct meson_i2c *i2c = adap->algo_data; int i, ret = 0; - clk_enable(i2c->clk); - for (i = 0; i < num; i++) { ret = meson_i2c_xfer_msg(i2c, msgs + i, i == num - 1, atomic); if (ret) break; } - clk_disable(i2c->clk); - return ret ?: i; } @@ -448,7 +444,7 @@ static int meson_i2c_probe(struct platfo return ret; } - ret = clk_prepare(i2c->clk); + ret = clk_prepare_enable(i2c->clk); if (ret < 0) { dev_err(>dev, "can't prepare clock\n"); return ret; @@ -470,7 +466,7 @@ static int meson_i2c_probe(struct platfo ret = i2c_add_adapter(>adap); if (ret < 0) { - clk_unprepare(i2c->clk); + clk_disable_unprepare(i2c->clk); return ret; } @@ -488,7 +484,7 @@ static int meson_i2c_remove(struct platf struct meson_i2c *i2c = platform_get_drvdata(pdev); i2c_del_adapter(>adap); - clk_unprepare(i2c->clk); + clk_disable_unprepare(i2c->clk); return 0; }
[PATCH 5.8 032/124] tcp: use sendpage_ok() to detect misused .sendpage
From: Coly Li commit cf83a17edeeb36195596d2dae060a7c381db35f1 upstream. commit a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab objects") adds the checks for Slab pages, but the pages don't have page_count are still missing from the check. Network layer's sendpage method is not designed to send page_count 0 pages neither, therefore both PageSlab() and page_count() should be both checked for the sending page. This is exactly what sendpage_ok() does. This patch uses sendpage_ok() in do_tcp_sendpages() to detect misused .sendpage, to make the code more robust. Fixes: a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab objects") Suggested-by: Eric Dumazet Signed-off-by: Coly Li Cc: Vasily Averin Cc: David S. Miller Cc: sta...@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -970,7 +970,8 @@ ssize_t do_tcp_sendpages(struct sock *sk long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (IS_ENABLED(CONFIG_DEBUG_VM) && - WARN_ONCE(PageSlab(page), "page must not be a Slab one")) + WARN_ONCE(!sendpage_ok(page), + "page must not be a Slab one and have page_count > 0")) return -EINVAL; /* Wait for a connection to finish. One exception is TCP Fast Open
[PATCH 5.8 042/124] team: set dev->needed_headroom in team_setup_by_port()
From: Eric Dumazet commit 89d01748b2354e210b5d4ea47bc25a42a1b42c82 upstream. Some devices set needed_headroom. If we ignore it, we might end up crashing in various skb_push() for example in ipgre_header() since some layers assume enough headroom has been reserved. Fixes: 1d76efe1577b ("team: add support for non-ethernet devices") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c |1 + 1 file changed, 1 insertion(+) --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2112,6 +2112,7 @@ static void team_setup_by_port(struct ne dev->header_ops = port_dev->header_ops; dev->type = port_dev->type; dev->hard_header_len = port_dev->hard_header_len; + dev->needed_headroom = port_dev->needed_headroom; dev->addr_len = port_dev->addr_len; dev->mtu = port_dev->mtu; memcpy(dev->broadcast, port_dev->broadcast, port_dev->addr_len);
[Question] About z3fold page migration
Hi Vitaly: Many thanks for your brilliant z3fold code. I am reading it and have some questions about it. It's very nice of you if you can explain it for me. 1.page->private is used in z3fold but PagePrivate flag is never set, should we SetPagePrivate for it? 2.Since PagePrivate flag is never set, why we ClearPagePrivate in free_z3fold_page and z3fold_page_migrate? 3.Should we add page to the unbuddied list in z3fold_page_putback() when zhdr->refcount does not reach 0 since we remove it from unbuddied list in z3fold_page_isolate? Or When we will add page to the unbuddied list after z3fold_page_putback? Thanks a lot in advance. And waiting for your reply. :)
[PATCH 5.8 026/124] nvme-core: put ctrl ref when module ref get fail
From: Chaitanya Kulkarni commit 4bab69093044ca81f394bd0780be1b71c5a4d308 upstream. When try_module_get() fails in the nvme_dev_open() it returns without releasing the ctrl reference which was taken earlier. Put the ctrl reference which is taken before calling the try_module_get() in the error return code path. Fixes: 52a3974feb1a "nvme-core: get/put ctrl and transport module in nvme_dev_open/release()" Signed-off-by: Chaitanya Kulkarni Reviewed-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3061,8 +3061,10 @@ static int nvme_dev_open(struct inode *i } nvme_get_ctrl(ctrl); - if (!try_module_get(ctrl->ops->module)) + if (!try_module_get(ctrl->ops->module)) { + nvme_put_ctrl(ctrl); return -EINVAL; + } file->private_data = ctrl; return 0;
[PATCH 5.8 028/124] RISC-V: Make sure memblock reserves the memory containing DT
From: Atish Patra commit a78c6f5956a949b496a5b087188dde52483edf51 upstream. Currently, the memory containing DT is not reserved. Thus, that region of memory can be reallocated or reused for other purposes. This may result in corrupted DT for nommu virt board in Qemu. We may not face any issue in kendryte as DT is embedded in the kernel image for that. Fixes: 6bd33e1ece52 ("riscv: add nommu support") Cc: sta...@vger.kernel.org Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/mm/init.c |1 + 1 file changed, 1 insertion(+) --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -515,6 +515,7 @@ asmlinkage void __init setup_vm(uintptr_ #else dtb_early_va = (void *)dtb_pa; #endif + dtb_early_pa = dtb_pa; } static inline void setup_vm_final(void)
[PATCH 5.8 031/124] net: introduce helper sendpage_ok() in include/linux/net.h
From: Coly Li commit c381b07941adc2274ce552daf86c94701c5e265a upstream. The original problem was from nvme-over-tcp code, who mistakenly uses kernel_sendpage() to send pages allocated by __get_free_pages() without __GFP_COMP flag. Such pages don't have refcount (page_count is 0) on tail pages, sending them by kernel_sendpage() may trigger a kernel panic from a corrupted kernel heap, because these pages are incorrectly freed in network stack as page_count 0 pages. This patch introduces a helper sendpage_ok(), it returns true if the checking page, - is not slab page: PageSlab(page) is false. - has page refcount: page_count(page) is not zero All drivers who want to send page to remote end by kernel_sendpage() may use this helper to check whether the page is OK. If the helper does not return true, the driver should try other non sendpage method (e.g. sock_no_sendpage()) to handle the page. Signed-off-by: Coly Li Cc: Chaitanya Kulkarni Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Jan Kara Cc: Jens Axboe Cc: Mikhail Skorzhinskii Cc: Philipp Reisner Cc: Sagi Grimberg Cc: Vlastimil Babka Cc: sta...@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/net.h | 16 1 file changed, 16 insertions(+) --- a/include/linux/net.h +++ b/include/linux/net.h @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -290,6 +291,21 @@ do { \ #define net_get_random_once_wait(buf, nbytes) \ get_random_once_wait((buf), (nbytes)) +/* + * E.g. XFS meta- & log-data is in slab pages, or bcache meta + * data pages, or other high order pages allocated by + * __get_free_pages() without __GFP_COMP, which have a page_count + * of 0 and/or have PageSlab() set. We cannot use send_page for + * those, as that does get_page(); put_page(); and would cause + * either a VM_BUG directly, or __page_cache_release a page that + * would actually still be referenced by someone, leading to some + * obscure delayed Oops somewhere else. + */ +static inline bool sendpage_ok(struct page *page) +{ + return !PageSlab(page) && page_count(page) >= 1; +} + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
[PATCH 5.8 030/124] mm/khugepaged: fix filemap page_to_pgoff(page) != offset
From: Hugh Dickins commit 033b5d77551167f8c24ca862ce83d3e0745f9245 upstream. There have been elusive reports of filemap_fault() hitting its VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page) on kernels built with CONFIG_READ_ONLY_THP_FOR_FS=y. Suren has hit it on a kernel with CONFIG_READ_ONLY_THP_FOR_FS=y and CONFIG_NUMA is not set: and he has analyzed it down to how khugepaged without NUMA reuses the same huge page after collapse_file() failed (whereas NUMA targets its allocation to the respective node each time). And most of us were usually testing with CONFIG_NUMA=y kernels. collapse_file(old start) new_page = khugepaged_alloc_page(hpage) __SetPageLocked(new_page) new_page->index = start // hpage->index=old offset new_page->mapping = mapping xas_store(, new_page) filemap_fault page = find_get_page(mapping, offset) // if offset falls inside hpage then // compound_head(page) == hpage lock_page_maybe_drop_mmap() __lock_page(page) // collapse fails xas_store(, old page) new_page->mapping = NULL unlock_page(new_page) collapse_file(new start) new_page = khugepaged_alloc_page(hpage) __SetPageLocked(new_page) new_page->index = start // hpage->index=new offset new_page->mapping = mapping // mapping becomes valid again // since compound_head(page) == hpage // page_to_pgoff(page) got changed VM_BUG_ON_PAGE(page_to_pgoff(page) != offset) An initial patch replaced __SetPageLocked() by lock_page(), which did fix the race which Suren illustrates above. But testing showed that it's not good enough: if the racing task's __lock_page() gets delayed long after its find_get_page(), then it may follow collapse_file(new start)'s successful final unlock_page(), and crash on the same VM_BUG_ON_PAGE. It could be fixed by relaxing filemap_fault()'s VM_BUG_ON_PAGE to a check and retry (as is done for mapping), with similar relaxations in find_lock_entry() and pagecache_get_page(): but it's not obvious what else might get caught out; and khugepaged non-NUMA appears to be unique in exposing a page to page cache, then revoking, without going through a full cycle of freeing before reuse. Instead, non-NUMA khugepaged_prealloc_page() release the old page if anyone else has a reference to it (1% of cases when I tested). Although never reported on huge tmpfs, I believe its find_lock_entry() has been at similar risk; but huge tmpfs does not rely on khugepaged for its normal working nearly so much as READ_ONLY_THP_FOR_FS does. Reported-by: Denis Lisov Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206569 Link: https://lore.kernel.org/linux-mm/?q=20200219144635.3b7417145de19b65f258c943%40linux-foundation.org Reported-by: Qian Cai Link: https://lore.kernel.org/linux-xfs/?q=20200616013309.GB815%40lca.pw Reported-and-analyzed-by: Suren Baghdasaryan Fixes: 87c460a0bded ("mm/khugepaged: collapse_shmem() without freezing new_page") Signed-off-by: Hugh Dickins Cc: sta...@vger.kernel.org # v4.9+ Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/khugepaged.c | 12 1 file changed, 12 insertions(+) --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -914,6 +914,18 @@ static struct page *khugepaged_alloc_hug static bool khugepaged_prealloc_page(struct page **hpage, bool *wait) { + /* +* If the hpage allocated earlier was briefly exposed in page cache +* before collapse_file() failed, it is possible that racing lookups +* have not yet completed, and would then be unpleasantly surprised by +* finding the hpage reused for the same mapping at a different offset. +* Just release the previous allocation if there is any danger of that. +*/ + if (*hpage && page_count(*hpage) > 1) { + put_page(*hpage); + *hpage = NULL; + } + if (!*hpage) *hpage = khugepaged_alloc_hugepage(wait);
[PATCH 5.8 007/124] exfat: fix use of uninitialized spinlock on error path
From: Namjae Jeon commit 8ff006e57ad3a25f909c456d053aa498b6673a39 upstream. syzbot reported warning message: Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1d6/0x29e lib/dump_stack.c:118 register_lock_class+0xf06/0x1520 kernel/locking/lockdep.c:893 __lock_acquire+0xfd/0x2ae0 kernel/locking/lockdep.c:4320 lock_acquire+0x148/0x720 kernel/locking/lockdep.c:5029 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:354 [inline] exfat_cache_inval_inode+0x30/0x280 fs/exfat/cache.c:226 exfat_evict_inode+0x124/0x270 fs/exfat/inode.c:660 evict+0x2bb/0x6d0 fs/inode.c:576 exfat_fill_super+0x1e07/0x27d0 fs/exfat/super.c:681 get_tree_bdev+0x3e9/0x5f0 fs/super.c:1342 vfs_get_tree+0x88/0x270 fs/super.c:1547 do_new_mount fs/namespace.c:2875 [inline] path_mount+0x179d/0x29e0 fs/namespace.c:3192 do_mount fs/namespace.c:3205 [inline] __do_sys_mount fs/namespace.c:3413 [inline] __se_sys_mount+0x126/0x180 fs/namespace.c:3390 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 If exfat_read_root() returns an error, spinlock is used in exfat_evict_inode() without initialization. This patch combines exfat_cache_init_inode() with exfat_inode_init_once() to initialize spinlock by slab constructor. Fixes: c35b6810c495 ("exfat: add exfat cache") Cc: sta...@vger.kernel.org # v5.7+ Reported-by: syzbot Signed-off-by: Namjae Jeon Signed-off-by: Greg Kroah-Hartman --- fs/exfat/cache.c| 11 --- fs/exfat/exfat_fs.h |3 ++- fs/exfat/inode.c|2 -- fs/exfat/super.c|5 - 4 files changed, 6 insertions(+), 15 deletions(-) --- a/fs/exfat/cache.c +++ b/fs/exfat/cache.c @@ -17,7 +17,6 @@ #include "exfat_raw.h" #include "exfat_fs.h" -#define EXFAT_CACHE_VALID 0 #define EXFAT_MAX_CACHE16 struct exfat_cache { @@ -61,16 +60,6 @@ void exfat_cache_shutdown(void) kmem_cache_destroy(exfat_cachep); } -void exfat_cache_init_inode(struct inode *inode) -{ - struct exfat_inode_info *ei = EXFAT_I(inode); - - spin_lock_init(>cache_lru_lock); - ei->nr_caches = 0; - ei->cache_valid_id = EXFAT_CACHE_VALID + 1; - INIT_LIST_HEAD(>cache_lru); -} - static inline struct exfat_cache *exfat_cache_alloc(void) { return kmem_cache_alloc(exfat_cachep, GFP_NOFS); --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -250,6 +250,8 @@ struct exfat_sb_info { struct rcu_head rcu; }; +#define EXFAT_CACHE_VALID 0 + /* * EXFAT file system inode in-memory data */ @@ -429,7 +431,6 @@ extern const struct dentry_operations ex /* cache.c */ int exfat_cache_init(void); void exfat_cache_shutdown(void); -void exfat_cache_init_inode(struct inode *inode); void exfat_cache_inval_inode(struct inode *inode); int exfat_get_cluster(struct inode *inode, unsigned int cluster, unsigned int *fclus, unsigned int *dclus, --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -610,8 +610,6 @@ static int exfat_fill_inode(struct inode ei->i_crtime = info->crtime; inode->i_atime = info->atime; - exfat_cache_init_inode(inode); - return 0; } --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -361,7 +361,6 @@ static int exfat_read_root(struct inode inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = current_time(inode); exfat_truncate_atime(>i_atime); - exfat_cache_init_inode(inode); return 0; } @@ -747,6 +746,10 @@ static void exfat_inode_init_once(void * { struct exfat_inode_info *ei = (struct exfat_inode_info *)foo; + spin_lock_init(>cache_lru_lock); + ei->nr_caches = 0; + ei->cache_valid_id = EXFAT_CACHE_VALID + 1; + INIT_LIST_HEAD(>cache_lru); INIT_HLIST_NODE(>i_hash_fat); inode_init_once(>vfs_inode); }
[PATCH 5.8 000/124] 5.8.15-rc1 review
This is the start of the stable review cycle for the 5.8.15 release. There are 124 patches in this series, all will be posted as a response to this one. If anyone has any issues with these being applied, please let me know. Responses should be made by Wed, 14 Oct 2020 13:31:22 +. Anything received after that time might be too late. The whole patch series can be found in one patch at: https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.8.15-rc1.gz or in the git tree and branch at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.8.y and the diffstat can be found below. thanks, greg k-h - Pseudo-Shortlog of commits: Greg Kroah-Hartman Linux 5.8.15-rc1 Cong Wang net_sched: commit action insertions together Cong Wang net_sched: defer tcf_idr_insert() in tcf_action_init_1() Manivannan Sadhasivam net: qrtr: ns: Protect radix_tree_deref_slot() using rcu read locks Anant Thazhemadam net: usb: rtl8150: set random MAC address when set_ethernet_addr() fails Xiongfeng Wang Input: ati_remote2 - add missing newlines when printing module parameters Alexey Kardashevskiy tty/vt: Do not warn when huge selection requested Aya Levin net/mlx5e: Fix driver's declaration to support GRE offload Rohit Maheshwari net/tls: race causes kernel panic Nikolay Aleksandrov net: bridge: fdb: don't flush ext_learn entries Guillaume Nault net/core: check length before updating Ethertype in skb_mpls_{push,pop} Johannes Berg netlink: fix policy dump leak Eric Dumazet tcp: fix receive window update in tcp_add_backlog() Vijay Balakrishna mm: khugepaged: recalculate min_free_kbytes after memory hotplug as expected by khugepaged Minchan Kim mm: validate inode in mapping_set_error() Coly Li mmc: core: don't set limits.discard_granularity as 0 Kajol Jain perf: Fix task_function_call() error handling David Howells afs: Fix deadlock between writeback and truncate Vladimir Oltean net: mscc: ocelot: divide watermark value by 60 when writing to SYS_ATOP Maxim Kochetkov net: mscc: ocelot: extend watermark encoding function Vladimir Oltean net: mscc: ocelot: split writes to pause frame enable bit and to thresholds Vladimir Oltean net: mscc: ocelot: rename ocelot_board.c to ocelot_vsc7514.c David Howells rxrpc: Fix server keyring leak David Howells rxrpc: The server keyring isn't network-namespaced David Howells rxrpc: Fix some missing _bh annotations on locking conn->state_lock David Howells rxrpc: Downgrade the BUG() for unsupported token type in rxrpc_read() Marc Dionne rxrpc: Fix rxkad token xdr encoding Tom Rix net: mvneta: fix double free of txq->buf Si-Wei Liu vhost-vdpa: fix page pinning leakage in error path Si-Wei Liu vhost-vdpa: fix vhost_vdpa_map() on error condition Randy Dunlap net: hinic: fix DEVLINK build errors Vineetha G. Jaya Kumaran net: stmmac: Modify configuration method of EEE timers Vlad Buslov net/mlx5e: Fix race condition on nhe->n pointer in neigh update Aya Levin net/mlx5e: Fix VLAN create flow Aya Levin net/mlx5e: Fix VLAN cleanup flow Aya Levin net/mlx5e: Fix return status when setting unsupported FEC mode Aya Levin net/mlx5e: Add resiliency in Striding RQ mode for packets larger than MTU Maor Gottlieb net/mlx5: Fix request_irqs error flow Eran Ben Elisha net/mlx5: Add retry mechanism to the command entry index allocation Eran Ben Elisha net/mlx5: poll cmd EQ in case of command timeout Eran Ben Elisha net/mlx5: Avoid possible free of command entry while timeout comp handler Eran Ben Elisha net/mlx5: Fix a race when moving command interface to polling mode Qian Cai pipe: Fix memory leaks in create_pipe_files() Hariprasad Kelam octeontx2-pf: Fix synchnorization issue in mbox Hariprasad Kelam octeontx2-pf: Fix the device state on error Geetha sowjanya octeontx2-pf: Fix TCP/UDP checksum offload for IPv6 frames Subbaraya Sundeep octeontx2-af: Fix enable/disable of default NPC entries Willy Liu net: phy: realtek: fix rtl8211e rx/tx delay config Tonghao Zhang virtio-net: don't disable guest csum when disable LRO Wilken Gottwalt net: usb: ax88179_178a: fix missing stop entry in driver_info Heiner Kallweit r8169: fix RTL8168f/RTL8411 EPHY config Ido Schimmel mlxsw: spectrum_acl: Fix mlxsw_sp_acl_tcam_group_add()'s error path Randy Dunlap mdio: fix mdio-thunder.c dependency & build error Eric Dumazet bonding: set dev->needed_headroom in bond_setup_by_slave() Ivan Khoronzhuk net: ethernet: cavium: octeon_mgmt: use phy_start and phy_stop Wong Vee Khee net: stmmac: Fix clock handling on remove path Ronak Doshi vmxnet3: fix cksum offload issues for non-udp tunnels Jacob Keller ice: fix memory leak in
[PATCH 5.8 029/124] gpiolib: Disable compat ->read() code in UML case
From: Andy Shevchenko commit 47e538d86d5776ac8152146c3ed3d22326243190 upstream. It appears that UML (arch/um) has no compat.h header defined and hence can't compile a recently provided piece of code in GPIO library. Disable compat ->read() code in UML case to avoid compilation errors. While at it, use pattern which is already being used in the kernel elsewhere. Fixes: 5ad284ab3a01 ("gpiolib: Fix line event handling in syscall compatible mode") Reported-by: Geert Uytterhoeven Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201005131044.87276-1-andriy.shevche...@linux.intel.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -838,7 +838,7 @@ static __poll_t lineevent_poll(struct fi static ssize_t lineevent_get_size(void) { -#ifdef __x86_64__ +#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) /* i386 has no padding after 'id' */ if (in_ia32_syscall()) { struct compat_gpioeevent_data {
[PATCH 5.8 017/124] Platform: OLPC: Fix memleak in olpc_ec_probe
From: Dinghao Liu commit 4fd9ac6bd3044734a7028bd993944c3617d1eede upstream. When devm_regulator_register() fails, ec should be freed just like when olpc_ec_cmd() fails. Fixes: 231c0c216172a ("Platform: OLPC: Add a regulator for the DCON") Signed-off-by: Dinghao Liu Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/olpc/olpc-ec.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) --- a/drivers/platform/olpc/olpc-ec.c +++ b/drivers/platform/olpc/olpc-ec.c @@ -439,7 +439,9 @@ static int olpc_ec_probe(struct platform ); if (IS_ERR(ec->dcon_rdev)) { dev_err(>dev, "failed to register DCON regulator\n"); - return PTR_ERR(ec->dcon_rdev); + err = PTR_ERR(ec->dcon_rdev); + kfree(ec); + return err; } ec->dbgfs_dir = olpc_ec_setup_debugfs();
[PATCH 5.8 008/124] net: wireless: nl80211: fix out-of-bounds access in nl80211_del_key()
From: Anant Thazhemadam commit 3dc289f8f139997f4e9d3cfccf8738f20d23e47b upstream. In nl80211_parse_key(), key.idx is first initialized as -1. If this value of key.idx remains unmodified and gets returned, and nl80211_key_allowed() also returns 0, then rdev_del_key() gets called with key.idx = -1. This causes an out-of-bounds array access. Handle this issue by checking if the value of key.idx after nl80211_parse_key() is called and return -EINVAL if key.idx < 0. Cc: sta...@vger.kernel.org Reported-by: syzbot+b1bb342d1d097516c...@syzkaller.appspotmail.com Tested-by: syzbot+b1bb342d1d097516c...@syzkaller.appspotmail.com Signed-off-by: Anant Thazhemadam Link: https://lore.kernel.org/r/20201007035401.9522-1-anant.thazhema...@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c |3 +++ 1 file changed, 3 insertions(+) --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4172,6 +4172,9 @@ static int nl80211_del_key(struct sk_buf if (err) return err; + if (key.idx < 0) + return -EINVAL; + if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
[PATCH 5.8 027/124] macsec: avoid use-after-free in macsec_handle_frame()
From: Eric Dumazet commit c7cc9200e9b4a2ac172e990ef1975cd42975dad6 upstream. De-referencing skb after call to gro_cells_receive() is not allowed. We need to fetch skb->len earlier. Fixes: 5491e7c6b1a9 ("macsec: enable GRO and RPS on macsec devices") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Acked-by: Paolo Abeni Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/macsec.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1077,6 +1077,7 @@ static rx_handler_result_t macsec_handle struct macsec_rx_sa *rx_sa; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; + unsigned int len; sci_t sci; u32 hdr_pn; bool cbit; @@ -1232,9 +1233,10 @@ deliver: macsec_rxsc_put(rx_sc); skb_orphan(skb); + len = skb->len; ret = gro_cells_receive(>gro_cells, skb); if (ret == NET_RX_SUCCESS) - count_rx(dev, skb->len); + count_rx(dev, len); else macsec->secy.netdev->stats.rx_dropped++;
[PATCH 5.8 004/124] Revert "ravb: Fixed to be able to unload modules"
From: Geert Uytterhoeven commit 77972b55fb9d35d4a6b0abca99abffaa4ec6a85b upstream. This reverts commit 1838d6c62f57836639bd3d83e7855e0ee4f6defc. This commit moved the ravb_mdio_init() call (and thus the of_mdiobus_register() call) from the ravb_probe() to the ravb_open() call. This causes a regression during system resume (s2idle/s2ram), as new PHY devices cannot be bound while suspended. During boot, the Micrel PHY is detected like this: Micrel KSZ9031 Gigabit PHY e680.ethernet-:00: attached PHY driver [Micrel KSZ9031 Gigabit PHY] (mii_bus:phy_addr=e680.ethernet-:00, irq=228) ravb e680.ethernet eth0: Link is Up - 1Gbps/Full - flow control off During system suspend, (A) defer_all_probes is set to true, and (B) usermodehelper_disabled is set to UMH_DISABLED, to avoid drivers being probed while suspended. A. If CONFIG_MODULES=n, phy_device_register() calling device_add() merely adds the device, but does not probe it yet, as really_probe() returns early due to defer_all_probes being set: dpm_resume+0x128/0x4f8 device_resume+0xcc/0x1b0 dpm_run_callback+0x74/0x340 ravb_resume+0x190/0x1b8 ravb_open+0x84/0x770 of_mdiobus_register+0x1e0/0x468 of_mdiobus_register_phy+0x1b8/0x250 of_mdiobus_phy_device_register+0x178/0x1e8 phy_device_register+0x114/0x1b8 device_add+0x3d4/0x798 bus_probe_device+0x98/0xa0 device_initial_probe+0x10/0x18 __device_attach+0xe4/0x140 bus_for_each_drv+0x64/0xc8 __device_attach_driver+0xb8/0xe0 driver_probe_device.part.11+0xc4/0xd8 really_probe+0x32c/0x3b8 Later, phy_attach_direct() notices no PHY driver has been bound, and falls back to the Generic PHY, leading to degraded operation: Generic PHY e680.ethernet-:00: attached PHY driver [Generic PHY] (mii_bus:phy_addr=e680.ethernet-:00, irq=POLL) ravb e680.ethernet eth0: Link is Up - 1Gbps/Full - flow control off B. If CONFIG_MODULES=y, request_module() returns early with -EBUSY due to UMH_DISABLED, and MDIO initialization fails completely: mdio_bus e680.ethernet-:00: error -16 loading PHY driver module for ID 0x00221622 ravb e680.ethernet eth0: failed to initialize MDIO PM: dpm_run_callback(): ravb_resume+0x0/0x1b8 returns -16 PM: Device e680.ethernet failed to resume: error -16 Ignoring -EBUSY in phy_request_driver_module(), like was done for -ENOENT in commit 21e194425abd65b5 ("net: phy: fix issue with loading PHY driver w/o initramfs"), would makes it fall back to the Generic PHY, like in the CONFIG_MODULES=n case. Signed-off-by: Geert Uytterhoeven Cc: sta...@vger.kernel.org Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/ravb_main.c | 110 +++ 1 file changed, 55 insertions(+), 55 deletions(-) --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1342,51 +1342,6 @@ static inline int ravb_hook_irq(unsigned return error; } -/* MDIO bus init function */ -static int ravb_mdio_init(struct ravb_private *priv) -{ - struct platform_device *pdev = priv->pdev; - struct device *dev = >dev; - int error; - - /* Bitbang init */ - priv->mdiobb.ops = _ops; - - /* MII controller setting */ - priv->mii_bus = alloc_mdio_bitbang(>mdiobb); - if (!priv->mii_bus) - return -ENOMEM; - - /* Hook up MII support for ethtool */ - priv->mii_bus->name = "ravb_mii"; - priv->mii_bus->parent = dev; - snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", -pdev->name, pdev->id); - - /* Register MDIO bus */ - error = of_mdiobus_register(priv->mii_bus, dev->of_node); - if (error) - goto out_free_bus; - - return 0; - -out_free_bus: - free_mdio_bitbang(priv->mii_bus); - return error; -} - -/* MDIO bus release function */ -static int ravb_mdio_release(struct ravb_private *priv) -{ - /* Unregister mdio bus */ - mdiobus_unregister(priv->mii_bus); - - /* Free bitbang info */ - free_mdio_bitbang(priv->mii_bus); - - return 0; -} - /* Network device open function for Ethernet AVB */ static int ravb_open(struct net_device *ndev) { @@ -1395,13 +1350,6 @@ static int ravb_open(struct net_device * struct device *dev = >dev; int error; - /* MDIO bus init */ - error = ravb_mdio_init(priv); - if (error) { -
[PATCH 5.8 006/124] crypto: arm64: Use x16 with indirect branch to bti_c
From: Jeremy Linton commit 39e4716caa598a07a98598b2e7cd03055ce25fb9 upstream. The AES code uses a 'br x7' as part of a function called by a macro. That branch needs a bti_j as a target. This results in a panic as seen below. Using x16 (or x17) with an indirect branch keeps the target bti_c. Bad mode in Synchronous Abort handler detected on CPU1, code 0x3403 -- BTI CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1 pstate: 20400c05 (nzCv daif +PAN -UAO BTYPE=j-) pc : aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs] lr : aesbs_xts_encrypt+0x48/0xe0 [aes_neon_bs] sp : 80001052b730 aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs] __xts_crypt+0xb0/0x2dc [aes_neon_bs] xts_encrypt+0x28/0x3c [aes_neon_bs] crypto_skcipher_encrypt+0x50/0x84 simd_skcipher_encrypt+0xc8/0xe0 crypto_skcipher_encrypt+0x50/0x84 test_skcipher_vec_cfg+0x224/0x5f0 test_skcipher+0xbc/0x120 alg_test_skcipher+0xa0/0x1b0 alg_test+0x3dc/0x47c cryptomgr_test+0x38/0x60 Fixes: 0e89640b640d ("crypto: arm64 - Use modern annotations for assembly functions") Cc: # 5.6.x- Signed-off-by: Jeremy Linton Suggested-by: Dave P Martin Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20201006163326.2780619-1-jeremy.lin...@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-neonbs-core.S |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -788,7 +788,7 @@ SYM_FUNC_START_LOCAL(__xts_crypt8) 0: mov bskey, x21 mov rounds, x22 - br x7 + br x16 SYM_FUNC_END(__xts_crypt8) .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 @@ -806,7 +806,7 @@ SYM_FUNC_END(__xts_crypt8) uzp1v30.4s, v30.4s, v25.4s ld1 {v25.16b}, [x24] -99:adr x7, \do8 +99:adr x16, \do8 bl __xts_crypt8 ldp q16, q17, [sp, #.Lframe_local_offset]
[PATCH 5.8 005/124] bpf: Fix scalar32_min_max_or bounds tracking
From: Daniel Borkmann commit 5b9fbeb75b6a98955f628e205ac26689bcb1383e upstream. Simon reported an issue with the current scalar32_min_max_or() implementation. That is, compared to the other 32 bit subreg tracking functions, the code in scalar32_min_max_or() stands out that it's using the 64 bit registers instead of 32 bit ones. This leads to bounds tracking issues, for example: [...] 8: R0=map_value(id=0,off=0,ks=4,vs=48,imm=0) R10=fp0 fp-8= 8: (79) r1 = *(u64 *)(r0 +0) R0=map_value(id=0,off=0,ks=4,vs=48,imm=0) R10=fp0 fp-8= 9: R0=map_value(id=0,off=0,ks=4,vs=48,imm=0) R1_w=inv(id=0) R10=fp0 fp-8= 9: (b7) r0 = 1 10: R0_w=inv1 R1_w=inv(id=0) R10=fp0 fp-8= 10: (18) r2 = 0x60002 12: R0_w=inv1 R1_w=inv(id=0) R2_w=inv25769803778 R10=fp0 fp-8= 12: (ad) if r1 < r2 goto pc+1 R0_w=inv1 R1_w=inv(id=0,umin_value=25769803778) R2_w=inv25769803778 R10=fp0 fp-8= 13: R0_w=inv1 R1_w=inv(id=0,umin_value=25769803778) R2_w=inv25769803778 R10=fp0 fp-8= 13: (95) exit 14: R0_w=inv1 R1_w=inv(id=0,umax_value=25769803777,var_off=(0x0; 0x7)) R2_w=inv25769803778 R10=fp0 fp-8= 14: (25) if r1 > 0x0 goto pc+1 R0_w=inv1 R1_w=inv(id=0,umax_value=0,var_off=(0x0; 0x7fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8= 15: R0_w=inv1 R1_w=inv(id=0,umax_value=0,var_off=(0x0; 0x7fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8= 15: (95) exit 16: R0_w=inv1 R1_w=inv(id=0,umin_value=1,umax_value=25769803777,var_off=(0x0; 0x77fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8= 16: (47) r1 |= 0 17: R0_w=inv1 R1_w=inv(id=0,umin_value=1,umax_value=32212254719,var_off=(0x1; 0x7),s32_max_value=1,u32_max_value=1) R2_w=inv25769803778 R10=fp0 fp-8= [...] The bound tests on the map value force the upper unsigned bound to be 25769803777 in 64 bit (0b111) and then lower one to be 1. By using OR they are truncated and thus result in the range [1,1] for the 32 bit reg tracker. This is incorrect given the only thing we know is that the value must be positive and thus 2147483647 (0b111) at max for the subregs. Fix it by using the {u,s}32_{min,max}_value vars instead. This also makes sense, for example, for the case where we update dst_reg->s32_{min,max}_value in the else branch we need to use the newly computed dst_reg->u32_{min,max}_value as we know that these are positive. Previously, in the else branch the 64 bit values of umin_value=1 and umax_value=32212254719 were used and latter got truncated to be 1 as upper bound there. After the fix the subreg range is now correct: [...] 8: R0=map_value(id=0,off=0,ks=4,vs=48,imm=0) R10=fp0 fp-8= 8: (79) r1 = *(u64 *)(r0 +0) R0=map_value(id=0,off=0,ks=4,vs=48,imm=0) R10=fp0 fp-8= 9: R0=map_value(id=0,off=0,ks=4,vs=48,imm=0) R1_w=inv(id=0) R10=fp0 fp-8= 9: (b7) r0 = 1 10: R0_w=inv1 R1_w=inv(id=0) R10=fp0 fp-8= 10: (18) r2 = 0x60002 12: R0_w=inv1 R1_w=inv(id=0) R2_w=inv25769803778 R10=fp0 fp-8= 12: (ad) if r1 < r2 goto pc+1 R0_w=inv1 R1_w=inv(id=0,umin_value=25769803778) R2_w=inv25769803778 R10=fp0 fp-8= 13: R0_w=inv1 R1_w=inv(id=0,umin_value=25769803778) R2_w=inv25769803778 R10=fp0 fp-8= 13: (95) exit 14: R0_w=inv1 R1_w=inv(id=0,umax_value=25769803777,var_off=(0x0; 0x7)) R2_w=inv25769803778 R10=fp0 fp-8= 14: (25) if r1 > 0x0 goto pc+1 R0_w=inv1 R1_w=inv(id=0,umax_value=0,var_off=(0x0; 0x7fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8= 15: R0_w=inv1 R1_w=inv(id=0,umax_value=0,var_off=(0x0; 0x7fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8= 15: (95) exit 16: R0_w=inv1 R1_w=inv(id=0,umin_value=1,umax_value=25769803777,var_off=(0x0; 0x77fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8= 16: (47) r1 |= 0 17: R0_w=inv1 R1_w=inv(id=0,umin_value=1,umax_value=32212254719,var_off=(0x0; 0x77fff),u32_max_value=2147483647) R2_w=inv25769803778 R10=fp0 fp-8= [...] Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Reported-by: Simon Scannell Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c |8 1 file changed, 4 insertions(+), 4 deletions(-) --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5490,8 +5490,8 @@ static void scalar32_min_max_or(struct b bool src_known = tnum_subreg_is_const(src_reg->var_off); bool dst_known = tnum_subreg_is_const(dst_reg->var_off); struct tnum var32_off = tnum_subreg(dst_reg->var_off); - s32 smin_val = src_reg->smin_value; - u32 umin_val = src_reg->umin_value; + s32 smin_val = src_reg->s32_min_value; +