diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt
index 6c42c75103eb..6361fb01c9c1 100644
--- a/Documentation/robust-futexes.txt
+++ b/Documentation/robust-futexes.txt
@@ -218,5 +218,4 @@ All other architectures should build just fine too - but they won't have
 the new syscalls yet.
 
 Architectures need to implement the new futex_atomic_cmpxchg_inatomic()
-inline function before writing up the syscalls (that function returns
--ENOSYS right now).
+inline function before writing up the syscalls.
diff --git a/Makefile b/Makefile
index a76c61f77bcd..5412d556b561 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 4
 PATCHLEVEL = 19
-SUBLEVEL = 56
+SUBLEVEL = 57
 EXTRAVERSION =
 NAME = "People's Front"
 
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index c12ff63265a9..5d8787f0ca5f 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -51,7 +51,7 @@ endif
 
 KBUILD_CFLAGS  += -mgeneral-regs-only $(lseinstr) $(brokengasinst)
 KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
-KBUILD_CFLAGS  += -Wno-psabi
+KBUILD_CFLAGS  += $(call cc-disable-warning, psabi)
 KBUILD_AFLAGS  += $(lseinstr) $(brokengasinst)
 
 KBUILD_CFLAGS  += $(call cc-option,-mabi=lp64)
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index c7e30a6ed56e..232917e9c1d9 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -134,7 +134,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
        : "memory");
        uaccess_disable();
 
-       *uval = val;
+       if (!ret)
+               *uval = val;
+
        return ret;
 }
 
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index c6802dea6cab..310e47d54d81 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -272,6 +272,7 @@ __AARCH64_INSN_FUNCS(adrp,  0x9F000000, 0x90000000)
 __AARCH64_INSN_FUNCS(prfm,     0x3FC00000, 0x39800000)
 __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000)
 __AARCH64_INSN_FUNCS(str_reg,  0x3FE0EC00, 0x38206800)
+__AARCH64_INSN_FUNCS(ldadd,    0x3F20FC00, 0x38200000)
 __AARCH64_INSN_FUNCS(ldr_reg,  0x3FE0EC00, 0x38606800)
 __AARCH64_INSN_FUNCS(ldr_lit,  0xBF000000, 0x18000000)
 __AARCH64_INSN_FUNCS(ldrsw_lit,        0xFF000000, 0x98000000)
@@ -389,6 +390,13 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
                                   enum aarch64_insn_register state,
                                   enum aarch64_insn_size_type size,
                                   enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
+                          enum aarch64_insn_register address,
+                          enum aarch64_insn_register value,
+                          enum aarch64_insn_size_type size);
+u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
+                          enum aarch64_insn_register value,
+                          enum aarch64_insn_size_type size);
 u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
                                 enum aarch64_insn_register src,
                                 int imm, enum aarch64_insn_variant variant,
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 2b3413549734..3e6229e30109 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -734,6 +734,46 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
                                            state);
 }
 
+u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
+                          enum aarch64_insn_register address,
+                          enum aarch64_insn_register value,
+                          enum aarch64_insn_size_type size)
+{
+       u32 insn = aarch64_insn_get_ldadd_value();
+
+       switch (size) {
+       case AARCH64_INSN_SIZE_32:
+       case AARCH64_INSN_SIZE_64:
+               break;
+       default:
+               pr_err("%s: unimplemented size encoding %d\n", __func__, size);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       insn = aarch64_insn_encode_ldst_size(size, insn);
+
+       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
+                                           result);
+
+       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+                                           address);
+
+       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
+                                           value);
+}
+
+u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
+                          enum aarch64_insn_register value,
+                          enum aarch64_insn_size_type size)
+{
+       /*
+        * STADD is simply encoded as an alias for LDADD with XZR as
+        * the destination register.
+        */
+       return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address,
+                                     value, size);
+}
+
 static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
                                        enum aarch64_insn_prfm_target target,
                                        enum aarch64_insn_prfm_policy policy,
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index 6c881659ee8a..76606e87233f 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -100,6 +100,10 @@
 #define A64_STXR(sf, Rt, Rn, Rs) \
        A64_LSX(sf, Rt, Rn, Rs, STORE_EX)
 
+/* LSE atomics */
+#define A64_STADD(sf, Rn, Rs) \
+       aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf))
+
 /* Add/subtract (immediate) */
 #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
        aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 2eef156b38bb..7f0258ed1f5f 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -364,7 +364,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
        const int i = insn - ctx->prog->insnsi;
        const bool is64 = BPF_CLASS(code) == BPF_ALU64;
        const bool isdw = BPF_SIZE(code) == BPF_DW;
-       u8 jmp_cond;
+       u8 jmp_cond, reg;
        s32 jmp_offset;
 
 #define check_imm(bits, imm) do {                              \
@@ -730,18 +730,28 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                        break;
                }
                break;
+
        /* STX XADD: lock *(u32 *)(dst + off) += src */
        case BPF_STX | BPF_XADD | BPF_W:
        /* STX XADD: lock *(u64 *)(dst + off) += src */
        case BPF_STX | BPF_XADD | BPF_DW:
-               emit_a64_mov_i(1, tmp, off, ctx);
-               emit(A64_ADD(1, tmp, tmp, dst), ctx);
-               emit(A64_LDXR(isdw, tmp2, tmp), ctx);
-               emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
-               emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx);
-               jmp_offset = -3;
-               check_imm19(jmp_offset);
-               emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+               if (!off) {
+                       reg = dst;
+               } else {
+                       emit_a64_mov_i(1, tmp, off, ctx);
+                       emit(A64_ADD(1, tmp, tmp, dst), ctx);
+                       reg = tmp;
+               }
+               if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) {
+                       emit(A64_STADD(isdw, reg, src), ctx);
+               } else {
+                       emit(A64_LDXR(isdw, tmp2, reg), ctx);
+                       emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
+                       emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
+                       jmp_offset = -3;
+                       check_imm19(jmp_offset);
+                       emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+               }
                break;
 
        default:
diff --git a/arch/mips/include/asm/mips-gic.h b/arch/mips/include/asm/mips-gic.h
index 558059a8f218..0277b56157af 100644
--- a/arch/mips/include/asm/mips-gic.h
+++ b/arch/mips/include/asm/mips-gic.h
@@ -314,6 +314,36 @@ static inline bool mips_gic_present(void)
        return IS_ENABLED(CONFIG_MIPS_GIC) && mips_gic_base;
 }
 
+/**
+ * mips_gic_vx_map_reg() - Return GIC_Vx_<intr>_MAP register offset
+ * @intr: A GIC local interrupt
+ *
+ * Determine the index of the GIC_VL_<intr>_MAP or GIC_VO_<intr>_MAP register
+ * within the block of GIC map registers. This is almost the same as the order
+ * of interrupts in the pending & mask registers, as used by enum
+ * mips_gic_local_interrupt, but moves the FDC interrupt & thus offsets the
+ * interrupts after it...
+ *
+ * Return: The map register index corresponding to @intr.
+ *
+ * The return value is suitable for use with the (read|write)_gic_v[lo]_map
+ * accessor functions.
+ */
+static inline unsigned int
+mips_gic_vx_map_reg(enum mips_gic_local_interrupt intr)
+{
+       /* WD, Compare & Timer are 1:1 */
+       if (intr <= GIC_LOCAL_INT_TIMER)
+               return intr;
+
+       /* FDC moves to after Timer... */
+       if (intr == GIC_LOCAL_INT_FDC)
+               return GIC_LOCAL_INT_TIMER + 1;
+
+       /* As a result everything else is offset by 1 */
+       return intr + 1;
+}
+
 /**
  * gic_get_c0_compare_int() - Return cp0 count/compare interrupt virq
  *
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 9b096f26d1c8..a5cde748cf76 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -820,6 +820,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
                break;
        }
 
+       /*
+        * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
+        * bit in the mask to allow guests to use the mitigation even in the
+        * case where the host does not enable it.
+        */
+       if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+           static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+               x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+       }
+
        /*
         * We have three CPU feature flags that are in play here:
         *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
@@ -837,7 +847,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
                        x86_amd_ssb_disable();
                } else {
                        x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
-                       x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
                        wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
                }
        }
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 274d220d0a83..2013699a5c54 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -792,8 +792,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
                              struct seq_file *seq, void *v)
 {
        struct rdt_resource *r = of->kn->parent->priv;
-       u32 sw_shareable = 0, hw_shareable = 0;
-       u32 exclusive = 0, pseudo_locked = 0;
+       /*
+        * Use unsigned long even though only 32 bits are used to ensure
+        * test_bit() is used safely.
+        */
+       unsigned long sw_shareable = 0, hw_shareable = 0;
+       unsigned long exclusive = 0, pseudo_locked = 0;
        struct rdt_domain *dom;
        int i, hwb, swb, excl, psl;
        enum rdtgrp_mode mode;
@@ -838,10 +842,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
                }
                for (i = r->cache.cbm_len - 1; i >= 0; i--) {
                        pseudo_locked = dom->plr ? dom->plr->cbm : 0;
-                       hwb = test_bit(i, (unsigned long *)&hw_shareable);
-                       swb = test_bit(i, (unsigned long *)&sw_shareable);
-                       excl = test_bit(i, (unsigned long *)&exclusive);
-                       psl = test_bit(i, (unsigned long *)&pseudo_locked);
+                       hwb = test_bit(i, &hw_shareable);
+                       swb = test_bit(i, &sw_shareable);
+                       excl = test_bit(i, &exclusive);
+                       psl = test_bit(i, &pseudo_locked);
                        if (hwb && swb)
                                seq_putc(seq, 'X');
                        else if (hwb && !swb)
@@ -2320,26 +2324,19 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
  */
 static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
 {
-       /*
-        * Convert the u32 _val to an unsigned long required by all the bit
-        * operations within this function. No more than 32 bits of this
-        * converted value can be accessed because all bit operations are
-        * additionally provided with cbm_len that is initialized during
-        * hardware enumeration using five bits from the EAX register and
-        * thus never can exceed 32 bits.
-        */
-       unsigned long *val = (unsigned long *)_val;
+       unsigned long val = *_val;
        unsigned int cbm_len = r->cache.cbm_len;
        unsigned long first_bit, zero_bit;
 
-       if (*val == 0)
+       if (val == 0)
                return;
 
-       first_bit = find_first_bit(val, cbm_len);
-       zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
+       first_bit = find_first_bit(&val, cbm_len);
+       zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
 
        /* Clear any remaining bits to ensure contiguous region */
-       bitmap_clear(val, zero_bit, cbm_len - zero_bit);
+       bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
+       *_val = (u32)val;
 }
 
 /**
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index b7027e667604..a96091d44a45 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -790,13 +790,16 @@ static struct syscore_ops mc_syscore_ops = {
        .resume                 = mc_bp_resume,
 };
 
-static int mc_cpu_online(unsigned int cpu)
+static int mc_cpu_starting(unsigned int cpu)
 {
-       struct device *dev;
-
-       dev = get_cpu_device(cpu);
        microcode_update_cpu(cpu);
        pr_debug("CPU%d added\n", cpu);
+       return 0;
+}
+
+static int mc_cpu_online(unsigned int cpu)
+{
+       struct device *dev = get_cpu_device(cpu);
 
        if (sysfs_create_group(&dev->kobj, &mc_attr_group))
                pr_err("Failed to create group for CPU%d\n", cpu);
@@ -873,7 +876,9 @@ int __init microcode_init(void)
                goto out_ucode_group;
 
        register_syscore_ops(&mc_syscore_ops);
-       cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
+       cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
+                                 mc_cpu_starting, NULL);
+       cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
                                  mc_cpu_online, mc_cpu_down_prep);
 
        pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 779ed52047d1..e0f982e35c96 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5386,7 +5386,16 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
        struct page *page;
        int i;
 
-       if (tdp_enabled)
+       /*
+        * When using PAE paging, the four PDPTEs are treated as 'root' pages,
+        * while the PDP table is a per-vCPU construct that's allocated at MMU
+        * creation.  When emulating 32-bit mode, cr3 is only 32 bits even on
+        * x86_64.  Therefore we need to allocate the PDP table in the first
+        * 4GB of memory, which happens to fit the DMA32 zone.  Except for
+        * SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can
+        * skip allocating the PDP table.
+        */
+       if (tdp_enabled && kvm_x86_ops->get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
                return 0;
 
        /*
diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c
index 8281dfbf38c2..5bed36e12951 100644
--- a/drivers/clk/socfpga/clk-s10.c
+++ b/drivers/clk/socfpga/clk-s10.c
@@ -103,9 +103,9 @@ static const struct stratix10_perip_cnt_clock s10_main_perip_cnt_clks[] = {
        { STRATIX10_NOC_CLK, "noc_clk", NULL, noc_mux, ARRAY_SIZE(noc_mux),
          0, 0, 0, 0x3C, 1},
        { STRATIX10_EMAC_A_FREE_CLK, "emaca_free_clk", NULL, emaca_free_mux, ARRAY_SIZE(emaca_free_mux),
-         0, 0, 4, 0xB0, 0},
+         0, 0, 2, 0xB0, 0},
        { STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux),
-         0, 0, 4, 0xB0, 1},
+         0, 0, 2, 0xB0, 1},
        { STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux,
          ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2},
        { STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux,
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 46b855a42884..9e5f70e7122a 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -716,22 +716,22 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
        struct rdma_dev_addr dev_addr;
        struct resolve_cb_context ctx;
        union {
-               struct sockaddr     _sockaddr;
                struct sockaddr_in  _sockaddr_in;
                struct sockaddr_in6 _sockaddr_in6;
        } sgid_addr, dgid_addr;
        int ret;
 
-       rdma_gid2ip(&sgid_addr._sockaddr, sgid);
-       rdma_gid2ip(&dgid_addr._sockaddr, dgid);
+       rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid);
+       rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid);
 
        memset(&dev_addr, 0, sizeof(dev_addr));
        dev_addr.bound_dev_if = ndev->ifindex;
        dev_addr.net = &init_net;
 
        init_completion(&ctx.comp);
-       ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr,
-                             &dev_addr, 1000, resolve_cb, &ctx);
+       ret = rdma_resolve_ip((struct sockaddr *)&sgid_addr,
+                             (struct sockaddr *)&dgid_addr, &dev_addr, 1000,
+                             resolve_cb, &ctx);
        if (ret)
                return ret;
 
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 51831bfbf90f..cbff746d9e9d 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -132,25 +132,22 @@ static int defer_packet_queue(
        struct hfi1_user_sdma_pkt_q *pq =
                container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
        struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
-       struct user_sdma_txreq *tx =
-               container_of(txreq, struct user_sdma_txreq, txreq);
 
-       if (sdma_progress(sde, seq, txreq)) {
-               if (tx->busycount++ < MAX_DEFER_RETRY_COUNT)
-                       goto eagain;
-       }
+       write_seqlock(&dev->iowait_lock);
+       if (sdma_progress(sde, seq, txreq))
+               goto eagain;
        /*
         * We are assuming that if the list is enqueued somewhere, it
         * is to the dmawait list since that is the only place where
         * it is supposed to be enqueued.
         */
        xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
-       write_seqlock(&dev->iowait_lock);
        if (list_empty(&pq->busy.list))
                iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
        write_sequnlock(&dev->iowait_lock);
        return -EBUSY;
 eagain:
+       write_sequnlock(&dev->iowait_lock);
        return -EAGAIN;
 }
 
@@ -803,7 +800,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 
                tx->flags = 0;
                tx->req = req;
-               tx->busycount = 0;
                INIT_LIST_HEAD(&tx->list);
 
                /*
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 91c343f91776..2c056702d975 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -245,7 +245,6 @@ struct user_sdma_txreq {
        struct list_head list;
        struct user_sdma_request *req;
        u16 flags;
-       unsigned int busycount;
        u64 seqnum;
 };
 
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 58188fe5aed2..32aaa4ef481c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -83,7 +83,6 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
        struct iphdr ipv4;
        const struct ib_global_route *ib_grh;
        union {
-               struct sockaddr     _sockaddr;
                struct sockaddr_in  _sockaddr_in;
                struct sockaddr_in6 _sockaddr_in6;
        } sgid_addr, dgid_addr;
@@ -133,9 +132,9 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
                ipv4.tot_len = htons(0);
                ipv4.ttl = ib_grh->hop_limit;
                ipv4.protocol = nxthdr;
-               rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+               rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid);
                ipv4.saddr = sgid_addr._sockaddr_in.sin_addr.s_addr;
-               rdma_gid2ip(&dgid_addr._sockaddr, &ib_grh->dgid);
+               rdma_gid2ip((struct sockaddr*)&dgid_addr, &ib_grh->dgid);
                ipv4.daddr = dgid_addr._sockaddr_in.sin_addr.s_addr;
                memcpy((u8 *)ah->av + eth_sz, &ipv4, sizeof(struct iphdr));
        } else {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index e578281471af..28181f01734c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -2499,7 +2499,6 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
        u32 vlan_id = 0xFFFF;
        u8 mac_addr[6], hdr_type;
        union {
-               struct sockaddr     _sockaddr;
                struct sockaddr_in  _sockaddr_in;
                struct sockaddr_in6 _sockaddr_in6;
        } sgid_addr, dgid_addr;
@@ -2541,8 +2540,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 
        hdr_type = rdma_gid_attr_network_type(sgid_attr);
        if (hdr_type == RDMA_NETWORK_IPV4) {
-               rdma_gid2ip(&sgid_addr._sockaddr, &sgid_attr->gid);
-               rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid);
+               rdma_gid2ip((struct sockaddr *)&sgid_addr, &sgid_attr->gid);
+               rdma_gid2ip((struct sockaddr *)&dgid_addr, &grh->dgid);
                memcpy(&cmd->params.dgid[0],
                       &dgid_addr._sockaddr_in.sin_addr.s_addr, 4);
                memcpy(&cmd->params.sgid[0],
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index d32268cc1174..f3985469c221 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -388,7 +388,7 @@ static void gic_all_vpes_irq_cpu_online(struct irq_data *d)
        intr = GIC_HWIRQ_TO_LOCAL(d->hwirq);
        cd = irq_data_get_irq_chip_data(d);
 
-       write_gic_vl_map(intr, cd->map);
+       write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map);
        if (cd->mask)
                write_gic_vl_smask(BIT(intr));
 }
@@ -517,7 +517,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
        spin_lock_irqsave(&gic_lock, flags);
        for_each_online_cpu(cpu) {
                write_gic_vl_other(mips_cm_vp_id(cpu));
-               write_gic_vo_map(intr, map);
+               write_gic_vo_map(mips_gic_vx_map_reg(intr), map);
        }
        spin_unlock_irqrestore(&gic_lock, flags);
 
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 9ea2b0291f20..e549392e0ea5 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -60,6 +60,7 @@
 
 #define WRITE_LOG_VERSION 1ULL
 #define WRITE_LOG_MAGIC 0x6a736677736872ULL
+#define WRITE_LOG_SUPER_SECTOR 0
 
 /*
  * The disk format for this is braindead simple.
@@ -115,6 +116,7 @@ struct log_writes_c {
        struct list_head logging_blocks;
        wait_queue_head_t wait;
        struct task_struct *log_kthread;
+       struct completion super_done;
 };
 
 struct pending_block {
@@ -180,6 +182,14 @@ static void log_end_io(struct bio *bio)
        bio_put(bio);
 }
 
+static void log_end_super(struct bio *bio)
+{
+       struct log_writes_c *lc = bio->bi_private;
+
+       complete(&lc->super_done);
+       log_end_io(bio);
+}
+
 /*
  * Meant to be called if there is an error, it will free all the pages
  * associated with the block.
@@ -215,7 +225,8 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
        bio->bi_iter.bi_size = 0;
        bio->bi_iter.bi_sector = sector;
        bio_set_dev(bio, lc->logdev->bdev);
-       bio->bi_end_io = log_end_io;
+       bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ?
+                         log_end_super : log_end_io;
        bio->bi_private = lc;
        bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
@@ -418,11 +429,18 @@ static int log_super(struct log_writes_c *lc)
        super.nr_entries = cpu_to_le64(lc->logged_entries);
        super.sectorsize = cpu_to_le32(lc->sectorsize);
 
-       if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) {
+       if (write_metadata(lc, &super, sizeof(super), NULL, 0,
+                          WRITE_LOG_SUPER_SECTOR)) {
                DMERR("Couldn't write super");
                return -1;
        }
 
+       /*
+        * Super sector should be written in-order, otherwise the
+        * nr_entries could be rewritten incorrectly by an old bio.
+        */
+       wait_for_completion_io(&lc->super_done);
+
        return 0;
 }
 
@@ -531,6 +549,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        INIT_LIST_HEAD(&lc->unflushed_blocks);
        INIT_LIST_HEAD(&lc->logging_blocks);
        init_waitqueue_head(&lc->wait);
+       init_completion(&lc->super_done);
        atomic_set(&lc->io_blocks, 0);
        atomic_set(&lc->pending_blocks, 0);
 
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 94836fcbe721..ddfcf4ade7bf 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -106,23 +106,6 @@ static unsigned int at24_write_timeout = 25;
 module_param_named(write_timeout, at24_write_timeout, uint, 0);
 MODULE_PARM_DESC(at24_write_timeout, "Time (in ms) to try writes (default 25)");
 
-/*
- * Both reads and writes fail if the previous write didn't complete yet. This
- * macro loops a few times waiting at least long enough for one entire page
- * write to work while making sure that at least one iteration is run before
- * checking the break condition.
- *
- * It takes two parameters: a variable in which the future timeout in jiffies
- * will be stored and a temporary variable holding the time of the last
- * iteration of processing the request. Both should be unsigned integers
- * holding at least 32 bits.
- */
-#define at24_loop_until_timeout(tout, op_time)                         \
-       for (tout = jiffies + msecs_to_jiffies(at24_write_timeout),     \
-            op_time = 0;                                               \
-            op_time ? time_before(op_time, tout) : true;               \
-            usleep_range(1000, 1500), op_time = jiffies)
-
 struct at24_chip_data {
        /*
         * these fields mirror their equivalents in
@@ -311,13 +294,22 @@ static ssize_t at24_regmap_read(struct at24_data *at24, char *buf,
        /* adjust offset for mac and serial read ops */
        offset += at24->offset_adj;
 
-       at24_loop_until_timeout(timeout, read_time) {
+       timeout = jiffies + msecs_to_jiffies(at24_write_timeout);
+       do {
+               /*
+                * The timestamp shall be taken before the actual operation
+                * to avoid a premature timeout in case of high CPU load.
+                */
+               read_time = jiffies;
+
                ret = regmap_bulk_read(regmap, offset, buf, count);
                dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n",
                        count, offset, ret, jiffies);
                if (!ret)
                        return count;
-       }
+
+               usleep_range(1000, 1500);
+       } while (time_before(read_time, timeout));
 
        return -ETIMEDOUT;
 }
@@ -361,14 +353,23 @@ static ssize_t at24_regmap_write(struct at24_data *at24, const char *buf,
        regmap = at24_client->regmap;
        client = at24_client->client;
        count = at24_adjust_write_count(at24, offset, count);
+       timeout = jiffies + msecs_to_jiffies(at24_write_timeout);
+
+       do {
+               /*
+                * The timestamp shall be taken before the actual operation
+                * to avoid a premature timeout in case of high CPU load.
+                */
+               write_time = jiffies;
 
-       at24_loop_until_timeout(timeout, write_time) {
                ret = regmap_bulk_write(regmap, offset, buf, count);
                dev_dbg(&client->dev, "write %zu@%d --> %d (%ld)\n",
                        count, offset, ret, jiffies);
                if (!ret)
                        return count;
-       }
+
+               usleep_range(1000, 1500);
+       } while (time_before(write_time, timeout));
 
        return -ETIMEDOUT;
 }
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 039beb5e0fa2..7e162fff01ab 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4307,12 +4307,12 @@ void bond_setup(struct net_device *bond_dev)
        bond_dev->features |= NETIF_F_NETNS_LOCAL;
 
        bond_dev->hw_features = BOND_VLAN_FEATURES |
-                               NETIF_F_HW_VLAN_CTAG_TX |
                                NETIF_F_HW_VLAN_CTAG_RX |
                                NETIF_F_HW_VLAN_CTAG_FILTER;
 
        bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
        bond_dev->features |= bond_dev->hw_features;
+       bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
 }
 
 /* Destroy a bonding device.
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 8d9cc2157afd..7423262ce590 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -122,7 +122,7 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
                 * programmed with (2^32 – <new_sec_value>)
                 */
                if (gmac4)
-                       sec = (100000000ULL - sec);
+                       sec = -sec;
 
                value = readl(ioaddr + PTP_TCR);
                if (value & PTP_TCR_TSCTRLSSR)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 45e64d71a93f..5c18874614ba 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2938,12 +2938,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
        /* Manage tx mitigation */
        tx_q->tx_count_frames += nfrags + 1;
-       if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
+       if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
+           !(priv->synopsys_id >= DWMAC_CORE_4_00 &&
+           (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+           priv->hwts_tx_en)) {
+               stmmac_tx_timer_arm(priv, queue);
+       } else {
+               tx_q->tx_count_frames = 0;
                stmmac_set_tx_ic(priv, desc);
                priv->xstats.tx_set_ic_bit++;
-               tx_q->tx_count_frames = 0;
-       } else {
-               stmmac_tx_timer_arm(priv, queue);
        }
 
        skb_tx_timestamp(skb);
@@ -3157,12 +3160,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
         * element in case of no SG.
         */
        tx_q->tx_count_frames += nfrags + 1;
-       if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
+       if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
+           !(priv->synopsys_id >= DWMAC_CORE_4_00 &&
+           (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+           priv->hwts_tx_en)) {
+               stmmac_tx_timer_arm(priv, queue);
+       } else {
+               tx_q->tx_count_frames = 0;
                stmmac_set_tx_ic(priv, desc);
                priv->xstats.tx_set_ic_bit++;
-               tx_q->tx_count_frames = 0;
-       } else {
-               stmmac_tx_timer_arm(priv, queue);
        }
 
        skb_tx_timestamp(skb);
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 6c6230b44bcd..dc30f11f4766 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2139,12 +2139,12 @@ static void team_setup(struct net_device *dev)
        dev->features |= NETIF_F_NETNS_LOCAL;
 
        dev->hw_features = TEAM_VLAN_FEATURES |
-                          NETIF_F_HW_VLAN_CTAG_TX |
                           NETIF_F_HW_VLAN_CTAG_RX |
                           NETIF_F_HW_VLAN_CTAG_FILTER;
 
        dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
        dev->features |= dev->hw_features;
+       dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
 }
 
 static int team_newlink(struct net *src_net, struct net_device *dev,
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 78d34e0306e0..b67fee56ec81 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1024,18 +1024,8 @@ static void tun_net_uninit(struct net_device *dev)
 /* Net device open. */
 static int tun_net_open(struct net_device *dev)
 {
-       struct tun_struct *tun = netdev_priv(dev);
-       int i;
-
        netif_tx_start_all_queues(dev);
 
-       for (i = 0; i < tun->numqueues; i++) {
-               struct tun_file *tfile;
-
-               tfile = rtnl_dereference(tun->tfiles[i]);
-               tfile->socket.sk->sk_write_space(tfile->socket.sk);
-       }
-
        return 0;
 }
 
@@ -3443,6 +3433,7 @@ static int tun_device_event(struct notifier_block *unused,
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct tun_struct *tun = netdev_priv(dev);
+       int i;
 
        if (dev->rtnl_link_ops != &tun_link_ops)
                return NOTIFY_DONE;
@@ -3452,6 +3443,14 @@ static int tun_device_event(struct notifier_block *unused,
                if (tun_queue_resize(tun))
                        return NOTIFY_BAD;
                break;
+       case NETDEV_UP:
+               for (i = 0; i < tun->numqueues; i++) {
+                       struct tun_file *tfile;
+
+                       tfile = rtnl_dereference(tun->tfiles[i]);
+                       tfile->socket.sk->sk_write_space(tfile->socket.sk);
+               }
+               break;
        default:
                break;
        }
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index d9a6699abe59..e657d8947125 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1412,7 +1412,7 @@ static int qmi_wwan_probe(struct usb_interface *intf,
         * different. Ignore the current interface if the number of endpoints
         * equals the number for the diag interface (two).
         */
-       info = (void *)&id->driver_info;
+       info = (void *)id->driver_info;
 
        if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) {
                if (desc->bNumEndpoints == 2)
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index 890b8aaf95e1..64eb8ffb2ddf 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -763,6 +763,7 @@ static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd
        struct pvscsi_adapter *adapter = shost_priv(host);
        struct pvscsi_ctx *ctx;
        unsigned long flags;
+       unsigned char op;
 
        spin_lock_irqsave(&adapter->hw_lock, flags);
 
@@ -775,13 +776,14 @@ static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd
        }
 
        cmd->scsi_done = done;
+       op = cmd->cmnd[0];
 
        dev_dbg(&cmd->device->sdev_gendev,
-               "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, cmd->cmnd[0]);
+               "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op);
 
        spin_unlock_irqrestore(&adapter->hw_lock, flags);
 
-       pvscsi_kick_io(adapter, cmd->cmnd[0]);
+       pvscsi_kick_io(adapter, op);
 
        return 0;
 }
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 5bfb62533e0f..131028501752 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -636,9 +636,9 @@ struct dwc3_event_buffer {
 /**
  * struct dwc3_ep - device side endpoint representation
  * @endpoint: usb endpoint
+ * @cancelled_list: list of cancelled requests for this endpoint
  * @pending_list: list of pending requests for this endpoint
  * @started_list: list of started requests on this endpoint
- * @wait_end_transfer: wait_queue_head_t for waiting on End Transfer complete
  * @lock: spinlock for endpoint request queue traversal
  * @regs: pointer to first endpoint register
  * @trb_pool: array of transaction buffers
@@ -659,11 +659,10 @@ struct dwc3_event_buffer {
  */
 struct dwc3_ep {
        struct usb_ep           endpoint;
+       struct list_head        cancelled_list;
        struct list_head        pending_list;
        struct list_head        started_list;
 
-       wait_queue_head_t       wait_end_transfer;
-
        spinlock_t              lock;
        void __iomem            *regs;
 
@@ -847,11 +846,12 @@ struct dwc3_hwparams {
  * @epnum: endpoint number to which this request refers
  * @trb: pointer to struct dwc3_trb
  * @trb_dma: DMA address of @trb
- * @unaligned: true for OUT endpoints with length not divisible by maxp
+ * @num_trbs: number of TRBs used by this request
+ * @needs_extra_trb: true when request needs one extra TRB (either due to ZLP
+ *     or unaligned OUT)
  * @direction: IN or OUT direction flag
  * @mapped: true when request has been dma-mapped
  * @started: request is started
- * @zero: wants a ZLP
  */
 struct dwc3_request {
        struct usb_request      request;
@@ -867,11 +867,12 @@ struct dwc3_request {
        struct dwc3_trb         *trb;
        dma_addr_t              trb_dma;
 
-       unsigned                unaligned:1;
+       unsigned                num_trbs;
+
+       unsigned                needs_extra_trb:1;
        unsigned                direction:1;
        unsigned                mapped:1;
        unsigned                started:1;
-       unsigned                zero:1;
 };
 
 /*
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 65ba1038b111..e7461c995116 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -177,8 +177,7 @@ static void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep,
        req->started = false;
        list_del(&req->list);
        req->remaining = 0;
-       req->unaligned = false;
-       req->zero = false;
+       req->needs_extra_trb = false;
 
        if (req->request.status == -EINPROGRESS)
                req->request.status = status;
@@ -640,8 +639,6 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep, unsigned int action)
                reg |= DWC3_DALEPENA_EP(dep->number);
                dwc3_writel(dwc->regs, DWC3_DALEPENA, reg);
 
-               init_waitqueue_head(&dep->wait_end_transfer);
-
                if (usb_endpoint_xfer_control(desc))
                        goto out;
 
@@ -1043,6 +1040,8 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
                req->trb_dma = dwc3_trb_dma_offset(dep, trb);
        }
 
+       req->num_trbs++;
+
        __dwc3_prepare_one_trb(dep, trb, dma, length, chain, node,
                        stream_id, short_not_ok, no_interrupt);
 }
@@ -1070,13 +1069,14 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep,
                        struct dwc3     *dwc = dep->dwc;
                        struct dwc3_trb *trb;
 
-                       req->unaligned = true;
+                       req->needs_extra_trb = true;
 
                        /* prepare normal TRB */
                        dwc3_prepare_one_trb(dep, req, true, i);
 
                        /* Now prepare one extra TRB to align transfer size */
                        trb = &dep->trb_pool[dep->trb_enqueue];
+                       req->num_trbs++;
                        __dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr,
                                        maxp - rem, false, 1,
                                        req->request.stream_id,
@@ -1114,13 +1114,14 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep,
                struct dwc3     *dwc = dep->dwc;
                struct dwc3_trb *trb;
 
-               req->unaligned = true;
+               req->needs_extra_trb = true;
 
                /* prepare normal TRB */
                dwc3_prepare_one_trb(dep, req, true, 0);
 
                /* Now prepare one extra TRB to align transfer size */
                trb = &dep->trb_pool[dep->trb_enqueue];
+               req->num_trbs++;
                __dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr, maxp - rem,
                                false, 1, req->request.stream_id,
                                req->request.short_not_ok,
@@ -1130,13 +1131,14 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep,
                struct dwc3     *dwc = dep->dwc;
                struct dwc3_trb *trb;
 
-               req->zero = true;
+               req->needs_extra_trb = true;
 
                /* prepare normal TRB */
                dwc3_prepare_one_trb(dep, req, true, 0);
 
                /* Now prepare one extra TRB to handle ZLP */
                trb = &dep->trb_pool[dep->trb_enqueue];
+               req->num_trbs++;
                __dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr, 0,
                                false, 1, req->request.stream_id,
                                req->request.short_not_ok,
@@ -1338,6 +1340,42 @@ static int dwc3_gadget_ep_queue(struct usb_ep *ep, struct usb_request *request,
        return ret;
 }
 
+static void dwc3_gadget_ep_skip_trbs(struct dwc3_ep *dep, struct dwc3_request *req)
+{
+       int i;
+
+       /*
+        * If request was already started, this means we had to
+        * stop the transfer. With that we also need to ignore
+        * all TRBs used by the request, however TRBs can only
+        * be modified after completion of END_TRANSFER
+        * command. So what we do here is that we wait for
+        * END_TRANSFER completion and only after that, we jump
+        * over TRBs by clearing HWO and incrementing dequeue
+        * pointer.
+        */
+       for (i = 0; i < req->num_trbs; i++) {
+               struct dwc3_trb *trb;
+
+               trb = req->trb + i;
+               trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+               dwc3_ep_inc_deq(dep);
+       }
+
+       req->num_trbs = 0;
+}
+
+static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep)
+{
+       struct dwc3_request             *req;
+       struct dwc3_request             *tmp;
+
+       list_for_each_entry_safe(req, tmp, &dep->cancelled_list, list) {
+               dwc3_gadget_ep_skip_trbs(dep, req);
+               dwc3_gadget_giveback(dep, req, -ECONNRESET);
+       }
+}
+
 static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
                struct usb_request *request)
 {
@@ -1368,68 +1406,11 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
                        /* wait until it is processed */
                        dwc3_stop_active_transfer(dep, true);
 
-                       /*
-                        * If request was already started, this means we had to
-                        * stop the transfer. With that we also need to ignore
-                        * all TRBs used by the request, however TRBs can only
-                        * be modified after completion of END_TRANSFER
-                        * command. So what we do here is that we wait for
-                        * END_TRANSFER completion and only after that, we jump
-                        * over TRBs by clearing HWO and incrementing dequeue
-                        * pointer.
-                        *
-                        * Note that we have 2 possible types of transfers here:
-                        *
-                        * i) Linear buffer request
-                        * ii) SG-list based request
-                        *
-                        * SG-list based requests will have r->num_pending_sgs
-                        * set to a valid number (> 0). Linear requests,
-                        * normally use a single TRB.
-                        *
-                        * For each of these two cases, if r->unaligned flag is
-                        * set, one extra TRB has been used to align transfer
-                        * size to wMaxPacketSize.
-                        *
-                        * All of these cases need to be taken into
-                        * consideration so we don't mess up our TRB ring
-                        * pointers.
-                        */
-                       wait_event_lock_irq(dep->wait_end_transfer,
-                                       !(dep->flags & DWC3_EP_END_TRANSFER_PENDING),
-                                       dwc->lock);
-
                        if (!r->trb)
                                goto out0;
 
-                       if (r->num_pending_sgs) {
-                               struct dwc3_trb *trb;
-                               int i = 0;
-
-                               for (i = 0; i < r->num_pending_sgs; i++) {
-                                       trb = r->trb + i;
-                                       trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
-                                       dwc3_ep_inc_deq(dep);
-                               }
-
-                               if (r->unaligned || r->zero) {
-                                       trb = r->trb + r->num_pending_sgs + 1;
-                                       trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
-                                       dwc3_ep_inc_deq(dep);
-                               }
-                       } else {
-                               struct dwc3_trb *trb = r->trb;
-
-                               trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
-                               dwc3_ep_inc_deq(dep);
-
-                               if (r->unaligned || r->zero) {
-                                       trb = r->trb + 1;
-                                       trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
-                                       dwc3_ep_inc_deq(dep);
-                               }
-                       }
-                       goto out1;
+                       dwc3_gadget_move_cancelled_request(req);
+                       goto out0;
                }
                dev_err(dwc->dev, "request %pK was not queued to %s\n",
                                request, ep->name);
@@ -1437,9 +1418,6 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
                goto out0;
        }
 
-out1:
-       /* giveback the request */
-
        dwc3_gadget_giveback(dep, req, -ECONNRESET);
 
 out0:
@@ -1932,8 +1910,6 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
 {
        struct dwc3             *dwc = gadget_to_dwc(g);
        unsigned long           flags;
-       int                     epnum;
-       u32                     tmo_eps = 0;
 
        spin_lock_irqsave(&dwc->lock, flags);
 
@@ -1942,36 +1918,6 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
 
        __dwc3_gadget_stop(dwc);
 
-       for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
-               struct dwc3_ep  *dep = dwc->eps[epnum];
-               int ret;
-
-               if (!dep)
-                       continue;
-
-               if (!(dep->flags & DWC3_EP_END_TRANSFER_PENDING))
-                       continue;
-
-               ret = wait_event_interruptible_lock_irq_timeout(dep->wait_end_transfer,
-                           !(dep->flags & DWC3_EP_END_TRANSFER_PENDING),
-                           dwc->lock, msecs_to_jiffies(5));
-
-               if (ret <= 0) {
-                       /* Timed out or interrupted! There's nothing much
-                        * we can do so we just log here and print which
-                        * endpoints timed out at the end.
-                        */
-                       tmo_eps |= 1 << epnum;
-                       dep->flags &= DWC3_EP_END_TRANSFER_PENDING;
-               }
-       }
-
-       if (tmo_eps) {
-               dev_err(dwc->dev,
-                       "end transfer timed out on endpoints 0x%x [bitmap]\n",
-                       tmo_eps);
-       }
-
 out:
        dwc->gadget_driver      = NULL;
        spin_unlock_irqrestore(&dwc->lock, flags);
@@ -2174,6 +2120,7 @@ static int dwc3_gadget_init_endpoint(struct dwc3 *dwc, u8 epnum)
 
        INIT_LIST_HEAD(&dep->pending_list);
        INIT_LIST_HEAD(&dep->started_list);
+       INIT_LIST_HEAD(&dep->cancelled_list);
 
        return 0;
 }
@@ -2233,6 +2180,7 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep,
        dwc3_ep_inc_deq(dep);
 
        trace_dwc3_complete_trb(dep, trb);
+       req->num_trbs--;
 
        /*
         * If we're in the middle of series of chained TRBs and we
@@ -2252,7 +2200,8 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep,
         * with one TRB pending in the ring. We need to manually clear HWO bit
         * from that TRB.
         */
-       if ((req->zero || req->unaligned) && !(trb->ctrl & DWC3_TRB_CTRL_CHN)) {
+
+       if (req->needs_extra_trb && !(trb->ctrl & DWC3_TRB_CTRL_CHN)) {
                trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
                return 1;
        }
@@ -2329,11 +2278,10 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep,
                ret = dwc3_gadget_ep_reclaim_trb_linear(dep, req, event,
                                status);
 
-       if (req->unaligned || req->zero) {
+       if (req->needs_extra_trb) {
                ret = dwc3_gadget_ep_reclaim_trb_linear(dep, req, event,
                                status);
-               req->unaligned = false;
-               req->zero = false;
+               req->needs_extra_trb = false;
        }
 
        req->request.actual = req->request.length - req->remaining;
@@ -2466,7 +2414,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
 
                if (cmd == DWC3_DEPCMD_ENDTRANSFER) {
                        dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;
-                       wake_up(&dep->wait_end_transfer);
+                       dwc3_gadget_ep_cleanup_cancelled_requests(dep);
                }
                break;
        case DWC3_DEPEVT_STREAMEVT:
diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h
index 2aacd1afd9ff..023a473648eb 100644
--- a/drivers/usb/dwc3/gadget.h
+++ b/drivers/usb/dwc3/gadget.h
@@ -79,6 +79,21 @@ static inline void dwc3_gadget_move_started_request(struct dwc3_request *req)
        list_move_tail(&req->list, &dep->started_list);
 }
 
+/**
+ * dwc3_gadget_move_cancelled_request - move @req to the cancelled_list
+ * @req: the request to be moved
+ *
+ * Caller should take care of locking. This function will move @req from its
+ * current list to the endpoint's cancelled_list.
+ */
+static inline void dwc3_gadget_move_cancelled_request(struct dwc3_request *req)
+{
+       struct dwc3_ep          *dep = req->dep;
+
+       req->started = false;
+       list_move_tail(&req->list, &dep->cancelled_list);
+}
+
 void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req,
                int status);
 
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 082d227fa56b..6261719f6f2a 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -276,7 +276,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
        switch (handler->flags) {
        case ACL_TYPE_ACCESS:
                if (acl) {
-                       struct iattr iattr;
+                       struct iattr iattr = { 0 };
                        struct posix_acl *old_acl = acl;
 
                        retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 82a48e830018..e4b59e76afb0 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -856,9 +856,14 @@ static int load_flat_file(struct linux_binprm *bprm,
 
 static int load_flat_shared_library(int id, struct lib_info *libs)
 {
+       /*
+        * This is a fake bprm struct; only the members "buf", "file" and
+        * "filename" are actually used.
+        */
        struct linux_binprm bprm;
        int res;
        char buf[16];
+       loff_t pos = 0;
 
        memset(&bprm, 0, sizeof(bprm));
 
@@ -872,25 +877,11 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
        if (IS_ERR(bprm.file))
                return res;
 
-       bprm.cred = prepare_exec_creds();
-       res = -ENOMEM;
-       if (!bprm.cred)
-               goto out;
-
-       /* We don't really care about recalculating credentials at this point
-        * as we're past the point of no return and are dealing with shared
-        * libraries.
-        */
-       bprm.called_set_creds = 1;
+       res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos);
 
-       res = prepare_binprm(&bprm);
-
-       if (!res)
+       if (res >= 0)
                res = load_flat_file(&bprm, libs, id, NULL);
 
-       abort_creds(bprm.cred);
-
-out:
        allow_write_access(bprm.file);
        fput(bprm.file);
 
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index a8df2f496898..364028c710a8 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -18,7 +18,7 @@
 
 #define NFSDBG_FACILITY                NFSDBG_PNFS_LD
 
-static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
+static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO;
 static unsigned int dataserver_retrans;
 
 static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0ceb3b6b37e7..9eb99a43f849 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -452,7 +452,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                 * a program is not able to use ptrace(2) in that case. It is
                 * safe because the task has stopped executing permanently.
                 */
-               if (permitted && (task->flags & PF_DUMPCORE)) {
+               if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) {
                        if (try_get_task_stack(task)) {
                                eip = KSTK_EIP(task);
                                esp = KSTK_ESP(task);
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index fcb61b4659b3..8666fe7f35d7 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -23,7 +23,9 @@
  *
  * Return:
  * 0 - On success
- * <0 - On error
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ * -ENOSYS - Operation not supported
  */
 static inline int
 arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
@@ -85,7 +87,9 @@ arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
  *
  * Return:
  * 0 - On success
- * <0 - On error
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG)
  */
 static inline int
 futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index f91b0f8ff3a9..ad6b30137ac2 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -210,6 +210,12 @@ void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
 #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx)                       \
        BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx)
 
+#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr)                       \
+       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_RECVMSG, NULL)
+
+#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr)                       \
+       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL)
+
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops)                                \
 ({                                                                            \
        int __ret = 0;                                                         \
@@ -290,6 +296,8 @@ static inline void bpf_cgroup_storage_free(
 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
 
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index f30bf500888d..e7bbd82908b1 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -325,7 +325,6 @@ struct xprt_class {
 struct rpc_xprt                *xprt_create_transport(struct xprt_create *args);
 void                   xprt_connect(struct rpc_task *task);
 void                   xprt_reserve(struct rpc_task *task);
-void                   xprt_request_init(struct rpc_task *task);
 void                   xprt_retry_reserve(struct rpc_task *task);
 int                    xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
 int                    xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index b8eb51a661e5..4ab293f574e0 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -336,6 +336,9 @@ enum p9_qid_t {
 #define P9_NOFID       (u32)(~0)
 #define P9_MAXWELEM    16
 
+/* Minimal header size: size[4] type[1] tag[2] */
+#define P9_HDRSZ       7
+
 /* ample room for Twrite/Rread header */
 #define P9_IOHDRSZ     24
 
@@ -558,6 +561,7 @@ struct p9_fcall {
        size_t offset;
        size_t capacity;
 
+       struct kmem_cache *cache;
        u8 *sdata;
 };
 
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 0fa0fbab33b0..947a570307a6 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -64,22 +64,15 @@ enum p9_trans_status {
 
 /**
  * enum p9_req_status_t - status of a request
- * @REQ_STATUS_IDLE: request slot unused
  * @REQ_STATUS_ALLOC: request has been allocated but not sent
  * @REQ_STATUS_UNSENT: request waiting to be sent
  * @REQ_STATUS_SENT: request sent to server
  * @REQ_STATUS_RCVD: response received from server
  * @REQ_STATUS_FLSHD: request has been flushed
  * @REQ_STATUS_ERROR: request encountered an error on the client side
- *
- * The @REQ_STATUS_IDLE state is used to mark a request slot as unused
- * but use is actually tracked by the idpool structure which handles tag
- * id allocation.
- *
  */
 
 enum p9_req_status_t {
-       REQ_STATUS_IDLE,
        REQ_STATUS_ALLOC,
        REQ_STATUS_UNSENT,
        REQ_STATUS_SENT,
@@ -92,70 +85,46 @@ enum p9_req_status_t {
  * struct p9_req_t - request slots
  * @status: status of this request slot
  * @t_err: transport error
- * @flush_tag: tag of request being flushed (for flush requests)
  * @wq: wait_queue for the client to block on for this request
  * @tc: the request fcall structure
  * @rc: the response fcall structure
  * @aux: transport specific data (provided for trans_fd migration)
  * @req_list: link for higher level objects to chain requests
- *
- * Transport use an array to track outstanding requests
- * instead of a list.  While this may incurr overhead during initial
- * allocation or expansion, it makes request lookup much easier as the
- * tag id is a index into an array.  (We use tag+1 so that we can accommodate
- * the -1 tag for the T_VERSION request).
- * This also has the nice effect of only having to allocate wait_queues
- * once, instead of constantly allocating and freeing them.  Its possible
- * other resources could benefit from this scheme as well.
- *
  */
-
 struct p9_req_t {
        int status;
        int t_err;
+       struct kref refcount;
        wait_queue_head_t wq;
-       struct p9_fcall *tc;
-       struct p9_fcall *rc;
+       struct p9_fcall tc;
+       struct p9_fcall rc;
        void *aux;
-
        struct list_head req_list;
 };
 
 /**
  * struct p9_client - per client instance state
- * @lock: protect @fidlist
+ * @lock: protect @fids and @reqs
  * @msize: maximum data size negotiated by protocol
- * @dotu: extension flags negotiated by protocol
  * @proto_version: 9P protocol version to use
  * @trans_mod: module API instantiated with this client
+ * @status: connection state
  * @trans: tranport instance state and API
  * @fids: All active FID handles
- * @tagpool - transaction id accounting for session
- * @reqs - 2D array of requests
- * @max_tag - current maximum tag id allocated
- * @name - node name used as client id
+ * @reqs: All active requests.
+ * @name: node name used as client id
  *
  * The client structure is used to keep track of various per-client
  * state that has been instantiated.
- * In order to minimize per-transaction overhead we use a
- * simple array to lookup requests instead of a hash table
- * or linked list.  In order to support larger number of
- * transactions, we make this a 2D array, allocating new rows
- * when we need to grow the total number of the transactions.
- *
- * Each row is 256 requests and we'll support up to 256 rows for
- * a total of 64k concurrent requests per session.
- *
- * Bugs: duplicated data and potentially unnecessary elements.
  */
-
 struct p9_client {
-       spinlock_t lock; /* protect client structure */
+       spinlock_t lock;
        unsigned int msize;
        unsigned char proto_version;
        struct p9_trans_module *trans_mod;
        enum p9_trans_status status;
        void *trans;
+       struct kmem_cache *fcall_cache;
 
        union {
                struct {
@@ -170,10 +139,7 @@ struct p9_client {
        } trans_opts;
 
        struct idr fids;
-
-       struct p9_idpool *tagpool;
-       struct p9_req_t *reqs[P9_ROW_MAXTAG];
-       int max_tag;
+       struct idr reqs;
 
        char name[__NEW_UTS_LEN + 1];
 };
@@ -266,7 +232,21 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
                                kgid_t gid, struct p9_qid *);
 int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
 int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
+void p9_fcall_fini(struct p9_fcall *fc);
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
+
+static inline void p9_req_get(struct p9_req_t *r)
+{
+       kref_get(&r->refcount);
+}
+
+static inline int p9_req_try_get(struct p9_req_t *r)
+{
+       return kref_get_unless_zero(&r->refcount);
+}
+
+int p9_req_put(struct p9_req_t *r);
+
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
 
 int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int);
@@ -279,4 +259,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const 
char *, u64 *);
 int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int);
 int p9_client_readlink(struct p9_fid *fid, char **target);
 
+int p9_client_init(void);
+void p9_client_exit(void);
+
 #endif /* NET_9P_CLIENT_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 66917a4eba27..2932600ce271 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -172,6 +172,8 @@ enum bpf_attach_type {
        BPF_CGROUP_UDP4_SENDMSG,
        BPF_CGROUP_UDP6_SENDMSG,
        BPF_LIRC_MODE2,
+       BPF_CGROUP_UDP4_RECVMSG = 19,
+       BPF_CGROUP_UDP6_RECVMSG,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -2705,8 +2707,8 @@ struct bpf_raw_tracepoint_args {
 /* DIRECT:  Skip the FIB rules and go to FIB table associated with device
  * OUTPUT:  Do lookup from egress perspective; default is ingress
  */
-#define BPF_FIB_LOOKUP_DIRECT  BIT(0)
-#define BPF_FIB_LOOKUP_OUTPUT  BIT(1)
+#define BPF_FIB_LOOKUP_DIRECT  (1U << 0)
+#define BPF_FIB_LOOKUP_OUTPUT  (1U << 1)
 
 enum {
        BPF_FIB_LKUP_RET_SUCCESS,      /* lookup successful */
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 4f3138e6ecb2..1a8b208f6c55 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -676,9 +676,14 @@ static int trie_get_next_key(struct bpf_map *map, void 
*_key, void *_next_key)
         * have exact two children, so this function will never return NULL.
         */
        for (node = search_root; node;) {
-               if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
+               if (node->flags & LPM_TREE_NODE_FLAG_IM) {
+                       node = rcu_dereference(node->child[0]);
+               } else {
                        next_node = node;
-               node = rcu_dereference(node->child[0]);
+                       node = rcu_dereference(node->child[0]);
+                       if (!node)
+                               node = rcu_dereference(next_node->child[1]);
+               }
        }
 do_copy:
        next_key->prefixlen = next_node->prefixlen;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ede82382dd32..118e3a8fc764 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1342,6 +1342,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type 
prog_type,
                case BPF_CGROUP_INET6_CONNECT:
                case BPF_CGROUP_UDP4_SENDMSG:
                case BPF_CGROUP_UDP6_SENDMSG:
+               case BPF_CGROUP_UDP4_RECVMSG:
+               case BPF_CGROUP_UDP6_RECVMSG:
                        return 0;
                default:
                        return -EINVAL;
@@ -1622,6 +1624,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
        case BPF_CGROUP_INET6_CONNECT:
        case BPF_CGROUP_UDP4_SENDMSG:
        case BPF_CGROUP_UDP6_SENDMSG:
+       case BPF_CGROUP_UDP4_RECVMSG:
+       case BPF_CGROUP_UDP6_RECVMSG:
                ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
                break;
        case BPF_CGROUP_SOCK_OPS:
@@ -1698,6 +1702,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
        case BPF_CGROUP_INET6_CONNECT:
        case BPF_CGROUP_UDP4_SENDMSG:
        case BPF_CGROUP_UDP6_SENDMSG:
+       case BPF_CGROUP_UDP4_RECVMSG:
+       case BPF_CGROUP_UDP6_RECVMSG:
                ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
                break;
        case BPF_CGROUP_SOCK_OPS:
@@ -1744,6 +1750,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
        case BPF_CGROUP_INET6_CONNECT:
        case BPF_CGROUP_UDP4_SENDMSG:
        case BPF_CGROUP_UDP6_SENDMSG:
+       case BPF_CGROUP_UDP4_RECVMSG:
+       case BPF_CGROUP_UDP6_RECVMSG:
        case BPF_CGROUP_SOCK_OPS:
        case BPF_CGROUP_DEVICE:
                break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d3580a68dbef..1dff5f7323cc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4342,9 +4342,12 @@ static int check_return_code(struct bpf_verifier_env 
*env)
        struct tnum range = tnum_range(0, 1);
 
        switch (env->prog->type) {
+       case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+               if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG 
||
+                   env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG)
+                       range = tnum_range(1, 1);
        case BPF_PROG_TYPE_CGROUP_SKB:
        case BPF_PROG_TYPE_CGROUP_SOCK:
-       case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
        case BPF_PROG_TYPE_SOCK_OPS:
        case BPF_PROG_TYPE_CGROUP_DEVICE:
                break;
@@ -4360,16 +4363,17 @@ static int check_return_code(struct bpf_verifier_env 
*env)
        }
 
        if (!tnum_in(range, reg->var_off)) {
+               char tn_buf[48];
+
                verbose(env, "At program exit the register R0 ");
                if (!tnum_is_unknown(reg->var_off)) {
-                       char tn_buf[48];
-
                        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
                        verbose(env, "has value %s", tn_buf);
                } else {
                        verbose(env, "has unknown scalar value");
                }
-               verbose(env, " should have been 0 or 1\n");
+               tnum_strn(tn_buf, sizeof(tn_buf), range);
+               verbose(env, " should have been in %s\n", tn_buf);
                return -EINVAL;
        }
        return 0;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5d65eae893bd..46aefe5c0e35 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2289,6 +2289,9 @@ static int __init mitigations_parse_cmdline(char *arg)
                cpu_mitigations = CPU_MITIGATIONS_AUTO;
        else if (!strcmp(arg, "auto,nosmt"))
                cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
+       else
+               pr_crit("Unsupported mitigations=%s, system may still be 
vulnerable\n",
+                       arg);
 
        return 0;
 }
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 6c28d519447d..83c4e76f513a 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -365,8 +365,6 @@ static const struct bpf_func_proto 
bpf_perf_event_read_value_proto = {
        .arg4_type      = ARG_CONST_SIZE,
 };
 
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
-
 static __always_inline u64
 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
                        u64 flags, struct perf_sample_data *sd)
@@ -398,24 +396,50 @@ __bpf_perf_event_output(struct pt_regs *regs, struct 
bpf_map *map,
        return 0;
 }
 
+/*
+ * Support executing tracepoints in normal, irq, and nmi context that each call
+ * bpf_perf_event_output
+ */
+struct bpf_trace_sample_data {
+       struct perf_sample_data sds[3];
+};
+
+static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
+static DEFINE_PER_CPU(int, bpf_trace_nest_level);
 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, 
map,
           u64, flags, void *, data, u64, size)
 {
-       struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
+       struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
+       int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
        struct perf_raw_record raw = {
                .frag = {
                        .size = size,
                        .data = data,
                },
        };
+       struct perf_sample_data *sd;
+       int err;
 
-       if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
-               return -EINVAL;
+       if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
+               err = -EBUSY;
+               goto out;
+       }
+
+       sd = &sds->sds[nest_level - 1];
+
+       if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
+               err = -EINVAL;
+               goto out;
+       }
 
        perf_sample_data_init(sd, 0, 0);
        sd->raw = &raw;
 
-       return __bpf_perf_event_output(regs, map, flags, sd);
+       err = __bpf_perf_event_output(regs, map, flags, sd);
+
+out:
+       this_cpu_dec(bpf_trace_nest_level);
+       return err;
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto = {
@@ -772,16 +796,48 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct 
bpf_prog *prog)
 /*
  * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
  * to avoid potential recursive reuse issue when/if tracepoints are added
- * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
+ * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
+ *
+ * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
+ * in normal, irq, and nmi context.
  */
-static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
+struct bpf_raw_tp_regs {
+       struct pt_regs regs[3];
+};
+static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
+static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
+static struct pt_regs *get_bpf_raw_tp_regs(void)
+{
+       struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
+       int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
+
+       if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
+               this_cpu_dec(bpf_raw_tp_nest_level);
+               return ERR_PTR(-EBUSY);
+       }
+
+       return &tp_regs->regs[nest_level - 1];
+}
+
+static void put_bpf_raw_tp_regs(void)
+{
+       this_cpu_dec(bpf_raw_tp_nest_level);
+}
+
 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, 
args,
           struct bpf_map *, map, u64, flags, void *, data, u64, size)
 {
-       struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+       struct pt_regs *regs = get_bpf_raw_tp_regs();
+       int ret;
+
+       if (IS_ERR(regs))
+               return PTR_ERR(regs);
 
        perf_fetch_caller_regs(regs);
-       return ____bpf_perf_event_output(regs, map, flags, data, size);
+       ret = ____bpf_perf_event_output(regs, map, flags, data, size);
+
+       put_bpf_raw_tp_regs();
+       return ret;
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
@@ -798,12 +854,18 @@ static const struct bpf_func_proto 
bpf_perf_event_output_proto_raw_tp = {
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
           struct bpf_map *, map, u64, flags)
 {
-       struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+       struct pt_regs *regs = get_bpf_raw_tp_regs();
+       int ret;
+
+       if (IS_ERR(regs))
+               return PTR_ERR(regs);
 
        perf_fetch_caller_regs(regs);
        /* similar to bpf_perf_event_output_tp, but pt_regs fetched differently 
*/
-       return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
-                              flags, 0, 0);
+       ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+                             flags, 0, 0);
+       put_bpf_raw_tp_regs();
+       return ret;
 }
 
 static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
@@ -818,11 +880,17 @@ static const struct bpf_func_proto 
bpf_get_stackid_proto_raw_tp = {
 BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
           void *, buf, u32, size, u64, flags)
 {
-       struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+       struct pt_regs *regs = get_bpf_raw_tp_regs();
+       int ret;
+
+       if (IS_ERR(regs))
+               return PTR_ERR(regs);
 
        perf_fetch_caller_regs(regs);
-       return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
-                            (unsigned long) size, flags, 0);
+       ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+                           (unsigned long) size, flags, 0);
+       put_bpf_raw_tp_regs();
+       return ret;
 }
 
 static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 3ea65cdff30d..4ad967453b6f 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -205,8 +205,6 @@ void trace_likely_condition(struct ftrace_likely_data *f, 
int val, int expect)
 void ftrace_likely_update(struct ftrace_likely_data *f, int val,
                          int expect, int is_constant)
 {
-       unsigned long flags = user_access_save();
-
        /* A constant is always correct */
        if (is_constant) {
                f->constant++;
@@ -225,8 +223,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int 
val,
                f->data.correct++;
        else
                f->data.incorrect++;
-
-       user_access_restore(flags);
 }
 EXPORT_SYMBOL(ftrace_likely_update);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 65179513c2b2..57053affaad2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1489,16 +1489,29 @@ static int free_pool_huge_page(struct hstate *h, 
nodemask_t *nodes_allowed,
 
 /*
  * Dissolve a given free hugepage into free buddy pages. This function does
- * nothing for in-use (including surplus) hugepages. Returns -EBUSY if the
- * dissolution fails because a give page is not a free hugepage, or because
- * free hugepages are fully reserved.
+ * nothing for in-use hugepages and non-hugepages.
+ * This function returns values like below:
+ *
+ *  -EBUSY: failed to dissolve free hugepages or the hugepage is in-use
+ *          (allocated or reserved.)
+ *       0: successfully dissolved free hugepages or the page is not a
+ *          hugepage (considered as already dissolved)
  */
 int dissolve_free_huge_page(struct page *page)
 {
        int rc = -EBUSY;
 
+       /* Avoid disrupting the normal path by needlessly taking hugetlb_lock */
+       if (!PageHuge(page))
+               return 0;
+
        spin_lock(&hugetlb_lock);
-       if (PageHuge(page) && !page_count(page)) {
+       if (!PageHuge(page)) {
+               rc = 0;
+               goto out;
+       }
+
+       if (!page_count(page)) {
                struct page *head = compound_head(page);
                struct hstate *h = page_hstate(head);
                int nid = page_to_nid(head);
@@ -1543,11 +1556,9 @@ int dissolve_free_huge_pages(unsigned long start_pfn, 
unsigned long end_pfn)
 
        for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) {
                page = pfn_to_page(pfn);
-               if (PageHuge(page) && !page_count(page)) {
-                       rc = dissolve_free_huge_page(page);
-                       if (rc)
-                               break;
-               }
+               rc = dissolve_free_huge_page(page);
+               if (rc)
+                       break;
        }
 
        return rc;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6edc6db5ec1b..2994ceb2e7b0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1731,6 +1731,8 @@ static int soft_offline_huge_page(struct page *page, int 
flags)
                if (!ret) {
                        if (set_hwpoison_free_buddy_page(page))
                                num_poisoned_pages_inc();
+                       else
+                               ret = -EBUSY;
                }
        }
        return ret;
@@ -1855,11 +1857,8 @@ static int soft_offline_in_use_page(struct page *page, 
int flags)
 
 static int soft_offline_free_page(struct page *page)
 {
-       int rc = 0;
-       struct page *head = compound_head(page);
+       int rc = dissolve_free_huge_page(page);
 
-       if (PageHuge(head))
-               rc = dissolve_free_huge_page(page);
        if (!rc) {
                if (set_hwpoison_free_buddy_page(page))
                        num_poisoned_pages_inc();
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 360b24bc69e5..62f945ea3e36 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -306,7 +306,7 @@ static void mpol_rebind_nodemask(struct mempolicy *pol, 
const nodemask_t *nodes)
        else {
                nodes_remap(tmp, pol->v.nodes,pol->w.cpuset_mems_allowed,
                                                                *nodes);
-               pol->w.cpuset_mems_allowed = tmp;
+               pol->w.cpuset_mems_allowed = *nodes;
        }
 
        if (nodes_empty(tmp))
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 6302bc62c27d..52ed59bbc275 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -136,7 +136,7 @@ static ssize_t page_idle_bitmap_read(struct file *file, 
struct kobject *kobj,
 
        end_pfn = pfn + count * BITS_PER_BYTE;
        if (end_pfn > max_pfn)
-               end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS);
+               end_pfn = max_pfn;
 
        for (; pfn < end_pfn; pfn++) {
                bit = pfn % BITMAP_CHUNK_BITS;
@@ -181,7 +181,7 @@ static ssize_t page_idle_bitmap_write(struct file *file, 
struct kobject *kobj,
 
        end_pfn = pfn + count * BITS_PER_BYTE;
        if (end_pfn > max_pfn)
-               end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS);
+               end_pfn = max_pfn;
 
        for (; pfn < end_pfn; pfn++) {
                bit = pfn % BITMAP_CHUNK_BITS;
diff --git a/net/9p/client.c b/net/9p/client.c
index 23ec6187dc07..b615aae5a0f8 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -237,144 +237,170 @@ static int parse_opts(char *opts, struct p9_client 
*clnt)
        return ret;
 }
 
-static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
+                        int alloc_msize)
 {
-       struct p9_fcall *fc;
-       fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
-       if (!fc)
-               return NULL;
+       if (likely(c->fcall_cache) && alloc_msize == c->msize) {
+               fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
+               fc->cache = c->fcall_cache;
+       } else {
+               fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
+               fc->cache = NULL;
+       }
+       if (!fc->sdata)
+               return -ENOMEM;
        fc->capacity = alloc_msize;
-       fc->sdata = (char *) fc + sizeof(struct p9_fcall);
-       return fc;
+       return 0;
+}
+
+void p9_fcall_fini(struct p9_fcall *fc)
+{
+       /* sdata can be NULL for interrupted requests in trans_rdma,
+        * and kmem_cache_free does not do NULL-check for us
+        */
+       if (unlikely(!fc->sdata))
+               return;
+
+       if (fc->cache)
+               kmem_cache_free(fc->cache, fc->sdata);
+       else
+               kfree(fc->sdata);
 }
+EXPORT_SYMBOL(p9_fcall_fini);
+
+static struct kmem_cache *p9_req_cache;
 
 /**
- * p9_tag_alloc - lookup/allocate a request by tag
- * @c: client session to lookup tag within
- * @tag: numeric id for transaction
- *
- * this is a simple array lookup, but will grow the
- * request_slots as necessary to accommodate transaction
- * ids which did not previously have a slot.
- *
- * this code relies on the client spinlock to manage locks, its
- * possible we should switch to something else, but I'd rather
- * stick with something low-overhead for the common case.
+ * p9_tag_alloc - Allocate a new request.
+ * @c: Client session.
+ * @type: Transaction type.
+ * @max_size: Maximum packet size for this request.
  *
+ * Context: Process context.
+ * Return: Pointer to new request.
  */
-
 static struct p9_req_t *
-p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
+p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
 {
-       unsigned long flags;
-       int row, col;
-       struct p9_req_t *req;
+       struct p9_req_t *req = kmem_cache_alloc(p9_req_cache, GFP_NOFS);
        int alloc_msize = min(c->msize, max_size);
+       int tag;
 
-       /* This looks up the original request by tag so we know which
-        * buffer to read the data into */
-       tag++;
-
-       if (tag >= c->max_tag) {
-               spin_lock_irqsave(&c->lock, flags);
-               /* check again since original check was outside of lock */
-               while (tag >= c->max_tag) {
-                       row = (tag / P9_ROW_MAXTAG);
-                       c->reqs[row] = kcalloc(P9_ROW_MAXTAG,
-                                       sizeof(struct p9_req_t), GFP_ATOMIC);
-
-                       if (!c->reqs[row]) {
-                               pr_err("Couldn't grow tag array\n");
-                               spin_unlock_irqrestore(&c->lock, flags);
-                               return ERR_PTR(-ENOMEM);
-                       }
-                       for (col = 0; col < P9_ROW_MAXTAG; col++) {
-                               req = &c->reqs[row][col];
-                               req->status = REQ_STATUS_IDLE;
-                               init_waitqueue_head(&req->wq);
-                       }
-                       c->max_tag += P9_ROW_MAXTAG;
-               }
-               spin_unlock_irqrestore(&c->lock, flags);
-       }
-       row = tag / P9_ROW_MAXTAG;
-       col = tag % P9_ROW_MAXTAG;
-
-       req = &c->reqs[row][col];
-       if (!req->tc)
-               req->tc = p9_fcall_alloc(alloc_msize);
-       if (!req->rc)
-               req->rc = p9_fcall_alloc(alloc_msize);
-       if (!req->tc || !req->rc)
-               goto grow_failed;
+       if (!req)
+               return ERR_PTR(-ENOMEM);
 
-       p9pdu_reset(req->tc);
-       p9pdu_reset(req->rc);
+       if (p9_fcall_init(c, &req->tc, alloc_msize))
+               goto free_req;
+       if (p9_fcall_init(c, &req->rc, alloc_msize))
+               goto free;
 
-       req->tc->tag = tag-1;
+       p9pdu_reset(&req->tc);
+       p9pdu_reset(&req->rc);
        req->status = REQ_STATUS_ALLOC;
+       init_waitqueue_head(&req->wq);
+       INIT_LIST_HEAD(&req->req_list);
+
+       idr_preload(GFP_NOFS);
+       spin_lock_irq(&c->lock);
+       if (type == P9_TVERSION)
+               tag = idr_alloc(&c->reqs, req, P9_NOTAG, P9_NOTAG + 1,
+                               GFP_NOWAIT);
+       else
+               tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT);
+       req->tc.tag = tag;
+       spin_unlock_irq(&c->lock);
+       idr_preload_end();
+       if (tag < 0)
+               goto free;
+
+       /* Init ref to two because in the general case there is one ref
+        * that is put asynchronously by a writer thread and one ref put
+        * by p9_tag_remove in the main thread; the extra ref temporarily
+        * given by p9_tag_lookup is put by p9_client_cb in the recv
+        * thread. The only exception is virtio that does not use
+        * p9_tag_lookup but does not have a writer thread either
+        * (the write happens synchronously in the request/zc_request
+        * callback), so p9_client_cb eats the second ref there
+        * as the pointer is duplicated directly by virtqueue_add_sgs()
+        */
+       refcount_set(&req->refcount.refcount, 2);
 
        return req;
 
-grow_failed:
-       pr_err("Couldn't grow tag array\n");
-       kfree(req->tc);
-       kfree(req->rc);
-       req->tc = req->rc = NULL;
+free:
+       p9_fcall_fini(&req->tc);
+       p9_fcall_fini(&req->rc);
+free_req:
+       kmem_cache_free(p9_req_cache, req);
        return ERR_PTR(-ENOMEM);
 }
 
 /**
- * p9_tag_lookup - lookup a request by tag
- * @c: client session to lookup tag within
- * @tag: numeric id for transaction
+ * p9_tag_lookup - Look up a request by tag.
+ * @c: Client session.
+ * @tag: Transaction ID.
  *
+ * Context: Any context.
+ * Return: A request, or %NULL if there is no request with that tag.
  */
-
 struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
 {
-       int row, col;
-
-       /* This looks up the original request by tag so we know which
-        * buffer to read the data into */
-       tag++;
-
-       if (tag >= c->max_tag)
-               return NULL;
+       struct p9_req_t *req;
 
-       row = tag / P9_ROW_MAXTAG;
-       col = tag % P9_ROW_MAXTAG;
+       rcu_read_lock();
+again:
+       req = idr_find(&c->reqs, tag);
+       if (req) {
+               /* We have to be careful with the req found under rcu_read_lock
+                * Thanks to SLAB_TYPESAFE_BY_RCU we can safely try to get the
+                * ref again without corrupting other data, then check again
+                * that the tag matches once we have the ref
+                */
+               if (!p9_req_try_get(req))
+                       goto again;
+               if (req->tc.tag != tag) {
+                       p9_req_put(req);
+                       goto again;
+               }
+       }
+       rcu_read_unlock();
 
-       return &c->reqs[row][col];
+       return req;
 }
 EXPORT_SYMBOL(p9_tag_lookup);
 
 /**
- * p9_tag_init - setup tags structure and contents
- * @c:  v9fs client struct
- *
- * This initializes the tags structure for each client instance.
+ * p9_tag_remove - Remove a tag.
+ * @c: Client session.
+ * @r: Request whose tag is removed; one reference to it is dropped.
  *
+ * Context: Any context.
  */
+static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
+{
+       unsigned long flags;
+       u16 tag = r->tc.tag;
+
+       p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
+       spin_lock_irqsave(&c->lock, flags);
+       idr_remove(&c->reqs, tag);
+       spin_unlock_irqrestore(&c->lock, flags);
+       return p9_req_put(r);
+}
 
-static int p9_tag_init(struct p9_client *c)
+static void p9_req_free(struct kref *ref)
 {
-       int err = 0;
+       struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount);
+       p9_fcall_fini(&r->tc);
+       p9_fcall_fini(&r->rc);
+       kmem_cache_free(p9_req_cache, r);
+}
 
-       c->tagpool = p9_idpool_create();
-       if (IS_ERR(c->tagpool)) {
-               err = PTR_ERR(c->tagpool);
-               goto error;
-       }
-       err = p9_idpool_get(c->tagpool); /* reserve tag 0 */
-       if (err < 0) {
-               p9_idpool_destroy(c->tagpool);
-               goto error;
-       }
-       c->max_tag = 0;
-error:
-       return err;
+int p9_req_put(struct p9_req_t *r)
+{
+       return kref_put(&r->refcount, p9_req_free);
 }
+EXPORT_SYMBOL(p9_req_put);
 
 /**
  * p9_tag_cleanup - cleans up tags structure and reclaims resources
@@ -385,52 +411,17 @@ static int p9_tag_init(struct p9_client *c)
  */
 static void p9_tag_cleanup(struct p9_client *c)
 {
-       int row, col;
-
-       /* check to insure all requests are idle */
-       for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
-               for (col = 0; col < P9_ROW_MAXTAG; col++) {
-                       if (c->reqs[row][col].status != REQ_STATUS_IDLE) {
-                               p9_debug(P9_DEBUG_MUX,
-                                        "Attempting to cleanup non-free tag 
%d,%d\n",
-                                        row, col);
-                               /* TODO: delay execution of cleanup */
-                               return;
-                       }
-               }
-       }
-
-       if (c->tagpool) {
-               p9_idpool_put(0, c->tagpool); /* free reserved tag 0 */
-               p9_idpool_destroy(c->tagpool);
-       }
+       struct p9_req_t *req;
+       int id;
 
-       /* free requests associated with tags */
-       for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
-               for (col = 0; col < P9_ROW_MAXTAG; col++) {
-                       kfree(c->reqs[row][col].tc);
-                       kfree(c->reqs[row][col].rc);
-               }
-               kfree(c->reqs[row]);
+       rcu_read_lock();
+       idr_for_each_entry(&c->reqs, req, id) {
+               pr_info("Tag %d still in use\n", id);
+               if (p9_tag_remove(c, req) == 0)
+                       pr_warn("Packet with tag %d has still references",
+                               req->tc.tag);
        }
-       c->max_tag = 0;
-}
-
-/**
- * p9_free_req - free a request and clean-up as necessary
- * c: client state
- * r: request to release
- *
- */
-
-static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
-{
-       int tag = r->tc->tag;
-       p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
-
-       r->status = REQ_STATUS_IDLE;
-       if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool))
-               p9_idpool_put(tag, c->tagpool);
+       rcu_read_unlock();
 }
 
 /**
@@ -441,7 +432,7 @@ static void p9_free_req(struct p9_client *c, struct 
p9_req_t *r)
  */
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
 {
-       p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
+       p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag);
 
        /*
         * This barrier is needed to make sure any change made to req before
@@ -451,7 +442,8 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t 
*req, int status)
        req->status = status;
 
        wake_up(&req->wq);
-       p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
+       p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
+       p9_req_put(req);
 }
 EXPORT_SYMBOL(p9_client_cb);
 
@@ -522,18 +514,18 @@ static int p9_check_errors(struct p9_client *c, struct 
p9_req_t *req)
        int err;
        int ecode;
 
-       err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
-       if (req->rc->size >= c->msize) {
+       err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
+       if (req->rc.size >= c->msize) {
                p9_debug(P9_DEBUG_ERROR,
                         "requested packet size too big: %d\n",
-                        req->rc->size);
+                        req->rc.size);
                return -EIO;
        }
        /*
         * dump the response from server
         * This should be after check errors which poplulate pdu_fcall.
         */
-       trace_9p_protocol_dump(c, req->rc);
+       trace_9p_protocol_dump(c, &req->rc);
        if (err) {
                p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
                return err;
@@ -543,7 +535,7 @@ static int p9_check_errors(struct p9_client *c, struct 
p9_req_t *req)
 
        if (!p9_is_proto_dotl(c)) {
                char *ename;
-               err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+               err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
                                  &ename, &ecode);
                if (err)
                        goto out_err;
@@ -559,7 +551,7 @@ static int p9_check_errors(struct p9_client *c, struct 
p9_req_t *req)
                }
                kfree(ename);
        } else {
-               err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+               err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
                err = -ecode;
 
                p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
@@ -593,12 +585,12 @@ static int p9_check_zc_errors(struct p9_client *c, struct 
p9_req_t *req,
        int8_t type;
        char *ename = NULL;
 
-       err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
+       err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
        /*
         * dump the response from server
         * This should be after parse_header which poplulate pdu_fcall.
         */
-       trace_9p_protocol_dump(c, req->rc);
+       trace_9p_protocol_dump(c, &req->rc);
        if (err) {
                p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
                return err;
@@ -613,13 +605,13 @@ static int p9_check_zc_errors(struct p9_client *c, struct 
p9_req_t *req,
                /* 7 = header size for RERROR; */
                int inline_len = in_hdrlen - 7;
 
-               len =  req->rc->size - req->rc->offset;
+               len = req->rc.size - req->rc.offset;
                if (len > (P9_ZC_HDR_SZ - 7)) {
                        err = -EFAULT;
                        goto out_err;
                }
 
-               ename = &req->rc->sdata[req->rc->offset];
+               ename = &req->rc.sdata[req->rc.offset];
                if (len > inline_len) {
                        /* We have error in external buffer */
                        if (!copy_from_iter_full(ename + inline_len,
@@ -629,7 +621,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
                        }
                }
                ename = NULL;
-               err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+               err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
                                  &ename, &ecode);
                if (err)
                        goto out_err;
@@ -645,7 +637,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
                }
                kfree(ename);
        } else {
-               err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+               err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
                err = -ecode;
 
                p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
@@ -678,7 +670,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
        int16_t oldtag;
        int err;
 
-       err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1);
+       err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1);
        if (err)
                return err;
 
@@ -692,11 +684,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
         * if we haven't received a response for oldreq,
         * remove it from the list
         */
-       if (oldreq->status == REQ_STATUS_SENT)
+       if (oldreq->status == REQ_STATUS_SENT) {
                if (c->trans_mod->cancelled)
                        c->trans_mod->cancelled(c, oldreq);
+       }
 
-       p9_free_req(c, req);
+       p9_tag_remove(c, req);
        return 0;
 }
 
@@ -704,7 +697,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
                                              int8_t type, int req_size,
                                              const char *fmt, va_list ap)
 {
-       int tag, err;
+       int err;
        struct p9_req_t *req;
 
        p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type);
@@ -717,27 +710,22 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
        if ((c->status == BeginDisconnect) && (type != P9_TCLUNK))
                return ERR_PTR(-EIO);
 
-       tag = P9_NOTAG;
-       if (type != P9_TVERSION) {
-               tag = p9_idpool_get(c->tagpool);
-               if (tag < 0)
-                       return ERR_PTR(-ENOMEM);
-       }
-
-       req = p9_tag_alloc(c, tag, req_size);
+       req = p9_tag_alloc(c, type, req_size);
        if (IS_ERR(req))
                return req;
 
        /* marshall the data */
-       p9pdu_prepare(req->tc, tag, type);
-       err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
+       p9pdu_prepare(&req->tc, req->tc.tag, type);
+       err = p9pdu_vwritef(&req->tc, c->proto_version, fmt, ap);
        if (err)
                goto reterr;
-       p9pdu_finalize(c, req->tc);
-       trace_9p_client_req(c, type, tag);
+       p9pdu_finalize(c, &req->tc);
+       trace_9p_client_req(c, type, req->tc.tag);
        return req;
 reterr:
-       p9_free_req(c, req);
+       p9_tag_remove(c, req);
+       /* We have to put also the 2nd reference as it won't be used */
+       p9_req_put(req);
        return ERR_PTR(err);
 }
 
@@ -747,7 +735,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
  * @type: type of request
  * @fmt: protocol format string (see protocol.c)
  *
- * Returns request structure (which client must free using p9_free_req)
+ * Returns request structure (which client must free using p9_tag_remove)
  */
 
 static struct p9_req_t *
@@ -772,6 +760,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 
        err = c->trans_mod->request(c, req);
        if (err < 0) {
+               /* write won't happen */
+               p9_req_put(req);
                if (err != -ERESTARTSYS && err != -EFAULT)
                        c->status = Disconnected;
                goto recalc_sigpending;
@@ -819,11 +809,11 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
                goto reterr;
 
        err = p9_check_errors(c, req);
-       trace_9p_client_res(c, type, req->rc->tag, err);
+       trace_9p_client_res(c, type, req->rc.tag, err);
        if (!err)
                return req;
 reterr:
-       p9_free_req(c, req);
+       p9_tag_remove(c, req);
        return ERR_PTR(safe_errno(err));
 }
 
@@ -838,7 +828,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
  * @hdrlen: reader header size, This is the size of response protocol data
  * @fmt: protocol format string (see protocol.c)
  *
- * Returns request structure (which client must free using p9_free_req)
+ * Returns request structure (which client must free using p9_tag_remove)
  */
 static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
                                         struct iov_iter *uidata,
@@ -901,11 +891,11 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
                goto reterr;
 
        err = p9_check_zc_errors(c, req, uidata, in_hdrlen);
-       trace_9p_client_res(c, type, req->rc->tag, err);
+       trace_9p_client_res(c, type, req->rc.tag, err);
        if (!err)
                return req;
 reterr:
-       p9_free_req(c, req);
+       p9_tag_remove(c, req);
        return ERR_PTR(safe_errno(err));
 }
 
@@ -984,10 +974,10 @@ static int p9_client_version(struct p9_client *c)
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);
+       err = p9pdu_readf(&req->rc, c->proto_version, "ds", &msize, &version);
        if (err) {
                p9_debug(P9_DEBUG_9P, "version error %d\n", err);
-               trace_9p_protocol_dump(c, req->rc);
+               trace_9p_protocol_dump(c, &req->rc);
                goto error;
        }
 
@@ -1016,7 +1006,7 @@ static int p9_client_version(struct p9_client *c)
 
 error:
        kfree(version);
-       p9_free_req(c, req);
+       p9_tag_remove(c, req);
 
        return err;
 }
@@ -1034,20 +1024,18 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 
        clnt->trans_mod = NULL;
        clnt->trans = NULL;
+       clnt->fcall_cache = NULL;
 
        client_id = utsname()->nodename;
        memcpy(clnt->name, client_id, strlen(client_id) + 1);
 
        spin_lock_init(&clnt->lock);
        idr_init(&clnt->fids);
-
-       err = p9_tag_init(clnt);
-       if (err < 0)
-               goto free_client;
+       idr_init(&clnt->reqs);
 
        err = parse_opts(options, clnt);
        if (err < 0)
-               goto destroy_tagpool;
+               goto free_client;
 
        if (!clnt->trans_mod)
                clnt->trans_mod = v9fs_get_default_trans();
@@ -1056,7 +1044,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
                err = -EPROTONOSUPPORT;
                p9_debug(P9_DEBUG_ERROR,
                         "No transport defined or default transport\n");
-               goto destroy_tagpool;
+               goto free_client;
        }
 
        p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n",
@@ -1080,14 +1068,21 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
        if (err)
                goto close_trans;
 
+       /* P9_HDRSZ + 4 is the smallest packet header we can have that is
+        * followed by data accessed from userspace by read
+        */
+       clnt->fcall_cache =
+               kmem_cache_create_usercopy("9p-fcall-cache", clnt->msize,
+                                          0, 0, P9_HDRSZ + 4,
+                                          clnt->msize - (P9_HDRSZ + 4),
+                                          NULL);
+
        return clnt;
 
 close_trans:
        clnt->trans_mod->close(clnt);
 put_trans:
        v9fs_put_trans(clnt->trans_mod);
-destroy_tagpool:
-       p9_idpool_destroy(clnt->tagpool);
 free_client:
        kfree(clnt);
        return ERR_PTR(err);
@@ -1113,6 +1108,7 @@ void p9_client_destroy(struct p9_client *clnt)
 
        p9_tag_cleanup(clnt);
 
+       kmem_cache_destroy(clnt->fcall_cache);
        kfree(clnt);
 }
 EXPORT_SYMBOL(p9_client_destroy);
@@ -1156,10 +1152,10 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
                goto error;
        }
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
-               p9_free_req(clnt, req);
+               trace_9p_protocol_dump(clnt, &req->rc);
+               p9_tag_remove(clnt, req);
                goto error;
        }
 
@@ -1168,7 +1164,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
 
        memmove(&fid->qid, &qid, sizeof(struct p9_qid));
 
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
        return fid;
 
 error:
@@ -1213,13 +1209,13 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
                goto error;
        }
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
-               p9_free_req(clnt, req);
+               trace_9p_protocol_dump(clnt, &req->rc);
+               p9_tag_remove(clnt, req);
                goto clunk_fid;
        }
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 
        p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);
 
@@ -1280,9 +1276,9 @@ int p9_client_open(struct p9_fid *fid, int mode)
                goto error;
        }
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
+               trace_9p_protocol_dump(clnt, &req->rc);
                goto free_and_error;
        }
 
@@ -1294,7 +1290,7 @@ int p9_client_open(struct p9_fid *fid, int mode)
        fid->iounit = iounit;
 
 free_and_error:
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        return err;
 }
@@ -1324,9 +1320,9 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
                goto error;
        }
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", qid, &iounit);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
+               trace_9p_protocol_dump(clnt, &req->rc);
                goto free_and_error;
        }
 
@@ -1339,7 +1335,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
        ofid->iounit = iounit;
 
 free_and_error:
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        return err;
 }
@@ -1369,9 +1365,9 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
                goto error;
        }
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
+               trace_9p_protocol_dump(clnt, &req->rc);
                goto free_and_error;
        }
 
@@ -1384,7 +1380,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
        fid->iounit = iounit;
 
 free_and_error:
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        return err;
 }
@@ -1408,9 +1404,9 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
                goto error;
        }
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
+               trace_9p_protocol_dump(clnt, &req->rc);
                goto free_and_error;
        }
 
@@ -1418,7 +1414,7 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
                        qid->type, (unsigned long long)qid->path, qid->version);
 
 free_and_error:
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        return err;
 }
@@ -1438,7 +1434,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newna
                return PTR_ERR(req);
 
        p9_debug(P9_DEBUG_9P, "<<< RLINK\n");
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
        return 0;
 }
 EXPORT_SYMBOL(p9_client_link);
@@ -1462,7 +1458,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync)
 
        p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);
 
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 
 error:
        return err;
@@ -1497,7 +1493,7 @@ int p9_client_clunk(struct p9_fid *fid)
 
        p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid);
 
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        /*
         * Fid is not valid even after a failed clunk
@@ -1531,7 +1527,7 @@ int p9_client_remove(struct p9_fid *fid)
 
        p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);
 
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        if (err == -ERESTARTSYS)
                p9_client_clunk(fid);
@@ -1558,7 +1554,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags)
        }
        p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name);
 
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        return err;
 }
@@ -1606,11 +1602,11 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
                        break;
                }
 
-               *err = p9pdu_readf(req->rc, clnt->proto_version,
+               *err = p9pdu_readf(&req->rc, clnt->proto_version,
                                   "D", &count, &dataptr);
                if (*err) {
-                       trace_9p_protocol_dump(clnt, req->rc);
-                       p9_free_req(clnt, req);
+                       trace_9p_protocol_dump(clnt, &req->rc);
+                       p9_tag_remove(clnt, req);
                        break;
                }
                if (rsize < count) {
@@ -1620,7 +1616,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
 
                p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
                if (!count) {
-                       p9_free_req(clnt, req);
+                       p9_tag_remove(clnt, req);
                        break;
                }
 
@@ -1630,7 +1626,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
                        offset += n;
                        if (n != count) {
                                *err = -EFAULT;
-                               p9_free_req(clnt, req);
+                               p9_tag_remove(clnt, req);
                                break;
                        }
                } else {
@@ -1638,7 +1634,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
                        total += count;
                        offset += count;
                }
-               p9_free_req(clnt, req);
+               p9_tag_remove(clnt, req);
        }
        return total;
 }
@@ -1679,10 +1675,10 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
                        break;
                }
 
-               *err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
+               *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count);
                if (*err) {
-                       trace_9p_protocol_dump(clnt, req->rc);
-                       p9_free_req(clnt, req);
+                       trace_9p_protocol_dump(clnt, &req->rc);
+                       p9_tag_remove(clnt, req);
                        break;
                }
                if (rsize < count) {
@@ -1692,7 +1688,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
 
                p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
 
-               p9_free_req(clnt, req);
+               p9_tag_remove(clnt, req);
                iov_iter_advance(from, count);
                total += count;
                offset += count;
@@ -1723,10 +1719,10 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
                goto error;
        }
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
-               p9_free_req(clnt, req);
+               trace_9p_protocol_dump(clnt, &req->rc);
+               p9_tag_remove(clnt, req);
                goto error;
        }
 
@@ -1743,7 +1739,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
                from_kgid(&init_user_ns, ret->n_gid),
                from_kuid(&init_user_ns, ret->n_muid));
 
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
        return ret;
 
 error:
@@ -1776,10 +1772,10 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
                goto error;
        }
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
-               p9_free_req(clnt, req);
+               trace_9p_protocol_dump(clnt, &req->rc);
+               p9_tag_remove(clnt, req);
                goto error;
        }
 
@@ -1804,7 +1800,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
                ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec,
                ret->st_gen, ret->st_data_version);
 
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
        return ret;
 
 error:
@@ -1873,7 +1869,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 
        p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid);
 
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        return err;
 }
@@ -1905,7 +1901,7 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
                goto error;
        }
        p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid);
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        return err;
 }
@@ -1928,12 +1924,12 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
                goto error;
        }
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
-               &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
-               &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
+                         &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
+                         &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
-               p9_free_req(clnt, req);
+               trace_9p_protocol_dump(clnt, &req->rc);
+               p9_tag_remove(clnt, req);
                goto error;
        }
 
@@ -1944,7 +1940,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
                sb->blocks, sb->bfree, sb->bavail, sb->files,  sb->ffree,
                sb->fsid, (long int)sb->namelen);
 
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        return err;
 }
@@ -1972,7 +1968,7 @@ int p9_client_rename(struct p9_fid *fid,
 
        p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);
 
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        return err;
 }
@@ -2002,7 +1998,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
        p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n",
                   newdirfid->fid, new_name);
 
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        return err;
 }
@@ -2036,13 +2032,13 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
                err = PTR_ERR(req);
                goto error;
        }
-       err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
-               p9_free_req(clnt, req);
+               trace_9p_protocol_dump(clnt, &req->rc);
+               p9_tag_remove(clnt, req);
                goto clunk_fid;
        }
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
        p9_debug(P9_DEBUG_9P, "<<<  RXATTRWALK fid %d size %llu\n",
                attr_fid->fid, *attr_size);
        return attr_fid;
@@ -2076,7 +2072,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
                goto error;
        }
        p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid);
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        return err;
 }
@@ -2124,9 +2120,9 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
                goto error;
        }
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
+               trace_9p_protocol_dump(clnt, &req->rc);
                goto free_and_error;
        }
        if (rsize < count) {
@@ -2139,11 +2135,11 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
        if (non_zc)
                memmove(data, dataptr, count);
 
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
        return count;
 
 free_and_error:
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
 error:
        return err;
 }
@@ -2165,16 +2161,16 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
+               trace_9p_protocol_dump(clnt, &req->rc);
                goto error;
        }
        p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
                                (unsigned long long)qid->path, qid->version);
 
 error:
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
        return err;
 
 }
@@ -2196,16 +2192,16 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
+               trace_9p_protocol_dump(clnt, &req->rc);
                goto error;
        }
        p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
                                (unsigned long long)qid->path, qid->version);
 
 error:
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
        return err;
 
 }
@@ -2231,14 +2227,14 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "b", status);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
+               trace_9p_protocol_dump(clnt, &req->rc);
                goto error;
        }
        p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
 error:
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
        return err;
 
 }
@@ -2262,18 +2258,18 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type,
-                       &glock->start, &glock->length, &glock->proc_id,
-                       &glock->client_id);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "bqqds", &glock->type,
+                         &glock->start, &glock->length, &glock->proc_id,
+                         &glock->client_id);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
+               trace_9p_protocol_dump(clnt, &req->rc);
                goto error;
        }
        p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
                "proc_id %d client_id %s\n", glock->type, glock->start,
                glock->length, glock->proc_id, glock->client_id);
 error:
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
        return err;
 }
 EXPORT_SYMBOL(p9_client_getlock_dotl);
@@ -2292,14 +2288,25 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
+       err = p9pdu_readf(&req->rc, clnt->proto_version, "s", target);
        if (err) {
-               trace_9p_protocol_dump(clnt, req->rc);
+               trace_9p_protocol_dump(clnt, &req->rc);
                goto error;
        }
        p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
 error:
-       p9_free_req(clnt, req);
+       p9_tag_remove(clnt, req);
        return err;
 }
 EXPORT_SYMBOL(p9_client_readlink);
+
+int __init p9_client_init(void)
+{
+       p9_req_cache = KMEM_CACHE(p9_req_t, SLAB_TYPESAFE_BY_RCU);
+       return p9_req_cache ? 0 : -ENOMEM;
+}
+
+void __exit p9_client_exit(void)
+{
+       kmem_cache_destroy(p9_req_cache);
+}
diff --git a/net/9p/mod.c b/net/9p/mod.c
index 253ba824a325..0da56d6af73b 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -171,11 +171,17 @@ void v9fs_put_trans(struct p9_trans_module *m)
  */
 static int __init init_p9(void)
 {
+       int ret;
+
+       ret = p9_client_init();
+       if (ret)
+               return ret;
+
        p9_error_init();
        pr_info("Installing 9P2000 support\n");
        p9_trans_fd_init();
 
-       return 0;
+       return ret;
 }
 
 /**
@@ -188,6 +194,7 @@ static void __exit exit_p9(void)
        pr_info("Unloading 9P2000 support\n");
 
        p9_trans_fd_exit();
+       p9_client_exit();
 }
 
 module_init(init_p9)
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index b4d80c533f89..462ba144cb39 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -623,13 +623,19 @@ int p9dirent_read(struct p9_client *clnt, char *buf, int len,
        if (ret) {
                p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret);
                trace_9p_protocol_dump(clnt, &fake_pdu);
-               goto out;
+               return ret;
        }
 
-       strcpy(dirent->d_name, nameptr);
+       ret = strscpy(dirent->d_name, nameptr, sizeof(dirent->d_name));
+       if (ret < 0) {
+               p9_debug(P9_DEBUG_ERROR,
+                        "On the wire dirent name too long: %s\n",
+                        nameptr);
+               kfree(nameptr);
+               return ret;
+       }
        kfree(nameptr);
 
-out:
        return fake_pdu.offset;
 }
 EXPORT_SYMBOL(p9dirent_read);
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index b718db2085b2..3dff68f05fb9 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -14,6 +14,7 @@
 
 #include <linux/mm.h>
 #include <linux/module.h>
+#include "trans_common.h"
 
 /**
  *  p9_release_pages - Release pages after the transaction.
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index e2ef3c782c53..f868cf6fba79 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -131,7 +131,8 @@ struct p9_conn {
        int err;
        struct list_head req_list;
        struct list_head unsent_req_list;
-       struct p9_req_t *req;
+       struct p9_req_t *rreq;
+       struct p9_req_t *wreq;
        char tmp_buf[7];
        struct p9_fcall rc;
        int wpos;
@@ -291,7 +292,6 @@ static void p9_read_work(struct work_struct *work)
        __poll_t n;
        int err;
        struct p9_conn *m;
-       int status = REQ_STATUS_ERROR;
 
        m = container_of(work, struct p9_conn, rq);
 
@@ -322,7 +322,7 @@ static void p9_read_work(struct work_struct *work)
        m->rc.offset += err;
 
        /* header read in */
-       if ((!m->req) && (m->rc.offset == m->rc.capacity)) {
+       if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) {
                p9_debug(P9_DEBUG_TRANS, "got new header\n");
 
                /* Header size */
@@ -346,23 +346,23 @@ static void p9_read_work(struct work_struct *work)
                         "mux %p pkt: size: %d bytes tag: %d\n",
                         m, m->rc.size, m->rc.tag);
 
-               m->req = p9_tag_lookup(m->client, m->rc.tag);
-               if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
+               m->rreq = p9_tag_lookup(m->client, m->rc.tag);
+               if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) {
                        p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
                                 m->rc.tag);
                        err = -EIO;
                        goto error;
                }
 
-               if (m->req->rc == NULL) {
+               if (!m->rreq->rc.sdata) {
                        p9_debug(P9_DEBUG_ERROR,
                                 "No recv fcall for tag %d (req %p), 
disconnecting!\n",
-                                m->rc.tag, m->req);
-                       m->req = NULL;
+                                m->rc.tag, m->rreq);
+                       m->rreq = NULL;
                        err = -EIO;
                        goto error;
                }
-               m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
+               m->rc.sdata = m->rreq->rc.sdata;
                memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
                m->rc.capacity = m->rc.size;
        }
@@ -370,20 +370,27 @@ static void p9_read_work(struct work_struct *work)
        /* packet is read in
         * not an else because some packets (like clunk) have no payload
         */
-       if ((m->req) && (m->rc.offset == m->rc.capacity)) {
+       if ((m->rreq) && (m->rc.offset == m->rc.capacity)) {
                p9_debug(P9_DEBUG_TRANS, "got new packet\n");
-               m->req->rc->size = m->rc.offset;
+               m->rreq->rc.size = m->rc.offset;
                spin_lock(&m->client->lock);
-               if (m->req->status != REQ_STATUS_ERROR)
-                       status = REQ_STATUS_RCVD;
-               list_del(&m->req->req_list);
-               /* update req->status while holding client->lock  */
-               p9_client_cb(m->client, m->req, status);
+               if (m->rreq->status == REQ_STATUS_SENT) {
+                       list_del(&m->rreq->req_list);
+                       p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
+               } else {
+                       spin_unlock(&m->client->lock);
+                       p9_debug(P9_DEBUG_ERROR,
+                                "Request tag %d errored out while we were 
reading the reply\n",
+                                m->rc.tag);
+                       err = -EIO;
+                       goto error;
+               }
                spin_unlock(&m->client->lock);
                m->rc.sdata = NULL;
                m->rc.offset = 0;
                m->rc.capacity = 0;
-               m->req = NULL;
+               p9_req_put(m->rreq);
+               m->rreq = NULL;
        }
 
 end_clear:
@@ -469,9 +476,11 @@ static void p9_write_work(struct work_struct *work)
                p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
                list_move_tail(&req->req_list, &m->req_list);
 
-               m->wbuf = req->tc->sdata;
-               m->wsize = req->tc->size;
+               m->wbuf = req->tc.sdata;
+               m->wsize = req->tc.size;
                m->wpos = 0;
+               p9_req_get(req);
+               m->wreq = req;
                spin_unlock(&m->client->lock);
        }
 
@@ -492,8 +501,11 @@ static void p9_write_work(struct work_struct *work)
        }
 
        m->wpos += err;
-       if (m->wpos == m->wsize)
+       if (m->wpos == m->wsize) {
                m->wpos = m->wsize = 0;
+               p9_req_put(m->wreq);
+               m->wreq = NULL;
+       }
 
 end_clear:
        clear_bit(Wworksched, &m->wsched);
@@ -663,7 +675,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
        struct p9_conn *m = &ts->conn;
 
        p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
-                m, current, req->tc, req->tc->id);
+                m, current, &req->tc, req->tc.id);
        if (m->err < 0)
                return m->err;
 
@@ -694,6 +706,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
        if (req->status == REQ_STATUS_UNSENT) {
                list_del(&req->req_list);
                req->status = REQ_STATUS_FLSHD;
+               p9_req_put(req);
                ret = 0;
        }
        spin_unlock(&client->lock);
@@ -711,6 +724,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
        spin_lock(&client->lock);
        list_del(&req->req_list);
        spin_unlock(&client->lock);
+       p9_req_put(req);
 
        return 0;
 }
@@ -862,7 +876,15 @@ static void p9_conn_destroy(struct p9_conn *m)
 
        p9_mux_poll_stop(m);
        cancel_work_sync(&m->rq);
+       if (m->rreq) {
+               p9_req_put(m->rreq);
+               m->rreq = NULL;
+       }
        cancel_work_sync(&m->wq);
+       if (m->wreq) {
+               p9_req_put(m->wreq);
+               m->wreq = NULL;
+       }
 
        p9_conn_cancel(m, -ECONNRESET);
 
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index b513cffeeb3c..119103bfa82e 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -122,7 +122,7 @@ struct p9_rdma_context {
        dma_addr_t busa;
        union {
                struct p9_req_t *req;
-               struct p9_fcall *rc;
+               struct p9_fcall rc;
        };
 };
 
@@ -274,8 +274,7 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
        case RDMA_CM_EVENT_DISCONNECTED:
                if (rdma)
                        rdma->state = P9_RDMA_CLOSED;
-               if (c)
-                       c->status = Disconnected;
+               c->status = Disconnected;
                break;
 
        case RDMA_CM_EVENT_TIMEWAIT_EXIT:
@@ -320,8 +319,8 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
        if (wc->status != IB_WC_SUCCESS)
                goto err_out;
 
-       c->rc->size = wc->byte_len;
-       err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
+       c->rc.size = wc->byte_len;
+       err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
        if (err)
                goto err_out;
 
@@ -331,12 +330,13 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
 
        /* Check that we have not yet received a reply for this request.
         */
-       if (unlikely(req->rc)) {
+       if (unlikely(req->rc.sdata)) {
                pr_err("Duplicate reply for request %d", tag);
                goto err_out;
        }
 
-       req->rc = c->rc;
+       req->rc.size = c->rc.size;
+       req->rc.sdata = c->rc.sdata;
        p9_client_cb(client, req, REQ_STATUS_RCVD);
 
  out:
@@ -361,9 +361,10 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
                container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
 
        ib_dma_unmap_single(rdma->cm_id->device,
-                           c->busa, c->req->tc->size,
+                           c->busa, c->req->tc.size,
                            DMA_TO_DEVICE);
        up(&rdma->sq_sem);
+       p9_req_put(c->req);
        kfree(c);
 }
 
@@ -401,7 +402,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
        struct ib_sge sge;
 
        c->busa = ib_dma_map_single(rdma->cm_id->device,
-                                   c->rc->sdata, client->msize,
+                                   c->rc.sdata, client->msize,
                                    DMA_FROM_DEVICE);
        if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
                goto error;
@@ -443,9 +444,9 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
         **/
        if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
                if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
-                       /* Got one ! */
-                       kfree(req->rc);
-                       req->rc = NULL;
+                       /* Got one! */
+                       p9_fcall_fini(&req->rc);
+                       req->rc.sdata = NULL;
                        goto dont_need_post_recv;
                } else {
                        /* We raced and lost. */
@@ -459,7 +460,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
                err = -ENOMEM;
                goto recv_error;
        }
-       rpl_context->rc = req->rc;
+       rpl_context->rc.sdata = req->rc.sdata;
 
        /*
         * Post a receive buffer for this request. We need to ensure
@@ -475,11 +476,11 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 
        err = post_recv(client, rpl_context);
        if (err) {
-               p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
+               p9_debug(P9_DEBUG_ERROR, "POST RECV failed: %d\n", err);
                goto recv_error;
        }
        /* remove posted receive buffer from request structure */
-       req->rc = NULL;
+       req->rc.sdata = NULL;
 
 dont_need_post_recv:
        /* Post the request */
@@ -491,7 +492,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
        c->req = req;
 
        c->busa = ib_dma_map_single(rdma->cm_id->device,
-                                   c->req->tc->sdata, c->req->tc->size,
+                                   c->req->tc.sdata, c->req->tc.size,
                                    DMA_TO_DEVICE);
        if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
                err = -EIO;
@@ -501,7 +502,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
        c->cqe.done = send_done;
 
        sge.addr = c->busa;
-       sge.length = c->req->tc->size;
+       sge.length = c->req->tc.size;
        sge.lkey = rdma->pd->local_dma_lkey;
 
        wr.next = NULL;
@@ -544,7 +545,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
  recv_error:
        kfree(rpl_context);
        spin_lock_irqsave(&rdma->req_lock, flags);
-       if (rdma->state < P9_RDMA_CLOSING) {
+       if (err != -EINTR && rdma->state < P9_RDMA_CLOSING) {
                rdma->state = P9_RDMA_CLOSING;
                spin_unlock_irqrestore(&rdma->req_lock, flags);
                rdma_disconnect(rdma->cm_id);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 7728b0acde09..eb596c2ed546 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -155,7 +155,7 @@ static void req_done(struct virtqueue *vq)
                }
 
                if (len) {
-                       req->rc->size = len;
+                       req->rc.size = len;
                        p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
                }
        }
@@ -207,6 +207,13 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
        return 1;
 }
 
+/* Reply won't come, so drop req ref */
+static int p9_virtio_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+       p9_req_put(req);
+       return 0;
+}
+
 /**
  * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
  * this takes a list of pages.
@@ -273,12 +280,12 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
        out_sgs = in_sgs = 0;
        /* Handle out VirtIO ring buffers */
        out = pack_sg_list(chan->sg, 0,
-                          VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+                          VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
        if (out)
                sgs[out_sgs++] = chan->sg;
 
        in = pack_sg_list(chan->sg, out,
-                         VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
+                         VIRTQUEUE_NUM, req->rc.sdata, req->rc.capacity);
        if (in)
                sgs[out_sgs + in_sgs++] = chan->sg + out;
 
@@ -404,6 +411,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
        struct scatterlist *sgs[4];
        size_t offs;
        int need_drop = 0;
+       int kicked = 0;
 
        p9_debug(P9_DEBUG_TRANS, "virtio request\n");
 
@@ -411,29 +419,33 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
                __le32 sz;
                int n = p9_get_mapped_pages(chan, &out_pages, uodata,
                                            outlen, &offs, &need_drop);
-               if (n < 0)
-                       return n;
+               if (n < 0) {
+                       err = n;
+                       goto err_out;
+               }
                out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
                if (n != outlen) {
                        __le32 v = cpu_to_le32(n);
-                       memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
+                       memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
                        outlen = n;
                }
                /* The size field of the message must include the length of the
                 * header and the length of the data.  We didn't actually know
                 * the length of the data until this point so add it in now.
                 */
-               sz = cpu_to_le32(req->tc->size + outlen);
-               memcpy(&req->tc->sdata[0], &sz, sizeof(sz));
+               sz = cpu_to_le32(req->tc.size + outlen);
+               memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
        } else if (uidata) {
                int n = p9_get_mapped_pages(chan, &in_pages, uidata,
                                            inlen, &offs, &need_drop);
-               if (n < 0)
-                       return n;
+               if (n < 0) {
+                       err = n;
+                       goto err_out;
+               }
                in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
                if (n != inlen) {
                        __le32 v = cpu_to_le32(n);
-                       memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
+                       memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
                        inlen = n;
                }
        }
@@ -445,7 +457,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 
        /* out data */
        out = pack_sg_list(chan->sg, 0,
-                          VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+                          VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
 
        if (out)
                sgs[out_sgs++] = chan->sg;
@@ -464,7 +476,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
         * alloced memory and payload onto the user buffer.
         */
        in = pack_sg_list(chan->sg, out,
-                         VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
+                         VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len);
        if (in)
                sgs[out_sgs + in_sgs++] = chan->sg + out;
 
@@ -498,6 +510,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
        }
        virtqueue_kick(chan->vq);
        spin_unlock_irqrestore(&chan->lock, flags);
+       kicked = 1;
        p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
        err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
        /*
@@ -518,6 +531,10 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
        }
        kvfree(in_pages);
        kvfree(out_pages);
+       if (!kicked) {
+               /* reply won't come */
+               p9_req_put(req);
+       }
        return err;
 }
 
@@ -750,6 +767,7 @@ static struct p9_trans_module p9_virtio_trans = {
        .request = p9_virtio_request,
        .zc_request = p9_virtio_zc_request,
        .cancel = p9_virtio_cancel,
+       .cancelled = p9_virtio_cancelled,
        /*
         * We leave one entry for input and one entry for response
         * headers. We also skip one more entry to accomodate, address
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index c2d54ac76bfd..e2fbf3677b9b 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -141,7 +141,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
        struct xen_9pfs_front_priv *priv = NULL;
        RING_IDX cons, prod, masked_cons, masked_prod;
        unsigned long flags;
-       u32 size = p9_req->tc->size;
+       u32 size = p9_req->tc.size;
        struct xen_9pfs_dataring *ring;
        int num;
 
@@ -154,7 +154,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
        if (!priv || priv->client != client)
                return -EINVAL;
 
-       num = p9_req->tc->tag % priv->num_rings;
+       num = p9_req->tc.tag % priv->num_rings;
        ring = &priv->rings[num];
 
 again:
@@ -176,7 +176,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
        masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
        masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
 
-       xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size,
+       xen_9pfs_write_packet(ring->data.out, p9_req->tc.sdata, size,
                              &masked_prod, masked_cons, XEN_9PFS_RING_SIZE);
 
        p9_req->status = REQ_STATUS_SENT;
@@ -185,6 +185,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
        ring->intf->out_prod = prod;
        spin_unlock_irqrestore(&ring->lock, flags);
        notify_remote_via_irq(ring->irq);
+       p9_req_put(p9_req);
 
        return 0;
 }
@@ -229,12 +230,12 @@ static void p9_xen_response(struct work_struct *work)
                        continue;
                }
 
-               memcpy(req->rc, &h, sizeof(h));
-               req->rc->offset = 0;
+               memcpy(&req->rc, &h, sizeof(h));
+               req->rc.offset = 0;
 
                masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
                /* Then, read the whole packet (including the header) */
-               xen_9pfs_read_packet(req->rc->sdata, ring->data.in, h.size,
+               xen_9pfs_read_packet(req->rc.sdata, ring->data.in, h.size,
                                     masked_prod, &masked_cons,
                                     XEN_9PFS_RING_SIZE);
 
@@ -391,8 +392,8 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
        unsigned int max_rings, max_ring_order, len = 0;
 
        versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
-       if (!len)
-               return -EINVAL;
+       if (IS_ERR(versions))
+               return PTR_ERR(versions);
        if (strcmp(versions, "1")) {
                kfree(versions);
                return -EINVAL;
diff --git a/net/core/filter.c b/net/core/filter.c
index eb81e9db4093..34ec9324737b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5558,6 +5558,7 @@ static bool sock_addr_is_valid_access(int off, int size,
                case BPF_CGROUP_INET4_BIND:
                case BPF_CGROUP_INET4_CONNECT:
                case BPF_CGROUP_UDP4_SENDMSG:
+               case BPF_CGROUP_UDP4_RECVMSG:
                        break;
                default:
                        return false;
@@ -5568,6 +5569,7 @@ static bool sock_addr_is_valid_access(int off, int size,
                case BPF_CGROUP_INET6_BIND:
                case BPF_CGROUP_INET6_CONNECT:
                case BPF_CGROUP_UDP6_SENDMSG:
+               case BPF_CGROUP_UDP6_RECVMSG:
                        break;
                default:
                        return false;
diff --git a/net/core/sock.c b/net/core/sock.c
index c9668dcb5eb9..9c32e8eb64da 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1348,9 +1348,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
        {
                u32 meminfo[SK_MEMINFO_VARS];
 
-               if (get_user(len, optlen))
-                       return -EFAULT;
-
                sk_get_meminfo(sk, meminfo);
 
                len = min_t(unsigned int, len, sizeof(meminfo));
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 711a5c75bd4b..21800979ed62 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -202,7 +202,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
                }
                sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
                                     iph->saddr, iph->daddr,
-                                    skb->dev->ifindex, sdif);
+                                    dif, sdif);
        }
 out:
        read_unlock(&raw_v4_hashinfo.lock);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e45a5e19e509..6ab68b06fa39 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -542,7 +542,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
 struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
                                 __be16 sport, __be16 dport)
 {
-       return __udp4_lib_lookup_skb(skb, sport, dport, &udp_table);
+       const struct iphdr *iph = ip_hdr(skb);
+
+       return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
+                                iph->daddr, dport, inet_iif(skb),
+                                inet_sdif(skb), &udp_table, NULL);
 }
 EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);
 
@@ -1720,6 +1724,10 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
                sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
                memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
                *addr_len = sizeof(*sin);
+
+               if (cgroup_bpf_enabled)
+                       BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
+                                                       (struct sockaddr *)sin);
        }
        if (inet->cmsg_flags)
                ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index cab8b2b647f9..164f1d01273c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -282,7 +282,7 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
 
        return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
                                 &iph->daddr, dport, inet6_iif(skb),
-                                inet6_sdif(skb), &udp_table, skb);
+                                inet6_sdif(skb), &udp_table, NULL);
 }
 EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
 
@@ -419,6 +419,10 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                                                    inet6_iif(skb));
                }
                *addr_len = sizeof(*sin6);
+
+               if (cgroup_bpf_enabled)
+                       BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
+                                               (struct sockaddr *)sin6);
        }
 
        if (np->rxopt.all)
@@ -478,7 +482,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
        struct net *net = dev_net(skb->dev);
 
        sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
-                              inet6_iif(skb), 0, udptable, skb);
+                              inet6_iif(skb), 0, udptable, NULL);
        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d98fcf926166..93b5a4200585 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2399,6 +2399,9 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
 
                ts = __packet_set_timestamp(po, ph, skb);
                __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
+
+               if (!packet_read_pending(&po->tx_ring))
+                       complete(&po->skb_completion);
        }
 
        sock_wfree(skb);
@@ -2594,7 +2597,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
 
 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 {
-       struct sk_buff *skb;
+       struct sk_buff *skb = NULL;
        struct net_device *dev;
        struct virtio_net_hdr *vnet_hdr = NULL;
        struct sockcm_cookie sockc;
@@ -2609,6 +2612,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
        int len_sum = 0;
        int status = TP_STATUS_AVAILABLE;
        int hlen, tlen, copylen = 0;
+       long timeo = 0;
 
        mutex_lock(&po->pg_vec_lock);
 
@@ -2655,12 +2659,21 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
        if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
                size_max = dev->mtu + reserve + VLAN_HLEN;
 
+       reinit_completion(&po->skb_completion);
+
        do {
                ph = packet_current_frame(po, &po->tx_ring,
                                          TP_STATUS_SEND_REQUEST);
                if (unlikely(ph == NULL)) {
-                       if (need_wait && need_resched())
-                               schedule();
+                       if (need_wait && skb) {
+                               timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
+                               timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
+                               if (timeo <= 0) {
+                                       err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
+                                       goto out_put;
+                               }
+                       }
+                       /* check for additional frames */
                        continue;
                }
 
@@ -3216,6 +3229,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
        sock_init_data(sock, sk);
 
        po = pkt_sk(sk);
+       init_completion(&po->skb_completion);
        sk->sk_family = PF_PACKET;
        po->num = proto;
        po->xmit = dev_queue_xmit;
@@ -4302,7 +4316,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
                                    req3->tp_sizeof_priv ||
                                    req3->tp_feature_req_word) {
                                        err = -EINVAL;
-                                       goto out;
+                                       goto out_free_pg_vec;
                                }
                        }
                        break;
@@ -4366,6 +4380,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
                        prb_shutdown_retire_blk_timer(po, rb_queue);
        }
 
+out_free_pg_vec:
        if (pg_vec)
                free_pg_vec(pg_vec, order, req->tp_block_nr);
 out:
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 3bb7c5fb3bff..c70a2794456f 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -128,6 +128,7 @@ struct packet_sock {
        unsigned int            tp_hdrlen;
        unsigned int            tp_reserve;
        unsigned int            tp_tstamp;
+       struct completion       skb_completion;
        struct net_device __rcu *cached_dev;
        int                     (*xmit)(struct sk_buff *skb);
        struct packet_type      prot_hook ____cacheline_aligned_in_smp;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 40c7eb941bc9..c99114eaf42e 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -126,10 +126,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
        /* Initialize the bind addr area */
        sctp_bind_addr_init(&ep->base.bind_addr, 0);
 
-       /* Remember who we are attached to.  */
-       ep->base.sk = sk;
-       sock_hold(ep->base.sk);
-
        /* Create the lists of associations.  */
        INIT_LIST_HEAD(&ep->asocs);
 
@@ -167,6 +163,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
        ep->prsctp_enable = net->sctp.prsctp_enable;
        ep->reconf_enable = net->sctp.reconf_enable;
 
+       /* Remember who we are attached to.  */
+       ep->base.sk = sk;
+       sock_hold(ep->base.sk);
+
        return ep;
 
 nomem_hmacs:
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 1fc812ba9871..7e4553dbc3c7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1558,7 +1558,6 @@ call_reserveresult(struct rpc_task *task)
        task->tk_status = 0;
        if (status >= 0) {
                if (task->tk_rqstp) {
-                       xprt_request_init(task);
                        task->tk_action = call_refresh;
                        return;
                }
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d066aae3cb6d..3581168e6b99 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1257,6 +1257,55 @@ void xprt_free(struct rpc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(xprt_free);
 
+static __be32
+xprt_alloc_xid(struct rpc_xprt *xprt)
+{
+       __be32 xid;
+
+       spin_lock(&xprt->reserve_lock);
+       xid = (__force __be32)xprt->xid++;
+       spin_unlock(&xprt->reserve_lock);
+       return xid;
+}
+
+static void
+xprt_init_xid(struct rpc_xprt *xprt)
+{
+       xprt->xid = prandom_u32();
+}
+
+static void
+xprt_request_init(struct rpc_task *task)
+{
+       struct rpc_xprt *xprt = task->tk_xprt;
+       struct rpc_rqst *req = task->tk_rqstp;
+
+       INIT_LIST_HEAD(&req->rq_list);
+       req->rq_timeout = task->tk_client->cl_timeout->to_initval;
+       req->rq_task    = task;
+       req->rq_xprt    = xprt;
+       req->rq_buffer  = NULL;
+       req->rq_xid     = xprt_alloc_xid(xprt);
+       req->rq_connect_cookie = xprt->connect_cookie - 1;
+       req->rq_bytes_sent = 0;
+       req->rq_snd_buf.len = 0;
+       req->rq_snd_buf.buflen = 0;
+       req->rq_rcv_buf.len = 0;
+       req->rq_rcv_buf.buflen = 0;
+       req->rq_release_snd_buf = NULL;
+       xprt_reset_majortimeo(req);
+       dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
+                       req, ntohl(req->rq_xid));
+}
+
+static void
+xprt_do_reserve(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+       xprt->ops->alloc_slot(xprt, task);
+       if (task->tk_rqstp != NULL)
+               xprt_request_init(task);
+}
+
 /**
  * xprt_reserve - allocate an RPC request slot
  * @task: RPC task requesting a slot allocation
@@ -1276,7 +1325,7 @@ void xprt_reserve(struct rpc_task *task)
        task->tk_timeout = 0;
        task->tk_status = -EAGAIN;
        if (!xprt_throttle_congested(xprt, task))
-               xprt->ops->alloc_slot(xprt, task);
+               xprt_do_reserve(xprt, task);
 }
 
 /**
@@ -1298,45 +1347,7 @@ void xprt_retry_reserve(struct rpc_task *task)
 
        task->tk_timeout = 0;
        task->tk_status = -EAGAIN;
-       xprt->ops->alloc_slot(xprt, task);
-}
-
-static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
-{
-       __be32 xid;
-
-       spin_lock(&xprt->reserve_lock);
-       xid = (__force __be32)xprt->xid++;
-       spin_unlock(&xprt->reserve_lock);
-       return xid;
-}
-
-static inline void xprt_init_xid(struct rpc_xprt *xprt)
-{
-       xprt->xid = prandom_u32();
-}
-
-void xprt_request_init(struct rpc_task *task)
-{
-       struct rpc_xprt *xprt = task->tk_xprt;
-       struct rpc_rqst *req = task->tk_rqstp;
-
-       INIT_LIST_HEAD(&req->rq_list);
-       req->rq_timeout = task->tk_client->cl_timeout->to_initval;
-       req->rq_task    = task;
-       req->rq_xprt    = xprt;
-       req->rq_buffer  = NULL;
-       req->rq_xid     = xprt_alloc_xid(xprt);
-       req->rq_connect_cookie = xprt->connect_cookie - 1;
-       req->rq_bytes_sent = 0;
-       req->rq_snd_buf.len = 0;
-       req->rq_snd_buf.buflen = 0;
-       req->rq_rcv_buf.len = 0;
-       req->rq_rcv_buf.buflen = 0;
-       req->rq_release_snd_buf = NULL;
-       xprt_reset_majortimeo(req);
-       dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
-                       req, ntohl(req->rq_xid));
+       xprt_do_reserve(xprt, task);
 }
 
 /**
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 3ecca3b88bf8..eb0f701f9bf1 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -132,7 +132,7 @@ static int __init tipc_init(void)
        if (err)
                goto out_sysctl;
 
-       err = register_pernet_subsys(&tipc_net_ops);
+       err = register_pernet_device(&tipc_net_ops);
        if (err)
                goto out_pernet;
 
@@ -140,7 +140,7 @@ static int __init tipc_init(void)
        if (err)
                goto out_socket;
 
-       err = register_pernet_subsys(&tipc_topsrv_net_ops);
+       err = register_pernet_device(&tipc_topsrv_net_ops);
        if (err)
                goto out_pernet_topsrv;
 
@@ -151,11 +151,11 @@ static int __init tipc_init(void)
        pr_info("Started in single node mode\n");
        return 0;
 out_bearer:
-       unregister_pernet_subsys(&tipc_topsrv_net_ops);
+       unregister_pernet_device(&tipc_topsrv_net_ops);
 out_pernet_topsrv:
        tipc_socket_stop();
 out_socket:
-       unregister_pernet_subsys(&tipc_net_ops);
+       unregister_pernet_device(&tipc_net_ops);
 out_pernet:
        tipc_unregister_sysctl();
 out_sysctl:
@@ -170,9 +170,9 @@ static int __init tipc_init(void)
 static void __exit tipc_exit(void)
 {
        tipc_bearer_cleanup();
-       unregister_pernet_subsys(&tipc_topsrv_net_ops);
+       unregister_pernet_device(&tipc_topsrv_net_ops);
        tipc_socket_stop();
-       unregister_pernet_subsys(&tipc_net_ops);
+       unregister_pernet_device(&tipc_net_ops);
        tipc_netlink_stop();
        tipc_netlink_compat_stop();
        tipc_unregister_sysctl();
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 588d5aa14c41..85ebb675600c 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -445,7 +445,11 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd,
        if (!bearer)
                return -EMSGSIZE;
 
-       len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME);
+       len = TLV_GET_DATA_LEN(msg->req);
+       if (len <= 0)
+               return -EINVAL;
+
+       len = min_t(int, len, TIPC_MAX_BEARER_NAME);
        if (!string_is_valid(name, len))
                return -EINVAL;
 
@@ -537,7 +541,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
 
        name = (char *)TLV_DATA(msg->req);
 
-       len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+       len = TLV_GET_DATA_LEN(msg->req);
+       if (len <= 0)
+               return -EINVAL;
+
+       len = min_t(int, len, TIPC_MAX_BEARER_NAME);
        if (!string_is_valid(name, len))
                return -EINVAL;
 
@@ -815,7 +823,11 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd,
        if (!link)
                return -EMSGSIZE;
 
-       len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+       len = TLV_GET_DATA_LEN(msg->req);
+       if (len <= 0)
+               return -EINVAL;
+
+       len = min_t(int, len, TIPC_MAX_BEARER_NAME);
        if (!string_is_valid(name, len))
                return -EINVAL;
 
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index da2d311476ab..382c84d9339d 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -176,7 +176,6 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
                        goto tx_error;
                }
 
-               skb->dev = rt->dst.dev;
                ttl = ip4_dst_hoplimit(&rt->dst);
                udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
                                    dst->ipv4.s_addr, 0, ttl, 0, src->port,
@@ -195,10 +194,9 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
                if (err)
                        goto tx_error;
                ttl = ip6_dst_hoplimit(ndst);
-               err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
-                                          ndst->dev, &src->ipv6,
-                                          &dst->ipv6, 0, ttl, 0, src->port,
-                                          dst->port, false);
+               err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL,
+                                          &src->ipv6, &dst->ipv6, 0, ttl, 0,
+                                          src->port, dst->port, false);
 #endif
        }
        return err;
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 1c41b4eaf73c..3d29d0524a89 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -189,7 +189,7 @@ static void add_man_viewer(const char *name)
        while (*p)
                p = &((*p)->next);
        *p = zalloc(sizeof(**p) + len + 1);
-       strncpy((*p)->name, name, len);
+       strcpy((*p)->name, name);
 }
 
 static int supported_man_viewer(const char *name, size_t len)
diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c
index 4ca799aadb4e..93d6b7240285 100644
--- a/tools/perf/ui/tui/helpline.c
+++ b/tools/perf/ui/tui/helpline.c
@@ -24,7 +24,7 @@ static void tui_helpline__push(const char *msg)
        SLsmg_set_color(0);
        SLsmg_write_nstring((char *)msg, SLtt_Screen_Cols);
        SLsmg_refresh();
-       strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0';
+       strlcpy(ui_helpline__current, msg, sz);
 }
 
 static int tui_helpline__show(const char *format, va_list ap)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index bd9226bc5945..b9a82598e2ac 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3562,7 +3562,7 @@ perf_event__synthesize_event_update_name(struct perf_tool *tool,
        if (ev == NULL)
                return -ENOMEM;
 
-       strncpy(ev->data, evsel->name, len);
+       strlcpy(ev->data, evsel->name, len + 1);
        err = process(tool, (union perf_event*) ev, NULL, NULL);
        free(ev);
        return err;
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 02d7c871862a..006be3963977 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -573,13 +573,13 @@ static void test_lpm_get_next_key(void)
 
        /* add one more element (total two) */
        key_p->prefixlen = 24;
-       inet_pton(AF_INET, "192.168.0.0", key_p->data);
+       inet_pton(AF_INET, "192.168.128.0", key_p->data);
        assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
 
        memset(key_p, 0, key_size);
        assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
        assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
-              key_p->data[1] == 168 && key_p->data[2] == 0);
+              key_p->data[1] == 168 && key_p->data[2] == 128);
 
        memset(next_key_p, 0, key_size);
        assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
@@ -592,7 +592,7 @@ static void test_lpm_get_next_key(void)
 
        /* Add one more element (total three) */
        key_p->prefixlen = 24;
-       inet_pton(AF_INET, "192.168.128.0", key_p->data);
+       inet_pton(AF_INET, "192.168.0.0", key_p->data);
        assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
 
        memset(key_p, 0, key_size);
@@ -643,6 +643,41 @@ static void test_lpm_get_next_key(void)
        assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
               errno == ENOENT);
 
+       /* Add one more element (total five) */
+       key_p->prefixlen = 28;
+       inet_pton(AF_INET, "192.168.1.128", key_p->data);
+       assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+       memset(key_p, 0, key_size);
+       assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+       assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+              key_p->data[1] == 168 && key_p->data[2] == 0);
+
+       memset(next_key_p, 0, key_size);
+       assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+       assert(next_key_p->prefixlen == 28 && next_key_p->data[0] == 192 &&
+              next_key_p->data[1] == 168 && next_key_p->data[2] == 1 &&
+              next_key_p->data[3] == 128);
+
+       memcpy(key_p, next_key_p, key_size);
+       assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+       assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+              next_key_p->data[1] == 168 && next_key_p->data[2] == 1);
+
+       memcpy(key_p, next_key_p, key_size);
+       assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+       assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+              next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+       memcpy(key_p, next_key_p, key_size);
+       assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+       assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+              next_key_p->data[1] == 168);
+
+       memcpy(key_p, next_key_p, key_size);
+       assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+              errno == ENOENT);
+
        /* no exact matching key should return the first one in post order */
        key_p->prefixlen = 22;
        inet_pton(AF_INET, "192.168.1.0", key_p->data);

Reply via email to