Re: [Devel] [PATCH] nfs: abort delegation in dying VE
On Wed, Nov 15, 2017 at 11:41:00AM -0800, Andrei Vagin wrote: > On Wed, Nov 15, 2017 at 07:55:02PM +0300, Kirill Tkhai wrote: > > On 15.11.2017 19:50, Stanislav Kinsburskiy wrote: > > > Don't queue delegation request, if ve init is exiting. > > > > > > https://jira.sw.ru/browse/PSBM-77061 > > > > > > Inspired-by: Kirill Tkhai> > > Signed-off-by: Stanislav Kinsburskiy > > > --- > > > fs/nfs/delegation.c | 16 +++- > > > 1 file changed, 15 insertions(+), 1 deletion(-) > > > > > > diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c > > > index 66af497..2422754 100644 > > > --- a/fs/nfs/delegation.c > > > +++ b/fs/nfs/delegation.c > > > @@ -189,15 +189,29 @@ void nfs_inode_reclaim_delegation(struct inode > > > *inode, struct rpc_cred *cred, > > > nfs_inode_set_delegation(inode, cred, res); > > > } > > > > > > +static bool ve_abort_delegation(struct inode *inode) > > > +{ > > > + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; > > > + struct rpc_xprt *xprt; > > > + > > > + rcu_read_lock(); > > > + xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt); > > > + rcu_read_unlock(); > > > + > > > + return xprt->xprt_net->owner_ve->ve_netns == NULL; > > > > Usually, memory pointed by a pointer, which was obtained via rcu, has to be > > used > > in rcu_read_* brackets: > > > > rcu_read_lock(); > > xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt); > > ret = (xprt->xprt_net->owner_ve->ve_netns == NULL); > > rcu_read_unlock(); > > > > return ret; > > > > If there is no an exception, we have to do something like above. > > It is true when you want to dereference this pointer, otherwise I don't > see any reason to take rcu_read_lock(). Oops, here is exactly this case. I didn't read the proposed code. Sorry. 
> > > > > > +} > > > + > > > static int nfs_do_return_delegation(struct inode *inode, struct > > > nfs_delegation *delegation, int issync) > > > { > > > int res = 0; > > > > > > - if (!test_bit(NFS_DELEGATION_REVOKED, >flags)) > > > + if (!test_bit(NFS_DELEGATION_REVOKED, >flags) && > > > + !ve_abort_delegation(inode)) { > > > res = nfs4_proc_delegreturn(inode, > > > delegation->cred, > > > >stateid, > > > issync); > > > + } > > > nfs_free_delegation(delegation); > > > return res; > > > } > > > > > ___ > > Devel mailing list > > Devel@openvz.org > > https://lists.openvz.org/mailman/listinfo/devel > ___ > Devel mailing list > Devel@openvz.org > https://lists.openvz.org/mailman/listinfo/devel ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
Re: [Devel] [PATCH] nfs: abort delegation in dying VE
On Wed, Nov 15, 2017 at 07:55:02PM +0300, Kirill Tkhai wrote: > On 15.11.2017 19:50, Stanislav Kinsburskiy wrote: > > Don't queue delegation request, if ve init is exiting. > > > > https://jira.sw.ru/browse/PSBM-77061 > > > > Inspired-by: Kirill Tkhai> > Signed-off-by: Stanislav Kinsburskiy > > --- > > fs/nfs/delegation.c | 16 +++- > > 1 file changed, 15 insertions(+), 1 deletion(-) > > > > diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c > > index 66af497..2422754 100644 > > --- a/fs/nfs/delegation.c > > +++ b/fs/nfs/delegation.c > > @@ -189,15 +189,29 @@ void nfs_inode_reclaim_delegation(struct inode > > *inode, struct rpc_cred *cred, > > nfs_inode_set_delegation(inode, cred, res); > > } > > > > +static bool ve_abort_delegation(struct inode *inode) > > +{ > > + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; > > + struct rpc_xprt *xprt; > > + > > + rcu_read_lock(); > > + xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt); > > + rcu_read_unlock(); > > + > > + return xprt->xprt_net->owner_ve->ve_netns == NULL; > > Usually, memory pointed by a pointer, which was obtained via rcu, has to be > used > in rcu_read_* brackets: > > rcu_read_lock(); > xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt); > ret = (xprt->xprt_net->owner_ve->ve_netns == NULL); > rcu_read_unlock(); > > return ret; > > If there is no an exception, we have to do something like above. It is true when you want to dereference this pointer, otherwise I don't see any reason to take rcu_read_lock(). 
> > > +} > > + > > static int nfs_do_return_delegation(struct inode *inode, struct > > nfs_delegation *delegation, int issync) > > { > > int res = 0; > > > > - if (!test_bit(NFS_DELEGATION_REVOKED, >flags)) > > + if (!test_bit(NFS_DELEGATION_REVOKED, >flags) && > > + !ve_abort_delegation(inode)) { > > res = nfs4_proc_delegreturn(inode, > > delegation->cred, > > >stateid, > > issync); > > + } > > nfs_free_delegation(delegation); > > return res; > > } > > > ___ > Devel mailing list > Devel@openvz.org > https://lists.openvz.org/mailman/listinfo/devel ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
Re: [Devel] [PATCH v2] nfs: abort delegation in dying VE
On 15.11.2017 20:00, Stanislav Kinsburskiy wrote: > Don't queue delegation request, if ve init is exiting. > > https://jira.sw.ru/browse/PSBM-77061 > > v2: check ve is dying under rcu_lock() > > Inspired-by: Kirill Tkhai> Signed-off-by: Stanislav Kinsburskiy Reviewed-by: Kirill Tkhai > --- > fs/nfs/delegation.c | 18 +- > 1 file changed, 17 insertions(+), 1 deletion(-) > > diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c > index 66af497..1d1500c 100644 > --- a/fs/nfs/delegation.c > +++ b/fs/nfs/delegation.c > @@ -189,15 +189,31 @@ void nfs_inode_reclaim_delegation(struct inode *inode, > struct rpc_cred *cred, > nfs_inode_set_delegation(inode, cred, res); > } > > +static bool ve_abort_delegation(struct inode *inode) > +{ > + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; > + struct rpc_xprt *xprt; > + bool abort; > + > + rcu_read_lock(); > + xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt); > + abort = xprt->xprt_net->owner_ve->ve_netns == NULL; > + rcu_read_unlock(); > + > + return abort; > +} > + > static int nfs_do_return_delegation(struct inode *inode, struct > nfs_delegation *delegation, int issync) > { > int res = 0; > > - if (!test_bit(NFS_DELEGATION_REVOKED, >flags)) > + if (!test_bit(NFS_DELEGATION_REVOKED, >flags) && > + !ve_abort_delegation(inode)) { > res = nfs4_proc_delegreturn(inode, > delegation->cred, > >stateid, > issync); > + } > nfs_free_delegation(delegation); > return res; > } > ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH v2] nfs: abort delegation in dying VE
Don't queue delegation request, if ve init is exiting. https://jira.sw.ru/browse/PSBM-77061 v2: check ve is dying under rcu_lock() Inspired-by: Kirill Tkhai Signed-off-by: Stanislav Kinsburskiy --- fs/nfs/delegation.c | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 66af497..1d1500c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -189,15 +189,31 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, nfs_inode_set_delegation(inode, cred, res); } +static bool ve_abort_delegation(struct inode *inode) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct rpc_xprt *xprt; + bool abort; + + rcu_read_lock(); + xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt); + abort = xprt->xprt_net->owner_ve->ve_netns == NULL; + rcu_read_unlock(); + + return abort; +} + static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) { int res = 0; - if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && + !ve_abort_delegation(inode)) { res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync); + } nfs_free_delegation(delegation); return res; } ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
Re: [Devel] [PATCH] nfs: abort delegation in dying VE
On 15.11.2017 19:50, Stanislav Kinsburskiy wrote: > Don't queue delegation request, if ve init is exiting. > > https://jira.sw.ru/browse/PSBM-77061 > > Inspired-by: Kirill Tkhai> Signed-off-by: Stanislav Kinsburskiy > --- > fs/nfs/delegation.c | 16 +++- > 1 file changed, 15 insertions(+), 1 deletion(-) > > diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c > index 66af497..2422754 100644 > --- a/fs/nfs/delegation.c > +++ b/fs/nfs/delegation.c > @@ -189,15 +189,29 @@ void nfs_inode_reclaim_delegation(struct inode *inode, > struct rpc_cred *cred, > nfs_inode_set_delegation(inode, cred, res); > } > > +static bool ve_abort_delegation(struct inode *inode) > +{ > + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; > + struct rpc_xprt *xprt; > + > + rcu_read_lock(); > + xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt); > + rcu_read_unlock(); > + > + return xprt->xprt_net->owner_ve->ve_netns == NULL; Usually, memory pointed by a pointer, which was obtained via rcu, has to be used in rcu_read_* brackets: rcu_read_lock(); xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt); ret = (xprt->xprt_net->owner_ve->ve_netns == NULL); rcu_read_unlock(); return ret; If there is no an exception, we have to do something like above. > +} > + > static int nfs_do_return_delegation(struct inode *inode, struct > nfs_delegation *delegation, int issync) > { > int res = 0; > > - if (!test_bit(NFS_DELEGATION_REVOKED, >flags)) > + if (!test_bit(NFS_DELEGATION_REVOKED, >flags) && > + !ve_abort_delegation(inode)) { > res = nfs4_proc_delegreturn(inode, > delegation->cred, > >stateid, > issync); > + } > nfs_free_delegation(delegation); > return res; > } > ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH] nfs: abort delegation in dying VE
Don't queue delegation request, if ve init is exiting. https://jira.sw.ru/browse/PSBM-77061 Inspired-by: Kirill TkhaiSigned-off-by: Stanislav Kinsburskiy --- fs/nfs/delegation.c | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 66af497..2422754 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -189,15 +189,29 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, nfs_inode_set_delegation(inode, cred, res); } +static bool ve_abort_delegation(struct inode *inode) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct rpc_xprt *xprt; + + rcu_read_lock(); + xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt); + rcu_read_unlock(); + + return xprt->xprt_net->owner_ve->ve_netns == NULL; +} + static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) { int res = 0; - if (!test_bit(NFS_DELEGATION_REVOKED, >flags)) + if (!test_bit(NFS_DELEGATION_REVOKED, >flags) && + !ve_abort_delegation(inode)) { res = nfs4_proc_delegreturn(inode, delegation->cred, >stateid, issync); + } nfs_free_delegation(delegation); return res; } ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH RHEL7 COMMIT] ms/KVM: nVMX: do not fill vm_exit_intr_error_code in prepare_vmcs12
The commit is pushed to "branch-rh7-3.10.0-693.1.1.vz7.37.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-693.1.1.vz7.37.28 --> commit 66f0cd609d8299536ba09918a05dc43feafa2b46 Author: Paolo Bonzini Date: Wed Nov 15 16:16:27 2017 +0300 ms/KVM: nVMX: do not fill vm_exit_intr_error_code in prepare_vmcs12 Do this in the caller of nested_vmx_vmexit instead. nested_vmx_check_exception was doing a vmwrite to the vmcs02's VM_EXIT_INTR_ERROR_CODE field, so that prepare_vmcs12 would move the field to vmcs12->vm_exit_intr_error_code. However that isn't possible on pre-Haswell machines. Moving the vmcs12 write to the callers fixes it. Reported-by: Jim Mattson Signed-off-by: Paolo Bonzini [Changed nested_vmx_reflect_vmexit() return type to (int)1 from (bool)1, thanks to fengguang...@intel.com] Signed-off-by: Radim Krčmář (cherry picked from commit 7313c698050387a11c21afb0c6b4c61f21f7c042) [rkagan: This is a missing part of the backport of async pagefault machinery from mainstream; hopefully it fixes #PSBM-77171] https://jira.sw.ru/browse/PSBM-77171 Signed-off-by: Roman Kagan --- arch/x86/kvm/vmx.c | 52 ++-- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1d5d577..4dd164e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2291,7 +2291,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) return 0; if (vcpu->arch.exception.nested_apf) { - vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code); + vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, @@ -2299,6 +2299,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) return 1; } + vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 
vmcs_read32(VM_EXIT_INTR_INFO), vmcs_readl(EXIT_QUALIFICATION)); @@ -2507,7 +2508,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * reason is that if one of these bits is necessary, it will appear * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control * fields of vmcs01 and vmcs02, will turn these bits off - and -* nested_vmx_exit_handled() will not pass related exits to L1. +* nested_vmx_exit_reflected() will not pass related exits to L1. * These rules have exceptions below. */ @@ -7709,12 +7710,11 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, * should handle it ourselves in L0 (and then continue L2). Only call this * when in is_guest_mode (L2). */ -static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) +static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) { u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - u32 exit_reason = vmx->exit_reason; trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, vmcs_readl(EXIT_QUALIFICATION), @@ -7845,6 +7845,29 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) } } +static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason) +{ + u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + /* +* At this point, the exit interruption info in exit_intr_info +* is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT +* we need to query the in-kernel LAPIC. 
+*/ + WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT); + if ((exit_intr_info & +(INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + vmcs12->vm_exit_intr_error_code = + vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + } + + nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, + vmcs_readl(EXIT_QUALIFICATION)); + return 1; +} + static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) { *info1 = vmcs_readl(EXIT_QUALIFICATION); @@ -8089,12 +8112,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) if (vmx->emulation_required) return
[Devel] [PATCH RHEL7 COMMIT] ms/net: sysctl: fix a kmemleak warning
The commit is pushed to "branch-rh7-3.10.0-693.1.1.vz7.37.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-693.1.1.vz7.37.28 --> commit 547f849066c5ea7ca6485aef69bcce3fbe193b22 Author: Li RongQingDate: Wed Nov 15 16:14:56 2017 +0300 ms/net: sysctl: fix a kmemleak warning the returned buffer of register_sysctl() is stored into net_header variable, but net_header is not used after, and compiler maybe optimise the variable out, and lead kmemleak reported the below warning comm "swapper/0", pid 1, jiffies 4294937448 (age 267.270s) hex dump (first 32 bytes): 90 38 8b 01 c0 ff ff ff 00 00 00 00 01 00 00 00 .8.. 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 backtrace: [] create_object+0x10c/0x2a0 [] kmemleak_alloc+0x54/0xa0 [] __kmalloc+0x1f8/0x4f8 [] __register_sysctl_table+0x64/0x5a0 [] register_sysctl+0x30/0x40 [] net_sysctl_init+0x20/0x58 [] sock_init+0x10/0xb0 [] do_one_initcall+0x90/0x1b8 [] kernel_init_freeable+0x218/0x2f0 [] kernel_init+0x1c/0xe8 [] ret_from_fork+0xc/0x50 [] 0x <> Before fix, the objdump result on ARM64: : 0: a9be7bfdstp x29, x30, [sp,#-32]! 4: 9001adrpx1, 0 8: 9000adrpx0, 0 c: 910003fdmov x29, sp 10: 9121add x1, x1, #0x0 14: 9100add x0, x0, #0x0 18: a90153f3stp x19, x20, [sp,#16] 1c: 12800174mov w20, #0xfff4// #-12 20: 9400bl 0 24: b4000120cbz x0, 48 28: 9013adrpx19, 0 2c: 91000273add x19, x19, #0x0 30: 9101a260add x0, x19, #0x68 34: 9400bl 0 38: 2a0003f4mov w20, w0 3c: 3560cbnzw0, 48 40: aa1303e0mov x0, x19 44: 9400bl 0 48: 2a1403e0mov w0, w20 4c: a94153f3ldp x19, x20, [sp,#16] 50: a8c27bfdldp x29, x30, [sp],#32 54: d65f03c0ret After: : 0: a9bd7bfdstp x29, x30, [sp,#-48]! 
4: 9000adrpx0, 0 8: 910003fdmov x29, sp c: a90153f3stp x19, x20, [sp,#16] 10: 9013adrpx19, 0 14: 9100add x0, x0, #0x0 18: 91000273add x19, x19, #0x0 1c: f90013f5str x21, [sp,#32] 20: aa1303e1mov x1, x19 24: 12800175mov w21, #0xfff4// #-12 28: 9400bl 0 2c: f9002260str x0, [x19,#64] 30: b40001a0cbz x0, 64 34: 9014adrpx20, 0 38: 91000294add x20, x20, #0x0 3c: 9101a280add x0, x20, #0x68 40: 9400bl 0 44: 2a0003f5mov w21, w0 48: 3580cbnzw0, 58 4c: aa1403e0mov x0, x20 50: 9400bl 0 54: 1404b 64 58: f9402260ldr x0, [x19,#64] 5c: 9400bl 0 60: f900227fstr xzr, [x19,#64] 64: 2a1503e0mov w0, w21 68: f94013f5ldr x21, [sp,#32] 6c: a94153f3ldp x19, x20, [sp,#16] 70: a8c37bfdldp x29, x30, [sp],#48 74: d65f03c0ret Add the possible error handle to free the net_header to remove the kmemleak warning Signed-off-by: Li RongQing Signed-off-by: David S. Miller https://jira.sw.ru/browse/PSBM-76924 (cherry picked from commit ce9d9b8e5c2b7486edf76958bcdb5e6534a915b0) Signed-off-by: Andrey Ryabinin --- net/sysctl_net.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 42279fd..62eb022 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -94,10 +94,14 @@ __init int net_sysctl_init(void) goto out; ret = register_pernet_subsys(_pernet_ops); if (ret) - goto out; + goto out1; register_sysctl_root(_sysctl_root); out: return ret; +out1: + unregister_sysctl_table(net_header); + net_header = NULL; + goto out; } struct ctl_table_header *register_net_sysctl(struct net *net,
[Devel] [PATCH RHEL7 COMMIT] ms/kernel/ucount.c: mark user_header with kmemleak_ignore()
The commit is pushed to "branch-rh7-3.10.0-693.1.1.vz7.37.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-693.1.1.vz7.37.28 --> commit 6c98d7d0b66f0edb435f13e2ce8b17623a3104d5 Author: Luis R. RodriguezDate: Wed Nov 15 16:14:56 2017 +0300 ms/kernel/ucount.c: mark user_header with kmemleak_ignore() The user_header gets caught by kmemleak with the following splat as missing a free: unreferenced object 0x99667a733d80 (size 96): comm "swapper/0", pid 1, jiffies 4294892317 (age 62191.468s) hex dump (first 32 bytes): a0 b6 92 b4 ff ff ff ff 00 00 00 00 01 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 backtrace: kmemleak_alloc+0x4a/0xa0 __kmalloc+0x144/0x260 __register_sysctl_table+0x54/0x5e0 register_sysctl+0x1b/0x20 user_namespace_sysctl_init+0x17/0x34 do_one_initcall+0x52/0x1a0 kernel_init_freeable+0x173/0x200 kernel_init+0xe/0x100 ret_from_fork+0x2c/0x40 The BUG_ON()s are intended to crash so no need to clean up after ourselves on error there. This is also a kernel/ subsys_init() we don't need a respective exit call here as this is never modular, so just white list it. Link: http://lkml.kernel.org/r/20170203211404.31458-1-mcg...@kernel.org Signed-off-by: Luis R. Rodriguez Cc: Eric W. Biederman Cc: Kees Cook Cc: Nikolay Borisov Cc: Serge Hallyn Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds https://jira.sw.ru/browse/PSBM-76924 (cherry picked from commit ed5bd7dc88edf4a4a9c67130742b1b59aa017a5f) Signed-off-by: Andrey Ryabinin --- kernel/ucount.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 4aea3f0..533f783 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -241,11 +241,10 @@ static __init int user_namespace_sysctl_init(void) * properly. 
*/ user_header = register_sysctl("user", empty); + kmemleak_ignore(user_header); BUG_ON(!user_header); BUG_ON(!setup_userns_sysctls(_user_ns)); #endif return 0; } subsys_initcall(user_namespace_sysctl_init); - - ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH RHEL7 COMMIT] ms/debugobjects: Make kmemleak ignore debug objects
The commit is pushed to "branch-rh7-3.10.0-693.1.1.vz7.37.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-693.1.1.vz7.37.28 --> commit 2c1cd8780a9a0a5a86da8a04b7d10119ce9b8362 Author: Waiman LongDate: Wed Nov 15 16:14:55 2017 +0300 ms/debugobjects: Make kmemleak ignore debug objects The allocated debug objects are either on the free list or in the hashed bucket lists. So they won't get lost. However if both debug objects and kmemleak are enabled and kmemleak scanning is done while some of the debug objects are transitioning from one list to the others, false negative reporting of memory leaks may happen for those objects. For example, [38687.275678] kmemleak: 12 new suspected memory leaks (see /sys/kernel/debug/kmemleak) unreferenced object 0x92e98aabeb68 (size 40): comm "ksmtuned", pid 4344, jiffies 4298403600 (age 906.430s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 d0 bc db 92 e9 92 ff ff 01 00 00 00 00 00 00 00 38 36 8a 61 e9 92 ff ff 86.a backtrace: [] kmemleak_alloc+0x4a/0xa0 [] kmem_cache_alloc+0xe9/0x320 [] __debug_object_init+0x3e6/0x400 [] debug_object_activate+0x131/0x210 [] __call_rcu+0x3f/0x400 [] call_rcu_sched+0x1d/0x20 [] put_object+0x2c/0x40 [] __delete_object+0x3c/0x50 [] delete_object_full+0x1d/0x20 [] kmemleak_free+0x32/0x80 [] kmem_cache_free+0x77/0x350 [] unlink_anon_vmas+0x82/0x1e0 [] free_pgtables+0xa1/0x110 [] exit_mmap+0xc1/0x170 [] mmput+0x80/0x150 [] do_exit+0x2a9/0xd20 The references in the debug objects may also hide a real memory leak. As there is no point in having kmemleak to track debug object allocations, kmemleak checking is now disabled for debug objects. 
Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Cc: Andrew Morton Link: http://lkml.kernel.org/r/1502718733-8527-1-git-send-email-long...@redhat.com https://jira.sw.ru/browse/PSBM-76924 (cherry picked from commit caba4cbbd27d755572730801ac34fe063fc40a32) Signed-off-by: Andrey Ryabinin --- init/main.c| 2 +- lib/debugobjects.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index dd890da..364d4a7 100644 --- a/init/main.c +++ b/init/main.c @@ -615,8 +615,8 @@ asmlinkage void __init start_kernel(void) } #endif page_cgroup_init(); - debug_objects_mem_init(); kmemleak_init(); + debug_objects_mem_init(); setup_per_cpu_pageset(); numa_policy_init(); if (late_time_init) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index a8c4b2f..a1b85ba 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -14,6 +14,7 @@ #include #include #include +#include #define ODEBUG_HASH_BITS 14 #define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS) @@ -106,6 +107,7 @@ static void fill_pool(void) if (!new) return; + kmemleak_ignore(new); raw_spin_lock_irqsave(_lock, flags); hlist_add_head(>node, _pool); debug_objects_alloc++; @@ -1047,6 +1049,7 @@ static int __init debug_objects_replace_static_objects(void) obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL); if (!obj) goto free; + kmemleak_ignore(obj); hlist_add_head(>node, ); } ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH RHEL7 COMMIT] ms/tty/vt: Fix the memory leak in visual_init
The commit is pushed to "branch-rh7-3.10.0-693.1.1.vz7.37.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-693.1.1.vz7.37.28 --> commit 1d2a545b8e82899fb95a3cc2b5145c8f79cafb35 Author: Dongxing ZhangDate: Wed Nov 15 16:14:57 2017 +0300 ms/tty/vt: Fix the memory leak in visual_init If vc->vc_uni_pagedir_loc is not NULL, its refcount needs to be decreased before vc_uni_pagedir_loc is re-assigned. unreferenced object 0x88002cdd13b0 (size 512): comm "setfont", pid 503, jiffies 4294896503 (age 722.828s) hex dump (first 32 bytes): 40 92 61 2b 00 88 ff ff 00 00 00 00 00 00 00 00 @.a+ 00 00 00 00 00 00 00 00 a0 ad 61 2b 00 88 ff ff ..a+ backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc_trace+0x1c8/0x240 [] con_do_clear_unimap.isra.2+0x83/0xe0 [] con_clear_unimap+0x22/0x40 [] vt_ioctl+0xeb8/0x1170 [] tty_ioctl+0x208/0xca0 [] do_vfs_ioctl+0x2f8/0x510 [] SyS_ioctl+0x81/0xa0 [] system_call_fastpath+0x16/0x75 [] 0x unreferenced object 0x88002b619240 (size 256): comm "setfont", pid 503, jiffies 4294896503 (age 722.828s) hex dump (first 32 bytes): 90 bc 84 d5 00 88 ff ff 58 85 84 d5 00 88 ff ff X... 
88 ac 84 d5 00 88 ff ff e0 b1 84 d5 00 88 ff ff backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc_trace+0x1c8/0x240 [] con_insert_unipair+0x86/0x170 [] con_set_unimap+0x1b7/0x280 [] vt_ioctl+0xe65/0x1170 [] tty_ioctl+0x208/0xca0 [] do_vfs_ioctl+0x2f8/0x510 [] SyS_ioctl+0x81/0xa0 [] system_call_fastpath+0x16/0x75 [] 0x Signed-off-by: Dongxing Zhang Signed-off-by: Xiaoming Wang Reviewed-by: Peter Hurley Tested-by: Konstantin Khlebnikov Signed-off-by: Greg Kroah-Hartman https://jira.sw.ru/browse/PSBM-76924 (cherry picked from commit 08b33249d89700ba555d4ab5cc88714192b8ee46) Signed-off-by: Andrey Ryabinin --- drivers/tty/vt/vt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 07c5666..fbc6290 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -742,6 +742,8 @@ static void visual_init(struct vc_data *vc, int num, int init) __module_get(vc->vc_sw->owner); vc->vc_num = num; vc->vc_display_fg = _display_fg; + if (vc->vc_uni_pagedir_loc) + con_free_unimap(vc); vc->vc_uni_pagedir_loc = >vc_uni_pagedir; vc->vc_uni_pagedir = 0; vc->vc_hi_font_mask = 0; ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH] KVM: nVMX: do not fill vm_exit_intr_error_code in prepare_vmcs12
From: Paolo BonziniDo this in the caller of nested_vmx_vmexit instead. nested_vmx_check_exception was doing a vmwrite to the vmcs02's VM_EXIT_INTR_ERROR_CODE field, so that prepare_vmcs12 would move the field to vmcs12->vm_exit_intr_error_code. However that isn't possible on pre-Haswell machines. Moving the vmcs12 write to the callers fixes it. Reported-by: Jim Mattson Signed-off-by: Paolo Bonzini [Changed nested_vmx_reflect_vmexit() return type to (int)1 from (bool)1, thanks to fengguang...@intel.com] Signed-off-by: Radim Krčmář (cherry picked from commit 7313c698050387a11c21afb0c6b4c61f21f7c042) [rkagan: This is a missing part of the backport of async pagefault machinery from mainstream; hopefully it fixes #PSBM-77171] Signed-off-by: Roman Kagan --- arch/x86/kvm/vmx.c | 52 ++-- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1d5d577b9f42..4dd164eebce8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2291,7 +2291,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) return 0; if (vcpu->arch.exception.nested_apf) { - vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code); + vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, @@ -2299,6 +2299,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) return 1; } + vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, vmcs_read32(VM_EXIT_INTR_INFO), vmcs_readl(EXIT_QUALIFICATION)); @@ -2507,7 +2508,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * reason is that if one of these bits is necessary, it will appear * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control * fields of vmcs01 and vmcs02, will turn these bits off - and -* nested_vmx_exit_handled() will 
not pass related exits to L1. +* nested_vmx_exit_reflected() will not pass related exits to L1. * These rules have exceptions below. */ @@ -7709,12 +7710,11 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, * should handle it ourselves in L0 (and then continue L2). Only call this * when in is_guest_mode (L2). */ -static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) +static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) { u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - u32 exit_reason = vmx->exit_reason; trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, vmcs_readl(EXIT_QUALIFICATION), @@ -7845,6 +7845,29 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) } } +static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason) +{ + u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + /* +* At this point, the exit interruption info in exit_intr_info +* is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT +* we need to query the in-kernel LAPIC. 
+*/ + WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT); + if ((exit_intr_info & +(INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + vmcs12->vm_exit_intr_error_code = + vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + } + + nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, + vmcs_readl(EXIT_QUALIFICATION)); + return 1; +} + static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) { *info1 = vmcs_readl(EXIT_QUALIFICATION); @@ -8089,12 +8112,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) if (vmx->emulation_required) return handle_invalid_guest_state(vcpu); - if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { - nested_vmx_vmexit(vcpu, exit_reason, - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_readl(EXIT_QUALIFICATION)); - return 1; - } + if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason)) + return nested_vmx_reflect_vmexit(vcpu,
[Devel] [PATCH RH7] nr_cpus: cut lines in /proc/net/softnet_stat to number of vcpus in CT
Counters are not virtualized, we just remove the leak of host's number of cpus. Done similar to /proc/cpuinfo, in case of cpu hotplug race with reading proc, can actually show arbitrary number of lines as we do no locking like get_online_cpus or cpuhp_lock_acquire_read, it seems we do so for the sake of simplicity. https://jira.sw.ru/browse/PSBM-73238 Signed-off-by: Pavel Tikhomirov --- net/core/net-procfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 0ea5ce99aec1..263edc6794b5 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -121,7 +121,8 @@ static struct softnet_data *softnet_get_online(loff_t *pos) while (*pos < nr_cpu_ids) if (cpu_online(*pos)) { - sd = &per_cpu(softnet_data, *pos); + if (__cpus_weight(cpu_online_mask, *pos) < num_online_vcpus()) + sd = &per_cpu(softnet_data, *pos); break; } else ++*pos; -- 2.13.6 ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel