[PATCH] rcu: remove surplus instrumentation_end in rcu_nmi_enter
In function rcu_nmi_enter, there is a surplus instrumentation_end in the second branch of the if statement. objtool check -f vmlinux.o will not complain, because it is unable to correctly cover all cases (objtool visits the third branch first, which marks the following trace_rcu_dyntick as visited), but I think removing the surplus instrumentation_end makes the code better. Signed-off-by: Zhouyi Zhou --- kernel/rcu/tree.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 40e5e3dd253e..eaec6f6032c2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1021,7 +1021,6 @@ noinstr void rcu_nmi_enter(void) } else if (!in_nmi()) { instrumentation_begin(); rcu_irq_enter_check_tick(); - instrumentation_end(); } else { instrumentation_begin(); } -- 2.25.1
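For readers unfamiliar with the annotations, here is a loose standalone sketch (hypothetical userspace code, not tree.c, and a simplification of objtool's actual model) that prints the begin/end marker sequence each path produces. With the surplus call, the !in_nmi() path closed a region that the instrumentation_begin() in the common code immediately reopened, so the extra end() contributed nothing:

#include <stdio.h>

static void instrumentation_begin(void) { printf("begin "); }
static void instrumentation_end(void)   { printf("end "); }

static void nmi_enter_sketch(int in_nmi, int surplus)
{
	printf("in_nmi=%d surplus=%d: ", in_nmi, surplus);
	if (!in_nmi) {
		instrumentation_begin();
		/* rcu_irq_enter_check_tick(); */
		if (surplus)
			instrumentation_end();	/* the call the patch removes */
	} else {
		instrumentation_begin();
	}

	instrumentation_begin();	/* common code, e.g. trace_rcu_dyntick() */
	instrumentation_end();
	printf("\n");
}

int main(void)
{
	nmi_enter_sketch(0, 1);	/* begin end begin end: redundant pair */
	nmi_enter_sketch(0, 0);	/* begin begin end */
	nmi_enter_sketch(1, 0);	/* begin begin end */
	return 0;
}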
[PATCH] RCU: some improvements to comments of tree.c
While studying RCU, I have gone through tree.c many times and tried to make some improvements to its comments. Thanks a lot. Signed-off-by: Zhouyi Zhou --- kernel/rcu/tree.c | 28 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index da6f5213fb74..50c6b8fd8d08 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -201,7 +201,7 @@ EXPORT_SYMBOL_GPL(rcu_get_gp_kthreads_prio); * the need for long delays to increase some race probabilities with the * need for fast grace periods to increase other race probabilities. */ -#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays. */ +#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays, for debug purposes. */ /* * Compute the mask of online CPUs for the specified rcu_node structure. @@ -835,9 +835,9 @@ void noinstr rcu_irq_exit(void) /** * rcu_irq_exit_preempt - Inform RCU that current CPU is exiting irq - * towards in kernel preemption + * towards kernel preemption * - * Same as rcu_irq_exit() but has a sanity check that scheduling is safe + * Same as rcu_irq_exit() but has some sanity checks that scheduling is safe * from RCU point of view. Invoked from return from interrupt before kernel * preemption. */ @@ -959,7 +959,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit); */ void noinstr rcu_user_exit(void) { - rcu_eqs_exit(1); + rcu_eqs_exit(true); } /** @@ -1226,7 +1226,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); #endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */ /* - * We are reporting a quiescent state on behalf of some other CPU, so + * We may be reporting a quiescent state on behalf of some other CPU, so * it is our responsibility to check for and handle potential overflow * of the rcu_node ->gp_seq counter with respect to the rcu_data counters. * After all, the CPU might be in deep idle state, and thus executing no @@ -1808,9 +1808,9 @@ static bool rcu_gp_init(void) return false; } - /* Advance to a new grace period and initialize state. */ - record_gp_stall_check_time(); /* Record GP times before starting GP, hence rcu_seq_start(). */ + record_gp_stall_check_time(); + /* Advance to a new grace period and initialize state. */ rcu_seq_start(&rcu_state.gp_seq); ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq); trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start")); @@ -2630,7 +2630,7 @@ static void rcu_do_batch(struct rcu_data *rdp) * state, for example, user mode or idle loop. It also schedules RCU * core processing. If the current grace period has gone on too long, * it will ask the scheduler to manufacture a context switch for the sole - * purpose of providing a providing the needed quiescent state. + * purpose of providing the needed quiescent state. */ void rcu_sched_clock_irq(int user) { @@ -3260,7 +3260,7 @@ put_cached_bnode(struct kfree_rcu_cpu *krcp, /* * This function is invoked in workqueue context after a grace period. - * It frees all the objects queued on ->bhead_free or ->head_free. + * It frees all the objects queued on ->bkvhead_free or ->head_free. */ static void kfree_rcu_work(struct work_struct *work) { @@ -3287,7 +3287,7 @@ static void kfree_rcu_work(struct work_struct *work) krwp->head_free = NULL; raw_spin_unlock_irqrestore(&krcp->lock, flags); - // Handle two first channels. + // Handle first two channels.
for (i = 0; i < FREE_N_CHANNELS; i++) { for (; bkvhead[i]; bkvhead[i] = bnext) { bnext = bkvhead[i]->next; @@ -3530,7 +3530,7 @@ kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr) /* * Queue a request for lazy invocation of appropriate free routine after a - * grace period. Please note there are three paths are maintained, two are the + * grace period. Please note there are three paths maintained, two are the * main ones that use array of pointers interface and third one is emergency * one, that is used only when the main path can not be maintained temporary, * due to memory pressure. @@ -3813,7 +3813,7 @@ EXPORT_SYMBOL_GPL(cond_synchronize_rcu); /* * Check to see if there is any immediate RCU-related work to be done by - * the current CPU, returning 1 if so and zero otherwise. The checks are + * the current CPU, returning 1 if so and 0 otherwise. The checks are * in order of increasing expense: checks that can be carried out against * CPU-local state are performed first. However, we must check for CPU * stalls first, else we might not get a chance. @@ -4153,7 +4153,7 @@ int rcutree_online_cpu(unsigned int cpu) } /* - * Near the beginning of the process. The CPU is still very much alive + * Near the beginni
[PATCH] preempt/dynamic: fix typo in macro conditional statement
commit 40607ee97e4e ("preempt/dynamic: Provide irqentry_exit_cond_resched() static call") tried to provide an irqentry_exit_cond_resched() static call in irqentry_exit, but had a typo in the macro conditional statement. This patch fixes the typo. Signed-off-by: Zhouyi Zhou --- kernel/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 8442e5c9cfa2..2003d69bd6d5 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -422,7 +422,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) instrumentation_begin(); if (IS_ENABLED(CONFIG_PREEMPTION)) { -#ifdef CONFIG_PREEMT_DYNAMIC +#ifdef CONFIG_PREEMPT_DYNAMIC static_call(irqentry_exit_cond_resched)(); #else irqentry_exit_cond_resched(); -- 2.25.1
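Such typos compile silently because an identifier that is defined nowhere simply fails every #ifdef test, so the preprocessor always selects the #else branch and the build succeeds. A minimal standalone demo (hypothetical; the CONFIG_ macro below stands in for the real Kconfig-generated define):

#include <stdio.h>

#define CONFIG_PREEMPT_DYNAMIC 1	/* what Kconfig would define */

int main(void)
{
#ifdef CONFIG_PREEMT_DYNAMIC		/* typo: defined nowhere */
	puts("static_call path");	/* never compiled in */
#else
	puts("direct call path");	/* silently taken before the fix */
#endif
	return 0;
}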
[PATCH RFC] netfilter: cacheline aligning in struct netns_ct
Components that do not change frequently should share the same cache lines. Signed-off-by: Zhouyi Zhou --- include/net/netns/conntrack.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index fbcc7fa..69d2d58 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -65,6 +65,12 @@ struct nf_ip_net { struct netns_ct { atomic_t count; unsigned int expect_count; + struct hlist_nulls_head unconfirmed; + struct hlist_nulls_head dying; + struct hlist_nulls_head tmpl; + + /* not frequently changing components should share the same cache lines */ + struct nf_ip_net nf_ct_proto ____cacheline_aligned_in_smp; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; struct ctl_table_header *acct_sysctl_header; @@ -86,13 +92,11 @@ struct netns_ct { struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; - struct hlist_nulls_head unconfirmed; - struct hlist_nulls_head dying; - struct hlist_nulls_head tmpl; + struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_exp_event_notifier __rcu *nf_expect_event_cb; - struct nf_ip_net nf_ct_proto; + #if defined(CONFIG_NF_CONNTRACK_LABELS) unsigned int labels_used; u8 label_words; -- 1.7.10.4
Re: [PATCH RFC] netfilter: cacheline aligning in struct netns_ct
Thanks Jesper and David for reviewing and replying. Great work by Jesper on moving the above three hlist_nulls_heads to a percpu structure; sorry for missing the newest advances in nf-next. I agree that arbitrarily cache-line aligning structure members is not a good idea for large cache lines. Still, I wonder whether we could pack the seldom-changed members (nf_ct_proto included) into a structure and allocate it from a cache-aligned slab when the structure "net" is initialized. Zhouyi On Wed, Mar 12, 2014 at 5:36 PM, David Laight wrote: > From: Zhouyi Zhou >> Components that do not change frequently should share the same cache lines. >> >> Signed-off-by: Zhouyi Zhou >> --- >> include/net/netns/conntrack.h | 12 ++++++++---- >> 1 file changed, 8 insertions(+), 4 deletions(-) >> >> diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h >> index fbcc7fa..69d2d58 100644 >> --- a/include/net/netns/conntrack.h >> +++ b/include/net/netns/conntrack.h >> @@ -65,6 +65,12 @@ struct nf_ip_net { >> struct netns_ct { >> atomic_t count; >> unsigned int expect_count; >> + struct hlist_nulls_head unconfirmed; >> + struct hlist_nulls_head dying; >> + struct hlist_nulls_head tmpl; >> + >> + /* not frequently changing components should share the same cache lines */ >> + struct nf_ip_net nf_ct_proto ____cacheline_aligned_in_smp; > > I'm not sure this has the effect you are trying to achieve. > Adding an __attribute__((aligned(n))) to a structure member not only > adds padding before the member, but also pads the member to a multiple > of the specified size. > > With large cache lines you probably want neither. > IIRC one of the ppc systems has something like 256 byte cache lines > (and if it doesn't now, it might have soon). > So arbitrarily cache-line aligning structure members may not be a good idea. > > In any case reducing the number of cache lines accessed, and the number > dirtied is probably more important than putting 'readonly' data in its > own cache lines. > > David
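David's point about member alignment is easy to measure. A standalone sketch (assumptions: 64-byte cache lines and a GCC-style compiler; the kernel's real macro expands to an aligned attribute with L1_CACHE_BYTES on SMP builds):

#include <stdio.h>
#include <stddef.h>

#define ____cacheline_aligned_in_smp __attribute__((aligned(64)))

struct demo {
	int hot;			/* frequently written member */
	long mostly_read ____cacheline_aligned_in_smp;
};

int main(void)
{
	/* With 64-byte lines this prints offset 64 and size 128: the
	 * attribute inserts padding before the member AND raises the
	 * whole structure's alignment, so padding follows it too. */
	printf("offset=%zu size=%zu\n",
	       offsetof(struct demo, mostly_read), sizeof(struct demo));
	return 0;
}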
[PATCH] s390: correct misuses of module_put in appldata_generic_handler.
correct misuses of module_put in appldata_generic_handler Signed-off-by: Zhouyi Zhou --- arch/s390/appldata/appldata_base.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 47c8630..683e0282 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -343,7 +343,6 @@ appldata_generic_handler(struct ctl_table *ctl, int write, // protect work queue callback if (!try_module_get(ops->owner)) { mutex_unlock(&appldata_ops_mutex); - module_put(ops->owner); return -ENODEV; } ops->callback(ops->data); // init record @@ -354,7 +353,6 @@ appldata_generic_handler(struct ctl_table *ctl, int write, if (rc != 0) { pr_err("Starting the data collection for %s " "failed with rc=%d\n", ops->name, rc); - module_put(ops->owner); } else ops->active = 1; } else if ((buf[0] == '0') && (ops->active == 1)) { @@ -365,7 +363,6 @@ appldata_generic_handler(struct ctl_table *ctl, int write, if (rc != 0) pr_err("Stopping the data collection for %s " "failed with rc=%d\n", ops->name, rc); - module_put(ops->owner); } mutex_unlock(&appldata_ops_mutex); out: -- 1.8.1.2
Re: [PATCH] IOMMU: iommu module does not check NULL return of kmem_cache_zalloc
From: Zhouyi Zhou On Tue, 4 Mar 2014 16:41:18 +0100 Joerg Roedel wrote: > On Tue, Feb 11, 2014 at 10:12:53AM +0800, Zhouyi Zhou wrote: > > From: Zhouyi Zhou > > > > The function iopte_alloc does not check the NULL return of kmem_cache_zalloc; > > calling iopte_free with a NULL argument will panic. > > > > Signed-off-by: Zhouyi Zhou > > --- > > drivers/iommu/omap-iommu.c | 3 ++- > > 1 file changed, 2 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c > > index bcd78a7..5155714 100644 > > --- a/drivers/iommu/omap-iommu.c > > +++ b/drivers/iommu/omap-iommu.c > > @@ -551,7 +551,8 @@ static u32 *iopte_alloc(struct omap_iommu *obj, u32 > > *iopgd, u32 da) > > dev_vdbg(obj->dev, "%s: a new pte:%p\n", __func__, iopte); > > } else { > > /* We raced, free the reduniovant table */ > > - iopte_free(iopte); > > + if (iopte) > > + iopte_free(iopte); > Isn't it better to put the check into iopte_free? > > Joerg Thanks Joerg for reviewing. Checking the NULL pointer in iopte_free guarantees that kmem_cache_free is happy all the time. Signed-off-by: Zhouyi Zhou Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bcd78a7..51211e8 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -523,7 +523,8 @@ static void flush_iopte_range(u32 *first, u32 *last) static void iopte_free(u32 *iopte) { /* Note: freed iopte's must be clean ready for re-use */ - kmem_cache_free(iopte_cachep, iopte); + if (iopte) + kmem_cache_free(iopte_cachep, iopte); } static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da)
[PATCH v2] IOMMU: iopte_free should check whether input argument is NULL
From: Zhouyi Zhou iopte_free should check whether its input argument is NULL, because kmem_cache_free will panic on a NULL argument. Signed-off-by: Zhouyi Zhou Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bcd78a7..51211e8 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -523,7 +523,8 @@ static void flush_iopte_range(u32 *first, u32 *last) static void iopte_free(u32 *iopte) { /* Note: freed iopte's must be clean ready for re-use */ - kmem_cache_free(iopte_cachep, iopte); + if (iopte) + kmem_cache_free(iopte_cachep, iopte); } static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da)
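The convention settled on here, letting the free helper itself tolerate NULL so callers need no check, is the same one kfree() follows. A standalone sketch of the pattern (illustrative names; plain free() stands in for kmem_cache_free()):

#include <stdlib.h>

struct iopte_table { unsigned int pte[256]; };

/* NULL-safe free helper: racing allocation paths, like the one in
 * iopte_alloc(), may call this unconditionally. */
static void iopte_free_sketch(struct iopte_table *iopte)
{
	if (iopte)
		free(iopte);
}

int main(void)
{
	iopte_free_sketch(NULL);	/* safe no-op */
	iopte_free_sketch(calloc(1, sizeof(struct iopte_table)));
	return 0;
}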
Re: [PATCH] s390: correct misuses of module_put in appldata_generic_handler.
Thanks Gerald for reviewing, and sorry for not elaborating in the e-mail. Firstly, I think you can't call module_put after a failed try_module_get. Secondly, there exist duplicate module_put calls on the program path (the last one is before return 0). On Mon, Mar 17, 2014 at 9:28 PM, Gerald Schaefer wrote: > On Sat, 15 Mar 2014 21:35:40 +0800 > Zhouyi Zhou wrote: > >> correct misuses of module_put in appldata_generic_handler > > Sorry, I don't see any misuse, could you elaborate? > >> >> Signed-off-by: Zhouyi Zhou >> --- >> arch/s390/appldata/appldata_base.c | 3 --- >> 1 file changed, 3 deletions(-) >> >> diff --git a/arch/s390/appldata/appldata_base.c >> b/arch/s390/appldata/appldata_base.c >> index 47c8630..683e0282 100644 >> --- a/arch/s390/appldata/appldata_base.c >> +++ b/arch/s390/appldata/appldata_base.c >> @@ -343,7 +343,6 @@ appldata_generic_handler(struct ctl_table *ctl, int >> write, >> // protect work queue callback >> if (!try_module_get(ops->owner)) { >> mutex_unlock(&appldata_ops_mutex); >> - module_put(ops->owner); >> return -ENODEV; >> } >> ops->callback(ops->data); // init record >> @@ -354,7 +353,6 @@ appldata_generic_handler(struct ctl_table *ctl, int >> write, >> if (rc != 0) { >> pr_err("Starting the data collection for %s " >> "failed with rc=%d\n", ops->name, rc); >> - module_put(ops->owner); >> } else >> ops->active = 1; >> } else if ((buf[0] == '0') && (ops->active == 1)) { >> @@ -365,7 +363,6 @@ appldata_generic_handler(struct ctl_table *ctl, int >> write, >> if (rc != 0) >> pr_err("Stopping the data collection for %s " >> "failed with rc=%d\n", ops->name, rc); >> - module_put(ops->owner); >> } >> mutex_unlock(&appldata_ops_mutex); >> out: >
Re: [PATCH] s390: correct misuses of module_put in appldata_generic_handler.
Delicate design! Thanks for replying, and sorry for the trouble. On Tue, Mar 18, 2014 at 9:34 PM, Gerald Schaefer wrote: > On Tue, 18 Mar 2014 09:24:55 +0800 > Zhouyi Zhou wrote: > >> Thanks Gerald for reviewing, and sorry for not elaborating in the e-mail. >> >> Firstly, I think you can't call module_put after a failed try_module_get. > > There is another try_module_get() in this function before that, so I need > to call module_put() once if the second try_module_get() fails. > > The first try_module_get() is only needed within the function, while the > second one is needed to prevent a module from being unloaded while its > callback is registered, so that reference is kept as long as the module > is active. > >> >> Secondly, there exist duplicate module_put calls on the program path (the last >> one is before return 0). > > There is one module_put() for every try_module_get(). If "1" is written > to the sysctl, the function exits with one "missing" module_put(), which > is held to protect the callback. When "0" is written, this reference > is returned again with an "extra" module_put(). > >> >> On Mon, Mar 17, 2014 at 9:28 PM, Gerald Schaefer >> wrote: >> > On Sat, 15 Mar 2014 21:35:40 +0800 >> > Zhouyi Zhou wrote: >> > >> >> correct misuses of module_put in appldata_generic_handler >> > >> > Sorry, I don't see any misuse, could you elaborate? >> > >> >> >> >> Signed-off-by: Zhouyi Zhou >> >> --- >> >> arch/s390/appldata/appldata_base.c | 3 --- >> >> 1 file changed, 3 deletions(-) >> >> >> >> diff --git a/arch/s390/appldata/appldata_base.c >> >> b/arch/s390/appldata/appldata_base.c >> >> index 47c8630..683e0282 100644 >> >> --- a/arch/s390/appldata/appldata_base.c >> >> +++ b/arch/s390/appldata/appldata_base.c >> >> @@ -343,7 +343,6 @@ appldata_generic_handler(struct ctl_table *ctl, int >> >> write, >> >> // protect work queue callback >> >> if (!try_module_get(ops->owner)) { >> >> mutex_unlock(&appldata_ops_mutex); >> >> - module_put(ops->owner); >> >> return -ENODEV; >> >> } >> >> ops->callback(ops->data); // init record >> >> @@ -354,7 +353,6 @@ appldata_generic_handler(struct ctl_table *ctl, int >> >> write, >> >> if (rc != 0) { >> >> pr_err("Starting the data collection for %s " >> >> "failed with rc=%d\n", ops->name, rc); >> >> - module_put(ops->owner); >> >> } else >> >> ops->active = 1; >> >> } else if ((buf[0] == '0') && (ops->active == 1)) { >> >> @@ -365,7 +363,6 @@ appldata_generic_handler(struct ctl_table *ctl, int >> >> write, >> >> if (rc != 0) >> >> pr_err("Stopping the data collection for %s " >> >> "failed with rc=%d\n", ops->name, rc); >> >> - module_put(ops->owner); >> >> } >> >> mutex_unlock(&appldata_ops_mutex); >> >> out: >> >
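A userspace mock of the reference-counting protocol Gerald describes (illustrative names, not the s390 code): every write takes a per-call reference, and writing "1" additionally pins the module until "0" releases it.

#include <assert.h>

static int refcnt;
static int active;

static int try_module_get_mock(void) { refcnt++; return 1; }
static void module_put_mock(void)    { refcnt--; }

static void handler_write(char cmd)
{
	try_module_get_mock();		/* per-call reference */
	if (cmd == '1' && !active) {
		try_module_get_mock();	/* pin while collection is active; */
		active = 1;		/* deliberately no matching put here */
	} else if (cmd == '0' && active) {
		active = 0;
		module_put_mock();	/* return the long-lived reference */
	}
	module_put_mock();		/* drop the per-call reference */
}

int main(void)
{
	handler_write('1');
	assert(refcnt == 1);	/* one reference held while active */
	handler_write('0');
	assert(refcnt == 0);	/* balanced again */
	return 0;
}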
[PATCH] IOMMU: iommu module does not check NULL return of kmem_cache_zalloc
From: Zhouyi Zhou The function iopte_alloc does not check the NULL return of kmem_cache_zalloc; calling iopte_free with a NULL argument will panic. Signed-off-by: Zhouyi Zhou --- drivers/iommu/omap-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bcd78a7..5155714 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -551,7 +551,8 @@ static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da) dev_vdbg(obj->dev, "%s: a new pte:%p\n", __func__, iopte); } else { /* We raced, free the reduniovant table */ - iopte_free(iopte); + if (iopte) + iopte_free(iopte); } pte_ready: -- 1.7.10.4
[PATCH 1/1] kernel code that does not handle NULL return of kmem_cache_zalloc
I did a grep for kmem_cache_zalloc and kmem_cache_alloc in the kernel tree, and found some code that does not handle the NULL return of kmem_cache_zalloc correctly. Signed-off-by: Zhouyi Zhou --- arch/powerpc/kvm/book3s_32_mmu_host.c | 5 +++++ drivers/iommu/omap-iommu.c | 3 ++- fs/jffs2/malloc.c | 4 ++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 3a0abd2..5fac89d 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -243,6 +243,11 @@ next_pteg: /* Now tell our Shadow PTE code about the new page */ pte = kvmppc_mmu_hpte_cache_next(vcpu); + if (!pte) { + kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); + r = -EAGAIN; + goto out; + } dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", orig_pte->may_write ? 'w' : '-', diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bcd78a7..5155714 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -551,7 +551,8 @@ static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da) dev_vdbg(obj->dev, "%s: a new pte:%p\n", __func__, iopte); } else { /* We raced, free the reduniovant table */ - iopte_free(iopte); + if (iopte) + iopte_free(iopte); } pte_ready: diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index 4f47aa2..58e2336 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c @@ -287,6 +287,8 @@ struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void) { struct jffs2_xattr_datum *xd; xd = kmem_cache_zalloc(xattr_datum_cache, GFP_KERNEL); + if (!xd) + return NULL; dbg_memalloc("%p\n", xd); xd->class = RAWNODE_CLASS_XATTR_DATUM; @@ -305,6 +307,8 @@ struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void) { struct jffs2_xattr_ref *ref; ref = kmem_cache_zalloc(xattr_ref_cache, GFP_KERNEL); + if (!ref) + return NULL; dbg_memalloc("%p\n", ref); ref->class = RAWNODE_CLASS_XATTR_REF; -- 1.7.10.4
ping: re: [PATCH 1/1] kernel code that does not handle NULL return of kmem_cache_zalloc
ping > I did a grep for kmem_cache_zalloc and kmem_cache_alloc > in the kernel tree, and found some code that does not handle > the NULL return of kmem_cache_zalloc correctly. > > > Signed-off-by: Zhouyi Zhou > --- > arch/powerpc/kvm/book3s_32_mmu_host.c | 5 +++++ > drivers/iommu/omap-iommu.c | 3 ++- > fs/jffs2/malloc.c | 4 ++++ > 3 files changed, 11 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c >b/arch/powerpc/kvm/book3s_32_mmu_host.c > index 3a0abd2..5fac89d 100644 > --- a/arch/powerpc/kvm/book3s_32_mmu_host.c > +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c > @@ -243,6 +243,11 @@ next_pteg: > /* Now tell our Shadow PTE code about the new page */ > > pte = kvmppc_mmu_hpte_cache_next(vcpu); > + if (!pte) { > + kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); > + r = -EAGAIN; > + goto out; > + } > > dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", > orig_pte->may_write ? 'w' : '-', > diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c > index bcd78a7..5155714 100644 > --- a/drivers/iommu/omap-iommu.c > +++ b/drivers/iommu/omap-iommu.c > @@ -551,7 +551,8 @@ static u32 *iopte_alloc(struct omap_iommu *obj, u32 >*iopgd, u32 da) > dev_vdbg(obj->dev, "%s: a new pte:%p\n", __func__, iopte); > } else { > /* We raced, free the reduniovant table */ > - iopte_free(iopte); > + if (iopte) > + iopte_free(iopte); > } > > pte_ready: > diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c > index 4f47aa2..58e2336 100644 > --- a/fs/jffs2/malloc.c > +++ b/fs/jffs2/malloc.c > @@ -287,6 +287,8 @@ struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void) > { > struct jffs2_xattr_datum *xd; > xd = kmem_cache_zalloc(xattr_datum_cache, GFP_KERNEL); > + if (!xd) > + return NULL; > dbg_memalloc("%p\n", xd); > > xd->class = RAWNODE_CLASS_XATTR_DATUM; > @@ -305,6 +307,8 @@ struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void) > { > struct jffs2_xattr_ref *ref; > ref = kmem_cache_zalloc(xattr_ref_cache, GFP_KERNEL); > + if (!ref) > + return NULL; > dbg_memalloc("%p\n", ref); > > ref->class = RAWNODE_CLASS_XATTR_REF; > -- > 1.7.10.4 >
[PATCH] KVM: NULL return of kvmppc_mmu_hpte_cache_next should be handled
NULL return of kvmppc_mmu_hpte_cache_next should be handled Signed-off-by: Zhouyi Zhou --- arch/powerpc/kvm/book3s_32_mmu_host.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 3a0abd2..5fac89d 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -243,6 +243,11 @@ next_pteg: /* Now tell our Shadow PTE code about the new page */ pte = kvmppc_mmu_hpte_cache_next(vcpu); + if (!pte) { + kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); + r = -EAGAIN; + goto out; + } dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", orig_pte->may_write ? 'w' : '-', -- 1.7.10.4
[PATCH 1/1] NULL return of kmem_cache_zalloc should be handled
If iopte is NULL, iopte_free should not be called. Signed-off-by: Zhouyi Zhou --- drivers/iommu/omap-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bcd78a7..5155714 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -551,7 +551,8 @@ static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da) dev_vdbg(obj->dev, "%s: a new pte:%p\n", __func__, iopte); } else { /* We raced, free the reduniovant table */ - iopte_free(iopte); + if (iopte) + iopte_free(iopte); } pte_ready: -- 1.7.10.4
[PATCH] JFFS2: NULL return of kmem_cache_zalloc should be handled.
NULL return of kmem_cache_zalloc should be handled in jffs2_alloc_xattr_datum and jffs2_alloc_xattr_ref. Signed-off-by: Zhouyi Zhou --- fs/jffs2/malloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index 4f47aa2..b8fd651 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c @@ -288,6 +288,8 @@ struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void) struct jffs2_xattr_datum *xd; xd = kmem_cache_zalloc(xattr_datum_cache, GFP_KERNEL); dbg_memalloc("%p\n", xd); + if (!xd) + return NULL; xd->class = RAWNODE_CLASS_XATTR_DATUM; xd->node = (void *)xd; @@ -306,6 +308,8 @@ struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void) struct jffs2_xattr_ref *ref; ref = kmem_cache_zalloc(xattr_ref_cache, GFP_KERNEL); dbg_memalloc("%p\n", ref); + if (!ref) + return NULL; ref->class = RAWNODE_CLASS_XATTR_REF; ref->node = (void *)ref; -- 1.7.10.4
[PATCH net-next] pktgen: add ttl option for pktgen
I think it is useful to add a ttl option to pktgen. For example, if an ISP wants to test its network quality, it can set the TTL so that the links under test receive the packet while end users won't. Also, add a blank line after declarations in pktgen.c. Signed-off-by: Zhouyi Zhou --- Documentation/networking/pktgen.txt | 2 +- net/core/pktgen.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletions(-) diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt index 0dffc6e..abad388 100644 --- a/Documentation/networking/pktgen.txt +++ b/Documentation/networking/pktgen.txt @@ -180,7 +180,7 @@ Examples: pgset "vlan_id " > 4095 remove vlan and svlan tags pgset "svlan " > 4095 remove svlan tag - + pgset "ttl xx" set IPv4 TTL field (default 32) pgset "tos XX" set former IPv4 TOS field (e.g. "tos 28" for AF11 no ECN, default 00) pgset "traffic_class XX" set former IPv6 TRAFFIC CLASS (e.g. "traffic_class B8" for EF no ECN, default 00) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8b849dd..54cb750 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -318,6 +318,8 @@ struct pktgen_dev { __u16 udp_dst_min; /* inclusive, dest UDP port */ __u16 udp_dst_max; /* exclusive, dest UDP port */ + __u8 ttl; /* time to live */ + /* DSCP + ECN */ __u8 tos; /* six MSB of (former) IPv4 TOS are for dscp codepoint */ @@ -606,6 +608,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->svlan_id, pkt_dev->svlan_p, pkt_dev->svlan_cfi); + seq_printf(seq, " ttl: 0x%02x\n", pkt_dev->ttl); + if (pkt_dev->tos) seq_printf(seq, " tos: 0x%02x\n", pkt_dev->tos); @@ -1667,8 +1671,26 @@ static ssize_t pktgen_if_write(struct file *file, return count; } + if (!strcmp(name, "ttl")) { + __u32 tmp_value = 0; + + len = hex32_arg(&user_buffer[i], 2, &tmp_value); + if (len < 0) + return len; + + i += len; + if (len == 2) { + pkt_dev->ttl = tmp_value; + sprintf(pg_result, "OK: ttl=0x%02x", pkt_dev->ttl); + } else { + sprintf(pg_result, "ERROR: ttl must be 00-ff"); + } + return count; + } + if (!strcmp(name, "tos")) { __u32 tmp_value = 0; + len = hex32_arg(&user_buffer[i], 2, &tmp_value); if (len < 0) return len; @@ -1685,6 +1707,7 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "traffic_class")) { __u32 tmp_value = 0; + len = hex32_arg(&user_buffer[i], 2, &tmp_value); if (len < 0) return len; @@ -3558,6 +3581,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->udp_src_max = 9; pkt_dev->udp_dst_min = 9; pkt_dev->udp_dst_max = 9; + pkt_dev->ttl = 32; pkt_dev->vlan_p = 0; pkt_dev->vlan_cfi = 0; pkt_dev->vlan_id = 0xffff; -- 1.7.1
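A toy illustration of the motivation (no real networking involved): each router decrements the TTL and drops the packet when it reaches zero, so test traffic sent with a small TTL dies inside the ISP's network before it can reach end users.

#include <stdio.h>

/* Return how many hops a packet survives when sent with the given TTL. */
static int hops_reached(unsigned char ttl)
{
	int hops = 0;

	while (ttl > 0) {	/* each hop decrements the TTL ... */
		ttl--;
		hops++;		/* ... and forwards or delivers the packet */
	}
	return hops;
}

int main(void)
{
	printf("ttl=3: forwarded across %d hops, then dropped\n", hops_reached(3));
	printf("ttl=32 (pktgen default): up to %d hops\n", hops_reached(32));
	return 0;
}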
Re: [PATCH net-next] pktgen: add ttl option for pktgen
Thanks for reviewing. I did miss it. > -Original Message- > From: "Dmitry Popov" > Sent Time: Wednesday, July 30, 2014 > To: "Zhouyi Zhou" > Cc: rdun...@infradead.org, da...@davemloft.net, mini...@googlemail.com, > bro...@redhat.com, steffen.klass...@secunet.com, fan...@windriver.com, > dbork...@redhat.com, fengguang...@intel.com, tg...@suug.ch, > linux-...@vger.kernel.org, linux-kernel@vger.kernel.org, > net...@vger.kernel.org, "Zhouyi Zhou" > Subject: Re: [PATCH net-next] pktgen: add ttl option for pktgen > > On Wed, 30 Jul 2014 16:21:26 +0800 > Zhouyi Zhou wrote: > > > I think it is useful to add a ttl option to pktgen. For example, > > if an ISP wants to test its network quality, it can set > > the TTL so that the links under test receive the packet while > > end users won't. > > It seems you're missing > > --- a/net/core/pktgen.c > +++ b/net/core/pktgen.c > @@ -2799,7 +2799,7 @@ static struct sk_buff *fill_packet_ipv4(struct > net_device *odev, > > iph->ihl = 5; > iph->version = 4; > - iph->ttl = 32; > + iph->ttl = pkt_dev->ttl; > iph->tos = pkt_dev->tos; > iph->protocol = IPPROTO_UDP; /* UDP */ > iph->saddr = pkt_dev->cur_saddr;
[PATCH] netdev: pktgen xmit packet through vlan interface
As reported at http://www.spinics.net/lists/netdev/msg165015.html, pktgen generating a shared packet through a VLAN interface will cause an oops because the packet enters the tc queue twice. Try to solve this problem by means of a packet clone instead of sharing. Signed-off-by: Zhouyi Zhou --- net/core/pktgen.c | 20 +++++++++++++++++--- 1 files changed, 17 insertions(+), 3 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 0304f98..ced07fc 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3297,6 +3297,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *) = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; + struct sk_buff *nskb = NULL; u16 queue_map; int ret; @@ -3347,8 +3348,18 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->last_ok = 0; goto unlock; } - atomic_inc(&(pkt_dev->skb->users)); - ret = (*xmit)(pkt_dev->skb, odev); + + if (pkt_dev->clone_skb && is_vlan_dev(odev)) { + nskb = skb_clone(pkt_dev->skb, GFP_ATOMIC); + ret = -ENOMEM; + if (nskb) + ret = (*xmit)(nskb, odev); + else + nskb = ERR_PTR(ret); + } else { + atomic_inc(&(pkt_dev->skb->users)); + ret = (*xmit)(pkt_dev->skb, odev); + } switch (ret) { case NETDEV_TX_OK: @@ -3372,7 +3383,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) case NETDEV_TX_LOCKED: case NETDEV_TX_BUSY: /* Retry it next time */ - atomic_dec(&(pkt_dev->skb->users)); + if (nskb && !IS_ERR(nskb)) + kfree_skb(nskb); + else + atomic_dec(&(pkt_dev->skb->users)); pkt_dev->last_ok = 0; } unlock: -- 1.7.1
Re: [PATCH] netdev: pktgen xmit packet through vlan interface
Thanks for reviewing. On Fri, May 2, 2014 at 10:00 PM, John Fastabend wrote: > On 5/2/2014 6:19 AM, Jesper Dangaard Brouer wrote: >> On Fri, 2 May 2014 15:18:12 +0800 >> Zhouyi Zhou wrote: >> >>> As reported at http://www.spinics.net/lists/netdev/msg165015.html, >>> pktgen generating a shared packet through a VLAN interface will cause >>> an oops because the packet enters the tc queue twice. >>> >>> Try to solve this problem by means of a packet clone instead of sharing. >> >> I really don't like adding this stuff to the fast path of pktgen. >> >> Why would you use pktgen on a VLAN? I use pktgen on a VLAN under a special circumstance. > > Its a good way to test qdiscs. When you run pktgen over the VLAN > you exercise the lower devices qdisc. > > Although I never submitted a patch like this because I figured it > was a corner case and we would want to keep the hotpath clean. > I think is_vlan_dev(odev) has a good chance of being in a cache line that is seldom invalidated. Could we test only is_vlan_dev(odev) in my first if statement? Also, I guess if (nskb && !IS_ERR(nskb)) is reached only in rare cases, when the interface is not overloaded. > >> Why don't you use the "vlan_id" feature available in pktgen, and send >> in the lower real device? Could we automatically convert the VLAN device's membership into real device + vlan_id settings in pkt_dev, or could we issue a warning or a suggestion when a user tries to add a VLAN device to the pktgen thread? >> >> >> >>> Signed-off-by: Zhouyi Zhou >>> --- >>> net/core/pktgen.c | 20 +++++++++++++++++--- >>> 1 files changed, 17 insertions(+), 3 deletions(-) >>> >>> diff --git a/net/core/pktgen.c b/net/core/pktgen.c >>> index 0304f98..ced07fc 100644 >>> --- a/net/core/pktgen.c >>> +++ b/net/core/pktgen.c >>> @@ -3297,6 +3297,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) >>> netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *) >>> = odev->netdev_ops->ndo_start_xmit; >>> struct netdev_queue *txq; >>> + struct sk_buff *nskb = NULL; >>> u16 queue_map; >>> int ret; >>> >>> @@ -3347,8 +3348,18 @@ static void pktgen_xmit(struct pktgen_dev >>> *pkt_dev) >>> pkt_dev->last_ok = 0; >>> goto unlock; >>> } >>> - atomic_inc(&(pkt_dev->skb->users)); >>> - ret = (*xmit)(pkt_dev->skb, odev); >>> + >>> + if (pkt_dev->clone_skb && is_vlan_dev(odev)) { >>> + nskb = skb_clone(pkt_dev->skb, GFP_ATOMIC); >>> + ret = -ENOMEM; >>> + if (nskb) >>> + ret = (*xmit)(nskb, odev); >>> + else >>> + nskb = ERR_PTR(ret); >>> + } else { >>> + atomic_inc(&(pkt_dev->skb->users)); >>> + ret = (*xmit)(pkt_dev->skb, odev); >>> + } >>> >>> switch (ret) { >>> case NETDEV_TX_OK: >>> @@ -3372,7 +3383,10 @@ static void pktgen_xmit(struct pktgen_dev >>> *pkt_dev) >>> case NETDEV_TX_LOCKED: >>> case NETDEV_TX_BUSY: >>> /* Retry it next time */ >>> - atomic_dec(&(pkt_dev->skb->users)); >>> + if (nskb && !IS_ERR(nskb)) >>> + kfree_skb(nskb); >>> + else >>> + atomic_dec(&(pkt_dev->skb->users)); >>> pkt_dev->last_ok = 0; >>> } >>> unlock: >> >> >> >
Re: [PATCH] netdev: pktgen xmit packet through vlan interface
Thanks Sergei for reviewing. I think On Sat, May 3, 2014 at 12:18 AM, Sergei Shtylyov wrote: >> + >> + if (pkt_dev->clone_skb && is_vlan_dev(odev)) { >> + nskb = skb_clone(pkt_dev->skb, GFP_ATOMIC); >> + ret = -ENOMEM; >> + if (nskb) >> + ret = (*xmit)(nskb, odev); if (is_vlan_dev(odev) && pkt_dev->clone_skb) { nskb = skb_clone(pkt_dev->skb, GFP_ATOMIC); ret = -ENOMEM; if (nskb) ret = (*xmit)(nskb, odev); } and case NETDEV_TX_LOCKED: case NETDEV_TX_BUSY: /* Retry it next time */ if (is_vlan_dev(odev) && pkt_dev->clone_skb && nskb) kfree_skb(nskb); else atomic_dec(&(pkt_dev->skb->users)); is better, because is_vlan_dev(odev) is probably in a read-mostly cache line. Zhouyi
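As background on why cloning sidesteps the shared-skb problem, a userspace analogy (hypothetical; the real sk_buff API is more involved, and skb_clone() shares the payload via its own refcount rather than copying it as this sketch does):

#include <stdlib.h>
#include <string.h>

struct buf { int users; char data[64]; };

static void buf_get(struct buf *b) { b->users++; }	/* atomic_inc(&skb->users) */
static void buf_put(struct buf *b) { if (--b->users == 0) free(b); }

static struct buf *buf_clone(const struct buf *b)
{
	struct buf *n = malloc(sizeof(*n));

	if (n) {
		memcpy(n->data, b->data, sizeof(n->data));
		n->users = 1;	/* a private reference for the stack */
	}
	return n;
}

int main(void)
{
	struct buf *b = calloc(1, sizeof(*b));

	if (!b)
		return 1;
	b->users = 1;

	buf_get(b);			/* pktgen's fast path: share one buffer */
	buf_put(b);			/* the "driver" consumed it */

	struct buf *c = buf_clone(b);	/* the patch's VLAN path */
	if (c)
		buf_put(c);		/* the stack may queue or free it independently */

	buf_put(b);
	return 0;
}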
[PATCH 1/1] powerpc/iommu: Handling null return of kzalloc_node
NULL return of kzalloc_node should be handled Signed-off-by: Zhouyi Zhou --- arch/powerpc/platforms/pseries/iommu.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 33b552f..593cd3d 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -613,7 +613,11 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, pci->phb->node); - + if (!tbl) { + pr_debug(" out of memory, can't create iommu_table !\n"); + return; + } + iommu_table_setparms(pci->phb, dn, tbl); pci->iommu_table = iommu_init_table(tbl, pci->phb->node); iommu_register_group(tbl, pci_domain_nr(bus), 0); @@ -659,6 +663,10 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) if (!ppci->iommu_table) { tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, ppci->phb->node); + if (!tbl) { + pr_debug(" out of memory, can't create iommu_table !\n"); + return; + } iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); iommu_register_group(tbl, pci_domain_nr(bus), 0); @@ -686,6 +694,11 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, phb->node); + if (!tbl) { + pr_debug(" out of memory, can't create iommu_table !\n"); + return; + } + iommu_table_setparms(phb, dn, tbl); PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); @@ -1102,6 +1116,10 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) if (!pci->iommu_table) { tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, pci->phb->node); + if (!tbl) { + pr_debug(" out of memory, can't create iommu_table !\n"); + return; + } iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); pci->iommu_table = iommu_init_table(tbl, pci->phb->node); iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0); -- 1.7.10.4
[PATCH] x86/tlb_uv: Fixing some memory allocation failure in x86 UV
Fixing some memory allocation failure handling in x86 UV Signed-off-by: Zhouyi Zhou --- arch/x86/platform/uv/tlb_uv.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index dfe605a..a434768 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1965,6 +1966,10 @@ static void make_per_cpu_thp(struct bau_control *smaster) size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus(); smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode); + if (!smaster->thp) { + pr_err("ERROR: out of memory, can not create hub and pnode!\n"); + return; + } memset(smaster->thp, 0, hpsz); for_each_present_cpu(cpu) { smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode; @@ -1980,6 +1985,8 @@ static void make_per_hub_cpumask(struct bau_control *hmaster) int sz = sizeof(cpumask_t); hmaster->cpumask = kzalloc_node(sz, GFP_KERNEL, hmaster->osnode); + if (!hmaster->cpumask) + pr_err("ERROR: out of memory, can not create cpumask!\n"); } /* @@ -2056,11 +2063,15 @@ static int __init summarize_uvhub_sockets(int nuvhubs, if (scan_sock(sdp, bdp, &smaster, &hmaster)) return 1; make_per_cpu_thp(smaster); + if (!smaster->thp) + return 1; } socket++; socket_mask = (socket_mask >> 1); } make_per_hub_cpumask(hmaster); + if (!hmaster->cpumask) + return 1; } return 0; } @@ -2077,9 +2088,16 @@ static int __init init_per_cpu(int nuvhubs, int base_part_pnode) timeout_us = calculate_destination_timeout(); vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); + if (!vp) + return 1; + uvhub_descs = (struct uvhub_desc *)vp; memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); + if (!uvhub_mask) { + kfree(uvhub_descs); + return 1; + } if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask)) goto fail; -- 1.7.10.4
[PATCH 1/1] perf/amd: NULL return of kzalloc_node should be handled
Signed-off-by: Zhouyi Zhou --- arch/x86/kernel/cpu/perf_event_amd_uncore.c | 32 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c index 3bbdf4c..f60a50e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -300,24 +300,28 @@ static void amd_uncore_cpu_up_prepare(unsigned int cpu) if (amd_uncore_nb) { uncore = amd_uncore_alloc(cpu); - uncore->cpu = cpu; - uncore->num_counters = NUM_COUNTERS_NB; - uncore->rdpmc_base = RDPMC_BASE_NB; - uncore->msr_base = MSR_F15H_NB_PERF_CTL; - uncore->active_mask = &amd_nb_active_mask; - uncore->pmu = &amd_nb_pmu; - *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; + if (uncore) { + uncore->cpu = cpu; + uncore->num_counters = NUM_COUNTERS_NB; + uncore->rdpmc_base = RDPMC_BASE_NB; + uncore->msr_base = MSR_F15H_NB_PERF_CTL; + uncore->active_mask = &amd_nb_active_mask; + uncore->pmu = &amd_nb_pmu; + *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; + } } if (amd_uncore_l2) { uncore = amd_uncore_alloc(cpu); - uncore->cpu = cpu; - uncore->num_counters = NUM_COUNTERS_L2; - uncore->rdpmc_base = RDPMC_BASE_L2; - uncore->msr_base = MSR_F16H_L2I_PERF_CTL; - uncore->active_mask = &amd_l2_active_mask; - uncore->pmu = &amd_l2_pmu; - *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + if (uncore) { + uncore->cpu = cpu; + uncore->num_counters = NUM_COUNTERS_L2; + uncore->rdpmc_base = RDPMC_BASE_L2; + uncore->msr_base = MSR_F16H_L2I_PERF_CTL; + uncore->active_mask = &amd_l2_active_mask; + uncore->pmu = &amd_l2_pmu; + *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + } } } -- 1.7.10.4
Re: [PATCH 1/1] perf/amd: NULL return of kzalloc_node should be handled
Thanks Peter for reviewing, and sorry for not thinking it through before submitting. Is it OK that when amd_uncore_cpu_up_prepare is called from amd_uncore_cpu_notifier I return notifier_from_errno(-ENOMEM), and when it is called from amd_uncore_init I immediately return -ENODEV? On Tue, Jun 10, 2014 at 4:22 PM, Peter Zijlstra wrote: > On Tue, Jun 10, 2014 at 03:37:38PM +0800, Zhouyi Zhou wrote: > > Less typing more thinking, this is wrong. You should fail when the > allocation fails.
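For reference, a generic standalone sketch of the unwind style this leads to in v2 (illustrative resources, not the perf_event_amd_uncore.c code): each successfully acquired resource gains a cleanup label, and failure paths jump to the label that releases everything acquired so far, in reverse order.

#include <stdlib.h>

struct res { int dummy; };

static int setup(void)
{
	struct res *a, *b, *c;
	int ret = -1;			/* would be -ENOMEM in the kernel */

	a = malloc(sizeof(*a));
	if (!a)
		goto fail;
	b = malloc(sizeof(*b));
	if (!b)
		goto fail_a;
	c = malloc(sizeof(*c));
	if (!c)
		goto fail_b;

	free(c);			/* success path of this demo */
	free(b);
	free(a);
	return 0;

fail_b:
	free(b);
fail_a:
	free(a);
fail:
	return ret;
}

int main(void)
{
	return setup();
}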
[RFC PATCH] perf/amd: Try to fix some mem allocation failure handling
This is version 2.0 of "[PATCH 1/1] perf/amd: NULL return of kzalloc_node should be handled" (http://www.spinics.net/lists/kernel/msg1760689.html), trying to correctly handle memory allocation failure in perf_event_amd_uncore.c. Signed-off-by: Zhouyi Zhou --- arch/x86/kernel/cpu/perf_event_amd_uncore.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c index 3bbdf4c..bdc8e49 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -294,12 +294,14 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) cpu_to_node(cpu)); } -static void amd_uncore_cpu_up_prepare(unsigned int cpu) +static int amd_uncore_cpu_up_prepare(unsigned int cpu) { struct amd_uncore *uncore; if (amd_uncore_nb) { uncore = amd_uncore_alloc(cpu); + if (!uncore) + return 1; uncore->cpu = cpu; uncore->num_counters = NUM_COUNTERS_NB; uncore->rdpmc_base = RDPMC_BASE_NB; @@ -311,6 +313,11 @@ static void amd_uncore_cpu_up_prepare(unsigned int cpu) if (amd_uncore_l2) { uncore = amd_uncore_alloc(cpu); + if (!uncore) { + if (amd_uncore_nb) + kfree(*per_cpu_ptr(amd_uncore_nb, cpu)); + return 1; + } uncore->cpu = cpu; uncore->num_counters = NUM_COUNTERS_L2; uncore->rdpmc_base = RDPMC_BASE_L2; @@ -319,6 +326,8 @@ static void amd_uncore_cpu_up_prepare(unsigned int cpu) uncore->pmu = &amd_l2_pmu; *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; } + + return 0; } static struct amd_uncore * @@ -461,7 +470,8 @@ amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action, switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - amd_uncore_cpu_up_prepare(cpu); + if (amd_uncore_cpu_up_prepare(cpu)) + return notifier_from_errno(-ENOMEM); break; case CPU_STARTING: @@ -514,6 +524,8 @@ static int __init amd_uncore_init(void) if (cpu_has_perfctr_nb) { amd_uncore_nb = alloc_percpu(struct amd_uncore *); + if (!amd_uncore_nb) + return -ENOMEM; perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); printk(KERN_INFO "perf: AMD NB counters detected\n"); @@ -522,6 +534,13 @@ static int __init amd_uncore_init(void) if (cpu_has_perfctr_l2) { amd_uncore_l2 = alloc_percpu(struct amd_uncore *); + if (!amd_uncore_l2) { + if (cpu_has_perfctr_nb) { + perf_pmu_unregister(&amd_nb_pmu); + free_percpu(amd_uncore_nb); + } + return -ENOMEM; + } perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1); printk(KERN_INFO "perf: AMD L2I counters detected\n"); @@ -535,7 +554,18 @@ static int __init amd_uncore_init(void) /* init cpus already online before registering for hotplug notifier */ for_each_online_cpu(cpu) { - amd_uncore_cpu_up_prepare(cpu); + if (amd_uncore_cpu_up_prepare(cpu)) { + if (cpu_has_perfctr_nb) { + perf_pmu_unregister(&amd_nb_pmu); + free_percpu(amd_uncore_nb); + } + if (cpu_has_perfctr_l2) { + perf_pmu_unregister(&amd_l2_pmu); + free_percpu(amd_uncore_l2); + } + cpu_notifier_register_done(); + return -ENOMEM; + } smp_call_function_single(cpu, init_cpu_already_online, NULL, 1); } -- 1.7.10.4
Re: [PATCH] x86/tlb_uv: Fixing some memory allocation failure in x86 UV
Thanks for reviewing; I will work on a new version. On Wed, Jun 11, 2014 at 7:28 AM, Thomas Gleixner wrote: > On Tue, 10 Jun 2014, H. Peter Anvin wrote: > >> On 06/10/2014 12:35 AM, Zhouyi Zhou wrote: >> > Fixing some memory allocation failure handling in x86 UV >> > >> > Signed-off-by: Zhouyi Zhou >> >> Sorry, this really isn't enough description for this size of a patch. > > Correction: This is not a proper description for any patch. > > "some" is wrong to begin with. > > Either we fix a particular issue or we address all of them, but "some" > means: We fixed a few, but we did not care about the rest. > > Aside of that, I agree. The changelog is disjunct from the patch > itself. > > Thanks, > > tglx > >
[RFC PATCH v2] perf/amd: Try to fix some mem allocation failure handling
Following Peter's advice, put the failure handling into a goto chain. Compile-tested on x86_64; could you check whether there is anything I missed? Signed-off-by: Zhouyi Zhou --- arch/x86/kernel/cpu/perf_event_amd_uncore.c | 111 --- 1 file changed, 84 insertions(+), 27 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c index 3bbdf4c..30790d7 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -294,31 +294,41 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) cpu_to_node(cpu)); } -static void amd_uncore_cpu_up_prepare(unsigned int cpu) +static int amd_uncore_cpu_up_prepare(unsigned int cpu) { - struct amd_uncore *uncore; + struct amd_uncore *uncore_nb = NULL, *uncore_l2; if (amd_uncore_nb) { - uncore = amd_uncore_alloc(cpu); - uncore->cpu = cpu; - uncore->num_counters = NUM_COUNTERS_NB; - uncore->rdpmc_base = RDPMC_BASE_NB; - uncore->msr_base = MSR_F15H_NB_PERF_CTL; - uncore->active_mask = &amd_nb_active_mask; - uncore->pmu = &amd_nb_pmu; - *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; + uncore_nb = amd_uncore_alloc(cpu); + if (!uncore_nb) + goto fail; + uncore_nb->cpu = cpu; + uncore_nb->num_counters = NUM_COUNTERS_NB; + uncore_nb->rdpmc_base = RDPMC_BASE_NB; + uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; + uncore_nb->active_mask = &amd_nb_active_mask; + uncore_nb->pmu = &amd_nb_pmu; + *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; } if (amd_uncore_l2) { - uncore = amd_uncore_alloc(cpu); - uncore->cpu = cpu; - uncore->num_counters = NUM_COUNTERS_L2; - uncore->rdpmc_base = RDPMC_BASE_L2; - uncore->msr_base = MSR_F16H_L2I_PERF_CTL; - uncore->active_mask = &amd_l2_active_mask; - uncore->pmu = &amd_l2_pmu; - *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + uncore_l2 = amd_uncore_alloc(cpu); + if (!uncore_l2) + goto fail; + uncore_l2->cpu = cpu; + uncore_l2->num_counters = NUM_COUNTERS_L2; + uncore_l2->rdpmc_base = RDPMC_BASE_L2; + uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL; + uncore_l2->active_mask = &amd_l2_active_mask; + uncore_l2->pmu = &amd_l2_pmu; + *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2; } + + return 0; + +fail: + kfree(uncore_nb); + return -ENOMEM; } static struct amd_uncore * @@ -441,7 +451,7 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores) if (!--uncore->refcnt) kfree(uncore); - *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; + *per_cpu_ptr(uncores, cpu) = NULL; } static void amd_uncore_cpu_dead(unsigned int cpu) @@ -461,7 +471,8 @@ amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action, switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - amd_uncore_cpu_up_prepare(cpu); + if (amd_uncore_cpu_up_prepare(cpu)) + return notifier_from_errno(-ENOMEM); break; case CPU_STARTING: @@ -501,20 +512,33 @@ static void __init init_cpu_already_online(void *dummy) amd_uncore_cpu_online(cpu); } +static void cleanup_cpu_online(void *dummy) +{ + unsigned int cpu = smp_processor_id(); + + amd_uncore_cpu_dead(cpu); +} + static int __init amd_uncore_init(void) { - unsigned int cpu; + unsigned int cpu, cpu2; int ret = -ENODEV; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) - return -ENODEV; + goto fail_nodev; if (!cpu_has_topoext) - return -ENODEV; + goto fail_nodev; if (cpu_has_perfctr_nb) { amd_uncore_nb = alloc_percpu(struct amd_uncore *); - perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); + if (!amd_uncore_nb) { + ret = -ENOMEM; + goto fail_nb; + } + ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); + if (ret) + goto fail_nb; printk(KERN_INFO "perf: AMD NB counters detected\n"); ret = 0; @@ -522,20 +546,28 @@ static int __init amd_uncore_init(void) if (cpu_has_perfctr_l2) { amd_
[PATCH v2] x86/tlb_uv: Fix all memory allocation failure handling in UV
According to Peter, Thomas, Joe and David's advices, try fixing all memory allocation failure handling in tlb_uv.c compiled in x86_64 Signed-off-by: Zhouyi Zhou --- arch/x86/platform/uv/tlb_uv.c | 122 + 1 file changed, 87 insertions(+), 35 deletions(-) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index dfe605a..1a45dfa 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1680,7 +1680,7 @@ static int __init uv_ptc_init(void) /* * Initialize the sending side's sending buffers. */ -static void activation_descriptor_init(int node, int pnode, int base_pnode) +static int activation_descriptor_init(int node, int pnode, int base_pnode) { int i; int cpu; @@ -1701,7 +1701,9 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) */ dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC; bau_desc = kmalloc_node(dsize, GFP_KERNEL, node); - BUG_ON(!bau_desc); + if (!bau_desc) + return -ENOMEM; + gpa = uv_gpa(bau_desc); n = uv_gpa_to_gnode(gpa); @@ -1756,6 +1758,8 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) bcp = &per_cpu(bau_control, cpu); bcp->descriptor_base = bau_desc; } + + return 0; } /* @@ -1764,7 +1768,7 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) * - node is first node (kernel memory notion) on the uvhub * - pnode is the uvhub's physical identifier */ -static void pq_init(int node, int pnode) +static int pq_init(int node, int pnode) { int cpu; size_t plsize; @@ -1776,12 +1780,16 @@ static void pq_init(int node, int pnode) unsigned long last; struct bau_pq_entry *pqp; struct bau_control *bcp; + int ret = 0; plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry); vp = kmalloc_node(plsize, GFP_KERNEL, node); - pqp = (struct bau_pq_entry *)vp; - BUG_ON(!pqp); + if (!vp) { + ret = -ENOMEM; + goto fail; + } + pqp = (struct bau_pq_entry *)vp; cp = (char *)pqp + 31; pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5); @@ -1808,29 +1816,40 @@ static void pq_init(int node, int pnode) /* in effect, all msg_type's are set to MSG_NOOP */ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); + + return 0; +fail: + return ret; } /* * Initialization of each UV hub's structures */ -static void __init init_uvhub(int uvhub, int vector, int base_pnode) +static int __init init_uvhub(int uvhub, int vector, int base_pnode) { int node; int pnode; unsigned long apicid; + int ret; node = uvhub_to_first_node(uvhub); pnode = uv_blade_to_pnode(uvhub); - activation_descriptor_init(node, pnode, base_pnode); + ret = activation_descriptor_init(node, pnode, base_pnode); + if (ret) + return ret; - pq_init(node, pnode); + ret = pq_init(node, pnode); + if (ret) + return ret; /* * The below initialization can't be in firmware because the * messaging IRQ will be determined by the OS. 
*/ apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; write_mmr_data_config(pnode, ((apicid << 32) | vector)); + + return 0; } /* @@ -1926,7 +1945,7 @@ static int __init get_cpu_topology(int base_pnode, printk(KERN_EMERG "cpu %d pnode %d-%d beyond %d; BAU disabled\n", cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE); - return 1; + return -EINVAL; } bcp->osnode = cpu_to_node(cpu); @@ -1950,7 +1969,7 @@ static int __init get_cpu_topology(int base_pnode, if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) { printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus); - return 1; + return -EINVAL; } } return 0; @@ -1959,29 +1978,29 @@ static int __init get_cpu_topology(int base_pnode, /* * Each socket is to get a local array of pnodes/hubs. */ -static void make_per_cpu_thp(struct bau_control *smaster) +static int make_per_cpu_thp(struct bau_control *smaster) { int cpu; size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus(); - + int ret; smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode); + if (!smaster->thp) { + ret = -ENOMEM; + goto fail; + } + mem
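Reduced to its core, the conversion this patch applies everywhere is the following (a sketch, not the tlb_uv code itself):

    #include <errno.h>
    #include <stdlib.h>

    /* Leaf initializer: report failure instead of BUG_ON(!p). */
    static int leaf_init(void **out, size_t size)
    {
            void *p = malloc(size);

            if (!p)
                    return -ENOMEM;         /* was: BUG_ON(!p) */
            *out = p;
            return 0;
    }

    /* Callers propagate the error upward and unwind earlier steps. */
    static int hub_init(void **desc, void **queue)
    {
            int ret;

            ret = leaf_init(desc, 128);
            if (ret)
                    return ret;

            ret = leaf_init(queue, 256);
            if (ret) {
                    free(*desc);            /* undo the step that succeeded */
                    *desc = NULL;
                    return ret;
            }
            return 0;
    }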
Re: [PATCH] netdev: pktgen xmit packet through vlan interface
> You could just force pktgen to not support multi-skb on vlan interfaces? > > I thought we went through this a year or two ago and came up with > something like a 'pktgen-challenged' network interface flag? Ah yes, IFF_TX_SKB_SHARING does the job; very sorry for missing that. As a matter of fact, I have tailored the pktgen model for my personal use. By the way, would skb_clone save the CPU cycles spent on the memset(skb_put(skb, datalen), 0, datalen) call? Especially for Jesper's qdisc test scenario. > > Thanks, > Ben > > > -- > Ben Greear > Candela Technologies Inc http://www.candelatech.com
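For the record, IFF_TX_SKB_SHARING is the 'pktgen-challenged' flag Ben refers to. As far as I recall the mechanism, ether_setup() sets it by default, and a driver whose transmit path modifies the skb clears it, so pktgen refuses shared-skb (clone_skb) mode on that device. A sketch, with a hypothetical setup function:

    #include <linux/etherdevice.h>
    #include <linux/netdevice.h>

    static void my_dev_setup(struct net_device *dev)
    {
            ether_setup(dev);   /* sets IFF_TX_SKB_SHARING among the defaults */

            /* this xmit path writes into the skb, so sharing one skb
             * across transmissions (pktgen clone_skb) would corrupt it */
            dev->priv_flags &= ~IFF_TX_SKB_SHARING;
    }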
[RFC PATCH] x86_64: double the x86_64 kernel stack size?
Sometimes, when I compile the Linux network modules (especially bridge and netfilter) without optimization, the kernel crashes because of an exhausted kernel stack. A similar problem was discussed in http://lists.linuxfoundation.org/pipermail/bridge/2005-January/004402.html Is it OK to double the x86_64 kernel stack size? Signed-off-by: Zhouyi Zhou Tested-by: Zhouyi Zhou --- arch/x86/include/asm/page_64_types.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 43dcd80..dc4c629 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H -#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE_ORDER 2 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define CURRENT_MASK (~(THREAD_SIZE - 1)) -- 1.7.10.4
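To make the proposed change concrete, the arithmetic (assuming the usual 4 KiB x86_64 page size) is:

    /*
     * THREAD_SIZE = PAGE_SIZE << THREAD_SIZE_ORDER
     *
     *   order 1: 4096 << 1 =  8192 bytes ( 8 KiB, 2 contiguous pages)
     *   order 2: 4096 << 2 = 16384 bytes (16 KiB, 4 contiguous pages)
     *
     * The patch therefore doubles every thread's kernel stack, at the
     * cost of a higher-order (harder to satisfy) contiguous allocation
     * per thread.
     */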
[PATCH 1/1] PERF: The tail position of the event buffer should only be modified after the event has actually been used.
The tail position of the event buffer should only be modified after actually use that event. If not the event buffer could be invalid before use, and segment fault occurs when invoking perf top -G. Signed-off-by: Zhouyi Zhou --- tools/perf/builtin-kvm.c |4 tools/perf/builtin-top.c | 11 +-- tools/perf/builtin-trace.c|4 tools/perf/tests/code-reading.c |1 + tools/perf/tests/keep-tracking.c |1 + tools/perf/tests/mmap-basic.c |4 tools/perf/tests/open-syscall-tp-fields.c |7 ++- tools/perf/tests/perf-record.c|3 +++ tools/perf/tests/perf-time-to-tsc.c |5 - tools/perf/tests/sw-clock.c |7 ++- tools/perf/tests/task-exit.c |5 - tools/perf/util/evlist.c | 13 +++-- tools/perf/util/evlist.h |2 ++ tools/perf/util/python.c |7 ++- 14 files changed, 64 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 935d522..e240846 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -888,16 +888,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { err = perf_evlist__parse_sample(kvm->evlist, event, &sample); if (err) { + perf_evlist__mmap_write_tail(kvm->evlist, idx); pr_err("Failed to parse sample\n"); return -1; } err = perf_session_queue_event(kvm->session, event, &sample, 0); if (err) { + perf_evlist__mmap_write_tail(kvm->evlist, idx); pr_err("Failed to enqueue sample: %d\n", err); return -1; } + perf_evlist__mmap_write_tail(kvm->evlist, idx); + /* save time stamp of our first sample for this mmap */ if (n == 0) *mmap_time = sample.time; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2122141..5e03cf5 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -809,6 +809,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { ret = perf_evlist__parse_sample(top->evlist, event, &sample); if (ret) { + perf_evlist__mmap_write_tail(top->evlist, idx); pr_err("Can't parse sample, err = %d\n", ret); continue; } @@ -824,14 +825,18 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) switch (origin) { case PERF_RECORD_MISC_USER: ++top->us_samples; - if (top->hide_user_symbols) + if (top->hide_user_symbols) { + perf_evlist__mmap_write_tail(top->evlist, idx); continue; + } machine = &session->machines.host; break; case PERF_RECORD_MISC_KERNEL: ++top->kernel_samples; - if (top->hide_kernel_symbols) + if (top->hide_kernel_symbols) { + perf_evlist__mmap_write_tail(top->evlist, idx); continue; + } machine = &session->machines.host; break; case PERF_RECORD_MISC_GUEST_KERNEL: @@ -847,6 +852,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) */ /* Fall thru */ default: + perf_evlist__mmap_write_tail(top->evlist, idx); continue; } @@ -859,6 +865,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) machine__process_event(machine, event); } else ++session->stats.nr_unknown_events; + perf_evlist__mmap_write_tail(top->evlist, idx); } } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 71aa3e3..7afb6cd 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -986,6 +986,7 @@ again: err = perf_evlist__parse_sample(evlist, event, &sample); if (err) { + perf_evlist__mmap_writ
Re: [PATCH 1/1] PERF: The tail position of the event buffer should only be modified after the event has actually been used.
Thanks Arnaldo For Reviewing and Nice simplication. The next headache should be how to quick copy out the digest of event. >From my own engineering experience, it is unsafe to keep the pointer to shared ring buffer for too long. On Fri, Oct 25, 2013 at 2:23 AM, Arnaldo Carvalho de Melo wrote: > Em Thu, Oct 24, 2013 at 03:43:33PM +0800, Zhouyi Zhou escreveu: >> >> The tail position of the event buffer should only be >> modified after actually use that event. If not the event >> buffer could be invalid before use, and segment fault occurs when invoking >> perf top -G. > > Good catch! > > Long standing problem, but please take a look at the patch below, which > should be a simpler version of your fix, main points: > > . Rename perf_evlist__write_tail to perf_evlist__mmap_consume: > > So it should be a transaction end counterpart to > perf_evlist__mmap_read, the "writing the tail" detail gets nicely > abstracted away. > > . The error exit path for 'perf test' entries don't need to consume the > event, since it will be all shutdown anyway > > . In other cases avoid multiple calls to the consume method by just > goto'ing to the end of the loop, where we would consume the event > anyway when everything is all right. > > Please take a look, if you're ok with it, I'll keep your patch > authorship and just add a quick note about the simplifications I made. > > After that I think we should, a-la skb_free/skb_consume have a another > method, namely perf_evlist__mmap_discard, so that we can keep a tab on > how many events were successfully consumed and how many were not > processed due to some problem. > > WRT the simplifications: > > Your patch: > > 14 files changed, 65 insertions(+), 9 deletions(-) > > Your patch + my changes: > > 14 files changed, 49 insertions(+), 16 deletions(-) > > :-) > > - Arnaldo > >> Signed-off-by: Zhouyi Zhou >> --- >> tools/perf/builtin-kvm.c |4 >> tools/perf/builtin-top.c | 11 +-- >> tools/perf/builtin-trace.c|4 >> tools/perf/tests/code-reading.c |1 + >> tools/perf/tests/keep-tracking.c |1 + >> tools/perf/tests/mmap-basic.c |4 >> tools/perf/tests/open-syscall-tp-fields.c |7 ++- >> tools/perf/tests/perf-record.c|3 +++ >> tools/perf/tests/perf-time-to-tsc.c |5 - >> tools/perf/tests/sw-clock.c |7 ++- >> tools/perf/tests/task-exit.c |5 - >> tools/perf/util/evlist.c| 13 +++-- >> tools/perf/util/evlist.h|2 ++ >> tools/perf/util/python.c |7 ++- >> 14 files changed, 64 insertions(+), 8 deletions(-) >> >> diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c >> index 935d522..e240846 100644 >> --- a/tools/perf/builtin-kvm.c >> +++ b/tools/perf/builtin-kvm.c >> @@ -888,16 +888,20 @@ static s64 perf_kvm__mmap_read_idx(struct >> perf_kvm_stat *kvm, int idx, >> while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { >> err = perf_evlist__parse_sample(kvm->evlist, event, &sample); >> if (err) { >> + perf_evlist__mmap_write_tail(kvm->evlist, idx); >> pr_err("Failed to parse sample\n"); >> return -1; >> } >> >> err = perf_session_queue_event(kvm->session, event, &sample, >> 0); >> if (err) { >> + perf_evlist__mmap_write_tail(kvm->evlist, idx); >> pr_err("Failed to enqueue sample: %d\n", err); >> return -1; >> } >> >> + perf_evlist__mmap_write_tail(kvm->evlist, idx); >> + >> /* save time stamp of our first sample for this mmap */ >> if (n == 0) >> *mmap_time = sample.time; >> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c >> index 2122141..5e03cf5 100644 >> --- a/tools/perf/builtin-top.c >> +++ b/tools/perf/builtin-top.c >> @@ -809,6 +809,7 @@ 
static void perf_top__mmap_read_idx(struct perf_top >> *top, int idx) >> while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { >> ret = perf_evlist__parse_sample(top->evlist, event, &sample); >> if (ret) { >>
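The discipline Arnaldo describes, sketched as a drain loop. The names follow the thread; the exact perf-tool signatures of that era should be treated as approximate, and handle_sample() is a hypothetical handler:

    static void drain_one_mmap(struct perf_evlist *evlist, int idx)
    {
            union perf_event *event;
            struct perf_sample sample;

            while ((event = perf_evlist__mmap_read(evlist, idx)) != NULL) {
                    if (perf_evlist__parse_sample(evlist, event, &sample))
                            goto next_event; /* bad sample: still consume it */

                    handle_sample(&sample);
    next_event:
                    perf_evlist__mmap_consume(evlist, idx); /* only now may
                                             * the kernel reuse the slot */
            }
    }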
Re: [PATCH 1/1] netfilter/jump_label: use HAVE_JUMP_LABEL?
Thanks Florian for reviewing > -Original Messages- > From: "Florian Westphal" > > Zhouyi Zhou wrote: > > > > CONFIG_JUMP_LABEL doesn't ensure HAVE_JUMP_LABEL; when the > > latter is not defined, use the maintainer's own mutex to guard > > the modification of global values. CONFIG_JUMP_LABEL says the user wants to use jump labels. But we also need the toolchain to support it. That is reflected in CC_HAVE_ASM_GOTO=y, and if both are set then HAVE_JUMP_LABEL is set to true. > I don't understand this patch. > > What is the problem you are fixing? There is basically no real problem here. This patch only tries to make kernel code that uses the static_key infrastructure appear "unified". > > The intent is to only use static_key infrastructure > if user has enabled CONFIG_JUMP_LABEL. The other parts of the kernel either use #ifdef HAVE_JUMP_LABEL, or use no "#ifdef" at all (the two exceptions are the netfilter and powerpc modules, to which I sent patches). Jason suggested that I make the patches: https://lkml.org/lkml/2014/8/20/761 "Unified" is the reason, I guess :-) so rookies like me can have unified examples to follow. Cheers Zhouyi
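The convention under discussion, in miniature. This is a sketch: my_hook_* are illustrative names, and in the fallback branch writers are assumed to hold the subsystem's own mutex, as the commit message says:

    #include <linux/jump_label.h>
    #include <linux/types.h>

    #ifdef HAVE_JUMP_LABEL
    static struct static_key my_hook_needed = STATIC_KEY_INIT_FALSE;

    static inline bool my_hook_active(void)
    {
            /* compiled as a patched jump: near-zero cost while disabled */
            return static_key_false(&my_hook_needed);
    }
    #else
    static bool my_hook_needed;     /* writers hold the subsystem mutex */

    static inline bool my_hook_active(void)
    {
            return my_hook_needed;  /* plain load when asm goto is unavailable */
    }
    #endif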
[PATCH V2 1/1] netfilter/jump_label: HAVE_JUMP_LABEL instead of CONFIG_JUMP_LABEL
Use HAVE_JUMP_LABEL as elsewhere in the kernel to ensure that the toolchain has the required support in addition to CONFIG_JUMP_LABEL being set. Signed-off-by: Zhouyi Zhou Reviewed-by: Florian Westphal --- include/linux/netfilter.h |5 +++-- net/netfilter/core.c |6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2077489..83a1952 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #ifdef CONFIG_NETFILTER static inline int NF_DROP_GETERR(int verdict) @@ -99,8 +100,8 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; -#if defined(CONFIG_JUMP_LABEL) -#include +#ifdef HAVE_JUMP_LABEL + extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) { diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a93c97f..024a2e2 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo); struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); #endif @@ -72,7 +72,7 @@ int nf_register_hook(struct nf_hook_ops *reg) } list_add_rcu(&reg->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif return 0; @@ -84,7 +84,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) mutex_lock(&nf_hook_mutex); list_del_rcu(&reg->list); mutex_unlock(&nf_hook_mutex); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); -- 1.7.10.4
[PATCH 1/1] jump_label: tidy jump_label_ratelimit.h
jump_label_ratelimit.h was split out of jump_label.h so that includers can avoid pulling in linux/workqueue.h. As HAVE_JUMP_LABEL is only defined in jump_label.h, the following patch makes jump_label_ratelimit.h tidier. Compiled and tested on x86_64. Signed-off-by: Zhouyi Zhou --- include/linux/jump_label_ratelimit.h |5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h index 089f70f..0d34d7e 100644 --- a/include/linux/jump_label_ratelimit.h +++ b/include/linux/jump_label_ratelimit.h @@ -4,15 +4,12 @@ #include #include -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL struct static_key_deferred { struct static_key key; unsigned long timeout; struct delayed_work work; }; -#endif - -#ifdef HAVE_JUMP_LABEL extern void static_key_slow_dec_deferred(struct static_key_deferred *key); extern void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); -- 1.7.10.4
[PATCH 1/1] powerpc/jump_label: use HAVE_JUMP_LABEL?
CONFIG_JUMP_LABEL doesn't ensure HAVE_JUMP_LABEL; when the latter is not defined, use the maintainer's own mutex to guard the modification of global values. Signed-off-by: Zhouyi Zhou --- arch/powerpc/platforms/powernv/opal-tracepoints.c |2 +- arch/powerpc/platforms/pseries/lpar.c |2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index d8a000a..ae14c40 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -2,7 +2,7 @@ #include #include -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL struct static_key opal_tracepoint_key = STATIC_KEY_INIT; void opal_tracepoint_regfunc(void) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 34e6423..059cfe0 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -642,7 +642,7 @@ EXPORT_SYMBOL(arch_free_page); #endif #ifdef CONFIG_TRACEPOINTS -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; void hcall_tracepoint_regfunc(void) -- 1.7.10.4
[PATCH 1/1] netfilter/jump_label: use HAVE_JUMP_LABEL?
CONFIG_JUMP_LABEL doesn't ensure HAVE_JUMP_LABEL; when the latter is not defined, use the maintainer's own mutex to guard the modification of global values. Signed-off-by: Zhouyi Zhou --- include/linux/netfilter.h |5 +++-- net/netfilter/core.c |6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2077489..83a1952 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #ifdef CONFIG_NETFILTER static inline int NF_DROP_GETERR(int verdict) @@ -99,8 +100,8 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; -#if defined(CONFIG_JUMP_LABEL) -#include +#ifdef HAVE_JUMP_LABEL + extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) { diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a93c97f..024a2e2 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo); struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); #endif @@ -72,7 +72,7 @@ int nf_register_hook(struct nf_hook_ops *reg) } list_add_rcu(&reg->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif return 0; @@ -84,7 +84,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) mutex_lock(&nf_hook_mutex); list_del_rcu(&reg->list); mutex_unlock(&nf_hook_mutex); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); -- 1.7.10.4
Re: [PATCH 1/1] jump_label: tidy jump_label_ratelimit.h
Thanks Jason for reviewing it > -Original Messages- > From: "Jason Baron" > Sent Time: Thursday, August 21, 2014 > To: "Zhouyi Zhou" > Cc: drjo...@redhat.com, konrad.w...@oracle.com, > raghavendra...@linux.vnet.ibm.com, mi...@kernel.org, da...@davemloft.net, > han...@stressinduktion.org, linux-kernel@vger.kernel.org, "Zhouyi Zhou" > > Subject: Re: [PATCH 1/1] jump_label: tidy jump_label_ratelimit.h > > Yes, that looks good. While at it I grep'd the tree for > 'CONFIG_JUMP_LABEL', and found some uses in the > netfilter code which should probably be > 'HAVE_JUMP_LABEL' as well. I have submitted two patches according to you suggestions: https://lkml.org/lkml/2014/8/20/885 https://lkml.org/lkml/2014/8/20/883 Hope I have made them right Thanks Zhouyi > > Thanks, > > -Jason > > On 08/20/2014 05:29 AM, Zhouyi Zhou wrote: > > jump_label_ratelimit.h is split from jump_label.h to enable the > > includers who don't want linux/workqueue.h. > > As HAVE_JUMP_LABEL is only defined in jump_label.h, will following > > patch makes jump_labe_ratelimit.h more tidy? > > > > Compiled and Tested in x86_64 > > Signed-off-by: Zhouyi Zhou > > --- > > include/linux/jump_label_ratelimit.h |5 + > > 1 file changed, 1 insertion(+), 4 deletions(-) > > > > diff --git a/include/linux/jump_label_ratelimit.h > > b/include/linux/jump_label_ratelimit.h > > index 089f70f..0d34d7e 100644 > > --- a/include/linux/jump_label_ratelimit.h > > +++ b/include/linux/jump_label_ratelimit.h > > @@ -4,15 +4,12 @@ > > #include > > #include > > > > -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) > > +#ifdef HAVE_JUMP_LABEL > > struct static_key_deferred { > > struct static_key key; > > unsigned long timeout; > > struct delayed_work work; > > }; > > -#endif > > - > > -#ifdef HAVE_JUMP_LABEL > > extern void static_key_slow_dec_deferred(struct static_key_deferred *key); > > extern void > > jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC] ACPICA: save a function call
In function acpi_ps_build_named_op, the if statement at line 221 ensures that the status argument to the acpi_ps_next_parse_state call has the value 0, while acpi_ps_next_parse_state does not change any global state when called with callback_status == 0. I think the following lines should be removed to save some CPU cycles, because the compiler won't do it (the callee is defined in another compilation unit, so it can't be inlined). Signed-off-by: Zhouyi Zhou --- drivers/acpi/acpica/psobject.c |8 1 file changed, 8 deletions(-) diff --git a/drivers/acpi/acpica/psobject.c b/drivers/acpi/acpica/psobject.c index 2f5ddd8..4ff6530 100644 --- a/drivers/acpi/acpica/psobject.c +++ b/drivers/acpi/acpica/psobject.c @@ -230,14 +230,6 @@ acpi_ps_build_named_op(struct acpi_walk_state *walk_state, return_ACPI_STATUS(AE_CTRL_PARSE_CONTINUE); } - status = acpi_ps_next_parse_state(walk_state, *op, status); - if (ACPI_FAILURE(status)) { - if (status == AE_CTRL_PENDING) { - status = AE_CTRL_PARSE_PENDING; - } - return_ACPI_STATUS(status); - } - acpi_ps_append_arg(*op, unnamed_op->common.value.arg); if ((*op)->common.aml_opcode == AML_REGION_OP || -- 1.7.10.4
Re: Re: [PATCH V7] netfilter: h323: avoid potential attack
Thanks Pablo for reviewing > From: "Pablo Neira Ayuso" > Sent Time: Saturday, March 12, 2016 > To: "Zhouyi Zhou" > On Sun, Feb 21, 2016 at 12:03:59AM +0800, Zhouyi Zhou wrote: > > I think hackers could build a malicious h323 packet to overflow (iph->ihl * 4 + th->doff * 4); > You cannot trust the information that is available in the header. If > this is bogus this check will be defeated. That's why we pass this > protoff parameter to each function. The length of the IP header is checked in the function nf_conntrack_in, which calls the get_l4proto hook to detect a bogus IP header. Nowhere in the call stack down to set_addr is a bogus TCP header checked for, and my code does that job: + th = (void *)iph + iph->ihl * 4; + datalen = skb->len - (iph->ihl * 4 + th->doff * 4); + /* check offset overflow */ + if (addroff > datalen) + return -1; If th->doff is too big, addroff will be greater than datalen. > > You also refer to get_h225_addr() in your description. That function > always copies 4 or 16 bytes, so I would appreciate if you can describe > the possible issue further. The problem in get_h225_addr lies in a bogus taddr->ipAddress.ip: if this value is too big, it may make the pointer p point to a nonexistent address. (gdb) list 686 681 struct h323_ct_state *ctstate) 682 { 683 const unsigned char *p; 684 int len; 685 686 switch (taddr->choice) { 687 case eTransportAddress_ipAddress: 688 if (nf_ct_l3num(ct) != AF_INET) 689 return 0; 690 p = data + taddr->ipAddress.ip; Thanks for your time and effort. Cheers Zhouyi
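Spelling the quoted check out as a self-contained helper (names simplified from the patch, which also handles the UDP case):

    #include <linux/ip.h>
    #include <linux/skbuff.h>
    #include <linux/tcp.h>

    /* Reject a mangling offset that a forged TCP header, or a forged
     * decoded field, would push beyond the actually-received payload. */
    static int addroff_ok(const struct sk_buff *skb, int addroff)
    {
            const struct iphdr *iph = ip_hdr(skb);
            const struct tcphdr *th = (const void *)iph + iph->ihl * 4;
            int datalen = skb->len - (iph->ihl * 4 + th->doff * 4);

            /* a huge th->doff can even drive datalen negative;
             * the signed comparison rejects that case too */
            return (datalen < 0 || addroff > datalen) ? -1 : 0;
    }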
[PATCH] RCU: Adjust comments for force_qs_rnp
Previously, threads blocked on offlining CPUs were migrated to the root rcu_node, so there was a need to initiate RCU priority boosting on the root rcu_node. Current RCU does not migrate blocked tasks even if all corresponding CPUs go offline; see commit d19fb8d1f3f6 ("rcu: Don't migrate blocked tasks even if all corresponding CPUs offline"). Consequently, RCU no longer initiates priority boosting on the root rcu_node; see commit 1be0085b515e ("rcu: Don't initiate RCU priority boosting on root rcu_node"). So I think the comments for force_qs_rnp should be adjusted. Signed-off-by: Zhouyi Zhou --- kernel/rcu/tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index acd6ccf..efddffb 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2548,9 +2548,9 @@ void rcu_sched_clock_irq(int user) } /* - * Scan the leaf rcu_node structures, processing dyntick state for any that - * have not yet encountered a quiescent state, using the function specified. - * Also initiate boosting for any threads blocked on the root rcu_node. + * Scan the leaf rcu_node structures, initiating boost for any threads blocked + * on them, processing dyntick state for any that have not yet encountered a + * quiescent state, using the function specified. * * The caller must have suppressed start of new grace periods. */ -- 2.1.4
[PATCH] net: return value of skb_linearize should be handled in Linux kernel
kmalloc_reserve may fail to allocate memory inside skb_linearize, which means skb_linearize's return value should not be ignored. Following patch correct the uses of skb_linearize. Compiled in x86_64 Signed-off-by: Zhouyi Zhou --- drivers/infiniband/hw/nes/nes_nic.c | 5 +++-- drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 6 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +-- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 7 +-- drivers/scsi/fcoe/fcoe.c | 5 - net/tipc/link.c | 3 ++- net/tipc/name_distr.c | 5 - 7 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 2b27d13..69372ea 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -662,10 +662,11 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) nesnic->sq_head &= nesnic->sq_size-1; } } else { - nesvnic->linearized_skbs++; hoffset = skb_transport_header(skb) - skb->data; nhoffset = skb_network_header(skb) - skb->data; - skb_linearize(skb); + if (skb_linearize(skb)) + return NETDEV_TX_BUSY; + nesvnic->linearized_skbs++; skb_set_transport_header(skb, hoffset); skb_set_network_header(skb, nhoffset); if (!nes_nic_send(skb, netdev)) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 2a653ec..ab787cb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -490,7 +490,11 @@ int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter, */ if ((fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA) && (fctl & FC_FC_END_SEQ)) { - skb_linearize(skb); + int err = 0; + + err = skb_linearize(skb); + if (err) + return err; crc = (struct fcoe_crc_eof *)skb_put(skb, sizeof(*crc)); crc->fcoe_eof = FC_EOF_T; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index fee1f29..4926d48 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2173,8 +2173,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, total_rx_bytes += ddp_bytes; total_rx_packets += DIV_ROUND_UP(ddp_bytes, mss); - } - if (!ddp_bytes) { + } else { dev_kfree_skb_any(skb); continue; } diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index f9ddb61..197d02e 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -542,8 +542,11 @@ static void bnx2fc_recv_frame(struct sk_buff *skb) return; } - if (skb_is_nonlinear(skb)) - skb_linearize(skb); + if (skb_linearize(skb)) { + kfree_skb(skb); + return; + } + mac = eth_hdr(skb)->h_source; dest_mac = eth_hdr(skb)->h_dest; diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 9bd41a3..f691b97 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -1685,7 +1685,10 @@ static void fcoe_recv_frame(struct sk_buff *skb) skb->dev ? 
skb->dev->name : ""); port = lport_priv(lport); - skb_linearize(skb); /* check for skb_is_nonlinear is within skb_linearize */ + if (skb_linearize(skb)) { + kfree_skb(skb); + return; + } /* * Frame length checks and setting up the header pointers diff --git a/net/tipc/link.c b/net/tipc/link.c index bda89bf..077c570 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1446,7 +1446,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (tipc_own_addr(l->net) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); - skb_linearize(skb); + if (skb_linearize(skb)) + goto exit; hdr = buf_msg(skb); data = msg_data(hdr); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index c1cfd92..4e05d2a 100644 --- a/net/tipc/name_distr.c +++ b/net/tip
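All of the call sites in this series reduce to the same pattern; a minimal receive-side sketch:

    #include <linux/skbuff.h>

    /* skb_linearize() can fail (its internal reallocation may return
     * -ENOMEM), after which the skb must not be treated as linear. */
    static void my_rcv(struct sk_buff *skb)
    {
            if (skb_linearize(skb)) {
                    kfree_skb(skb); /* drop rather than read scattered data */
                    return;
            }

            /* from here on, skb->data covers the whole frame */
            consume_skb(skb);       /* placeholder for real processing */
    }

Transmit paths differ only in the failure action, e.g. counting a drop or reporting the queue busy, as the nes hunk above does.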
Re: [PATCH] net: return value of skb_linearize should be handled in Linux kernel
On Wed, Dec 7, 2016 at 1:02 PM, Cong Wang wrote: > On Mon, Dec 5, 2016 at 11:10 PM, Zhouyi Zhou wrote: >> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c >> b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c >> index 2a653ec..ab787cb 100644 >> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c >> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c >> @@ -490,7 +490,11 @@ int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter, >> */ >> if ((fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA) && >> (fctl & FC_FC_END_SEQ)) { >> - skb_linearize(skb); >> + int err = 0; >> + >> + err = skb_linearize(skb); >> + if (err) >> + return err; > > > You can reuse 'rc' instead of adding 'err'. rc here holds the length of the data being DDPed; if rc were reused, a successful skb_linearize would reset rc to 0. > > > >> crc = (struct fcoe_crc_eof *)skb_put(skb, sizeof(*crc)); >> crc->fcoe_eof = FC_EOF_T; >> } >> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c >> b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c >> index fee1f29..4926d48 100644 >> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c >> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c >> @@ -2173,8 +2173,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector >> *q_vector, >> total_rx_bytes += ddp_bytes; >> total_rx_packets += DIV_ROUND_UP(ddp_bytes, >> mss); >> - } >> - if (!ddp_bytes) { >> + } else { >> dev_kfree_skb_any(skb); >> continue; >> } > > > This piece doesn't seem to be related. If ddp_bytes is negative, some error occurred, and I think the skb should not be passed to the upper layer.
[PATCH 1/1] infiniband: nes: return value of skb_linearize should be handled
Return value of skb_linearize should be handled in function nes_netdev_start_xmit. Compiled in x86_64 Signed-off-by: Zhouyi Zhou Reviewed-by: Yuval Shaia Reviewed-by: Eric Dumazet --- drivers/infiniband/hw/nes/nes_nic.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 2b27d13..dfd1f57 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -662,10 +662,14 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) nesnic->sq_head &= nesnic->sq_size-1; } } else { - nesvnic->linearized_skbs++; hoffset = skb_transport_header(skb) - skb->data; nhoffset = skb_network_header(skb) - skb->data; - skb_linearize(skb); + if (skb_linearize(skb)) { + nesvnic->tx_sw_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; + } + nesvnic->linearized_skbs++; skb_set_transport_header(skb, hoffset); skb_set_network_header(skb, nhoffset); if (!nes_nic_send(skb, netdev)) -- 1.9.1
[PATCH 1/1] ixgbe: fcoe: return value of skb_linearize should be handled
Signed-off-by: Zhouyi Zhou Reviewed-by: Cong Wang Reviewed-by: Yuval Shaia Reviewed-by: Eric Dumazet --- drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 6 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 2a653ec..7b6bdb7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -490,7 +490,11 @@ int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter, */ if ((fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA) && (fctl & FC_FC_END_SEQ)) { - skb_linearize(skb); + int err; + + err = skb_linearize(skb); + if (err) + return err; crc = (struct fcoe_crc_eof *)skb_put(skb, sizeof(*crc)); crc->fcoe_eof = FC_EOF_T; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index fee1f29..4926d48 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2173,8 +2173,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, total_rx_bytes += ddp_bytes; total_rx_packets += DIV_ROUND_UP(ddp_bytes, mss); - } - if (!ddp_bytes) { + } else { dev_kfree_skb_any(skb); continue; } -- 1.9.1
[PATCH 1/1] scsi: fcoe: return value of skb_linearize should be handled
Return value of skb_linearize should be handled. Signed-off-by: Zhouyi Zhou Reviewed-by: Yuval Shaia --- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 6 -- drivers/scsi/fcoe/fcoe.c | 5 - 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index f9ddb61..142f7e2 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -542,8 +542,10 @@ static void bnx2fc_recv_frame(struct sk_buff *skb) return; } - if (skb_is_nonlinear(skb)) - skb_linearize(skb); + if (skb_linearize(skb)) { + kfree_skb(skb); + return; + } mac = eth_hdr(skb)->h_source; dest_mac = eth_hdr(skb)->h_dest; diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 9bd41a3..4e3499c 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -1685,7 +1685,10 @@ static void fcoe_recv_frame(struct sk_buff *skb) skb->dev ? skb->dev->name : ""); port = lport_priv(lport); - skb_linearize(skb); /* check for skb_is_nonlinear is within skb_linearize */ + if (skb_linearize(skb)) { + kfree_skb(skb); + return; + } /* * Frame length checks and setting up the header pointers -- 1.9.1
[PATCH 1/1] tipc: return value of skb_linearize should be handled
return value of skb_linearize should be handled Signed-off-by: Zhouyi Zhou Reviewed-by: Cong Wang Reviewed-by: Yuval Shaia Reviewed-by: Eric Dumazet --- net/tipc/link.c | 3 ++- net/tipc/name_distr.c | 5 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index bda89bf..077c570 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1446,7 +1446,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (tipc_own_addr(l->net) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); - skb_linearize(skb); + if (skb_linearize(skb)) + goto exit; hdr = buf_msg(skb); data = msg_data(hdr); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index c1cfd92..4e05d2a 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -356,7 +356,10 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) spin_lock_bh(&tn->nametbl_lock); for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) { - skb_linearize(skb); + if (skb_linearize(skb)) { + kfree_skb(skb); + continue; + } msg = buf_msg(skb); mtype = msg_type(msg); item = (struct distr_item *)msg_data(msg); -- 1.9.1
Re: [PATCH 1/1] scsi: fcoe: return value of skb_linearize should be handled
Thanks Johannes for reviewing, your code is indeeded more elegant On Wed, Dec 7, 2016 at 4:28 PM, Johannes Thumshirn wrote: > Hi Zhouyi, > On Wed, Dec 07, 2016 at 04:00:00PM +0800, Zhouyi Zhou wrote: >> Return value of skb_linearize should be handled. >> >> Signed-off-by: Zhouyi Zhou >> Reviewed-by: Yuval Shaia >> --- >> drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 6 -- >> drivers/scsi/fcoe/fcoe.c | 5 - >> 2 files changed, 8 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c >> b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c >> index f9ddb61..142f7e2 100644 >> --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c >> +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c >> @@ -542,8 +542,10 @@ static void bnx2fc_recv_frame(struct sk_buff *skb) >> return; >> } >> >> - if (skb_is_nonlinear(skb)) >> - skb_linearize(skb); >> + if (skb_linearize(skb)) { >> + kfree_skb(skb); >> + return; >> + } >> mac = eth_hdr(skb)->h_source; >> dest_mac = eth_hdr(skb)->h_dest; >> >> diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c >> index 9bd41a3..4e3499c 100644 >> --- a/drivers/scsi/fcoe/fcoe.c >> +++ b/drivers/scsi/fcoe/fcoe.c >> @@ -1685,7 +1685,10 @@ static void fcoe_recv_frame(struct sk_buff *skb) >> skb->dev ? skb->dev->name : ""); >> >> port = lport_priv(lport); >> - skb_linearize(skb); /* check for skb_is_nonlinear is within >> skb_linearize */ >> + if (skb_linearize(skb)) { >> + kfree_skb(skb); >> + return; >> + } >> >> /* >>* Frame length checks and setting up the header pointers >> -- >> 1.9.1 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in >> the body of a message to majord...@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > > I'd personally prefer something like: > > diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c > index 9bd41a3..bc6fa6c 100644 > --- a/drivers/scsi/fcoe/fcoe.c > +++ b/drivers/scsi/fcoe/fcoe.c > @@ -1673,8 +1673,7 @@ static void fcoe_recv_frame(struct sk_buff *skb) > lport = fr->fr_dev; > if (unlikely(!lport)) { > FCOE_NETDEV_DBG(skb->dev, "NULL lport in skb\n"); > - kfree_skb(skb); > - return; > + goto free_skb; > } > > FCOE_NETDEV_DBG(skb->dev, > @@ -1685,8 +1684,8 @@ static void fcoe_recv_frame(struct sk_buff *skb) > skb->dev ? skb->dev->name : ""); > > port = lport_priv(lport); > - skb_linearize(skb); /* check for skb_is_nonlinear is within > skb_linearize */ > - > + if (skb_linearize(skb)) > + goto free_skb; > /* > * Frame length checks and setting up the header pointers > * was done in fcoe_rcv already. > @@ -1732,6 +1731,7 @@ static void fcoe_recv_frame(struct sk_buff *skb) > drop: > stats->ErrorFrames++; > put_cpu(); > +free_skb: > kfree_skb(skb); > } > > > For bnx2fc as well if Chad agrees. > > Thanks, > Johannes > > -- > Johannes Thumshirn Storage > jthumsh...@suse.de+49 911 74053 689 > SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg > GF: Felix Imendörffer, Jane Smithard, Graham Norton > HRB 21284 (AG Nürnberg) > Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850 Thanks Zhouyi
Re: [PATCH 1/1] ixgbe: fcoe: return value of skb_linearize should be handled
Thanks Jeff for your advice. Sorry for my inexperience as a Linux kernel rookie. Zhouyi On Thu, Dec 8, 2016 at 1:30 AM, Jeff Kirsher wrote: > On Wed, 2016-12-07 at 15:43 +0800, Zhouyi Zhou wrote: >> Signed-off-by: Zhouyi Zhou >> Reviewed-by: Cong Wang >> Reviewed-by: Yuval Shaia >> Reviewed-by: Eric Dumazet >> --- >> drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 6 +- >> drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +-- >> 2 files changed, 6 insertions(+), 3 deletions(-) > Did Cong, Yuval and Eric give their Reviewed-by offline? I see they made comments and suggestions, but never saw them actually give you their Reviewed-by. You cannot automatically add their Reviewed-by, Signed-off-by, etc. just because someone provides feedback on your patch.
[PATCH] relay: fix potential memory leak
When relay_open_buf fails in relay_open, the code jumps to free_bufs, but chan is never freed there. In addition, warn users who forget to provide a create-file hook. Signed-off-by: Zhouyi Zhou --- kernel/relay.c | 8 1 file changed, 8 insertions(+) diff --git a/kernel/relay.c b/kernel/relay.c index 074994b..e0990c7 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -589,6 +589,13 @@ struct rchan *relay_open(const char *base_filename, chan->parent = parent; chan->private_data = private_data; if (base_filename) { + if (!cb || !cb->create_buf_file) { + printk(KERN_ERR + "relay_open: has base filename without " + "providing hook to create file\n"); + kfree(chan); + return NULL; + } chan->has_base_filename = 1; strlcpy(chan->base_filename, base_filename, NAME_MAX); } @@ -614,6 +621,7 @@ free_bufs: kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); + kfree(chan); return NULL; } EXPORT_SYMBOL_GPL(relay_open); -- 1.9.1
Re: Re: [PATCH] relay: fix potential memory leak
Thanks Andrew for reviewing > > In addition, warn users who forget to provide a > > create-file hook. > > Why? What's the value in this? > > If the user didn't provide ->create_buf_file then setup_callbacks() > will provide them with create_buf_file_default_callback() - what's > wrong with that? > Beginners like me will probably call relay_open with a base_filename and a NULL callback, or with a callback that lacks a create_buf_file hook. Such a call fails in the subfunction relay_open_buf, because create_buf_file_default_callback returns an empty dentry. I guess it would be good to warn beginners to provide the filesystem-related create hooks at an earlier stage, or they fail without knowing what has happened. Best wishes Zhouyi
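For anyone who hits the same trap, a minimal sketch of the hook pair the warning asks for, backed by debugfs. The callback signatures are from memory of linux/relay.h and should be checked against the kernel version at hand:

    #include <linux/debugfs.h>
    #include <linux/relay.h>

    static struct dentry *my_create_buf_file(const char *filename,
                                             struct dentry *parent, umode_t mode,
                                             struct rchan_buf *buf, int *is_global)
    {
            /* expose each per-cpu sub-buffer as a debugfs file */
            return debugfs_create_file(filename, mode, parent, buf,
                                       &relay_file_operations);
    }

    static int my_remove_buf_file(struct dentry *dentry)
    {
            debugfs_remove(dentry);
            return 0;
    }

    static struct rchan_callbacks my_relay_cb = {
            .create_buf_file = my_create_buf_file,
            .remove_buf_file = my_remove_buf_file,
    };

    /* chan = relay_open("trace", parent_dir, subbuf_size, n_subbufs,
     *                   &my_relay_cb, NULL); */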
patch mail rejected by vger.kernel.org
Hi, Yesterday, I cced 5 patches to linux-kernel@vger.kernel.org using send-email, but all of them were rejected by the mail server at vger.kernel.org as follows: "Dear yizhouz...@ict.ac.cn, Your message to linux-kernel@vger.kernel.org was rejected by the recipient domain. The error that the other server returned was: " SMTP error, RCPT TO: 451 4.7.1 Hello [159.226.251.20], for recipient address the policy analysis reported: zpostgrey: connect: Connection refused". Your original message is included as attachment." I have successfully posted to linux-kernel@vger.kernel.org before, and the maintainers received my patches correctly yesterday. Do you know what is going on? Many thanks
[PATCH V7] netfilter: h323: avoid potential attack
I think hackers chould build a malicious h323 packet to overflow the pointer p which will panic during the memcpy(addr, p, len) For example, he may fabricate a very large taddr->ipAddress.ip in function get_h225_addr. To avoid above, I add buffer boundary checking both in get addr functions and set addr functions. Because the temporary h323 buffer is dynamiclly allocated, I remove the h323 spin lock in my patch. Signed-off-by: Zhouyi Zhou --- include/linux/netfilter/nf_conntrack_h323.h | 17 +- net/ipv4/netfilter/nf_nat_h323.c| 33 ++- net/netfilter/nf_conntrack_h323_main.c | 328 +--- 3 files changed, 244 insertions(+), 134 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h index 858d9b2..6c6fea1 100644 --- a/include/linux/netfilter/nf_conntrack_h323.h +++ b/include/linux/netfilter/nf_conntrack_h323.h @@ -27,11 +27,17 @@ struct nf_ct_h323_master { }; }; +struct h323_ct_state { + unsigned char *buf; + unsigned char *data; + int buflen; +}; + struct nf_conn; int get_h225_addr(struct nf_conn *ct, unsigned char *data, TransportAddress *taddr, union nf_inet_addr *addr, - __be16 *port); + __be16 *port, struct h323_ct_state *ctstate); void nf_conntrack_h245_expect(struct nf_conn *new, struct nf_conntrack_expect *this); void nf_conntrack_q931_expect(struct nf_conn *new, @@ -50,12 +56,14 @@ extern int (*set_sig_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, unsigned char **data, -TransportAddress *taddr, int count); +TransportAddress *taddr, int count, +struct h323_ct_state *ctstate); extern int (*set_ras_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, unsigned char **data, -TransportAddress *taddr, int count); +TransportAddress *taddr, int count, +struct h323_ct_state *ctstate); extern int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -90,7 +98,8 @@ extern int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct, unsigned int protoff, unsigned char **data, TransportAddress *taddr, int idx, __be16 port, -struct nf_conntrack_expect *exp); +struct nf_conntrack_expect *exp, +struct h323_ct_state *ctstate); #endif diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 574f7eb..5ed2d70 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -33,12 +33,20 @@ static int set_addr(struct sk_buff *skb, unsigned int protoff, } __attribute__ ((__packed__)) buf; const struct tcphdr *th; struct tcphdr _tcph; + int datalen; + struct iphdr *iph = ip_hdr(skb); buf.ip = ip; buf.port = port; addroff += dataoff; if (ip_hdr(skb)->protocol == IPPROTO_TCP) { + th = (void *)iph + iph->ihl * 4; + datalen = skb->len - (iph->ihl * 4 + th->doff * 4); + /* check offset overflow */ + if (addroff > datalen) + return -1; + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { @@ -53,6 +61,11 @@ static int set_addr(struct sk_buff *skb, unsigned int protoff, return -1; *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff; } else { + datalen = skb->len - (iph->ihl * 4 + sizeof(struct udphdr)); + /* check offset overflow */ + if (addroff > datalen) + return -1; + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, protoff, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { @@ -93,7 +106,8 @@ static int set_h245_addr(struct sk_buff *skb, unsigned protoff, static int 
set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo,
[PATCH] ACPI: Fix indent for variable declaration
In function acpi_ns_one_complete_parse, the variable declaration aml_length is not correctly indented. Signed-off-by: Zhouyi Zhou --- drivers/acpi/acpica/nsparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c index 57a4cfe..802afdc 100644 --- a/drivers/acpi/acpica/nsparse.c +++ b/drivers/acpi/acpica/nsparse.c @@ -70,7 +70,7 @@ acpi_ns_one_complete_parse(u32 pass_number, { union acpi_parse_object *parse_root; acpi_status status; - u32 aml_length; + u32 aml_length; u8 *aml_start; struct acpi_walk_state *walk_state; struct acpi_table_header *table; -- 2.4.5
[PATCH V4] netfilter: h323: avoid potential attack
I think hackers chould build a malicious h323 packet to overflow the pointer p which will panic during the memcpy(addr, p, len) For example, he may fabricate a very large taddr->ipAddress.ip. As suggested by Eric, this module is protected by a lock (nf_h323_lock) so adding a variable h323_buffer_valid_bytes that would contain the number of valid bytes would not require to change prototypes of get_h2x5_addr. Signed-off-by: Zhouyi Zhou --- net/netfilter/nf_conntrack_h323_main.c | 31 +++ 1 file changed, 31 insertions(+) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 9511af0..fba9d71 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -110,6 +110,29 @@ int (*nat_q931_hook) (struct sk_buff *skb, static DEFINE_SPINLOCK(nf_h323_lock); static char *h323_buffer; +static int h323_buffer_valid_bytes; + +static int h323_buffer_ref_valid(void *p, int len) +{ + int err = 0; + + if ((unsigned long)len > h323_buffer_valid_bytes) { + err = -1; + goto out; + } + + if (p + len > (void *)h323_buffer + h323_buffer_valid_bytes) { + err = -1; + goto out; + } + + if (p < (void *)h323_buffer) { + err = -1; + goto out; + } +out: + return err; +} static struct nf_conntrack_helper nf_conntrack_helper_h245; static struct nf_conntrack_helper nf_conntrack_helper_q931[]; @@ -145,6 +168,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, if (*data == NULL) {/* first TPKT */ /* Get first TPKT pointer */ + h323_buffer_valid_bytes = tcpdatalen; tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen, h323_buffer); BUG_ON(tpkt == NULL); @@ -247,6 +271,9 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data, return 0; } + if (h323_buffer_ref_valid((void *)p, len + sizeof(__be16))) + return 0; + memcpy(addr, p, len); memset((void *)addr + len, 0, sizeof(*addr) - len); memcpy(port, p + len, sizeof(__be16)); @@ -669,6 +696,9 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, return 0; } + if (h323_buffer_ref_valid((void *)p, len + sizeof(__be16))) + return 0; + memcpy(addr, p, len); memset((void *)addr + len, 0, sizeof(*addr) - len); memcpy(port, p + len, sizeof(__be16)); @@ -1248,6 +1278,7 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, if (dataoff >= skb->len) return NULL; *datalen = skb->len - dataoff; + h323_buffer_valid_bytes = *datalen; return skb_header_pointer(skb, dataoff, *datalen, h323_buffer); } -- 1.9.1
[PATCH V5] netfilter: h323: avoid potential attack
I think hackers chould build a malicious h323 packet to overflow the pointer p which will panic during the memcpy(addr, p, len) For example, he may fabricate a very large taddr->ipAddress.ip. In order to avoid this, I add a valid memory reference check in get_h2x5_addr functions. As suggested by Eric, this module is protected by a lock (nf_h323_lock) so adding a variable h323_buffer_valid_bytes that would contain the number of valid bytes would not require to change prototypes of get_h2x5_addr. Signed-off-by: Zhouyi Zhou --- net/netfilter/nf_conntrack_h323_main.c | 27 +++ 1 file changed, 27 insertions(+) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 9511af0..21665ec 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -110,6 +110,25 @@ int (*nat_q931_hook) (struct sk_buff *skb, static DEFINE_SPINLOCK(nf_h323_lock); static char *h323_buffer; +static int h323_buffer_valid_bytes; + +static bool h323_buffer_ref_valid(void *p, int len) +{ + + if ((unsigned long)len > h323_buffer_valid_bytes) { + return false; + } + + if (p + len > (void *)h323_buffer + h323_buffer_valid_bytes) { + return false; + } + + if (p < (void *)h323_buffer) { + return false; + } + + return true; +} static struct nf_conntrack_helper nf_conntrack_helper_h245; static struct nf_conntrack_helper nf_conntrack_helper_q931[]; @@ -145,6 +164,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, if (*data == NULL) {/* first TPKT */ /* Get first TPKT pointer */ + h323_buffer_valid_bytes = tcpdatalen; tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen, h323_buffer); BUG_ON(tpkt == NULL); @@ -247,6 +267,9 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data, return 0; } + if (!h323_buffer_ref_valid((void *)p, len + sizeof(__be16))) + return 0; + memcpy(addr, p, len); memset((void *)addr + len, 0, sizeof(*addr) - len); memcpy(port, p + len, sizeof(__be16)); @@ -669,6 +692,9 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, return 0; } + if (!h323_buffer_ref_valid((void *)p, len + sizeof(__be16))) + return 0; + memcpy(addr, p, len); memset((void *)addr + len, 0, sizeof(*addr) - len); memcpy(port, p + len, sizeof(__be16)); @@ -1248,6 +1274,7 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, if (dataoff >= skb->len) return NULL; *datalen = skb->len - dataoff; + h323_buffer_valid_bytes = *datalen; return skb_header_pointer(skb, dataoff, *datalen, h323_buffer); } -- 1.9.1
[PATCH V6] netfilter: h323: avoid potential attack
I think hackers chould build a malicious h323 packet to overflow the pointer p which will panic during the memcpy(addr, p, len) For example, he may fabricate a very large taddr->ipAddress.ip. In order to avoid this, I add a valid memory reference check in get_h2x5_addr functions. As suggested by Eric, this module is protected by a lock (nf_h323_lock) so adding a variable h323_buffer_valid_bytes that would contain the number of valid bytes would not require to change prototypes of get_h2x5_addr. Thanks Sergei for reviewing. Signed-off-by: Zhouyi Zhou --- net/netfilter/nf_conntrack_h323_main.c | 23 +++ 1 file changed, 23 insertions(+) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 9511af0..8d24c4b 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -110,6 +110,21 @@ int (*nat_q931_hook) (struct sk_buff *skb, static DEFINE_SPINLOCK(nf_h323_lock); static char *h323_buffer; +static int h323_buffer_valid_bytes; + +static bool h323_buffer_ref_valid(void *p, int len) +{ + if ((unsigned long)len > h323_buffer_valid_bytes) + return false; + + if (p + len > (void *)h323_buffer + h323_buffer_valid_bytes) + return false; + + if (p < (void *)h323_buffer) + return false; + + return true; +} static struct nf_conntrack_helper nf_conntrack_helper_h245; static struct nf_conntrack_helper nf_conntrack_helper_q931[]; @@ -145,6 +160,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, if (*data == NULL) {/* first TPKT */ /* Get first TPKT pointer */ + h323_buffer_valid_bytes = tcpdatalen; tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen, h323_buffer); BUG_ON(tpkt == NULL); @@ -247,6 +263,9 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data, return 0; } + if (!h323_buffer_ref_valid((void *)p, len + sizeof(__be16))) + return 0; + memcpy(addr, p, len); memset((void *)addr + len, 0, sizeof(*addr) - len); memcpy(port, p + len, sizeof(__be16)); @@ -669,6 +688,9 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, return 0; } + if (!h323_buffer_ref_valid((void *)p, len + sizeof(__be16))) + return 0; + memcpy(addr, p, len); memset((void *)addr + len, 0, sizeof(*addr) - len); memcpy(port, p + len, sizeof(__be16)); @@ -1248,6 +1270,7 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, if (dataoff >= skb->len) return NULL; *datalen = skb->len - dataoff; + h323_buffer_valid_bytes = *datalen; return skb_header_pointer(skb, dataoff, *datalen, h323_buffer); } -- 1.9.1
[PATCH V2] netfilter: h323: avoid potential attack
Thanks Eric for your review and advice. I think hackers could build a malicious h323 packet to overflow the pointer p, which will panic during the memcpy(addr, p, len). For example, he may fabricate a very large taddr->ipAddress.ip. Signed-off-by: Zhouyi Zhou --- net/netfilter/nf_conntrack_h323_main.c |7 +++ 1 file changed, 7 insertions(+) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 9511af0..ccd08c5 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -110,6 +110,7 @@ int (*nat_q931_hook) (struct sk_buff *skb, static DEFINE_SPINLOCK(nf_h323_lock); static char *h323_buffer; +#define CHECK_BOUND(p, n) ((void *)p + n - (void *)h323_buffer > 65536) static struct nf_conntrack_helper nf_conntrack_helper_h245; static struct nf_conntrack_helper nf_conntrack_helper_q931[]; @@ -247,6 +248,9 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data, return 0; } + if (CHECK_BOUND(p, len + sizeof(__be16))) + return 0; + memcpy(addr, p, len); memset((void *)addr + len, 0, sizeof(*addr) - len); memcpy(port, p + len, sizeof(__be16)); @@ -669,6 +673,9 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, return 0; } + if (CHECK_BOUND(p, len + sizeof(__be16))) + return 0; + memcpy(addr, p, len); memset((void *)addr + len, 0, sizeof(*addr) - len); memcpy(port, p + len, sizeof(__be16)); -- 1.7.10.4
Re: Re: [PATCH V2] netfilter: h323: avoid potential attack
Thanks Eric for replying > -Original Messages- > From: "Eric Dumazet" > Sent Time: Thursday, January 28, 2016 > To: "Zhouyi Zhou" > Cc: pa...@netfilter.org, ka...@trash.net, kad...@blackhole.kfki.hu, > da...@davemloft.net, netfilter-de...@vger.kernel.org, coret...@netfilter.org, > net...@vger.kernel.org, linux-ker...@vger.kernel.or, "Zhouyi Zhou" > > Subject: Re: [PATCH V2] netfilter: h323: avoid potential attack > > On Thu, 2016-01-28 at 16:59 +0800, Zhouyi Zhou wrote: > > Thanks Eric for your review and advice. > > > > I think hackers chould build a malicious h323 packet to overflow > > the pointer p which will panic during the memcpy(addr, p, len) > > > > For example, he may fabricate a very large taddr->ipAddress.ip; > > > > Signed-off-by: Zhouyi Zhou > > --- > > Except you did not address my feedback about potentially reading not > initialized memory. > > if the frame length was 1000 bytes, then surely accessing memory at > offset 8000 will either read garbage, or read data from a prior frame > and leak secrets. My patch is intended to prevent a kernel panic. To prevent reading garbage or reading data from a prior frame and leaking secrets, the prototypes of the get_h2x5_addr functions and of the functions that call get_h2x5_addr would have to be changed; should we do this? > > > Cheers Zhouyi
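Eric's objection can be demonstrated with a small sketch: a bound against the full 64KB backing buffer, as in the V2 CHECK_BOUND, still admits offsets beyond the bytes copied for the current frame, so the read returns whatever an earlier, larger frame left behind. A hedged illustration with made-up sizes, not the conntrack code:

#include <stdio.h>
#include <string.h>

static char backing[65536];	/* reused for every packet, like h323_buffer */

int main(void)
{
	/* A previous 9000-byte frame filled part of the buffer with "secrets". */
	memset(backing, 'S', 9000);

	/* The current frame is only 1000 bytes long. */
	memset(backing, 'c', 1000);
	int frame_len = 1000;

	int off = 8000;		/* attacker-decoded offset */

	/* V2-style check: only rejects references past the 64KB buffer. */
	if (off + 16 <= 65536)
		printf("V2 check passes, reads '%c' (stale data)\n", backing[off]);

	/* Valid-bytes check (V3 and later): rejects references past this frame. */
	if (off + 16 > frame_len)
		printf("valid-bytes check rejects offset %d > %d\n", off, frame_len);
	return 0;
}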
Re: Re: [PATCH V2] netfilter: h323: avoid potential attack
Thanks for your advice. I will take it if I have the opportunity to write the final code. As a matter of fact, I triggered this bug when I tried to migrate the H323 code to another operating system. This could trigger a panic because the get_h2x5_addr functions are the first place we ever ask for potentially out-of-range data, owing to H323-style decoding (the value taddr->ipAddress.ip is decoded midway between the call to skb_header_pointer and the call to get_h2x5_addr) > -Original Messages- > From: "Florian Westphal" > Sent Time: Thursday, January 28, 2016 > To: "Zhouyi Zhou" > Cc: eric.duma...@gmail.com, pa...@netfilter.org, ka...@trash.net, > kad...@blackhole.kfki.hu, da...@davemloft.net, > netfilter-de...@vger.kernel.org, coret...@netfilter.org, > net...@vger.kernel.org, linux-ker...@vger.kernel.or, "Zhouyi Zhou" > > Subject: Re: [PATCH V2] netfilter: h323: avoid potential attack > > Zhouyi Zhou wrote: > > Thanks Eric for your review and advice. > > > > I think hackers chould build a malicious h323 packet to overflow > > the pointer p which will panic during the memcpy(addr, p, len) > > > > For example, he may fabricate a very large taddr->ipAddress.ip; > > Can you be more specific? > > h323_buffer is backend storage for skb_header_pointer, i.e. > this will error out early when we ask for more data than is available in > packet. > > I don't understand how this could overflow anything. > Even assuming 64k packet we'd still have enough room in h323_buffer > for an ipv6 address, no? (we skip the l3/l4 header when extracting > packet payload). >
[PATCH V3] netfilter: h323: avoid potential attack
I think hackers could build a malicious h323 packet to overflow the pointer p, which will panic during the memcpy(addr, p, len). For example, he may fabricate a very large taddr->ipAddress.ip. As suggested by Eric, this module is protected by a lock (nf_h323_lock), so adding a variable h323_buffer_valid_bytes that would contain the number of valid bytes would not require changing the prototypes of get_h2x5_addr. Signed-off-by: Zhouyi Zhou Signed-off-by: Eric Dumazet Reviewed-by: Sergei Shtylyov --- net/netfilter/nf_conntrack_h323_main.c | 13 + 1 file changed, 13 insertions(+) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 9511af0..65d84bc 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -110,6 +110,11 @@ int (*nat_q931_hook) (struct sk_buff *skb, static DEFINE_SPINLOCK(nf_h323_lock); static char *h323_buffer; +static unsigned int h323_buffer_valid_bytes; +/* check offset overflow and out of range data reference */ +#define CHECK_BOUND(p, n) ((n) > h323_buffer_valid_bytes ||\ + ((void *)(p) + (n) - (void *)h323_buffer \ + > h323_buffer_valid_bytes)) static struct nf_conntrack_helper nf_conntrack_helper_h245; static struct nf_conntrack_helper nf_conntrack_helper_q931[]; @@ -145,6 +150,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, if (*data == NULL) {/* first TPKT */ /* Get first TPKT pointer */ + h323_buffer_valid_bytes = tcpdatalen; tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen, h323_buffer); BUG_ON(tpkt == NULL); @@ -247,6 +253,9 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data, return 0; } + if (CHECK_BOUND(p, len + sizeof(__be16))) + return 0; + memcpy(addr, p, len); memset((void *)addr + len, 0, sizeof(*addr) - len); memcpy(port, p + len, sizeof(__be16)); @@ -669,6 +678,9 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, return 0; } + if (CHECK_BOUND(p, len + sizeof(__be16))) + return 0; + memcpy(addr, p, len); memset((void *)addr + len, 0, sizeof(*addr) - len); memcpy(port, p + len, sizeof(__be16)); @@ -1248,6 +1260,7 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, if (dataoff >= skb->len) return NULL; *datalen = skb->len - dataoff; + h323_buffer_valid_bytes = *datalen; return skb_header_pointer(skb, dataoff, *datalen, h323_buffer); } -- 1.9.1
[PATCH 1/1] netfilter: h323: avoid potential attack
From: Zhouyi Zhou I think hackers could build a malicious h323 packet to overflow the pointer p, which will panic during the memcpy(addr, p, len). For example, he may fabricate a very large taddr->ipAddress.ip. Signed-off-by: Zhouyi Zhou --- net/netfilter/nf_conntrack_h323_main.c |8  1 file changed, 8 insertions(+) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 9511af0..3b3dd8c 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -110,6 +110,10 @@ int (*nat_q931_hook) (struct sk_buff *skb, static DEFINE_SPINLOCK(nf_h323_lock); static char *h323_buffer; +#define CHECK_BOUND(p, n) do { \ + if (((p - h323_buffer) + n) > 65536)\ + return 0; \ +} while (0) static struct nf_conntrack_helper nf_conntrack_helper_h245; static struct nf_conntrack_helper nf_conntrack_helper_q931[]; @@ -247,6 +251,8 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data, return 0; } + CHECK_BOUND(p, len); + memcpy(addr, p, len); memset((void *)addr + len, 0, sizeof(*addr) - len); memcpy(port, p + len, sizeof(__be16)); @@ -669,6 +675,8 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, return 0; } + CHECK_BOUND(p, len); + memcpy(addr, p, len); memset((void *)addr + len, 0, sizeof(*addr) - len); memcpy(port, p + len, sizeof(__be16)); -- 1.7.10.4
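One hazard of this initial version worth noting: CHECK_BOUND expands to a statement containing return, so the macro hides control flow at its call site. A tiny sketch of the pitfall, with a hypothetical function and an illustrative limit:

#include <stdio.h>

/* Hidden control flow: this macro can return from the *caller*. */
#define CHECK_BOUND(off, limit) do {		\
	if ((off) > (limit))			\
		return 0;			\
} while (0)

static int parse_field(int off)
{
	CHECK_BOUND(off, 1000);	/* silently returns 0 on failure */
	return 1;		/* field parsed */
}

int main(void)
{
	printf("%d %d\n", parse_field(10), parse_field(5000));	/* 1 0 */
	return 0;
}

The later V2/V3 versions move the return to the call site (if (CHECK_BOUND(...)) return 0;), which keeps the early exit visible.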
[PATCH] kprobes: Correct a type error in function kprobes_module_callback
There is a tiny type error in comment of function kprobes_module_callback. Signed-off-by: Zhouyi Zhou --- kernel/kprobes.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e995541..9d2042b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2432,7 +2432,7 @@ static int kprobes_module_callback(struct notifier_block *nb, within_module_core((unsigned long)p->addr, mod))) { /* * The vaddr this probe is installed will soon -* be vfreed buy not synced to disk. Hence, +* be vfreed but not synced to disk. Hence, * disarming the breakpoint isn't needed. * * Note, this will also move any optimized probes -- 1.7.1
Re: [PATCH] kprobes: Correct a type error in function kprobes_module_callback
Thanks for the tip! On Thu, Oct 8, 2020 at 11:06 AM Randy Dunlap wrote: > > Hi, > > On 10/7/20 7:59 PM, Zhouyi Zhou wrote: > > There is a tiny type error in comment of function kprobes_module_callback. > > Preferable > typo > and same in $Subject. > > 'type' usually means data type or maybe typedef, or even > font or typeface. > > I suppose you could say a "typing" error (as in using a typewriter > or keyboard). > > > > > Signed-off-by: Zhouyi Zhou > > --- > > kernel/kprobes.c |2 +- > > 1 files changed, 1 insertions(+), 1 deletions(-) > > > > diff --git a/kernel/kprobes.c b/kernel/kprobes.c > > index e995541..9d2042b 100644 > > --- a/kernel/kprobes.c > > +++ b/kernel/kprobes.c > > @@ -2432,7 +2432,7 @@ static int kprobes_module_callback(struct > > notifier_block *nb, > >within_module_core((unsigned long)p->addr, > > mod))) { > > /* > >* The vaddr this probe is installed will soon > > - * be vfreed buy not synced to disk. Hence, > > + * be vfreed but not synced to disk. Hence, > >* disarming the breakpoint isn't needed. > >* > >* Note, this will also move any optimized > > probes > > > > thanks. > -- > ~Randy >
[PATCH V2] kprobes: Correct a typo in function kprobes_module_callback
There is a tiny typo in the comment of function kprobes_module_callback. Signed-off-by: Zhouyi Zhou --- kernel/kprobes.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e995541..9d2042b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2432,7 +2432,7 @@ static int kprobes_module_callback(struct notifier_block *nb, within_module_core((unsigned long)p->addr, mod))) { /* * The vaddr this probe is installed will soon -* be vfreed buy not synced to disk. Hence, +* be vfreed but not synced to disk. Hence, * disarming the breakpoint isn't needed. * * Note, this will also move any optimized probes -- 1.7.1
[PATCH] RCU: fix a typo in comments of rcu_blocking_is_gp
There is a tiny typo in the comment of function rcu_blocking_is_gp. Signed-off-by: Zhouyi Zhou --- kernel/rcu/tree.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f78ee75..4cca03f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3512,7 +3512,7 @@ void __init kfree_rcu_scheduler_running(void) * During early boot, any blocking grace-period wait automatically * implies a grace period. Later on, this is never the case for PREEMPTION. * - * Howevr, because a context switch is a grace period for !PREEMPTION, any + * However, because a context switch is a grace period for !PREEMPTION, any * blocking grace-period wait automatically implies a grace period if * there is only one CPU online at any point time during execution of * either synchronize_rcu() or synchronize_rcu_expedited(). It is OK to -- 1.7.1
[PATCH 2/1] docs: disable KASLR when debugging kernel
commit 6807c84652b0 ("x86: Enable KASLR by default") enables KASLR by default on x86. However, KASLR will confuse gdb, which resolves kernel symbol addresses from the symbol table of vmlinux. We should turn off KASLR for kernel debugging. Signed-off-by: Zhouyi Zhou --- Documentation/dev-tools/kgdb.rst | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Documentation/dev-tools/kgdb.rst b/Documentation/dev-tools/kgdb.rst index 7527320..3a051f0 100644 --- a/Documentation/dev-tools/kgdb.rst +++ b/Documentation/dev-tools/kgdb.rst @@ -348,6 +348,15 @@ default behavior is always set to 0. - ``echo 1 > /sys/module/debug_core/parameters/kgdbreboot`` - Enter the debugger on reboot notify. +Kernel parameter: ``nokaslr`` +- + +If the architecture that you are using enable KASLR by default, +you should consider turning it off. KASLR randomizes the +virtual address where the kernel image is mapped and confuse +gdb which resolve kernel symbol address from symbol table +of vmlinux. + Using kdb = @@ -358,7 +367,7 @@ This is a quick example of how to use kdb. 1. Configure kgdboc at boot using kernel parameters:: - console=ttyS0,115200 kgdboc=ttyS0,115200 + console=ttyS0,115200 kgdboc=ttyS0,115200 nokaslr OR -- 1.9.1
[PATCH 2/2] docs: disable KASLR when debugging kernel
commit 6807c84652b0 ("x86: Enable KASLR by default") enables KASLR by default on x86. However, KASLR will confuse gdb, which resolves kernel symbol addresses from the symbol table of vmlinux. We should turn off KASLR for kernel debugging. Signed-off-by: Zhouyi Zhou --- Documentation/dev-tools/gdb-kernel-debugging.rst | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/dev-tools/gdb-kernel-debugging.rst b/Documentation/dev-tools/gdb-kernel-debugging.rst index 5e93c9b..fe2edcc 100644 --- a/Documentation/dev-tools/gdb-kernel-debugging.rst +++ b/Documentation/dev-tools/gdb-kernel-debugging.rst @@ -31,12 +31,13 @@ Setup CONFIG_DEBUG_INFO_REDUCED off. If your architecture supports CONFIG_FRAME_POINTER, keep it enabled. -- Install that kernel on the guest. +- Install that kernel on the guest, turn off KASLR by adding "nokaslr" to + the kernel command line . Alternatively, QEMU allows to boot the kernel directly using -kernel, -append, -initrd command line switches. This is generally only useful if you do not depend on modules. See QEMU documentation for more details on - this mode. + this mode. In this case, you should build the kernel with + CONFIG_RANDOMIZE_BASE disabled. - Enable the gdb stub of QEMU/KVM, either -- 1.9.1
Re: [PATCH 2/2] docs: disable KASLR when debugging kernel
Hi Kieran, Thanks for your review and invaluable advice; I will prepare a new version immediately. On Fri, Jul 7, 2017 at 4:05 PM, Kieran Bingham wrote: > Hi Zhouyi > > Thankyou for the patch, > > On 07/07/17 08:14, Zhouyi Zhou wrote: >> commit 6807c84652b0 ("x86: Enable KASLR by default") enables KASLR >> by default on x86. While KASLR will confuse gdb which resolve kernel >> symbol address from symbol table of vmlinux. We should turn off KASLR for >> kernel debugging. > Yes, this is something I had come across and certainly should be documented. > >> Signed-off-by: Zhouyi Zhou >> --- >> Documentation/dev-tools/gdb-kernel-debugging.rst | 6 -- >> 1 file changed, 4 insertions(+), 2 deletions(-) >> >> diff --git a/Documentation/dev-tools/gdb-kernel-debugging.rst >> b/Documentation/dev-tools/gdb-kernel-debugging.rst >> index 5e93c9b..fe2edcc 100644 >> --- a/Documentation/dev-tools/gdb-kernel-debugging.rst >> +++ b/Documentation/dev-tools/gdb-kernel-debugging.rst >> @@ -31,12 +31,13 @@ Setup >>CONFIG_DEBUG_INFO_REDUCED off. If your architecture supports >>CONFIG_FRAME_POINTER, keep it enabled. >> >> -- Install that kernel on the guest. >> +- Install that kernel on the guest, turn off KASLR by adding "nokaslr" to >> + the kernel command line . > Is KASLR available on *all* supported architectures? > > If not, then perhaps this should be "turn off KASLR if necessary by ..." > But I don't think that's a big deal really. > > Reviewed-by: Kieran Bingham > > >>Alternatively, QEMU allows to boot the kernel directly using -kernel, >>-append, -initrd command line switches. This is generally only useful if >>you do not depend on modules. See QEMU documentation for more details on >> - this mode. >> + this mode. In this case, you should build the kernel with >> + CONFIG_RANDOMIZE_BASE disabled. >> >> - Enable the gdb stub of QEMU/KVM, either >> > > -- > Kieran
[PATCH v2 2/2] docs: disable KASLR when debugging kernel
commit 6807c84652b0 ("x86: Enable KASLR by default") enables KASLR by default on x86. However, KASLR will confuse gdb, which resolves kernel symbol addresses from the symbol table of vmlinux. We should turn off KASLR for kernel debugging. Signed-off-by: Zhouyi Zhou Reviewed-by: Kieran Bingham --- Documentation/dev-tools/gdb-kernel-debugging.rst | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/dev-tools/gdb-kernel-debugging.rst b/Documentation/dev-tools/gdb-kernel-debugging.rst index 5e93c9b..60fec6d 100644 --- a/Documentation/dev-tools/gdb-kernel-debugging.rst +++ b/Documentation/dev-tools/gdb-kernel-debugging.rst @@ -31,12 +31,13 @@ Setup CONFIG_DEBUG_INFO_REDUCED off. If your architecture supports CONFIG_FRAME_POINTER, keep it enabled. -- Install that kernel on the guest. +- Install that kernel on the guest, turn off KASLR if necessary by adding + "nokaslr" to the kernel command line. Alternatively, QEMU allows to boot the kernel directly using -kernel, -append, -initrd command line switches. This is generally only useful if you do not depend on modules. See QEMU documentation for more details on - this mode. + this mode. In this case, you should build the kernel with + CONFIG_RANDOMIZE_BASE disabled if the architecture supports KASLR. - Enable the gdb stub of QEMU/KVM, either -- 1.9.1
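The confusion described in the commit message is a constant address slide: under KASLR the runtime address of every kernel symbol differs from the link-time address recorded in vmlinux by one offset. A hedged userspace sketch that measures the slide by comparing /proc/kallsyms with a link-time address; the 0xffffffff81000000 value for _text is only an example taken from a hypothetical System.map, and reading non-zero addresses from kallsyms typically requires root with kptr_restrict=0:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Example link-time address of _text from System.map (illustrative). */
	unsigned long linktime = 0xffffffff81000000UL;
	unsigned long runtime = 0;
	char line[256], type, name[128];
	int found = 0;

	FILE *f = fopen("/proc/kallsyms", "r");
	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "%lx %c %127s", &runtime, &type, name) == 3 &&
		    strcmp(name, "_text") == 0) {
			found = 1;
			break;
		}
	}
	fclose(f);
	if (!found)
		return 1;
	printf("KASLR slide: 0x%lx\n", runtime - linktime);
	return 0;
}

With nokaslr on the command line the slide is zero, which is exactly why gdb's symbol table then matches the running kernel.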
Re: [PATCH] srcu: remove never used variable
Thanks for Paul's encouragement; I will keep studying the SRCU code. On Fri, Feb 23, 2018 at 9:20 AM, Paul E. McKenney wrote: > On Fri, Feb 23, 2018 at 09:04:05AM +0800, Zhouyi Zhou wrote: >> Thanks Paul for reviewing > > And thank you for your interest in SRCU! I am pretty sure that other > bugs still remain. ;-) > > Thanx, Paul > >> Cheers >> Zhouyi >> >> On Friday, February 23, 2018, Paul E. McKenney >> wrote: >> >> > On Thu, Feb 22, 2018 at 06:52:37AM +, zhouzho...@gmail.com wrote: >> > > From: Zhouyi Zhou >> > > >> > > In function srcu_gp_end, the variable idxnext is never used after assign, >> > > remove it and its assign statement. >> > > >> > > Signed-off-by: Zhouyi Zhou >> > >> > Good catch, but Byungchul Park beat you to it. Please see commit >> > a72da917f186 ("srcu: Remove dead code in srcu_gp_end()") in -rcu. >> > >> > Thanx, Paul >> > >> > > --- >> > > kernel/rcu/srcutree.c | 2 -- >> > > 1 file changed, 2 deletions(-) >> > > >> > > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c >> > > index d5cea81..1241715 100644 >> > > --- a/kernel/rcu/srcutree.c >> > > +++ b/kernel/rcu/srcutree.c >> > > @@ -531,7 +531,6 @@ static void srcu_gp_end(struct srcu_struct *sp) >> > > unsigned long flags; >> > > unsigned long gpseq; >> > > int idx; >> > > - int idxnext; >> > > unsigned long mask; >> > > struct srcu_data *sdp; >> > > struct srcu_node *snp; >> > > @@ -555,7 +554,6 @@ static void srcu_gp_end(struct srcu_struct *sp) >> > > >> > > /* Initiate callback invocation as needed. */ >> > > idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); >> > > - idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs); >> > > rcu_for_each_node_breadth_first(sp, snp) { >> > > spin_lock_irq_rcu_node(snp); >> > > cbs = false; >> > > -- >> > > 2.1.4 >> > > >> > >> > >
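As an aside, dead stores like the removed idxnext are the kind of thing the compiler can flag: gcc's -Wall enables -Wunused-but-set-variable, which warns about a variable that is assigned but never read. A small illustrative demonstration, not the srcutree code:

/* cc -Wall demo.c warns: variable 'idxnext' set but not used */
#include <stdio.h>

static int f(int idx)
{
	int idxnext;

	idxnext = (idx + 1) % 4;	/* dead store: assigned, never read */
	return idx;
}

int main(void)
{
	printf("%d\n", f(3));
	return 0;
}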
FS: EXT4: should we sync error info in __ext4_grp_locked_error?
Hi, In function __ext4_grp_locked_error, __save_error_info(sb, function, line) is called to save error info in the superblock, but it does not sync that information to disk to inform the subsequent fsck after reboot. The reason, I guess, may be that we are in a locked state. My question is: why not call ext4_commit_super(sb, 1) after ext4_unlock_group(sb, grp) and ext4_handle_error(sb), so that the subsequent fsck after reboot is sure to be well informed? Forgive my naivety. Thanks a lot
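The suggestion amounts to a common pattern: update state in memory while holding the lock, then perform the blocking disk write only after the lock is dropped. A minimal pthread analog of that ordering; the function and variable names are stand-ins for __ext4_grp_locked_error and ext4_commit_super, not ext4 code:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;
static int error_info;		/* stand-in for the superblock error fields */

static void flush_to_disk(void)	/* stand-in for ext4_commit_super(sb, 1) */
{
	usleep(1000);		/* blocking I/O must not run under the lock */
	printf("synced error_info=%d\n", error_info);
}

static void grp_locked_error(void)
{
	pthread_mutex_lock(&group_lock);
	error_info = 1;		/* like __save_error_info(): memory update only */
	pthread_mutex_unlock(&group_lock);

	flush_to_disk();	/* the proposed post-unlock commit */
}

int main(void)
{
	grp_locked_error();
	return 0;
}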
Re: [PATCH 1/1] kasan: fix livelock in qlist_move_cache
When there is a huge amount of quarantined cache allocations in the system, the number of entries in global_quarantine[i] will be large. Meanwhile, there is no relaxation in the while loop in function qlist_move_cache, which holds quarantine_lock. As a result, some userspace programs, for example libvirt, will complain. On Tue, Nov 28, 2017 at 12:04 PM, wrote: > From: Zhouyi Zhou > > This patch fix livelock by conditionally release cpu to let others > has a chance to run. > > Tested on x86_64. > Signed-off-by: Zhouyi Zhou > --- > mm/kasan/quarantine.c | 12 +++- > 1 file changed, 11 insertions(+), 1 deletion(-) > > diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c > index 3a8ddf8..33eeff4 100644 > --- a/mm/kasan/quarantine.c > +++ b/mm/kasan/quarantine.c > @@ -265,10 +265,13 @@ static void qlist_move_cache(struct qlist_head *from, >struct kmem_cache *cache) > { > struct qlist_node *curr; > + struct qlist_head tmp_head; > + unsigned long flags; > > if (unlikely(qlist_empty(from))) > return; > > + qlist_init(&tmp_head); > curr = from->head; > qlist_init(from); > while (curr) { > @@ -278,10 +281,17 @@ static void qlist_move_cache(struct qlist_head *from, > if (obj_cache == cache) > qlist_put(to, curr, obj_cache->size); > else > - qlist_put(from, curr, obj_cache->size); > + qlist_put(&tmp_head, curr, obj_cache->size); > > curr = next; > + > + if (need_resched()) { > + spin_unlock_irqrestore(&quarantine_lock, flags); > + cond_resched(); > + spin_lock_irqsave(&quarantine_lock, flags); > + } > } > + qlist_move_all(&tmp_head, from); > } > > static void per_cpu_remove_cache(void *arg) > -- > 2.1.4 >
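The shape of the proposed fix is a classic pattern: inside a long loop run under a lock, periodically drop the lock, yield, and retake it, re-reading shared state afterwards. A minimal runnable pthread analog of that pattern follows; quarantine_lock and the node list are stand-ins, and note that in a real kernel version the saved IRQ flags must come from the spin_lock_irqsave that originally took the lock:

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static long shared_work = 1000000;	/* stand-in for the quarantine list */

static void drain(void)
{
	long done = 0;

	pthread_mutex_lock(&lock);
	while (shared_work > 0) {
		shared_work--;		/* process one node */
		done++;

		if ((done % 4096) == 0) {
			/* Let others run: drop the lock, yield, retake. */
			pthread_mutex_unlock(&lock);
			sched_yield();
			pthread_mutex_lock(&lock);
			/* State may have changed meanwhile; the loop
			 * condition re-reads shared_work, so that is
			 * handled here. */
		}
	}
	pthread_mutex_unlock(&lock);
	printf("drained %ld nodes\n", done);
}

int main(void)
{
	drain();
	return 0;
}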
Re: [PATCH 1/1] kasan: fix livelock in qlist_move_cache
Thanks for reviewing. My machine has 128G of RAM and runs many KVM virtual machines. libvirtd always reports "internal error: received hangup / error event on socket" under heavy memory load. Then I used perf top -g; qlist_move_cache consumed 100% CPU for several minutes. On Tue, Nov 28, 2017 at 3:45 PM, Dmitry Vyukov wrote: > On Tue, Nov 28, 2017 at 5:05 AM, Zhouyi Zhou wrote: >> When there are huge amount of quarantined cache allocates in system, >> number of entries in global_quarantine[i] will be great. Meanwhile, >> there is no relax in while loop in function qlist_move_cache which >> hold quarantine_lock. As a result, some userspace programs for example >> libvirt will complain. > > Hi, > > The QUARANTINE_BATCHES thing was supposed to fix this problem, see > quarantine_remove_cache() function. > What is the amount of RAM and number of CPUs in your system? > If system has 4GB of RAM, quarantine size is 128MB and that's split > into 1024 batches. Batch size is 128KB. Even if that's filled with the > smallest objects of size 32, that's only 4K objects. And there is a > cond_resched() between processing of every batch. > I don't understand why it causes problems in your setup. We use KASAN > extremely heavily on hundreds of machines 24x7 and we have not seen > any single report from this code... > > >> On Tue, Nov 28, 2017 at 12:04 PM, wrote: >>> From: Zhouyi Zhou >>> >>> This patch fix livelock by conditionally release cpu to let others >>> has a chance to run. >>> >>> Tested on x86_64. >>> Signed-off-by: Zhouyi Zhou >>> --- >>> mm/kasan/quarantine.c | 12 +++- >>> 1 file changed, 11 insertions(+), 1 deletion(-) >>> >>> diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c >>> index 3a8ddf8..33eeff4 100644 >>> --- a/mm/kasan/quarantine.c >>> +++ b/mm/kasan/quarantine.c >>> @@ -265,10 +265,13 @@ static void qlist_move_cache(struct qlist_head *from, >>>struct kmem_cache *cache) >>> { >>> struct qlist_node *curr; >>> + struct qlist_head tmp_head; >>> + unsigned long flags; >>> >>> if (unlikely(qlist_empty(from))) >>> return; >>> >>> + qlist_init(&tmp_head); >>> curr = from->head; >>> qlist_init(from); >>> while (curr) { >>> @@ -278,10 +281,17 @@ static void qlist_move_cache(struct qlist_head *from, >>> if (obj_cache == cache) >>> qlist_put(to, curr, obj_cache->size); >>> else >>> - qlist_put(from, curr, obj_cache->size); >>> + qlist_put(&tmp_head, curr, obj_cache->size); >>> >>> curr = next; >>> + >>> + if (need_resched()) { >>> + spin_unlock_irqrestore(&quarantine_lock, flags); >>> + cond_resched(); >>> + spin_lock_irqsave(&quarantine_lock, flags); >>> + } >>> } >>> + qlist_move_all(&tmp_head, from); >>> } >>> >>> static void per_cpu_remove_cache(void *arg) >>> -- >>> 2.1.4 >>> >> >> -- >> You received this message because you are subscribed to the Google Groups >> "kasan-dev" group. >> To unsubscribe from this group and stop receiving emails from it, send an >> email to kasan-dev+unsubscr...@googlegroups.com. >> To post to this group, send email to kasan-...@googlegroups.com. >> To view this discussion on the web visit >> https://groups.google.com/d/msgid/kasan-dev/CAABZP2zEup53ZcNKOEUEMx_aRMLONZdYCLd7s5J4DLTccPxC-A%40mail.gmail.com. >> For more options, visit https://groups.google.com/d/optout.
Re: [PATCH 1/1] kasan: fix livelock in qlist_move_cache
Hi, Please take a look at function quarantine_put; I don't think the following code will limit the batch size below quarantine_batch_size. It only advances quarantine_tail after qlist_move_all. qlist_move_all(q, &temp); spin_lock(&quarantine_lock); WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes); qlist_move_all(&temp, &global_quarantine[quarantine_tail]); if (global_quarantine[quarantine_tail].bytes >= READ_ONCE(quarantine_batch_size)) { int new_tail; new_tail = quarantine_tail + 1; if (new_tail == QUARANTINE_BATCHES) new_tail = 0; if (new_tail != quarantine_head) quarantine_tail = new_tail; On Tue, Nov 28, 2017 at 4:12 PM, Dmitry Vyukov wrote: > On Tue, Nov 28, 2017 at 9:00 AM, Zhouyi Zhou wrote: >> Thanks for reviewing >>My machine has 128G of RAM, and runs many KVM virtual machines. >> libvirtd always >> report "internal error: received hangup / error event on socket" under >> heavy memory load. >>Then I use perf top -g, qlist_move_cache consumes 100% cpu for >> several minutes. > > For 128GB of RAM, batch size is 4MB. Processing such batch should not > take more than few ms. So I am still struggling to understand how/why > your change helps and why there are issues in the first place... > > > >> On Tue, Nov 28, 2017 at 3:45 PM, Dmitry Vyukov wrote: >>> On Tue, Nov 28, 2017 at 5:05 AM, Zhouyi Zhou wrote: >>>> When there are huge amount of quarantined cache allocates in system, >>>> number of entries in global_quarantine[i] will be great. Meanwhile, >>>> there is no relax in while loop in function qlist_move_cache which >>>> hold quarantine_lock. As a result, some userspace programs for example >>>> libvirt will complain. >>> >>> Hi, >>> >>> The QUARANTINE_BATCHES thing was supposed to fix this problem, see >>> quarantine_remove_cache() function. >>> What is the amount of RAM and number of CPUs in your system? >>> If system has 4GB of RAM, quarantine size is 128MB and that's split >>> into 1024 batches. Batch size is 128KB. Even if that's filled with the >>> smallest objects of size 32, that's only 4K objects. And there is a >>> cond_resched() between processing of every batch. >>> I don't understand why it causes problems in your setup. We use KASAN >>> extremely heavily on hundreds of machines 24x7 and we have not seen >>> any single report from this code... >>> >>> >>>> On Tue, Nov 28, 2017 at 12:04 PM, wrote: >>>>> From: Zhouyi Zhou >>>>> >>>>> This patch fix livelock by conditionally release cpu to let others >>>>> has a chance to run. >>>>> >>>>> Tested on x86_64. >>>>> Signed-off-by: Zhouyi Zhou >>>>> --- >>>>> mm/kasan/quarantine.c | 12 +++- >>>>> 1 file changed, 11 insertions(+), 1 deletion(-) >>>>> >>>>> diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c >>>>> index 3a8ddf8..33eeff4 100644 >>>>> --- a/mm/kasan/quarantine.c >>>>> +++ b/mm/kasan/quarantine.c >>>>> @@ -265,10 +265,13 @@ static void qlist_move_cache(struct qlist_head *from, >>>>>struct kmem_cache *cache) >>>>> { >>>>> struct qlist_node *curr; >>>>> + struct qlist_head tmp_head; >>>>> + unsigned long flags; >>>>> >>>>> if (unlikely(qlist_empty(from))) >>>>> return; >>>>> >>>>> + qlist_init(&tmp_head); >>>>> curr = from->head; >>>>> qlist_init(from); >>>>> while (curr) { >>>>> @@ -278,10 +281,17 @@ static void qlist_move_cache(struct qlist_head *from, >>>>> if (obj_cache == cache) >>>>> qlist_put(to, curr, obj_cache->size); >>>>> else >>>>> - qlist_put(from, curr, obj_cache->size); >>>>> + qlist_put(&tmp_head, curr, obj_cache->size); >>>>> >>>>> curr
Re: [PATCH 1/1] kasan: fix livelock in qlist_move_cache
Hi, Imagine all of the QUARANTINE_BATCHES elements of the global_quarantine array are of size 4MB + 1MB; now a new call to quarantine_put is invoked, and one of the elements will be of size 4MB + 1MB + 1MB, and so on. On Tue, Nov 28, 2017 at 4:58 PM, Dmitry Vyukov wrote: > On Tue, Nov 28, 2017 at 9:33 AM, Zhouyi Zhou wrote: >> Hi, >>Please take a look at function quarantine_put, I don't think following >> code will limit the batch size below quarantine_batch_size. It only advance >> quarantine_tail after qlist_move_all. >> >> qlist_move_all(q, &temp); >> >> spin_lock(&quarantine_lock); >> WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes); >> qlist_move_all(&temp, &global_quarantine[quarantine_tail]); >> if (global_quarantine[quarantine_tail].bytes >= >> READ_ONCE(quarantine_batch_size)) { >> int new_tail; >> >> new_tail = quarantine_tail + 1; >> if (new_tail == QUARANTINE_BATCHES) >> new_tail = 0; >> if (new_tail != quarantine_head) >> quarantine_tail = new_tail; > > > As far as I see this code can exceed global quarantine batch size by > at most 1 per-cpu batch. Per-cpu batch is caped at 1MB. So max global > batch size will be 4MB+1MB. Which does not radically change situation. > > >> On Tue, Nov 28, 2017 at 4:12 PM, Dmitry Vyukov wrote: >>> On Tue, Nov 28, 2017 at 9:00 AM, Zhouyi Zhou wrote: >>>> Thanks for reviewing >>>>My machine has 128G of RAM, and runs many KVM virtual machines. >>>> libvirtd always >>>> report "internal error: received hangup / error event on socket" under >>>> heavy memory load. >>>>Then I use perf top -g, qlist_move_cache consumes 100% cpu for >>>> several minutes. >>> >>> For 128GB of RAM, batch size is 4MB. Processing such batch should not >>> take more than few ms. So I am still struggling to understand how/why >>> your change helps and why there are issues in the first place... >>> >>> >>> >>>> On Tue, Nov 28, 2017 at 3:45 PM, Dmitry Vyukov wrote: >>>>> On Tue, Nov 28, 2017 at 5:05 AM, Zhouyi Zhou wrote: >>>>>> When there are huge amount of quarantined cache allocates in system, >>>>>> number of entries in global_quarantine[i] will be great. Meanwhile, >>>>>> there is no relax in while loop in function qlist_move_cache which >>>>>> hold quarantine_lock. >>>>>> As a result, some userspace programs for example >>>>>> libvirt will complain. >>>>> >>>>> Hi, >>>>> >>>>> The QUARANTINE_BATCHES thing was supposed to fix this problem, see >>>>> quarantine_remove_cache() function. >>>>> What is the amount of RAM and number of CPUs in your system? >>>>> If system has 4GB of RAM, quarantine size is 128MB and that's split >>>>> into 1024 batches. Batch size is 128KB. Even if that's filled with the >>>>> smallest objects of size 32, that's only 4K objects. And there is a >>>>> cond_resched() between processing of every batch. >>>>> I don't understand why it causes problems in your setup. We use KASAN >>>>> extremely heavily on hundreds of machines 24x7 and we have not seen >>>>> any single report from this code... >>>>> >>>>> >>>>>> On Tue, Nov 28, 2017 at 12:04 PM, wrote: >>>>>>> From: Zhouyi Zhou >>>>>>> >>>>>>> This patch fix livelock by conditionally release cpu to let others >>>>>>> has a chance to run. >>>>>>> >>>>>>> Tested on x86_64.
>>>>>>> Signed-off-by: Zhouyi Zhou >>>>>>> --- >>>>>>> mm/kasan/quarantine.c | 12 +++- >>>>>>> 1 file changed, 11 insertions(+), 1 deletion(-) >>>>>>> >>>>>>> diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c >>>>>>> index 3a8ddf8..33eeff4 100644 >>>>>>> --- a/mm/kasan/quarantine.c >>>>>>> +++ b/mm/kasan/quarantine.c >>>>>>> @@ -265,10 +265,13 @@ static void qlist_move_cache(struct qlist_head >>>>&g
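Zhouyi's growth scenario can be checked with a small simulation of the tail-advance logic quoted earlier: once the tail has wrapped around to just behind head (nothing has been drained), new_tail == head holds, the tail stops advancing, and the tail batch grows by one per-cpu batch on every put. The constants are illustrative:

#include <stdio.h>

#define BATCHES 1024
#define BATCH_LIMIT (4UL << 20)	/* 4MB global batch target */
#define PCPU_BATCH  (1UL << 20)	/* 1MB per-cpu batch */

int main(void)
{
	unsigned long bytes[BATCHES];
	int head = 0, tail = BATCHES - 1, i;

	/* Every batch already at the limit; tail wrapped to just behind head. */
	for (i = 0; i < BATCHES; i++)
		bytes[i] = BATCH_LIMIT;

	/* Ten more puts, mimicking quarantine_put's advance logic. */
	for (i = 0; i < 10; i++) {
		bytes[tail] += PCPU_BATCH;
		if (bytes[tail] >= BATCH_LIMIT) {
			int new_tail = (tail + 1) % BATCHES;
			if (new_tail != head)	/* never true: ring is full */
				tail = new_tail;
		}
	}
	printf("tail batch grew to %lu MB\n", bytes[BATCHES - 1] >> 20);	/* 14 MB */
	return 0;
}

Whether this unbounded growth occurs in practice depends on whether quarantine_reduce ever drains (advancing head); the simulation only shows that the put path alone does not cap the tail batch.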
Re: [PATCH 1/1] kasan: fix livelock in qlist_move_cache
Hi, According to perf top, qlist_move_cache occupying 100% CPU did really happen in my environment yesterday; otherwise I would not have noticed the kasan code. Currently I have difficulty making it reappear because the frontend guy modified some user-mode code. What I can repeat again and again now is: kgdb_breakpoint () at kernel/debug/debug_core.c:1073 1073 wmb(); /* Sync point after breakpoint */ (gdb) p quarantine_batch_size $1 = 3601946 And by instrumenting the code, the maximum global_quarantine[quarantine_tail].bytes reached is 6618208. I do think draining the quarantine right in quarantine_put is a better place to drain because cache_free is fine in that context. I am willing to do it if you think it is convenient :-) On Tue, Nov 28, 2017 at 5:27 PM, Dmitry Vyukov wrote: > On Tue, Nov 28, 2017 at 10:17 AM, Zhouyi Zhou wrote: >> Hi, >> Imagine all of the QUARANTINE_BATCHES elements of >> global_quarantine array is of size 4MB + 1MB, now a new call >> to quarantine_put is invoked, one of the element will be of size 4MB + >> 1MB + 1MB, so on and on. > > > I see what you mean. Does it really happen in your case? What's the > maximum batch size that you get during your workload? > > I always wondered why don't we drain quarantine right in > quarantine_put when we overflow it? We already take quarantine_lock > and calling cache_free should be fine in that context, since user code > already does that. > > > >> On Tue, Nov 28, 2017 at 4:58 PM, Dmitry Vyukov wrote: >>> On Tue, Nov 28, 2017 at 9:33 AM, Zhouyi Zhou wrote: >>>> Hi, >>>>Please take a look at function quarantine_put, I don't think following >>>> code will limit the batch size below quarantine_batch_size. It only advance >>>> quarantine_tail after qlist_move_all. >>>> >>>> qlist_move_all(q, &temp); >>>> >>>> spin_lock(&quarantine_lock); >>>> WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes); >>>> qlist_move_all(&temp, &global_quarantine[quarantine_tail]); >>>> if (global_quarantine[quarantine_tail].bytes >= >>>> READ_ONCE(quarantine_batch_size)) { >>>> int new_tail; >>>> >>>> new_tail = quarantine_tail + 1; >>>> if (new_tail == QUARANTINE_BATCHES) >>>> new_tail = 0; >>>> if (new_tail != quarantine_head) >>>> quarantine_tail = new_tail; >>> >>> >>> As far as I see this code can exceed global quarantine batch size by >>> at most 1 per-cpu batch. Per-cpu batch is caped at 1MB. So max global >>> batch size will be 4MB+1MB. Which does not radically change situation. >>> >>> >>>> On Tue, Nov 28, 2017 at 4:12 PM, Dmitry Vyukov wrote: >>>>> On Tue, Nov 28, 2017 at 9:00 AM, Zhouyi Zhou wrote: >>>>>> Thanks for reviewing >>>>>>My machine has 128G of RAM, and runs many KVM virtual machines. >>>>>> libvirtd always >>>>>> report "internal error: received hangup / error event on socket" under >>>>>> heavy memory load. >>>>>>Then I use perf top -g, qlist_move_cache consumes 100% cpu for >>>>>> several minutes. >>>>> >>>>> For 128GB of RAM, batch size is 4MB. Processing such batch should not >>>>> take more than few ms. So I am still struggling to understand how/why >>>>> your change helps and why there are issues in the first place... >>>>> >>>>> >>>>> >>>>>> On Tue, Nov 28, 2017 at 3:45 PM, Dmitry Vyukov >>>>>> wrote: >>>>>>> On Tue, Nov 28, 2017 at 5:05 AM, Zhouyi Zhou >>>>>>> wrote: >>>>>>>> When there are huge amount of quarantined cache allocates in system, >>>>>>>> number of entries in global_quarantine[i] will be great. Meanwhile, >>>>>>>> there is no relax in while loop in function qlist_move_cache which >>>>>>>> hold quarantine_lock.
As a result, some userspace programs for example >>>>>>>> libvirt will complain. >>>>>>> >>>>>>> Hi, >>>>>>> >>>>>>> The QUARANTINE_BATCHES thing was supposed to fix this problem, see >>>>&
Re: [PATCH 1/1] kasan: fix livelock in qlist_move_cache
Hi, I will try to reestablish the environment and design a proof-of-concept experiment. Cheers On Wed, Nov 29, 2017 at 1:57 AM, Dmitry Vyukov wrote: > On Tue, Nov 28, 2017 at 6:56 PM, Dmitry Vyukov wrote: >> On Tue, Nov 28, 2017 at 12:30 PM, Zhouyi Zhou wrote: >>> Hi, >>>By using perf top, qlist_move_cache occupies 100% cpu did really >>> happen in my environment yesterday, or I >>> won't notice the kasan code. >>>Currently I have difficulty to let it reappear because the frontend >>> guy modified some user mode code. >>>I can repeat again and again now is >>> kgdb_breakpoint () at kernel/debug/debug_core.c:1073 >>> 1073 wmb(); /* Sync point after breakpoint */ >>> (gdb) p quarantine_batch_size >>> $1 = 3601946 >>>And by instrument code, maximum >>> global_quarantine[quarantine_tail].bytes reached is 6618208. >> >> On second thought, size does not matter too much because there can be >> large objects. Quarantine always quantize by objects, we can't part of >> an object into one batch, and another part of the object into another >> object. But it's not a problem, because overhead per objects is O(1). >> We can push a single 4MB object and overflow target size by 4MB and >> that will be fine. >> Either way, 6MB is not terribly much too. Should take milliseconds to >> process. >> >> >> >> >>>I do think drain quarantine right in quarantine_put is a better >>> place to drain because cache_free is fine in >>> that context. I am willing do it if you think it is convenient :-) > > > Andrey, do you know of any problems with draining quarantine in push? > Do you have any objections? > > But it's still not completely clear to me what problem we are solving.
Re: [PATCH 1/1] kasan: fix livelock in qlist_move_cache
: slabdata 52 52 0 mm_struct 1 3 128031 : tunables 24 12 8 : slabdata 1 1 0 cred_jar 12 24320 121 : tunables 120 60 8 : slabdata 2 2 0 So, when I end the libvirt scenario, the slabs belonging to those qemu processes have to invoke quarantine_remove_cache; I guess that's why qlist_move_cache occupies so many CPU cycles. I also guess this makes libvirt complain (waiting for too long?). Sorry for not researching the system deeply in the first place and for submitting a patch in a hurry. And I propose a little suggestion to improve qlist_move_cache if you like: could we design some kind of hash mechanism, grouping the qlist_nodes according to their cache, so as not to compare every qlist_node in the system one by one? Sorry for your time Best Wishes Zhouyi On Wed, Nov 29, 2017 at 7:41 AM, Zhouyi Zhou wrote: > Hi, > I will try to reestablish the environment, and design proof of > concept of experiment. > Cheers > > On Wed, Nov 29, 2017 at 1:57 AM, Dmitry Vyukov wrote: >> On Tue, Nov 28, 2017 at 6:56 PM, Dmitry Vyukov wrote: >>> On Tue, Nov 28, 2017 at 12:30 PM, Zhouyi Zhou wrote: >>>> Hi, >>>>By using perf top, qlist_move_cache occupies 100% cpu did really >>>> happen in my environment yesterday, or I >>>> won't notice the kasan code. >>>>Currently I have difficulty to let it reappear because the frontend >>>> guy modified some user mode code. >>>>I can repeat again and again now is >>>> kgdb_breakpoint () at kernel/debug/debug_core.c:1073 >>>> 1073 wmb(); /* Sync point after breakpoint */ >>>> (gdb) p quarantine_batch_size >>>> $1 = 3601946 >>>>And by instrument code, maximum >>>> global_quarantine[quarantine_tail].bytes reached is 6618208. >>> >>> On second thought, size does not matter too much because there can be >>> large objects. Quarantine always quantize by objects, we can't part of >>> an object into one batch, and another part of the object into another >>> object. But it's not a problem, because overhead per objects is O(1). >>> We can push a single 4MB object and overflow target size by 4MB and >>> that will be fine. >>> Either way, 6MB is not terribly much too. Should take milliseconds to >>> process. >>> >>> >>> >>> >>>>I do think drain quarantine right in quarantine_put is a better >>>> place to drain because cache_free is fine in >>>> that context. I am willing do it if you think it is convenient :-) >> >> >> Andrey, do you know of any problems with draining quarantine in push? >> Do you have any objections? >> >> But it's still not completely clear to me what problem we are solving.
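The closing suggestion — group quarantined nodes by their cache so that removing a cache does not walk every node — could look roughly like the sketch below: a fixed array of buckets keyed by a hash of the cache pointer, so removing a cache scans only one bucket's chain. Bucket count, types, and names are hypothetical; this is a sketch of the idea, not a proposal for the actual kasan code:

#include <stdio.h>
#include <stdlib.h>

#define NBUCKETS 64

struct qnode {
	struct qnode *next;
	const void *cache;	/* which kmem_cache this object belongs to */
};

static struct qnode *buckets[NBUCKETS];

static unsigned int hash_cache(const void *cache)
{
	return ((unsigned long)cache >> 4) % NBUCKETS;
}

static void qput(struct qnode *n)
{
	unsigned int b = hash_cache(n->cache);

	n->next = buckets[b];
	buckets[b] = n;
}

/* Free every node of one cache: scans a single bucket, not the whole list
 * (colliding caches in the same bucket are skipped, not freed). */
static void remove_cache(const void *cache)
{
	struct qnode **pp = &buckets[hash_cache(cache)];

	while (*pp) {
		if ((*pp)->cache == cache) {
			struct qnode *dead = *pp;
			*pp = dead->next;
			free(dead);
		} else {
			pp = &(*pp)->next;
		}
	}
}

int main(void)
{
	int a, b;	/* two fake cache identities */
	for (int i = 0; i < 8; i++) {
		struct qnode *n = malloc(sizeof(*n));
		n->cache = (i & 1) ? (void *)&a : (void *)&b;
		qput(n);
	}
	remove_cache(&a);
	puts("removed all nodes of cache a");
	return 0;
}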
[tip: sched/urgent] preempt/dynamic: Fix typo in macro conditional statement
The following commit has been merged into the sched/urgent branch of tip: Commit-ID: 0c89d87d1d43d9fa268d1dc489518564d58bf497 Gitweb: https://git.kernel.org/tip/0c89d87d1d43d9fa268d1dc489518564d58bf497 Author:Zhouyi Zhou AuthorDate:Sat, 10 Apr 2021 15:35:23 +08:00 Committer: Peter Zijlstra CommitterDate: Mon, 19 Apr 2021 20:02:57 +02:00 preempt/dynamic: Fix typo in macro conditional statement Commit 40607ee97e4e ("preempt/dynamic: Provide irqentry_exit_cond_resched() static call") tried to provide irqentry_exit_cond_resched() static call in irqentry_exit, but has a typo in macro conditional statement. Fixes: 40607ee97e4e ("preempt/dynamic: Provide irqentry_exit_cond_resched() static call") Signed-off-by: Zhouyi Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210410073523.5493-1-zhouzho...@gmail.com --- kernel/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 8442e5c..2003d69 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -422,7 +422,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) instrumentation_begin(); if (IS_ENABLED(CONFIG_PREEMPTION)) { -#ifdef CONFIG_PREEMT_DYNAMIC +#ifdef CONFIG_PREEMPT_DYNAMIC static_call(irqentry_exit_cond_resched)(); #else irqentry_exit_cond_resched();
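The bug class fixed by this commit is easy to reproduce: #ifdef on a misspelled macro is not a compile error; the preprocessor silently takes the other branch. A minimal sketch — the CONFIG names below are plain defines for illustration, not Kconfig symbols:

#include <stdio.h>

#define CONFIG_PREEMPT_DYNAMIC 1

int main(void)
{
#ifdef CONFIG_PREEMT_DYNAMIC		/* typo: never defined */
	puts("static-call path");	/* silently never compiled */
#else
	puts("fallback path");		/* what actually ran before the fix */
#endif
	return 0;
}

gcc's -Wundef only covers undefined identifiers evaluated in #if expressions, not #ifdef tests, which is one reason such typos survive review; the kernel tree's scripts/checkkconfigsymbols.py exists to catch references to unknown CONFIG_ symbols.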
[tip: core/rcu] rcu: Remove spurious instrumentation_end() in rcu_nmi_enter()
The following commit has been merged into the core/rcu branch of tip: Commit-ID: 6494ccb93271bee596a12db32ff44867d5be2321 Gitweb: https://git.kernel.org/tip/6494ccb93271bee596a12db32ff44867d5be2321 Author:Zhouyi Zhou AuthorDate:Mon, 11 Jan 2021 09:08:59 +08:00 Committer: Paul E. McKenney CommitterDate: Mon, 08 Mar 2021 14:17:35 -08:00 rcu: Remove spurious instrumentation_end() in rcu_nmi_enter() In rcu_nmi_enter(), there is an erroneous instrumentation_end() in the second branch of the "if" statement. Oddly enough, "objtool check -f vmlinux.o" fails to complain because it is unable to correctly cover all cases. Instead, objtool visits the third branch first, which marks following trace_rcu_dyntick() as visited. This commit therefore removes the spurious instrumentation_end(). Fixes: 04b25a495bd6 ("rcu: Mark rcu_nmi_enter() call to rcu_cleanup_after_idle() noinstr") Reported-by Neeraj Upadhyay Signed-off-by: Zhouyi Zhou Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e62c2de..4d90f20 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1076,7 +1076,6 @@ noinstr void rcu_nmi_enter(void) } else if (!in_nmi()) { instrumentation_begin(); rcu_irq_enter_check_tick(); - instrumentation_end(); } else { instrumentation_begin(); }
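What objtool is meant to verify here can be modeled as a nesting counter: along every path through a noinstr function, each instrumentation_begin() must be matched by exactly one instrumentation_end(). A toy model of the balance check — purely illustrative, since objtool analyzes object code and the real first branch of rcu_nmi_enter() is more involved:

#include <assert.h>
#include <stdio.h>

static int depth;

static void instr_begin(void) { depth++; }
static void instr_end(void)   { depth--; assert(depth >= 0); }

/* Mirrors the if/else chain of rcu_nmi_enter() after the fix. */
static void nmi_enter(int branch)
{
	if (branch == 0) {
		instr_begin();
		/* idle-exit instrumentation */
	} else if (branch == 1) {
		instr_begin();
		/* rcu_irq_enter_check_tick(); the surplus
		 * instrumentation_end() that used to sit here would
		 * drive depth negative at the common end below. */
	} else {
		instr_begin();
	}
	/* common tail: trace_rcu_dyntick() etc. */
	instr_end();
	assert(depth == 0);	/* balanced along every path */
}

int main(void)
{
	for (int b = 0; b < 3; b++)
		nmi_enter(b);
	puts("balanced on all paths");
	return 0;
}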
[tip: core/rcu] rcu: Fix a typo in rcu_blocking_is_gp() header comment
The following commit has been merged into the core/rcu branch of tip: Commit-ID: 354c3f0e22dcb17c10d0b79f6e1c5ba286eec0b0 Gitweb: https://git.kernel.org/tip/354c3f0e22dcb17c10d0b79f6e1c5ba286eec0b0 Author:Zhouyi Zhou AuthorDate:Thu, 15 Oct 2020 03:53:03 Committer: Paul E. McKenney CommitterDate: Thu, 19 Nov 2020 19:37:17 -08:00 rcu: Fix a typo in rcu_blocking_is_gp() header comment This commit fixes a typo in the rcu_blocking_is_gp() function's header comment. Signed-off-by: Zhouyi Zhou Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 3438534..0f278d6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3572,7 +3572,7 @@ void __init kfree_rcu_scheduler_running(void) * During early boot, any blocking grace-period wait automatically * implies a grace period. Later on, this is never the case for PREEMPTION. * - * Howevr, because a context switch is a grace period for !PREEMPTION, any + * However, because a context switch is a grace period for !PREEMPTION, any * blocking grace-period wait automatically implies a grace period if * there is only one CPU online at any point time during execution of * either synchronize_rcu() or synchronize_rcu_expedited(). It is OK to
[tip:perf/urgent] perf tools: Fixup mmap event consumption
Commit-ID: 8e50d384cc1d5afd2989cf0f7093756ed7164eb2 Gitweb: http://git.kernel.org/tip/8e50d384cc1d5afd2989cf0f7093756ed7164eb2 Author: Zhouyi Zhou AuthorDate: Thu, 24 Oct 2013 15:43:33 +0800 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 28 Oct 2013 16:06:00 -0300 perf tools: Fixup mmap event consumption The tail position of the event buffer should only be modified after the event has actually been used. Otherwise the event could be overwritten by the kernel before use, and a segmentation fault occurs when invoking perf top -G. Signed-off-by: Zhouyi Zhou Cc: David Ahern Cc: Zhouyi Zhou Link: http://lkml.kernel.org/r/1382600613-32177-1-git-send-email-zhouzho...@gmail.com [ Simplified the logic using exit gotos and renamed write_tail method to mmap_consume ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 7 +++ tools/perf/builtin-top.c | 10 ++ tools/perf/builtin-trace.c| 8 +--- tools/perf/tests/code-reading.c | 1 + tools/perf/tests/keep-tracking.c | 1 + tools/perf/tests/mmap-basic.c | 1 + tools/perf/tests/open-syscall-tp-fields.c | 4 +++- tools/perf/tests/perf-record.c| 2 ++ tools/perf/tests/perf-time-to-tsc.c | 4 +++- tools/perf/tests/sw-clock.c | 4 +++- tools/perf/tests/task-exit.c | 6 +++--- tools/perf/util/evlist.c | 13 ++--- tools/perf/util/evlist.h | 2 ++ tools/perf/util/python.c | 2 ++ 14 files changed, 49 insertions(+), 16 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 935d522..fbc2888 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -888,11 +888,18 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { err = perf_evlist__parse_sample(kvm->evlist, event, &sample); if (err) { + perf_evlist__mmap_consume(kvm->evlist, idx); pr_err("Failed to parse sample\n"); return -1; } err = perf_session_queue_event(kvm->session, event, &sample, 0); + /* +* FIXME: Here we can't consume the event, as perf_session_queue_event will +*point to it, and it'll get possibly overwritten by the kernel.
+*/ + perf_evlist__mmap_consume(kvm->evlist, idx); + if (err) { pr_err("Failed to enqueue sample: %d\n", err); return -1; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0df298a..5a11f13 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -810,7 +810,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) ret = perf_evlist__parse_sample(top->evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); - continue; + goto next_event; } evsel = perf_evlist__id2evsel(session->evlist, sample.id); @@ -825,13 +825,13 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) case PERF_RECORD_MISC_USER: ++top->us_samples; if (top->hide_user_symbols) - continue; + goto next_event; machine = &session->machines.host; break; case PERF_RECORD_MISC_KERNEL: ++top->kernel_samples; if (top->hide_kernel_symbols) - continue; + goto next_event; machine = &session->machines.host; break; case PERF_RECORD_MISC_GUEST_KERNEL: @@ -847,7 +847,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) */ /* Fall thru */ default: - continue; + goto next_event; } @@ -859,6 +859,8 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) machine__process_event(machine, event); } else ++session->stats.nr_unknown_events; +next_event: + perf_evlist__mmap_consume(top->evlist, idx); } } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 71aa3e3..99c8d9a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-tra
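The rule this commit enforces is general to rings shared with an asynchronous producer: publishing a new tail tells the producer it may overwrite those bytes, so the consumer must finish with (or copy) an event before advancing the tail. A compact single-threaded sketch of the ordering — the real perf mmap protocol also involves memory barriers on head/tail, and the names here are illustrative:

#include <stdio.h>

#define RING_SIZE 8

static char ring[RING_SIZE];
static unsigned head, tail;	/* producer writes head, consumer writes tail */

static const char *read_event(void)
{
	return (head == tail) ? NULL : &ring[tail % RING_SIZE];
}

/* Only after this may the producer reuse the slot. */
static void consume_event(void)
{
	tail++;		/* in perf: updating the mmap'd data_tail */
}

int main(void)
{
	char copy;

	ring[head++ % RING_SIZE] = 'A';	/* producer side */

	const char *ev = read_event();
	if (ev) {
		/* WRONG order would be: consume_event(); use(ev);
		 * -- the slot could then be overwritten under us. */
		copy = *ev;		/* use (or copy) the event first ... */
		consume_event();	/* ... then publish the new tail. */
		printf("got %c\n", copy);
	}
	return 0;
}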
[tip:perf/core] perf/x86/amd: Try to fix some mem allocation failure handling
Commit-ID: 503d3291a937b726757c1f7c45fa02389d2f4324 Gitweb: http://git.kernel.org/tip/503d3291a937b726757c1f7c45fa02389d2f4324 Author: Zhouyi Zhou AuthorDate: Wed, 11 Jun 2014 12:09:03 +0800 Committer: Ingo Molnar CommitDate: Wed, 16 Jul 2014 13:31:06 +0200 perf/x86/amd: Try to fix some mem allocation failure handling According to Peter's advice, put the failure handling into a goto chain. Compiled on x86_64; could you check if there is anything that I missed? Signed-off-by: Zhouyi Zhou Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1402459743-20513-1-git-send-email-zhouzho...@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_amd_uncore.c | 111 +--- 1 file changed, 84 insertions(+), 27 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c index 3bbdf4c..30790d7 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -294,31 +294,41 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) cpu_to_node(cpu)); } -static void amd_uncore_cpu_up_prepare(unsigned int cpu) +static int amd_uncore_cpu_up_prepare(unsigned int cpu) { - struct amd_uncore *uncore; + struct amd_uncore *uncore_nb = NULL, *uncore_l2; if (amd_uncore_nb) { - uncore = amd_uncore_alloc(cpu); - uncore->cpu = cpu; - uncore->num_counters = NUM_COUNTERS_NB; - uncore->rdpmc_base = RDPMC_BASE_NB; - uncore->msr_base = MSR_F15H_NB_PERF_CTL; - uncore->active_mask = &amd_nb_active_mask; - uncore->pmu = &amd_nb_pmu; - *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; + uncore_nb = amd_uncore_alloc(cpu); + if (!uncore_nb) + goto fail; + uncore_nb->cpu = cpu; + uncore_nb->num_counters = NUM_COUNTERS_NB; + uncore_nb->rdpmc_base = RDPMC_BASE_NB; + uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; + uncore_nb->active_mask = &amd_nb_active_mask; + uncore_nb->pmu = &amd_nb_pmu; + *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; } if (amd_uncore_l2) { - uncore = amd_uncore_alloc(cpu); - uncore->cpu = cpu; - uncore->num_counters = NUM_COUNTERS_L2; - uncore->rdpmc_base = RDPMC_BASE_L2; - uncore->msr_base = MSR_F16H_L2I_PERF_CTL; - uncore->active_mask = &amd_l2_active_mask; - uncore->pmu = &amd_l2_pmu; - *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + uncore_l2 = amd_uncore_alloc(cpu); + if (!uncore_l2) + goto fail; + uncore_l2->cpu = cpu; + uncore_l2->num_counters = NUM_COUNTERS_L2; + uncore_l2->rdpmc_base = RDPMC_BASE_L2; + uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL; + uncore_l2->active_mask = &amd_l2_active_mask; + uncore_l2->pmu = &amd_l2_pmu; + *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2; } + + return 0; + +fail: + kfree(uncore_nb); + return -ENOMEM; } static struct amd_uncore * @@ -441,7 +451,7 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores) if (!--uncore->refcnt) kfree(uncore); - *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; + *per_cpu_ptr(uncores, cpu) = NULL; } static void amd_uncore_cpu_dead(unsigned int cpu) @@ -461,7 +471,8 @@ amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action, switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - amd_uncore_cpu_up_prepare(cpu); + if (amd_uncore_cpu_up_prepare(cpu)) + return notifier_from_errno(-ENOMEM); break; case CPU_STARTING: @@ -501,20 +512,33 @@ static void __init init_cpu_already_online(void *dummy) amd_uncore_cpu_online(cpu); } +static void cleanup_cpu_online(void *dummy) +{ + unsigned int cpu = smp_processor_id(); + + amd_uncore_cpu_dead(cpu); +} +
static int __init amd_uncore_init(void) { - unsigned int cpu; + unsigned int cpu, cpu2; int ret = -ENODEV; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) - return -ENODEV; + goto fail_nodev; if (!cpu_has_topoext) - return -ENODEV; + goto fail_nodev; if (cpu_has_perfctr_nb) { amd_uncore_nb = alloc_percpu(struct amd_uncore *); - perf_pmu_reg
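The goto chain adopted in this commit is the standard kernel idiom for multi-step initialization: each failure path jumps to a label that unwinds only what has already succeeded, and kfree(NULL) being a no-op lets one label cover several failure points. A generic runnable sketch with made-up resources:

#include <stdio.h>
#include <stdlib.h>

static int init_two_resources(void)
{
	char *nb = NULL, *l2 = NULL;

	nb = malloc(64);
	if (!nb)
		goto fail;

	l2 = malloc(64);
	if (!l2)
		goto fail;

	printf("both resources ready\n");
	/* a real init function would hand off ownership here */
	free(l2);
	free(nb);
	return 0;

fail:
	/* free(NULL) is a no-op, so one label can unwind either failure,
	 * just as the patch relies on kfree(NULL) being safe. */
	free(l2);
	free(nb);
	return -1;
}

int main(void)
{
	return init_two_resources() ? 1 : 0;
}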