[PATCH 3/3] audit: clean up refcounting in audit-tree
From: Miklos Szeredi Drop the initial reference by fsnotify_init_mark early instead of audit_tree_freeing_mark() at destroy time. In the cases we destroy the mark before we drop the initial reference we need to get rid of the get_mark that balances the put_mark in audit_tree_freeing_mark(). Signed-off-by: Miklos Szeredi --- kernel/audit_tree.c | 12 +--- 1 files changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 2b2..ed206fd 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -292,6 +292,7 @@ static void untag_chunk(struct node *p) spin_unlock(&hash_lock); spin_unlock(&entry->lock); fsnotify_destroy_mark(entry); + fsnotify_put_mark(&new->mark); /* drop initial reference */ goto out; Fallback: @@ -330,7 +331,6 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&hash_lock); chunk->dead = 1; spin_unlock(&entry->lock); - fsnotify_get_mark(entry); fsnotify_destroy_mark(entry); fsnotify_put_mark(entry); return 0; @@ -346,6 +346,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) insert_hash(chunk); spin_unlock(&hash_lock); spin_unlock(&entry->lock); + fsnotify_put_mark(entry); /* drop initial reference */ return 0; } @@ -411,7 +412,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&chunk_entry->lock); spin_unlock(&old_entry->lock); - fsnotify_get_mark(chunk_entry); fsnotify_destroy_mark(chunk_entry); fsnotify_put_mark(chunk_entry); @@ -444,6 +444,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&chunk_entry->lock); spin_unlock(&old_entry->lock); fsnotify_destroy_mark(old_entry); + fsnotify_put_mark(chunk_entry); /* drop initial reference */ fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */ return 0; } @@ -915,7 +916,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify struct audit_chunk *chunk = container_of(entry, struct audit_chunk, 
mark); evict_chunk(chunk); - fsnotify_put_mark(entry); + + /* +* We are guaranteed to have at least one reference to the mark from +* either the inode or the caller of fsnotify_destroy_mark(). +*/ + BUG_ON(atomic_read(&entry->refcnt) < 1); } static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode, -- 1.7.7 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/3] audit: don't free_chunk() after fsnotify_add_mark()
From: Miklos Szeredi Don't do free_chunk() after fsnotify_add_mark(). That one does a delayed unref via the destroy list and this results in use-after-free. Signed-off-by: Miklos Szeredi Acked-by: Eric Paris CC: sta...@vger.kernel.org --- kernel/audit_tree.c |6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 3a5ca58..69a5851 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -259,7 +259,7 @@ static void untag_chunk(struct node *p) fsnotify_duplicate_mark(&new->mark, entry); if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { - free_chunk(new); + fsnotify_put_mark(&new->mark); goto Fallback; } @@ -322,7 +322,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) entry = &chunk->mark; if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) { - free_chunk(chunk); + fsnotify_put_mark(entry); return -ENOSPC; } @@ -396,7 +396,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) fsnotify_duplicate_mark(chunk_entry, old_entry); if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) { spin_unlock(&old_entry->lock); - free_chunk(chunk); + fsnotify_put_mark(chunk_entry); fsnotify_put_mark(old_entry); return -ENOSPC; } -- 1.7.7 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/3] audit-tree fixes
Linus, The audit subsystem maintainers (Al and Eric) are not responding to repeated resends. Eric did ack them a while ago, but no response since then. So I'm sending these directly to you. Git tree is here: git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git audit-fixes Thanks, Miklos --- Miklos Szeredi (3): audit: don't free_chunk() after fsnotify_add_mark() audit: fix refcounting in audit-tree audit: clean up refcounting in audit-tree --- kernel/audit_tree.c | 19 --- 1 files changed, 12 insertions(+), 7 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3] SUNRPC: protect service sockets lists during per-net shutdown
On Thu, Aug 16, 2012 at 03:29:03PM -0400, J. Bruce Fields wrote: > Looking back at this: > > - adding the sv_lock looks like the right thing to do anyway > independent of containers, because svc_age_temp_xprts may > still be running. This is what I've been testing with. Or alternatively if you'd rather strip out the other stuff from your patch I could take that instead. --b. commit 719f8bcc883e7992615f4d5625922e24995e2d98 Author: J. Bruce Fields Date: Mon Aug 13 17:03:00 2012 -0400 svcrpc: fix xpt_list traversal locking on shutdown Server threads are not running at this point, but svc_age_temp_xprts still may be, so we need this locking. Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index bac973a..e1810b9 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -917,16 +917,18 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -static void svc_close_list(struct list_head *xprt_list, struct net *net) +static void svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { struct svc_xprt *xprt; + spin_lock(&serv->sv_lock); list_for_each_entry(xprt, xprt_list, xpt_list) { if (xprt->xpt_net != net) continue; set_bit(XPT_CLOSE, &xprt->xpt_flags); set_bit(XPT_BUSY, &xprt->xpt_flags); } + spin_unlock(&serv->sv_lock); } static void svc_clear_pools(struct svc_serv *serv, struct net *net) @@ -949,24 +951,28 @@ static void svc_clear_pools(struct svc_serv *serv, struct net *net) } } -static void svc_clear_list(struct list_head *xprt_list, struct net *net) +static void svc_clear_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { struct svc_xprt *xprt; struct svc_xprt *tmp; + LIST_HEAD(victims); + spin_lock(&serv->sv_lock); list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { if (xprt->xpt_net != net) continue; - svc_delete_xprt(xprt); + list_move(&xprt->xpt_list, &victims); } - list_for_each_entry(xprt, xprt_list, xpt_list) - 
BUG_ON(xprt->xpt_net == net); + spin_unlock(&serv->sv_lock); + + list_for_each_entry_safe(xprt, tmp, &victims, xpt_list) + svc_delete_xprt(xprt); } void svc_close_net(struct svc_serv *serv, struct net *net) { - svc_close_list(&serv->sv_tempsocks, net); - svc_close_list(&serv->sv_permsocks, net); + svc_close_list(serv, &serv->sv_tempsocks, net); + svc_close_list(serv, &serv->sv_permsocks, net); svc_clear_pools(serv, net); /* @@ -974,8 +980,8 @@ void svc_close_net(struct svc_serv *serv, struct net *net) * svc_xprt_enqueue will not add new entries without taking the * sp_lock and checking XPT_BUSY. */ - svc_clear_list(&serv->sv_tempsocks, net); - svc_clear_list(&serv->sv_permsocks, net); + svc_clear_list(serv, &serv->sv_tempsocks, net); + svc_clear_list(serv, &serv->sv_permsocks, net); } /* -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility
On Tue, Aug 21, 2012 at 02:28:20PM -0300, Rafael Aquini wrote: > On Tue, Aug 21, 2012 at 09:24:32AM -0700, Paul E. McKenney wrote: > > On Tue, Aug 21, 2012 at 05:20:11PM +0200, Peter Zijlstra wrote: > > > On Tue, 2012-08-21 at 09:47 -0300, Rafael Aquini wrote: > > > > + mapping = rcu_access_pointer(page->mapping); > > > > + if (mapping) > > > > + mapping = mapping->assoc_mapping; > > > > > > The comment near rcu_access_pointer() explicitly says: > > > > > > * Return the value of the specified RCU-protected pointer, but omit the > > > * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful > > > * when the value of this pointer is accessed, but the pointer is not > > > * dereferenced, > > > > > > Yet you dereference the pointer... smells like fail to me. > > > > Indeed! > > > > This will break DEC Alpha. In addition, if ->mapping can transition > > from non-NULL to NULL, and if you used rcu_access_pointer() rather > > than rcu_dereference() to avoid lockdep-RCU from yelling at you about > > not either being in an RCU read-side critical section or holding an > > update-side lock, you can see failures as follows: > > > > 1. CPU 0 runs the above code, picks up mapping, and finds it non-NULL. > > > > 2. CPU 0 is preempted or otherwise delayed. (Keep in mind that > > even disabling interrupts in a guest OS does not prevent the > > host hypervisor from preempting!) > > > > 3. Some other CPU NULLs page->mapping. Because CPU 0 isn't doing > > anything to prevent it, this other CPU frees the memory. > > > > 4. CPU 0 resumes, and then accesses what is now the freelist. > > Arbitrarily bad things start happening. > > > > If you are in a read-side critical section, use rcu_dereference() instead > > of rcu_access_pointer(). If you are holding an update-side lock, use > > rcu_dereference_protected() and say what lock you are holding. If you > > are doing something else, please say what it is. 
> > > > Thanx, Paul > > > Paul & Peter, > > Thanks for looking into this stuff and providing me such valuable feedback, > and > RCU usage crashcourse. > > I believe rcu_dereference_protected() is what I want/need here, since this > code > is always called for pages which we hold locked (PG_locked bit). It would only help if we locked the page while updating the mapping; as far as I can see, we don't. > So, it brings me > to ask you if the following usage looks sane enough to fix the well pointed > issue, > or if it's another misuse of RCU API: > > + mapping = rcu_dereference_protected(page->mapping, PageLocked(page)); > + if (mapping) > + mapping = mapping->assoc_mapping; > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Using random in interrupts for RT
On Tue, Aug 21, 2012 at 02:12:13PM -0400, Steven Rostedt wrote: > On Tue, 2012-08-14 at 10:30 -0400, Steven Rostedt wrote: > > Thomas, > > > > Ben Hutchings asked me if we still need "genirq: Disable random call on > > preempt-rt" for -rt? With commit 902c098a366 "random: use lockless > > techniques in the interrupt path" there is no more locks used. But does > > it still produce high latencies? > > Ben, > > Looks that the #ifndef can't be removed just yet. The code is not > totally lockless... > > > add_interrupt_randomness -> > credit_entropy_bits -> > kill_fasync -> > kill_fasync_rcu -> > spin_lock_irqsave(&fa->fa_lock, flags) > > to make things worse, that spinlock turns into a mutex on -rt, so it may > crash the box if triggered. Thanks for checking this. Ben. -- Ben Hutchings We get into the habit of living before acquiring the habit of thinking. - Albert Camus -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Q:pt_base in COMPAT mode offset by two pages. Was:Re: [Xen-devel] [PATCH 02/11] xen/x86: Use memblock_reserve for sensitive areas.
On Tue, Aug 21, 2012 at 01:27:32PM -0400, Konrad Rzeszutek Wilk wrote: > On Mon, Aug 20, 2012 at 10:13:05AM -0400, Konrad Rzeszutek Wilk wrote: > > On Fri, Aug 17, 2012 at 06:35:12PM +0100, Stefano Stabellini wrote: > > > On Thu, 16 Aug 2012, Konrad Rzeszutek Wilk wrote: > > > > instead of a big memblock_reserve. This way we can be more > > > > selective in freeing regions (and it also makes it easier > > > > to understand where is what). > > > > > > > > [v1: Move the auto_translate_physmap to proper line] > > > > [v2: Per Stefano suggestion add more comments] > > > > Signed-off-by: Konrad Rzeszutek Wilk > > > > > > much better now! > > > > Thought interestingly enough it breaks 32-bit dom0s (and only dom0s). > > Will have a revised patch posted shortly. > > Jan, I thought something odd. Part of this code replaces this: > > memblock_reserve(__pa(xen_start_info->mfn_list), > xen_start_info->pt_base - xen_start_info->mfn_list); > > with a more region-by-region area. What I found out that if I boot this > as 32-bit guest with a 64-bit hypervisor the xen_start_info->pt_base is > actually wrong. > > Specifically this is what bootup says: > > (good working case - 32bit hypervisor with 32-bit dom0): > (XEN) Loaded kernel: c100->c1a23000 > (XEN) Init. ramdisk: c1a23000->cf730e00 > (XEN) Phys-Mach map: cf731000->cf831000 > (XEN) Start info:cf831000->cf83147c > (XEN) Page tables: cf832000->cf8b5000 > (XEN) Boot stack:cf8b5000->cf8b6000 > (XEN) TOTAL: c000->cfc0 > > [0.00] PT: cf832000 (f832000) > [0.00] Reserving PT: f832000->f8b5000 > > And with a 64-bit hypervisor: > > XEN) VIRTUAL MEMORY ARRANGEMENT: > (XEN) Loaded kernel: c100->c1a23000 > (XEN) Init. 
ramdisk: c1a23000->cf730e00 > (XEN) Phys-Mach map: cf731000->cf831000 > (XEN) Start info:cf831000->cf8314b4 > (XEN) Page tables: cf832000->cf8b6000 > (XEN) Boot stack:cf8b6000->cf8b7000 > (XEN) TOTAL: c000->cfc0 > (XEN) ENTRY ADDRESS: c16bb22c > > [0.00] PT: cf834000 (f834000) > [0.00] Reserving PT: f834000->f8b8000 > > So the pt_base is offset by two pages. And looking at c/s 13257 > it's not clear to me why this two page offset was added? > > The toolstack works fine - so launching 32-bit guests either > under a 32-bit hypervisor or 64-bit works fine: > ] domainbuilder: detail: xc_dom_alloc_segment: page tables : 0xcf805000 -> > 0xcf885000 (pfn 0xf805 + 0x80 pages) > [0.00] PT: cf805000 (f805000) > And this patch on top of the others fixes this.. >From 806c312e50f122c47913145cf884f53dd09d9199 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 21 Aug 2012 14:31:24 -0400 Subject: [PATCH] xen/x86: Workaround 64-bit hypervisor and 32-bit initial domain. If a 64-bit hypervisor is booted with a 32-bit initial domain, the hypervisor deals with the initial domain as "compat" and does some extra adjustments (like pagetables are 4 bytes instead of 8). It also adjusts the xen_start_info->pt_base incorrectly. When booted with a 32-bit hypervisor (32-bit initial domain): .. (XEN) Start info:cf831000->cf83147c (XEN) Page tables: cf832000->cf8b5000 .. [0.00] PT: cf832000 (f832000) [0.00] Reserving PT: f832000->f8b5000 And with a 64-bit hypervisor: (XEN) Start info:cf831000->cf8314b4 (XEN) Page tables: cf832000->cf8b6000 [0.00] PT: cf834000 (f834000) [0.00] Reserving PT: f834000->f8b8000 To deal with this, we keep track of the highest physical address we have reserved via memblock_reserve. If that address does not overlap with pt_base, we have a gap which we reserve. 
Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 30 +- 1 files changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e532eb5..511f92d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1002,19 +1002,24 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) * If the MFN is not in the m2p (provided to us by the hypervisor) this * function won't do anything. In practice this means that the XenBus * MFN won't be available for the initial domain. */ -static void __init xen_reserve_mfn(unsigned long mfn) +static unsigned long __init xen_reserve_mfn(unsigned long mfn) { - unsigned long pfn; + unsigned long pfn, end_pfn = 0; if (!mfn) - return; + return end_pfn; + pfn = mfn_to_pfn(mfn); - if (phys_to_machine_mapping_valid(pfn)) - memblock_reserve(PFN_PHYS(pfn), PAGE_SIZE); + if (phys_to_machine_mapping_valid(pfn)) { + end_pfn = PFN_PHYS(pfn) + PAGE_SIZE; + memblock_reserve(P
Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility
On Tue, Aug 21, 2012 at 02:55:03PM -0300, Rafael Aquini wrote: > On Tue, Aug 21, 2012 at 04:52:23PM +0300, Michael S. Tsirkin wrote: > > > + * address_space_operations utilized methods for ballooned pages: > > > + * .migratepage- used to perform balloon's page migration (as is) > > > + * .launder_page - used to isolate a page from balloon's page list > > > + * .freepage - used to reinsert an isolated page to balloon's > > > page list > > > + */ > > > > It would be a good idea to document the assumptions here. > > Looks like .launder_page and .freepage are called in rcu critical > > section. > > But migratepage isn't - why is that safe? > > > > The migratepage callback for virtio_balloon can sleep, and IIUC we cannot > sleep > within a RCU critical section. > > Also, The migratepage callback is called at inner migration's circle function > move_to_new_page(), and I don't think embedding it in a RCU critical section > would be a good idea, for the same understanding aforementioned. Yes but this means it is still exposed to the module unloading races that RCU was supposed to fix. So need to either rework that code so it won't sleep or switch to some other synchronization. -- MST -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 28/31] arm64: Generic timers support
On 08/14/2012 01:52 PM, Catalin Marinas wrote: > From: Marc Zyngier > > This patch adds support for the ARM generic timers with A64 instructions > for accessing the timer registers. It uses the physical counter as the > clock source and the virtual counter as sched_clock. > > The timer frequency can be specified via DT or read from the CNTFRQ_EL0 > register. The physical counter is also accessible from user space > allowing fast gettimeofday() implementation. [...] > +++ b/drivers/clocksource/arm_generic.c [...] > +static void arch_timer_reg_write(int reg, u32 val) > +{ > + switch (reg) { > + case ARCH_TIMER_REG_CTRL: > + asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); > + break; > + case ARCH_TIMER_REG_TVAL: > + asm volatile("msr cntp_tval_el0, %0" : : "r" (val)); > + break; > + default: > + BUG(); > + } > + > + isb(); > +} Doesn't architecture-specific assembly need to go in the arch directory rather than the drivers directory? Christopher -- Employee of Qualcomm Innovation Center, Inc. Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: lockdep warning on rt_mutex_lock()
On Sat, Aug 18, 2012 at 12:59:08PM +0800, Fengguang Wu wrote: > On Fri, Aug 17, 2012 at 07:44:37AM -0700, Paul E. McKenney wrote: > > On Fri, Aug 17, 2012 at 10:02:40PM +0800, Fengguang Wu wrote: > > > On Fri, Aug 17, 2012 at 06:43:28AM -0700, Paul E. McKenney wrote: > > > > On Fri, Aug 17, 2012 at 06:06:35PM +0800, Fengguang Wu wrote: > > > > > Greetings, > > > > > > > > > > FYI, a lockdep warning: > > > > > > > > Certainly looks problematic! > > > > > > > > Any hint as to what version of the kernel produced this splat? > > > > (Yes, lazy of me to ask, I know, but I am not seeing it in my testing.) > > > > > > It happens on both 3.5.0 and 3.6-rc1. Will bisect (try older kernels) > > > help? > > > Bisect is handy for me :) > > > > Bisection would be very welcome!!! ;-) > > The bisect result is... Hmmm... This patch is a bit of a blast from the past. > commit 9e571a82f0cb205a65a0ea41657f19f22b7fabb8 > Author: Paul E. McKenney > Date: Thu Sep 30 21:26:52 2010 -0700 > > rcu: add tracing for TINY_RCU and TINY_PREEMPT_RCU > > Add tracing for the tiny RCU implementations, including statistics on > boosting in the case of TINY_PREEMPT_RCU and RCU_BOOST. > > Signed-off-by: Paul E. McKenney > Signed-off-by: Paul E. McKenney So the lockdep complaint indicates that lockdep and the actual hardware had different opinions about whether or not interrupts were enabled. One way that can happen is through use of raw_local_irq_save(). And this commit did add a raw_local_irq_save(). So maybe converting to local_irq_save() will make things work better. Fengguang, could you please try out the following patch? Thanx, Paul rcu: Move TINY_PREEMPT_RCU away from raw_local_irq_save() The use of raw_local_irq_save() is unnecessary, given that local_irq_save() really does disable interrupts. Also, it appears to interfere with lockdep. Therefore, this commit moves to local_irq_save(). Reported-by: Fengguang Wu Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 918fd1e..3d01902 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -278,7 +278,7 @@ static int rcu_boost(void) rcu_preempt_ctrlblk.exp_tasks == NULL) return 0; /* Nothing to boost. */ - raw_local_irq_save(flags); + local_irq_save(flags); /* * Recheck with irqs disabled: all tasks in need of boosting @@ -287,7 +287,7 @@ static int rcu_boost(void) */ if (rcu_preempt_ctrlblk.boost_tasks == NULL && rcu_preempt_ctrlblk.exp_tasks == NULL) { - raw_local_irq_restore(flags); + local_irq_restore(flags); return 0; } @@ -317,7 +317,7 @@ static int rcu_boost(void) t = container_of(tb, struct task_struct, rcu_node_entry); rt_mutex_init_proxy_locked(&mtx, t); t->rcu_boost_mutex = &mtx; - raw_local_irq_restore(flags); + local_irq_restore(flags); rt_mutex_lock(&mtx); rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ @@ -991,9 +991,9 @@ static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n) { unsigned long flags; - raw_local_irq_save(flags); + local_irq_save(flags); rcp->qlen -= n; - raw_local_irq_restore(flags); + local_irq_restore(flags); } /* -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility
On Tue, Aug 21, 2012 at 10:13:30PM +0300, Michael S. Tsirkin wrote: > > > > I believe rcu_dereference_protected() is what I want/need here, since this > > code > > is always called for pages which we hold locked (PG_locked bit). > > It would only help if we locked the page while updating the mapping, > as far as I can see we don't. > But we can do it. In fact, by doing it (locking the page) we can easily avoid the nasty race balloon_isolate_page / leak_balloon, in a much simpler way, IMHO. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
RE: [RFC][PATCH v3 2/3] efi_pstore: Introducing workqueue updating sysfs entries
> > efivars_exit(void) > > { > > if (efi_enabled) { > > + cancel_work_sync(&efivar_work); > > Please move this cancel_work_sync() to be before the efi_enabled test. > efi_enabled here means that we registered __efivars. There may be another > driver (gsmi) using the efivars code, so we should > always be cancelling this work. > OK. I will fix it. I confirmed that gsmi actually called register_efivars(). Thank you for letting me know about this. Seiji -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [ 00/46] 3.5.3-stable review
I would like to see commit 2064db725cc6d4ea19a24c138bc37939b63e3ae6 (drm/nv86/fifo: suspend fix) cherry-picked to the 3.5 kernel series, it makes suspend work again on my machine. Could you please queue this up for 3.5.4? Apologies for thread hijacking, my attempts to directly send the request to sta...@vger.kernel.org failed (mail seems to have disappeared in the bit bucket). :-( Cheers, Sven -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [tip:timers/core] timer: Implement TIMER_IRQSAFE
On Tue, Aug 21, 2012 at 09:43:07AM -0700, tip-bot for Tejun Heo wrote: > Commit-ID: c5f66e99b7cb091e3d51ae8e8156892e8feb7fa3 > Gitweb: http://git.kernel.org/tip/c5f66e99b7cb091e3d51ae8e8156892e8feb7fa3 > Author: Tejun Heo > AuthorDate: Wed, 8 Aug 2012 11:10:28 -0700 > Committer: Thomas Gleixner > CommitDate: Tue, 21 Aug 2012 16:28:31 +0200 > > timer: Implement TIMER_IRQSAFE > > Timer internals are protected with irq-safe locks but timer execution > isn't, so a timer being dequeued for execution and its execution > aren't atomic against IRQs. This makes it impossible to wait for its > completion from IRQ handlers and difficult to shoot down a timer from > IRQ handlers. > > This issue caused some issues for delayed_work interface. Because > there's no way to reliably shoot down delayed_work->timer from IRQ > handlers, __cancel_delayed_work() can't share the logic to steal the > target delayed_work with cancel_delayed_work_sync(), and can only > steal delayed_works which are on queued on timer. Similarly, the > pending mod_delayed_work() can't be used from IRQ handlers. > > This patch adds a new timer flag TIMER_IRQSAFE, which makes the timer > to be executed without enabling IRQ after dequeueing such that its > dequeueing and execution are atomic against IRQ handlers. > > This makes it safe to wait for the timer's completion from IRQ > handlers, for example, using del_timer_sync(). It can never be > executing on the local CPU and if executing on other CPUs it won't be > interrupted until done. > > This will enable simplifying delayed_work cancel/mod interface. > > Signed-off-by: Tejun Heo > Cc: torva...@linux-foundation.org > Cc: pet...@infradead.org > Link: http://lkml.kernel.org/r/139428-24962-5-git-send-email...@kernel.org > Signed-off-by: Thomas Gleixner Will pull into wq/for-3.7 and put delay_work changes on top. If there's any objection, please scream. Thanks a lot. 
-- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility
On Tue, Aug 21, 2012 at 02:42:52PM -0300, Rafael Aquini wrote: > On Tue, Aug 21, 2012 at 06:41:42PM +0300, Michael S. Tsirkin wrote: > > On Tue, Aug 21, 2012 at 05:16:06PM +0200, Peter Zijlstra wrote: > > > On Tue, 2012-08-21 at 16:52 +0300, Michael S. Tsirkin wrote: > > > > > + rcu_read_lock(); > > > > > + mapping = rcu_dereference(page->mapping); > > > > > + if (mapping_balloon(mapping)) > > > > > + ret = true; > > > > > + rcu_read_unlock(); > > > > > > > > This looks suspicious: you drop rcu_read_unlock > > > > so can't page switch from balloon to non balloon? > > > > > > RCU read lock is a non-exclusive lock, it cannot avoid anything like > > > that. > > > > You are right, of course. So even keeping rcu_read_lock across both test > > and operation won't be enough - you need to make this function return > > the mapping and pass it to isolate_page/putback_page so that it is only > > dereferenced once. > > > No, I need to dereference page->mapping to check ->mapping flags here, before > returning. Remember this function is used at MM's compaction/migration inner > circles to identify ballooned pages and decide what's the next step. This > function is doing the right thing, IMHO. Yes but the calling code is not doing the right thing. What Peter pointed out here is that two calls to rcu dereference pointer can return different values: rcu critical section is not a lock. So the test for balloon page is not effective: it can change after the fact. To fix, get the pointer once and then pass the mapping around. > Also, looking at how compaction/migration work, we verify the only critical > path > for this function is the page isolation step. The other steps (migration and > putback) perform their work on private lists previouly isolated from a given > source. I vaguely understand but it would be nice to document this properly. The interaction between page->lru handling in balloon and in mm is especially confusing. 
> So, we just need to make sure that the isolation part does not screw things up > by isolating pages that balloon driver is about to release. That's why there > are > so many checkpoints down the page isolation path assuring we really are > isolating a balloon page. Well, testing same thing multiple times is just confusing. It is very hard to make sure there are no races with so much complexity, and the requirements from the balloon driver are unclear to me - it very much looks like it is poking in mm internals. -- MST -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v9 0/2] kvm: level irqfd support
Here's the much anticipated re-write of support for level irqfds. As Michael suggested, I've rolled the eoi/ack notification fd into KVM_IRQFD as a new mode. For lack of a better name, as there seem to be objections to associating this specifically with an EOI or an ACK, I've named this OADN or "On Ack, De-assert & Notify". Patch 1of2 switches current KVM_IRQFDs to use their own IRQ source ID since we're potentially stepping on KVM_USERSPACE_IRQ_SOURCE_ID. Unfortunately, I was not able to make 2of2 use a single IRQ source ID; the reason is that it's racy. Objects to track OADNs are made dynamically, we look through existing ones for a match under spinlock and set up a new one if there's no match. On teardown, we can remove the OADN from the list under lock, but that same lock prevents us from de-assigning the IRQ ACK notifier or waiting for an RCU grace period. We must make sure that any unused GSI is de-asserted, but the above means it's possible that another OADN has been created for this source ID/GSI and de-asserting the GSI could lead to breakage. Instead each OADN object gets its own source ID, but these are all shared by users of the same GSI. So for PCI devices, we might have up to 4 IRQ source IDs allocated. Michael had also suggested avoiding reference counting and using list_empty for this OADN object. Unfortunately, that doesn't work for similar reasons. We want to release the OADN object under lock, preventing others from re-using it on the free path, but in order to have lock-less de-assert & notify we use RCU, meaning we can't trust list_empty until after an RCU grace period, which must be done outside of spinlocks. If there are suggestions how we can handle these better, please make them, but I think this compromise is race-free and still manages to make allocation of IRQ source IDs mostly a non-issue for device assignment limits. 
Thanks, Alex --- Alex Williamson (2): kvm: On Ack, De-assert & Notify KVM_IRQFD extension kvm: Use a reserved IRQ source ID for irqfd Documentation/virtual/kvm/api.txt | 13 ++ arch/x86/kvm/x86.c|4 + include/linux/kvm.h |7 + include/linux/kvm_host.h |2 virt/kvm/eventfd.c| 199 - 5 files changed, 218 insertions(+), 7 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v9 1/2] kvm: Use a reserved IRQ source ID for irqfd
KVM_IRQFD currently uses the reserved KVM_USERSPACE_IRQ_SOURCE_ID which is also shared with userspace injection methods like KVM_IRQ_LINE. This can cause a conflict if an irqfd triggers on a GSI asserted through KVM_IRQ_LINE. Move irqfd to it's own reserved IRQ source ID. Add a capability for userspace to test for this fix. Signed-off-by: Alex Williamson --- arch/x86/kvm/x86.c |3 +++ include/linux/kvm.h |1 + include/linux/kvm_host.h |1 + virt/kvm/eventfd.c |6 +++--- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 42bce48..cd98673 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2174,6 +2174,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_GET_TSC_KHZ: case KVM_CAP_PCI_2_3: case KVM_CAP_KVMCLOCK_CTRL: + case KVM_CAP_IRQFD_IRQ_SOURCE_ID: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -6258,6 +6259,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); + /* Reserve bit 1 of irq_sources_bitmap for irqfd irq source */ + set_bit(KVM_IRQFD_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); raw_spin_lock_init(&kvm->arch.tsc_write_lock); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 2ce09aa..ae66b9c 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -618,6 +618,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_GET_SMMU_INFO 78 #define KVM_CAP_S390_COW 79 #define KVM_CAP_PPC_ALLOC_HTAB 80 +#define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b70b48b..b763230 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -71,6 +71,7 @@ #define KVM_REQ_PMI 17 #define KVM_USERSPACE_IRQ_SOURCE_ID0 +#define KVM_IRQFD_IRQ_SOURCE_ID1 struct kvm; struct kvm_vcpu; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 7d7e2aa..2245cfa 100644 
--- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -67,8 +67,8 @@ irqfd_inject(struct work_struct *work) struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); struct kvm *kvm = irqfd->kvm; - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 1); + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 0); } /* @@ -138,7 +138,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) irq = rcu_dereference(irqfd->irq_entry); /* An event has been signaled, inject an interrupt */ if (irq) - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); + kvm_set_msi(irq, kvm, KVM_IRQFD_IRQ_SOURCE_ID, 1); else schedule_work(&irqfd->inject); rcu_read_unlock(); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v9 2/2] kvm: On Ack, De-assert & Notify KVM_IRQFD extension
For VFIO based device assignment we'd like a mechanism to allow level triggered interrutps to be directly injected into KVM. KVM_IRQFD already allows this for edge triggered interrupts, but for level, we need to watch for acknowledgement of the interrupt from the guest to provide us a hint when to test the device and allow it to re-assert if necessary. To do this, we create a new KVM_IRQFD mode called "On Ack, De-assert & Notify", or OADN. In this mode, an interrupt injection provides only a gsi assertion. We then hook into the IRQ ACK notifier, which when triggered de-asserts the gsi and notifies via another eventfd. It's then the responsibility of the user to re-assert the interrupt is service is still required. Signed-off-by: Alex Williamson --- Documentation/virtual/kvm/api.txt | 13 ++ arch/x86/kvm/x86.c|1 include/linux/kvm.h |6 + include/linux/kvm_host.h |1 virt/kvm/eventfd.c| 193 - 5 files changed, 210 insertions(+), 4 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index bf33aaa..87d7321 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1946,6 +1946,19 @@ the guest using the specified gsi pin. The irqfd is removed using the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd and kvm_irqfd.gsi. +With KVM_CAP_IRQFD_OADN, KVM_IRQFD supports an "On Ack, De-assert & +Notify" option that allows emulation of level-triggered interrupts. +When kvm_irqfd.fd is triggered, the requested gsi is asserted and +remains asserted until interaction with the irqchip indicates the +VM has acknowledged the interrupt, such as an EOI. On acknoledgement +the gsi is automatically de-asserted and the user is notified via +kvm_irqfd.notifyfd. The user is then required to re-assert the +interrupt if the associated device still requires service. To enable +this mode, configure the KVM_IRQFD using the KVM_IRQFD_FLAG_OADN flag +and specify kvm_irqfd.notifyfd. 
Note that closing kvm_irqfd.notifyfd +while configured in this mode does not disable the irqfd. The +KVM_IRQFD_FLAG_OADN flag is only necessary on assignment. + 4.76 KVM_PPC_ALLOCATE_HTAB Capability: KVM_CAP_PPC_ALLOC_HTAB diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cd98673..fde7b66 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2175,6 +2175,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PCI_2_3: case KVM_CAP_KVMCLOCK_CTRL: case KVM_CAP_IRQFD_IRQ_SOURCE_ID: + case KVM_CAP_IRQFD_OADN: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ae66b9c..ec0f1d8 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -619,6 +619,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_COW 79 #define KVM_CAP_PPC_ALLOC_HTAB 80 #define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81 +#define KVM_CAP_IRQFD_OADN 82 #ifdef KVM_CAP_IRQ_ROUTING @@ -684,12 +685,15 @@ struct kvm_xen_hvm_config { #endif #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) +/* Availabie with KVM_CAP_IRQFD_OADN */ +#define KVM_IRQFD_FLAG_OADN (1 << 1) struct kvm_irqfd { __u32 fd; __u32 gsi; __u32 flags; - __u8 pad[20]; + __u32 notifyfd; + __u8 pad[16]; }; struct kvm_clock_data { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b763230..d502d08 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -284,6 +284,7 @@ struct kvm { struct { spinlock_tlock; struct list_head items; + struct list_head oadns; } irqfds; struct list_head ioeventfds; #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 2245cfa..dfdb5b2 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -43,6 +43,23 @@ * */ +/* + * OADN irqfds (On Ack, De-assert & Notify) are a special variety of + * irqfds that assert an interrupt to the irqchip on eventfd trigger, + * receieve notification when userspace acknowledges the interrupt, + * automatically de-asserts the irqchip level, and notifies userspace + * via the 
oadn_eventfd. This object helps to provide one-to-many + * deassert-to-notify so we can share a single irq source ID per OADN. + */ +struct _irqfd_oadn { + struct kvm *kvm; + int irq_source_id; /* IRQ source ID shared by these irqfds */ + struct list_head irqfds; /* list of irqfds using this object */ + struct kvm_irq_ack_notifier notifier; /* IRQ ACK notification */ + struct kref kref; /* Race-free removal */ + struct list_head list; +}; + struct _irqfd { /* Used for MSI fast-path */ struct kvm *kvm; @@ -52,6 +69,10 @@ struct _irqfd { /* Used for l
Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility
On Tue, Aug 21, 2012 at 04:23:58PM -0300, Rafael Aquini wrote: > On Tue, Aug 21, 2012 at 10:13:30PM +0300, Michael S. Tsirkin wrote: > > > > > > I believe rcu_dereference_protected() is what I want/need here, since > > > this code > > > is always called for pages which we hold locked (PG_locked bit). > > > > It would only help if we locked the page while updating the mapping, > > as far as I can see we don't. > > > > But we can do it. In fact, by doing it (locking the page) we can easily avoid > the nasty race balloon_isolate_page / leak_balloon, in a much simpler way, > IMHO. Absolutely. Further, we should look hard at whether most RCU uses in this patchset can be replaced with page lock. -- MST -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] spi: Add SPI master controller for OCTEON SOCs.
On 05/19/2012 10:46 PM, Grant Likely wrote: On Fri, 11 May 2012 14:34:46 -0700, David Daney wrote: From: David Daney Add the driver, link it into the kbuild system and provide device tree binding documentation. Signed-off-by: David Daney Some comments below, but you can add my a-b: Acked-by: Grant Likely [...] + p->register_base = (u64)devm_ioremap(&pdev->dev, res_mem->start, +resource_size(res_mem)); Nasty cast. p->register_base needs to be an __iomem pointer variable. No, it is only ever used as an argument to cvmx_{read,write}_csr(), which want the u64 type. The fact that cvmx_read_csr accepts a uint64_t instead of an __iomem pointer looks really wrong. Why is it written that way? Register addresses on OCTEON are 64-bits wide. In a 32-bit kernel, pointers are only 32-bits wide. Thus was born the cvmx_read_csr() function that takes a u64 address. We no longer support 32-bit kernels, but the legacy of the interface lives on. David Daney -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility
On Tue, Aug 21, 2012 at 10:16:12PM +0300, Michael S. Tsirkin wrote: > On Tue, Aug 21, 2012 at 02:55:03PM -0300, Rafael Aquini wrote: > > On Tue, Aug 21, 2012 at 04:52:23PM +0300, Michael S. Tsirkin wrote: > > > > + * address_space_operations utilized methods for ballooned pages: > > > > + * .migratepage- used to perform balloon's page migration (as is) > > > > + * .launder_page - used to isolate a page from balloon's page list > > > > + * .freepage - used to reinsert an isolated page to balloon's > > > > page list > > > > + */ > > > > > > It would be a good idea to document the assumptions here. > > > Looks like .launder_page and .freepage are called in rcu critical > > > section. > > > But migratepage isn't - why is that safe? > > > > > > > The migratepage callback for virtio_balloon can sleep, and IIUC we cannot > > sleep > > within a RCU critical section. > > > > Also, The migratepage callback is called at inner migration's circle > > function > > move_to_new_page(), and I don't think embedding it in a RCU critical section > > would be a good idea, for the same understanding aforementioned. > > Yes but this means it is still exposed to the module unloading > races that RCU was supposed to fix. > So need to either rework that code so it won't sleep > or switch to some other synchronization. > Can you refactor tell_host() to not sleep? Or, can I get rid of calling it at virtballoon_migratepage()? If 'no' is the answer for both questions, that's the way that code has to remain, even if we find a way around to hack the migratepage callback and have it embedded into a RCU crit section. That's why I believe once the balloon driver is commanded to unload, we must flag virtballoon_migratepage to skip its work. By doing this, the thread performing memory compaction will have to resort to the 'putback' path which is RCU protected. (IMHO). As the module will not unload until it leaks all pages on its list, that unload race you pointed before will be covered. 
-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: linux-next: Tree for Aug 21 (usb_speed_string)
On Tue, Aug 21, 2012 at 11:06:39AM -0700, Randy Dunlap wrote: > On 08/20/2012 11:04 PM, Stephen Rothwell wrote: > > > Hi all, > > > > Changes since 20120820: > > > > > on x86_64: > > ERROR: "usb_speed_string" [drivers/usb/core/usbcore.ko] undefined! > > > CONFIG_USB_SUPPORT is not enabled > (but many other USB drivers are enabled). > Caused by config IR_TTUSBIR which selects USB but does not have the necessary dependency on USB_ARCH_HAS_HCD. Introduced by commit 0938069fa08970f1c898970c1331a029efe9a1ce "[media] rc: Add support for the TechnoTrend USB IR Receiver". Fixing this exposes ERROR: "usb_kill_urb" [drivers/media/usb/gspca/gspca_main.ko] undefined! ERROR: "usb_set_interface" [drivers/media/usb/gspca/gspca_main.ko] undefined! ERROR: "usb_free_coherent" [drivers/media/usb/gspca/gspca_main.ko] undefined! ERROR: "usb_submit_urb" [drivers/media/usb/gspca/gspca_main.ko] undefined! ERROR: "usb_clear_halt" [drivers/media/usb/gspca/gspca_main.ko] undefined! ERROR: "usb_ifnum_to_if" [drivers/media/usb/gspca/gspca_main.ko] undefined! ERROR: "usb_alloc_coherent" [drivers/media/usb/gspca/gspca_main.ko] undefined! ERROR: "usb_free_urb" [drivers/media/usb/gspca/gspca_main.ko] undefined! ERROR: "usb_alloc_urb" [drivers/media/usb/gspca/gspca_main.ko] undefined! which I think is due to CONFIG_USB_GSPCA=m, but I have no idea how that can be enabled w/o USB support. Guenter -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/3] audit: clean up refcounting in audit-tree
On Tue, Aug 21, 2012 at 12:03 PM, Miklos Szeredi wrote: > + /* > +* We are guaranteed to have at least one reference to the mark from > +* either the inode or the caller of fsnotify_destroy_mark(). > +*/ > + BUG_ON(atomic_read(&entry->refcnt) < 1); I pulled, but *please* don't use BUG_ON() as some kind of "let's assert some random crap" thing. We've literally had DoS security issues due to code having BUG_ON()'s and killing the machine, and BUG_ON() often makes things *worse* if it ends up happening in irq context or with some critical lock held, and then the machine is just dead with no logging and no messages left anywhere. So before adding a BUG_ON(), you should ask yourself the following questions: (a) is this something I need to even test? There are lots of rules we have in the kernel. We don't add BUG_ON() for each and every one of them. Is it such a critical data structure that I really need to test for that condition that should never happen? (b) Is this data structure *so* central that I need to immediately kill everything, or do I just want it logged? If it's just a "I want people to know about it, but I don't expect it to happen, I'm just adding a debug thing to make sure", then WARN_ON_ONCE() is likely the right thing. It's *more* likely to get reported, exactly because the machine is more likely to survive a WARN_ON_ONCE(). (c) am I sure that none of the callers hold any central locks that make the BUG_ON() be worse than the alternatives? BUG_ON() is really drastic. Some machines will reboot on bugs. Others will halt. And even the common ones that are just set to kill the particular process can effectively kill the whole machine due to locks or preemption counts etc that never get released. The kind of place that deserves a BUG_ON() is some really *central* code where you have major issues, and there's just not anything you can do to continue. If somebody passes kfree() a bad pointer, there's just nothing kfree() can sanely do about it. 
If somebody does a list_del() with list debugging enabled, and it notices that the list pointers are crap, what are you going to do? You can't continue. But some random data structure that has the wrong refcount? If you *can* return with a warning (and ONCE, at that, so that not only does it get logged, the log doesn't get spammed and useless because it gets too big), that's likely what you should do. And this is *doubly* true if it's a patch in the -rc series and you added the code because you weren't sure you tested all possible random cases. Don't potentially kill the machine because you weren't sure you got all cases! Linus -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RESEND] compat_ioctl: Add RS-485 IOCTLs to the list
The RS-485 TIOCSRS485 and TIOCGRS485 ioctls are 32-bit compatible, so in order to call them on 64-bit systems from 32-bit user mode, we add them to the ioctl pointer list as compatible ioctls. Signed-off-by: Jaeden Amero --- fs/compat_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index debdfe0fc809edfd01ac4a72a0eaf2753efc993d..85dfebfe6820856dc3154dfd178acb6fca63bbe9 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -866,6 +866,8 @@ COMPATIBLE_IOCTL(TIOCGPTN) COMPATIBLE_IOCTL(TIOCSPTLCK) COMPATIBLE_IOCTL(TIOCSERGETLSR) COMPATIBLE_IOCTL(TIOCSIG) +COMPATIBLE_IOCTL(TIOCSRS485) +COMPATIBLE_IOCTL(TIOCGRS485) #ifdef TCGETS2 COMPATIBLE_IOCTL(TCGETS2) COMPATIBLE_IOCTL(TCSETS2) -- 1.7.12 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC 5/5 v2] uprobes: add global breakpoints
By setting an uprobe tracepoint, one learns whenever a certain point within a program is reached / passed. This is recorded and the application continues. This patch adds the ability to hold the program once this point has been passed and the user may attach to the program via ptrace. First, set up a global breakpoint which is very similar to a uprobe trace point: |echo 'g /home/bigeasy/uprobetest/sample:0x044d %ip %ax %bx' > uprobe_events This is exactly what uprobe does except that it starts with the letter 'g' instead of 'p'. Step two is to enable it: |echo 1 > events/uprobes/enable Step three is to add pids of processes which are excluded from global breakpoints even if the process would hit one. This should ensure that the debugger remains active and the global breakpoint on system libc's malloc() does not freeze the system. A pid can be excluded by | echo e $pid > uprobe_gb_exclude You need at least one pid in the exclude list. An entry can be removed by | echo a $pid > uprobe_gb_exclude Let's assume you execute ./sample and the breakpoint is hit. In ps you will see: |1938 pts/1t+ 0:00 ./sample Now you can attach gdb via 'gdb -p 1938'. The gdb now can interact with the tracee and inspect its registers or its stack, single step, let it run… In case the process is not of great interest, the user may continue without gdb by writing its pid into the uprobe_gp_wakeup file |echo 1938 > uprobe_gp_wakeup Cc: gdb-patc...@sourceware.org Signed-off-by: Sebastian Andrzej Siewior --- v1..v2: - closed the window between set state / check state - tried to address Peters review / concern: - added "uprobe_gb_exclude". This file contains a list of pids which are excluded from the "global breakpoint" behavior. The idea is to whitelist programs which are essential and must not hit a breakpoint. An empty list is invalid and _no_ global breakpoint will hit. - added "uprobe_gb_active". This file contains a list of pids which hit the global breakpoint. 
The user can poll() here and wait for the next victim. The size of the list is limited. This is step two to ensure a global system lock up does not occur. If a java program is being debugged and the size of the list is too small then the list could be allocated at runtime with more entries. I've been thinking about alternatives to the approach above: - cgroups Would solve some problems. It would be very easy for the user to group tasks in two groups: "root" group with "allowed" tasks and sub group "excluded" for tasks which are excluded from the global breakpoint(s). A third group would be required to put the "halted" tasks. I would need one file to set the type of the group (root is easy, "allowed" and "halted" have to be set). The notification mechanism works on per file basis. So I would have to add file with no content just to let the user know that the task file has new entries. All in all this looks like an abuse of cgroups just to follow forks on the exclude list and maintain the list. - auto exclude the read()er / poll()er of uprobe_gb_active This sounds lovely but has two shortcomings: - the pid of the process that opened it may change after fork() since the initial owner may exit - there may be two+ children after fork() which read() / poll(). Both should be excluded since I don't know which one is which. I don't know which one terminates because ->release() is called by last process that closes the fd. That means in this scenario I would add more entries to the white list than remove. - having a list of tasks which currently poll() the file would solve the problem with this endless growing list. However once poll() is done (one process just hit the global breakpoint) I have an empty list since no one can poll() now. That means that I would exclude every further process which hits the global breakpoint before someone poll()s again. Oleg: The change in ptrace_attach() is still as it was. I tried to address Peter's concern here. 
Now what options do I have here: - not putting the task in TASK_TRACED but simply halt. This would work without a change to ptrace_attach() but the task continues on any signal. So a signal friendly task would continue and not notice a thing. - putting the TASK_TRACED and not touching ptrace_attach(). Each ptrace() user would have to kick the task itself which means changes to gdb / strace. If this is the prefered way then I guess it can be done :) include/linux/uprobes.h | 10 ++ kernel/events/uprobes.c | 13 +- kernel/ptrace.c |4 +- kernel/trace/trace_uprobe.c | 414 ++- 4 files changed, 435 insertions(+), 6 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 0fc6585..991a665 100644 --- a/include/linux/uprobes.h +++ b/
Re: [2/2] spi: Add SPI master controller for OCTEON SOCs.
On Fri, May 11, 2012 at 08:34:46PM -, David Daney wrote: > From: David Daney > > Add the driver, link it into the kbuild system and provide device tree > binding documentation. > > Signed-off-by: David Daney > Acked-by: Grant Likely > [ ... ] > + > +static int __devexit octeon_spi_remove(struct platform_device *pdev) > +{ > + struct octeon_spi *p = platform_get_drvdata(pdev); > + struct spi_master *master = p->my_master; > + > + spi_unregister_master(master); > + I know it is a bit late, but ... The call to spi_unregister_master() frees the memory associated with master, ie 'p', and the spi_master_put() below without matching spi_master_get() is unnecessary/wrong. One possible fix would be to use struct spi_master *master = spi_master_get(p->my_master); above. That protects master and p while it is still being used, and makes use of the call to spi_master_put() below. Another option might be to move cvmx_write_csr() ahead of the call to spi_unregister_master() and drop the call to spi_master_put(). Guenter > + /* Clear the CSENA* and put everything in a known state. */ > + cvmx_write_csr(p->register_base + OCTEON_SPI_CFG, 0); > + spi_master_put(master); > + return 0; > +} > + -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v9 1/2] kvm: Use a reserved IRQ source ID for irqfd
On Tue, Aug 21, 2012 at 01:29:06PM -0600, Alex Williamson wrote: > KVM_IRQFD currently uses the reserved KVM_USERSPACE_IRQ_SOURCE_ID > which is also shared with userspace injection methods like > KVM_IRQ_LINE. This can cause a conflict if an irqfd triggers on > a GSI asserted through KVM_IRQ_LINE. What kind of conflict do you envision? Pls note level interrupts are unsupported ATM. > Move irqfd to it's own reserved IRQ source ID. Add a capability for > userspace to test for this fix. > > Signed-off-by: Alex Williamson > --- > > arch/x86/kvm/x86.c |3 +++ > include/linux/kvm.h |1 + > include/linux/kvm_host.h |1 + > virt/kvm/eventfd.c |6 +++--- > 4 files changed, 8 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 42bce48..cd98673 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -2174,6 +2174,7 @@ int kvm_dev_ioctl_check_extension(long ext) > case KVM_CAP_GET_TSC_KHZ: > case KVM_CAP_PCI_2_3: > case KVM_CAP_KVMCLOCK_CTRL: > + case KVM_CAP_IRQFD_IRQ_SOURCE_ID: > r = 1; > break; > case KVM_CAP_COALESCED_MMIO: > @@ -6258,6 +6259,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long > type) > > /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ > set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); > + /* Reserve bit 1 of irq_sources_bitmap for irqfd irq source */ > + set_bit(KVM_IRQFD_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); > > raw_spin_lock_init(&kvm->arch.tsc_write_lock); > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > index 2ce09aa..ae66b9c 100644 > --- a/include/linux/kvm.h > +++ b/include/linux/kvm.h > @@ -618,6 +618,7 @@ struct kvm_ppc_smmu_info { > #define KVM_CAP_PPC_GET_SMMU_INFO 78 > #define KVM_CAP_S390_COW 79 > #define KVM_CAP_PPC_ALLOC_HTAB 80 > +#define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81 > > #ifdef KVM_CAP_IRQ_ROUTING > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index b70b48b..b763230 100644 > --- 
a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -71,6 +71,7 @@ > #define KVM_REQ_PMI 17 > > #define KVM_USERSPACE_IRQ_SOURCE_ID 0 > +#define KVM_IRQFD_IRQ_SOURCE_ID 1 > > struct kvm; > struct kvm_vcpu; Above looks fine but I'm not sure why is the below needed. This changes irqfd behaviour for edge GSIs slightly in a userspace-visible way. Maybe make it a separate patch so it can be considered on merits? > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > index 7d7e2aa..2245cfa 100644 > --- a/virt/kvm/eventfd.c > +++ b/virt/kvm/eventfd.c > @@ -67,8 +67,8 @@ irqfd_inject(struct work_struct *work) > struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); > struct kvm *kvm = irqfd->kvm; > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 1); > + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 0); > } > > /* > @@ -138,7 +138,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, > void *key) > irq = rcu_dereference(irqfd->irq_entry); > /* An event has been signaled, inject an interrupt */ > if (irq) > - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); > + kvm_set_msi(irq, kvm, KVM_IRQFD_IRQ_SOURCE_ID, 1); > else > schedule_work(&irqfd->inject); > rcu_read_unlock(); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
RE: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode
> -Original Message- > From: Borislav Petkov [mailto:b...@amd64.org] > Sent: Monday, August 20, 2012 1:20 PM > To: H. Peter Anvin > Cc: Yu, Fenghua; Henrique de Moraes Holschuh; Ingo Molnar; Thomas > Gleixner; Mallick, Asit K; Tigran Aivazian; Andreas Herrmann; Borislav > Petkov; linux-kernel; x86 > Subject: Re: [PATCH 04/11] x86/microcode_core_early.c: Define > interfaces for early load ucode > > On Mon, Aug 20, 2012 at 01:08:49PM -0700, H. Peter Anvin wrote: > > On 08/20/2012 07:06 AM, Borislav Petkov wrote: > > > > > > Or, > > > > > > in case we want to supply more vendor-specific stuff early at boot, > we > > > could do: > > > > > > kernel/x86//microcode... > > > |-> bios_overrides > > > |-> ... > > > > > > and have this layout extensible from the beginning... > > > > > Does that make sense, though? > > Only time will tell. I was simply saying that we should leave ourselves > the door opened, should we need functionality like that in the future. > > > I'm a bit concerned about having multiple files named microcode.bin > by > > default; the pathname isn't as sticky as the filename when people > move > > things around... > > Ok, I see. > > How about the following scheme then: > > kernel/x86/-microcode.bin > kernel/x86/-bios-overrides.blob > ... > > ? > > All I'm saying is maybe we should impose some sanity rules now before > people go crazy with this and things get out of hands... We might name the cpio directory as: kernel/x86/microcode/GenuineIntel.bin kernel/x86/microcode/AuthenticAMD.bin kernel/x86/acpi/... etc. This is expandable for future usage. Plus I will add a doc on the cpio directory, supported directory names and how to add new stuff in the directory. Thanks. -Fenghua Thanks. -Fenghua -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v9 1/2] kvm: Use a reserved IRQ source ID for irqfd
On Tue, 2012-08-21 at 22:58 +0300, Michael S. Tsirkin wrote: > On Tue, Aug 21, 2012 at 01:29:06PM -0600, Alex Williamson wrote: > > KVM_IRQFD currently uses the reserved KVM_USERSPACE_IRQ_SOURCE_ID > > which is also shared with userspace injection methods like > > KVM_IRQ_LINE. This can cause a conflict if an irqfd triggers on > > a GSI asserted through KVM_IRQ_LINE. > > What kind of conflict do you envision? Pls note level interrupts are > unsupported ATM. If KVM_IRQ_LINE asserts a level interrupt and KVM_IRQFD triggers on the same GSI then the pin is no longer asserted as userspace thinks it is. Do we just chalk this up to userspace error? > > Move irqfd to it's own reserved IRQ source ID. Add a capability for > > userspace to test for this fix. > > > > Signed-off-by: Alex Williamson > > --- > > > > arch/x86/kvm/x86.c |3 +++ > > include/linux/kvm.h |1 + > > include/linux/kvm_host.h |1 + > > virt/kvm/eventfd.c |6 +++--- > > 4 files changed, 8 insertions(+), 3 deletions(-) > > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index 42bce48..cd98673 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -2174,6 +2174,7 @@ int kvm_dev_ioctl_check_extension(long ext) > > case KVM_CAP_GET_TSC_KHZ: > > case KVM_CAP_PCI_2_3: > > case KVM_CAP_KVMCLOCK_CTRL: > > + case KVM_CAP_IRQFD_IRQ_SOURCE_ID: > > r = 1; > > break; > > case KVM_CAP_COALESCED_MMIO: > > @@ -6258,6 +6259,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long > > type) > > > > /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ > > set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); > > + /* Reserve bit 1 of irq_sources_bitmap for irqfd irq source */ > > + set_bit(KVM_IRQFD_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); > > > > raw_spin_lock_init(&kvm->arch.tsc_write_lock); > > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > > index 2ce09aa..ae66b9c 100644 > > --- a/include/linux/kvm.h > > +++ b/include/linux/kvm.h > > @@ -618,6 
+618,7 @@ struct kvm_ppc_smmu_info { > > #define KVM_CAP_PPC_GET_SMMU_INFO 78 > > #define KVM_CAP_S390_COW 79 > > #define KVM_CAP_PPC_ALLOC_HTAB 80 > > +#define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81 > > > > #ifdef KVM_CAP_IRQ_ROUTING > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > > index b70b48b..b763230 100644 > > --- a/include/linux/kvm_host.h > > +++ b/include/linux/kvm_host.h > > @@ -71,6 +71,7 @@ > > #define KVM_REQ_PMI 17 > > > > #define KVM_USERSPACE_IRQ_SOURCE_ID0 > > +#define KVM_IRQFD_IRQ_SOURCE_ID1 > > > > struct kvm; > > struct kvm_vcpu; > > Above looks fine but I'm not sure why is the below needed. > This changes irqfd behaviour for edge GSIs slightly > in a userspace-visible way. Maybe make it a separate patch > so it can be considered on merits? Hmm, the above does nothing without the below. I thought I was just implementing your idea that IRQFDs should all share a single IRQ source ID... why is that no longer a good idea? Thanks, Alex > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > > index 7d7e2aa..2245cfa 100644 > > --- a/virt/kvm/eventfd.c > > +++ b/virt/kvm/eventfd.c > > @@ -67,8 +67,8 @@ irqfd_inject(struct work_struct *work) > > struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); > > struct kvm *kvm = irqfd->kvm; > > > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 1); > > + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 0); > > } > > > > /* > > @@ -138,7 +138,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int > > sync, void *key) > > irq = rcu_dereference(irqfd->irq_entry); > > /* An event has been signaled, inject an interrupt */ > > if (irq) > > - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); > > + kvm_set_msi(irq, kvm, KVM_IRQFD_IRQ_SOURCE_ID, 1); > > else > > schedule_work(&irqfd->inject); > > rcu_read_unlock(); -- To 
unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 17/31] arm64: System calls handling
On Tuesday 21 August 2012, Catalin Marinas wrote: > > > +asmlinkage long sys_mmap(unsigned long addr, unsigned long len, > > > +unsigned long prot, unsigned long flags, > > > +unsigned long fd, off_t off) > > > +{ > > > + if (offset_in_page(off) != 0) > > > + return -EINVAL; > > > + > > > + return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); > > > +} > > > > I think > > > > #define sys_mmap sys_mmap_pgoff > > There are slightly different semantics with the last argument of > sys_mmap() which takes a byte offset. The sys_mmap_pgoff() function > takes the offset shifted by PAGE_SHIFT (which is the same as sys_mmap2). > > Looking at the other architectures, it makes sense to use a generic > sys_mmap() implementation similar to the one above (or the ia-64, seems > to be the most complete). > Why that? The generic sys_mmap_pgoff was specifically added so new architectures could just use that instead of having their own wrappers, see f8b72560. Arnd -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode
On 08/21/2012 01:05 PM, Yu, Fenghua wrote: > > We might name the cpio directory as: > > kernel/x86/microcode/GenuineIntel.bin > kernel/x86/microcode/AuthenticAMD.bin > kernel/x86/acpi/... > etc. > > This is expandable for future usage. > > Plus I will add a doc on the cpio directory, supported directory names and > how to add new stuff in the directory. > I believe that was exactly my original proposal. I think it makes sense... most things aren't going to be inherently CPU-specific in this way. I don't know what Borislav was suggesting with "BIOS overrides", is that another CPU-specific thing? -hpa -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 16/31] arm64: ELF definitions
On Tuesday 21 August 2012, Catalin Marinas wrote: > On Thu, Aug 16, 2012 at 01:37:53PM +0100, Arnd Bergmann wrote: > > On Thursday 16 August 2012, Will Deacon wrote: > > > > This looks wrong: PER_LINUX/PER_LINUX32 decides over the output of the > > > > uname system call, while TIF_32BIT decides over the instruction set > > > > when returning to user space. You definitely should not set the > > > > personality > > > > to the value you pass from the elf loader. Instead, just do > > > > > > > > #define SET_PERSONALITY(ex) clear_thread_flag(TIF_32BIT); > > > > #defined COMPAT_SET_PERSONALITY(ex) set_thread_flag(TIF_32BIT); > > > > > > In this case, won't uname be incorrect (aarch64l) for aarch32 tasks (which > > > expect something like armv8l)? > > > > No, the uname output is meant to tell you about the system, not the > > instruction set that you are using (you already know that in compiled > > code). > > OK, so we assumed that compat tasks should get a uname as close as > possible to a 32-bit system, i.e. armv8l, for full compatibility. This > would allow us to run something like 32-bit Debian on an AArch64 kernel > without worrying about any scripts failing. You can still do that, just boot with init="/sbin/setarch armv7 /sbin/init". > But I can see on x86 that it always reports x86_64 even if the task is > x86_32. Not just x86, the same behavior is used on powerpc, s390, mips, sparc and parisc. Not sure about tile though. Arnd -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:x86/apic] x86: dt: Use linear irq domain for ioapic(s)
Commit-ID: ece3234a77ebcd5bbeea6b829c9798328d290cae Gitweb: http://git.kernel.org/tip/ece3234a77ebcd5bbeea6b829c9798328d290cae Author: Sebastian Andrzej Siewior AuthorDate: Mon, 13 Aug 2012 22:23:33 +0200 Committer: Thomas Gleixner CommitDate: Tue, 21 Aug 2012 22:16:57 +0200 x86: dt: Use linear irq domain for ioapic(s) The former conversion to irq_domain_add_legacy() did not fully work since we miss the irq decs for NR_IRQS_LEGACY+. Ideally we could use irq_domain_add_simple() or the no-map variant (and program the virq <-> line mapping directly into ioapic) but this would require a different irq lookup in "do_IRQ()" and won't work with ACPI without changes. So this is probably easiest for everyone. Tested-by: Thierry Reding Signed-off-by: Sebastian Andrzej Siewior Cc: Grant Likely Link: http://lkml.kernel.org/r/20120813202304.ga3...@breakpoint.cc Signed-off-by: Thomas Gleixner --- arch/x86/kernel/devicetree.c | 51 ++--- 1 files changed, 42 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 3ae2ced..b158152 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = { .xlate = ioapic_xlate, }; +static void dt_add_ioapic_domain(unsigned int ioapic_num, + struct device_node *np) +{ + struct irq_domain *id; + struct mp_ioapic_gsi *gsi_cfg; + int ret; + int num; + + gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); + num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; + + id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops, + (void *)ioapic_num); + BUG_ON(!id); + if (gsi_cfg->gsi_base == 0) { + /* +* The first NR_IRQS_LEGACY irq descs are allocated in +* early_irq_init() and need just a mapping. The +* remaining irqs need both. All of them are preallocated +* and assigned so we can keep the 1:1 mapping which the ioapic +* is having. 
+*/ + ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); + if (ret) + pr_err("Error mapping legacy IRQs: %d\n", ret); + + if (num > NR_IRQS_LEGACY) { + ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, + NR_IRQS_LEGACY, num - NR_IRQS_LEGACY); + if (ret) + pr_err("Error creating mapping for the " + "remaining IRQs: %d\n", ret); + } + irq_set_default_host(id); + } else { + ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num); + if (ret) + pr_err("Error creating IRQ mapping: %d\n", ret); + } +} + static void __init ioapic_add_ofnode(struct device_node *np) { struct resource r; @@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np) for (i = 0; i < nr_ioapics; i++) { if (r.start == mpc_ioapic_addr(i)) { - struct irq_domain *id; - struct mp_ioapic_gsi *gsi_cfg; - - gsi_cfg = mp_ioapic_gsi_routing(i); - - id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0, - &ioapic_irq_domain_ops, - (void*)i); - BUG_ON(!id); + dt_add_ioapic_domain(i, np); return; } } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCHSET] workqueue: use irqsafe timer in delayed_work
On Wed, Aug 08, 2012 at 02:37:55PM -0700, Tejun Heo wrote: > This patchset makes delayed_work use the irqsafe timer added by the > pending "timer: clean up initializers and implement irqsafe timers" > patchset[1]. This enables try_to_grab_pending() to be used from any > context which in turn makes mod_delayed_work() usable from IRQ > handlers. cancel_delayed_work() is reimplemented using > try_to_grab_pending() so that it also can be used from IRQ handlers > and its behavior is consistent with other canceling operations. > __cancel_delayed_work() is no longer necessary and deprecated. Applied to wq/for-3.7 after pulling in tip/timers/core. Thanks. -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC][PATCH v4 0/3] make efivars/efi_pstore interrupt-safe
Changelog v3 -> v4 - Patch 2/3 Move cancel_work_sync() above an efi_enabled test in efivars_exit(). v2 -> v3 - Patch 1/3 Replace spin_lock_irqsave/spin_unlock_irqrestore with spin_lock_irq/spin_unlock_irq in efivars_unregister(), efivar_create(), efivar_store_raw() and efivar_delete() which are called in a process context. - Patch 2/3 Change a name of delete_sysfs_entry() to delete_all_stale_sysfs_entries(). Also, don't release an efivar->lock while searching efivar->list in delete_all_stale_sysfs_entries(). - Patch 3/3 Remove a logic in efi_pstore_erase() which freshly created in patch v2. v1 -> v2 - Patch 1/3 Add spin_lock_irq/spin_unlock_irq to open/close callbacks of efi_pstore instead of moving spin_locks to a read callback. - Patch 2/3 Replace a periodical timer with schedule_work(). - Patch 3/3 freshly create to kick a workqueue in oops case only. [Problem] There are following problems related to an interrupt context in efivar/efi_pstore. Currently, efivars enables interrupt while taking efivars->lock. So, there is a risk to be deadlocking in a write callback of efi_pstore if kernel panics in interrupt context while taking efi_lock. Also, efi_pstore creates sysfs entries ,which enable users to access to NVRAM, in a write callback. If a kernel panic happens in interrupt contexts, pstore may fail because it could sleep due to dynamic memory allocations during creating sysfs entries. To resolve the problems above, a goal of this patchset is making efivars/efi_pstore interrupt-safe. 
[Patch Description] Patch 1/3 efivars: Disable external interrupt while holding efivars->lock This patch replaces spin_lock/spin_unlock with spin_lock_irqsave/spin_unlock_irqrestore to make efivars interrupt safe. Patch 2/3 efi_pstore: Introducing workqueue updating sysfs entries This patch removes sysfs operations from the write callback by introducing a workqueue updating sysfs entries. Patch 3/3 efi_pstore: Skipping scheduling a workqueue in cases other than oops This patch restricts scheduling of the workqueue to the cases where users erase entries or an oops happens, which are the only cases where users truly need it. drivers/firmware/efivars.c | 167 +++ include/linux/efi.h|3 +- 2 files changed, 138 insertions(+), 32 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC][PATCH v4 1/3] efivars: Disable external interrupt while holding efivars->lock
[Problem] Currently, efivars doesn't disable interrupt while taking efivars->lock. So, there is a risk to be deadlocking in a write callback of efi_pstore if kernel panics in interrupt context while taking efivars->lock. [Patch Description] This patch disables an external interruption while holding efivars->lock as follows - In efi_pstore_open()/efi_pstore_close(), replace spin_lock/spin_unlock with spin_lock_irq/spin_unlock_irq because they are called in a process context when users access to /dev/pstore. - In unregister_efivars(), replace them with spin_lock_irq/spin_unlock_irq because they are called in a process context when unloading this module. - Also, in efivar_create()/efivar_store_raw()/efivar_delete(), replace them with spin_lock_irq/spin_unlock_irq because they are called in a process context when users access to /sys/firmware/efi/vars/{new_var|del_var}. - In other function calls, replace spin_lock/spin_unlock with spin_lock_irqsave/spin_unlock_irqrestore. Signed-off-by: Seiji Aguchi --- drivers/firmware/efivars.c | 43 +++ 1 files changed, 23 insertions(+), 20 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 47408e8..bd1df01 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -393,10 +393,11 @@ static efi_status_t get_var_data(struct efivars *efivars, struct efi_variable *var) { efi_status_t status; + unsigned long flags; - spin_lock(&efivars->lock); + spin_lock_irqsave(&efivars->lock, flags); status = get_var_data_locked(efivars, var); - spin_unlock(&efivars->lock); + spin_unlock_irqrestore(&efivars->lock, flags); if (status != EFI_SUCCESS) { printk(KERN_WARNING "efivars: get_variable() failed 0x%lx!\n", @@ -514,14 +515,14 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) return -EINVAL; } - spin_lock(&efivars->lock); + spin_lock_irq(&efivars->lock); status = efivars->ops->set_variable(new_var->VariableName, &new_var->VendorGuid, new_var->Attributes, 
new_var->DataSize, new_var->Data); - spin_unlock(&efivars->lock); + spin_unlock_irq(&efivars->lock); if (status != EFI_SUCCESS) { printk(KERN_WARNING "efivars: set_variable() failed: status=%lx\n", @@ -632,7 +633,7 @@ static int efi_pstore_open(struct pstore_info *psi) { struct efivars *efivars = psi->data; - spin_lock(&efivars->lock); + spin_lock_irq(&efivars->lock); efivars->walk_entry = list_first_entry(&efivars->list, struct efivar_entry, list); return 0; @@ -642,7 +643,7 @@ static int efi_pstore_close(struct pstore_info *psi) { struct efivars *efivars = psi->data; - spin_unlock(&efivars->lock); + spin_unlock_irq(&efivars->lock); return 0; } @@ -696,11 +697,12 @@ static int efi_pstore_write(enum pstore_type_id type, struct efivars *efivars = psi->data; struct efivar_entry *entry, *found = NULL; int i, ret = 0; + unsigned long flags; sprintf(stub_name, "dump-type%u-%u-", type, part); sprintf(name, "%s%lu", stub_name, get_seconds()); - spin_lock(&efivars->lock); + spin_lock_irqsave(&efivars->lock, flags); for (i = 0; i < DUMP_NAME_LEN; i++) efi_name[i] = stub_name[i]; @@ -738,7 +740,7 @@ static int efi_pstore_write(enum pstore_type_id type, efivars->ops->set_variable(efi_name, &vendor, PSTORE_EFI_ATTRIBUTES, size, psi->buf); - spin_unlock(&efivars->lock); + spin_unlock_irqrestore(&efivars->lock, flags); if (found) efivar_unregister(found); @@ -822,7 +824,7 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, return -EINVAL; } - spin_lock(&efivars->lock); + spin_lock_irq(&efivars->lock); /* * Does this variable already exist? 
@@ -840,7 +842,7 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, } } if (found) { - spin_unlock(&efivars->lock); + spin_unlock_irq(&efivars->lock); return -EINVAL; } @@ -854,10 +856,10 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, if (status != EFI_SUCCESS) { printk(KERN_WARNING "efivars: set_variable() failed: status=%lx\n", status); - spin_unlock(&efivars->lock); + spin_unlock_irq(&efivars->lock); return -EIO; } - spin_un
[PATCH v2 5/5] X86/XEN: Add few lines explaining simple semantic for x86_init.paging.pagetable_init setup function
- Explain the purpose of the hook - Report execution constraints Signed-off-by: Attilio Rao --- arch/x86/include/asm/x86_init.h |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 995ea5c..7ea4186 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -82,6 +82,11 @@ struct x86_init_mapping { /** * struct x86_init_paging - platform specific paging functions * @pagetable_init:platform specific paging initialization call + * + * It does setup the kernel pagetables and prepares accessors functions to + * manipulate them. + * It must be called once, during the boot sequence and after the direct + * mapping for phys memory is setup. */ struct x86_init_paging { void (*pagetable_init)(void); -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 0/5] X86/XEN: Merge x86_init.paging.pagetable_setup_start and x86_init.paging.pagetable_setup_done setup functions and document its semantic
Currently the definition of x86_init.paging.pagetable_setup_start and x86_init.paging.pagetable_setup_done is twisted and not really well defined (in terms of prototypes desired). More specifically: pagetable_setup_start: * cleans up the boot time page table in the x86_32 case * it is a nop for the XEN case * it is a nop on x86_64 pagetable_setup_done: * it is a nop on x86_32 * sets up accessor functions for pagetable manipulation, for the XEN case * it is a nop on x86_64 Most of this logic can be skipped by creating a new setup function that can handle pagetable setup and pre/post operations on it. This means the above mentioned functions will be removed and only one will be used for the whole operation. The new function must be called only once, during boot-time setup and after the direct mapping for physical memory is available. Differences with v1: - The patch series is re-arranged in a way that helps review, following a plan by Thomas Gleixner - The PVOPS nomenclature is not used as it is not correct - The front-end message is adjusted with feedback by Thomas Gleixner, Stefano Stabellini and Konrad Rzeszutek Wilk Attilio Rao (5): X86/XEN: Remove the base argument from x86_init.paging.pagetable_setup_start X86/XEN: Rename pagetable_setup_start() setup functions into pagetable_init() X86/XEN: Allow setup function x86_init.paging.pagetable_init to setup kernel pagetables X86/XEN: Move content of xen_pagetable_setup_done() into xen_pagetable_init() and retire now unused x86_init.paging.pagetable_setup_done X86/XEN: Add few lines explaining simple semantic for x86_init.paging.pagetable_init setup function arch/x86/include/asm/pgtable_types.h |6 ++ arch/x86/include/asm/x86_init.h | 11 +++ arch/x86/kernel/setup.c |4 +--- arch/x86/kernel/x86_init.c |4 +--- arch/x86/mm/init_32.c| 11 --- arch/x86/xen/mmu.c | 18 +++--- 6 files changed, 22 insertions(+), 32 deletions(-) -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of 
a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC][PATCH v4 2/3] efi_pstore: Introducing workqueue updating sysfs entries
[Problem] efi_pstore creates sysfs entries ,which enable users to access to NVRAM, in a write callback. If a kernel panic happens in interrupt contexts, pstore may fail because it could sleep due to dynamic memory allocations during creating sysfs entries. [Patch Description] This patch removes sysfs operations from a write callback by introducing a workqueue updating sysfs entries which is scheduled after the write callback is called. efi_pstore will be robust against a kernel panic in an interrupt context with it. Signed-off-by: Seiji Aguchi --- drivers/firmware/efivars.c | 119 +++ include/linux/efi.h|3 +- 2 files changed, 110 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index bd1df01..cd16ea6 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -146,6 +146,13 @@ efivar_create_sysfs_entry(struct efivars *efivars, efi_char16_t *variable_name, efi_guid_t *vendor_guid); +/* + * Prototype for workqueue functions updating sysfs entry + */ + +static void efivar_update_sysfs_entry(struct work_struct *); +static DECLARE_WORK(efivar_work, efivar_update_sysfs_entry); + /* Return the number of unicode characters in data */ static unsigned long utf16_strnlen(efi_char16_t *s, size_t maxlength) @@ -731,9 +738,6 @@ static int efi_pstore_write(enum pstore_type_id type, 0, NULL); } - if (found) - list_del(&found->list); - for (i = 0; i < DUMP_NAME_LEN; i++) efi_name[i] = name[i]; @@ -742,14 +746,7 @@ static int efi_pstore_write(enum pstore_type_id type, spin_unlock_irqrestore(&efivars->lock, flags); - if (found) - efivar_unregister(found); - - if (size) - ret = efivar_create_sysfs_entry(efivars, - utf16_strsize(efi_name, - DUMP_NAME_LEN * 2), - efi_name, &vendor); + schedule_work(&efivar_work); *id = part; return ret; @@ -1200,6 +1197,104 @@ EXPORT_SYMBOL_GPL(register_efivars); static struct efivars __efivars; static struct efivar_operations ops; +static void delete_all_stale_sysfs_entries(void) +{ + 
struct efivars *efivars = &__efivars; + struct efivar_entry *entry, *n, *found; + efi_status_t status; + unsigned long flags; + + while (1) { + found = NULL; + spin_lock_irqsave(&efivars->lock, flags); + list_for_each_entry_safe(entry, n, &efivars->list, list) { + status = get_var_data_locked(efivars, &entry->var); + if (status != EFI_SUCCESS) { + found = entry; + list_del(&entry->list); + break; + } + } + spin_unlock_irqrestore(&efivars->lock, flags); + if (found) + efivar_unregister(entry); + else + break; + } +} + +static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor) +{ + struct efivar_entry *entry, *n; + struct efivars *efivars = &__efivars; + unsigned long strsize1, strsize2; + bool found = false; + + strsize1 = utf16_strsize(variable_name, 1024); + list_for_each_entry_safe(entry, n, &efivars->list, list) { + strsize2 = utf16_strsize(entry->var.VariableName, 1024); + if (strsize1 == strsize2 && + !memcmp(variable_name, &(entry->var.VariableName), + strsize2) && + !efi_guidcmp(entry->var.VendorGuid, + *vendor)) { + found = true; + break; + } + } + return found; +} + +static void efivar_update_sysfs_entry(struct work_struct *work) +{ + struct efivars *efivars = &__efivars; + efi_guid_t vendor; + efi_char16_t *variable_name; + unsigned long variable_name_size = 1024, flags; + efi_status_t status = EFI_NOT_FOUND; + bool found; + + /* Delete stale sysfs entries */ + delete_all_stale_sysfs_entries(); + + /* Add new sysfs entries */ + while (1) { + variable_name = kzalloc(variable_name_size, GFP_KERNEL); + if (!variable_name) { + pr_err("efivars: Memory allocation failed.\n"); + return; + } + + spin_lock_irqsave(&efivars->lock, flags); + found = false; + while (1) { + variable_name_size = 1024; + st
[PATCH] PM / Freezer: Fix small typo "regrigerator"
Noticed when digging into a suspend issue in linux-next (next-20120821). For more details see <http://marc.info/?t=13455470802&r=1&w=2>. Signed-off-by: Sedat Dilek --- kernel/power/process.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/process.c b/kernel/power/process.c index 19db29f..87da817 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -79,7 +79,7 @@ static int try_to_freeze_tasks(bool user_only) /* * We need to retry, but first give the freezing tasks some -* time to enter the regrigerator. +* time to enter the refrigerator. */ msleep(10); } -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v9 2/2] kvm: On Ack, De-assert & Notify KVM_IRQFD extension
On Tue, Aug 21, 2012 at 01:29:14PM -0600, Alex Williamson wrote: > For VFIO based device assignment we'd like a mechanism to allow level > triggered interrutps to be directly injected into KVM. KVM_IRQFD > already allows this for edge triggered interrupts, but for level, we > need to watch for acknowledgement of the interrupt from the guest to > provide us a hint when to test the device and allow it to re-assert > if necessary. To do this, we create a new KVM_IRQFD mode called > "On Ack, De-assert & Notify", or OADN. In this mode, an interrupt > injection provides only a gsi assertion. We then hook into the IRQ > ACK notifier, which when triggered de-asserts the gsi and notifies > via another eventfd. It's then the responsibility of the user to > re-assert the interrupt is service is still required. > > Signed-off-by: Alex Williamson Naming aside, looks good. I think I see some minor bugs, and I added some improvement suggestions below. Thanks! > --- > > Documentation/virtual/kvm/api.txt | 13 ++ > arch/x86/kvm/x86.c|1 > include/linux/kvm.h |6 + > include/linux/kvm_host.h |1 > virt/kvm/eventfd.c| 193 > - > 5 files changed, 210 insertions(+), 4 deletions(-) > > diff --git a/Documentation/virtual/kvm/api.txt > b/Documentation/virtual/kvm/api.txt > index bf33aaa..87d7321 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -1946,6 +1946,19 @@ the guest using the specified gsi pin. The irqfd is > removed using > the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd > and kvm_irqfd.gsi. > > +With KVM_CAP_IRQFD_OADN, KVM_IRQFD supports an "On Ack, De-assert & > +Notify" option that allows emulation of level-triggered interrupts. > +When kvm_irqfd.fd is triggered, the requested gsi is asserted and > +remains asserted until interaction with the irqchip indicates the > +VM has acknowledged the interrupt, such as an EOI. 
On acknoledgement > +the gsi is automatically de-asserted and the user is notified via > +kvm_irqfd.notifyfd. The user is then required to re-assert the > +interrupt if the associated device still requires service. To enable > +this mode, configure the KVM_IRQFD using the KVM_IRQFD_FLAG_OADN flag > +and specify kvm_irqfd.notifyfd. Note that closing kvm_irqfd.notifyfd > +while configured in this mode does not disable the irqfd. The > +KVM_IRQFD_FLAG_OADN flag is only necessary on assignment. > + > 4.76 KVM_PPC_ALLOCATE_HTAB > > Capability: KVM_CAP_PPC_ALLOC_HTAB > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index cd98673..fde7b66 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -2175,6 +2175,7 @@ int kvm_dev_ioctl_check_extension(long ext) > case KVM_CAP_PCI_2_3: > case KVM_CAP_KVMCLOCK_CTRL: > case KVM_CAP_IRQFD_IRQ_SOURCE_ID: > + case KVM_CAP_IRQFD_OADN: > r = 1; > break; > case KVM_CAP_COALESCED_MMIO: > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > index ae66b9c..ec0f1d8 100644 > --- a/include/linux/kvm.h > +++ b/include/linux/kvm.h > @@ -619,6 +619,7 @@ struct kvm_ppc_smmu_info { > #define KVM_CAP_S390_COW 79 > #define KVM_CAP_PPC_ALLOC_HTAB 80 > #define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81 > +#define KVM_CAP_IRQFD_OADN 82 > > #ifdef KVM_CAP_IRQ_ROUTING > > @@ -684,12 +685,15 @@ struct kvm_xen_hvm_config { > #endif > > #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) > +/* Availabie with KVM_CAP_IRQFD_OADN */ Need to also explain what it is. > +#define KVM_IRQFD_FLAG_OADN (1 << 1) > > struct kvm_irqfd { > __u32 fd; > __u32 gsi; > __u32 flags; > - __u8 pad[20]; > + __u32 notifyfd; Document that this is only valid with OADN flag. Might be a good idea to rename this to deassert_on_ack_notifyfd or oadn_notifyfd to avoid confusion. 
> + __u8 pad[16]; > }; > > struct kvm_clock_data { > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index b763230..d502d08 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -284,6 +284,7 @@ struct kvm { > struct { > spinlock_tlock; > struct list_head items; > + struct list_head oadns; > } irqfds; > struct list_head ioeventfds; > #endif > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > index 2245cfa..dfdb5b2 100644 > --- a/virt/kvm/eventfd.c > +++ b/virt/kvm/eventfd.c > @@ -43,6 +43,23 @@ > * > */ > > +/* > + * OADN irqfds (On Ack, De-assert & Notify) are a special variety of > + * irqfds that assert an interrupt to the irqchip on eventfd trigger, > + * receieve notification when userspace acknowledges the interrupt, > + * automatically de-asserts the irqchip level, and notifies userspace > + * via the oadn_eventfd. This object helps to provide one-to-m
[RFC][PATCH v4 3/3] efi_pstore: Skipping scheduling a workqueue in cases other than oops
[Problem] efi_pstore creates sysfs files when logging kernel messages to NVRAM. Currently, the sysfs files are updated in a workqueue which is scheduled from the write callback. However, the only situations in which users need the sysfs files are when they erase entries or an oops happens, because in the other cases (panic, reboot or emergency_restart) the system goes down and users can't access the sysfs files anyway. Also, if the kernel panics due to a bug in workqueue operations and the write callback of efi_pstore is called in the panic case, efi_pstore may fail due to a failure of schedule_work(). In that case panic_notifier_chain()/emergency_restart() is not kicked, which may lead to a result the user does not want. [Patch Description] This patch schedules the workqueue updating sysfs entries only in cases where users erase entries or an oops happens, and skips it in other cases like panic, reboot or emergency_restart. Signed-off-by: Seiji Aguchi --- drivers/firmware/efivars.c |7 ++- 1 files changed, 6 insertions(+), 1 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index cd16ea6..d5911fd 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -746,7 +746,12 @@ static int efi_pstore_write(enum pstore_type_id type, spin_unlock_irqrestore(&efivars->lock, flags); - schedule_work(&efivar_work); + /* +* The user may want to update sysfs for this write +* when they erase an entry via /dev/pstore or oops happen. +*/ + if (!size || reason == KMSG_DUMP_OOPS) + schedule_work(&efivar_work); *id = part; return ret; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 3/5] X86/XEN: Allow setup function x86_init.paging.pagetable_init to setup kernel pagetables
Currently, x86_init.paging.pagetable_init relies on callers to setup the kernel pagetable. In order to unify the functionality of x86_init.paging.pagetable_setup_start and x86_init.paging.pagetable_setup_done allow the new setup function to perform the operation itself. Signed-off-by: Attilio Rao --- arch/x86/include/asm/pgtable_types.h |2 +- arch/x86/kernel/setup.c |1 - arch/x86/kernel/x86_init.c |1 - arch/x86/mm/init_32.c|1 + arch/x86/xen/mmu.c |1 + 5 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 0c01e07..c93cb8e 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -306,7 +306,7 @@ extern void native_pagetable_reserve(u64 start, u64 end); extern void native_pagetable_init(void); extern void native_pagetable_setup_done(pgd_t *base); #else -#define native_pagetable_initx86_init_pgd_init_noop +#define native_pagetable_initpaging_init #define native_pagetable_setup_done x86_init_pgd_done_noop #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 61b7d98..315fd24 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -962,7 +962,6 @@ void __init setup_arch(char **cmdline_p) #endif x86_init.paging.pagetable_init(); - paging_init(); x86_init.paging.pagetable_setup_done(swapper_pg_dir); if (boot_cpu_data.cpuid_level >= 0) { diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 0e1e950..5f2478f 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -26,7 +26,6 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } -void __init x86_init_pgd_init_noop(void) { } void __init x86_init_pgd_done_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } void iommu_shutdown_noop(void) { } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0e38e0e..e35b4b1 100644 --- a/arch/x86/mm/init_32.c +++ 
b/arch/x86/mm/init_32.c @@ -475,6 +475,7 @@ void __init native_pagetable_init(void) pte_clear(NULL, va, pte); } paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); + paging_init(); } void __init native_pagetable_setup_done(pgd_t *base) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ff1af97..4f47b87 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1176,6 +1176,7 @@ static void xen_exit_mmap(struct mm_struct *mm) static void __init xen_pagetable_init(void) { + paging_init(); } static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 4/5] X86/XEN: Move content of xen_pagetable_setup_done() into xen_pagetable_init() and retire now unused x86_init.paging.pagetable_setup_done
At this stage x86_init.paging.pagetable_setup_done is only used in the XEN case. Move its content into the x86_init.paging.pagetable_init setup function and remove the now unused x86_init.paging.pagetable_setup_done remaining infrastructure. Signed-off-by: Attilio Rao --- arch/x86/include/asm/pgtable_types.h |2 -- arch/x86/include/asm/x86_init.h |2 -- arch/x86/kernel/setup.c |1 - arch/x86/kernel/x86_init.c |2 -- arch/x86/mm/init_32.c |4 arch/x86/xen/mmu.c | 13 - 6 files changed, 4 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index c93cb8e..db8fec6 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -304,10 +304,8 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte); extern void native_pagetable_reserve(u64 start, u64 end); #ifdef CONFIG_X86_32 extern void native_pagetable_init(void); -extern void native_pagetable_setup_done(pgd_t *base); #else #define native_pagetable_init paging_init -#define native_pagetable_setup_done x86_init_pgd_done_noop #endif struct seq_file; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 24084b2..995ea5c 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -82,11 +82,9 @@ struct x86_init_mapping { /** * struct x86_init_paging - platform specific paging functions * @pagetable_init: platform specific paging initialization call - * @pagetable_setup_done: platform specific post paging_init() call */ struct x86_init_paging { void (*pagetable_init)(void); - void (*pagetable_setup_done)(pgd_t *base); }; /** diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 315fd24..4f16547 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -962,7 +962,6 @@ void __init setup_arch(char **cmdline_p) #endif x86_init.paging.pagetable_init(); - x86_init.paging.pagetable_setup_done(swapper_pg_dir); if (boot_cpu_data.cpuid_level >= 0) { /* A CPU has 
%cr4 if and only if it has CPUID */ diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 5f2478f..7a3d075 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -26,7 +26,6 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } -void __init x86_init_pgd_done_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } void iommu_shutdown_noop(void) { } @@ -69,7 +68,6 @@ struct x86_init_ops x86_init __initdata = { .paging = { .pagetable_init = native_pagetable_init, - .pagetable_setup_done = native_pagetable_setup_done, }, .timers = { diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index e35b4b1..4f04db1 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -478,10 +478,6 @@ void __init native_pagetable_init(void) paging_init(); } -void __init native_pagetable_setup_done(pgd_t *base) -{ -} - /* * Build a proper pagetable for the kernel mappings. Up until this * point, we've been running on some set of pagetables constructed by diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4f47b87..4290d83 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1174,9 +1174,13 @@ static void xen_exit_mmap(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } +static void xen_post_allocator_init(void); + static void __init xen_pagetable_init(void) { paging_init(); + xen_setup_shared_info(); + xen_post_allocator_init(); } static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) @@ -1193,14 +1197,6 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) } } -static void xen_post_allocator_init(void); - -static void __init xen_pagetable_setup_done(pgd_t *base) -{ - xen_setup_shared_info(); - xen_post_allocator_init(); -} - static void xen_write_cr2(unsigned long cr2) { this_cpu_read(xen_vcpu)->arch.cr2 = cr2; @@ -2070,7 +2066,6 @@ void __init xen_init_mmu_ops(void) { x86_init.mapping.pagetable_reserve = 
xen_mapping_pagetable_reserve; x86_init.paging.pagetable_init = xen_pagetable_init; - x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [2/2] spi: Add SPI master controller for OCTEON SOCs.
On 08/21/2012 12:49 PM, Guenter Roeck wrote: On Fri, May 11, 2012 at 08:34:46PM -, David Daney wrote: From: David Daney Add the driver, link it into the kbuild system and provide device tree binding documentation. Signed-off-by: David Daney Acked-by: Grant Likely [ ... ] + +static int __devexit octeon_spi_remove(struct platform_device *pdev) +{ + struct octeon_spi *p = platform_get_drvdata(pdev); + struct spi_master *master = p->my_master; + + spi_unregister_master(master); + I know it is a bit late, but ... In this case, just in time. I am now finally getting back to fixing the issues with this driver, and looking to merge it in the near future. David Daney The call to spi_unregister_master() frees the memory associated with master, i.e. 'p', and the spi_master_put() below without matching spi_master_get() is unnecessary/wrong. One possible fix would be to use struct spi_master *master = spi_master_get(p->my_master); above. That protects master and p while it is still being used, and makes use of the call to spi_master_put() below. Another option might be to move cvmx_write_csr() ahead of the call to spi_unregister_master() and drop the call to spi_master_put(). Guenter + /* Clear the CSENA* and put everything in a known state. */ + cvmx_write_csr(p->register_base + OCTEON_SPI_CFG, 0); + spi_master_put(master); + return 0; +} + -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 2/5] X86/XEN: Rename pagetable_setup_start() setup functions into pagetable_init()
In preparation for unifying the pagetable_setup_start() and pagetable_setup_done() setup functions, rename appropriately all the infrastructure related to pagetable_setup_start(). Signed-off-by: Attilio Rao --- arch/x86/include/asm/pgtable_types.h |4 ++-- arch/x86/include/asm/x86_init.h |4 ++-- arch/x86/kernel/setup.c |2 +- arch/x86/kernel/x86_init.c |4 ++-- arch/x86/mm/init_32.c |4 ++-- arch/x86/xen/mmu.c |4 ++-- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index e02b875..0c01e07 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -303,10 +303,10 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte); extern void native_pagetable_reserve(u64 start, u64 end); #ifdef CONFIG_X86_32 -extern void native_pagetable_setup_start(void); +extern void native_pagetable_init(void); extern void native_pagetable_setup_done(pgd_t *base); #else -#define native_pagetable_setup_start x86_init_pgd_start_noop +#define native_pagetable_init x86_init_pgd_init_noop #define native_pagetable_setup_done x86_init_pgd_done_noop #endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 782ba0c..24084b2 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -81,11 +81,11 @@ struct x86_init_mapping { /** * struct x86_init_paging - platform specific paging functions - * @pagetable_setup_start: platform specific pre paging_init() call + * @pagetable_init: platform specific paging initialization call * @pagetable_setup_done: platform specific post paging_init() call */ struct x86_init_paging { - void (*pagetable_setup_start)(void); + void (*pagetable_init)(void); void (*pagetable_setup_done)(pgd_t *base); }; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 90cbbe0..61b7d98 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -961,7 +961,7 @@ void __init 
setup_arch(char **cmdline_p) kvmclock_init(); #endif - x86_init.paging.pagetable_setup_start(); + x86_init.paging.pagetable_init(); paging_init(); x86_init.paging.pagetable_setup_done(swapper_pg_dir); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 3b88493..0e1e950 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -26,7 +26,7 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } -void __init x86_init_pgd_start_noop(void) { } +void __init x86_init_pgd_init_noop(void) { } void __init x86_init_pgd_done_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } void iommu_shutdown_noop(void) { } @@ -69,7 +69,7 @@ struct x86_init_ops x86_init __initdata = { }, .paging = { - .pagetable_setup_start = native_pagetable_setup_start, + .pagetable_init = native_pagetable_init, .pagetable_setup_done = native_pagetable_setup_done, }, diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c4aa1b2..0e38e0e 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -445,7 +445,7 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base) } #endif /* CONFIG_HIGHMEM */ -void __init native_pagetable_setup_start(void) +void __init native_pagetable_init(void) { unsigned long pfn, va; pgd_t *pgd, *base = swapper_pg_dir; @@ -493,7 +493,7 @@ void __init native_pagetable_setup_done(pgd_t *base) * If we're booting paravirtualized under a hypervisor, then there are * more options: we may already be running PAE, and the pagetable may * or may not be based in swapper_pg_dir. In any case, - * paravirt_pagetable_setup_start() will set up swapper_pg_dir + * paravirt_pagetable_init() will set up swapper_pg_dir * appropriately for the rest of the initialization to work. 
* * In general, pagetable_init() assumes that the pagetable may already diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d89ea5c..ff1af97 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1174,7 +1174,7 @@ static void xen_exit_mmap(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } -static void __init xen_pagetable_setup_start(void) +static void __init xen_pagetable_init(void) { } @@ -2068,7 +2068,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { void __init xen_init_mmu_ops(void) { x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; - x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; + x86_init.paging.pagetable_init = xen_pagetable_init; x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; pv_mmu_ops = xen_mmu_ops; -- 1.7.2.5 -- To
[PATCH v2 1/5] X86/XEN: Remove the base argument from x86_init.paging.pagetable_setup_start
x86_init.paging.pagetable_setup_start for native will however use swapper_pg_dir in the single place where it is used and for native the argument is simply unused. Additionally, the comments already point to swapper_pg_dir as the sole base touched. Finally, this will help with further merging of x86_init.paging.pagetable_setup_start with x86_init.paging.pagetable_setup_done. Signed-off-by: Attilio Rao --- arch/x86/include/asm/pgtable_types.h |6 +++--- arch/x86/include/asm/x86_init.h |2 +- arch/x86/kernel/setup.c |2 +- arch/x86/kernel/x86_init.c |3 ++- arch/x86/mm/init_32.c |4 ++-- arch/x86/xen/mmu.c |2 +- 6 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 013286a..e02b875 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -303,11 +303,11 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte); extern void native_pagetable_reserve(u64 start, u64 end); #ifdef CONFIG_X86_32 -extern void native_pagetable_setup_start(pgd_t *base); +extern void native_pagetable_setup_start(void); extern void native_pagetable_setup_done(pgd_t *base); #else -#define native_pagetable_setup_start x86_init_pgd_noop -#define native_pagetable_setup_done x86_init_pgd_noop +#define native_pagetable_setup_start x86_init_pgd_start_noop +#define native_pagetable_setup_done x86_init_pgd_done_noop #endif struct seq_file; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 38155f6..782ba0c 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -85,7 +85,7 @@ struct x86_init_mapping { * @pagetable_setup_done: platform specific post paging_init() call */ struct x86_init_paging { - void (*pagetable_setup_start)(pgd_t *base); + void (*pagetable_setup_start)(void); void (*pagetable_setup_done)(pgd_t *base); }; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f4b9b80..90cbbe0 100644 --- 
a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -961,7 +961,7 @@ void __init setup_arch(char **cmdline_p) kvmclock_init(); #endif - x86_init.paging.pagetable_setup_start(swapper_pg_dir); + x86_init.paging.pagetable_setup_start(); paging_init(); x86_init.paging.pagetable_setup_done(swapper_pg_dir); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 9f3167e..3b88493 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -26,7 +26,8 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } -void __init x86_init_pgd_noop(pgd_t *unused) { } +void __init x86_init_pgd_start_noop(void) { } +void __init x86_init_pgd_done_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } void iommu_shutdown_noop(void) { } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 575d86f..c4aa1b2 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -445,10 +445,10 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base) } #endif /* CONFIG_HIGHMEM */ -void __init native_pagetable_setup_start(pgd_t *base) +void __init native_pagetable_setup_start(void) { unsigned long pfn, va; - pgd_t *pgd; + pgd_t *pgd, *base = swapper_pg_dir; pud_t *pud; pmd_t *pmd; pte_t *pte; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index b65a761..d89ea5c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1174,7 +1174,7 @@ static void xen_exit_mmap(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } -static void __init xen_pagetable_setup_start(pgd_t *base) +static void __init xen_pagetable_setup_start(void) { } -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/5] X86/XEN: Introduce the x86_init.paging.pagetable_init PVOPS
On 21/08/12 16:44, Thomas Gleixner wrote: On Tue, 21 Aug 2012, Attilio Rao wrote: This new PVOPS is responsible to setup the kernel pagetables and replace entirely x86_init.paging.pagetable_setup_start and x86_init.paging.pagetable_setup_done PVOPS work. For performance the x86_64 stub is implemented as a macro to paging_init() rather than an actual function stub. Huch, using a macro for an once per boot time call is really a massive performance improvement. It's confusing and wrong. You just use a macro because x86_64 does not need any extra setups aside of paging_init(). diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 849be14..c1e910a 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -68,6 +68,7 @@ struct x86_init_ops x86_init __initdata = { }, .paging = { + .pagetable_init = native_pagetable_init, I'd prefer to see these patches implemented differently. #1 Remove the base argument from pagetable_setup_start (leave pagetable_setup_done() alone). #2 Rename pagetable_setup_start to pagetable_init, native_pagetable_setup_start to native_pagetable_init and xen_pagetable_setup_start to xen_pagetable_init #3 Instead of copying the whole native_pagetable_setup_start() function and deleting it later, move the paging_init() call from setup.c to native_pagetable_init() and xen_pagetable_init() and define native_pagetable_init as paging_init() for x86_64 #4 Move the code from xen_pagetable_setup_done() into xen_pagetable_init() and remove the now unused pagetable_setup_done(). That's less code shuffling and pointless copying which makes the review way easier. I've followed these steps in a new patch series (integrating suggestions from Konrad and Stefano too). Attilio -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] task_work: add a scheduling point in task_work_run()
On Tue, 2012-08-21 at 15:05 +0200, Eric Dumazet wrote: > From: Eric Dumazet > > It seems commit 4a9d4b02 (switch fput to task_work_add) reintroduced > the problem addressed in commit 944be0b2 (close_files(): add scheduling > point) > > If a server process with a lot of files (say 2 million tcp sockets) > is killed, we can spend a lot of time in task_work_run() and trigger > a soft lockup. > > Signed-off-by: Eric Dumazet > --- > kernel/task_work.c |1 + > 1 file changed, 1 insertion(+) > > diff --git a/kernel/task_work.c b/kernel/task_work.c > index 91d4e17..d320d44 100644 > --- a/kernel/task_work.c > +++ b/kernel/task_work.c > @@ -75,6 +75,7 @@ void task_work_run(void) > p = q->next; > q->func(q); > q = p; > + cond_resched(); > } > } > } We're here, because fput() called schedule_work() to delay the last fput(). The execution needs to take place before the syscall returns to userspace. Need to read __schedule()... Do you know if cond_resched() can guarantee that it will be executed before the return to userspace? thanks, Mimi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v9 1/2] kvm: Use a reserved IRQ source ID for irqfd
On Tue, Aug 21, 2012 at 02:06:19PM -0600, Alex Williamson wrote: > On Tue, 2012-08-21 at 22:58 +0300, Michael S. Tsirkin wrote: > > On Tue, Aug 21, 2012 at 01:29:06PM -0600, Alex Williamson wrote: > > > KVM_IRQFD currently uses the reserved KVM_USERSPACE_IRQ_SOURCE_ID > > > which is also shared with userspace injection methods like > > > KVM_IRQ_LINE. This can cause a conflict if an irqfd triggers on > > > a GSI asserted through KVM_IRQ_LINE. > > > > What kind of conflict do you envision? Pls note level interrupts are > > unsupported ATM. > > If KVM_IRQ_LINE asserts a level interrupt and KVM_IRQFD triggers on the > same GSI then the pin is no longer asserted as userspace thinks it is. > Do we just chalk this up to userspace error? Yes: using a level GSI with current irqfd is a userspace error because you can lose interrupts anyway. Are edge GSIs affected? > > > Move irqfd to it's own reserved IRQ source ID. Add a capability for > > > userspace to test for this fix. > > > > > > Signed-off-by: Alex Williamson > > > --- > > > > > > arch/x86/kvm/x86.c |3 +++ > > > include/linux/kvm.h |1 + > > > include/linux/kvm_host.h |1 + > > > virt/kvm/eventfd.c |6 +++--- > > > 4 files changed, 8 insertions(+), 3 deletions(-) > > > > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > > index 42bce48..cd98673 100644 > > > --- a/arch/x86/kvm/x86.c > > > +++ b/arch/x86/kvm/x86.c > > > @@ -2174,6 +2174,7 @@ int kvm_dev_ioctl_check_extension(long ext) > > > case KVM_CAP_GET_TSC_KHZ: > > > case KVM_CAP_PCI_2_3: > > > case KVM_CAP_KVMCLOCK_CTRL: > > > + case KVM_CAP_IRQFD_IRQ_SOURCE_ID: > > > r = 1; > > > break; > > > case KVM_CAP_COALESCED_MMIO: > > > @@ -6258,6 +6259,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long > > > type) > > > > > > /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ > > > set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); > > > + /* Reserve bit 1 of irq_sources_bitmap for irqfd irq source */ > > > + 
set_bit(KVM_IRQFD_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); > > > > > > raw_spin_lock_init(&kvm->arch.tsc_write_lock); > > > > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > > > index 2ce09aa..ae66b9c 100644 > > > --- a/include/linux/kvm.h > > > +++ b/include/linux/kvm.h > > > @@ -618,6 +618,7 @@ struct kvm_ppc_smmu_info { > > > #define KVM_CAP_PPC_GET_SMMU_INFO 78 > > > #define KVM_CAP_S390_COW 79 > > > #define KVM_CAP_PPC_ALLOC_HTAB 80 > > > +#define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81 > > > > > > #ifdef KVM_CAP_IRQ_ROUTING > > > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > > > index b70b48b..b763230 100644 > > > --- a/include/linux/kvm_host.h > > > +++ b/include/linux/kvm_host.h > > > @@ -71,6 +71,7 @@ > > > #define KVM_REQ_PMI 17 > > > > > > #define KVM_USERSPACE_IRQ_SOURCE_ID 0 > > > +#define KVM_IRQFD_IRQ_SOURCE_ID 1 > > > > > > struct kvm; > > > struct kvm_vcpu; > > > > Above looks fine but I'm not sure why is the below needed. > > This changes irqfd behaviour for edge GSIs slightly > > in a userspace-visible way. Maybe make it a separate patch > > so it can be considered on merits? > > Hmm, the above does nothing without the below. Yes. But you can use the above with the new irqfds you are adding. > I thought I was just > implementing your idea that IRQFDs should all share a single IRQ source > ID... Sorry I only meant for level irqfds. You are changing edge here. > why is that no longer a good idea? Thanks, > > Alex Maybe it is a good idea. I am just asking for the motivation. 
> > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > > > index 7d7e2aa..2245cfa 100644 > > > --- a/virt/kvm/eventfd.c > > > +++ b/virt/kvm/eventfd.c > > > @@ -67,8 +67,8 @@ irqfd_inject(struct work_struct *work) > > > struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); > > > struct kvm *kvm = irqfd->kvm; > > > > > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 1); > > > + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > } > > > > > > /* > > > @@ -138,7 +138,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int > > > sync, void *key) > > > irq = rcu_dereference(irqfd->irq_entry); > > > /* An event has been signaled, inject an interrupt */ > > > if (irq) > > > - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); > > > + kvm_set_msi(irq, kvm, KVM_IRQFD_IRQ_SOURCE_ID, 1); > > > else > > > schedule_work(&irqfd->inject); > > > rcu_read_unlock(); > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at
[PATCH -next] HID: picoLCD: Add missing #include <linux/uaccess.h>
m68k/allmodconfig: drivers/hid/hid-picolcd_debugfs.c: In function ‘picolcd_debug_reset_write’: drivers/hid/hid-picolcd_debugfs.c:54: error: implicit declaration of function ‘copy_from_user’ drivers/hid/hid-picolcd_debugfs.c: In function ‘picolcd_debug_eeprom_read’: drivers/hid/hid-picolcd_debugfs.c:112: error: implicit declaration of function ‘copy_to_user’ Signed-off-by: Geert Uytterhoeven --- http://kisskb.ellerman.id.au/kisskb/buildresult/6990818/ drivers/hid/hid-picolcd_debugfs.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/drivers/hid/hid-picolcd_debugfs.c b/drivers/hid/hid-picolcd_debugfs.c index f2491fa..15c22f2 100644 --- a/drivers/hid/hid-picolcd_debugfs.c +++ b/drivers/hid/hid-picolcd_debugfs.c @@ -27,6 +27,7 @@ #include #include +#include <linux/uaccess.h> #include "hid-picolcd.h" -- 1.7.0.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility
On Tue, Aug 21, 2012 at 10:30:31PM +0300, Michael S. Tsirkin wrote: > On Tue, Aug 21, 2012 at 04:23:58PM -0300, Rafael Aquini wrote: > > On Tue, Aug 21, 2012 at 10:13:30PM +0300, Michael S. Tsirkin wrote: > > > > > > > > I believe rcu_dereference_protected() is what I want/need here, since > > > > this code > > > > is always called for pages which we hold locked (PG_locked bit). > > > > > > It would only help if we locked the page while updating the mapping, > > > as far as I can see we don't. > > > > > > > But we can do it. In fact, by doing it (locking the page) we can easily > > avoid > > the nasty race balloon_isolate_page / leak_balloon, in a much simpler way, > > IMHO. > > Absolutely. Further, we should look hard at whether most RCU uses > in this patchset can be replaced with page lock. > Yeah, In fact, by testing/grabbing the page lock at leak_balloon() even the module unload X migration / putback race seems to fade away, since migration code holds the page locked all the way. And that seems a quite easy task to be accomplished: @@ -169,21 +197,61 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) /* We can only do one array worth at a time. */ num = min(num, ARRAY_SIZE(vb->pfns)); + mutex_lock(&vb->balloon_lock); for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { + spin_lock(&vb->pages_lock); + /* +* 'virtballoon_isolatepage()' can drain vb->pages list +* making us to stumble across a _temporarily_ empty list. +* +* Release the spinlock and resume from here in order to +* give page migration a shot to refill vb->pages list. +*/ + if (unlikely(list_empty(&vb->pages))) { + spin_unlock(&vb->pages_lock); + break; + } + page = list_first_entry(&vb->pages, struct page, lru); + + /* +* Grab the page lock to avoid racing against threads isolating +* pages from vb->pages list (it's done under page lock). +* +* Failing to grab the page lock here means this page has been +* selected for isolation already. 
+*/ + if (!trylock_page(page)) { + spin_unlock(&vb->pages_lock); + break; + } + + clear_balloon_mapping(page); list_del(&page->lru); set_page_pfns(vb->pfns + vb->num_pfns, page); vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE; + unlock_page(page); + spin_unlock(&vb->pages_lock); } . -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCHv3 7/9] arm: vt8500: doc: Add device tree bindings for arch-vt8500 devices
Bindings for gpio, interrupt controller, power management controller, timer, realtime clock, serial uart, ehci and uhci controllers and framebuffer controllers used on the arch-vt8500 platform. Framebuffer binding also specifies a 'display' node which is required for determining the lcd panel data. Signed-off-by: Tony Prisk --- Documentation/devicetree/bindings/arm/vt8500.txt | 15 .../bindings/arm/vt8500/via,vt8500-intc.txt| 16 + .../bindings/arm/vt8500/via,vt8500-pmc.txt | 13 .../bindings/arm/vt8500/via,vt8500-timer.txt | 15 Documentation/devicetree/bindings/clock/vt8500.txt | 72 .../devicetree/bindings/gpio/gpio_vt8500.txt | 24 +++ .../devicetree/bindings/rtc/via,vt8500-rtc.txt | 15 .../bindings/tty/serial/via,vt8500-uart.txt| 15 .../devicetree/bindings/usb/platform-uhci.txt | 15 .../devicetree/bindings/usb/via,vt8500-ehci.txt| 15 .../devicetree/bindings/vendor-prefixes.txt|2 + .../devicetree/bindings/video/via,vt8500-fb.txt| 48 + .../devicetree/bindings/video/wm,prizm-ge-rops.txt | 13 .../devicetree/bindings/video/wm,wm8505-fb.txt | 22 ++ 14 files changed, 300 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/vt8500.txt create mode 100644 Documentation/devicetree/bindings/arm/vt8500/via,vt8500-intc.txt create mode 100644 Documentation/devicetree/bindings/arm/vt8500/via,vt8500-pmc.txt create mode 100644 Documentation/devicetree/bindings/arm/vt8500/via,vt8500-timer.txt create mode 100644 Documentation/devicetree/bindings/clock/vt8500.txt create mode 100644 Documentation/devicetree/bindings/gpio/gpio_vt8500.txt create mode 100644 Documentation/devicetree/bindings/rtc/via,vt8500-rtc.txt create mode 100644 Documentation/devicetree/bindings/tty/serial/via,vt8500-uart.txt create mode 100644 Documentation/devicetree/bindings/usb/platform-uhci.txt create mode 100644 Documentation/devicetree/bindings/usb/via,vt8500-ehci.txt create mode 100644 Documentation/devicetree/bindings/video/via,vt8500-fb.txt create mode 100644 
Documentation/devicetree/bindings/video/wm,prizm-ge-rops.txt create mode 100644 Documentation/devicetree/bindings/video/wm,wm8505-fb.txt diff --git a/Documentation/devicetree/bindings/arm/vt8500.txt b/Documentation/devicetree/bindings/arm/vt8500.txt new file mode 100644 index 000..1b3b187 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/vt8500.txt @@ -0,0 +1,15 @@ +VIA/Wondermedia VT8500 Platforms Device Tree Bindings +--- + +Boards with the VIA VT8500 SoC shall have the following properties: +Required root node property: +compatible = "via,vt8500"; + +Boards with the Wondermedia WM8505 SoC shall have the following properties: +Required root node property: +compatible = "wm,wm8505"; + +Boards with the Wondermedia WM8650 SoC shall have the following properties: +Required root node property: +compatible = "wm,wm8650"; + diff --git a/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-intc.txt b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-intc.txt new file mode 100644 index 000..0a4ce10 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-intc.txt @@ -0,0 +1,16 @@ +VIA/Wondermedia VT8500 Interrupt Controller +- + +Required properties: +- compatible : "via,vt8500-intc" +- reg : Should contain 1 register ranges(address and length) +- #interrupt-cells : should be <1> + +Example: + + intc: interrupt-controller@d814 { + compatible = "via,vt8500-intc"; + interrupt-controller; + reg = <0xd814 0x1>; + #interrupt-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-pmc.txt b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-pmc.txt new file mode 100644 index 000..521b9c7 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-pmc.txt @@ -0,0 +1,13 @@ +VIA/Wondermedia VT8500 Power Management Controller +- + +Required properties: +- compatible : "via,vt8500-pmc" +- reg : Should contain 1 register ranges(address and length) + +Example: + + pmc@d813 { + compatible = 
"via,vt8500-pmc"; + reg = <0xd813 0x1000>; + }; diff --git a/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-timer.txt b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-timer.txt new file mode 100644 index 000..901c73f --- /dev/null +++ b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-timer.txt @@ -0,0 +1,15 @@ +VIA/Wondermedia VT8500 Timer +- + +Required properties: +- compatible : "via,vt8500-timer" +- reg : Should contain 1 register ranges(address and
[PATCHv3 8/9] arm: vt8500: gpio: Devicetree support for arch-vt8500
Converted the existing arch-vt8500 gpio to a platform_device. Added support for WM8505 and WM8650 GPIO controllers. Signed-off-by: Tony Prisk --- drivers/gpio/Kconfig |6 + drivers/gpio/Makefile |1 + drivers/gpio/gpio-vt8500.c | 313 3 files changed, 320 insertions(+) create mode 100644 drivers/gpio/gpio-vt8500.c diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 542f0c0..3c8897a 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -183,6 +183,12 @@ config GPIO_STA2X11 Say yes here to support the STA2x11/ConneXt GPIO device. The GPIO module has 128 GPIO pins with alternate functions. +config GPIO_VT8500 + bool "VIA/Wondermedia SoC GPIO Support" + depends on ARCH_VT8500 + help + Say yes here to support the VT8500/WM8505/WM8650 GPIO controller. + config GPIO_XILINX bool "Xilinx GPIO support" depends on PPC_OF || MICROBLAZE diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 0f55662..2c014b9 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_GPIO_TPS65912) += gpio-tps65912.o obj-$(CONFIG_GPIO_TWL4030) += gpio-twl4030.o obj-$(CONFIG_GPIO_UCB1400) += gpio-ucb1400.o obj-$(CONFIG_GPIO_VR41XX) += gpio-vr41xx.o +obj-$(CONFIG_GPIO_VT8500) += gpio-vt8500.o obj-$(CONFIG_GPIO_VX855) += gpio-vx855.o obj-$(CONFIG_GPIO_WM831X) += gpio-wm831x.o obj-$(CONFIG_GPIO_WM8350) += gpio-wm8350.o diff --git a/drivers/gpio/gpio-vt8500.c b/drivers/gpio/gpio-vt8500.c new file mode 100644 index 000..19b12d9 --- /dev/null +++ b/drivers/gpio/gpio-vt8500.c @@ -0,0 +1,313 @@ +/* linux/arch/arm/mach-vt8500/gpio.c + * + * Copyright (C) 2012 Tony Prisk + * Based on gpio.c: + * - Copyright (C) 2010 Alexey Charkov + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + We handle GPIOs by bank, each bank containing up to 32 GPIOs covered + by one set of registers (although not all may be valid). + + Because different SoC's have different register offsets, we pass the + register offsets as data in vt8500_gpio_dt_ids[]. + + A value of NO_REG is used to indicate that this register is not + supported. Only used for ->en at the moment. +*/ + +#define NO_REG 0x + +/* + * struct vt8500_gpio_bank_regoffsets + * @en: offset to enable register of the bank + * @dir: offset to direction register of the bank + * @data_out: offset to the data out register of the bank + * @data_in: offset to the data in register of the bank + * @ngpio: highest valid pin in this bank + */ + +struct vt8500_gpio_bank_regoffsets { + unsigned inten; + unsigned intdir; + unsigned intdata_out; + unsigned intdata_in; + unsigned char ngpio; +}; + +struct vt8500_gpio_data { + unsigned intnum_banks; + struct vt8500_gpio_bank_regoffsets banks[]; +}; + +#define VT8500_BANK(__en, __dir, __out, __in, __ngpio) \ +{ \ + .en = __en, \ + .dir = __dir, \ + .data_out = __out, \ + .data_in = __in,\ + .ngpio = __ngpio, \ +} + +static struct vt8500_gpio_data vt8500_data = { + .num_banks = 7, + .banks = { + VT8500_BANK(0x00, 0x20, 0x40, 0x60, 26), + VT8500_BANK(0x04, 0x24, 0x44, 0x64, 28), + VT8500_BANK(0x08, 0x28, 0x48, 0x68, 31), + VT8500_BANK(0x0C, 0x2C, 0x4C, 0x6C, 19), + VT8500_BANK(0x10, 0x30, 0x50, 0x70, 19), + VT8500_BANK(0x14, 0x34, 0x54, 0x74, 23), + VT8500_BANK(NO_REG, 0x3C, 0x5C, 0x7C, 9), + }, +}; + +static struct vt8500_gpio_data wm8505_data = { + .num_banks = 10, + .banks = { + VT8500_BANK(0x40, 0x68, 0x90, 
0xB8, 8), + VT8500_BANK(0x44, 0x6C, 0x94, 0xBC, 32), + VT8500_BANK(0x48, 0x70, 0x98, 0xC0, 6), + VT8500_BANK(0x4C, 0x74, 0x9C, 0xC4, 16), + VT8500_BANK(0x50, 0x78, 0xA0, 0xC8, 25), + VT8500_BANK(0x54, 0x7C, 0xA4, 0xCC, 5), + VT8500_BANK(0x
[PATCHv3 1/9] arm: vt8500: Add device tree files for VIA/Wondermedia SoC's
Add device tree files for VT8500, WM8505 and WM8650 SoC's and reference boards. Signed-off-by: Tony Prisk --- arch/arm/boot/dts/vt8500-bv07.dts | 31 + arch/arm/boot/dts/vt8500.dtsi | 100 +++ arch/arm/boot/dts/wm8505-ref.dts | 31 + arch/arm/boot/dts/wm8505.dtsi | 126 + arch/arm/boot/dts/wm8650-mid.dts | 31 + arch/arm/boot/dts/wm8650.dtsi | 138 + 6 files changed, 457 insertions(+) create mode 100644 arch/arm/boot/dts/vt8500-bv07.dts create mode 100644 arch/arm/boot/dts/vt8500.dtsi create mode 100644 arch/arm/boot/dts/wm8505-ref.dts create mode 100644 arch/arm/boot/dts/wm8505.dtsi create mode 100644 arch/arm/boot/dts/wm8650-mid.dts create mode 100644 arch/arm/boot/dts/wm8650.dtsi diff --git a/arch/arm/boot/dts/vt8500-bv07.dts b/arch/arm/boot/dts/vt8500-bv07.dts new file mode 100644 index 000..339a664 --- /dev/null +++ b/arch/arm/boot/dts/vt8500-bv07.dts @@ -0,0 +1,31 @@ +/* + * vt8500-bv07.dts - Device tree file for Benign BV07 Netbook + * + * Copyright (C) 2012 Tony Prisk + * + * Licensed under GPLv2 or later + */ + +/dts-v1/; +/include/ "vt8500.dtsi" + +/ { + model = "Benign BV07 Netbook"; + + /* +* Display node is based on Sascha Hauer's patch on dri-devel. +* Added a bpp property to calculate the size of the framebuffer +* until the binding is formalized. 
+*/ + display: display { + xres = <800>; + yres = <480>; + left-margin = <88>; + right-margin = <40>; + hsync-len = <0>; + upper-margin = <32>; + lower-margin = <11>; + vsync-len = <1>; + bpp = <16>; + }; +}; diff --git a/arch/arm/boot/dts/vt8500.dtsi b/arch/arm/boot/dts/vt8500.dtsi new file mode 100644 index 000..78571d5 --- /dev/null +++ b/arch/arm/boot/dts/vt8500.dtsi @@ -0,0 +1,100 @@ +/* + * vt8500.dtsi - Device tree file for VIA VT8500 SoC + * + * Copyright (C) 2012 Tony Prisk + * + * Licensed under GPLv2 or later + */ + +/include/ "skeleton.dtsi" + +/ { + compatible = "via,vt8500"; + + soc { + #address-cells = <1>; + #size-cells = <1>; + compatible = "simple-bus"; + ranges; + interrupt-parent = <&intc>; + + intc: interrupt-controller@d814 { + compatible = "via,vt8500-intc"; + interrupt-controller; + reg = <0xd814 0x1>; + #interrupt-cells = <1>; + }; + + gpio: gpio-controller@d811 { + compatible = "via,vt8500-gpio"; + gpio-controller; + reg = <0xd811 0x1>; + #gpio-cells = <3>; + }; + + pmc@d813 { + compatible = "via,vt8500-pmc"; + reg = <0xd813 0x1000>; + }; + + timer@d8130100 { + compatible = "via,vt8500-timer"; + reg = <0xd8130100 0x28>; + interrupts = <36>; + }; + + ehci@d8007900 { + compatible = "via,vt8500-ehci"; + reg = <0xd8007900 0x200>; + interrupts = <43>; + }; + + uhci@d8007b00 { + compatible = "platform-uhci"; + reg = <0xd8007b00 0x200>; + interrupts = <43>; + }; + + fb@d800e400 { + compatible = "via,vt8500-fb"; + reg = <0xd800e400 0x400>; + interrupts = <12>; + via,display = <&display>; + }; + + ge_rops@d8050400 { + compatible = "wm,prizm-ge-rops"; + reg = <0xd8050400 0x100>; + }; + + uart@d820 { + compatible = "via,vt8500-uart"; + reg = <0xd820 0x1040>; + interrupts = <32>; + }; + + uart@d82b { + compatible = "via,vt8500-uart"; + reg = <0xd82b 0x1040>; + interrupts = <33>; + }; + + uart@d821 { + compatible = "via,vt8500-uart"; + reg = <0xd821 0x1040>; + interrupts = <47>; + }; + + uart@d82c { + compatible = "via,vt8500-uart"; + reg = <0xd82c 
0x1040>; + interrupts = <50>; + }; + + rtc@d810 { +
[PATCHv3 2/9] rtc: vt8500: Add devicetree support for vt8500-rtc
Signed-off-by: Tony Prisk --- drivers/rtc/rtc-vt8500.c |9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-vt8500.c b/drivers/rtc/rtc-vt8500.c index 9e94fb1..07bf193 100644 --- a/drivers/rtc/rtc-vt8500.c +++ b/drivers/rtc/rtc-vt8500.c @@ -23,6 +23,7 @@ #include #include #include +#include /* * Register definitions @@ -302,12 +303,18 @@ static int __devexit vt8500_rtc_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id wmt_dt_ids[] = { + { .compatible = "via,vt8500-rtc", }, + {} +}; + static struct platform_driver vt8500_rtc_driver = { .probe = vt8500_rtc_probe, .remove = __devexit_p(vt8500_rtc_remove), .driver = { .name = "vt8500-rtc", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(wmt_dt_ids), }, }; @@ -315,5 +322,5 @@ module_platform_driver(vt8500_rtc_driver); MODULE_AUTHOR("Alexey Charkov "); MODULE_DESCRIPTION("VIA VT8500 SoC Realtime Clock Driver (RTC)"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:vt8500-rtc"); -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCHv3 0/9] *** ARM: Update arch-vt8500 to Devicetree ***
This patchset updates arch-vt8500 to devicetree and removes all the old-style code. Support for WM8650 has also been added. Example dts/dtsi files are given for the three currently supported models. Major changes: GPIO code has been converted to a platform_device and rewritten as WM8505 support was broken. Add support for WM8650 gpio controller. UHCI support was missing. Added this as a generic non-pci uhci controller as it doesn't require anything special. Should be usable by any system that doesn't have special requirements to get the UHCI controller working. Framebuffer code patched to support WM8650. The bindings for this are of concern but there doesn't seem to be a formalized binding yet. This patch is based off Sascha Hauer's current patch on the dri-devel mailing list and should be easily patched out when it is finalized. Patchset based on Arnd's arm-soc/for-next branch. Could I get this reviewed, hopefully for inclusion into v3.7? Regards Tony Prisk Changes v2: Cleanup style/formatting errors Removed erroneous commit message about GPIO not being converted to devicetree Corrected arch-vt8500/irq.c header to correct filename Changed GPIO driver to use module_platform_driver() Renamed vt8500_gpio_bank_regs -> vt8500_gpio_bank_regoffsets Changed vt8500_gpio_bank_regoffsets fields to unsigned int Changed bit-setting code to use BIT() macro Removed of_find_compatible() and use pdev->dev.of_node in _probe() Removed regoff field and related code - leftover from old design Added kerneldoc regarding struct vt8500_gpio_bank_regoffsets fields Update MODULE_LICENSE on all platform devices to "GPL v2" to match their headers Renamed dts board files to clarify product names v3: Corrected serial driver issue after porting to device tree. pdev->id no longer valid. Corrected irq.c to properly initialize slaved interrupt controller. Updated framebuffer drivers to use phandles for display node. Corrected dts definitions for updated framebuffer driver. 
EHCI/UHCI patch (Patch 4/9) already in -next via usb-next tree. Included common clock frame support. Added initialization code to arch/arm/mach-vt8500/vt8500.c for clocks. Updated wm8650.dtsi to include basic clocks. Tony Prisk (9): arm: vt8500: Add device tree files for VIA/Wondermedia SoC's rtc: vt8500: Add devicetree support for vt8500-rtc serial: vt8500: Add devicetree support for vt8500-serial usb: vt8500: Add devicetree support for vt8500-ehci and -uhci. video: vt8500: Add devicetree support for vt8500-fb and wm8505-fb arm: vt8500: Update arch-vt8500 to devicetree support. arm: vt8500: doc: Add device tree bindings for arch-vt8500 devices arm: vt8500: gpio: Devicetree support for arch-vt8500 arm: vt8500: clk: Add Common Clock Framework support Documentation/devicetree/bindings/arm/vt8500.txt | 15 + .../bindings/arm/vt8500/via,vt8500-intc.txt| 16 + .../bindings/arm/vt8500/via,vt8500-pmc.txt | 13 + .../bindings/arm/vt8500/via,vt8500-timer.txt | 15 + Documentation/devicetree/bindings/clock/vt8500.txt | 72 +++ .../devicetree/bindings/gpio/gpio_vt8500.txt | 24 + .../devicetree/bindings/rtc/via,vt8500-rtc.txt | 15 + .../bindings/tty/serial/via,vt8500-uart.txt| 15 + .../devicetree/bindings/usb/platform-uhci.txt | 15 + .../devicetree/bindings/usb/via,vt8500-ehci.txt| 15 + .../devicetree/bindings/vendor-prefixes.txt|2 + .../devicetree/bindings/video/via,vt8500-fb.txt| 48 ++ .../devicetree/bindings/video/wm,prizm-ge-rops.txt | 13 + .../devicetree/bindings/video/wm,wm8505-fb.txt | 22 + arch/arm/Kconfig |5 + arch/arm/boot/dts/vt8500-bv07.dts | 31 ++ arch/arm/boot/dts/vt8500.dtsi | 100 arch/arm/boot/dts/wm8505-ref.dts | 31 ++ arch/arm/boot/dts/wm8505.dtsi | 126 + arch/arm/boot/dts/wm8650-mid.dts | 31 ++ arch/arm/boot/dts/wm8650.dtsi | 138 ++ arch/arm/mach-vt8500/Kconfig | 72 +-- arch/arm/mach-vt8500/Makefile |9 +- arch/arm/mach-vt8500/bv07.c| 80 arch/arm/mach-vt8500/common.h | 28 ++ arch/arm/mach-vt8500/devices-vt8500.c | 91 arch/arm/mach-vt8500/devices-wm8505.c | 99 
arch/arm/mach-vt8500/devices.c | 270 --- arch/arm/mach-vt8500/devices.h | 88 arch/arm/mach-vt8500/gpio.c| 240 -- arch/arm/mach-vt8500/include/mach/restart.h|4 +- arch/arm/mach-vt8500/include/mach/vt8500_irqs.h| 88 arch/arm/mach-vt8500/include/mach/vt8500_regs.h| 79 arch/arm/mach-vt8500/include/mach/wm8505_irqs.h| 115 - arch/arm/mach-vt8500/include/mach/wm8505_regs.h| 78 --- arch/arm/mach-vt8500/irq.c
[PATCHv3 5/9] video: vt8500: Add devicetree support for vt8500-fb and wm8505-fb
Update vt8500-fb, wm8505-fb and wmt-ge-rops to support device tree bindings. Small change in wm8505-fb.c to support WM8650 framebuffer color format. Signed-off-by: Tony Prisk --- drivers/video/Kconfig |6 +-- drivers/video/vt8500lcdfb.c | 79 ++- drivers/video/wm8505fb.c| 97 --- drivers/video/wmt_ge_rops.c |9 +++- 4 files changed, 161 insertions(+), 30 deletions(-) diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 0217f74..b66d951 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1788,7 +1788,7 @@ config FB_AU1200 config FB_VT8500 bool "VT8500 LCD Driver" - depends on (FB = y) && ARM && ARCH_VT8500 && VTWM_VERSION_VT8500 + depends on (FB = y) && ARM && ARCH_VT8500 select FB_WMT_GE_ROPS select FB_SYS_IMAGEBLIT help @@ -1797,11 +1797,11 @@ config FB_VT8500 config FB_WM8505 bool "WM8505 frame buffer support" - depends on (FB = y) && ARM && ARCH_VT8500 && VTWM_VERSION_WM8505 + depends on (FB = y) && ARM && ARCH_VT8500 select FB_WMT_GE_ROPS select FB_SYS_IMAGEBLIT help - This is the framebuffer driver for WonderMedia WM8505 + This is the framebuffer driver for WonderMedia WM8505/WM8650 integrated LCD controller. 
source "drivers/video/geode/Kconfig" diff --git a/drivers/video/vt8500lcdfb.c b/drivers/video/vt8500lcdfb.c index 2a5fe6e..758e359 100644 --- a/drivers/video/vt8500lcdfb.c +++ b/drivers/video/vt8500lcdfb.c @@ -35,6 +35,13 @@ #include "vt8500lcdfb.h" #include "wmt_ge_rops.h" +#ifdef CONFIG_OF +#include +#include +#include +#endif + + #define to_vt8500lcd_info(__info) container_of(__info, \ struct vt8500lcd_info, fb) @@ -270,15 +277,21 @@ static int __devinit vt8500lcd_probe(struct platform_device *pdev) { struct vt8500lcd_info *fbi; struct resource *res; - struct vt8500fb_platform_data *pdata = pdev->dev.platform_data; void *addr; int irq, ret; + struct fb_videomode of_mode; + struct device_node *np; + u32 bpp; + dma_addr_t fb_mem_phys; + unsigned long fb_mem_len; + void *fb_mem_virt; + ret = -ENOMEM; fbi = NULL; - fbi = kzalloc(sizeof(struct vt8500lcd_info) + sizeof(u32) * 16, - GFP_KERNEL); + fbi = devm_kzalloc(&pdev->dev, sizeof(struct vt8500lcd_info) + + sizeof(u32) * 16, GFP_KERNEL); if (!fbi) { dev_err(&pdev->dev, "Failed to initialize framebuffer device\n"); ret = -ENOMEM; @@ -333,9 +346,45 @@ static int __devinit vt8500lcd_probe(struct platform_device *pdev) goto failed_free_res; } - fbi->fb.fix.smem_start = pdata->video_mem_phys; - fbi->fb.fix.smem_len= pdata->video_mem_len; - fbi->fb.screen_base = pdata->video_mem_virt; + np = of_parse_phandle(pdev->dev.of_node, "via,display", 0); + if (!np) { + pr_err("%s: No display description in Device Tree\n", __func__); + ret = -EINVAL; + goto failed_free_res; + } + + /* +* This code is copied from Sascha Hauer's of_videomode helper +* and can be replaced with a call to the helper once mainlined +*/ + ret = 0; + ret |= of_property_read_u32(np, "xres", &of_mode.xres); + ret |= of_property_read_u32(np, "yres", &of_mode.yres); + ret |= of_property_read_u32(np, "left-margin", &of_mode.left_margin); + ret |= of_property_read_u32(np, "right-margin", &of_mode.right_margin); + ret |= of_property_read_u32(np, "hsync-len", 
&of_mode.hsync_len); + ret |= of_property_read_u32(np, "upper-margin", &of_mode.upper_margin); + ret |= of_property_read_u32(np, "lower-margin", &of_mode.lower_margin); + ret |= of_property_read_u32(np, "vsync-len", &of_mode.vsync_len); + ret |= of_property_read_u32(np, "bpp", &bpp); + if (ret) { + pr_err("%s: Unable to read display properties\n", __func__); + goto failed_free_res; + } + of_mode.vmode = FB_VMODE_NONINTERLACED; + + /* try allocating the framebuffer */ + fb_mem_len = of_mode.xres * of_mode.yres * 2 * (bpp / 8); + fb_mem_virt = dma_alloc_coherent(&pdev->dev, fb_mem_len, &fb_mem_phys, + GFP_KERNEL); + if (!fb_mem_virt) { + pr_err("%s: Failed to allocate framebuffer\n", __func__); + return -ENOMEM; + }; + + fbi->fb.fix.smem_start = fb_mem_phys; + fbi->fb.fix.smem_len= fb_mem_len; + fbi->fb.screen_base = fb_mem_virt; fbi->palette_size = PAGE_ALIGN(512); fbi->palette_cpu= dma_alloc_coherent(
[PATCHv3 4/9] usb: vt8500: Add devicetree support for vt8500-ehci and -uhci.
Add devicetree support for vt8500-ehci. Convert vt8500-uhci to a generic non-pci platform-uhci with device tree support. Signed-off-by: Tony Prisk --- drivers/usb/host/Kconfig |4 +- drivers/usb/host/ehci-vt8500.c | 25 -- drivers/usb/host/uhci-hcd.c |5 ++ drivers/usb/host/uhci-platform.c | 169 ++ 4 files changed, 195 insertions(+), 8 deletions(-) create mode 100644 drivers/usb/host/uhci-platform.c diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index dcfaaa9..d7a6b10 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -450,7 +450,7 @@ config USB_OHCI_LITTLE_ENDIAN config USB_UHCI_HCD tristate "UHCI HCD (most Intel and VIA) support" - depends on USB && (PCI || SPARC_LEON) + depends on USB && (PCI || SPARC_LEON || ARCH_VT8500) ---help--- The Universal Host Controller Interface is a standard by Intel for accessing the USB hardware in the PC (which is also called the USB @@ -468,7 +468,7 @@ config USB_UHCI_HCD config USB_UHCI_SUPPORT_NON_PCI_HC bool depends on USB_UHCI_HCD - default y if SPARC_LEON + default y if (SPARC_LEON || ARCH_VT8500) config USB_UHCI_BIG_ENDIAN_MMIO bool diff --git a/drivers/usb/host/ehci-vt8500.c b/drivers/usb/host/ehci-vt8500.c index c1eda73..0e1637b 100644 --- a/drivers/usb/host/ehci-vt8500.c +++ b/drivers/usb/host/ehci-vt8500.c @@ -16,6 +16,7 @@ * */ +#include #include static int ehci_update_device(struct usb_hcd *hcd, struct usb_device *udev) @@ -84,20 +85,23 @@ static const struct hc_driver vt8500_ehci_hc_driver = { .clear_tt_buffer_complete = ehci_clear_tt_buffer_complete, }; +static u64 wmt_ehci_dma_mask = DMA_BIT_MASK(32); + static int vt8500_ehci_drv_probe(struct platform_device *pdev) { struct usb_hcd *hcd; struct ehci_hcd *ehci; struct resource *res; + int irq; int ret; if (usb_disabled()) return -ENODEV; - if (pdev->resource[1].flags != IORESOURCE_IRQ) { - pr_debug("resource[1] is not IORESOURCE_IRQ"); - return -ENOMEM; - } + /* devicetree created devices don't specify a dma mask */ + if 
(!pdev->dev.dma_mask) + pdev->dev.dma_mask = &wmt_ehci_dma_mask; + hcd = usb_create_hcd(&vt8500_ehci_hc_driver, &pdev->dev, "VT8500"); if (!hcd) return -ENOMEM; @@ -134,8 +138,9 @@ static int vt8500_ehci_drv_probe(struct platform_device *pdev) ehci_reset(ehci); - ret = usb_add_hcd(hcd, pdev->resource[1].start, - IRQF_SHARED); + irq = platform_get_irq(pdev, 0); + + ret = usb_add_hcd(hcd, irq, IRQF_SHARED); if (ret == 0) { platform_set_drvdata(pdev, hcd); return ret; @@ -162,6 +167,11 @@ static int vt8500_ehci_drv_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id vt8500_ehci_ids[] = { + { .compatible = "via,vt8500-ehci", }, + {} +}; + static struct platform_driver vt8500_ehci_driver = { .probe = vt8500_ehci_drv_probe, .remove = vt8500_ehci_drv_remove, @@ -169,7 +179,10 @@ static struct platform_driver vt8500_ehci_driver = { .driver = { .name = "vt8500-ehci", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(vt8500_ehci_ids), } }; MODULE_ALIAS("platform:vt8500-ehci"); +MODULE_LICENSE("GPL v2"); +MODULE_DEVICE_TABLE(of, vt8500_ehci_ids); diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index e4db350..5da5c99 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -846,6 +846,11 @@ static const char hcd_name[] = "uhci_hcd"; #define PLATFORM_DRIVERuhci_grlib_driver #endif +#ifdef CONFIG_ARCH_VT8500 +#include "uhci-platform.c" +#define PLATFORM_DRIVERuhci_platform_driver +#endif + #if !defined(PCI_DRIVER) && !defined(PLATFORM_DRIVER) #error "missing bus glue for uhci-hcd" #endif diff --git a/drivers/usb/host/uhci-platform.c b/drivers/usb/host/uhci-platform.c new file mode 100644 index 000..35ca094 --- /dev/null +++ b/drivers/usb/host/uhci-platform.c @@ -0,0 +1,169 @@ +/* + * Generic UHCI HCD (Host Controller Driver) for Platform Devices + * + * Copyright (c) 2011 Tony Prisk + * + * This file is based on uhci-grlib.c + * (C) Copyright 2004-2007 Alan Stern, st...@rowland.harvard.edu + */ + 
+#include +#include + +static int uhci_platform_init(struct usb_hcd *hcd) +{ + struct uhci_hcd *uhci = hcd_to_uhci(hcd); + + uhci->rh_numports = uhci_count_ports(hcd); + + /* Set up pointers to to generic functions */ + uhci->reset_hc = uhci_generic_reset_hc; + uhci->check_and_reset_hc = uhci_generic_check_and_reset_hc; + + /* No special actions
[PATCHv3 3/9] serial: vt8500: Add devicetree support for vt8500-serial
Signed-off-by: Tony Prisk --- drivers/tty/serial/vt8500_serial.c | 37 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/vt8500_serial.c b/drivers/tty/serial/vt8500_serial.c index 2be006f..72e32db 100644 --- a/drivers/tty/serial/vt8500_serial.c +++ b/drivers/tty/serial/vt8500_serial.c @@ -34,6 +34,7 @@ #include #include #include +#include /* * UART Register offsets @@ -76,6 +77,8 @@ #define RX_FIFO_INTS (RXFAF | RXFF | RXOVER | PER | FER | RXTOUT) #define TX_FIFO_INTS (TXFAE | TXFE | TXUDR) +#define VT8500_MAX_PORTS 6 + struct vt8500_port { struct uart_portuart; charname[16]; @@ -83,6 +86,13 @@ struct vt8500_port { unsigned intier; }; +/* + * we use this variable to keep track of which ports + * have been allocated as we can't use pdev->id in + * devicetree + */ +static unsigned long vt8500_ports_in_use; + static inline void vt8500_write(struct uart_port *port, unsigned int val, unsigned int off) { @@ -431,7 +441,7 @@ static int vt8500_verify_port(struct uart_port *port, return 0; } -static struct vt8500_port *vt8500_uart_ports[4]; +static struct vt8500_port *vt8500_uart_ports[VT8500_MAX_PORTS]; static struct uart_driver vt8500_uart_driver; #ifdef CONFIG_SERIAL_VT8500_CONSOLE @@ -549,6 +559,7 @@ static int __devinit vt8500_serial_probe(struct platform_device *pdev) struct vt8500_port *vt8500_port; struct resource *mmres, *irqres; int ret; + int port; mmres = platform_get_resource(pdev, IORESOURCE_MEM, 0); irqres = platform_get_resource(pdev, IORESOURCE_IRQ, 0); @@ -559,13 +570,25 @@ static int __devinit vt8500_serial_probe(struct platform_device *pdev) if (!vt8500_port) return -ENOMEM; + /* calculate the port id */ + port = find_first_zero_bit(&vt8500_ports_in_use, + sizeof(vt8500_ports_in_use)); + if (port > VT8500_MAX_PORTS) + return -ENODEV; + + /* reserve the port id */ + if (test_and_set_bit(port, &vt8500_ports_in_use)) { + /* port already in use - shouldn't really happen */ + return -EBUSY; + } + vt8500_port->uart.type = 
PORT_VT8500; vt8500_port->uart.iotype = UPIO_MEM; vt8500_port->uart.mapbase = mmres->start; vt8500_port->uart.irq = irqres->start; vt8500_port->uart.fifosize = 16; vt8500_port->uart.ops = &vt8500_uart_pops; - vt8500_port->uart.line = pdev->id; + vt8500_port->uart.line = port; vt8500_port->uart.dev = &pdev->dev; vt8500_port->uart.flags = UPF_IOREMAP | UPF_BOOT_AUTOCONF; vt8500_port->uart.uartclk = 2400; @@ -579,7 +602,7 @@ static int __devinit vt8500_serial_probe(struct platform_device *pdev) goto err; } - vt8500_uart_ports[pdev->id] = vt8500_port; + vt8500_uart_ports[port] = vt8500_port; uart_add_one_port(&vt8500_uart_driver, &vt8500_port->uart); @@ -603,12 +626,18 @@ static int __devexit vt8500_serial_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id wmt_dt_ids[] = { + { .compatible = "via,vt8500-uart", }, + {} +}; + static struct platform_driver vt8500_platform_driver = { .probe = vt8500_serial_probe, .remove = __devexit_p(vt8500_serial_remove), .driver = { .name = "vt8500_serial", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(wmt_dt_ids), }, }; @@ -642,4 +671,4 @@ module_exit(vt8500_serial_exit); MODULE_AUTHOR("Alexey Charkov "); MODULE_DESCRIPTION("Driver for vt8500 serial device"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode
On Tue, Aug 21, 2012 at 01:13:26PM -0700, H. Peter Anvin wrote: > I don't know what Borislav was suggesting with "BIOS overrides", is > that another CPU-specific thing? Not CPU- but rather platform-specific. It is Thomas Renninger's mechanism to override BIOS tables. -- Regards/Gruss, Boris. Advanced Micro Devices GmbH Einsteinring 24, 85609 Dornach GM: Alberto Bozzo Reg: Dornach, Landkreis Muenchen HRB Nr. 43632 WEEE Registernr: 129 19551 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCHv3 9/9] arm: vt8500: clk: Add Common Clock Framework support
This patch adds common clock framework support for arch-vt8500. Support for PLL and device clocks on VT8500, WM8505 and WM8650 are included. Signed-off-by: Tony Prisk --- drivers/clk/Makefile |1 + drivers/clk/clk-vt8500.c | 496 ++ 2 files changed, 497 insertions(+) create mode 100644 drivers/clk/clk-vt8500.c diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 5869ea3..42fb173 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_ARCH_SOCFPGA)+= socfpga/ obj-$(CONFIG_PLAT_SPEAR) += spear/ obj-$(CONFIG_ARCH_U300)+= clk-u300.o obj-$(CONFIG_ARCH_INTEGRATOR) += versatile/ +obj-$(CONFIG_ARCH_VT8500) += clk-vt8500.o # Chip specific obj-$(CONFIG_COMMON_CLK_WM831X) += clk-wm831x.o diff --git a/drivers/clk/clk-vt8500.c b/drivers/clk/clk-vt8500.c new file mode 100644 index 000..524c479 --- /dev/null +++ b/drivers/clk/clk-vt8500.c @@ -0,0 +1,496 @@ +#include +#include +#include +#include +#include +#include + +/* All clocks share the same lock as none can be changed concurrently */ +static DEFINE_SPINLOCK(_lock); + +struct clk_device { + struct clk_hw hw; + void __iomem*div_reg; + unsigned intdiv_mask; + void __iomem*en_reg; + int en_bit; + spinlock_t *lock; +}; + +/* + * Add new PLL_TYPE_x definitions here as required. Use the first known model + * to support the new type as the name. 
+ * Add case statements to vtwm_pll_recalc_rate(), vtwm_pll_round_round() and + * vtwm_pll_set_rate() to handle the new PLL_TYPE_x + */ + +#define PLL_TYPE_VT85000 +#define PLL_TYPE_WM86501 + +struct clk_pll { + struct clk_hw hw; + void __iomem*reg; + spinlock_t *lock; + int type; +}; + +static void __iomem *pmc_base; + +#define to_clk_device(_hw) container_of(_hw, struct clk_device, hw) + + +#define VT8500_PMC_BUSY_MASK 0x18 + +static void vt8500_pmc_wait_busy(void) +{ + while (readl(pmc_base) & VT8500_PMC_BUSY_MASK) + cpu_relax(); +} + +static void vt8500_dclk_endisable(struct clk_hw *hw, int enable) +{ + struct clk_device *cdev = to_clk_device(hw); + u32 en_val; + unsigned long flags = 0; + + spin_lock_irqsave(cdev->lock, flags); + + en_val = readl(cdev->en_reg); + + if (enable) + en_val |= BIT(cdev->en_bit); + else + en_val &= ~BIT(cdev->en_bit); + + writel(en_val, cdev->en_reg); + + spin_unlock_irqrestore(cdev->lock, flags); +} + +static int vt8500_dclk_enable(struct clk_hw *hw) +{ + vt8500_dclk_endisable(hw, 1); + return 0; +} + +static void vt8500_dclk_disable(struct clk_hw *hw) +{ + vt8500_dclk_endisable(hw, 0); +} + +static int vt8500_dclk_is_enabled(struct clk_hw *hw) +{ + struct clk_device *cdev = to_clk_device(hw); + u32 en_val = (readl(cdev->en_reg) & BIT(cdev->en_bit)); + + return en_val ? 
1 : 0; +} + +static unsigned long vt8500_dclk_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_device *cdev = to_clk_device(hw); + u32 div = readl(cdev->div_reg) & cdev->div_mask; + + /* Special case for SDMMC devices */ + if ((cdev->div_mask == 0x3F) && (div & BIT(5))) + div = 64 * (div & 0x1f); + + /* div == 0 is actually the highest divisor */ + if (div == 0) + div = (cdev->div_mask + 1); + + return parent_rate / div; +} + +static long vt8500_dclk_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + u32 divisor = rate / *prate; + + return *prate / divisor; +} + +static int vt8500_dclk_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_device *cdev = to_clk_device(hw); + u32 divisor = rate / parent_rate; + unsigned long flags = 0; + + if (divisor == cdev->div_mask + 1) + divisor = 0; + + if (divisor > cdev->div_mask) { + pr_err("%s: invalid divisor for clock\n", __func__); + return -EINVAL; + } + + spin_lock_irqsave(cdev->lock, flags); + + vt8500_pmc_wait_busy(); + writel(divisor, cdev->div_reg); + vt8500_pmc_wait_busy(); + + spin_lock_irqsave(cdev->lock, flags); + + return 0; +} + + +static const struct clk_ops vt8500_gated_clk_ops = { + .enable = vt8500_dclk_enable, + .disable = vt8500_dclk_disable, + .is_enabled = vt8500_dclk_is_enabled, +}; + +static const struct clk_ops vt8500_divisor_clk_ops = { + .round_rate = vt8500_dclk_round_rate, + .set_rate = vt8500_dclk_set_rate, + .recalc_rate = vt8500_dclk_recalc_rate, +}; + +static const struct clk_ops vt8500_gated_divisor_clk_ops = {
RE: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode
> -Original Message- > From: Borislav Petkov [mailto:b...@amd64.org] > Sent: Tuesday, August 21, 2012 1:49 PM > To: H. Peter Anvin > Cc: Yu, Fenghua; Henrique de Moraes Holschuh; Ingo Molnar; Thomas > Gleixner; Mallick, Asit K; Tigran Aivazian; Andreas Herrmann; Borislav > Petkov; linux-kernel; x86 > Subject: Re: [PATCH 04/11] x86/microcode_core_early.c: Define > interfaces for early load ucode > > On Tue, Aug 21, 2012 at 01:13:26PM -0700, H. Peter Anvin wrote: > > I don't know what Borislav was suggesting with "BIOS overrides", is > > that another CPU-specific thing? > > Not CPU- but rather platform-specific. It is Thomas Renninger's > mechanism to override BIOS tables. That's ACPI override. I think the ACPI tables could be put in kernel/x86/acpi/. Thanks. -Fenghua -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode
On 08/21/2012 01:48 PM, Borislav Petkov wrote: > On Tue, Aug 21, 2012 at 01:13:26PM -0700, H. Peter Anvin wrote: >> I don't know what Borislav was suggesting with "BIOS overrides", is >> that another CPU-specific thing? > > Not CPU- but rather platform-specific. It is Thomas Renninger's > mechanism to override BIOS tables. > s/BIOS/ACPI/... Yes, so really it doesn't have any meaningful reason to live under the CPU vendor. -hpa -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode
On 08/21/2012 01:52 PM, Yu, Fenghua wrote: >> -Original Message- >> From: Borislav Petkov [mailto:b...@amd64.org] >> Sent: Tuesday, August 21, 2012 1:49 PM >> To: H. Peter Anvin >> Cc: Yu, Fenghua; Henrique de Moraes Holschuh; Ingo Molnar; Thomas >> Gleixner; Mallick, Asit K; Tigran Aivazian; Andreas Herrmann; Borislav >> Petkov; linux-kernel; x86 >> Subject: Re: [PATCH 04/11] x86/microcode_core_early.c: Define >> interfaces for early load ucode >> >> On Tue, Aug 21, 2012 at 01:13:26PM -0700, H. Peter Anvin wrote: >>> I don't know what Borislav was suggesting with "BIOS overrides", is >>> that another CPU-specific thing? >> >> Not CPU- but rather platform-specific. It is Thomas Renninger's >> mechanism to override BIOS tables. > > That's ACPI override. I think the ACPI tables could be put in > kernel/x86/acpi/. > kernel/acpi... ACPI is not x86-specific. -hpa -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC] Move kfree outside pde_unload_lock
I am currently tracking a hotlock reported by a customer on a large, 512-core system. I am currently running 3.6.0 rc1 but the issue looks like it has been this way for a very long time. The offending lock is proc_dir_entry->pde_unload_lock. In proc_reg_release we are doing a kfree under the spinlock which is ok but it means we are holding the lock longer than required. Scaling improved when I moved kfree out. Also shouldn't the comment on pde_unload_lock also note that pde_openers and pde_unload_completion are both used under the lock? Here is some data from a quick test program which just reads from /proc/cpuinfo. Lower is better, as you can see the worst case scenario is improved. baseline moved kfree tasks read-sec read-sec 1 0.0141 0.0141 2 0.0140 0.0140 4 0.0140 0.0141 8 0.0145 0.0145 16 0.0553 0.0548 32 0.1688 0.1622 64 0.5017 0.3856 128 1.7005 0.9710 256 5.2513 2.6519 512 8.0529 6.2976 If the patch looks agreeable I will resend it properly. diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 7ac817b..46016c1 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -403,9 +403,11 @@ static int proc_reg_release(struct inode *inode, struct file *file) release = pde->proc_fops->release; if (pdeo) { list_del(&pdeo->lh); - kfree(pdeo); } spin_unlock(&pde->pde_unload_lock); + if (pdeo) { + kfree(pdeo); + } if (release) rv = release(inode, file); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
RE: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode
> -Original Message- > From: H. Peter Anvin [mailto:h...@zytor.com] > Sent: Tuesday, August 21, 2012 1:54 PM > To: Yu, Fenghua > Cc: Borislav Petkov; Henrique de Moraes Holschuh; Ingo Molnar; Thomas > Gleixner; Mallick, Asit K; Tigran Aivazian; Andreas Herrmann; Borislav > Petkov; linux-kernel; x86 > Subject: Re: [PATCH 04/11] x86/microcode_core_early.c: Define > interfaces for early load ucode > > On 08/21/2012 01:52 PM, Yu, Fenghua wrote: > >> -Original Message- > >> From: Borislav Petkov [mailto:b...@amd64.org] > >> Sent: Tuesday, August 21, 2012 1:49 PM > >> To: H. Peter Anvin > >> Cc: Yu, Fenghua; Henrique de Moraes Holschuh; Ingo Molnar; Thomas > >> Gleixner; Mallick, Asit K; Tigran Aivazian; Andreas Herrmann; > Borislav > >> Petkov; linux-kernel; x86 > >> Subject: Re: [PATCH 04/11] x86/microcode_core_early.c: Define > >> interfaces for early load ucode > >> > >> On Tue, Aug 21, 2012 at 01:13:26PM -0700, H. Peter Anvin wrote: > >>> I don't know what Borislav was suggesting with "BIOS overrides", is > >>> that another CPU-specific thing? > >> > >> Not CPU- but rather platform-specific. It is Thomas Renninger's > >> mechanism to override BIOS tables. > > > > That's ACPI override. I think the ACPI tables could be put in > kernel/x86/acpi/. > > > > kernel/acpi... ACPI is not x86-specific. That's right. Thanks. -Fenghua -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v9 2/2] kvm: On Ack, De-assert & Notify KVM_IRQFD extension
On Tue, 2012-08-21 at 23:37 +0300, Michael S. Tsirkin wrote: > On Tue, Aug 21, 2012 at 01:29:14PM -0600, Alex Williamson wrote: > > For VFIO based device assignment we'd like a mechanism to allow level > > triggered interrutps to be directly injected into KVM. KVM_IRQFD > > already allows this for edge triggered interrupts, but for level, we > > need to watch for acknowledgement of the interrupt from the guest to > > provide us a hint when to test the device and allow it to re-assert > > if necessary. To do this, we create a new KVM_IRQFD mode called > > "On Ack, De-assert & Notify", or OADN. In this mode, an interrupt > > injection provides only a gsi assertion. We then hook into the IRQ > > ACK notifier, which when triggered de-asserts the gsi and notifies > > via another eventfd. It's then the responsibility of the user to > > re-assert the interrupt is service is still required. > > > > Signed-off-by: Alex Williamson > > Naming aside, looks good. > I think I see some minor bugs, and I added some improvement > suggestions below. > > Thanks! > > > --- > > > > Documentation/virtual/kvm/api.txt | 13 ++ > > arch/x86/kvm/x86.c|1 > > include/linux/kvm.h |6 + > > include/linux/kvm_host.h |1 > > virt/kvm/eventfd.c| 193 > > - > > 5 files changed, 210 insertions(+), 4 deletions(-) > > > > diff --git a/Documentation/virtual/kvm/api.txt > > b/Documentation/virtual/kvm/api.txt > > index bf33aaa..87d7321 100644 > > --- a/Documentation/virtual/kvm/api.txt > > +++ b/Documentation/virtual/kvm/api.txt > > @@ -1946,6 +1946,19 @@ the guest using the specified gsi pin. The irqfd is > > removed using > > the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd > > and kvm_irqfd.gsi. > > > > +With KVM_CAP_IRQFD_OADN, KVM_IRQFD supports an "On Ack, De-assert & > > +Notify" option that allows emulation of level-triggered interrupts. 
> > +When kvm_irqfd.fd is triggered, the requested gsi is asserted and > > +remains asserted until interaction with the irqchip indicates the > > +VM has acknowledged the interrupt, such as an EOI. On acknoledgement > > +the gsi is automatically de-asserted and the user is notified via > > +kvm_irqfd.notifyfd. The user is then required to re-assert the > > +interrupt if the associated device still requires service. To enable > > +this mode, configure the KVM_IRQFD using the KVM_IRQFD_FLAG_OADN flag > > +and specify kvm_irqfd.notifyfd. Note that closing kvm_irqfd.notifyfd > > +while configured in this mode does not disable the irqfd. The > > +KVM_IRQFD_FLAG_OADN flag is only necessary on assignment. > > + > > 4.76 KVM_PPC_ALLOCATE_HTAB > > > > Capability: KVM_CAP_PPC_ALLOC_HTAB > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index cd98673..fde7b66 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -2175,6 +2175,7 @@ int kvm_dev_ioctl_check_extension(long ext) > > case KVM_CAP_PCI_2_3: > > case KVM_CAP_KVMCLOCK_CTRL: > > case KVM_CAP_IRQFD_IRQ_SOURCE_ID: > > + case KVM_CAP_IRQFD_OADN: > > r = 1; > > break; > > case KVM_CAP_COALESCED_MMIO: > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > > index ae66b9c..ec0f1d8 100644 > > --- a/include/linux/kvm.h > > +++ b/include/linux/kvm.h > > @@ -619,6 +619,7 @@ struct kvm_ppc_smmu_info { > > #define KVM_CAP_S390_COW 79 > > #define KVM_CAP_PPC_ALLOC_HTAB 80 > > #define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81 > > +#define KVM_CAP_IRQFD_OADN 82 > > > > #ifdef KVM_CAP_IRQ_ROUTING > > > > @@ -684,12 +685,15 @@ struct kvm_xen_hvm_config { > > #endif > > > > #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) > > +/* Availabie with KVM_CAP_IRQFD_OADN */ > > Need to also explain what it is. Beyond Documentation/virtual/kvm/api.txt? I don't see much else getting documented here. 
Or maybe you mean /* On Ack, De-assert & Notify */ > > +#define KVM_IRQFD_FLAG_OADN (1 << 1) > > > > struct kvm_irqfd { > > __u32 fd; > > __u32 gsi; > > __u32 flags; > > - __u8 pad[20]; > > + __u32 notifyfd; > > Document that this is only valid with OADN flag. Might be a good idea > to rename this to deassert_on_ack_notifyfd or oadn_notifyfd > to avoid confusion. I'll add a /* only valid with KVM_IRQFD_FLAG_OADN */ I can change the name if you prefer, but it seems pretty clear to me how a notifyfd might relate to a "On Ack, De-assert & Notify" irqfd without pulling longer names into userspace. > > + __u8 pad[16]; > > }; > > > > struct kvm_clock_data { > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > > index b763230..d502d08 100644 > > --- a/include/linux/kvm_host.h > > +++ b/include/linux/kvm_host.h > > @@ -284,6 +284,7 @@ struct kvm { > > struct { > > spinlock_tlock; > > struct list_head items; > > + struct list_head oadns; > > } irqfds; > > struct list_hea
Re: [PATCH 0/4] fat: fix ESTALE errors
On Tue, Aug 21, 2012 at 8:41 AM, OGAWA Hirofumi wrote: > Namjae Jeon writes: > >> And.. Hi Ogawa. >> I checked other filesystem about unlink - inode issue. but I found >> Ext4 have same issue. >> Although other filesysm is having this issue, Can we think It could be >> only FAT issue ? > > (I assume this issue == orphaned inode issue). > > ext* doesn't have this issue. If ext* made orphaned inode, ext* doesn't > delete inode from inode table until calling iput() from last referencer. > > In FAT case, FAT inode is embedded into dir entry. So, if unlinked inode > (then orphaned inode is detached (fat_detach())), FAT deletes inode (dir > entry) from dir. Could it be possible to not delete it? I mean using a special value for this case: mark the entry as deleted (using 0xe5 as first character) but set, for instance, the creation month to be equal to 15. This entry will therefore be kept and not overwritten by successive file creation. At least this solves the file deleted issue (not the rename issue, unfortunately). Bastien > OGAWA Hirofumi > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v9 1/2] kvm: Use a reserved IRQ source ID for irqfd
On Tue, 2012-08-21 at 23:41 +0300, Michael S. Tsirkin wrote: > On Tue, Aug 21, 2012 at 02:06:19PM -0600, Alex Williamson wrote: > > On Tue, 2012-08-21 at 22:58 +0300, Michael S. Tsirkin wrote: > > > On Tue, Aug 21, 2012 at 01:29:06PM -0600, Alex Williamson wrote: > > > > KVM_IRQFD currently uses the reserved KVM_USERSPACE_IRQ_SOURCE_ID > > > > which is also shared with userspace injection methods like > > > > KVM_IRQ_LINE. This can cause a conflict if an irqfd triggers on > > > > a GSI asserted through KVM_IRQ_LINE. > > > > > > What kind of conflict do you envision? Pls note level interrupts are > > > unsupported ATM. > > > > If KVM_IRQ_LINE asserts a level interrupt and KVM_IRQFD triggers on the > > same GSI then the pin is no longer asserted as userspace thinks it is. > > Do we just chalk this up to userspace error? > > Yes: using a level GSI with current irqfd is a userspace error > because you can lose interrupts anyway. > > Are edge GSIs affected? I wouldn't think so. > > > > Move irqfd to it's own reserved IRQ source ID. Add a capability for > > > > userspace to test for this fix. 
> > > > > > > > Signed-off-by: Alex Williamson > > > > --- > > > > > > > > arch/x86/kvm/x86.c |3 +++ > > > > include/linux/kvm.h |1 + > > > > include/linux/kvm_host.h |1 + > > > > virt/kvm/eventfd.c |6 +++--- > > > > 4 files changed, 8 insertions(+), 3 deletions(-) > > > > > > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > > > index 42bce48..cd98673 100644 > > > > --- a/arch/x86/kvm/x86.c > > > > +++ b/arch/x86/kvm/x86.c > > > > @@ -2174,6 +2174,7 @@ int kvm_dev_ioctl_check_extension(long ext) > > > > case KVM_CAP_GET_TSC_KHZ: > > > > case KVM_CAP_PCI_2_3: > > > > case KVM_CAP_KVMCLOCK_CTRL: > > > > + case KVM_CAP_IRQFD_IRQ_SOURCE_ID: > > > > r = 1; > > > > break; > > > > case KVM_CAP_COALESCED_MMIO: > > > > @@ -6258,6 +6259,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned > > > > long type) > > > > > > > > /* Reserve bit 0 of irq_sources_bitmap for userspace irq source > > > > */ > > > > set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, > > > > &kvm->arch.irq_sources_bitmap); > > > > + /* Reserve bit 1 of irq_sources_bitmap for irqfd irq source */ > > > > + set_bit(KVM_IRQFD_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); > > > > > > > > raw_spin_lock_init(&kvm->arch.tsc_write_lock); > > > > > > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > > > > index 2ce09aa..ae66b9c 100644 > > > > --- a/include/linux/kvm.h > > > > +++ b/include/linux/kvm.h > > > > @@ -618,6 +618,7 @@ struct kvm_ppc_smmu_info { > > > > #define KVM_CAP_PPC_GET_SMMU_INFO 78 > > > > #define KVM_CAP_S390_COW 79 > > > > #define KVM_CAP_PPC_ALLOC_HTAB 80 > > > > +#define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81 > > > > > > > > #ifdef KVM_CAP_IRQ_ROUTING > > > > > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > > > > index b70b48b..b763230 100644 > > > > --- a/include/linux/kvm_host.h > > > > +++ b/include/linux/kvm_host.h > > > > @@ -71,6 +71,7 @@ > > > > #define KVM_REQ_PMI 17 > > > > > > > > #define KVM_USERSPACE_IRQ_SOURCE_ID0 > > > > +#define 
KVM_IRQFD_IRQ_SOURCE_ID1 > > > > > > > > struct kvm; > > > > struct kvm_vcpu; > > > > > > Above looks fine but I'm not sure why is the below needed. > > > This changes irqfd behaviour for edge GSIs slightly > > > in a userspace-visible way. Maybe make it a separate patch > > > so it can be considered on merits? > > > > Hmm, the above does nothing without the below. > > Yes. But you can use the above with the new irqfds you are adding. Nope, racy. > > I thought I was just > > implementing your idea that IRQFDs should all share a single IRQ source > > ID... > > Sorry I only meant for level irqfds. You are changing edge here. Ok, I misunderstood then. > > why is that no longer a good idea? Thanks, > > > > Alex > > Maybe it is a good idea. I am just asking for the motivation. I assumed you were pointing out the level vs edge interaction. If we call that a userspace bug, I can just drop this. Thanks, Alex > > > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > > > > index 7d7e2aa..2245cfa 100644 > > > > --- a/virt/kvm/eventfd.c > > > > +++ b/virt/kvm/eventfd.c > > > > @@ -67,8 +67,8 @@ irqfd_inject(struct work_struct *work) > > > > struct _irqfd *irqfd = container_of(work, struct _irqfd, > > > > inject); > > > > struct kvm *kvm = irqfd->kvm; > > > > > > > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > > > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > > + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 1); > > > > + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > > } > > > > > > > > /* > > > > @@ -138,7 +138,7 @@ irqfd_wakeup(wait_queue_t *wai
Re: [PATCH v2 0/5] X86/XEN: Merge x86_init.paging.pagetable_setup_start and x86_init.paging.pagetable_setup_done setup functions and document its semantic
On Tue, 21 Aug 2012, Attilio Rao wrote: > Differences with v1: > - The patch serie is re-arranged in a way that it helps reviews, following > a plan by Thomas Gleixner > - The PVOPS nomenclature is not used as it is not correct > - The front-end message is adjusted with feedback by Thomas Gleixner, > Stefano Stabellini and Konrad Rzeszutek Wilk This is much simpler to read and review. Just have a look at the diffstats of the two series: 6 files changed, 9 insertions(+), 8 deletions(-) 6 files changed, 11 insertions(+), 9 deletions(-) 5 files changed, 50 insertions(+), 2 deletions(-) 6 files changed, 2 insertions(+), 65 deletions(-) 1 files changed, 5 insertions(+), 0 deletions(-) versus 6 files changed, 10 insertions(+), 9 deletions(-) 6 files changed, 11 insertions(+), 11 deletions(-) 5 files changed, 3 insertions(+), 3 deletions(-) 6 files changed, 4 insertions(+), 20 deletions(-) 1 files changed, 5 insertions(+), 0 deletions(-) The overall result is basically the same, but it's way simpler to look at obvious and well done patches than checking whether a subtle copy and paste bug happened in 3/5 of the first version. Copy and paste is the #1 cause for subtle bugs. :) I'm waiting for the ack of Xen folks before taking it into tip. Thanks for following up! tglx -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] strings: helper for maximum decimal encoding of an unsigned integer
From: "J. Bruce Fields" I've seen a couple examples recently where we've gotten this wrong. Maybe something like this would help? Is there some better way? (Approximation due to Jim Rees). Signed-off-by: J. Bruce Fields --- include/linux/string.h |6 ++ net/sunrpc/cache.c |2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/string.h b/include/linux/string.h index ffe0442..d4809b7 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -126,6 +126,12 @@ extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); extern int strtobool(const char *s, bool *res); +/* + * length of the decimal representation of an unsigned integer. Just an + * approximation, but it's right for types of size 1 to 36 bytes: + */ +#define base10len(i) (sizeof(i) * 24 / 10 + 1) + #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2afd2a8..1dcd2b3 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1409,7 +1409,7 @@ static ssize_t read_flush(struct file *file, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { - char tbuf[20]; + char tbuf[base10len(long) + 2]; unsigned long p = *ppos; size_t len; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] task_work: add a scheduling point in task_work_run()
On Tue, 2012-08-21 at 16:37 -0400, Mimi Zohar wrote: > We're here, because fput() called schedule_work() to delay the last > fput(). The execution needs to take place before the syscall returns to > userspace. Need to read __schedule()... Do you know if cond_resched() > can guarantee that it will be executed before the return to userspace? Some clarifications: - fput() does not call schedule_work() in this case but task_work_add() - cond_resched() won't return to userspace. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] strings: helper for maximum decimal encoding of an unsigned integer
J. Bruce Fields wrote: From: "J. Bruce Fields" I've seen a couple examples recently where we've gotten this wrong. Maybe something like this would help? Is there some better way? (Approximation due to Jim Rees). Please add Suggested-by: Jim Rees . I'm thinking of patenting the algorithm. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 1/4] pinctrl: add samsung pinctrl and gpiolib driver
On 08/21/2012 05:25 AM, Linus Walleij wrote: > On Wed, Aug 15, 2012 at 9:57 PM, Thomas Abraham > wrote: > >> Add a new device tree enabled pinctrl and gpiolib driver for Samsung >> SoC's. ... >> + The child node can also optionally specify one or more of the pin >> + configuration that should be applied on all the pins listed in the >> + "samsung,pins" property of the child node. The following pin configuration >> + properties are supported. >> + >> + - samsung,pin-pud: Pull up/down configuration. >> + - samsung,pin-drv: Drive strength configuration. >> + - samsung,pin-pud-pdn: Pull up/down configuration in power down mode. >> + - samsung,pin-drv-pdn: Drive strength configuration in power down mode. > > This looks a bit scary, as it seems to be orthogonal to the pin config > interface. I.e. this will be programmed "behind the back" of the > pin config system. However as long as the pin config implementation > reads back these things from the registers it will work, too. > > In the U300 and Ux500 I explicitly use pin config hogs to set up > the pin configuration, and when we enter a state such as > "default" the mux setting and config settings are set from the > framework separately. I know that some HW has a separate set of registers (or fields) for the "awake" and "sleep" configuration, and the HW switches between the two automatically when sleeping. I have no idea if the Samsung SoCs do this, but I think if this were the case, it'd be quite legitimate to define both these HW states as separate sets of properties within a single pinctrl SW state. So, that might be the explanation here? -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v6 3/4] cgroup: add xattr support
Heya, (sorry for the late reply) On 16.08.2012 22:00, Tejun Heo wrote: On Thu, Aug 16, 2012 at 01:44:56PM -0400, a...@redhat.com wrote: Attaching meta information to services, in an easily discoverable way. For example, in systemd we create one cgroup for each service, and could then store data like the main pid of the specific service as an xattr on the cgroup itself. That way we'd have almost all service state in the cgroupfs, which would make it possible to terminate systemd and later restart it without losing any state information. But there's more: for example, some very peculiar services cannot be terminated on shutdown (i.e. fakeraid DM stuff) and it would be really nice if the services in question could just mark that on their cgroup, by setting an xattr. On the more desktopy side of things there are other possibilities: for example there are plans defining what an application is along the lines of a cgroup (i.e. an app being a collection of processes). With xattrs one could then attach an icon or human readable program name on the cgroup. The key idea is that this would allow attaching runtime meta information to cgroups and everything they model (services, apps, vms), that doesn't need any complex userspace infrastructure, has good access control (i.e. because the file system enforces that anyway, and there's the "trusted." xattr namespace), notifications (inotify), and can easily be shared among applications. I'm not against this but unsure whether using kmem is enough for the suggested use case. Lennart, would this suit systemd? How much metadata are we talking about? Just small things, like values, PIDs, i.e. a few 100 bytes or so per cgroup should be more than sufficient for our needs. Lennart -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/5] Memory policy corruption fixes V2
> I tested this with trinity with CONFIG_DEBUG_SLAB enabled and it passed. I > did not test LTP such as Josh reported a problem with or with a database that > used shared policies like Andi tested. The series is almost all Kosaki's > work of course. If he has a revised series that simply got delayed in > posting it should take precedence. Initial tests of this patchkit with a test program look good; full database tests are still pending. -Andi -- a...@linux.intel.com -- Speaking for myself only -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v6 3/4] cgroup: add xattr support
Hello, On Tue, Aug 21, 2012 at 11:43:44PM +0200, Lennart Poettering wrote: > >I'm not against this but unsure whether using kmem is enough for the > >suggested use case. Lennart, would this suit systemd? How much > >metadata are we talking about? > > Just small things, like values, PIDs, i.e. a few 100 bytes or so per > cgroup should be more than sufficient for our needs. Alright, then. I think there's gonna be one more round to address Hugh's comments. Hugh, how should this be routed? Is there some git branch that tmpfs changes can go in so that cgroup tree can pull? Thanks. -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 06/11] memcg: kmem controller infrastructure
On Thu, Aug 09 2012, Glauber Costa wrote: > This patch introduces infrastructure for tracking kernel memory pages to > a given memcg. This will happen whenever the caller includes the flag > __GFP_KMEMCG flag, and the task belong to a memcg other than the root. > > In memcontrol.h those functions are wrapped in inline accessors. The > idea is to later on, patch those with static branches, so we don't incur > any overhead when no mem cgroups with limited kmem are being used. > > [ v2: improved comments and standardized function names ] > > Signed-off-by: Glauber Costa > CC: Christoph Lameter > CC: Pekka Enberg > CC: Michal Hocko > CC: Kamezawa Hiroyuki > CC: Johannes Weiner > --- > include/linux/memcontrol.h | 79 +++ > mm/memcontrol.c| 185 > + > 2 files changed, 264 insertions(+) > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h > index 8d9489f..75b247e 100644 > --- a/include/linux/memcontrol.h > +++ b/include/linux/memcontrol.h > @@ -21,6 +21,7 @@ > #define _LINUX_MEMCONTROL_H > #include > #include > +#include > > struct mem_cgroup; > struct page_cgroup; > @@ -399,6 +400,11 @@ struct sock; > #ifdef CONFIG_MEMCG_KMEM > void sock_update_memcg(struct sock *sk); > void sock_release_memcg(struct sock *sk); > + > +#define memcg_kmem_on 1 > +bool __memcg_kmem_new_page(gfp_t gfp, void *handle, int order); > +void __memcg_kmem_commit_page(struct page *page, void *handle, int order); > +void __memcg_kmem_free_page(struct page *page, int order); > #else > static inline void sock_update_memcg(struct sock *sk) > { > @@ -406,6 +412,79 @@ static inline void sock_update_memcg(struct sock *sk) > static inline void sock_release_memcg(struct sock *sk) > { > } > + > +#define memcg_kmem_on 0 > +static inline bool > +__memcg_kmem_new_page(gfp_t gfp, void *handle, int order) > +{ > + return false; > +} > + > +static inline void __memcg_kmem_free_page(struct page *page, int order) > +{ > +} > + > +static inline void > +__memcg_kmem_commit_page(struct page *page, 
struct mem_cgroup *handle, int > order) > +{ > +} > #endif /* CONFIG_MEMCG_KMEM */ > + > +/** > + * memcg_kmem_new_page: verify if a new kmem allocation is allowed. > + * @gfp: the gfp allocation flags. > + * @handle: a pointer to the memcg this was charged against. > + * @order: allocation order. > + * > + * returns true if the memcg where the current task belongs can hold this > + * allocation. > + * > + * We return true automatically if this allocation is not to be accounted to > + * any memcg. > + */ > +static __always_inline bool > +memcg_kmem_new_page(gfp_t gfp, void *handle, int order) > +{ > + if (!memcg_kmem_on) > + return true; > + if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL)) > + return true; > + if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) > + return true; > + return __memcg_kmem_new_page(gfp, handle, order); > +} > + > +/** > + * memcg_kmem_free_page: uncharge pages from memcg > + * @page: pointer to struct page being freed > + * @order: allocation order. > + * > + * there is no need to specify memcg here, since it is embedded in > page_cgroup > + */ > +static __always_inline void > +memcg_kmem_free_page(struct page *page, int order) > +{ > + if (memcg_kmem_on) > + __memcg_kmem_free_page(page, order); > +} > + > +/** > + * memcg_kmem_commit_page: embeds correct memcg in a page > + * @handle: a pointer to the memcg this was charged against. > + * @page: pointer to struct page recently allocated > + * @handle: the memcg structure we charged against > + * @order: allocation order. > + * > + * Needs to be called after memcg_kmem_new_page, regardless of success or > + * failure of the allocation. if @page is NULL, this function will revert the > + * charges. Otherwise, it will commit the memcg given by @handle to the > + * corresponding page_cgroup. 
> + */ > +static __always_inline void > +memcg_kmem_commit_page(struct page *page, struct mem_cgroup *handle, int > order) > +{ > + if (memcg_kmem_on) > + __memcg_kmem_commit_page(page, handle, order); > +} > #endif /* _LINUX_MEMCONTROL_H */ > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 54e93de..e9824c1 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -10,6 +10,10 @@ > * Copyright (C) 2009 Nokia Corporation > * Author: Kirill A. Shutemov > * > + * Kernel Memory Controller > + * Copyright (C) 2012 Parallels Inc. and Google Inc. > + * Authors: Glauber Costa and Suleiman Souhlal > + * > * This program is free software; you can redistribute it and/or modify > * it under the terms of the GNU General Public License as published by > * the Free Software Foundation; either version 2 of the License, or > @@ -434,6 +438,9 @@ struct mem_cgroup *mem_cgroup_from_css(struct > cgroup_subsys_state *s) > #include > > static bool mem_cgroup_is_ro
[PATCH] perf: do not flush maps on COMM for perf report
This fixes a long-standing bug caused by the lack of separate COMM and EXEC record types, which makes "perf report" lose track of symbols when a process renames itself. With this fix (suggested by Stephane Eranian), a COMM (rename) no longer flushes the maps, which is the correct behavior. An EXEC also no longer flushes the maps, but this doesn't matter because as new mappings are created (for the executable and the libraries) the old mappings are automatically removed. This is not by accident: the functionality is necessary because DLLs can be explicitly loaded at any time with dlopen(), possibly on top of existing text, so "perf report" handles correctly the clobbering of new mappings on top of old ones. An alternative patch (which I proposed earlier) would be to introduce a separate PERF_RECORD_EXEC type, but it is a much larger change (about 300 lines) and is not necessary. Signed-off-by: Luigi Semenzato --- tools/perf/util/thread.c |1 - 1 files changed, 0 insertions(+), 1 deletions(-) diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index fb4b7ea..8b3e593 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -39,7 +39,6 @@ int thread__set_comm(struct thread *self, const char *comm) err = self->comm == NULL ? -ENOMEM : 0; if (!err) { self->comm_set = true; - map_groups__flush(&self->mg); } return err; } -- 1.7.7.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] ide: fix generic_ide_suspend/resume Oops
From: Miklos Szeredi Date: Tue, 21 Aug 2012 17:20:30 +0200 > From: Miklos Szeredi > > This patch fixes a regression introduced by commit 0998d063 (device-core: > Ensure > drvdata = NULL when no driver is bound). > > Suspend oopses in generic_ide_suspend() because dev_get_drvdata() > returns NULL (dev->p->driver_data == NULL) and this function is not > prepared for this. > > Fix is based on Alan Stern's suggestion. > > Signed-off-by: Miklos Szeredi > Acked-by: Rafael J. Wysocki Applied, thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [MMTests] dbench4 async on ext3
On Mon 23-07-12 22:21:46, Mel Gorman wrote: > Configuration:global-dhp__io-dbench4-async-ext3 > Result: > http://www.csn.ul.ie/~mel/postings/mmtests-20120424/global-dhp__io-dbench4-async-ext3 > Benchmarks: dbench4 > > Summary > === > > In general there was a massive drop in throughput after 3.0. Very broadly > speaking it looks like the Read operation got faster but at the cost of > a big regression in the Flush operation. Mel, I had a look into this and it's actually very likely only a configuration issue. In 3.1 ext3 started to default to enabled barriers (barrier=1 in mount options) which is a safer but slower choice. When I set barriers explicitly, I see no performance difference for dbench4 between 3.0 and 3.1. Honza -- Jan Kara SUSE Labs, CR -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 17/31] arm64: System calls handling
On Tue, Aug 21, 2012 at 09:14:01PM +0100, Arnd Bergmann wrote: > On Tuesday 21 August 2012, Catalin Marinas wrote: > > > > +asmlinkage long sys_mmap(unsigned long addr, unsigned long len, > > > > +unsigned long prot, unsigned long flags, > > > > +unsigned long fd, off_t off) > > > > +{ > > > > + if (offset_in_page(off) != 0) > > > > + return -EINVAL; > > > > + > > > > + return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> > > > > PAGE_SHIFT); > > > > +} > > > > > > I think > > > > > > #define sys_mmap sys_mmap_pgoff > > > > There are slightly different semantics with the last argument of > > sys_mmap() which takes a byte offset. The sys_mmap_pgoff() function > > takes the offset shifted by PAGE_SHIFT (which is the same as sys_mmap2). > > > > Looking at the other architectures, it makes sense to use a generic > > sys_mmap() implementation similar to the one above (or the ia-64, seems > > to be the most complete). > > Why that? The generic sys_mmap_pgoff was specifically added so new > architectures > could just use that instead of having their own wrappers, see f8b72560. As I understand, sys_mmap_pgoff can be used instead of sys_mmap2 on new 32-bit architectures. But on 64-bit architectures we don't have sys_mmap2, only sys_mmap with the difference that the last argument is the offset in bytes (and multiple of PAGE_SIZE) rather than in pages. So unless we change the meaning of this last argument for sys_mmap, we cannot just define it to sys_mmap_pgoff. Since the other 64-bit architectures seem to have a sys_mmap wrapper that does this: sys_mmap_pgoff(..., off >> PAGE_SHIFT); I think AArch64 should also use the same sys_mmap convention. We can make this wrapper generic. -- Catalin -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] Move kfree outside pde_unload_lock
On Tue, Aug 21, 2012 at 03:54:54PM -0500, Nathan Zimmer wrote: > I am currently tracking a hotlock reported by a customer on a large, 512 > cores, > system, I am currently running 3.6.0 rc1 but the issue looks like it has been > this way for a very long time. > The offending lock is proc_dir_entry->pde_unload_lock. > > In proc_reg_release we are doing a kfree under the spinlock which is ok but it > means we are holding the lock longer than required. Scaling improved when I > moved kfree out. It's OK to move it out. Acked-by: Alexey Dobriyan > Also shouldn't the comment on pde_unload_lock also note that pde_openers and > pde_unload_completion are both used under the lock? Yeah, why not. > --- a/fs/proc/inode.c > +++ b/fs/proc/inode.c > @@ -403,9 +403,11 @@ static int proc_reg_release(struct inode *inode, struct > file *file) > release = pde->proc_fops->release; > if (pdeo) { > list_del(&pdeo->lh); > - kfree(pdeo); > } > spin_unlock(&pde->pde_unload_lock); > + if (pdeo) { > + kfree(pdeo); > + } > > if (release) > rv = release(inode, file); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] strings: helper for maximum decimal encoding of an unsigned integer
On Tue, Aug 21, 2012 at 05:22:27PM -0400, Jim Rees wrote: > J. Bruce Fields wrote: > > From: "J. Bruce Fields" > > I've seen a couple examples recently where we've gotten this wrong. > Maybe something like this would help? Is there some better way? > > (Approximation due to Jim Rees). > > Please add Suggested-by: Jim Rees . I'm thinking of > patenting the algorithm. Is that a joke? Patenting the fact that log10(256) is 2.408..., which is about 2.4, which is 24/10? I really hope we are Poe'd... BTW, NAK the comment - s/36/26/ in there; check it yourself - $ echo '2^(8*27)' | bc 105312291668557186697918027683670432318895095400549111254310977536 which is 66-digit, not 65 as the estimate would be. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCHv3 3/9] serial: vt8500: Add devicetree support for vt8500-serial
On Wed, 22 Aug 2012 08:47:32 +1200 Tony Prisk wrote: > Signed-off-by: Tony Prisk > --- > drivers/tty/serial/vt8500_serial.c | 37 > > 1 file changed, 33 insertions(+), 4 deletions(-) Can we have a comment attached to a change this size? In particular one describing why it has gone from 4 to 6 ports, and why the port id twiddling. Is there a reason you can't use the device tree port id ? What are the regression risks for existing users expecting the pdev->id binding ? -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] strings: helper for maximum decimal encoding of an unsigned integer
On Tue, Aug 21, 2012 at 11:06:13PM +0100, Al Viro wrote: > On Tue, Aug 21, 2012 at 05:22:27PM -0400, Jim Rees wrote: > > J. Bruce Fields wrote: > > > > From: "J. Bruce Fields" > > > > I've seen a couple examples recently where we've gotten this wrong. > > Maybe something like this would help? Is there some better way? > > > > (Approximation due to Jim Rees). > > > > Please add Suggested-by: Jim Rees . I'm thinking of > > patenting the algorithm. > > Is that a joke? Patenting the fact that log10(256) is 2.408..., which > is about 2.4, which is 24/10? I really hope we are Poe'd... BTW, NAK > the comment - s/36/26/ in there; check it yourself - > $ echo '2^(8*27)' | bc > 105312291668557186697918027683670432318895095400549111254310977536 > which is 66-digit, not 65 as the estimate would be. Erp, you're right. Anyway, does something like base10len(type) seem reasonable? Or define macros that enumerate the sizes? (ULONG_STR_MAX or something?) --b. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/2] console_lock debug improvements
Hi all, After Dave Airlie blew through a few days to track down a deadlock at boot-up when handing over from the firmware fb to the kms/drm framebuffer driver (1), I've figured that lockdep /should/ have caught this. And indeed, by adding proper annotations to the console_lock it complains about the potential deadlock when exercising the entire driver life-cycle of just one fb driver (i.e. not even a handover required). While at it, I've replaced the existing in_interrupt check with the more paranoid might_sleep. Comments, flames and review highly welcome. Yours, Daniel [1]: https://lkml.org/lkml/2012/8/21/36 Daniel Vetter (2): console: use might_sleep in console_lock console: implement lockdep support for console_lock kernel/printk.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] console: implement lockdep support for console_lock
Dave Airlie recently discovered a locking bug in the fbcon layer, where a timer_del_sync (for the blinking cursor) deadlocks with the timer itself, since both (want to) hold the console_lock: https://lkml.org/lkml/2012/8/21/36 Unfortunately the console_lock isn't a plain mutex and hence has no lockdep support. Which resulted in a few days wasted of tracking down this bug (complicated by the fact that printk doesn't show anything when the console is locked) instead of noticing the bug much earlier with the lockdep splat. Hence I've figured I need to fix that for the next deadlock involving console_lock - and with kms/drm growing ever more complex locking that'll eventually happen. Now the console_lock has rather funky semantics, so after a quick irc discussion with Thomas Gleixner and Dave Airlie I've quickly ditched the original idea of switching to a real mutex (since it won't work) and instead opted to annotate the console_lock with lockdep information manually. There are a few special cases: - The console_lock state is protected by the console_sem, and usually grabbed/dropped at _lock/_unlock time. But the suspend/resume code drops the semaphore without dropping the console_lock (see suspend_console/resume_console). But since the same thread that did the suspend will do the resume, we don't need to fix up anything. - In the printk code there's a special trylock, only used to kick off the logbuffer printk'ing in console_unlock. But all that happens while lockdep is disabled (since printk does a few other evil tricks). So no issue there, either. - The console_lock can also be acquired from irq context (but only with a trylock). lockdep already handles that. This all leaves us with annotating the normal console_lock, _unlock and _trylock functions.
And yes, it works - simply unloading a drm kms driver resulted in lockdep complaining about the deadlock in fbcon_deinit: == [ INFO: possible circular locking dependency detected ] 3.6.0-rc2+ #552 Not tainted --- kms-reload/3577 is trying to acquire lock: ((&info->queue)){+.+...}, at: [] wait_on_work+0x0/0xa7 but task is already holding lock: (console_lock){+.+.+.}, at: [] bind_con_driver+0x38/0x263 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (console_lock){+.+.+.}: [] lock_acquire+0x95/0x105 [] console_lock+0x59/0x5b [] fb_flashcursor+0x2e/0x12c [] process_one_work+0x1d9/0x3b4 [] worker_thread+0x1a7/0x24b [] kthread+0x7f/0x87 [] kernel_thread_helper+0x4/0x10 -> #0 ((&info->queue)){+.+...}: [] __lock_acquire+0x999/0xcf6 [] lock_acquire+0x95/0x105 [] wait_on_work+0x3b/0xa7 [] __cancel_work_timer+0xbf/0x102 [] cancel_work_sync+0xb/0xd [] fbcon_deinit+0x11c/0x1dc [] bind_con_driver+0x145/0x263 [] unbind_con_driver+0x14f/0x195 [] store_bind+0x1ad/0x1c1 [] dev_attr_store+0x13/0x1f [] sysfs_write_file+0xe9/0x121 [] vfs_write+0x9b/0xfd [] sys_write+0x3e/0x6b [] system_call_fastpath+0x16/0x1b other info that might help us debug this: Possible unsafe locking scenario: CPU0CPU1 lock(console_lock); lock((&info->queue)); lock(console_lock); lock((&info->queue)); *** DEADLOCK *** Cc: Dave Airlie Cc: Thomas Gleixner Signed-off-by: Daniel Vetter --- kernel/printk.c |9 + 1 file changed, 9 insertions(+) diff --git a/kernel/printk.c b/kernel/printk.c index ed9af6a..ab2ab24 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -87,6 +87,12 @@ static DEFINE_SEMAPHORE(console_sem); struct console *console_drivers; EXPORT_SYMBOL_GPL(console_drivers); +#ifdef CONFIG_LOCKDEP +struct lockdep_map console_lock_dep_map = { + .name = "console_lock" +}; +#endif + /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. 
It's @@ -1916,6 +1922,7 @@ void console_lock(void) return; console_locked = 1; console_may_schedule = 1; + mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_); } EXPORT_SYMBOL(console_lock); @@ -1937,6 +1944,7 @@ int console_trylock(void) } console_locked = 1; console_may_schedule = 0; + mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_); return 1; } EXPORT_SYMBOL(console_trylock); @@ -2097,6 +2105,7 @@ skip: local_irq_restore(flags); } console_locked = 0; + mutex_release(&console_lock_dep_map, 1, _RET_IP_); /* Release the exclusive_console once it is used */ if (unlikely(exclusive_console)) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kern
[PATCH 1/2] console: use might_sleep in console_lock
Instead of BUG_ON(in_interrupt()), since that doesn't check for all the newfangled stuff like preempt. Note that this is valid since the console_sem is essentially used like a real mutex with only two twists: - we allow trylock from hardirq context - across suspend/resume we lock the logical console_lock, but drop the semaphore protecting the locking state. Now that doesn't guarantee that no one is playing tricks in single-thread atomic contexts at suspend/resume/boot time, but - I couldn't find anything suspicious with some grepping, - might_sleep shouldn't die, - and I think the upside of catching more potential issues is worth the risk of getting a might_sleep backtrace that would have been safe (and then dealing with that fallout). Cc: Dave Airlie Cc: Thomas Gleixner Signed-off-by: Daniel Vetter --- kernel/printk.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/printk.c b/kernel/printk.c index 66a2ea3..ed9af6a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1909,7 +1909,8 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self, */ void console_lock(void) { - BUG_ON(in_interrupt()); + might_sleep(); + down(&console_sem); if (console_suspended) return; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC PATCH 2/2] mm: Batch page_check_references in shrink_page_list sharing the same i_mmap_mutex
On Tue, 2012-08-21 at 09:21 -0400, Matthew Wilcox wrote: > > The only clunky bit would seem to be this bit: > > > if (page_mapped(page) && mapping) { > > - switch (try_to_unmap(page, TTU_UNMAP)) { > > + switch (try_to_unmap(page, TTU_UNMAP, > > + mmap_mutex_locked)) { > > Which I think has to look like this: > > if (page_mapped(page) && mapping) { > - switch (try_to_unmap(page, TTU_UNMAP)) { > + int result; > + if (i_mmap_mutex) > + result = __try_to_unmap(page, TTU_UNMAP); > + else > + result = try_to_unmap(page, TTU_UNMAP); > + switch (result) { > I think - switch (try_to_unmap(page, TTU_UNMAP)) { + switch (__try_to_unmap(page, TTU_UNMAP)) { should be enough when your changes are adopted. Because if the page mmap mutex needs to be locked, we will have locked it here before __try_to_unmap gets used. + if (needs_page_mmap_mutex(page) && + i_mmap_mutex != &page->mapping->i_mmap_mutex) { + if (i_mmap_mutex) + mutex_unlock(i_mmap_mutex); + i_mmap_mutex = &page->mapping->i_mmap_mutex; + mutex_lock(i_mmap_mutex); + } Tim -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH RT 2/2] fix printk flush of messages
added recipients... On 08/21/12 07:30, Michael Thalmeier wrote: > Frank Rowand am.sony.com> writes: > >> >> >> Updates console-make-rt-friendly.patch >> >> #ifdef CONFIG_PREEMPT_RT_FULL, printk() output is never flushed by >> printk() because: >> ... >> >> On system boot some printk() output is flushed because register_console() >> and tty_open() call console_unlock(). >> >> This change also fixes the problem that was previously fixed by >> preempt-rt-allow-immediate-magic-sysrq-output-for-preempt_rt_full.patch >> >> Signed-off-by: Frank Rowand am.sony.com> >> >> --- >> kernel/printk.c |2 1 + 1 - 0 ! >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> Index: b/kernel/printk.c >> === >> --- a/kernel/printk.c >> +++ b/kernel/printk.c >> @@ -847,7 +847,7 @@ static int console_trylock_for_printk(un >> int retval = 0, wake = 0; >> #ifdef CONFIG_PREEMPT_RT_FULL >> int lock = !early_boot_irqs_disabled && !irqs_disabled_flags(flags) && >> -!preempt_count(); >> +(preempt_count() <= 1); >> #else >> int lock = 1; >> #endif >> >> > > I have seen that this patch is applied in the 3.4 stable rt series. > As we are using the 3.0 stable rt kernel I have tested this patch on this > kernel series (on a Freescale i.MX31 based board) and have not found any > problems so far. > Is there something I have missed why this patch has not found its way > in the 3.0 series ? > > Thanks in advance, > Michael -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] tools lib traceevent: Modify header to work in C++ programs
Arnaldo, As the libtraceevent library is also used by powertop, and that is written in *cough* C++ *cough*, we need to make sure that the headers do not have any C++ reserved words. Please apply this patch. Thanks. -- Steve Steven Rostedt (1): tools lib traceevent: Modify header to work in C++ programs tools/lib/traceevent/event-parse.h |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- commit 79f481dbd7cb14e33113180b74e09e742cd839f9 Author: Steven Rostedt Date: Sat May 7 19:04:01 2011 -0400 tools lib traceevent: Modify header to work in C++ programs Replace keyword "private" from event-parse.h to allow it to be used in C++ programs. Signed-off-by: Steven Rostedt diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 5772ad8..c28713b 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -49,7 +49,7 @@ struct pevent_record { int cpu; int ref_count; int locked; /* Do not free, even if ref_count is zero */ - void*private; + void*r_private; #if DEBUG_RECORD struct pevent_record*prev; struct pevent_record*next; @@ -106,7 +106,7 @@ struct plugin_option { char*plugin_alias; char*description; char*value; - void*private; + void*p_private; int set; }; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v6 3/4] cgroup: add xattr support
On Tue, 21 Aug 2012, Tejun Heo wrote: > On Tue, Aug 21, 2012 at 11:43:44PM +0200, Lennart Poettering wrote: > > >I'm not against this but unsure whether using kmem is enough for the > > >suggested use case. Lennart, would this suit systemd? How much > > >metadata are we talking about? > > > > Just small things, like values, PIDs, i.e. a few 100 bytes or so per > > cgroup should be more than sufficient for our needs. That is reasonable. > > Alright, then. I think there's gonna be one more round to address > Hugh's comments. Hugh, how should this be routed? Is there some git > branch that tmpfs changes can go in so that cgroup tree can pull? No git tree, but we can easily handle it in one of two ways. include/linux/shmem_fs.h and mm/shmem.c usually go to Linus from Andrew from his mmotm tree (which includes and is included in linux-next, by some magic escaping infinite recursion). Are we expecting Aristeu+Zefan's simple_xattr patches to go into 3.7? I don't have anything planned for shmem.c for 3.7 beyond a bugfix, which shouldn't interact with the simple_xattr changes at all (I could remove info->lock, but will not do so this time around, precisely so as not to interfere with those patches). So it should be perfectly workable for you to take Aristeu+Zefan's shmem patches into your cgroup tree, then any further mods from mmotm will get layered on top. But if you prefer to leave shmem.c changes to Andrew, then it would also be perfectly workable for Aristeu to split the 1/4 into two: one for you which updates fs/xattr.c and include/linux/xattr.h with simple_xattr code stolen from mm/shmem.c and include/linux/shmem_fs.h; and one for Andrew which updates mm/shmem.c and include/linux/shmem_fs.h to delete its shmem_xattr stuff and use simple_xattr interfaces instead. Either approach is fine with me. 
Hugh -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT] Networking
A couple weeks of bug fixing in there. The largest chunk is all the broken crap Amerigo Wang found in the netpoll layer. 1) netpoll and its users have several serious bugs: a) uses GFP_KERNEL with locks held b) interfaces requiring interrupts disabled are called with them enabled c) and vice versa d) VLAN tag demuxing, as per all other RX packet input paths, is not applied All from Amerigo Wang. 2) Hopefully cure the ipv4 mapped ipv6 address TCP early demux bugs for good, from Neal Cardwell. 3) Unlike AF_UNIX, AF_PACKET sockets don't set a default credentials when the user doesn't specify one explicitly during sendmsg(). Instead we attach an empty (zero) SCM credential block which is definitely not what we want. Fix from Eric Dumazet. 4) IPv6 illegally invokes netdevice notifiers with RCU lock held, fix from Ben Hutchings. 5) inet_csk_route_child_sock() checks wrong inet options pointer, fix from Christoph Paasch. 6) When AF_PACKET is used for transmit, packet loopback doesn't behave properly when a socket fanout is enabled, from Eric Leblond. 7) On bluetooth l2cap channel create failure, we leak the socket, from Jaganath Kanakkassery. 8) Fix all the netprio file handling bugs found by Al Viro, from John Fastabend. 9) Several error return and NULL deref bug fixes in networking drivers from Julia Lawall. 10) A large smattering of struct padding et al. kernel memory leaks to userspace found by Mathias Krause. 11) Conntrack expectations in netfilter can access an uninitialized timer, fix from Pablo Neira Ayuso. 12) Several netfilter SIP tracker bug fixes from Patrick McHardy. 13) IPSEC ipv6 routes are not initialized correctly all the time, resulting in an OOPS in inet_putpeer(). Also from Patrick McHardy. 14) Bridging does rcu_dereference() outside of RCU protected area, from Stephen Hemminger. 15) Fix routing cache removal performance regression when looking up output routes that have a local destination. From Zheng Yan. Please pull, thanks a lot!
The following changes since commit ddf343f635fe4440cad528e12f96f28bd50aa099: Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux (2012-08-14 07:58:59 +0300) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git master for you to fetch changes up to e0e3cea46d31d23dc40df0a49a7a2c04fe8edfea: af_netlink: force credentials passing [CVE-2012-3520] (2012-08-21 14:53:01 -0700) Alexey Khoroshilov (1): rndis_wlan: Fix potential memory leak in update_pmkid() Amerigo Wang (15): netpoll: use GFP_ATOMIC in slave_enable_netpoll() and __netpoll_setup() netpoll: make __netpoll_cleanup non-block netconsole: do not release spin_lock when calling __netpoll_cleanup netpoll: take rcu_read_lock_bh() in netpoll_rx() netpoll: use netpoll_rx_on() in netpoll_rx() netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev() bridge: add some comments for NETDEV_RELEASE bridge: use list_for_each_entry() in netpoll functions netpoll: check netpoll tx status on the right device netpoll: convert several functions to bool vlan: clean up some variable names vlan: clean up vlan_dev_hard_start_xmit() netpoll: handle vlan tags in netpoll tx and rx path netpoll: re-enable irq in poll_napi() netconsole: remove a redundant netconsole_target_put() Andrei Emeltchenko (1): Bluetooth: smp: Fix possible NULL dereference Ben Hutchings (4): llc2: Fix silent failure of llc_station_init() llc2: Call llc_station_exit() on llc2_init() failure path llc: Fix races between llc2 handler use and (un)registration ipv6: addrconf: Avoid calling netdevice notifiers with RCU read-side lock Bjørn Mork (5): net: qmi_wwan: use fixed interface number matching net: qmi_wwan: add Sierra Wireless devices net: qmi_wwan: compress device_id list using macros net: sierra_net: replace whitelist with ifnumber match net: qmi_wwan: new devices: UML290 and K5006-Z Bob Copeland (1): ath5k: fix spin_lock_irqsave/spin_lock_bh nesting in mesh Christoph Paasch (1): 
ipv4: Use newinet->inet_opt in inet_csk_route_child_sock() Dan Carpenter (1): wireless: at76c50x: signedness bug in at76_dfu_get_state() David S. Miller (2): Merge branch 'for-davem' of git://git.kernel.org/.../linville/wireless Merge git://1984.lsi.us.es/nf Dirk Gouders (1): netconsole.txt: revision of examples for the receiver of kernel messages Eric Dumazet (3): tcp: fix possible socket refcount problem ipv4: fix ip header ident selection in __ip_make_skb() af_netlink: force credentials passing [CVE-2012-3520] Eric Leblond (1): af_packet: don't emit packet on orig fanout group Felix Fietkau (1):
Re: [PATCH v7 2/4] virtio_balloon: introduce migration primitives to balloon pages
On Wed, 15 Aug 2012 14:28:51 +0300, "Michael S. Tsirkin" wrote: > On Wed, Aug 15, 2012 at 12:16:51PM +0100, Mel Gorman wrote: > > I was thinking of exactly that page->mapping == balloon_mapping check. As I > > do not know how many active balloon drivers there might be I cannot guess > > in advance how much of a scalability problem it will be. > > Not at all sure multiple drivers are worth supporting, but multiple > *devices* is I think worth supporting, if for no other reason than that > they can work today. For that, we need a device pointer which Rafael > wants to put into the mapping, this means multiple balloon mappings. Rafael, please make sure that the balloon driver fails on the second and subsequent balloon devices. Michael, we only allow multiple balloon devices because it fell out of the implementation. If it causes us even the slightest issue, we should not support it. It's not a sensible setup. Cheers, Rusty. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/25] Crypto keys and module signing
On Thu, 16 Aug 2012 02:34:05 +0100, David Howells wrote: > > Hi Rusty, > > I've posted new versions of my module signing patches to my GIT trees. Now I get to punt this discussion to KS. I knew it was good for something! Cheers, Rusty. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/4] PCI/PM: PCI D3cold support fixes for 3.6-rc1
On Sun, Aug 19, 2012 at 6:09 PM, huang ying wrote: > Hi, Bjorn, > > Could you please merge this patchset? They fix real bugs. I assume you wanted the updated "[PATCH 3/4] PCI/PM: Fix config reg access ..." patch posted Aug 15. I merged these (with the updated 3/4 patch) to my "for-linus" branch. After it's in linux-next for a couple days, I'll ask Linus to pull it. > On Sun, Aug 19, 2012 at 6:35 PM, Bjørn Mork wrote: >> Huang Ying writes: >> >>> [BUGFIX 1/4] PCI/PM: enable D3/D3cold by default for most devices >>> [BUGFIX 2/4] PCI/PM: Keep parent bridge active when probing device >>> [BUGFIX 3/4] PCI/PM: Fix config reg access for D3cold and bridge suspending >>> [PATCH 4/4] PCI/PM: Add ABI document for sysfs file d3cold_allowed >> >> Hello, >> >> I am hoping these patches will appear in 3.6? They fix real problems in >> 3.6-rc1 for me. If it helps in any way, feel free to add >> >> Tested-by: Bjørn Mork >> >> to the 3 bugfix patches, including version 2 of patch #3. >> >> >> Bjørn >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in >> the body of a message to majord...@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html >> Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT PULL for v3.6-rc3] media fixes
Hi Linus, Please pull from: git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media v4l_for_linus For bug fixes, at soc_camera, si470x, uvcvideo, iguanaworks IR driver, radio_shark Kbuild fixes, and at the V4L2 core (radio fixes). Thank you! Mauro - The following changes since commit 8762541f067d371320731510669e27f5cc40af38: Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media (2012-07-31 18:47:44 -0700) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media v4l_for_linus for you to fetch changes up to 991b3137f21e13db4711f313edbe67d49bed795b: [media] media: soc_camera: don't clear pix->sizeimage in JPEG mode (2012-08-15 19:24:28 -0300) Albert Wang (1): [media] media: soc_camera: don't clear pix->sizeimage in JPEG mode Alex Gershgorin (1): [media] media: mx3_camera: buf_init() add buffer state check Fabio Estevam (2): [media] video: mx1_camera: Use clk_prepare_enable/clk_disable_unprepare [media] video: mx2_camera: Use clk_prepare_enable/clk_disable_unprepare Guenter Roeck (1): [media] Add USB dependency for IguanaWorks USB IR Transceiver Hans Verkuil (5): [media] DocBook: Remove a spurious character [media] si470x: v4l2-compliance fixes [media] mem2mem_testdev: fix querycap regression [media] VIDIOC_ENUM_FREQ_BANDS fix [media] Add missing logging for rangelow/high of hwseek Hans de Goede (4): [media] radio-shark*: Remove work-around for dangling pointer in usb intfdata [media] radio-shark*: Call cancel_work_sync from disconnect rather then release [media] radio-shark: Only compile led support when CONFIG_LED_CLASS is set [media] radio-shark2: Only compile led support when CONFIG_LED_CLASS is set Javier Martin (1): [media] media: mx2_camera: Fix clock handling for i.MX27 Jayakrishnan Memana (1): [media] uvcvideo: Reset the bytesused field when recycling an erroneous buffer Documentation/DocBook/media/v4l/vidioc-g-tuner.xml | 2 +- 
drivers/media/radio/radio-shark.c | 151 +++-- drivers/media/radio/radio-shark2.c | 137 ++- drivers/media/radio/si470x/radio-si470x-common.c | 3 + drivers/media/radio/si470x/radio-si470x-i2c.c | 5 +- drivers/media/radio/si470x/radio-si470x-usb.c | 2 +- drivers/media/rc/Kconfig | 1 + drivers/media/video/mem2mem_testdev.c | 2 +- drivers/media/video/mx1_camera.c | 4 +- drivers/media/video/mx2_camera.c | 47 --- drivers/media/video/mx3_camera.c | 22 +-- drivers/media/video/soc_camera.c | 3 +- drivers/media/video/soc_mediabus.c | 6 + drivers/media/video/uvc/uvc_queue.c| 1 + drivers/media/video/v4l2-ioctl.c | 10 +- 15 files changed, 217 insertions(+), 179 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/1] tcp: Wrong timeout for SYN segments
Hi David, I'm not 100% sure, but it looks like I found an RFC mismatch with the current default values of the TCP implementation. Alex >From 8b854a525eb45f64ad29dfab16f9d9f681e84495 Mon Sep 17 00:00:00 2001 From: Alexander Bergmann Date: Wed, 22 Aug 2012 00:29:08 +0200 Subject: [PATCH 1/1] tcp: Wrong timeout for SYN segments Commit 9ad7c049 changed the initRTO from 3secs to 1sec in accordance with RFC6298 (former RFC2988bis). This introduced a gap with RFC1122 that defines a minimum retransmission window for SYN segments of at least 180secs. Prior to 9ad7c049 the timeout was defined with 189secs. Now we have only a timeout of 63secs. ((2 << 5) - 1) * 3 secs = 189 secs ((2 << 5) - 1) * 1 secs = 63 secs To fulfill the MUST constraint in RFC1122 section 4.2.3.5 about R2 for SYN segments, the values of TCP_SYN_RETRIES and TCP_SYNACK_RETRIES must be changed to 7 retries. ((2 << 7) - 1) * 1 secs = 255 secs This would result in an ETIMEDOUT of 4 minutes 15 seconds. Signed-off-by: Alexander Bergmann --- include/net/tcp.h |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 1f000ff..7eaae19 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -98,10 +98,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); * 15 is ~13-30min depending on RTO. 
*/ -#define TCP_SYN_RETRIES 5 /* number of times to retry active opening a +#define TCP_SYN_RETRIES 7 /* number of times to retry active opening a * connection: ~180sec is RFC minimum */ -#define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a +#define TCP_SYNACK_RETRIES 7 /* number of times to retry passive opening a * connection: ~180sec is RFC minimum */ #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT -- 1.7.8.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/