date:20120821

[PATCH 3/3] audit: clean up refcounting in audit-tree

2012-08-21 Thread Miklos Szeredi

From: Miklos Szeredi 

Drop the initial reference by fsnotify_init_mark early instead of
audit_tree_freeing_mark() at destroy time.

In the cases we destroy the mark before we drop the initial reference we need to
get rid of the get_mark that balances the put_mark in audit_tree_freeing_mark().

Signed-off-by: Miklos Szeredi 
---
 kernel/audit_tree.c |   12 +++++++++---
 1 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 2b2..ed206fd 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -292,6 +292,7 @@ static void untag_chunk(struct node *p)
spin_unlock(&hash_lock);
spin_unlock(&entry->lock);
fsnotify_destroy_mark(entry);
+   fsnotify_put_mark(&new->mark);  /* drop initial reference */
goto out;
 
 Fallback:
@@ -330,7 +331,6 @@ static int create_chunk(struct inode *inode, struct 
audit_tree *tree)
spin_unlock(&hash_lock);
chunk->dead = 1;
spin_unlock(&entry->lock);
-   fsnotify_get_mark(entry);
fsnotify_destroy_mark(entry);
fsnotify_put_mark(entry);
return 0;
@@ -346,6 +346,7 @@ static int create_chunk(struct inode *inode, struct 
audit_tree *tree)
insert_hash(chunk);
spin_unlock(&hash_lock);
spin_unlock(&entry->lock);
+   fsnotify_put_mark(entry);   /* drop initial reference */
return 0;
 }
 
@@ -411,7 +412,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree 
*tree)
spin_unlock(&chunk_entry->lock);
spin_unlock(&old_entry->lock);
 
-   fsnotify_get_mark(chunk_entry);
fsnotify_destroy_mark(chunk_entry);
 
fsnotify_put_mark(chunk_entry);
@@ -444,6 +444,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree 
*tree)
spin_unlock(&chunk_entry->lock);
spin_unlock(&old_entry->lock);
fsnotify_destroy_mark(old_entry);
+   fsnotify_put_mark(chunk_entry); /* drop initial reference */
fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
return 0;
 }
@@ -915,7 +916,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark 
*entry, struct fsnotify
struct audit_chunk *chunk = container_of(entry, struct audit_chunk, 
mark);
 
evict_chunk(chunk);
-   fsnotify_put_mark(entry);
+
+   /*
+* We are guaranteed to have at least one reference to the mark from
+* either the inode or the caller of fsnotify_destroy_mark().
+*/
+   BUG_ON(atomic_read(&entry->refcnt) < 1);
 }
 
 static bool audit_tree_send_event(struct fsnotify_group *group, struct inode 
*inode,
-- 
1.7.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/3] audit: don't free_chunk() after fsnotify_add_mark()

2012-08-21 Thread Miklos Szeredi

From: Miklos Szeredi 

Don't do free_chunk() after fsnotify_add_mark().  That one does a delayed unref
via the destroy list and this results in use-after-free.

Signed-off-by: Miklos Szeredi 
Acked-by: Eric Paris 
CC: sta...@vger.kernel.org
---
 kernel/audit_tree.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 3a5ca58..69a5851 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -259,7 +259,7 @@ static void untag_chunk(struct node *p)
 
fsnotify_duplicate_mark(&new->mark, entry);
if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, 
NULL, 1)) {
-   free_chunk(new);
+   fsnotify_put_mark(&new->mark);
goto Fallback;
}
 
@@ -322,7 +322,7 @@ static int create_chunk(struct inode *inode, struct 
audit_tree *tree)
 
entry = &chunk->mark;
if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) {
-   free_chunk(chunk);
+   fsnotify_put_mark(entry);
return -ENOSPC;
}
 
@@ -396,7 +396,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree 
*tree)
fsnotify_duplicate_mark(chunk_entry, old_entry);
if (fsnotify_add_mark(chunk_entry, chunk_entry->group, 
chunk_entry->i.inode, NULL, 1)) {
spin_unlock(&old_entry->lock);
-   free_chunk(chunk);
+   fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
return -ENOSPC;
}
-- 
1.7.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/3] audit-tree fixes

2012-08-21 Thread Miklos Szeredi

Linus,

The audit subsystem maintainers (Al and Eric) are not responding to repeated
resends.  Eric did ack them a while ago, but no response since then.  So I'm
sending these directly to you.

Git tree is here:

  git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git audit-fixes

Thanks,
Miklos


---
Miklos Szeredi (3):
  audit: don't free_chunk() after fsnotify_add_mark()
  audit: fix refcounting in audit-tree
  audit: clean up refcounting in audit-tree

---
 kernel/audit_tree.c |   19 ++++++++++++-------
 1 files changed, 12 insertions(+), 7 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3] SUNRPC: protect service sockets lists during per-net shutdown

2012-08-21 Thread J. Bruce Fields

On Thu, Aug 16, 2012 at 03:29:03PM -0400, J. Bruce Fields wrote:
> Looking back at this:
> 
>   - adding the sv_lock looks like the right thing to do anyway
> independent of containers, because svc_age_temp_xprts may
> still be running.

This is what I've been testing with.

Or alternatively if you'd rather strip out the other stuff from your
patch I could take that instead.

--b.

commit 719f8bcc883e7992615f4d5625922e24995e2d98
Author: J. Bruce Fields 
Date:   Mon Aug 13 17:03:00 2012 -0400

svcrpc: fix xpt_list traversal locking on shutdown

Server threads are not running at this point, but svc_age_temp_xprts
still may be, so we need this locking.

Signed-off-by: J. Bruce Fields 

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index bac973a..e1810b9 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -917,16 +917,18 @@ void svc_close_xprt(struct svc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(svc_close_xprt);
 
-static void svc_close_list(struct list_head *xprt_list, struct net *net)
+static void svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, 
struct net *net)
 {
struct svc_xprt *xprt;
 
+   spin_lock(&serv->sv_lock);
list_for_each_entry(xprt, xprt_list, xpt_list) {
if (xprt->xpt_net != net)
continue;
set_bit(XPT_CLOSE, &xprt->xpt_flags);
set_bit(XPT_BUSY, &xprt->xpt_flags);
}
+   spin_unlock(&serv->sv_lock);
 }
 
 static void svc_clear_pools(struct svc_serv *serv, struct net *net)
@@ -949,24 +951,28 @@ static void svc_clear_pools(struct svc_serv *serv, struct 
net *net)
}
 }
 
-static void svc_clear_list(struct list_head *xprt_list, struct net *net)
+static void svc_clear_list(struct svc_serv *serv, struct list_head *xprt_list, 
struct net *net)
 {
struct svc_xprt *xprt;
struct svc_xprt *tmp;
+   LIST_HEAD(victims);
 
+   spin_lock(&serv->sv_lock);
list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
if (xprt->xpt_net != net)
continue;
-   svc_delete_xprt(xprt);
+   list_move(&xprt->xpt_list, &victims);
}
-   list_for_each_entry(xprt, xprt_list, xpt_list)
-   BUG_ON(xprt->xpt_net == net);
+   spin_unlock(&serv->sv_lock);
+
+   list_for_each_entry_safe(xprt, tmp, &victims, xpt_list)
+   svc_delete_xprt(xprt);
 }
 
 void svc_close_net(struct svc_serv *serv, struct net *net)
 {
-   svc_close_list(&serv->sv_tempsocks, net);
-   svc_close_list(&serv->sv_permsocks, net);
+   svc_close_list(serv, &serv->sv_tempsocks, net);
+   svc_close_list(serv, &serv->sv_permsocks, net);
 
svc_clear_pools(serv, net);
/*
@@ -974,8 +980,8 @@ void svc_close_net(struct svc_serv *serv, struct net *net)
 * svc_xprt_enqueue will not add new entries without taking the
 * sp_lock and checking XPT_BUSY.
 */
-   svc_clear_list(&serv->sv_tempsocks, net);
-   svc_clear_list(&serv->sv_permsocks, net);
+   svc_clear_list(serv, &serv->sv_tempsocks, net);
+   svc_clear_list(serv, &serv->sv_permsocks, net);
 }
 
 /*
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility

2012-08-21 Thread Michael S. Tsirkin

On Tue, Aug 21, 2012 at 02:28:20PM -0300, Rafael Aquini wrote:
> On Tue, Aug 21, 2012 at 09:24:32AM -0700, Paul E. McKenney wrote:
> > On Tue, Aug 21, 2012 at 05:20:11PM +0200, Peter Zijlstra wrote:
> > > On Tue, 2012-08-21 at 09:47 -0300, Rafael Aquini wrote:
> > > > +   mapping = rcu_access_pointer(page->mapping);
> > > > +   if (mapping)
> > > > +   mapping = mapping->assoc_mapping; 
> > > 
> > > The comment near rcu_access_pointer() explicitly says:
> > > 
> > >  * Return the value of the specified RCU-protected pointer, but omit the
> > >  * smp_read_barrier_depends() and keep the ACCESS_ONCE().  This is useful
> > >  * when the value of this pointer is accessed, but the pointer is not
> > >  * dereferenced,
> > > 
> > > Yet you dereference the pointer... smells like fail to me.
> > 
> > Indeed!
> > 
> > This will break DEC Alpha.  In addition, if ->mapping can transition
> > from non-NULL to NULL, and if you used rcu_access_pointer() rather
> > than rcu_dereference() to avoid lockdep-RCU from yelling at you about
> > not either being in an RCU read-side critical section or holding an
> > update-side lock, you can see failures as follows:
> > 
> > 1.  CPU 0 runs the above code, picks up mapping, and finds it non-NULL.
> > 
> > 2.  CPU 0 is preempted or otherwise delayed.  (Keep in mind that
> > even disabling interrupts in a guest OS does not prevent the
> > host hypervisor from preempting!)
> > 
> > 3.  Some other CPU NULLs page->mapping.  Because CPU 0 isn't doing
> > anything to prevent it, this other CPU frees the memory.
> > 
> > 4.  CPU 0 resumes, and then accesses what is now the freelist.
> > Arbitrarily bad things start happening.
> > 
> > If you are in a read-side critical section, use rcu_dereference() instead
> > of rcu_access_pointer().  If you are holding an update-side lock, use
> > rcu_dereference_protected() and say what lock you are holding.  If you
> > are doing something else, please say what it is.
> > 
> > Thanx, Paul
> >
> Paul & Peter,
> 
> Thanks for looking into this stuff and providing me such valuable feedback, 
> and
> RCU usage crashcourse.
> 
> I believe rcu_dereference_protected() is what I want/need here, since this 
> code
> is always called for pages which we hold locked (PG_locked bit).

It would only help if we locked the page while updating the mapping,
as far as I can see we don't.

> So, it brings me
> to ask you if the following usage looks sane enough to fix the well pointed 
> issue,
> or if it's another misuse of RCU API:
> 
> +   mapping = rcu_dereference_protected(page->mapping, PageLocked(page));
> +   if (mapping)
> +   mapping = mapping->assoc_mapping; 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Using random in interrupts for RT

2012-08-21 Thread Ben Hutchings

On Tue, Aug 21, 2012 at 02:12:13PM -0400, Steven Rostedt wrote:
> On Tue, 2012-08-14 at 10:30 -0400, Steven Rostedt wrote:
> > Thomas,
> > 
> > Ben Hutchings asked me if we still need "genirq: Disable random call on
> > preempt-rt" for -rt? With commit 902c098a366 "random: use lockless
> > techniques in the interrupt path" there are no more locks used. But does
> > it still produce high latencies?
> 
> Ben,
> 
> Looks like the #ifndef can't be removed just yet. The code is not
> totally lockless...
> 
> 
> add_interrupt_randomness ->
>   credit_entropy_bits ->
>   kill_fasync ->
>   kill_fasync_rcu ->
>   spin_lock_irqsave(&fa->fa_lock, flags)
> 
> to make things worse, that spinlock turns into a mutex on -rt, so it may
> crash the box if triggered.
 
Thanks for checking this.

Ben.

-- 
Ben Hutchings
We get into the habit of living before acquiring the habit of thinking.
  - Albert Camus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Q:pt_base in COMPAT mode offset by two pages. Was:Re: [Xen-devel] [PATCH 02/11] xen/x86: Use memblock_reserve for sensitive areas.

2012-08-21 Thread Konrad Rzeszutek Wilk

On Tue, Aug 21, 2012 at 01:27:32PM -0400, Konrad Rzeszutek Wilk wrote:
> On Mon, Aug 20, 2012 at 10:13:05AM -0400, Konrad Rzeszutek Wilk wrote:
> > On Fri, Aug 17, 2012 at 06:35:12PM +0100, Stefano Stabellini wrote:
> > > On Thu, 16 Aug 2012, Konrad Rzeszutek Wilk wrote:
> > > > instead of a big memblock_reserve. This way we can be more
> > > > selective in freeing regions (and it also makes it easier
> > > > to understand where is what).
> > > > 
> > > > [v1: Move the auto_translate_physmap to proper line]
> > > > [v2: Per Stefano suggestion add more comments]
> > > > Signed-off-by: Konrad Rzeszutek Wilk 
> > > 
> > > much better now!
> > 
> > Though interestingly enough it breaks 32-bit dom0s (and only dom0s).
> > Will have a revised patch posted shortly.
> 
> Jan, I thought something odd. Part of this code replaces this:
> 
>   memblock_reserve(__pa(xen_start_info->mfn_list),
>   xen_start_info->pt_base - xen_start_info->mfn_list);
> 
> with a more region-by-region area. What I found out is that if I boot this
> as 32-bit guest with a 64-bit hypervisor the xen_start_info->pt_base is
> actually wrong.
> 
> Specifically this is what bootup says:
> 
> (good working case - 32bit hypervisor with 32-bit dom0):
> (XEN)  Loaded kernel: c100->c1a23000
> (XEN)  Init. ramdisk: c1a23000->cf730e00
> (XEN)  Phys-Mach map: cf731000->cf831000
> (XEN)  Start info:cf831000->cf83147c
> (XEN)  Page tables:   cf832000->cf8b5000
> (XEN)  Boot stack:cf8b5000->cf8b6000
> (XEN)  TOTAL: c000->cfc0
> 
> [0.00] PT: cf832000 (f832000)
> [0.00] Reserving PT: f832000->f8b5000
> 
> And with a 64-bit hypervisor:
> 
> XEN) VIRTUAL MEMORY ARRANGEMENT:
> (XEN)  Loaded kernel: c100->c1a23000
> (XEN)  Init. ramdisk: c1a23000->cf730e00
> (XEN)  Phys-Mach map: cf731000->cf831000
> (XEN)  Start info:cf831000->cf8314b4
> (XEN)  Page tables:   cf832000->cf8b6000
> (XEN)  Boot stack:cf8b6000->cf8b7000
> (XEN)  TOTAL: c000->cfc0
> (XEN)  ENTRY ADDRESS: c16bb22c
> 
> [0.00] PT: cf834000 (f834000)
> [0.00] Reserving PT: f834000->f8b8000
> 
> So the pt_base is offset by two pages. And looking at c/s 13257
> it's not clear to me why this two page offset was added?
> 
> The toolstack works fine - so launching 32-bit guests either
> under a 32-bit hypervisor or 64-bit works fine:
> ] domainbuilder: detail: xc_dom_alloc_segment:   page tables  : 0xcf805000 -> 
> 0xcf885000  (pfn 0xf805 + 0x80 pages)
> [0.00] PT: cf805000 (f805000)
> 

And this patch on top of the others fixes this..


>From 806c312e50f122c47913145cf884f53dd09d9199 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk 
Date: Tue, 21 Aug 2012 14:31:24 -0400
Subject: [PATCH] xen/x86: Workaround 64-bit hypervisor and 32-bit initial
 domain.

If a 64-bit hypervisor is booted with a 32-bit initial domain,
the hypervisor deals with the initial domain as "compat" and
does some extra adjustments (like pagetables are 4 bytes instead
of 8). It also adjusts the xen_start_info->pt_base incorrectly.

When booted with a 32-bit hypervisor (32-bit initial domain):
..
(XEN)  Start info:cf831000->cf83147c
(XEN)  Page tables:   cf832000->cf8b5000
..
[0.00] PT: cf832000 (f832000)
[0.00] Reserving PT: f832000->f8b5000

And with a 64-bit hypervisor:
(XEN)  Start info:cf831000->cf8314b4
(XEN)  Page tables:   cf832000->cf8b6000

[0.00] PT: cf834000 (f834000)
[0.00] Reserving PT: f834000->f8b8000

To deal with this, we keep track of the highest physical
address we have reserved via memblock_reserve. If that address
does not overlap with pt_base, we have a gap which we reserve.

Signed-off-by: Konrad Rzeszutek Wilk 
---
 arch/x86/xen/enlighten.c |   30 +-
 1 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e532eb5..511f92d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1002,19 +1002,24 @@ static int xen_write_msr_safe(unsigned int msr, 
unsigned low, unsigned high)
  * If the MFN is not in the m2p (provided to us by the hypervisor) this
  * function won't do anything. In practice this means that the XenBus
  * MFN won't be available for the initial domain. */
-static void __init xen_reserve_mfn(unsigned long mfn)
+static unsigned long __init xen_reserve_mfn(unsigned long mfn)
 {
-   unsigned long pfn;
+   unsigned long pfn, end_pfn = 0;
 
if (!mfn)
-   return;
+   return end_pfn;
+
pfn = mfn_to_pfn(mfn);
-   if (phys_to_machine_mapping_valid(pfn))
-   memblock_reserve(PFN_PHYS(pfn), PAGE_SIZE);
+   if (phys_to_machine_mapping_valid(pfn)) {
+   end_pfn = PFN_PHYS(pfn) + PAGE_SIZE;
+   memblock_reserve(P

Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility

2012-08-21 Thread Michael S. Tsirkin

On Tue, Aug 21, 2012 at 02:55:03PM -0300, Rafael Aquini wrote:
> On Tue, Aug 21, 2012 at 04:52:23PM +0300, Michael S. Tsirkin wrote:
> > > + * address_space_operations utilized methods for ballooned pages:
> > > + *   .migratepage- used to perform balloon's page migration (as is)
> > > + *   .launder_page   - used to isolate a page from balloon's page list
> > > + *   .freepage   - used to reinsert an isolated page to balloon's 
> > > page list
> > > + */
> > 
> > It would be a good idea to document the assumptions here.
> > Looks like .launder_page and .freepage are called in rcu critical
> > section.
> > But migratepage isn't - why is that safe?
> > 
> 
> The migratepage callback for virtio_balloon can sleep, and IIUC we cannot 
> sleep
> within a RCU critical section. 
> 
> Also, the migratepage callback is called at inner migration's circle function
> move_to_new_page(), and I don't think embedding it in a RCU critical section
> would be a good idea, for the same understanding aforementioned.

Yes but this means it is still exposed to the module unloading
races that RCU was supposed to fix.
So need to either rework that code so it won't sleep
or switch to some other synchronization.

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 28/31] arm64: Generic timers support

2012-08-21 Thread Christopher Covington

On 08/14/2012 01:52 PM, Catalin Marinas wrote:
> From: Marc Zyngier 
> 
> This patch adds support for the ARM generic timers with A64 instructions
> for accessing the timer registers. It uses the physical counter as the
> clock source and the virtual counter as sched_clock.
> 
> The timer frequency can be specified via DT or read from the CNTFRQ_EL0
> register. The physical counter is also accessible from user space
> allowing fast gettimeofday() implementation.

[...]

> +++ b/drivers/clocksource/arm_generic.c

[...]

> +static void arch_timer_reg_write(int reg, u32 val)
> +{
> + switch (reg) {
> + case ARCH_TIMER_REG_CTRL:
> + asm volatile("msr cntp_ctl_el0,  %0" : : "r" (val));
> + break;
> + case ARCH_TIMER_REG_TVAL:
> + asm volatile("msr cntp_tval_el0, %0" : : "r" (val));
> + break;
> + default:
> + BUG();
> + }
> +
> + isb();
> +}

Doesn't architecture-specific assembly need to go in the arch directory rather
than the drivers directory?

Christopher

-- 
Employee of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: lockdep warning on rt_mutex_lock()

2012-08-21 Thread Paul E. McKenney

On Sat, Aug 18, 2012 at 12:59:08PM +0800, Fengguang Wu wrote:
> On Fri, Aug 17, 2012 at 07:44:37AM -0700, Paul E. McKenney wrote:
> > On Fri, Aug 17, 2012 at 10:02:40PM +0800, Fengguang Wu wrote:
> > > On Fri, Aug 17, 2012 at 06:43:28AM -0700, Paul E. McKenney wrote:
> > > > On Fri, Aug 17, 2012 at 06:06:35PM +0800, Fengguang Wu wrote:
> > > > > Greetings,
> > > > > 
> > > > > FYI, a lockdep warning:
> > > > 
> > > > Certainly looks problematic!
> > > > 
> > > > Any hint as to what version of the kernel produced this splat?
> > > > (Yes, lazy of me to ask, I know, but I am not seeing it in my testing.)
> > > 
> > > It happens on both 3.5.0 and 3.6-rc1. Will bisect (try older kernels) 
> > > help?
> > > Bisect is handy for me :)
> > 
> > Bisection would be very welcome!!!  ;-)
> 
> The bisect result is...

Hmmm...  This patch is a bit of a blast from the past.

> commit 9e571a82f0cb205a65a0ea41657f19f22b7fabb8
> Author: Paul E. McKenney 
> Date:   Thu Sep 30 21:26:52 2010 -0700
> 
> rcu: add tracing for TINY_RCU and TINY_PREEMPT_RCU
> 
> Add tracing for the tiny RCU implementations, including statistics on
> boosting in the case of TINY_PREEMPT_RCU and RCU_BOOST.
> 
> Signed-off-by: Paul E. McKenney 
> Signed-off-by: Paul E. McKenney 

So the lockdep complaint indicates that lockdep and the actual hardware
had different opinions about whether or not interrupts were enabled.
One way that can happen is through use of raw_local_irq_save().  And this
commit did add a raw_local_irq_save().

So maybe converting to local_irq_save() will make things work better.

Fengguang, could you please try out the following patch?

Thanx, Paul



rcu: Move TINY_PREEMPT_RCU away from raw_local_irq_save()

The use of raw_local_irq_save() is unnecessary, given that local_irq_save()
really does disable interrupts.  Also, it appears to interfere with lockdep.
Therefore, this commit moves to local_irq_save().

Reported-by: Fengguang Wu 
Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 

diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 918fd1e..3d01902 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -278,7 +278,7 @@ static int rcu_boost(void)
rcu_preempt_ctrlblk.exp_tasks == NULL)
return 0;  /* Nothing to boost. */
 
-   raw_local_irq_save(flags);
+   local_irq_save(flags);
 
/*
 * Recheck with irqs disabled: all tasks in need of boosting
@@ -287,7 +287,7 @@ static int rcu_boost(void)
 */
if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
rcu_preempt_ctrlblk.exp_tasks == NULL) {
-   raw_local_irq_restore(flags);
+   local_irq_restore(flags);
return 0;
}
 
@@ -317,7 +317,7 @@ static int rcu_boost(void)
t = container_of(tb, struct task_struct, rcu_node_entry);
rt_mutex_init_proxy_locked(&mtx, t);
t->rcu_boost_mutex = &mtx;
-   raw_local_irq_restore(flags);
+   local_irq_restore(flags);
rt_mutex_lock(&mtx);
rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
 
@@ -991,9 +991,9 @@ static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int 
n)
 {
unsigned long flags;
 
-   raw_local_irq_save(flags);
+   local_irq_save(flags);
rcp->qlen -= n;
-   raw_local_irq_restore(flags);
+   local_irq_restore(flags);
 }
 
 /*

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility

2012-08-21 Thread Rafael Aquini

On Tue, Aug 21, 2012 at 10:13:30PM +0300, Michael S. Tsirkin wrote:
> > 
> > I believe rcu_dereference_protected() is what I want/need here, since this 
> > code
> > is always called for pages which we hold locked (PG_locked bit).
> 
> It would only help if we locked the page while updating the mapping,
> as far as I can see we don't.
>

But we can do it. In fact, by doing it (locking the page) we can easily avoid
the nasty race balloon_isolate_page / leak_balloon, in a much simpler way, IMHO.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: [RFC][PATCH v3 2/3] efi_pstore: Introducing workqueue updating sysfs entries

2012-08-21 Thread Seiji Aguchi

> >  efivars_exit(void)
> >  {
> > if (efi_enabled) {
> > +   cancel_work_sync(&efivar_work);
> 
> Please move this cancel_work_sync() to be before the efi_enabled test.
>  efi_enabled here means that we registered __efivars.  There may be another 
> driver (gsmi) using the efivars code, so we should
> always be cancelling this work.
> 

OK. I will fix it.
I confirmed that gsmi actually called register_efivars().
Thank you for letting me know about this.

Seiji
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ 00/46] 3.5.3-stable review

2012-08-21 Thread Sven Joachim

I would like to see commit 2064db725cc6d4ea19a24c138bc37939b63e3ae6
(drm/nv86/fifo: suspend fix) cherry-picked to the 3.5 kernel series, it
makes suspend work again on my machine.  Could you please queue this up
for 3.5.4?

Apologies for thread hijacking, my attempts to directly send the request
to sta...@vger.kernel.org failed (mail seems to have disappeared in the
bit bucket). :-(

Cheers,
   Sven
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [tip:timers/core] timer: Implement TIMER_IRQSAFE

2012-08-21 Thread Tejun Heo

On Tue, Aug 21, 2012 at 09:43:07AM -0700, tip-bot for Tejun Heo wrote:
> Commit-ID:  c5f66e99b7cb091e3d51ae8e8156892e8feb7fa3
> Gitweb: http://git.kernel.org/tip/c5f66e99b7cb091e3d51ae8e8156892e8feb7fa3
> Author: Tejun Heo 
> AuthorDate: Wed, 8 Aug 2012 11:10:28 -0700
> Committer:  Thomas Gleixner 
> CommitDate: Tue, 21 Aug 2012 16:28:31 +0200
> 
> timer: Implement TIMER_IRQSAFE
> 
> Timer internals are protected with irq-safe locks but timer execution
> isn't, so a timer being dequeued for execution and its execution
> aren't atomic against IRQs.  This makes it impossible to wait for its
> completion from IRQ handlers and difficult to shoot down a timer from
> IRQ handlers.
> 
> This issue caused some issues for delayed_work interface.  Because
> there's no way to reliably shoot down delayed_work->timer from IRQ
> handlers, __cancel_delayed_work() can't share the logic to steal the
> target delayed_work with cancel_delayed_work_sync(), and can only
> steal delayed_works which are queued on timer.  Similarly, the
> pending mod_delayed_work() can't be used from IRQ handlers.
> 
> This patch adds a new timer flag TIMER_IRQSAFE, which makes the timer
> to be executed without enabling IRQ after dequeueing such that its
> dequeueing and execution are atomic against IRQ handlers.
> 
> This makes it safe to wait for the timer's completion from IRQ
> handlers, for example, using del_timer_sync().  It can never be
> executing on the local CPU and if executing on other CPUs it won't be
> interrupted until done.
> 
> This will enable simplifying delayed_work cancel/mod interface.
> 
> Signed-off-by: Tejun Heo 
> Cc: torva...@linux-foundation.org
> Cc: pet...@infradead.org
> Link: http://lkml.kernel.org/r/139428-24962-5-git-send-email...@kernel.org
> Signed-off-by: Thomas Gleixner 

Will pull into wq/for-3.7 and put delay_work changes on top.  If
there's any objection, please scream.

Thanks a lot.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility

2012-08-21 Thread Michael S. Tsirkin

On Tue, Aug 21, 2012 at 02:42:52PM -0300, Rafael Aquini wrote:
> On Tue, Aug 21, 2012 at 06:41:42PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Aug 21, 2012 at 05:16:06PM +0200, Peter Zijlstra wrote:
> > > On Tue, 2012-08-21 at 16:52 +0300, Michael S. Tsirkin wrote:
> > > > > + rcu_read_lock();
> > > > > + mapping = rcu_dereference(page->mapping);
> > > > > + if (mapping_balloon(mapping))
> > > > > + ret = true;
> > > > > + rcu_read_unlock();
> > > > 
> > > > This looks suspicious: you drop rcu_read_unlock
> > > > so can't page switch from balloon to non balloon? 
> > > 
> > > RCU read lock is a non-exclusive lock, it cannot avoid anything like
> > > that.
> > 
> > You are right, of course. So even keeping rcu_read_lock across both test
> > and operation won't be enough - you need to make this function return
> > the mapping and pass it to isolate_page/putback_page so that it is only
> > dereferenced once.
> >
> No, I need to dereference page->mapping to check ->mapping flags here, before
> returning. Remember this function is used at MM's compaction/migration inner
> circles to identify ballooned pages and decide what's the next step. This
> function is doing the right thing, IMHO.

Yes but the calling code is not doing the right thing.

What Peter pointed out here is that two calls to rcu dereference pointer
can return different values: rcu critical section is not a lock.
So the test for balloon page is not effective: it can change
after the fact.

To fix, get the pointer once and then pass the mapping
around.


> Also, looking at how compaction/migration work, we verify the only critical 
> path
> for this function is the page isolation step. The other steps (migration and
> putback) perform their work on private lists previously isolated from a given
> source.

I vaguely understand but it would be nice to document this properly.
The interaction between page->lru handling in balloon and in mm
is especially confusing.

> So, we just need to make sure that the isolation part does not screw things up
> by isolating pages that balloon driver is about to release. That's why there 
> are
> so many checkpoints down the page isolation path assuring we really are
> isolating a balloon page. 

Well, testing same thing multiple times is just confusing.  It is very
hard to make sure there are no races with so much complexity,
and the requirements from the balloon driver are unclear to me -
it very much looks like it is poking in mm internals.

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v9 0/2] kvm: level irqfd support

2012-08-21 Thread Alex Williamson

Here's the much anticipated re-write of support for level irqfds.  As
Michael suggested, I've rolled the eoi/ack notification fd into
KVM_IRQFD as a new mode.  For lack of a better name, as there seem to
be objections to associating this specifically with an EOI or an ACK,
I've named this OADN or "On Ack, De-assert & Notify".

Patch 1of2 switches current KVM_IRQFDs to use their own IRQ source ID
since we're potentially stepping on KVM_USERSPACE_IRQ_SOURCE_ID.
Unfortunately I was not able to make 2of2 use a single IRQ source ID,
the reason is it's racy.  Objects to track OADNs are made dynamically,
we look through existing ones for a match under spinlock and setup a
new one if there's no match.  On teardown, we can remove the OADN from
the list under lock, but that same lock prevents us from de-assigning
the IRQ ACK notifier or waiting for an RCU grace period.  We must make
sure that any unused GSI is de-asserted, but the above means it's
possible that another OADN has been created for this source ID/GSI
and de-asserting the GSI could lead to breakage.  Instead each OADN
object gets its own source ID, but these are all shared by users
of the same GSI.  So for PCI devices, we might have up to 4 IRQ
source IDs allocated.

Michael had also suggested avoiding reference counting and using
list_empty for this OADN object.  Unfortunately, that doesn't work
for similar reasons.  We want to release the OADN object under lock,
preventing others from re-using it on the free path, but in order
to have lock-less de-assert & notify we use RCU, meaning we can't
trust list_empty until after an RCU grace period, which must be
done outside of spinlocks.

If there are suggestions how we can handle these better, please
make them, but I think this compromise is race-free and still
manages to make allocation of IRQ source IDs mostly a non-issue
for device assignment limits.  Thanks,

Alex

---

Alex Williamson (2):
  kvm: On Ack, De-assert & Notify KVM_IRQFD extension
  kvm: Use a reserved IRQ source ID for irqfd


 Documentation/virtual/kvm/api.txt |   13 ++
 arch/x86/kvm/x86.c|4 +
 include/linux/kvm.h   |7 +
 include/linux/kvm_host.h  |2 
 virt/kvm/eventfd.c|  199 -
 5 files changed, 218 insertions(+), 7 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v9 1/2] kvm: Use a reserved IRQ source ID for irqfd

2012-08-21 Thread Alex Williamson

KVM_IRQFD currently uses the reserved KVM_USERSPACE_IRQ_SOURCE_ID
which is also shared with userspace injection methods like
KVM_IRQ_LINE.  This can cause a conflict if an irqfd triggers on
a GSI asserted through KVM_IRQ_LINE.  Move irqfd to its own
reserved IRQ source ID.  Add a capability for userspace to test
for this fix.

Signed-off-by: Alex Williamson 
---

 arch/x86/kvm/x86.c   |3 +++
 include/linux/kvm.h  |1 +
 include/linux/kvm_host.h |1 +
 virt/kvm/eventfd.c   |6 +++---
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 42bce48..cd98673 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2174,6 +2174,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_GET_TSC_KHZ:
case KVM_CAP_PCI_2_3:
case KVM_CAP_KVMCLOCK_CTRL:
+   case KVM_CAP_IRQFD_IRQ_SOURCE_ID:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -6258,6 +6259,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+   /* Reserve bit 1 of irq_sources_bitmap for irqfd irq source */
+   set_bit(KVM_IRQFD_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 2ce09aa..ae66b9c 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -618,6 +618,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
+#define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b70b48b..b763230 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -71,6 +71,7 @@
 #define KVM_REQ_PMI   17
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID0
+#define KVM_IRQFD_IRQ_SOURCE_ID1
 
 struct kvm;
 struct kvm_vcpu;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 7d7e2aa..2245cfa 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -67,8 +67,8 @@ irqfd_inject(struct work_struct *work)
struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
struct kvm *kvm = irqfd->kvm;
 
-   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
-   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
+   kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 1);
+   kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 0);
 }
 
 /*
@@ -138,7 +138,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, 
void *key)
irq = rcu_dereference(irqfd->irq_entry);
/* An event has been signaled, inject an interrupt */
if (irq)
-   kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
+   kvm_set_msi(irq, kvm, KVM_IRQFD_IRQ_SOURCE_ID, 1);
else
schedule_work(&irqfd->inject);
rcu_read_unlock();

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v9 2/2] kvm: On Ack, De-assert & Notify KVM_IRQFD extension

2012-08-21 Thread Alex Williamson

For VFIO based device assignment we'd like a mechanism to allow level
triggered interrupts to be directly injected into KVM.  KVM_IRQFD
already allows this for edge triggered interrupts, but for level, we
need to watch for acknowledgement of the interrupt from the guest to
provide us a hint when to test the device and allow it to re-assert
if necessary.  To do this, we create a new KVM_IRQFD mode called
"On Ack, De-assert & Notify", or OADN.  In this mode, an interrupt
injection provides only a gsi assertion.  We then hook into the IRQ
ACK notifier, which when triggered de-asserts the gsi and notifies
via another eventfd.  It's then the responsibility of the user to
re-assert the interrupt if service is still required.

Signed-off-by: Alex Williamson 
---

 Documentation/virtual/kvm/api.txt |   13 ++
 arch/x86/kvm/x86.c|1 
 include/linux/kvm.h   |6 +
 include/linux/kvm_host.h  |1 
 virt/kvm/eventfd.c|  193 -
 5 files changed, 210 insertions(+), 4 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index bf33aaa..87d7321 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1946,6 +1946,19 @@ the guest using the specified gsi pin.  The irqfd is 
removed using
 the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
 and kvm_irqfd.gsi.
 
+With KVM_CAP_IRQFD_OADN, KVM_IRQFD supports an "On Ack, De-assert &
+Notify" option that allows emulation of level-triggered interrupts.
+When kvm_irqfd.fd is triggered, the requested gsi is asserted and
+remains asserted until interaction with the irqchip indicates the
+VM has acknowledged the interrupt, such as an EOI.  On acknowledgement
+the gsi is automatically de-asserted and the user is notified via
+kvm_irqfd.notifyfd.  The user is then required to re-assert the
+interrupt if the associated device still requires service.  To enable
+this mode, configure the KVM_IRQFD using the KVM_IRQFD_FLAG_OADN flag
+and specify kvm_irqfd.notifyfd.  Note that closing kvm_irqfd.notifyfd
+while configured in this mode does not disable the irqfd.  The
+KVM_IRQFD_FLAG_OADN flag is only necessary on assignment.
+
 4.76 KVM_PPC_ALLOCATE_HTAB
 
 Capability: KVM_CAP_PPC_ALLOC_HTAB
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cd98673..fde7b66 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2175,6 +2175,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PCI_2_3:
case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_IRQFD_IRQ_SOURCE_ID:
+   case KVM_CAP_IRQFD_OADN:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ae66b9c..ec0f1d8 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -619,6 +619,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
 #define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81
+#define KVM_CAP_IRQFD_OADN 82
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -684,12 +685,15 @@ struct kvm_xen_hvm_config {
 #endif
 
 #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
+/* Available with KVM_CAP_IRQFD_OADN */
+#define KVM_IRQFD_FLAG_OADN (1 << 1)
 
 struct kvm_irqfd {
__u32 fd;
__u32 gsi;
__u32 flags;
-   __u8  pad[20];
+   __u32 notifyfd;
+   __u8  pad[16];
 };
 
 struct kvm_clock_data {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b763230..d502d08 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -284,6 +284,7 @@ struct kvm {
struct {
spinlock_tlock;
struct list_head  items;
+   struct list_head  oadns;
} irqfds;
struct list_head ioeventfds;
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 2245cfa..dfdb5b2 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -43,6 +43,23 @@
  * 
  */
 
+/*
+ * OADN irqfds (On Ack, De-assert & Notify) are a special variety of
+ * irqfds that assert an interrupt to the irqchip on eventfd trigger,
+ * receive notification when userspace acknowledges the interrupt,
+ * automatically de-asserts the irqchip level, and notifies userspace
+ * via the oadn_eventfd.  This object helps to provide one-to-many
+ * deassert-to-notify so we can share a single irq source ID per OADN.
+ */
+struct _irqfd_oadn {
+   struct kvm *kvm;
+   int irq_source_id; /* IRQ source ID shared by these irqfds */
+   struct list_head irqfds; /* list of irqfds using this object */
+   struct kvm_irq_ack_notifier notifier; /* IRQ ACK notification */
+   struct kref kref; /* Race-free removal */
+   struct list_head list;
+};
+
 struct _irqfd {
/* Used for MSI fast-path */
struct kvm *kvm;
@@ -52,6 +69,10 @@ struct _irqfd {
/* Used for l

Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility

2012-08-21 Thread Michael S. Tsirkin

On Tue, Aug 21, 2012 at 04:23:58PM -0300, Rafael Aquini wrote:
> On Tue, Aug 21, 2012 at 10:13:30PM +0300, Michael S. Tsirkin wrote:
> > > 
> > > I believe rcu_dereference_protected() is what I want/need here, since 
> > > this code
> > > is always called for pages which we hold locked (PG_locked bit).
> > 
> > It would only help if we locked the page while updating the mapping,
> > as far as I can see we don't.
> >
> 
> But we can do it. In fact, by doing it (locking the page) we can easily avoid
> the nasty race balloon_isolate_page / leak_balloon, in a much simpler way, 
> IMHO.

Absolutely. Further, we should look hard at whether most RCU uses
in this patchset can be replaced with page lock.

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] spi: Add SPI master controller for OCTEON SOCs.

2012-08-21 Thread David Daney


On 05/19/2012 10:46 PM, Grant Likely wrote:

On Fri, 11 May 2012 14:34:46 -0700, David Daney  wrote:

From: David Daney 

Add the driver, link it into the kbuild system and provide device tree
binding documentation.

Signed-off-by: David Daney 


Some comments below, but you can add my a-b:

Acked-by: Grant Likely 


[...]

+   p->register_base = (u64)devm_ioremap(&pdev->dev, res_mem->start,
+resource_size(res_mem));


Nasty cast.  p->register_base needs to be an __iomem pointer
variable.


No, it is only ever used as an argument to cvmx_{read,write}_csr(),
which want the u64 type.


 The fact that cvmx_read_csr accepts a uint64_t instead of
an __iomem pointer looks really wrong.  Why is it written that way?



Register addresses on OCTEON are 64-bits wide.  In a 32-bit kernel,
pointers are only 32-bits wide.  Thus was born the cvmx_read_csr()
function that takes a u64 address.

We no longer support 32-bit kernels, but the legacy of the interface
lives on.

David Daney

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility

2012-08-21 Thread Rafael Aquini

On Tue, Aug 21, 2012 at 10:16:12PM +0300, Michael S. Tsirkin wrote:
> On Tue, Aug 21, 2012 at 02:55:03PM -0300, Rafael Aquini wrote:
> > On Tue, Aug 21, 2012 at 04:52:23PM +0300, Michael S. Tsirkin wrote:
> > > > + * address_space_operations utilized methods for ballooned pages:
> > > > + *   .migratepage- used to perform balloon's page migration (as is)
> > > > + *   .launder_page   - used to isolate a page from balloon's page list
> > > > + *   .freepage   - used to reinsert an isolated page to balloon's 
> > > > page list
> > > > + */
> > > 
> > > It would be a good idea to document the assumptions here.
> > > Looks like .launder_page and .freepage are called in rcu critical
> > > section.
> > > But migratepage isn't - why is that safe?
> > > 
> > 
> > The migratepage callback for virtio_balloon can sleep, and IIUC we cannot 
> > sleep
> > within a RCU critical section. 
> > 
> > Also, The migratepage callback is called at inner migration's circle 
> > function
> > move_to_new_page(), and I don't think embedding it in a RCU critical section
> > would be a good idea, for the same understanding aforementioned.
> 
> Yes but this means it is still exposed to the module unloading
> races that RCU was supposed to fix.
> So need to either rework that code so it won't sleep
> or switch to some other synchronization.
>
Can you refactor tell_host() to not sleep? Or, can I get rid of calling it at
virtballoon_migratepage()? If 'no' is the answer for both questions, that's the
way that code has to remain, even if we find a way around to hack the
migratepage callback and have it embedded into a RCU crit section.

That's why I believe once the balloon driver is commanded to unload, we must
flag virtballoon_migratepage to skip its work. By doing this, the thread
performing memory compaction will have to recur to the 'putback' path which is
RCU protected. (IMHO).

As the module will not unload until it leaks all pages on its list, that unload
race you pointed before will be covered.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: linux-next: Tree for Aug 21 (usb_speed_string)

2012-08-21 Thread Guenter Roeck

On Tue, Aug 21, 2012 at 11:06:39AM -0700, Randy Dunlap wrote:
> On 08/20/2012 11:04 PM, Stephen Rothwell wrote:
> 
> > Hi all,
> > 
> > Changes since 20120820:
> > 
> 
> 
> on x86_64:
> 
> ERROR: "usb_speed_string" [drivers/usb/core/usbcore.ko] undefined!
> 
> 
> CONFIG_USB_SUPPORT is not enabled
> (but many other USB drivers are enabled).
> 

Caused by

config IR_TTUSBIR

which selects USB but does not have the necessary dependency on 
USB_ARCH_HAS_HCD.

Introduced by commit 0938069fa08970f1c898970c1331a029efe9a1ce "[media] rc: Add
support for the TechnoTrend USB IR Receiver".

Fixing this exposes

ERROR: "usb_kill_urb" [drivers/media/usb/gspca/gspca_main.ko] undefined!
ERROR: "usb_set_interface" [drivers/media/usb/gspca/gspca_main.ko] undefined!
ERROR: "usb_free_coherent" [drivers/media/usb/gspca/gspca_main.ko] undefined!
ERROR: "usb_submit_urb" [drivers/media/usb/gspca/gspca_main.ko] undefined!
ERROR: "usb_clear_halt" [drivers/media/usb/gspca/gspca_main.ko] undefined!
ERROR: "usb_ifnum_to_if" [drivers/media/usb/gspca/gspca_main.ko] undefined!
ERROR: "usb_alloc_coherent" [drivers/media/usb/gspca/gspca_main.ko] undefined!
ERROR: "usb_free_urb" [drivers/media/usb/gspca/gspca_main.ko] undefined!
ERROR: "usb_alloc_urb" [drivers/media/usb/gspca/gspca_main.ko] undefined!

which I think is due to CONFIG_USB_GSPCA=m, but I have no idea how that can be
enabled w/o USB support.

Guenter
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/3] audit: clean up refcounting in audit-tree

2012-08-21 Thread Linus Torvalds

On Tue, Aug 21, 2012 at 12:03 PM, Miklos Szeredi  wrote:
> +   /*
> +* We are guaranteed to have at least one reference to the mark from
> +* either the inode or the caller of fsnotify_destroy_mark().
> +*/
> +   BUG_ON(atomic_read(&entry->refcnt) < 1);

I pulled, but *please* don't use BUG_ON() as some kind of "let's
assert some random crap" thing. We've literally had DoS security
issues due to code having BUG_ON()'s and killing the machine, and
BUG_ON() often makes things *worse* if it ends up happening in irq
context or with some critical lock held, and then the machine is just
dead with no logging and no messages left anywhere.

So before adding a BUG_ON(), you should ask yourself the following questions:

 (a) is this something I need to even test?

 There are lots of rules we have in the kernel. We don't add
BUG_ON() for each and every one of them. Is it such a critical data
structure that I really need to test for that condition that should
never happen?

 (b) Is this data structure *so* central that I need to immediately
kill everything, or do I just want it logged?

 If it's just a "I want people to know about it, but I don't
expect it to happen, I'm just adding a debug thing to make sure", then
WARN_ON_ONCE() is likely the right thing. It's *more* likely to get
reported, exactly because the machine is more likely to survive a
WARN_ON_ONCE().

 (c) am I sure that none of the callers hold any central locks that
make the BUG_ON() be worse than the alternatives?

BUG_ON() is really drastic. Some machines will reboot on bugs. Others
will halt. And even the common ones that are just set to kill the
particular process can effectively kill the whole machine due to locks
or preemption counts etc that never get released.

The kind of place that deserves a BUG_ON() is some really *central*
code where you have major issues, and there's just not anything you
can do to continue. If somebody passes kfree() a bad pointer, there's
just nothing kfree() can sanely do about it. If somebody does a
list_del() with list debugging enabled, and it notices that the list
pointer are crap, what are you going to do? You can't continue.

But some random data structure that has the wrong refcount? If you
*can* return with a warning (and ONCE, at that, so that not only does
it get logged, the log doesn't get spammed and useless because it gets
too big), that's likely what you should do.

And this is *doubly* true if it's a patch in the -rc series and you
added the code because you weren't sure you tested all possible random
cases. Don't potentially kill the machine because you weren't sure you
got all cases!

Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RESEND] compat_ioctl: Add RS-485 IOCTLs to the list

2012-08-21 Thread Jaeden Amero

The RS-485 TIOCSRS485 and TIOCGRS485 ioctls are 32-bit compatible, so
in order to call them on 64-bit systems from 32-bit user mode, we add
them to the ioctl pointer list as compatible ioctls.

Signed-off-by: Jaeden Amero 
---
 fs/compat_ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 
debdfe0fc809edfd01ac4a72a0eaf2753efc993d..85dfebfe6820856dc3154dfd178acb6fca63bbe9
 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -866,6 +866,8 @@ COMPATIBLE_IOCTL(TIOCGPTN)
 COMPATIBLE_IOCTL(TIOCSPTLCK)
 COMPATIBLE_IOCTL(TIOCSERGETLSR)
 COMPATIBLE_IOCTL(TIOCSIG)
+COMPATIBLE_IOCTL(TIOCSRS485)
+COMPATIBLE_IOCTL(TIOCGRS485)
 #ifdef TCGETS2
 COMPATIBLE_IOCTL(TCGETS2)
 COMPATIBLE_IOCTL(TCSETS2)
-- 
1.7.12

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC 5/5 v2] uprobes: add global breakpoints

2012-08-21 Thread Sebastian Andrzej Siewior

By setting an uprobe tracepoint, one learns whenever a certain point
within a program is reached / passed. This is recorded and the
application continues.
This patch adds the ability to hold the program once this point has been
passed and the user may attach to the program via ptrace.
First, setup a global breakpoint which is very similar to a uprobe trace
point:

|echo 'g /home/bigeasy/uprobetest/sample:0x044d %ip %ax %bx' > uprobe_events

This is exactly what uprobe does except that it starts with the letter
'g' instead of 'p'.

Step two is to enable it:
|echo 1 > events/uprobes/enable

Step three is to add pids of processes which are excluded from global
breakpoints even if the process would hit one. This should ensure that
the debugger remains active and the global breakpoint on system libc's
malloc() does not freeze the system. A pid can be excluded by
| echo e $pid > uprobe_gb_exclude

You need at least one pid in the exclude list. An entry can be removed by
| echo a $pid > uprobe_gb_exclude

Lets assume you execute ./sample and the breakpoint is hit. In ps you will
see:
|1938 pts/1t+ 0:00 ./sample

Now you can attach gdb via 'gdb -p 1938'. The gdb now can interact with
the tracee and inspect its registers or its stack, single step, let it
run...
In case the process is not of great interest, the user may continue
without gdb by writing its pid into the uprobe_gp_wakeup file

|echo 1938 > uprobe_gp_wakeup

Cc: gdb-patc...@sourceware.org
Signed-off-by: Sebastian Andrzej Siewior 
---
v1..v2: 
 - closed the window between set state / check state
 - tried to address Peters review / concern:
   - added "uprobe_gb_exclude". This file contains a list of pids which
 are excluded from the "global breakpoint" behavior. The idea is to
 whitelist programs which are essential and must not hit a
 breakpoint. An empty list is invalid and _no_ global breakpoint will
 hit.
   - added "uprobe_gb_active". This file contains a list of pids which
 hit the global breakpoint. The user can poll() here and wait for
 the next victim. The size of the list limited. This is step two to
 ensure a global system lock up does not occur. If a java program is
 being debugged and the size of the list is too small then the list
 could be allocated at runtime with more entries.

   I've been thinking about alternatives to the approach above:
   - cgroups
 Would solve some problems. It would be very easy for the user to
 group tasks in two groups: "root" group with "allowed" tasks and
 sub group "excluded" for tasks which are excluded from the global
 breakpoint(s). A third group would be required to put the "halted"
 tasks. I would need one file to set the type of the group (root is
 easy, "allowed" and "halted" have to be set). The notification
 mechanism works on per file basis. So I would have to add file with
 no content just to let the user that the task file has new entries.
 All in all this looks like a abuse of cgroups just to follow forks
 on the exclude list and maintain the list.
   - auto exclude the read()er / poll()er of uprobe_gb_active
 This sounds lovely but has two shortcomings:
 - the pid of the process that opened it may change after fork()
   since the initial owner may exit
 - they may be two+ childs after fork() which read() / poll(). Both
   should be excluded since I don't know which one is which. I don't
   know which one terminates because ->release() is called by last
   process that closes the fd. That means in this scenario I would
   add more entries to the white list than remove.
 - having a list of tasks which currently poll() the file would
   solve the problem with this endless growing list. However once
   poll() is done (one process just hit the global breakpoint) I have
   an empty list since no one can poll() now. That means that I
   would exclude every further process which hits the global breakpoint
   before someone poll()s again.

Oleg: The change in ptrace_attach() is still as it was. I tried to
address Peter concern here.
Now what options do I have here:
- not putting the task in TASK_TRACED but simply halt. This would work
  without a change to ptrace_attach() but the task continues on any
  signal. So a signal friendly task would continue and not notice a
  thing.
- putting the TASK_TRACED and not touching ptrace_attach(). Each
  ptrace() user would have to kick the task itself which means changes
  to gdb / strace. If this is the preferred way then I guess it can be
  done :)

 include/linux/uprobes.h |   10 ++
 kernel/events/uprobes.c |   13 +-
 kernel/ptrace.c |4 +-
 kernel/trace/trace_uprobe.c |  414 ++-
 4 files changed, 435 insertions(+), 6 deletions(-)

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 0fc6585..991a665 100644
--- a/include/linux/uprobes.h
+++ b/

Re: [2/2] spi: Add SPI master controller for OCTEON SOCs.

2012-08-21 Thread Guenter Roeck

On Fri, May 11, 2012 at 08:34:46PM -, David Daney wrote:
> From: David Daney 
> 
> Add the driver, link it into the kbuild system and provide device tree
> binding documentation.
> 
> Signed-off-by: David Daney 
> Acked-by: Grant Likely 
> 
[ ... ]

> +
> +static int __devexit octeon_spi_remove(struct platform_device *pdev)
> +{
> + struct octeon_spi *p = platform_get_drvdata(pdev);
> + struct spi_master *master = p->my_master;
> +
> + spi_unregister_master(master);
> +

I know it is a bit late, but ...

The call to spi_unregister_master() frees the memory associated with master,
ie 'p', and the spi_master_put() below without matching spi_master_get() is
unnecessary/wrong. One possible fix would be to use 

struct spi_master *master = spi_master_get(p->my_master);

above. That protects master and p while it is still being used, and makes use
of the call to spi_master_put() below. Another option might be to move
cvmx_write_csr() ahead of the call to spi_unregister_master() and drop the
call to spi_master_put().

Guenter

> + /* Clear the CSENA* and put everything in a known state. */
> + cvmx_write_csr(p->register_base + OCTEON_SPI_CFG, 0);
> + spi_master_put(master);
> + return 0;
> +}
> +
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v9 1/2] kvm: Use a reserved IRQ source ID for irqfd

2012-08-21 Thread Michael S. Tsirkin

On Tue, Aug 21, 2012 at 01:29:06PM -0600, Alex Williamson wrote:
> KVM_IRQFD currently uses the reserved KVM_USERSPACE_IRQ_SOURCE_ID
> which is also shared with userspace injection methods like
> KVM_IRQ_LINE.  This can cause a conflict if an irqfd triggers on
> a GSI asserted through KVM_IRQ_LINE.

What kind of conflict do you envision?  Pls note level interrupts are
unsupported ATM.

> Move irqfd to its own reserved IRQ source ID.  Add a capability for
> userspace to test for this fix.
> 
> Signed-off-by: Alex Williamson 
> ---
> 
>  arch/x86/kvm/x86.c   |3 +++
>  include/linux/kvm.h  |1 +
>  include/linux/kvm_host.h |1 +
>  virt/kvm/eventfd.c   |6 +++---
>  4 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 42bce48..cd98673 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2174,6 +2174,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>   case KVM_CAP_GET_TSC_KHZ:
>   case KVM_CAP_PCI_2_3:
>   case KVM_CAP_KVMCLOCK_CTRL:
> + case KVM_CAP_IRQFD_IRQ_SOURCE_ID:
>   r = 1;
>   break;
>   case KVM_CAP_COALESCED_MMIO:
> @@ -6258,6 +6259,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long 
> type)
>  
>   /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
>   set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
> + /* Reserve bit 1 of irq_sources_bitmap for irqfd irq source */
> + set_bit(KVM_IRQFD_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
>  
>   raw_spin_lock_init(&kvm->arch.tsc_write_lock);
>  
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index 2ce09aa..ae66b9c 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -618,6 +618,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_PPC_GET_SMMU_INFO 78
>  #define KVM_CAP_S390_COW 79
>  #define KVM_CAP_PPC_ALLOC_HTAB 80
> +#define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index b70b48b..b763230 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -71,6 +71,7 @@
>  #define KVM_REQ_PMI   17
>  
>  #define KVM_USERSPACE_IRQ_SOURCE_ID  0
> +#define KVM_IRQFD_IRQ_SOURCE_ID  1
>  
>  struct kvm;
>  struct kvm_vcpu;

Above looks fine but I'm not sure why is the below needed.
This changes irqfd behaviour for edge GSIs slightly
in a userspace-visible way. Maybe make it a separate patch
so it can be considered on merits?

> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 7d7e2aa..2245cfa 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -67,8 +67,8 @@ irqfd_inject(struct work_struct *work)
>   struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
>   struct kvm *kvm = irqfd->kvm;
>  
> - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
> - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
> + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 1);
> + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 0);
>  }
>  
>  /*
> @@ -138,7 +138,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, 
> void *key)
>   irq = rcu_dereference(irqfd->irq_entry);
>   /* An event has been signaled, inject an interrupt */
>   if (irq)
> - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
> + kvm_set_msi(irq, kvm, KVM_IRQFD_IRQ_SOURCE_ID, 1);
>   else
>   schedule_work(&irqfd->inject);
>   rcu_read_unlock();
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode

2012-08-21 Thread Yu, Fenghua

> -Original Message-
> From: Borislav Petkov [mailto:b...@amd64.org]
> Sent: Monday, August 20, 2012 1:20 PM
> To: H. Peter Anvin
> Cc: Yu, Fenghua; Henrique de Moraes Holschuh; Ingo Molnar; Thomas
> Gleixner; Mallick, Asit K; Tigran Aivazian; Andreas Herrmann; Borislav
> Petkov; linux-kernel; x86
> Subject: Re: [PATCH 04/11] x86/microcode_core_early.c: Define
> interfaces for early load ucode
> 
> On Mon, Aug 20, 2012 at 01:08:49PM -0700, H. Peter Anvin wrote:
> > On 08/20/2012 07:06 AM, Borislav Petkov wrote:
> > >
> > > Or,
> > >
> > > in case we want to supply more vendor-specific stuff early at boot,
> we
> > > could do:
> > >
> > > kernel/x86//microcode...
> > >   |-> bios_overrides
> > >   |-> ...
> > >
> > > and have this layout extensible from the beginning...
> > >
> > Does that make sense, though?
> 
> Only time will tell. I was simply saying that we should leave ourselves
> the door opened, should we need functionality like that in the future.
> 
> > I'm a bit concerned about having multiple files named microcode.bin
> by
> > default; the pathname isn't as sticky as the filename when people
> move
> > things around...
> 
> Ok, I see.
> 
> How about the following scheme then:
> 
> kernel/x86/-microcode.bin
> kernel/x86/-bios-overrides.blob
> ...
> 
> ?
> 
> All I'm saying is maybe we should impose some sanity rules now before
> people go crazy with this and things get out of hands...

We might name the cpio directory as:

kernel/x86/microcode/GenuineIntel.bin
kernel/x86/microcode/AuthenticAMD.bin
kernel/x86/acpi/...
etc.

This is expandable for future usage.
 
Plus I will add a doc on the cpio directory, supported directory names and how 
to add new stuffs in the directory.

Thanks.

-Fenghua


Thanks.

-Fenghua
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v9 1/2] kvm: Use a reserved IRQ source ID for irqfd

2012-08-21 Thread Alex Williamson

On Tue, 2012-08-21 at 22:58 +0300, Michael S. Tsirkin wrote:
> On Tue, Aug 21, 2012 at 01:29:06PM -0600, Alex Williamson wrote:
> > KVM_IRQFD currently uses the reserved KVM_USERSPACE_IRQ_SOURCE_ID
> > which is also shared with userspace injection methods like
> > KVM_IRQ_LINE.  This can cause a conflict if an irqfd triggers on
> > a GSI asserted through KVM_IRQ_LINE.
> 
> What kind of conflict do you envision?  Pls note level interrupts are
> unsupported ATM.

If KVM_IRQ_LINE asserts a level interrupt and KVM_IRQFD triggers on the
same GSI then the pin is no longer asserted as userspace thinks it is.
Do we just chalk this up to userspace error?

> > Move irqfd to its own reserved IRQ source ID.  Add a capability for
> > userspace to test for this fix.
> > 
> > Signed-off-by: Alex Williamson 
> > ---
> > 
> >  arch/x86/kvm/x86.c   |3 +++
> >  include/linux/kvm.h  |1 +
> >  include/linux/kvm_host.h |1 +
> >  virt/kvm/eventfd.c   |6 +++---
> >  4 files changed, 8 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 42bce48..cd98673 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -2174,6 +2174,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> > case KVM_CAP_GET_TSC_KHZ:
> > case KVM_CAP_PCI_2_3:
> > case KVM_CAP_KVMCLOCK_CTRL:
> > +   case KVM_CAP_IRQFD_IRQ_SOURCE_ID:
> > r = 1;
> > break;
> > case KVM_CAP_COALESCED_MMIO:
> > @@ -6258,6 +6259,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long 
> > type)
> >  
> > /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
> > set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
> > +   /* Reserve bit 1 of irq_sources_bitmap for irqfd irq source */
> > +   set_bit(KVM_IRQFD_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
> >  
> > raw_spin_lock_init(&kvm->arch.tsc_write_lock);
> >  
> > diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> > index 2ce09aa..ae66b9c 100644
> > --- a/include/linux/kvm.h
> > +++ b/include/linux/kvm.h
> > @@ -618,6 +618,7 @@ struct kvm_ppc_smmu_info {
> >  #define KVM_CAP_PPC_GET_SMMU_INFO 78
> >  #define KVM_CAP_S390_COW 79
> >  #define KVM_CAP_PPC_ALLOC_HTAB 80
> > +#define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81
> >  
> >  #ifdef KVM_CAP_IRQ_ROUTING
> >  
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index b70b48b..b763230 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -71,6 +71,7 @@
> >  #define KVM_REQ_PMI   17
> >  
> >  #define KVM_USERSPACE_IRQ_SOURCE_ID0
> > +#define KVM_IRQFD_IRQ_SOURCE_ID1
> >  
> >  struct kvm;
> >  struct kvm_vcpu;
> 
> Above looks fine but I'm not sure why is the below needed.
> This changes irqfd behaviour for edge GSIs slightly
> in a userspace-visible way. Maybe make it a separate patch
> so it can be considered on merits?

Hmm, the above does nothing without the below.  I thought I was just
implementing your idea that IRQFDs should all share a single IRQ source
ID... why is that no longer a good idea?  Thanks,

Alex

> > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> > index 7d7e2aa..2245cfa 100644
> > --- a/virt/kvm/eventfd.c
> > +++ b/virt/kvm/eventfd.c
> > @@ -67,8 +67,8 @@ irqfd_inject(struct work_struct *work)
> > struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
> > struct kvm *kvm = irqfd->kvm;
> >  
> > -   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
> > -   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
> > +   kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 1);
> > +   kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 0);
> >  }
> >  
> >  /*
> > @@ -138,7 +138,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int 
> > sync, void *key)
> > irq = rcu_dereference(irqfd->irq_entry);
> > /* An event has been signaled, inject an interrupt */
> > if (irq)
> > -   kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
> > +   kvm_set_msi(irq, kvm, KVM_IRQFD_IRQ_SOURCE_ID, 1);
> > else
> > schedule_work(&irqfd->inject);
> > rcu_read_unlock();



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 17/31] arm64: System calls handling

2012-08-21 Thread Arnd Bergmann

On Tuesday 21 August 2012, Catalin Marinas wrote:
> > > +asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
> > > +unsigned long prot, unsigned long flags,
> > > +unsigned long fd, off_t off)
> > > +{
> > > +   if (offset_in_page(off) != 0)
> > > +   return -EINVAL;
> > > +
> > > +   return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
> > > +}
> > 
> > I think
> > 
> > #define sys_mmap sys_mmap_pgoff 
> 
> There are slightly different semantics with the last argument of
> sys_mmap() which takes a byte offset. The sys_mmap_pgoff() function
> takes the offset shifted by PAGE_SHIFT (which is the same as sys_mmap2).
> 
> Looking at the other architectures, it makes sense to use a generic
> sys_mmap() implementation similar to the one above (or the ia-64, seems
> to be the most complete).
> 

Why that? The generic sys_mmap_pgoff was specifically added so new architectures
could just use that instead of having their own wrappers, see f8b72560.

Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode

2012-08-21 Thread H. Peter Anvin

On 08/21/2012 01:05 PM, Yu, Fenghua wrote:
> 
> We might name the cpio directory as:
> 
> kernel/x86/microcode/GenuineIntel.bin
> kernel/x86/microcode/AuthenticAMD.bin
> kernel/x86/acpi/...
> etc.
> 
> This is expandable for future usage.
>  
> Plus I will add a doc on the cpio directory, supported directory names and 
> how to add new stuff in the directory.
> 

I believe that was exactly my original proposal.  I think it makes
sense... most things aren't going to be inherently CPU-specific in this way.

I don't know what Borislav was suggesting with "BIOS overrides", is that
another CPU-specific thing?

-hpa

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 16/31] arm64: ELF definitions

2012-08-21 Thread Arnd Bergmann

On Tuesday 21 August 2012, Catalin Marinas wrote:
> On Thu, Aug 16, 2012 at 01:37:53PM +0100, Arnd Bergmann wrote:
> > On Thursday 16 August 2012, Will Deacon wrote:
> > > > This looks wrong: PER_LINUX/PER_LINUX32 decides over the output of the
> > > > uname system call, while TIF_32BIT decides over the instruction set
> > > > when returning to user space. You definitely should not set the 
> > > > personality
> > > > to the value you pass from the elf loader. Instead, just do
> > > > 
> > > > #define SET_PERSONALITY(ex) clear_thread_flag(TIF_32BIT);
> > > > #defined COMPAT_SET_PERSONALITY(ex) set_thread_flag(TIF_32BIT);
> > > 
> > > In this case, won't uname be incorrect (aarch64l) for aarch32 tasks (which
> > > expect something like armv8l)?
> > 
> > No, the uname output is meant to tell you about the system, not the
> > instruction set that you are using (you already know that in compiled
> > code).
> 
> OK, so we assumed that compat tasks should get a uname as close as
> possible to a 32-bit system, i.e. armv8l, for full compatibility. This
> would allow us to run something like 32-bit Debian on an AArch64 kernel
> without worrying about any scripts failing.

You can still do that, just boot with init="/sbin/setarch armv7 /sbin/init".

> But I can see on x86 that it always reports x86_64 even if the task is
> x86_32.

Not just x86, the same behavior is used on powerpc, s390, mips, sparc and
parisc. Not sure about tile though.

Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[tip:x86/apic] x86: dt: Use linear irq domain for ioapic(s)

2012-08-21 Thread tip-bot for Sebastian Andrzej Siewior

Commit-ID:  ece3234a77ebcd5bbeea6b829c9798328d290cae
Gitweb: http://git.kernel.org/tip/ece3234a77ebcd5bbeea6b829c9798328d290cae
Author: Sebastian Andrzej Siewior 
AuthorDate: Mon, 13 Aug 2012 22:23:33 +0200
Committer:  Thomas Gleixner 
CommitDate: Tue, 21 Aug 2012 22:16:57 +0200

x86: dt: Use linear irq domain for ioapic(s)

The former conversion to irq_domain_add_legacy() did not fully work
since we miss the irq descs for NR_IRQS_LEGACY+.

Ideally we could use irq_domain_add_simple() or the no-map variant (and
program the virq <-> line mapping directly into ioapic) but this would
require a different irq lookup in "do_IRQ()" and won't work with ACPI
without changes. So this is probably easiest for everyone.

Tested-by: Thierry Reding 
Signed-off-by: Sebastian Andrzej Siewior 
Cc: Grant Likely 
Link: http://lkml.kernel.org/r/20120813202304.ga3...@breakpoint.cc
Signed-off-by: Thomas Gleixner 
---
 arch/x86/kernel/devicetree.c |   51 ++---
 1 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3ae2ced..b158152 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = {
.xlate = ioapic_xlate,
 };
 
+static void dt_add_ioapic_domain(unsigned int ioapic_num,
+   struct device_node *np)
+{
+   struct irq_domain *id;
+   struct mp_ioapic_gsi *gsi_cfg;
+   int ret;
+   int num;
+
+   gsi_cfg = mp_ioapic_gsi_routing(ioapic_num);
+   num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
+
+   id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops,
+   (void *)ioapic_num);
+   BUG_ON(!id);
+   if (gsi_cfg->gsi_base == 0) {
+   /*
+* The first NR_IRQS_LEGACY irq descs are allocated in
+* early_irq_init() and need just a mapping. The
+* remaining irqs need both. All of them are preallocated
+* and assigned so we can keep the 1:1 mapping which the ioapic
+* is having.
+*/
+   ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
+   if (ret)
+   pr_err("Error mapping legacy IRQs: %d\n", ret);
+
+   if (num > NR_IRQS_LEGACY) {
+   ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
+   NR_IRQS_LEGACY, num - NR_IRQS_LEGACY);
+   if (ret)
+   pr_err("Error creating mapping for the "
+   "remaining IRQs: %d\n", ret);
+   }
+   irq_set_default_host(id);
+   } else {
+   ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num);
+   if (ret)
+   pr_err("Error creating IRQ mapping: %d\n", ret);
+   }
+}
+
 static void __init ioapic_add_ofnode(struct device_node *np)
 {
struct resource r;
@@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node 
*np)
 
for (i = 0; i < nr_ioapics; i++) {
if (r.start == mpc_ioapic_addr(i)) {
-   struct irq_domain *id;
-   struct mp_ioapic_gsi *gsi_cfg;
-
-   gsi_cfg = mp_ioapic_gsi_routing(i);
-
-   id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0,
-  &ioapic_irq_domain_ops,
-  (void*)i);
-   BUG_ON(!id);
+   dt_add_ioapic_domain(i, np);
return;
}
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCHSET] workqueue: use irqsafe timer in delayed_work

2012-08-21 Thread Tejun Heo

On Wed, Aug 08, 2012 at 02:37:55PM -0700, Tejun Heo wrote:
> This patchset makes delayed_work use the irqsafe timer added by the
> pending "timer: clean up initializers and implement irqsafe timers"
> patchset[1].  This enables try_to_grab_pending() to be used from any
> context which in turn makes mod_delayed_work() usable from IRQ
> handlers.  cancel_delayed_work() is reimplemented using
> try_to_grab_pending() so that it also can be used from IRQ handlers
> and its behavior is consistent with other canceling operations.
> __cancel_delayed_work() is no longer necessary and deprecated.

Applied to wq/for-3.7 after pulling in tip/timers/core.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC][PATCH v4 0/3] make efivars/efi_pstore interrupt-safe

2012-08-21 Thread Seiji Aguchi

Changelog
v3 -> v4
  - Patch 2/3
Move cancel_work_sync() above an efi_enabled test in efivars_exit().

v2 -> v3
  - Patch 1/3
Replace spin_lock_irqsave/spin_unlock_irqrestore with 
spin_lock_irq/spin_unlock_irq in efivars_unregister(),
efivar_create(), efivar_store_raw() and efivar_delete() which are called in 
a process context. 

 - Patch 2/3
Change a name of delete_sysfs_entry() to delete_all_stale_sysfs_entries().
Also, don't release an efivar->lock while searching efivar->list in 
delete_all_stale_sysfs_entries().

 - Patch 3/3
Remove a logic in efi_pstore_erase() which freshly created in patch v2.

v1 -> v2
 - Patch 1/3
Add spin_lock_irq/spin_unlock_irq to open/close callbacks of efi_pstore 
instead of moving spin_locks to a read callback.

 - Patch 2/3
Replace a periodical timer with schedule_work().

 - Patch 3/3
freshly create to kick a workqueue in oops case only.

[Problem]
 There are following problems related to an interrupt context in 
efivar/efi_pstore.

 Currently, efivars enables interrupt while taking efivars->lock.
 So, there is a risk to be deadlocking in a write callback of efi_pstore if 
kernel panics 
 in interrupt context while taking efi_lock.

 Also, efi_pstore creates sysfs entries, which enable users to access NVRAM, 
in a write callback.
 If a kernel panic happens in interrupt contexts, pstore may fail because it 
could sleep due to dynamic 
 memory allocations during creating sysfs entries.

To resolve the problems above, a goal of this patchset is making 
efivars/efi_pstore interrupt-safe.

[Patch Description]
 Patch 1/3 efivars: Disable external interrupt while holding efivars->lock
   This patch replaces spin_lock/spin_unlock with 
spin_lock_irqsave/spin_unlock_irqrestore to make efivars interrupt safe

 Patch 2/3 efi_pstore: Introducing workqueue updating sysfs entries
   This patch removes sysfs operations from write callback by introducing a 
workqueue updating sysfs entries

 Patch 3/3 efi_pstore: Skiping scheduling a workqueue in cases other than oops
   This patch restricts a schedule of a workqueue in case where users erase 
entries or oops happen which is truly needed for users.

 drivers/firmware/efivars.c |  167 +++
 include/linux/efi.h|3 +-
 2 files changed, 138 insertions(+), 32 deletions(-)




--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC][PATCH v4 1/3] efivars: Disable external interrupt while holding efivars->lock

2012-08-21 Thread Seiji Aguchi

[Problem]
 Currently, efivars doesn't disable interrupt while taking efivars->lock.
 So, there is a risk to be deadlocking in a write callback of efi_pstore
 if kernel panics in interrupt context while taking efivars->lock.

[Patch Description]
 This patch disables an external interruption while holding efivars->lock as 
follows

 - In efi_pstore_open()/efi_pstore_close(), replace spin_lock/spin_unlock with 
spin_lock_irq/spin_unlock_irq
   because they are called in a process context when users access to 
/dev/pstore.

 - In unregister_efivars(), replace them with spin_lock_irq/spin_unlock_irq
   because they are called in a process context when unloading this module.

 - Also, in efivar_create()/efivar_store_raw()/efivar_delete(), replace them 
with spin_lock_irq/spin_unlock_irq
   because they are called in a process context when users access to 
/sys/firmware/efi/vars/{new_var|del_var}.

 - In other function calls, replace spin_lock/spin_unlock with 
spin_lock_irqsave/spin_unlock_irqrestore.

Signed-off-by: Seiji Aguchi 
---
 drivers/firmware/efivars.c |   43 +++
 1 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 47408e8..bd1df01 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -393,10 +393,11 @@ static efi_status_t
 get_var_data(struct efivars *efivars, struct efi_variable *var)
 {
efi_status_t status;
+   unsigned long flags;
 
-   spin_lock(&efivars->lock);
+   spin_lock_irqsave(&efivars->lock, flags);
status = get_var_data_locked(efivars, var);
-   spin_unlock(&efivars->lock);
+   spin_unlock_irqrestore(&efivars->lock, flags);
 
if (status != EFI_SUCCESS) {
printk(KERN_WARNING "efivars: get_variable() failed 0x%lx!\n",
@@ -514,14 +515,14 @@ efivar_store_raw(struct efivar_entry *entry, const char 
*buf, size_t count)
return -EINVAL;
}
 
-   spin_lock(&efivars->lock);
+   spin_lock_irq(&efivars->lock);
status = efivars->ops->set_variable(new_var->VariableName,
&new_var->VendorGuid,
new_var->Attributes,
new_var->DataSize,
new_var->Data);
 
-   spin_unlock(&efivars->lock);
+   spin_unlock_irq(&efivars->lock);
 
if (status != EFI_SUCCESS) {
printk(KERN_WARNING "efivars: set_variable() failed: 
status=%lx\n",
@@ -632,7 +633,7 @@ static int efi_pstore_open(struct pstore_info *psi)
 {
struct efivars *efivars = psi->data;
 
-   spin_lock(&efivars->lock);
+   spin_lock_irq(&efivars->lock);
efivars->walk_entry = list_first_entry(&efivars->list,
   struct efivar_entry, list);
return 0;
@@ -642,7 +643,7 @@ static int efi_pstore_close(struct pstore_info *psi)
 {
struct efivars *efivars = psi->data;
 
-   spin_unlock(&efivars->lock);
+   spin_unlock_irq(&efivars->lock);
return 0;
 }
 
@@ -696,11 +697,12 @@ static int efi_pstore_write(enum pstore_type_id type,
struct efivars *efivars = psi->data;
struct efivar_entry *entry, *found = NULL;
int i, ret = 0;
+   unsigned long flags;
 
sprintf(stub_name, "dump-type%u-%u-", type, part);
sprintf(name, "%s%lu", stub_name, get_seconds());
 
-   spin_lock(&efivars->lock);
+   spin_lock_irqsave(&efivars->lock, flags);
 
for (i = 0; i < DUMP_NAME_LEN; i++)
efi_name[i] = stub_name[i];
@@ -738,7 +740,7 @@ static int efi_pstore_write(enum pstore_type_id type,
efivars->ops->set_variable(efi_name, &vendor, PSTORE_EFI_ATTRIBUTES,
   size, psi->buf);
 
-   spin_unlock(&efivars->lock);
+   spin_unlock_irqrestore(&efivars->lock, flags);
 
if (found)
efivar_unregister(found);
@@ -822,7 +824,7 @@ static ssize_t efivar_create(struct file *filp, struct 
kobject *kobj,
return -EINVAL;
}
 
-   spin_lock(&efivars->lock);
+   spin_lock_irq(&efivars->lock);
 
/*
 * Does this variable already exist?
@@ -840,7 +842,7 @@ static ssize_t efivar_create(struct file *filp, struct 
kobject *kobj,
}
}
if (found) {
-   spin_unlock(&efivars->lock);
+   spin_unlock_irq(&efivars->lock);
return -EINVAL;
}
 
@@ -854,10 +856,10 @@ static ssize_t efivar_create(struct file *filp, struct 
kobject *kobj,
if (status != EFI_SUCCESS) {
printk(KERN_WARNING "efivars: set_variable() failed: 
status=%lx\n",
status);
-   spin_unlock(&efivars->lock);
+   spin_unlock_irq(&efivars->lock);
return -EIO;
}
-   spin_un

[PATCH v2 5/5] X86/XEN: Add few lines explaining simple semantic for x86_init.paging.pagetable_init setup function

2012-08-21 Thread Attilio Rao

- Explain the purpose of the hook
- Report execution constraints

Signed-off-by: Attilio Rao 
---
 arch/x86/include/asm/x86_init.h |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 995ea5c..7ea4186 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -82,6 +82,11 @@ struct x86_init_mapping {
 /**
  * struct x86_init_paging - platform specific paging functions
  * @pagetable_init:platform specific paging initialization call
+ *
+ * It does setup the kernel pagetables and prepares accessors functions to
+ * manipulate them.
+ * It must be called once, during the boot sequence and after the direct
+ * mapping for phys memory is setup.
  */
 struct x86_init_paging {
void (*pagetable_init)(void);
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 0/5] X86/XEN: Merge x86_init.paging.pagetable_setup_start and x86_init.paging.pagetable_setup_done setup functions and document its semantic

2012-08-21 Thread Attilio Rao

Currently the definition of x86_init.paging.pagetable_setup_start and
x86_init.paging.pagetable_setup_done is twisted and not really well
defined (in terms of prototypes desired). More specifically:
pagetable_setup_start:
 * cleans up the boot time page table in the x86_32 case
 * it is a nop for the XEN case
 * it is a nop on x86_64

pagetable_setup_done:
 * it is a nop on x86_32
 * sets up accessor functions for pagetable manipulation, for the
   XEN case
 * it is a nop on x86_64

Most of this logic can be skipped by creating a new setup function that can
handle pagetable setup and pre/post operations on it. This means the above
mentioned functions will be removed and only one will be used for the whole
operation.
The new function must be called only once, during boot-time setup and
after the direct mapping for physical memory is available.

Differences with v1:
- The patch series is re-arranged in a way that it helps reviews, following
  a plan by Thomas Gleixner
- The PVOPS nomenclature is not used as it is not correct
- The front-end message is adjusted with feedback by Thomas Gleixner,
  Stefano Stabellini and Konrad Rzeszutek Wilk 


Attilio Rao (5):
  X86/XEN: Remove the base argument from
x86_init.paging.pagetable_setup_start
  X86/XEN: Rename pagetable_setup_start() setup functions into
pagetable_init()
  X86/XEN: Allow setup function x86_init.paging.pagetable_init to setup
kernel pagetables
  X86/XEN: Move content of xen_pagetable_setup_done() into
xen_pagetable_init() and retire now unused
x86_init.paging.pagetable_setup_done
  X86/XEN: Add few lines explaining simple semantic for
x86_init.paging.pagetable_init setup function

 arch/x86/include/asm/pgtable_types.h |6 ++
 arch/x86/include/asm/x86_init.h  |   11 +++
 arch/x86/kernel/setup.c  |4 +---
 arch/x86/kernel/x86_init.c   |4 +---
 arch/x86/mm/init_32.c|   11 ---
 arch/x86/xen/mmu.c   |   18 +++---
 6 files changed, 22 insertions(+), 32 deletions(-)

-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC][PATCH v4 2/3] efi_pstore: Introducing workqueue updating sysfs entries

2012-08-21 Thread Seiji Aguchi

[Problem]
  efi_pstore creates sysfs entries, which enable users to access NVRAM,
  in a write callback. If a kernel panic happens in interrupt contexts, pstore 
may
  fail because it could sleep due to dynamic memory allocations during creating 
sysfs entries.

[Patch Description]
 This patch removes sysfs operations from a write callback by introducing a 
workqueue
 updating sysfs entries which is scheduled after the write callback is called.
 efi_pstore will be robust against a kernel panic in an interrupt context with 
it.

Signed-off-by: Seiji Aguchi 
---
 drivers/firmware/efivars.c |  119 +++
 include/linux/efi.h|3 +-
 2 files changed, 110 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index bd1df01..cd16ea6 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -146,6 +146,13 @@ efivar_create_sysfs_entry(struct efivars *efivars,
  efi_char16_t *variable_name,
  efi_guid_t *vendor_guid);
 
+/*
+ * Prototype for workqueue functions updating sysfs entry
+ */
+
+static void efivar_update_sysfs_entry(struct work_struct *);
+static DECLARE_WORK(efivar_work, efivar_update_sysfs_entry);
+
 /* Return the number of unicode characters in data */
 static unsigned long
 utf16_strnlen(efi_char16_t *s, size_t maxlength)
@@ -731,9 +738,6 @@ static int efi_pstore_write(enum pstore_type_id type,
   0, NULL);
}
 
-   if (found)
-   list_del(&found->list);
-
for (i = 0; i < DUMP_NAME_LEN; i++)
efi_name[i] = name[i];
 
@@ -742,14 +746,7 @@ static int efi_pstore_write(enum pstore_type_id type,
 
spin_unlock_irqrestore(&efivars->lock, flags);
 
-   if (found)
-   efivar_unregister(found);
-
-   if (size)
-   ret = efivar_create_sysfs_entry(efivars,
- utf16_strsize(efi_name,
-   DUMP_NAME_LEN * 2),
- efi_name, &vendor);
+   schedule_work(&efivar_work);
 
*id = part;
return ret;
@@ -1200,6 +1197,104 @@ EXPORT_SYMBOL_GPL(register_efivars);
 static struct efivars __efivars;
 static struct efivar_operations ops;
 
+static void delete_all_stale_sysfs_entries(void)
+{
+   struct efivars *efivars = &__efivars;
+   struct efivar_entry *entry, *n, *found;
+   efi_status_t status;
+   unsigned long flags;
+
+   while (1) {
+   found = NULL;
+   spin_lock_irqsave(&efivars->lock, flags);
+   list_for_each_entry_safe(entry, n, &efivars->list, list) {
+   status = get_var_data_locked(efivars, &entry->var);
+   if (status != EFI_SUCCESS) {
+   found = entry;
+   list_del(&entry->list);
+   break;
+   }
+   }
+   spin_unlock_irqrestore(&efivars->lock, flags);
+   if (found)
+   efivar_unregister(entry);
+   else
+   break;
+   }
+}
+
+static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t 
*vendor)
+{
+   struct efivar_entry *entry, *n;
+   struct efivars *efivars = &__efivars;
+   unsigned long strsize1, strsize2;
+   bool found = false;
+
+   strsize1 = utf16_strsize(variable_name, 1024);
+   list_for_each_entry_safe(entry, n, &efivars->list, list) {
+   strsize2 = utf16_strsize(entry->var.VariableName, 1024);
+   if (strsize1 == strsize2 &&
+   !memcmp(variable_name, &(entry->var.VariableName),
+   strsize2) &&
+   !efi_guidcmp(entry->var.VendorGuid,
+   *vendor)) {
+   found = true;
+   break;
+   }
+   }
+   return found;
+}
+
+static void efivar_update_sysfs_entry(struct work_struct *work)
+{
+   struct efivars *efivars = &__efivars;
+   efi_guid_t vendor;
+   efi_char16_t *variable_name;
+   unsigned long variable_name_size = 1024, flags;
+   efi_status_t status = EFI_NOT_FOUND;
+   bool found;
+
+   /* Delete stale sysfs entries */
+   delete_all_stale_sysfs_entries();
+
+   /* Add new sysfs entries */
+   while (1) {
+   variable_name = kzalloc(variable_name_size, GFP_KERNEL);
+   if (!variable_name) {
+   pr_err("efivars: Memory allocation failed.\n");
+   return;
+   }
+
+   spin_lock_irqsave(&efivars->lock, flags);
+   found = false;
+   while (1) {
+   variable_name_size = 1024;
+   st

[PATCH] PM / Freezer: Fix small typo "regrigerator"

2012-08-21 Thread Sedat Dilek

Noticed when digging into a suspend issue in linux-next (next-20120821).

For more details see <http://marc.info/?t=13455470802&r=1&w=2>.

Signed-off-by: Sedat Dilek 
---
 kernel/power/process.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/process.c b/kernel/power/process.c
index 19db29f..87da817 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -79,7 +79,7 @@ static int try_to_freeze_tasks(bool user_only)
 
/*
 * We need to retry, but first give the freezing tasks some
-* time to enter the regrigerator.
+* time to enter the refrigerator.
 */
msleep(10);
}
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v9 2/2] kvm: On Ack, De-assert & Notify KVM_IRQFD extension

2012-08-21 Thread Michael S. Tsirkin

On Tue, Aug 21, 2012 at 01:29:14PM -0600, Alex Williamson wrote:
> For VFIO based device assignment we'd like a mechanism to allow level
> triggered interrupts to be directly injected into KVM.  KVM_IRQFD
> already allows this for edge triggered interrupts, but for level, we
> need to watch for acknowledgement of the interrupt from the guest to
> provide us a hint when to test the device and allow it to re-assert
> if necessary.  To do this, we create a new KVM_IRQFD mode called
> "On Ack, De-assert & Notify", or OADN.  In this mode, an interrupt
> injection provides only a gsi assertion.  We then hook into the IRQ
> ACK notifier, which when triggered de-asserts the gsi and notifies
> via another eventfd.  It's then the responsibility of the user to
> re-assert the interrupt if service is still required.
> 
> Signed-off-by: Alex Williamson 

Naming aside, looks good.
I think I see some minor bugs, and I added some improvement
suggestions below.

Thanks!

> ---
> 
>  Documentation/virtual/kvm/api.txt |   13 ++
>  arch/x86/kvm/x86.c|1 
>  include/linux/kvm.h   |6 +
>  include/linux/kvm_host.h  |1 
>  virt/kvm/eventfd.c|  193 
> -
>  5 files changed, 210 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index bf33aaa..87d7321 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -1946,6 +1946,19 @@ the guest using the specified gsi pin.  The irqfd is 
> removed using
>  the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
>  and kvm_irqfd.gsi.
>  
> +With KVM_CAP_IRQFD_OADN, KVM_IRQFD supports an "On Ack, De-assert &
> +Notify" option that allows emulation of level-triggered interrupts.
> +When kvm_irqfd.fd is triggered, the requested gsi is asserted and
> +remains asserted until interaction with the irqchip indicates the
> +VM has acknowledged the interrupt, such as an EOI.  On acknowledgement
> +the gsi is automatically de-asserted and the user is notified via
> +kvm_irqfd.notifyfd.  The user is then required to re-assert the
> +interrupt if the associated device still requires service.  To enable
> +this mode, configure the KVM_IRQFD using the KVM_IRQFD_FLAG_OADN flag
> +and specify kvm_irqfd.notifyfd.  Note that closing kvm_irqfd.notifyfd
> +while configured in this mode does not disable the irqfd.  The
> +KVM_IRQFD_FLAG_OADN flag is only necessary on assignment.
> +
>  4.76 KVM_PPC_ALLOCATE_HTAB
>  
>  Capability: KVM_CAP_PPC_ALLOC_HTAB
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index cd98673..fde7b66 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2175,6 +2175,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>   case KVM_CAP_PCI_2_3:
>   case KVM_CAP_KVMCLOCK_CTRL:
>   case KVM_CAP_IRQFD_IRQ_SOURCE_ID:
> + case KVM_CAP_IRQFD_OADN:
>   r = 1;
>   break;
>   case KVM_CAP_COALESCED_MMIO:
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index ae66b9c..ec0f1d8 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -619,6 +619,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_S390_COW 79
>  #define KVM_CAP_PPC_ALLOC_HTAB 80
>  #define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81
> +#define KVM_CAP_IRQFD_OADN 82
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> @@ -684,12 +685,15 @@ struct kvm_xen_hvm_config {
>  #endif
>  
>  #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
> +/* Available with KVM_CAP_IRQFD_OADN */

Need to also explain what it is.

> +#define KVM_IRQFD_FLAG_OADN (1 << 1)
>  
>  struct kvm_irqfd {
>   __u32 fd;
>   __u32 gsi;
>   __u32 flags;
> - __u8  pad[20];
> + __u32 notifyfd;

Document that this is only valid with OADN flag.  Might be a good idea
to rename this to deassert_on_ack_notifyfd or oadn_notifyfd
to avoid confusion.


> + __u8  pad[16];
>  };
>  
>  struct kvm_clock_data {
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index b763230..d502d08 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -284,6 +284,7 @@ struct kvm {
>   struct {
>   spinlock_tlock;
>   struct list_head  items;
> + struct list_head  oadns;
>   } irqfds;
>   struct list_head ioeventfds;
>  #endif
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 2245cfa..dfdb5b2 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -43,6 +43,23 @@
>   * 
>   */
>  
> +/*
> + * OADN irqfds (On Ack, De-assert & Notify) are a special variety of
> + * irqfds that assert an interrupt to the irqchip on eventfd trigger,
> + * receive notification when userspace acknowledges the interrupt,
> + * automatically de-asserts the irqchip level, and notifies userspace
> + * via the oadn_eventfd.  This object helps to provide one-to-m

[RFC][PATCH v4 3/3] efi_pstore: Skiping scheduling a workqueue in cases other than oops

2012-08-21 Thread Seiji Aguchi

[Problem]
 efi_pstore creates sysfs files when logging kernel messages to NVRAM.
 Currently, the sysfs files are updated in a workqueue which is registered in a 
write callback.

 On the other hand, the situations in which users need the sysfs files are when they 
erase entries or an oops happens 
 because a system will  be down and users can't access to sysfs files in other 
cases like panic, reboot or emergency_restart.

 Also, if kernel panics due to a bug of workqueue operations and a write 
callback of efi_pstore is called in
 panic case, efi_pstore may fail due to a failure of schedule_work(). 
 And panic_notifier_chain()/emergency_restart() is not kicked if efi_pstore 
fails.  
   This may cause user's unwanted result.

[Patch Description]
 This patch registers a workqueue updating sysfs entries in cases where users 
erase entries or oops happen only,
 and skips it in other cases like panic, reboot or emergency_restart.

Signed-off-by: Seiji Aguchi 
---
 drivers/firmware/efivars.c |7 ++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index cd16ea6..d5911fd 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -746,7 +746,12 @@ static int efi_pstore_write(enum pstore_type_id type,
 
spin_unlock_irqrestore(&efivars->lock, flags);
 
-   schedule_work(&efivar_work);
+   /*
+* The user may want to update sysfs for this write
+* when they erase an entry via /dev/pstore or oops happen.
+*/
+   if (!size || reason == KMSG_DUMP_OOPS)
+   schedule_work(&efivar_work);
 
*id = part;
return ret;
-- 1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 3/5] X86/XEN: Allow setup function x86_init.paging.pagetable_init to setup kernel pagetables

2012-08-21 Thread Attilio Rao

Currently, x86_init.paging.pagetable_init relies on callers to setup the
kernel pagetable.  In order to unify the functionality of
x86_init.paging.pagetable_setup_start and x86_init.paging.pagetable_setup_done
allow the new setup function to perform the operation itself.

Signed-off-by: Attilio Rao 
---
 arch/x86/include/asm/pgtable_types.h |2 +-
 arch/x86/kernel/setup.c  |1 -
 arch/x86/kernel/x86_init.c   |1 -
 arch/x86/mm/init_32.c|1 +
 arch/x86/xen/mmu.c   |1 +
 5 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_types.h 
b/arch/x86/include/asm/pgtable_types.h
index 0c01e07..c93cb8e 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -306,7 +306,7 @@ extern void native_pagetable_reserve(u64 start, u64 end);
 extern void native_pagetable_init(void);
 extern void native_pagetable_setup_done(pgd_t *base);
 #else
-#define native_pagetable_init        x86_init_pgd_init_noop
+#define native_pagetable_init        paging_init
 #define native_pagetable_setup_done  x86_init_pgd_done_noop
 #endif
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 61b7d98..315fd24 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -962,7 +962,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
x86_init.paging.pagetable_init();
-   paging_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
if (boot_cpu_data.cpuid_level >= 0) {
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 0e1e950..5f2478f 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -26,7 +26,6 @@
 
 void __cpuinit x86_init_noop(void) { }
 void __init x86_init_uint_noop(unsigned int unused) { }
-void __init x86_init_pgd_init_noop(void) { }
 void __init x86_init_pgd_done_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0e38e0e..e35b4b1 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -475,6 +475,7 @@ void __init native_pagetable_init(void)
pte_clear(NULL, va, pte);
}
paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
+   paging_init();
 }
 
 void __init native_pagetable_setup_done(pgd_t *base)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ff1af97..4f47b87 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1176,6 +1176,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
 
 static void __init xen_pagetable_init(void)
 {
+   paging_init();
 }
 
 static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 4/5] X86/XEN: Move content of xen_pagetable_setup_done() into xen_pagetable_init() and retire now unused x86_init.paging.pagetable_setup_done

2012-08-21 Thread Attilio Rao

At this stage x86_init.paging.pagetable_setup_done is only used in the
XEN case. Move its content into the x86_init.paging.pagetable_init setup
function and remove the now unused x86_init.paging.pagetable_setup_done
remaining infrastructure.

Signed-off-by: Attilio Rao 
---
 arch/x86/include/asm/pgtable_types.h |2 --
 arch/x86/include/asm/x86_init.h  |2 --
 arch/x86/kernel/setup.c  |1 -
 arch/x86/kernel/x86_init.c   |2 --
 arch/x86/mm/init_32.c|4 
 arch/x86/xen/mmu.c   |   13 -
 6 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_types.h 
b/arch/x86/include/asm/pgtable_types.h
index c93cb8e..db8fec6 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -304,10 +304,8 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 extern void native_pagetable_reserve(u64 start, u64 end);
 #ifdef CONFIG_X86_32
 extern void native_pagetable_init(void);
-extern void native_pagetable_setup_done(pgd_t *base);
 #else
 #define native_pagetable_init        paging_init
-#define native_pagetable_setup_done  x86_init_pgd_done_noop
 #endif
 
 struct seq_file;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 24084b2..995ea5c 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -82,11 +82,9 @@ struct x86_init_mapping {
 /**
  * struct x86_init_paging - platform specific paging functions
  * @pagetable_init:        platform specific paging initialization call
- * @pagetable_setup_done:  platform specific post paging_init() call
  */
 struct x86_init_paging {
void (*pagetable_init)(void);
-   void (*pagetable_setup_done)(pgd_t *base);
 };
 
 /**
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 315fd24..4f16547 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -962,7 +962,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
x86_init.paging.pagetable_init();
-   x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
if (boot_cpu_data.cpuid_level >= 0) {
/* A CPU has %cr4 if and only if it has CPUID */
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 5f2478f..7a3d075 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -26,7 +26,6 @@
 
 void __cpuinit x86_init_noop(void) { }
 void __init x86_init_uint_noop(unsigned int unused) { }
-void __init x86_init_pgd_done_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
 
@@ -69,7 +68,6 @@ struct x86_init_ops x86_init __initdata = {
 
.paging = {
.pagetable_init = native_pagetable_init,
-   .pagetable_setup_done   = native_pagetable_setup_done,
},
 
.timers = {
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index e35b4b1..4f04db1 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -478,10 +478,6 @@ void __init native_pagetable_init(void)
paging_init();
 }
 
-void __init native_pagetable_setup_done(pgd_t *base)
-{
-}
-
 /*
  * Build a proper pagetable for the kernel mappings.  Up until this
  * point, we've been running on some set of pagetables constructed by
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 4f47b87..4290d83 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1174,9 +1174,13 @@ static void xen_exit_mmap(struct mm_struct *mm)
spin_unlock(&mm->page_table_lock);
 }
 
+static void xen_post_allocator_init(void);
+
 static void __init xen_pagetable_init(void)
 {
paging_init();
+   xen_setup_shared_info();
+   xen_post_allocator_init();
 }
 
 static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
@@ -1193,14 +1197,6 @@ static __init void xen_mapping_pagetable_reserve(u64 
start, u64 end)
}
 }
 
-static void xen_post_allocator_init(void);
-
-static void __init xen_pagetable_setup_done(pgd_t *base)
-{
-   xen_setup_shared_info();
-   xen_post_allocator_init();
-}
-
 static void xen_write_cr2(unsigned long cr2)
 {
this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
@@ -2070,7 +2066,6 @@ void __init xen_init_mmu_ops(void)
 {
x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_init = xen_pagetable_init;
-   x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;
 
memset(dummy_mapping, 0xff, PAGE_SIZE);
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [2/2] spi: Add SPI master controller for OCTEON SOCs.

2012-08-21 Thread David Daney


On 08/21/2012 12:49 PM, Guenter Roeck wrote:

On Fri, May 11, 2012 at 08:34:46PM -, David Daney wrote:

From: David Daney 

Add the driver, link it into the kbuild system and provide device tree
binding documentation.

Signed-off-by: David Daney 
Acked-by: Grant Likely 


[ ... ]


+
+static int __devexit octeon_spi_remove(struct platform_device *pdev)
+{
+   struct octeon_spi *p = platform_get_drvdata(pdev);
+   struct spi_master *master = p->my_master;
+
+   spi_unregister_master(master);
+


I know it is a bit late, but ...


In this case, just in time.  I am now finally getting back to fixing the 
issues with this driver, and looking to merge it in the near future.


David Daney



The call to spi_unregister_master() frees the memory associated with master,
ie 'p', and the spi_master_put() below without matching spi_master_get() is
unnecessary/wrong. One possible fix would be to use

struct spi_master *master = spi_master_get(p->my_master);

above. That protects master and p while it is still being used, and makes use
of the call to spi_master_put() below. Another option might be to move
cvmx_write_csr() ahead of the call to spi_unregister_master() and drop the
call to spi_master_put().

Guenter


+   /* Clear the CSENA* and put everything in a known state. */
+   cvmx_write_csr(p->register_base + OCTEON_SPI_CFG, 0);
+   spi_master_put(master);
+   return 0;
+}
+





--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 2/5] X86/XEN: Rename pagetable_setup_start() setup functions into pagetable_init()

2012-08-21 Thread Attilio Rao

In preparation for unifying the pagetable_setup_start() and
pagetable_setup_done() setup functions, rename appropriately all the
infrastructure related to pagetable_setup_start().

Signed-off-by: Attilio Rao 
---
 arch/x86/include/asm/pgtable_types.h |4 ++--
 arch/x86/include/asm/x86_init.h  |4 ++--
 arch/x86/kernel/setup.c  |2 +-
 arch/x86/kernel/x86_init.c   |4 ++--
 arch/x86/mm/init_32.c|4 ++--
 arch/x86/xen/mmu.c   |4 ++--
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_types.h 
b/arch/x86/include/asm/pgtable_types.h
index e02b875..0c01e07 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -303,10 +303,10 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
 extern void native_pagetable_reserve(u64 start, u64 end);
 #ifdef CONFIG_X86_32
-extern void native_pagetable_setup_start(void);
+extern void native_pagetable_init(void);
 extern void native_pagetable_setup_done(pgd_t *base);
 #else
-#define native_pagetable_setup_start x86_init_pgd_start_noop
+#define native_pagetable_init        x86_init_pgd_init_noop
 #define native_pagetable_setup_done  x86_init_pgd_done_noop
 #endif
 
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 782ba0c..24084b2 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -81,11 +81,11 @@ struct x86_init_mapping {
 
 /**
  * struct x86_init_paging - platform specific paging functions
- * @pagetable_setup_start: platform specific pre paging_init() call
+ * @pagetable_init:        platform specific paging initialization call
  * @pagetable_setup_done:  platform specific post paging_init() call
  */
 struct x86_init_paging {
-   void (*pagetable_setup_start)(void);
+   void (*pagetable_init)(void);
void (*pagetable_setup_done)(pgd_t *base);
 };
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 90cbbe0..61b7d98 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -961,7 +961,7 @@ void __init setup_arch(char **cmdline_p)
kvmclock_init();
 #endif
 
-   x86_init.paging.pagetable_setup_start();
+   x86_init.paging.pagetable_init();
paging_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 3b88493..0e1e950 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -26,7 +26,7 @@
 
 void __cpuinit x86_init_noop(void) { }
 void __init x86_init_uint_noop(unsigned int unused) { }
-void __init x86_init_pgd_start_noop(void) { }
+void __init x86_init_pgd_init_noop(void) { }
 void __init x86_init_pgd_done_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
@@ -69,7 +69,7 @@ struct x86_init_ops x86_init __initdata = {
},
 
.paging = {
-   .pagetable_setup_start  = native_pagetable_setup_start,
+   .pagetable_init = native_pagetable_init,
.pagetable_setup_done   = native_pagetable_setup_done,
},
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c4aa1b2..0e38e0e 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -445,7 +445,7 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
 }
 #endif /* CONFIG_HIGHMEM */
 
-void __init native_pagetable_setup_start(void)
+void __init native_pagetable_init(void)
 {
unsigned long pfn, va;
pgd_t *pgd, *base = swapper_pg_dir;
@@ -493,7 +493,7 @@ void __init native_pagetable_setup_done(pgd_t *base)
  * If we're booting paravirtualized under a hypervisor, then there are
  * more options: we may already be running PAE, and the pagetable may
  * or may not be based in swapper_pg_dir.  In any case,
- * paravirt_pagetable_setup_start() will set up swapper_pg_dir
+ * paravirt_pagetable_init() will set up swapper_pg_dir
  * appropriately for the rest of the initialization to work.
  *
  * In general, pagetable_init() assumes that the pagetable may already
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index d89ea5c..ff1af97 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1174,7 +1174,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
spin_unlock(&mm->page_table_lock);
 }
 
-static void __init xen_pagetable_setup_start(void)
+static void __init xen_pagetable_init(void)
 {
 }
 
@@ -2068,7 +2068,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 void __init xen_init_mmu_ops(void)
 {
x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
-   x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
+   x86_init.paging.pagetable_init = xen_pagetable_init;
x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;
 
-- 
1.7.2.5

--
To

[PATCH v2 1/5] X86/XEN: Remove the base argument from x86_init.paging.pagetable_setup_start

2012-08-21 Thread Attilio Rao

x86_init.paging.pagetable_setup_start for native will however use
swapper_pg_dir in the single place where it is used and for XEN the
argument is simply unused. Additionally, the comments already point to
swapper_pg_dir as the sole base touched.
Finally, this will help with further merging of
x86_init.paging.pagetable_setup_start with
x86_init.paging.pagetable_setup_done.

Signed-off-by: Attilio Rao 
---
 arch/x86/include/asm/pgtable_types.h |6 +++---
 arch/x86/include/asm/x86_init.h  |2 +-
 arch/x86/kernel/setup.c  |2 +-
 arch/x86/kernel/x86_init.c   |3 ++-
 arch/x86/mm/init_32.c|4 ++--
 arch/x86/xen/mmu.c   |2 +-
 6 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_types.h 
b/arch/x86/include/asm/pgtable_types.h
index 013286a..e02b875 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -303,11 +303,11 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
 extern void native_pagetable_reserve(u64 start, u64 end);
 #ifdef CONFIG_X86_32
-extern void native_pagetable_setup_start(pgd_t *base);
+extern void native_pagetable_setup_start(void);
 extern void native_pagetable_setup_done(pgd_t *base);
 #else
-#define native_pagetable_setup_start x86_init_pgd_noop
-#define native_pagetable_setup_done  x86_init_pgd_noop
+#define native_pagetable_setup_start x86_init_pgd_start_noop
+#define native_pagetable_setup_done  x86_init_pgd_done_noop
 #endif
 
 struct seq_file;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 38155f6..782ba0c 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -85,7 +85,7 @@ struct x86_init_mapping {
  * @pagetable_setup_done:  platform specific post paging_init() call
  */
 struct x86_init_paging {
-   void (*pagetable_setup_start)(pgd_t *base);
+   void (*pagetable_setup_start)(void);
void (*pagetable_setup_done)(pgd_t *base);
 };
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80..90cbbe0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -961,7 +961,7 @@ void __init setup_arch(char **cmdline_p)
kvmclock_init();
 #endif
 
-   x86_init.paging.pagetable_setup_start(swapper_pg_dir);
+   x86_init.paging.pagetable_setup_start();
paging_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 9f3167e..3b88493 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -26,7 +26,8 @@
 
 void __cpuinit x86_init_noop(void) { }
 void __init x86_init_uint_noop(unsigned int unused) { }
-void __init x86_init_pgd_noop(pgd_t *unused) { }
+void __init x86_init_pgd_start_noop(void) { }
+void __init x86_init_pgd_done_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 575d86f..c4aa1b2 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -445,10 +445,10 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
 }
 #endif /* CONFIG_HIGHMEM */
 
-void __init native_pagetable_setup_start(pgd_t *base)
+void __init native_pagetable_setup_start(void)
 {
unsigned long pfn, va;
-   pgd_t *pgd;
+   pgd_t *pgd, *base = swapper_pg_dir;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b65a761..d89ea5c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1174,7 +1174,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
spin_unlock(&mm->page_table_lock);
 }
 
-static void __init xen_pagetable_setup_start(pgd_t *base)
+static void __init xen_pagetable_setup_start(void)
 {
 }
 
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/5] X86/XEN: Introduce the x86_init.paging.pagetable_init PVOPS

2012-08-21 Thread Attilio Rao


On 21/08/12 16:44, Thomas Gleixner wrote:

On Tue, 21 Aug 2012, Attilio Rao wrote:

   

This new PVOPS is responsible to setup the kernel pagetables and
replace entirely x86_init.paging.pagetable_setup_start and
x86_init.paging.pagetable_setup_done PVOPS work.
 


   

For performance the x86_64 stub is implemented as a macro to paging_init()
rather than an actual function stub.
 

Huch, using a macro for an once per boot time call is really a massive
performance improvement.

It's confusing and wrong. You just use a macro because x86_64 does not
need any extra setups aside of paging_init().

   

diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 849be14..c1e910a 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -68,6 +68,7 @@ struct x86_init_ops x86_init __initdata = {
},

.paging = {
+   .pagetable_init = native_pagetable_init,
 

I'd prefer to see these patches implemented differently.

  #1 Remove the base argument from pagetable_setup_start (leave
 pagetable_setup_done() alone).

  #2 Rename pagetable_setup_start to pagetable_init,
 native_pagetable_setup_start to native_pagetable_init and
 xen_pagetable_setup_start to xen_pagetable_init

  #3 Instead of copying the whole native_pagetable_setup_start()
 function and deleting it later, move the paging_init() call from
 setup.c to native_pagetable_init() and xen_pagetable_init()
 and define native_pagetable_init as paging_init() for x86_64

  #4 Move the code from xen_pagetable_setup_done() into
 xen_pagetable_init() and remove the now unused
 pagetable_setup_done().

That's less code shuffling and pointless copying which makes the
review way easier.
   


I've followed these steps in a new patch series (integrating suggestions 
from Konrad and Stefano too).


Attilio
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] task_work: add a scheduling point in task_work_run()

2012-08-21 Thread Mimi Zohar

On Tue, 2012-08-21 at 15:05 +0200, Eric Dumazet wrote:
> From: Eric Dumazet 
> 
> It seems commit 4a9d4b02 (switch fput to task_work_add) reintroduced
> the problem addressed in commit 944be0b2 (close_files(): add scheduling
> point)
> 
> If a server process with a lot of files (say 2 million tcp sockets)
> is killed, we can spend a lot of time in task_work_run() and trigger
> a soft lockup.
> 
> Signed-off-by: Eric Dumazet 
> ---
>  kernel/task_work.c |1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/kernel/task_work.c b/kernel/task_work.c
> index 91d4e17..d320d44 100644
> --- a/kernel/task_work.c
> +++ b/kernel/task_work.c
> @@ -75,6 +75,7 @@ void task_work_run(void)
>   p = q->next;
>   q->func(q);
>   q = p;
> + cond_resched();
>   }
>   }
>  }

We're here, because fput() called schedule_work() to delay the last
fput().  The execution needs to take place before the syscall returns to
userspace.  Need to read __schedule()...  Do you know if cond_resched()
can guarantee that it will be executed before the return to userspace? 

thanks,

Mimi

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v9 1/2] kvm: Use a reserved IRQ source ID for irqfd

2012-08-21 Thread Michael S. Tsirkin

On Tue, Aug 21, 2012 at 02:06:19PM -0600, Alex Williamson wrote:
> On Tue, 2012-08-21 at 22:58 +0300, Michael S. Tsirkin wrote:
> > On Tue, Aug 21, 2012 at 01:29:06PM -0600, Alex Williamson wrote:
> > > KVM_IRQFD currently uses the reserved KVM_USERSPACE_IRQ_SOURCE_ID
> > > which is also shared with userspace injection methods like
> > > KVM_IRQ_LINE.  This can cause a conflict if an irqfd triggers on
> > > a GSI asserted through KVM_IRQ_LINE.
> > 
> > What kind of conflict do you envision?  Pls note level interrupts are
> > unsupported ATM.
> 
> If KVM_IRQ_LINE asserts a level interrupt and KVM_IRQFD triggers on the
> same GSI then the pin is no longer asserted as userspace thinks it is.
> Do we just chalk this up to userspace error?

Yes: using a level GSI with current irqfd is a userspace error
because you can lose interrupts anyway.

Are edge GSIs affected?

> > > Move irqfd to its own reserved IRQ source ID.  Add a capability for
> > > userspace to test for this fix.
> > > 
> > > Signed-off-by: Alex Williamson 
> > > ---
> > > 
> > >  arch/x86/kvm/x86.c   |3 +++
> > >  include/linux/kvm.h  |1 +
> > >  include/linux/kvm_host.h |1 +
> > >  virt/kvm/eventfd.c   |6 +++---
> > >  4 files changed, 8 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index 42bce48..cd98673 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -2174,6 +2174,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> > >   case KVM_CAP_GET_TSC_KHZ:
> > >   case KVM_CAP_PCI_2_3:
> > >   case KVM_CAP_KVMCLOCK_CTRL:
> > > + case KVM_CAP_IRQFD_IRQ_SOURCE_ID:
> > >   r = 1;
> > >   break;
> > >   case KVM_CAP_COALESCED_MMIO:
> > > @@ -6258,6 +6259,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long 
> > > type)
> > >  
> > >   /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
> > >   set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
> > > + /* Reserve bit 1 of irq_sources_bitmap for irqfd irq source */
> > > + set_bit(KVM_IRQFD_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
> > >  
> > >   raw_spin_lock_init(&kvm->arch.tsc_write_lock);
> > >  
> > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> > > index 2ce09aa..ae66b9c 100644
> > > --- a/include/linux/kvm.h
> > > +++ b/include/linux/kvm.h
> > > @@ -618,6 +618,7 @@ struct kvm_ppc_smmu_info {
> > >  #define KVM_CAP_PPC_GET_SMMU_INFO 78
> > >  #define KVM_CAP_S390_COW 79
> > >  #define KVM_CAP_PPC_ALLOC_HTAB 80
> > > +#define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81
> > >  
> > >  #ifdef KVM_CAP_IRQ_ROUTING
> > >  
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index b70b48b..b763230 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -71,6 +71,7 @@
> > >  #define KVM_REQ_PMI   17
> > >  
> > >  #define KVM_USERSPACE_IRQ_SOURCE_ID  0
> > > +#define KVM_IRQFD_IRQ_SOURCE_ID  1
> > >  
> > >  struct kvm;
> > >  struct kvm_vcpu;
> > 
> > Above looks fine but I'm not sure why is the below needed.
> > This changes irqfd behaviour for edge GSIs slightly
> > in a userspace-visible way. Maybe make it a separate patch
> > so it can be considered on merits?
> 
> Hmm, the above does nothing without the below.

Yes. But you can use the above with the new irqfds you are adding.

> I thought I was just
> implementing your idea that IRQFDs should all share a single IRQ source
> ID...

Sorry I only meant for level irqfds. You are changing edge here.

> why is that no longer a good idea?  Thanks,
> 
> Alex

Maybe it is a good idea. I am just asking for the motivation.

> > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> > > index 7d7e2aa..2245cfa 100644
> > > --- a/virt/kvm/eventfd.c
> > > +++ b/virt/kvm/eventfd.c
> > > @@ -67,8 +67,8 @@ irqfd_inject(struct work_struct *work)
> > >   struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
> > >   struct kvm *kvm = irqfd->kvm;
> > >  
> > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
> > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
> > > + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 1);
> > > + kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 0);
> > >  }
> > >  
> > >  /*
> > > @@ -138,7 +138,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int 
> > > sync, void *key)
> > >   irq = rcu_dereference(irqfd->irq_entry);
> > >   /* An event has been signaled, inject an interrupt */
> > >   if (irq)
> > > - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
> > > + kvm_set_msi(irq, kvm, KVM_IRQFD_IRQ_SOURCE_ID, 1);
> > >   else
> > >   schedule_work(&irqfd->inject);
> > >   rcu_read_unlock();
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at

[PATCH -next] HID: picoLCD: Add missing #include

2012-08-21 Thread Geert Uytterhoeven

m68k/allmodconfig:

drivers/hid/hid-picolcd_debugfs.c: In function ‘picolcd_debug_reset_write’:
drivers/hid/hid-picolcd_debugfs.c:54: error: implicit declaration of function 
‘copy_from_user’
drivers/hid/hid-picolcd_debugfs.c: In function ‘picolcd_debug_eeprom_read’:
drivers/hid/hid-picolcd_debugfs.c:112: error: implicit declaration of function 
‘copy_to_user’

Signed-off-by: Geert Uytterhoeven 
---
http://kisskb.ellerman.id.au/kisskb/buildresult/6990818/

 drivers/hid/hid-picolcd_debugfs.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/drivers/hid/hid-picolcd_debugfs.c 
b/drivers/hid/hid-picolcd_debugfs.c
index f2491fa..15c22f2 100644
--- a/drivers/hid/hid-picolcd_debugfs.c
+++ b/drivers/hid/hid-picolcd_debugfs.c
@@ -27,6 +27,7 @@
 #include 
 
 #include 
+#include 
 
 #include "hid-picolcd.h"
 
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v8 1/5] mm: introduce a common interface for balloon pages mobility

2012-08-21 Thread Rafael Aquini

On Tue, Aug 21, 2012 at 10:30:31PM +0300, Michael S. Tsirkin wrote:
> On Tue, Aug 21, 2012 at 04:23:58PM -0300, Rafael Aquini wrote:
> > On Tue, Aug 21, 2012 at 10:13:30PM +0300, Michael S. Tsirkin wrote:
> > > > 
> > > > I believe rcu_dereference_protected() is what I want/need here, since 
> > > > this code
> > > > is always called for pages which we hold locked (PG_locked bit).
> > > 
> > > It would only help if we locked the page while updating the mapping,
> > > as far as I can see we don't.
> > >
> > 
> > But we can do it. In fact, by doing it (locking the page) we can easily 
> > avoid
> > the nasty race balloon_isolate_page / leak_balloon, in a much simpler way, 
> > IMHO.
> 
> Absolutely. Further, we should look hard at whether most RCU uses
> in this patchset can be replaced with page lock.
>

Yeah, in fact, by testing/grabbing the page lock at leak_balloon() even the
module unload X migration / putback race seems to fade away, since migration
code holds the page locked all the way.

And that seems quite easy to accomplish:


@@ -169,21 +197,61 @@ static void leak_balloon(struct virtio_balloon *vb, size_t
num)
/* We can only do one array worth at a time. */
num = min(num, ARRAY_SIZE(vb->pfns));

+   mutex_lock(&vb->balloon_lock);
for (vb->num_pfns = 0; vb->num_pfns < num;
 vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
+   spin_lock(&vb->pages_lock);
+   /*
+* 'virtballoon_isolatepage()' can drain vb->pages list
+* making us to stumble across a _temporarily_ empty list.
+*
+* Release the spinlock and resume from here in order to
+* give page migration a shot to refill vb->pages list.
+*/
+   if (unlikely(list_empty(&vb->pages))) {
+   spin_unlock(&vb->pages_lock);
+   break;
+   }
+
page = list_first_entry(&vb->pages, struct page, lru);
+
+   /*
+* Grab the page lock to avoid racing against threads isolating
+* pages from vb->pages list (it's done under page lock).
+*
+* Failing to grab the page lock here means this page has been
+* selected for isolation already.
+*/
+   if (!trylock_page(page)) {
+   spin_unlock(&vb->pages_lock);
+   break;
+   }
+
+   clear_balloon_mapping(page);
list_del(&page->lru);
set_page_pfns(vb->pfns + vb->num_pfns, page);
vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
+   unlock_page(page);
+   spin_unlock(&vb->pages_lock);
}

.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCHv3 7/9] arm: vt8500: doc: Add device tree bindings for arch-vt8500 devices

2012-08-21 Thread Tony Prisk

Bindings for gpio, interrupt controller, power management controller,
timer, realtime clock, serial uart, ehci and uhci controllers and
framebuffer controllers used on the arch-vt8500 platform.

Framebuffer binding also specifies a 'display' node which is required
for determining the lcd panel data.

Signed-off-by: Tony Prisk 
---
 Documentation/devicetree/bindings/arm/vt8500.txt   |   15 
 .../bindings/arm/vt8500/via,vt8500-intc.txt|   16 +
 .../bindings/arm/vt8500/via,vt8500-pmc.txt |   13 
 .../bindings/arm/vt8500/via,vt8500-timer.txt   |   15 
 Documentation/devicetree/bindings/clock/vt8500.txt |   72 
 .../devicetree/bindings/gpio/gpio_vt8500.txt   |   24 +++
 .../devicetree/bindings/rtc/via,vt8500-rtc.txt |   15 
 .../bindings/tty/serial/via,vt8500-uart.txt|   15 
 .../devicetree/bindings/usb/platform-uhci.txt  |   15 
 .../devicetree/bindings/usb/via,vt8500-ehci.txt|   15 
 .../devicetree/bindings/vendor-prefixes.txt|2 +
 .../devicetree/bindings/video/via,vt8500-fb.txt|   48 +
 .../devicetree/bindings/video/wm,prizm-ge-rops.txt |   13 
 .../devicetree/bindings/video/wm,wm8505-fb.txt |   22 ++
 14 files changed, 300 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/arm/vt8500.txt
 create mode 100644 
Documentation/devicetree/bindings/arm/vt8500/via,vt8500-intc.txt
 create mode 100644 
Documentation/devicetree/bindings/arm/vt8500/via,vt8500-pmc.txt
 create mode 100644 
Documentation/devicetree/bindings/arm/vt8500/via,vt8500-timer.txt
 create mode 100644 Documentation/devicetree/bindings/clock/vt8500.txt
 create mode 100644 Documentation/devicetree/bindings/gpio/gpio_vt8500.txt
 create mode 100644 Documentation/devicetree/bindings/rtc/via,vt8500-rtc.txt
 create mode 100644 
Documentation/devicetree/bindings/tty/serial/via,vt8500-uart.txt
 create mode 100644 Documentation/devicetree/bindings/usb/platform-uhci.txt
 create mode 100644 Documentation/devicetree/bindings/usb/via,vt8500-ehci.txt
 create mode 100644 Documentation/devicetree/bindings/video/via,vt8500-fb.txt
 create mode 100644 Documentation/devicetree/bindings/video/wm,prizm-ge-rops.txt
 create mode 100644 Documentation/devicetree/bindings/video/wm,wm8505-fb.txt

diff --git a/Documentation/devicetree/bindings/arm/vt8500.txt 
b/Documentation/devicetree/bindings/arm/vt8500.txt
new file mode 100644
index 000..1b3b187
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vt8500.txt
@@ -0,0 +1,15 @@
+VIA/Wondermedia VT8500 Platforms Device Tree Bindings
+---
+
+Boards with the VIA VT8500 SoC shall have the following properties:
+Required root node property:
+compatible = "via,vt8500";
+
+Boards with the Wondermedia WM8505 SoC shall have the following properties:
+Required root node property:
+compatible = "wm,wm8505";
+
+Boards with the Wondermedia WM8650 SoC shall have the following properties:
+Required root node property:
+compatible = "wm,wm8650";
+
diff --git a/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-intc.txt 
b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-intc.txt
new file mode 100644
index 000..0a4ce10
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-intc.txt
@@ -0,0 +1,16 @@
+VIA/Wondermedia VT8500 Interrupt Controller
+-
+
+Required properties:
+- compatible : "via,vt8500-intc"
+- reg : Should contain 1 register range (address and length)
+- #interrupt-cells : should be <1>
+
+Example:
+
+   intc: interrupt-controller@d814 {
+   compatible = "via,vt8500-intc";
+   interrupt-controller;
+   reg = <0xd814 0x1>;
+   #interrupt-cells = <1>;
+   };
diff --git a/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-pmc.txt 
b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-pmc.txt
new file mode 100644
index 000..521b9c7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-pmc.txt
@@ -0,0 +1,13 @@
+VIA/Wondermedia VT8500 Power Management Controller
+-
+
+Required properties:
+- compatible : "via,vt8500-pmc"
+- reg : Should contain 1 register range (address and length)
+
+Example:
+
+   pmc@d813 {
+   compatible = "via,vt8500-pmc";
+   reg = <0xd813 0x1000>;
+   };
diff --git a/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-timer.txt 
b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-timer.txt
new file mode 100644
index 000..901c73f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-timer.txt
@@ -0,0 +1,15 @@
+VIA/Wondermedia VT8500 Timer
+-
+
+Required properties:
+- compatible : "via,vt8500-timer"
+- reg : Should contain 1 register ranges(address and

[PATCHv3 8/9] arm: vt8500: gpio: Devicetree support for arch-vt8500

2012-08-21 Thread Tony Prisk

Converted the existing arch-vt8500 gpio to a platform_device.
Added support for WM8505 and WM8650 GPIO controllers.

Signed-off-by: Tony Prisk 
---
 drivers/gpio/Kconfig   |6 +
 drivers/gpio/Makefile  |1 +
 drivers/gpio/gpio-vt8500.c |  313 
 3 files changed, 320 insertions(+)
 create mode 100644 drivers/gpio/gpio-vt8500.c

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 542f0c0..3c8897a 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -183,6 +183,12 @@ config GPIO_STA2X11
  Say yes here to support the STA2x11/ConneXt GPIO device.
  The GPIO module has 128 GPIO pins with alternate functions.
 
+config GPIO_VT8500
+   bool "VIA/Wondermedia SoC GPIO Support"
+   depends on ARCH_VT8500
+   help
+ Say yes here to support the VT8500/WM8505/WM8650 GPIO controller.
+
 config GPIO_XILINX
bool "Xilinx GPIO support"
depends on PPC_OF || MICROBLAZE
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 0f55662..2c014b9 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -66,6 +66,7 @@ obj-$(CONFIG_GPIO_TPS65912)   += gpio-tps65912.o
 obj-$(CONFIG_GPIO_TWL4030) += gpio-twl4030.o
 obj-$(CONFIG_GPIO_UCB1400) += gpio-ucb1400.o
 obj-$(CONFIG_GPIO_VR41XX)  += gpio-vr41xx.o
+obj-$(CONFIG_GPIO_VT8500)  += gpio-vt8500.o
 obj-$(CONFIG_GPIO_VX855)   += gpio-vx855.o
 obj-$(CONFIG_GPIO_WM831X)  += gpio-wm831x.o
 obj-$(CONFIG_GPIO_WM8350)  += gpio-wm8350.o
diff --git a/drivers/gpio/gpio-vt8500.c b/drivers/gpio/gpio-vt8500.c
new file mode 100644
index 000..19b12d9
--- /dev/null
+++ b/drivers/gpio/gpio-vt8500.c
@@ -0,0 +1,313 @@
+/* drivers/gpio/gpio-vt8500.c
+ *
+ * Copyright (C) 2012 Tony Prisk 
+ * Based on gpio.c:
+ * - Copyright (C) 2010 Alexey Charkov 
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+   We handle GPIOs by bank, each bank containing up to 32 GPIOs covered
+   by one set of registers (although not all may be valid).
+
+   Because different SoC's have different register offsets, we pass the
+   register offsets as data in vt8500_gpio_dt_ids[].
+
+   A value of NO_REG is used to indicate that this register is not
+   supported. Only used for ->en at the moment.
+*/
+
+#define NO_REG 0x
+
+/*
+ * struct vt8500_gpio_bank_regoffsets
+ * @en: offset to enable register of the bank
+ * @dir: offset to direction register of the bank
+ * @data_out: offset to the data out register of the bank
+ * @data_in: offset to the data in register of the bank
+ * @ngpio: highest valid pin in this bank
+ */
+
+struct vt8500_gpio_bank_regoffsets {
+   unsigned int    en;
+   unsigned int    dir;
+   unsigned int    data_out;
+   unsigned int    data_in;
+   unsigned char   ngpio;
+};
+
+struct vt8500_gpio_data {
+   unsigned int    num_banks;
+   struct vt8500_gpio_bank_regoffsets  banks[];
+};
+
+#define VT8500_BANK(__en, __dir, __out, __in, __ngpio) \
+{  \
+   .en = __en, \
+   .dir = __dir,   \
+   .data_out = __out,  \
+   .data_in = __in,\
+   .ngpio = __ngpio,   \
+}
+
+static struct vt8500_gpio_data vt8500_data = {
+   .num_banks  = 7,
+   .banks  = {
+   VT8500_BANK(0x00, 0x20, 0x40, 0x60, 26),
+   VT8500_BANK(0x04, 0x24, 0x44, 0x64, 28),
+   VT8500_BANK(0x08, 0x28, 0x48, 0x68, 31),
+   VT8500_BANK(0x0C, 0x2C, 0x4C, 0x6C, 19),
+   VT8500_BANK(0x10, 0x30, 0x50, 0x70, 19),
+   VT8500_BANK(0x14, 0x34, 0x54, 0x74, 23),
+   VT8500_BANK(NO_REG, 0x3C, 0x5C, 0x7C, 9),
+   },
+};
+
+static struct vt8500_gpio_data wm8505_data = {
+   .num_banks  = 10,
+   .banks  = {
+   VT8500_BANK(0x40, 0x68, 0x90, 0xB8, 8),
+   VT8500_BANK(0x44, 0x6C, 0x94, 0xBC, 32),
+   VT8500_BANK(0x48, 0x70, 0x98, 0xC0, 6),
+   VT8500_BANK(0x4C, 0x74, 0x9C, 0xC4, 16),
+   VT8500_BANK(0x50, 0x78, 0xA0, 0xC8, 25),
+   VT8500_BANK(0x54, 0x7C, 0xA4, 0xCC, 5),
+   VT8500_BANK(0x

[PATCHv3 1/9] arm: vt8500: Add device tree files for VIA/Wondermedia SoC's

2012-08-21 Thread Tony Prisk

Add device tree files for VT8500, WM8505 and WM8650 SoC's and
reference boards.

Signed-off-by: Tony Prisk 
---
 arch/arm/boot/dts/vt8500-bv07.dts |   31 +
 arch/arm/boot/dts/vt8500.dtsi |  100 +++
 arch/arm/boot/dts/wm8505-ref.dts  |   31 +
 arch/arm/boot/dts/wm8505.dtsi |  126 +
 arch/arm/boot/dts/wm8650-mid.dts  |   31 +
 arch/arm/boot/dts/wm8650.dtsi |  138 +
 6 files changed, 457 insertions(+)
 create mode 100644 arch/arm/boot/dts/vt8500-bv07.dts
 create mode 100644 arch/arm/boot/dts/vt8500.dtsi
 create mode 100644 arch/arm/boot/dts/wm8505-ref.dts
 create mode 100644 arch/arm/boot/dts/wm8505.dtsi
 create mode 100644 arch/arm/boot/dts/wm8650-mid.dts
 create mode 100644 arch/arm/boot/dts/wm8650.dtsi

diff --git a/arch/arm/boot/dts/vt8500-bv07.dts 
b/arch/arm/boot/dts/vt8500-bv07.dts
new file mode 100644
index 000..339a664
--- /dev/null
+++ b/arch/arm/boot/dts/vt8500-bv07.dts
@@ -0,0 +1,31 @@
+/*
+ * vt8500-bv07.dts - Device tree file for Benign BV07 Netbook
+ *
+ * Copyright (C) 2012 Tony Prisk 
+ *
+ * Licensed under GPLv2 or later
+ */
+
+/dts-v1/;
+/include/ "vt8500.dtsi"
+
+/ {
+   model = "Benign BV07 Netbook";
+
+   /*
+* Display node is based on Sascha Hauer's patch on dri-devel.
+* Added a bpp property to calculate the size of the framebuffer
+* until the binding is formalized.
+*/
+   display: display {
+   xres = <800>;
+   yres = <480>;
+   left-margin = <88>;
+   right-margin = <40>;
+   hsync-len = <0>;
+   upper-margin = <32>;
+   lower-margin = <11>;
+   vsync-len = <1>;
+   bpp = <16>;
+   };
+};
diff --git a/arch/arm/boot/dts/vt8500.dtsi b/arch/arm/boot/dts/vt8500.dtsi
new file mode 100644
index 000..78571d5
--- /dev/null
+++ b/arch/arm/boot/dts/vt8500.dtsi
@@ -0,0 +1,100 @@
+/*
+ * vt8500.dtsi - Device tree file for VIA VT8500 SoC
+ *
+ * Copyright (C) 2012 Tony Prisk 
+ *
+ * Licensed under GPLv2 or later
+ */
+
+/include/ "skeleton.dtsi"
+
+/ {
+   compatible = "via,vt8500";
+
+   soc {
+   #address-cells = <1>;
+   #size-cells = <1>;
+   compatible = "simple-bus";
+   ranges;
+   interrupt-parent = <&intc>;
+
+   intc: interrupt-controller@d814 {
+   compatible = "via,vt8500-intc";
+   interrupt-controller;
+   reg = <0xd814 0x1>;
+   #interrupt-cells = <1>;
+   };
+
+   gpio: gpio-controller@d811 {
+   compatible = "via,vt8500-gpio";
+   gpio-controller;
+   reg = <0xd811 0x1>;
+   #gpio-cells = <3>;
+   };
+
+   pmc@d813 {
+   compatible = "via,vt8500-pmc";
+   reg = <0xd813 0x1000>;
+   };
+
+   timer@d8130100 {
+   compatible = "via,vt8500-timer";
+   reg = <0xd8130100 0x28>;
+   interrupts = <36>;
+   };
+
+   ehci@d8007900 {
+   compatible = "via,vt8500-ehci";
+   reg = <0xd8007900 0x200>;
+   interrupts = <43>;
+   };
+
+   uhci@d8007b00 {
+   compatible = "platform-uhci";
+   reg = <0xd8007b00 0x200>;
+   interrupts = <43>;
+   };
+
+   fb@d800e400 {
+   compatible = "via,vt8500-fb";
+   reg = <0xd800e400 0x400>;
+   interrupts = <12>;
+   via,display = <&display>;
+   };
+
+   ge_rops@d8050400 {
+   compatible = "wm,prizm-ge-rops";
+   reg = <0xd8050400 0x100>;
+   };
+
+   uart@d820 {
+   compatible = "via,vt8500-uart";
+   reg = <0xd820 0x1040>;
+   interrupts = <32>;
+   };
+
+   uart@d82b {
+   compatible = "via,vt8500-uart";
+   reg = <0xd82b 0x1040>;
+   interrupts = <33>;
+   };
+
+   uart@d821 {
+   compatible = "via,vt8500-uart";
+   reg = <0xd821 0x1040>;
+   interrupts = <47>;
+   };
+
+   uart@d82c {
+   compatible = "via,vt8500-uart";
+   reg = <0xd82c 0x1040>;
+   interrupts = <50>;
+   };
+
+   rtc@d810 {
+

[PATCHv3 2/9] rtc: vt8500: Add devicetree support for vt8500-rtc

2012-08-21 Thread Tony Prisk

Signed-off-by: Tony Prisk 
---
 drivers/rtc/rtc-vt8500.c |9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-vt8500.c b/drivers/rtc/rtc-vt8500.c
index 9e94fb1..07bf193 100644
--- a/drivers/rtc/rtc-vt8500.c
+++ b/drivers/rtc/rtc-vt8500.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Register definitions
@@ -302,12 +303,18 @@ static int __devexit vt8500_rtc_remove(struct 
platform_device *pdev)
return 0;
 }
 
+static const struct of_device_id wmt_dt_ids[] = {
+   { .compatible = "via,vt8500-rtc", },
+   {}
+};
+
 static struct platform_driver vt8500_rtc_driver = {
.probe  = vt8500_rtc_probe,
.remove = __devexit_p(vt8500_rtc_remove),
.driver = {
.name   = "vt8500-rtc",
.owner  = THIS_MODULE,
+   .of_match_table = of_match_ptr(wmt_dt_ids),
},
 };
 
@@ -315,5 +322,5 @@ module_platform_driver(vt8500_rtc_driver);
 
 MODULE_AUTHOR("Alexey Charkov ");
 MODULE_DESCRIPTION("VIA VT8500 SoC Realtime Clock Driver (RTC)");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("platform:vt8500-rtc");
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCHv3 0/9] * ARM: Update arch-vt8500 to Devicetree *

2012-08-21 Thread Tony Prisk

This patchset updates arch-vt8500 to devicetree and removes all the old-style
code. Support for WM8650 has also been added.

Example dts/dtsi files are given for the three currently supported models.

Major changes:

GPIO code has been converted to a platform_device and rewritten as WM8505
support was broken. Add support for WM8650 gpio controller.

UHCI support was missing. Added this as a generic non-pci uhci controller as
it doesn't require anything special. Should be usable by any system that doesn't
have special requirements to get the UHCI controller working.

Framebuffer code patched to support WM8650. The bindings for this are of concern
but there doesn't seem to be a formalized binding yet. This patch is based off
Sascha Hauer's current patch on the dri-devel mailing list and should be easily
patched out when it's finalized.

Patchset based on Arnd's arm-soc/for-next branch.


Could I get this reviewed, hopefully for inclusion into v3.7?

Regards
Tony Prisk

Changes
v2:
Cleanup style/formatting errors
Removed erroneous commit message about GPIO not being converted to devicetree
Corrected arch-vt8500/irq.c header to correct filename
Changed GPIO driver to use module_platform_driver()
Renamed vt8500_gpio_bank_regs -> vt8500_gpio_bank_regoffsets
Changed vt8500_gpio_bank_regoffset fields to unsigned int
Changed bit-setting code to use BIT() macro
Removed of_find_compatible() and use pdev->dev.of_node in _probe()
Removed regoff field and related code - leftover from old design
Added kerneldoc regarding struct vt8500_gpio_bank_regoffsets fields
Update MODULE_LICENSE on all platform devices to "GPL v2" to match their headers
Renamed dts board files to clarify product names

v3:
Corrected serial driver issue after porting to device tree: pdev->id is no
longer valid.
Corrected irq.c to properly initialize slaved interrupt controller.
Updated framebuffer drivers to use phandles for display node.
Corrected dts definitions for updated framebuffer driver.
EHCI/UHCI patch (Patch 4/9) already in -next via usb-next tree.

Included common clock framework support.
Added initialization code to arch/arm/mach-vt8500/vt8500.c for clocks.
Updated wm8650.dtsi to include basic clocks.


Tony Prisk (9):
  arm: vt8500: Add device tree files for VIA/Wondermedia SoC's
  rtc: vt8500: Add devicetree support for vt8500-rtc
  serial: vt8500: Add devicetree support for vt8500-serial
  usb: vt8500: Add devicetree support for vt8500-ehci and -uhci.
  video: vt8500: Add devicetree support for vt8500-fb and wm8505-fb
  arm: vt8500: Update arch-vt8500 to devicetree support.
  arm: vt8500: doc: Add device tree bindings for arch-vt8500 devices
  arm: vt8500: gpio: Devicetree support for arch-vt8500
  arm: vt8500: clk: Add Common Clock Framework support

 Documentation/devicetree/bindings/arm/vt8500.txt   |   15 +
 .../bindings/arm/vt8500/via,vt8500-intc.txt|   16 +
 .../bindings/arm/vt8500/via,vt8500-pmc.txt |   13 +
 .../bindings/arm/vt8500/via,vt8500-timer.txt   |   15 +
 Documentation/devicetree/bindings/clock/vt8500.txt |   72 +++
 .../devicetree/bindings/gpio/gpio_vt8500.txt   |   24 +
 .../devicetree/bindings/rtc/via,vt8500-rtc.txt |   15 +
 .../bindings/tty/serial/via,vt8500-uart.txt|   15 +
 .../devicetree/bindings/usb/platform-uhci.txt  |   15 +
 .../devicetree/bindings/usb/via,vt8500-ehci.txt|   15 +
 .../devicetree/bindings/vendor-prefixes.txt|2 +
 .../devicetree/bindings/video/via,vt8500-fb.txt|   48 ++
 .../devicetree/bindings/video/wm,prizm-ge-rops.txt |   13 +
 .../devicetree/bindings/video/wm,wm8505-fb.txt |   22 +
 arch/arm/Kconfig   |5 +
 arch/arm/boot/dts/vt8500-bv07.dts  |   31 ++
 arch/arm/boot/dts/vt8500.dtsi  |  100 
 arch/arm/boot/dts/wm8505-ref.dts   |   31 ++
 arch/arm/boot/dts/wm8505.dtsi  |  126 +
 arch/arm/boot/dts/wm8650-mid.dts   |   31 ++
 arch/arm/boot/dts/wm8650.dtsi  |  138 ++
 arch/arm/mach-vt8500/Kconfig   |   72 +--
 arch/arm/mach-vt8500/Makefile  |9 +-
 arch/arm/mach-vt8500/bv07.c|   80 
 arch/arm/mach-vt8500/common.h  |   28 ++
 arch/arm/mach-vt8500/devices-vt8500.c  |   91 
 arch/arm/mach-vt8500/devices-wm8505.c  |   99 
 arch/arm/mach-vt8500/devices.c |  270 ---
 arch/arm/mach-vt8500/devices.h |   88 
 arch/arm/mach-vt8500/gpio.c|  240 --
 arch/arm/mach-vt8500/include/mach/restart.h|4 +-
 arch/arm/mach-vt8500/include/mach/vt8500_irqs.h|   88 
 arch/arm/mach-vt8500/include/mach/vt8500_regs.h|   79 
 arch/arm/mach-vt8500/include/mach/wm8505_irqs.h|  115 -
 arch/arm/mach-vt8500/include/mach/wm8505_regs.h|   78 ---
 arch/arm/mach-vt8500/irq.c

[PATCHv3 5/9] video: vt8500: Add devicetree support for vt8500-fb and wm8505-fb

2012-08-21 Thread Tony Prisk

Update vt8500-fb, wm8505-fb and wmt-ge-rops to support device
tree bindings.
Small change in wm8505-fb.c to support WM8650 framebuffer color
format.

Signed-off-by: Tony Prisk 
---
 drivers/video/Kconfig   |6 +--
 drivers/video/vt8500lcdfb.c |   79 ++-
 drivers/video/wm8505fb.c|   97 ---
 drivers/video/wmt_ge_rops.c |9 +++-
 4 files changed, 161 insertions(+), 30 deletions(-)

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 0217f74..b66d951 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1788,7 +1788,7 @@ config FB_AU1200
 
 config FB_VT8500
bool "VT8500 LCD Driver"
-   depends on (FB = y) && ARM && ARCH_VT8500 && VTWM_VERSION_VT8500
+   depends on (FB = y) && ARM && ARCH_VT8500
select FB_WMT_GE_ROPS
select FB_SYS_IMAGEBLIT
help
@@ -1797,11 +1797,11 @@ config FB_VT8500
 
 config FB_WM8505
bool "WM8505 frame buffer support"
-   depends on (FB = y) && ARM && ARCH_VT8500 && VTWM_VERSION_WM8505
+   depends on (FB = y) && ARM && ARCH_VT8500
select FB_WMT_GE_ROPS
select FB_SYS_IMAGEBLIT
help
- This is the framebuffer driver for WonderMedia WM8505
+ This is the framebuffer driver for WonderMedia WM8505/WM8650
  integrated LCD controller.
 
 source "drivers/video/geode/Kconfig"
diff --git a/drivers/video/vt8500lcdfb.c b/drivers/video/vt8500lcdfb.c
index 2a5fe6e..758e359 100644
--- a/drivers/video/vt8500lcdfb.c
+++ b/drivers/video/vt8500lcdfb.c
@@ -35,6 +35,13 @@
 #include "vt8500lcdfb.h"
 #include "wmt_ge_rops.h"
 
+#ifdef CONFIG_OF
+#include 
+#include 
+#include 
+#endif
+
+
 #define to_vt8500lcd_info(__info) container_of(__info, \
struct vt8500lcd_info, fb)
 
@@ -270,15 +277,21 @@ static int __devinit vt8500lcd_probe(struct 
platform_device *pdev)
 {
struct vt8500lcd_info *fbi;
struct resource *res;
-   struct vt8500fb_platform_data *pdata = pdev->dev.platform_data;
void *addr;
int irq, ret;
 
+   struct fb_videomode of_mode;
+   struct device_node  *np;
+   u32 bpp;
+   dma_addr_t fb_mem_phys;
+   unsigned long fb_mem_len;
+   void *fb_mem_virt;
+
ret = -ENOMEM;
fbi = NULL;
 
-   fbi = kzalloc(sizeof(struct vt8500lcd_info) + sizeof(u32) * 16,
-   GFP_KERNEL);
+   fbi = devm_kzalloc(&pdev->dev, sizeof(struct vt8500lcd_info)
+   + sizeof(u32) * 16, GFP_KERNEL);
if (!fbi) {
dev_err(&pdev->dev, "Failed to initialize framebuffer 
device\n");
ret = -ENOMEM;
@@ -333,9 +346,45 @@ static int __devinit vt8500lcd_probe(struct 
platform_device *pdev)
goto failed_free_res;
}
 
-   fbi->fb.fix.smem_start  = pdata->video_mem_phys;
-   fbi->fb.fix.smem_len= pdata->video_mem_len;
-   fbi->fb.screen_base = pdata->video_mem_virt;
+   np = of_parse_phandle(pdev->dev.of_node, "via,display", 0);
+   if (!np) {
+   pr_err("%s: No display description in Device Tree\n", __func__);
+   ret = -EINVAL;
+   goto failed_free_res;
+   }
+
+   /*
+* This code is copied from Sascha Hauer's of_videomode helper
+* and can be replaced with a call to the helper once mainlined
+*/
+   ret = 0;
+   ret |= of_property_read_u32(np, "xres", &of_mode.xres);
+   ret |= of_property_read_u32(np, "yres", &of_mode.yres);
+   ret |= of_property_read_u32(np, "left-margin", &of_mode.left_margin);
+   ret |= of_property_read_u32(np, "right-margin", &of_mode.right_margin);
+   ret |= of_property_read_u32(np, "hsync-len", &of_mode.hsync_len);
+   ret |= of_property_read_u32(np, "upper-margin", &of_mode.upper_margin);
+   ret |= of_property_read_u32(np, "lower-margin", &of_mode.lower_margin);
+   ret |= of_property_read_u32(np, "vsync-len", &of_mode.vsync_len);
+   ret |= of_property_read_u32(np, "bpp", &bpp);
+   if (ret) {
+   pr_err("%s: Unable to read display properties\n", __func__);
+   goto failed_free_res;
+   }
+   of_mode.vmode = FB_VMODE_NONINTERLACED;
+
+   /* try allocating the framebuffer */
+   fb_mem_len = of_mode.xres * of_mode.yres * 2 * (bpp / 8);
+   fb_mem_virt = dma_alloc_coherent(&pdev->dev, fb_mem_len, &fb_mem_phys,
+   GFP_KERNEL);
+   if (!fb_mem_virt) {
+   pr_err("%s: Failed to allocate framebuffer\n", __func__);
+   return -ENOMEM;
+   };
+
+   fbi->fb.fix.smem_start  = fb_mem_phys;
+   fbi->fb.fix.smem_len= fb_mem_len;
+   fbi->fb.screen_base = fb_mem_virt;
 
fbi->palette_size   = PAGE_ALIGN(512);
fbi->palette_cpu= dma_alloc_coherent(

[PATCHv3 4/9] usb: vt8500: Add devicetree support for vt8500-ehci and -uhci.

2012-08-21 Thread Tony Prisk

Add devicetree support for vt8500-ehci.
Convert vt8500-uhci to a generic non-pci platform-uhci with
device tree support.

Signed-off-by: Tony Prisk 
---
 drivers/usb/host/Kconfig |4 +-
 drivers/usb/host/ehci-vt8500.c   |   25 --
 drivers/usb/host/uhci-hcd.c  |5 ++
 drivers/usb/host/uhci-platform.c |  169 ++
 4 files changed, 195 insertions(+), 8 deletions(-)
 create mode 100644 drivers/usb/host/uhci-platform.c

diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index dcfaaa9..d7a6b10 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -450,7 +450,7 @@ config USB_OHCI_LITTLE_ENDIAN
 
 config USB_UHCI_HCD
tristate "UHCI HCD (most Intel and VIA) support"
-   depends on USB && (PCI || SPARC_LEON)
+   depends on USB && (PCI || SPARC_LEON || ARCH_VT8500)
---help---
  The Universal Host Controller Interface is a standard by Intel for
  accessing the USB hardware in the PC (which is also called the USB
@@ -468,7 +468,7 @@ config USB_UHCI_HCD
 config USB_UHCI_SUPPORT_NON_PCI_HC
bool
depends on USB_UHCI_HCD
-   default y if SPARC_LEON
+   default y if (SPARC_LEON  || ARCH_VT8500)
 
 config USB_UHCI_BIG_ENDIAN_MMIO
bool
diff --git a/drivers/usb/host/ehci-vt8500.c b/drivers/usb/host/ehci-vt8500.c
index c1eda73..0e1637b 100644
--- a/drivers/usb/host/ehci-vt8500.c
+++ b/drivers/usb/host/ehci-vt8500.c
@@ -16,6 +16,7 @@
  *
  */
 
+#include 
 #include 
 
 static int ehci_update_device(struct usb_hcd *hcd, struct usb_device *udev)
@@ -84,20 +85,23 @@ static const struct hc_driver vt8500_ehci_hc_driver = {
.clear_tt_buffer_complete   = ehci_clear_tt_buffer_complete,
 };
 
+static u64 wmt_ehci_dma_mask = DMA_BIT_MASK(32);
+
 static int vt8500_ehci_drv_probe(struct platform_device *pdev)
 {
struct usb_hcd *hcd;
struct ehci_hcd *ehci;
struct resource *res;
+   int irq;
int ret;
 
if (usb_disabled())
return -ENODEV;
 
-   if (pdev->resource[1].flags != IORESOURCE_IRQ) {
-   pr_debug("resource[1] is not IORESOURCE_IRQ");
-   return -ENOMEM;
-   }
+   /* devicetree created devices don't specify a dma mask */
+   if (!pdev->dev.dma_mask)
+   pdev->dev.dma_mask = &wmt_ehci_dma_mask;
+
hcd = usb_create_hcd(&vt8500_ehci_hc_driver, &pdev->dev, "VT8500");
if (!hcd)
return -ENOMEM;
@@ -134,8 +138,9 @@ static int vt8500_ehci_drv_probe(struct platform_device 
*pdev)
 
ehci_reset(ehci);
 
-   ret = usb_add_hcd(hcd, pdev->resource[1].start,
- IRQF_SHARED);
+   irq = platform_get_irq(pdev, 0);
+
+   ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
if (ret == 0) {
platform_set_drvdata(pdev, hcd);
return ret;
@@ -162,6 +167,11 @@ static int vt8500_ehci_drv_remove(struct platform_device 
*pdev)
return 0;
 }
 
+static const struct of_device_id vt8500_ehci_ids[] = {
+   { .compatible = "via,vt8500-ehci", },
+   {}
+};
+
 static struct platform_driver vt8500_ehci_driver = {
.probe  = vt8500_ehci_drv_probe,
.remove = vt8500_ehci_drv_remove,
@@ -169,7 +179,10 @@ static struct platform_driver vt8500_ehci_driver = {
.driver = {
.name   = "vt8500-ehci",
.owner  = THIS_MODULE,
+   .of_match_table = of_match_ptr(vt8500_ehci_ids),
}
 };
 
 MODULE_ALIAS("platform:vt8500-ehci");
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(of, vt8500_ehci_ids);
diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
index e4db350..5da5c99 100644
--- a/drivers/usb/host/uhci-hcd.c
+++ b/drivers/usb/host/uhci-hcd.c
@@ -846,6 +846,11 @@ static const char hcd_name[] = "uhci_hcd";
 #define PLATFORM_DRIVER uhci_grlib_driver
 #endif
 
+#ifdef CONFIG_ARCH_VT8500
+#include "uhci-platform.c"
+#define PLATFORM_DRIVER uhci_platform_driver
+#endif
+
 #if !defined(PCI_DRIVER) && !defined(PLATFORM_DRIVER)
 #error "missing bus glue for uhci-hcd"
 #endif
diff --git a/drivers/usb/host/uhci-platform.c b/drivers/usb/host/uhci-platform.c
new file mode 100644
index 000..35ca094
--- /dev/null
+++ b/drivers/usb/host/uhci-platform.c
@@ -0,0 +1,169 @@
+/*
+ * Generic UHCI HCD (Host Controller Driver) for Platform Devices
+ *
+ * Copyright (c) 2011 Tony Prisk 
+ *
+ * This file is based on uhci-grlib.c
+ * (C) Copyright 2004-2007 Alan Stern, st...@rowland.harvard.edu
+ */
+
+#include 
+#include 
+
+static int uhci_platform_init(struct usb_hcd *hcd)
+{
+   struct uhci_hcd *uhci = hcd_to_uhci(hcd);
+
+   uhci->rh_numports = uhci_count_ports(hcd);
+
+   /* Set up pointers to generic functions */
+   uhci->reset_hc = uhci_generic_reset_hc;
+   uhci->check_and_reset_hc = uhci_generic_check_and_reset_hc;
+
+   /* No special actions

[PATCHv3 3/9] serial: vt8500: Add devicetree support for vt8500-serial

2012-08-21 Thread Tony Prisk

Signed-off-by: Tony Prisk 
---
 drivers/tty/serial/vt8500_serial.c |   37 
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/serial/vt8500_serial.c 
b/drivers/tty/serial/vt8500_serial.c
index 2be006f..72e32db 100644
--- a/drivers/tty/serial/vt8500_serial.c
+++ b/drivers/tty/serial/vt8500_serial.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * UART Register offsets
@@ -76,6 +77,8 @@
 #define RX_FIFO_INTS   (RXFAF | RXFF | RXOVER | PER | FER | RXTOUT)
 #define TX_FIFO_INTS   (TXFAE | TXFE | TXUDR)
 
+#define VT8500_MAX_PORTS   6
+
 struct vt8500_port {
struct uart_port uart;
char name[16];
@@ -83,6 +86,13 @@ struct vt8500_port {
unsigned int ier;
 };
 
+/*
+ * we use this variable to keep track of which ports
+ * have been allocated as we can't use pdev->id in
+ * devicetree
+ */
+static unsigned long vt8500_ports_in_use;
+
 static inline void vt8500_write(struct uart_port *port, unsigned int val,
 unsigned int off)
 {
@@ -431,7 +441,7 @@ static int vt8500_verify_port(struct uart_port *port,
return 0;
 }
 
-static struct vt8500_port *vt8500_uart_ports[4];
+static struct vt8500_port *vt8500_uart_ports[VT8500_MAX_PORTS];
 static struct uart_driver vt8500_uart_driver;
 
 #ifdef CONFIG_SERIAL_VT8500_CONSOLE
@@ -549,6 +559,7 @@ static int __devinit vt8500_serial_probe(struct 
platform_device *pdev)
struct vt8500_port *vt8500_port;
struct resource *mmres, *irqres;
int ret;
+   int port;
 
mmres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
irqres = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
@@ -559,13 +570,25 @@ static int __devinit vt8500_serial_probe(struct 
platform_device *pdev)
if (!vt8500_port)
return -ENOMEM;
 
+   /* calculate the port id */
+   port = find_first_zero_bit(&vt8500_ports_in_use,
+   sizeof(vt8500_ports_in_use));
+   if (port > VT8500_MAX_PORTS)
+   return -ENODEV;
+
+   /* reserve the port id */
+   if (test_and_set_bit(port, &vt8500_ports_in_use)) {
+   /* port already in use - shouldn't really happen */
+   return -EBUSY;
+   }
+
vt8500_port->uart.type = PORT_VT8500;
vt8500_port->uart.iotype = UPIO_MEM;
vt8500_port->uart.mapbase = mmres->start;
vt8500_port->uart.irq = irqres->start;
vt8500_port->uart.fifosize = 16;
vt8500_port->uart.ops = &vt8500_uart_pops;
-   vt8500_port->uart.line = pdev->id;
+   vt8500_port->uart.line = port;
vt8500_port->uart.dev = &pdev->dev;
vt8500_port->uart.flags = UPF_IOREMAP | UPF_BOOT_AUTOCONF;
vt8500_port->uart.uartclk = 2400;
@@ -579,7 +602,7 @@ static int __devinit vt8500_serial_probe(struct 
platform_device *pdev)
goto err;
}
 
-   vt8500_uart_ports[pdev->id] = vt8500_port;
+   vt8500_uart_ports[port] = vt8500_port;
 
uart_add_one_port(&vt8500_uart_driver, &vt8500_port->uart);
 
@@ -603,12 +626,18 @@ static int __devexit vt8500_serial_remove(struct 
platform_device *pdev)
return 0;
 }
 
+static const struct of_device_id wmt_dt_ids[] = {
+   { .compatible = "via,vt8500-uart", },
+   {}
+};
+
 static struct platform_driver vt8500_platform_driver = {
.probe  = vt8500_serial_probe,
.remove = __devexit_p(vt8500_serial_remove),
.driver = {
.name = "vt8500_serial",
.owner = THIS_MODULE,
+   .of_match_table = of_match_ptr(wmt_dt_ids),
},
 };
 
@@ -642,4 +671,4 @@ module_exit(vt8500_serial_exit);
 
 MODULE_AUTHOR("Alexey Charkov ");
 MODULE_DESCRIPTION("Driver for vt8500 serial device");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode

2012-08-21 Thread Borislav Petkov

On Tue, Aug 21, 2012 at 01:13:26PM -0700, H. Peter Anvin wrote:
> I don't know what Borislav was suggesting with "BIOS overrides", is
> that another CPU-specific thing?

Not CPU- but rather platform-specific. It is Thomas Renninger's
mechanism to override BIOS tables.

-- 
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
GM: Alberto Bozzo
Reg: Dornach, Landkreis Muenchen
HRB Nr. 43632 WEEE Registernr: 129 19551
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCHv3 9/9] arm: vt8500: clk: Add Common Clock Framework support

2012-08-21 Thread Tony Prisk

This patch adds common clock framework support for arch-vt8500.
Support for PLL and device clocks on VT8500, WM8505 and WM8650
are included.

Signed-off-by: Tony Prisk 
---
 drivers/clk/Makefile |1 +
 drivers/clk/clk-vt8500.c |  496 ++
 2 files changed, 497 insertions(+)
 create mode 100644 drivers/clk/clk-vt8500.c

diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 5869ea3..42fb173 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_ARCH_SOCFPGA)+= socfpga/
 obj-$(CONFIG_PLAT_SPEAR)   += spear/
 obj-$(CONFIG_ARCH_U300)+= clk-u300.o
 obj-$(CONFIG_ARCH_INTEGRATOR)  += versatile/
+obj-$(CONFIG_ARCH_VT8500)  += clk-vt8500.o
 
 # Chip specific
 obj-$(CONFIG_COMMON_CLK_WM831X) += clk-wm831x.o
diff --git a/drivers/clk/clk-vt8500.c b/drivers/clk/clk-vt8500.c
new file mode 100644
index 000..524c479
--- /dev/null
+++ b/drivers/clk/clk-vt8500.c
@@ -0,0 +1,496 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* All clocks share the same lock as none can be changed concurrently */
+static DEFINE_SPINLOCK(_lock);
+
+struct clk_device {
+   struct clk_hw   hw;
+   void __iomem*div_reg;
+   unsigned intdiv_mask;
+   void __iomem*en_reg;
+   int en_bit;
+   spinlock_t  *lock;
+};
+
+/*
+ * Add new PLL_TYPE_x definitions here as required. Use the first known model
+ * to support the new type as the name.
+ * Add case statements to vtwm_pll_recalc_rate(), vtwm_pll_round_round() and
+ * vtwm_pll_set_rate() to handle the new PLL_TYPE_x
+ */
+
+#define PLL_TYPE_VT85000
+#define PLL_TYPE_WM86501
+
+struct clk_pll {
+   struct clk_hw   hw;
+   void __iomem*reg;
+   spinlock_t  *lock;
+   int type;
+};
+
+static void __iomem *pmc_base;
+
+#define to_clk_device(_hw) container_of(_hw, struct clk_device, hw)
+
+
+#define VT8500_PMC_BUSY_MASK   0x18
+
+static void vt8500_pmc_wait_busy(void)
+{
+   while (readl(pmc_base) & VT8500_PMC_BUSY_MASK)
+   cpu_relax();
+}
+
+static void vt8500_dclk_endisable(struct clk_hw *hw, int enable)
+{
+   struct clk_device *cdev = to_clk_device(hw);
+   u32 en_val;
+   unsigned long flags = 0;
+
+   spin_lock_irqsave(cdev->lock, flags);
+
+   en_val = readl(cdev->en_reg);
+
+   if (enable)
+   en_val |= BIT(cdev->en_bit);
+   else
+   en_val &= ~BIT(cdev->en_bit);
+
+   writel(en_val, cdev->en_reg);
+
+   spin_unlock_irqrestore(cdev->lock, flags);
+}
+
+static int vt8500_dclk_enable(struct clk_hw *hw)
+{
+   vt8500_dclk_endisable(hw, 1);
+   return 0;
+}
+
+static void vt8500_dclk_disable(struct clk_hw *hw)
+{
+   vt8500_dclk_endisable(hw, 0);
+}
+
+static int vt8500_dclk_is_enabled(struct clk_hw *hw)
+{
+   struct clk_device *cdev = to_clk_device(hw);
+   u32 en_val = (readl(cdev->en_reg) & BIT(cdev->en_bit));
+
+   return en_val ? 1 : 0;
+}
+
+static unsigned long vt8500_dclk_recalc_rate(struct clk_hw *hw,
+   unsigned long parent_rate)
+{
+   struct clk_device *cdev = to_clk_device(hw);
+   u32 div = readl(cdev->div_reg) & cdev->div_mask;
+
+   /* Special case for SDMMC devices */
+   if ((cdev->div_mask == 0x3F) && (div & BIT(5)))
+   div = 64 * (div & 0x1f);
+
+   /* div == 0 is actually the highest divisor */
+   if (div == 0)
+   div = (cdev->div_mask + 1);
+
+   return parent_rate / div;
+}
+
+static long vt8500_dclk_round_rate(struct clk_hw *hw, unsigned long rate,
+   unsigned long *prate)
+{
+   u32 divisor = rate / *prate;
+
+   return *prate / divisor;
+}
+
+static int vt8500_dclk_set_rate(struct clk_hw *hw, unsigned long rate,
+   unsigned long parent_rate)
+{
+   struct clk_device *cdev = to_clk_device(hw);
+   u32 divisor = rate / parent_rate;
+   unsigned long flags = 0;
+
+   if (divisor == cdev->div_mask + 1)
+   divisor = 0;
+
+   if (divisor > cdev->div_mask) {
+   pr_err("%s: invalid divisor for clock\n", __func__);
+   return -EINVAL;
+   }
+
+   spin_lock_irqsave(cdev->lock, flags);
+
+   vt8500_pmc_wait_busy();
+   writel(divisor, cdev->div_reg);
+   vt8500_pmc_wait_busy();
+
+   spin_lock_irqsave(cdev->lock, flags);
+
+   return 0;
+}
+
+
+static const struct clk_ops vt8500_gated_clk_ops = {
+   .enable = vt8500_dclk_enable,
+   .disable = vt8500_dclk_disable,
+   .is_enabled = vt8500_dclk_is_enabled,
+};
+
+static const struct clk_ops vt8500_divisor_clk_ops = {
+   .round_rate = vt8500_dclk_round_rate,
+   .set_rate = vt8500_dclk_set_rate,
+   .recalc_rate = vt8500_dclk_recalc_rate,
+};
+
+static const struct clk_ops vt8500_gated_divisor_clk_ops = {

RE: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode

2012-08-21 Thread Yu, Fenghua

> -Original Message-
> From: Borislav Petkov [mailto:b...@amd64.org]
> Sent: Tuesday, August 21, 2012 1:49 PM
> To: H. Peter Anvin
> Cc: Yu, Fenghua; Henrique de Moraes Holschuh; Ingo Molnar; Thomas
> Gleixner; Mallick, Asit K; Tigran Aivazian; Andreas Herrmann; Borislav
> Petkov; linux-kernel; x86
> Subject: Re: [PATCH 04/11] x86/microcode_core_early.c: Define
> interfaces for early load ucode
> 
> On Tue, Aug 21, 2012 at 01:13:26PM -0700, H. Peter Anvin wrote:
> > I don't know what Borislav was suggesting with "BIOS overrides", is
> > that another CPU-specific thing?
> 
> Not CPU- but rather platform-specific. It is Thomas Renninger's
> mechanism to override BIOS tables.

That's ACPI override. I think the ACPI tables could be put in kernel/x86/acpi/.

Thanks.

-Fenghua

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode

2012-08-21 Thread H. Peter Anvin

On 08/21/2012 01:48 PM, Borislav Petkov wrote:
> On Tue, Aug 21, 2012 at 01:13:26PM -0700, H. Peter Anvin wrote:
>> I don't know what Borislav was suggesting with "BIOS overrides", is
>> that another CPU-specific thing?
> 
> Not CPU- but rather platform-specific. It is Thomas Renninger's
> mechanism to override BIOS tables.
> 

s/BIOS/ACPI/... Yes, so really it doesn't have any meaningful reason to
live under the CPU vendor.

-hpa


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode

2012-08-21 Thread H. Peter Anvin

On 08/21/2012 01:52 PM, Yu, Fenghua wrote:
>> -Original Message-
>> From: Borislav Petkov [mailto:b...@amd64.org]
>> Sent: Tuesday, August 21, 2012 1:49 PM
>> To: H. Peter Anvin
>> Cc: Yu, Fenghua; Henrique de Moraes Holschuh; Ingo Molnar; Thomas
>> Gleixner; Mallick, Asit K; Tigran Aivazian; Andreas Herrmann; Borislav
>> Petkov; linux-kernel; x86
>> Subject: Re: [PATCH 04/11] x86/microcode_core_early.c: Define
>> interfaces for early load ucode
>>
>> On Tue, Aug 21, 2012 at 01:13:26PM -0700, H. Peter Anvin wrote:
>>> I don't know what Borislav was suggesting with "BIOS overrides", is
>>> that another CPU-specific thing?
>>
>> Not CPU- but rather platform-specific. It is Thomas Renninger's
>> mechanism to override BIOS tables.
> 
> That's ACPI override. I think the ACPI tables could be put in 
> kernel/x86/acpi/.
> 

kernel/acpi... ACPI is not x86-specific.

-hpa

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC] Move kfree outside pde_unload_lock

2012-08-21 Thread Nathan Zimmer


I am currently tracking a hot lock reported by a customer on a large, 512-core
system. I am currently running 3.6.0-rc1, but the issue looks like it has been
this way for a very long time.
The offending lock is proc_dir_entry->pde_unload_lock.  

In proc_reg_release we are doing a kfree under the spinlock, which is OK, but it
means we are holding the lock longer than required. Scaling improved when I
moved the kfree out.

Also, shouldn't the comment on pde_unload_lock note that pde_openers and
pde_unload_completion are both used under the lock?

Here is some data from a quick test program which just reads from /proc/cpuinfo.
Lower is better; as you can see, the worst-case scenario is improved.
        baseline  moved kfree
tasks   read-sec  read-sec
1   0.0141  0.0141
2   0.0140  0.0140
4   0.0140  0.0141
8   0.0145  0.0145
16  0.0553  0.0548
32  0.1688  0.1622
64  0.5017  0.3856
128 1.7005  0.9710
256 5.2513  2.6519
512 8.0529  6.2976

If the patch looks agreeable I will resend it properly.
 
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 7ac817b..46016c1 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -403,9 +403,11 @@ static int proc_reg_release(struct inode *inode, struct 
file *file)
release = pde->proc_fops->release;
if (pdeo) {
list_del(&pdeo->lh);
-   kfree(pdeo);
}
spin_unlock(&pde->pde_unload_lock);
+   if (pdeo) {
+   kfree(pdeo);
+   }
 
if (release)
rv = release(inode, file);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: [PATCH 04/11] x86/microcode_core_early.c: Define interfaces for early load ucode

2012-08-21 Thread Yu, Fenghua

> -Original Message-
> From: H. Peter Anvin [mailto:h...@zytor.com]
> Sent: Tuesday, August 21, 2012 1:54 PM
> To: Yu, Fenghua
> Cc: Borislav Petkov; Henrique de Moraes Holschuh; Ingo Molnar; Thomas
> Gleixner; Mallick, Asit K; Tigran Aivazian; Andreas Herrmann; Borislav
> Petkov; linux-kernel; x86
> Subject: Re: [PATCH 04/11] x86/microcode_core_early.c: Define
> interfaces for early load ucode
> 
> On 08/21/2012 01:52 PM, Yu, Fenghua wrote:
> >> -Original Message-
> >> From: Borislav Petkov [mailto:b...@amd64.org]
> >> Sent: Tuesday, August 21, 2012 1:49 PM
> >> To: H. Peter Anvin
> >> Cc: Yu, Fenghua; Henrique de Moraes Holschuh; Ingo Molnar; Thomas
> >> Gleixner; Mallick, Asit K; Tigran Aivazian; Andreas Herrmann;
> Borislav
> >> Petkov; linux-kernel; x86
> >> Subject: Re: [PATCH 04/11] x86/microcode_core_early.c: Define
> >> interfaces for early load ucode
> >>
> >> On Tue, Aug 21, 2012 at 01:13:26PM -0700, H. Peter Anvin wrote:
> >>> I don't know what Borislav was suggesting with "BIOS overrides", is
> >>> that another CPU-specific thing?
> >>
> >> Not CPU- but rather platform-specific. It is Thomas Renninger's
> >> mechanism to override BIOS tables.
> >
> > That's ACPI override. I think the ACPI tables could be put in
> kernel/x86/acpi/.
> >
> 
> kernel/acpi... ACPI is not x86-specific.

That's right.

Thanks.

-Fenghua
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v9 2/2] kvm: On Ack, De-assert & Notify KVM_IRQFD extension

2012-08-21 Thread Alex Williamson

On Tue, 2012-08-21 at 23:37 +0300, Michael S. Tsirkin wrote:
> On Tue, Aug 21, 2012 at 01:29:14PM -0600, Alex Williamson wrote:
> > For VFIO based device assignment we'd like a mechanism to allow level
> > triggered interrupts to be directly injected into KVM.  KVM_IRQFD
> > already allows this for edge triggered interrupts, but for level, we
> > need to watch for acknowledgement of the interrupt from the guest to
> > provide us a hint when to test the device and allow it to re-assert
> > if necessary.  To do this, we create a new KVM_IRQFD mode called
> > "On Ack, De-assert & Notify", or OADN.  In this mode, an interrupt
> > injection provides only a gsi assertion.  We then hook into the IRQ
> > ACK notifier, which when triggered de-asserts the gsi and notifies
> > via another eventfd.  It's then the responsibility of the user to
> > re-assert the interrupt if service is still required.
> > 
> > Signed-off-by: Alex Williamson 
> 
> Naming aside, looks good.
> I think I see some minor bugs, and I added some improvement
> suggestions below.
> 
> Thanks!
> 
> > ---
> > 
> >  Documentation/virtual/kvm/api.txt |   13 ++
> >  arch/x86/kvm/x86.c|1 
> >  include/linux/kvm.h   |6 +
> >  include/linux/kvm_host.h  |1 
> >  virt/kvm/eventfd.c|  193 
> > -
> >  5 files changed, 210 insertions(+), 4 deletions(-)
> > 
> > diff --git a/Documentation/virtual/kvm/api.txt 
> > b/Documentation/virtual/kvm/api.txt
> > index bf33aaa..87d7321 100644
> > --- a/Documentation/virtual/kvm/api.txt
> > +++ b/Documentation/virtual/kvm/api.txt
> > @@ -1946,6 +1946,19 @@ the guest using the specified gsi pin.  The irqfd is 
> > removed using
> >  the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
> >  and kvm_irqfd.gsi.
> >  
> > +With KVM_CAP_IRQFD_OADN, KVM_IRQFD supports an "On Ack, De-assert &
> > +Notify" option that allows emulation of level-triggered interrupts.
> > +When kvm_irqfd.fd is triggered, the requested gsi is asserted and
> > +remains asserted until interaction with the irqchip indicates the
> > +VM has acknowledged the interrupt, such as an EOI.  On acknowledgement
> > +the gsi is automatically de-asserted and the user is notified via
> > +kvm_irqfd.notifyfd.  The user is then required to re-assert the
> > +interrupt if the associated device still requires service.  To enable
> > +this mode, configure the KVM_IRQFD using the KVM_IRQFD_FLAG_OADN flag
> > +and specify kvm_irqfd.notifyfd.  Note that closing kvm_irqfd.notifyfd
> > +while configured in this mode does not disable the irqfd.  The
> > +KVM_IRQFD_FLAG_OADN flag is only necessary on assignment.
> > +
> >  4.76 KVM_PPC_ALLOCATE_HTAB
> >  
> >  Capability: KVM_CAP_PPC_ALLOC_HTAB
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index cd98673..fde7b66 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -2175,6 +2175,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> > case KVM_CAP_PCI_2_3:
> > case KVM_CAP_KVMCLOCK_CTRL:
> > case KVM_CAP_IRQFD_IRQ_SOURCE_ID:
> > +   case KVM_CAP_IRQFD_OADN:
> > r = 1;
> > break;
> > case KVM_CAP_COALESCED_MMIO:
> > diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> > index ae66b9c..ec0f1d8 100644
> > --- a/include/linux/kvm.h
> > +++ b/include/linux/kvm.h
> > @@ -619,6 +619,7 @@ struct kvm_ppc_smmu_info {
> >  #define KVM_CAP_S390_COW 79
> >  #define KVM_CAP_PPC_ALLOC_HTAB 80
> >  #define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81
> > +#define KVM_CAP_IRQFD_OADN 82
> >  
> >  #ifdef KVM_CAP_IRQ_ROUTING
> >  
> > @@ -684,12 +685,15 @@ struct kvm_xen_hvm_config {
> >  #endif
> >  
> >  #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
> > +/* Availabie with KVM_CAP_IRQFD_OADN */
> 
> Need to also explain what it is.

Beyond Documentation/virtual/kvm/api.txt?  I don't see much else getting
documented here.  Or maybe you mean

/* On Ack, De-assert & Notify */

> > +#define KVM_IRQFD_FLAG_OADN (1 << 1)
> >  
> >  struct kvm_irqfd {
> > __u32 fd;
> > __u32 gsi;
> > __u32 flags;
> > -   __u8  pad[20];
> > +   __u32 notifyfd;
> 
> Document that this is only valid with OADN flag.  Might be a good idea
> to rename this to deassert_on_ack_notifyfd or oadn_notifyfd
> to avoid confusion.

I'll add a /* only valid with KVM_IRQFD_FLAG_OADN */

I can change the name if you prefer, but it seems pretty clear to me how
a notifyfd might relate to a "On Ack, De-assert & Notify" irqfd without
pulling longer names into userspace.

> > +   __u8  pad[16];
> >  };
> >  
> >  struct kvm_clock_data {
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index b763230..d502d08 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -284,6 +284,7 @@ struct kvm {
> > struct {
> > spinlock_tlock;
> > struct list_head  items;
> > +   struct list_head  oadns;
> > } irqfds;
> > struct list_hea

Re: [PATCH 0/4] fat: fix ESTALE errors

2012-08-21 Thread Bastien ROUCARIES

On Tue, Aug 21, 2012 at 8:41 AM, OGAWA Hirofumi
 wrote:
> Namjae Jeon  writes:
>
>> And.. Hi Ogawa.
>> I checked other filesystem about unlink - inode issue. but I found
>> Ext4 have same issue.
>> Although other filesysm is having this issue, Can we think It could be
>> only FAT issue ?
>
> (I assume this issue == orphaned inode issue).
>
> ext* doesn't have this issue. If ext* made orphaned inode, ext* doesn't
> delete inode from inode table until calling iput() from last referencer.
>
> In FAT case, FAT inode is embedded into dir entry. So, if unlinked inode
> (then orphaned inode is detached (fat_detach())), FAT deletes inode (dir
> entry) from dir.

Would it be possible to not delete it?

I mean using a special value for this case: mark the entry as deleted (using
0xe5 as the first character) but set, for instance, the creation month equal to 15.

This entry will therefore be kept and not overwritten by subsequent
file creations.

At least this solves the deleted-file issue (not the rename issue, unfortunately).

Bastien
> OGAWA Hirofumi 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v9 1/2] kvm: Use a reserved IRQ source ID for irqfd

2012-08-21 Thread Alex Williamson

On Tue, 2012-08-21 at 23:41 +0300, Michael S. Tsirkin wrote:
> On Tue, Aug 21, 2012 at 02:06:19PM -0600, Alex Williamson wrote:
> > On Tue, 2012-08-21 at 22:58 +0300, Michael S. Tsirkin wrote:
> > > On Tue, Aug 21, 2012 at 01:29:06PM -0600, Alex Williamson wrote:
> > > > KVM_IRQFD currently uses the reserved KVM_USERSPACE_IRQ_SOURCE_ID
> > > > which is also shared with userspace injection methods like
> > > > KVM_IRQ_LINE.  This can cause a conflict if an irqfd triggers on
> > > > a GSI asserted through KVM_IRQ_LINE.
> > > 
> > > What kind of conflict do you envision?  Pls note level interrupts are
> > > unsupported ATM.
> > 
> > If KVM_IRQ_LINE asserts a level interrupt and KVM_IRQFD triggers on the
> > same GSI then the pin is no longer asserted as userspace thinks it is.
> > Do we just chalk this up to userspace error?
> 
> Yes: using a level GSI with current irqfd is a userspace error
> because you can lose interrupts anyway.
> 
> Are edge GSIs affected?

I wouldn't think so.

> > > > Move irqfd to its own reserved IRQ source ID.  Add a capability for
> > > > userspace to test for this fix.
> > > > 
> > > > Signed-off-by: Alex Williamson 
> > > > ---
> > > > 
> > > >  arch/x86/kvm/x86.c   |3 +++
> > > >  include/linux/kvm.h  |1 +
> > > >  include/linux/kvm_host.h |1 +
> > > >  virt/kvm/eventfd.c   |6 +++---
> > > >  4 files changed, 8 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > > index 42bce48..cd98673 100644
> > > > --- a/arch/x86/kvm/x86.c
> > > > +++ b/arch/x86/kvm/x86.c
> > > > @@ -2174,6 +2174,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> > > > case KVM_CAP_GET_TSC_KHZ:
> > > > case KVM_CAP_PCI_2_3:
> > > > case KVM_CAP_KVMCLOCK_CTRL:
> > > > +   case KVM_CAP_IRQFD_IRQ_SOURCE_ID:
> > > > r = 1;
> > > > break;
> > > > case KVM_CAP_COALESCED_MMIO:
> > > > @@ -6258,6 +6259,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned 
> > > > long type)
> > > >  
> > > > /* Reserve bit 0 of irq_sources_bitmap for userspace irq source 
> > > > */
> > > > set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, 
> > > > &kvm->arch.irq_sources_bitmap);
> > > > +   /* Reserve bit 1 of irq_sources_bitmap for irqfd irq source */
> > > > +   set_bit(KVM_IRQFD_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
> > > >  
> > > > raw_spin_lock_init(&kvm->arch.tsc_write_lock);
> > > >  
> > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> > > > index 2ce09aa..ae66b9c 100644
> > > > --- a/include/linux/kvm.h
> > > > +++ b/include/linux/kvm.h
> > > > @@ -618,6 +618,7 @@ struct kvm_ppc_smmu_info {
> > > >  #define KVM_CAP_PPC_GET_SMMU_INFO 78
> > > >  #define KVM_CAP_S390_COW 79
> > > >  #define KVM_CAP_PPC_ALLOC_HTAB 80
> > > > +#define KVM_CAP_IRQFD_IRQ_SOURCE_ID 81
> > > >  
> > > >  #ifdef KVM_CAP_IRQ_ROUTING
> > > >  
> > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > index b70b48b..b763230 100644
> > > > --- a/include/linux/kvm_host.h
> > > > +++ b/include/linux/kvm_host.h
> > > > @@ -71,6 +71,7 @@
> > > >  #define KVM_REQ_PMI   17
> > > >  
> > > >  #define KVM_USERSPACE_IRQ_SOURCE_ID0
> > > > +#define KVM_IRQFD_IRQ_SOURCE_ID1
> > > >  
> > > >  struct kvm;
> > > >  struct kvm_vcpu;
> > > 
> > > Above looks fine but I'm not sure why is the below needed.
> > > This changes irqfd behaviour for edge GSIs slightly
> > > in a userspace-visible way. Maybe make it a separate patch
> > > so it can be considered on merits?
> > 
> > Hmm, the above does nothing without the below.
> 
> Yes. But you can use the above with the new irqfds you are adding.

Nope, racy.

> > I thought I was just
> > implementing your idea that IRQFDs should all share a single IRQ source
> > ID...
> 
> Sorry I only meant for level irqfds. You are changing edge here.

Ok, I misunderstood then.

> > why is that no longer a good idea?  Thanks,
> > 
> > Alex
> 
> Maybe it is a good idea. I am just asking for the motivation.

I assumed you were pointing out the level vs edge interaction.  If we
call that a userspace bug, I can just drop this.  Thanks,

Alex

> > > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> > > > index 7d7e2aa..2245cfa 100644
> > > > --- a/virt/kvm/eventfd.c
> > > > +++ b/virt/kvm/eventfd.c
> > > > @@ -67,8 +67,8 @@ irqfd_inject(struct work_struct *work)
> > > > struct _irqfd *irqfd = container_of(work, struct _irqfd, 
> > > > inject);
> > > > struct kvm *kvm = irqfd->kvm;
> > > >  
> > > > -   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
> > > > -   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
> > > > +   kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 1);
> > > > +   kvm_set_irq(kvm, KVM_IRQFD_IRQ_SOURCE_ID, irqfd->gsi, 0);
> > > >  }
> > > >  
> > > >  /*
> > > > @@ -138,7 +138,7 @@ irqfd_wakeup(wait_queue_t *wai

Re: [PATCH v2 0/5] X86/XEN: Merge x86_init.paging.pagetable_setup_start and x86_init.paging.pagetable_setup_done setup functions and document its semantic

2012-08-21 Thread Thomas Gleixner

On Tue, 21 Aug 2012, Attilio Rao wrote:
> Differences with v1:
> - The patch serie is re-arranged in a way that it helps reviews, following
>   a plan by Thomas Gleixner
> - The PVOPS nomenclature is not used as it is not correct
> - The front-end message is adjusted with feedback by Thomas Gleixner,
>   Stefano Stabellini and Konrad Rzeszutek Wilk 

This is much simpler to read and review. Just have a look at the
diffstats of the two series:

 6 files changed,  9 insertions(+),  8 deletions(-)
 6 files changed, 11 insertions(+),  9 deletions(-)
 5 files changed, 50 insertions(+),  2 deletions(-)
 6 files changed,  2 insertions(+), 65 deletions(-)
 1 files changed,  5 insertions(+),  0 deletions(-)

versus

 6 files changed, 10 insertions(+),  9 deletions(-)
 6 files changed, 11 insertions(+), 11 deletions(-)
 5 files changed,  3 insertions(+),  3 deletions(-)
 6 files changed,  4 insertions(+), 20 deletions(-)
 1 files changed,  5 insertions(+),  0 deletions(-)

The overall result is basically the same, but it's way simpler to look
at obvious and well done patches than checking whether a subtle copy
and paste bug happened in 3/5 of the first version. Copy and paste is
the #1 cause for subtle bugs. :)

I'm waiting for the ack of Xen folks before taking it into tip.

Thanks for following up!

tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] strings: helper for maximum decimal encoding of an unsigned integer

2012-08-21 Thread J. Bruce Fields

From: "J. Bruce Fields" 

I've seen a couple examples recently where we've gotten this wrong.
Maybe something like this would help?  Is there some better way?

(Approximation due to Jim Rees).

Signed-off-by: J. Bruce Fields 
---
 include/linux/string.h |6 ++
 net/sunrpc/cache.c |2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/string.h b/include/linux/string.h
index ffe0442..d4809b7 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -126,6 +126,12 @@ extern void argv_free(char **argv);
 extern bool sysfs_streq(const char *s1, const char *s2);
 extern int strtobool(const char *s, bool *res);
 
+/*
+ * length of the decimal representation of an unsigned integer.  Just an
+ * approximation, but it's right for types of size 1 to 36 bytes:
+ */
+#define base10len(i) (sizeof(i) * 24 / 10 + 1)
+
 #ifdef CONFIG_BINARY_PRINTF
 int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args);
 int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2afd2a8..1dcd2b3 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1409,7 +1409,7 @@ static ssize_t read_flush(struct file *file, char __user 
*buf,
  size_t count, loff_t *ppos,
  struct cache_detail *cd)
 {
-   char tbuf[20];
+   char tbuf[base10len(long) + 2];
unsigned long p = *ppos;
size_t len;
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] task_work: add a scheduling point in task_work_run()

2012-08-21 Thread Eric Dumazet

On Tue, 2012-08-21 at 16:37 -0400, Mimi Zohar wrote:

> We're here, because fput() called schedule_work() to delay the last
> fput().  The execution needs to take place before the syscall returns to
> userspace.  Need to read __schedule()...  Do you know if cond_resched()
> can guarantee that it will be executed before the return to userspace? 

Some clarifications : 

- fput() does not call schedule_work() in this case but task_work_add()

- cond_resched() wont return to userspace.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] strings: helper for maximum decimal encoding of an unsigned integer

2012-08-21 Thread Jim Rees

J. Bruce Fields wrote:

  From: "J. Bruce Fields" 
  
  I've seen a couple examples recently where we've gotten this wrong.
  Maybe something like this would help?  Is there some better way?
  
  (Approximation due to Jim Rees).

Please add Suggested-by: Jim Rees .  I'm thinking of
patenting the algorithm.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 1/4] pinctrl: add samsung pinctrl and gpiolib driver

2012-08-21 Thread Stephen Warren

On 08/21/2012 05:25 AM, Linus Walleij wrote:
> On Wed, Aug 15, 2012 at 9:57 PM, Thomas Abraham
>  wrote:
> 
>> Add a new device tree enabled pinctrl and gpiolib driver for Samsung
>> SoC's.
...
>> +  The child node can also optionally specify one or more of the pin
>> +  configuration that should be applied on all the pins listed in the
>> +  "samsung,pins" property of the child node. The following pin configuration
>> +  properties are supported.
>> +
>> +  - samsung,pin-pud: Pull up/down configuration.
>> +  - samsung,pin-drv: Drive strength configuration.
>> +  - samsung,pin-pud-pdn: Pull up/down configuration in power down mode.
>> +  - samsung,pin-drv-pdn: Drive strength configuration in power down mode.
> 
> This looks a bit scary, as it seems to be orthogonal to the pin config
> interface. I.e. this will be programmed "behind the back" of the
> pin config system. However as long as the pin config implementation
> reads back these things from the registers it will work, too.
> 
> In the U300 and Ux500 I explicitly use pin config hogs to set up
> the pin configuration, and when we enter a state such as
> "default" the mux setting and config settings are set from the
> framework separately.

I know that some HW has a separate set of registers (or fields) for the
"awake" and "sleep" configuration, and the HW switches between the two
automatically when sleeping. I have no idea if the Samsung SoCs do this,
but I think if this were the case, it'd be quite legitimate to define
both these HW states as separate sets of properties within a single
pinctrl SW state. So, that might be the explanation here?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v6 3/4] cgroup: add xattr support

2012-08-21 Thread Lennart Poettering


Heya,

(sorry for the late reply)

On 16.08.2012 22:00, Tejun Heo wrote:

On Thu, Aug 16, 2012 at 01:44:56PM -0400, a...@redhat.com wrote:



Attaching meta information to services, in an easily discoverable
way. For example, in systemd we create one cgroup for each service, and
could then store data like the main pid of the specific service as an
xattr on the cgroup itself. That way we'd have almost all service state
in the cgroupfs, which would make it possible to terminate systemd and
later restart it without losing any state information. But there's more:
for example, some very peculiar services cannot be terminated on
shutdown (i.e. fakeraid DM stuff) and it would be really nice if the
services in question could just mark that on their cgroup, by setting an
xattr. On the more desktopy side of things there are other
possibilities: for example there are plans defining what an application
is along the lines of a cgroup (i.e. an app being a collection of
processes). With xattrs one could then attach an icon or human readable
program name on the cgroup.

The key idea is that this would allow attaching runtime meta information
to cgroups and everything they model (services, apps, vms), that doesn't
need any complex userspace infrastructure, has good access control
(i.e. because the file system enforces that anyway, and there's the
"trusted." xattr namespace), notifications (inotify), and can easily be
shared among applications.




I'm not against this but unsure whether using kmem is enough for the
suggested use case.  Lennart, would this suit systemd?  How much
metadata are we talking about?


Just small things, like values, PIDs, i.e. a few 100 bytes or so per 
cgroup should be more than sufficient for our needs.


Lennart
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/5] Memory policy corruption fixes V2

2012-08-21 Thread Andi Kleen

> I tested this with trinity with CONFIG_DEBUG_SLAB enabled and it passed. I
> did not test LTP such as Josh reported a problem with or with a database that
> used shared policies like Andi tested. The series is almost all Kosaki's
> work of course. If he has a revised series that simply got delayed in
> posting it should take precedence.

Initial tests of this patchkit with a test program look good; full database
tests are still pending.

-Andi
-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v6 3/4] cgroup: add xattr support

2012-08-21 Thread Tejun Heo

Hello,

On Tue, Aug 21, 2012 at 11:43:44PM +0200, Lennart Poettering wrote:
> >I'm not against this but unsure whether using kmem is enough for the
> >suggested use case.  Lennart, would this suit systemd?  How much
> >metadata are we talking about?
> 
> Just small things, like values, PIDs, i.e. a few 100 bytes or so per
> cgroup should be more than sufficient for our needs.

Alright, then.  I think there's gonna be one more round to address
Hugh's comments.  Hugh, how should this be routed?  Is there some git
branch that tmpfs changes can go in so that cgroup tree can pull?

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 06/11] memcg: kmem controller infrastructure

2012-08-21 Thread Greg Thelen

On Thu, Aug 09 2012, Glauber Costa wrote:

> This patch introduces infrastructure for tracking kernel memory pages to
> a given memcg. This will happen whenever the caller includes the
> __GFP_KMEMCG flag, and the task belongs to a memcg other than the root.
>
> In memcontrol.h those functions are wrapped in inline accessors.  The
> idea is to later on, patch those with static branches, so we don't incur
> any overhead when no mem cgroups with limited kmem are being used.
>
> [ v2: improved comments and standardized function names ]
>
> Signed-off-by: Glauber Costa 
> CC: Christoph Lameter 
> CC: Pekka Enberg 
> CC: Michal Hocko 
> CC: Kamezawa Hiroyuki 
> CC: Johannes Weiner 
> ---
>  include/linux/memcontrol.h |  79 +++
>  mm/memcontrol.c| 185 
> +
>  2 files changed, 264 insertions(+)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 8d9489f..75b247e 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -21,6 +21,7 @@
>  #define _LINUX_MEMCONTROL_H
>  #include 
>  #include 
> +#include 
>  
>  struct mem_cgroup;
>  struct page_cgroup;
> @@ -399,6 +400,11 @@ struct sock;
>  #ifdef CONFIG_MEMCG_KMEM
>  void sock_update_memcg(struct sock *sk);
>  void sock_release_memcg(struct sock *sk);
> +
> +#define memcg_kmem_on 1
> +bool __memcg_kmem_new_page(gfp_t gfp, void *handle, int order);
> +void __memcg_kmem_commit_page(struct page *page, void *handle, int order);
> +void __memcg_kmem_free_page(struct page *page, int order);
>  #else
>  static inline void sock_update_memcg(struct sock *sk)
>  {
> @@ -406,6 +412,79 @@ static inline void sock_update_memcg(struct sock *sk)
>  static inline void sock_release_memcg(struct sock *sk)
>  {
>  }
> +
> +#define memcg_kmem_on 0
> +static inline bool
> +__memcg_kmem_new_page(gfp_t gfp, void *handle, int order)
> +{
> + return false;
> +}
> +
> +static inline void  __memcg_kmem_free_page(struct page *page, int order)
> +{
> +}
> +
> +static inline void
> +__memcg_kmem_commit_page(struct page *page, struct mem_cgroup *handle, int 
> order)
> +{
> +}
>  #endif /* CONFIG_MEMCG_KMEM */
> +
> +/**
> + * memcg_kmem_new_page: verify if a new kmem allocation is allowed.
> + * @gfp: the gfp allocation flags.
> + * @handle: a pointer to the memcg this was charged against.
> + * @order: allocation order.
> + *
> + * returns true if the memcg where the current task belongs can hold this
> + * allocation.
> + *
> + * We return true automatically if this allocation is not to be accounted to
> + * any memcg.
> + */
> +static __always_inline bool
> +memcg_kmem_new_page(gfp_t gfp, void *handle, int order)
> +{
> + if (!memcg_kmem_on)
> + return true;
> + if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
> + return true;
> + if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
> + return true;
> + return __memcg_kmem_new_page(gfp, handle, order);
> +}
> +
> +/**
> + * memcg_kmem_free_page: uncharge pages from memcg
> + * @page: pointer to struct page being freed
> + * @order: allocation order.
> + *
> + * there is no need to specify memcg here, since it is embedded in 
> page_cgroup
> + */
> +static __always_inline void
> +memcg_kmem_free_page(struct page *page, int order)
> +{
> + if (memcg_kmem_on)
> + __memcg_kmem_free_page(page, order);
> +}
> +
> +/**
> + * memcg_kmem_commit_page: embeds correct memcg in a page
> + * @handle: a pointer to the memcg this was charged against.
> + * @page: pointer to struct page recently allocated
> + * @handle: the memcg structure we charged against
> + * @order: allocation order.
> + *
> + * Needs to be called after memcg_kmem_new_page, regardless of success or
> + * failure of the allocation. if @page is NULL, this function will revert the
> + * charges. Otherwise, it will commit the memcg given by @handle to the
> + * corresponding page_cgroup.
> + */
> +static __always_inline void
> +memcg_kmem_commit_page(struct page *page, struct mem_cgroup *handle, int 
> order)
> +{
> + if (memcg_kmem_on)
> + __memcg_kmem_commit_page(page, handle, order);
> +}
>  #endif /* _LINUX_MEMCONTROL_H */
>  
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 54e93de..e9824c1 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -10,6 +10,10 @@
>   * Copyright (C) 2009 Nokia Corporation
>   * Author: Kirill A. Shutemov
>   *
> + * Kernel Memory Controller
> + * Copyright (C) 2012 Parallels Inc. and Google Inc.
> + * Authors: Glauber Costa and Suleiman Souhlal
> + *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License as published by
>   * the Free Software Foundation; either version 2 of the License, or
> @@ -434,6 +438,9 @@ struct mem_cgroup *mem_cgroup_from_css(struct 
> cgroup_subsys_state *s)
>  #include 
>  
>  static bool mem_cgroup_is_ro

[PATCH] perf: do not flush maps on COMM for perf report

2012-08-21 Thread Luigi Semenzato

This fixes a long-standing bug caused by the lack of separate
COMM and EXEC record types, which makes "perf report" lose
track of symbols when a process renames itself.

With this fix (suggested by Stephane Eranian), a COMM (rename)
no longer flushes the maps, which is the correct behavior.
An EXEC also no longer flushes the maps, but this doesn't
matter because as new mappings are created (for the executable
and the libraries) the old mappings are automatically removed.
This is not by accident: the functionality is necessary because
DLLs can be explicitly loaded at any time with dlopen(),
possibly on top of existing text, so "perf report" handles
correctly the clobbering of new mappings on top of old ones.

An alternative patch (which I proposed earlier) would be to
introduce a separate PERF_RECORD_EXEC type, but it is a much
larger change (about 300 lines) and is not necessary.

Signed-off-by: Luigi Semenzato 
---
 tools/perf/util/thread.c |1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index fb4b7ea..8b3e593 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -39,7 +39,6 @@ int thread__set_comm(struct thread *self, const char *comm)
err = self->comm == NULL ? -ENOMEM : 0;
if (!err) {
self->comm_set = true;
-   map_groups__flush(&self->mg);
}
return err;
 }
-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] ide: fix generic_ide_suspend/resume Oops

2012-08-21 Thread David Miller

From: Miklos Szeredi 
Date: Tue, 21 Aug 2012 17:20:30 +0200

> From: Miklos Szeredi 
> 
> This patch fixes a regression introduced by commit 0998d063 (device-core:
> Ensure drvdata = NULL when no driver is bound).
> 
> Suspend oopses in generic_ide_suspend() because dev_get_drvdata()
> returns NULL (dev->p->driver_data == NULL) and this function is not
> prepared for this.
> 
> Fix is based on Alan Stern's suggestion.
> 
> Signed-off-by: Miklos Szeredi 
> Acked-by: Rafael J. Wysocki 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [MMTests] dbench4 async on ext3

2012-08-21 Thread Jan Kara

On Mon 23-07-12 22:21:46, Mel Gorman wrote:
> Configuration:global-dhp__io-dbench4-async-ext3
> Result:   
> http://www.csn.ul.ie/~mel/postings/mmtests-20120424/global-dhp__io-dbench4-async-ext3
> Benchmarks:   dbench4
> 
> Summary
> ===
> 
> In general there was a massive drop in throughput after 3.0. Very broadly
> speaking it looks like the Read operation got faster but at the cost of
> a big regression in the Flush operation.
  Mel, I had a look into this and it's actually very likely only a
configuration issue. In 3.1 ext3 started to default to enabled barriers
(barrier=1 in mount options) which is a safer but slower choice. When I set
barriers explicitly, I see no performance difference for dbench4 between
3.0 and 3.1.

Honza
-- 
Jan Kara 
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 17/31] arm64: System calls handling

2012-08-21 Thread Catalin Marinas

On Tue, Aug 21, 2012 at 09:14:01PM +0100, Arnd Bergmann wrote:
> On Tuesday 21 August 2012, Catalin Marinas wrote:
> > > > +asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
> > > > +unsigned long prot, unsigned long flags,
> > > > +unsigned long fd, off_t off)
> > > > +{
> > > > +   if (offset_in_page(off) != 0)
> > > > +   return -EINVAL;
> > > > +
> > > > +   return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> 
> > > > PAGE_SHIFT);
> > > > +}
> > > 
> > > I think
> > > 
> > > #define sys_mmap sys_mmap_pgoff 
> > 
> > There are slightly different semantics with the last argument of
> > sys_mmap() which takes a byte offset. The sys_mmap_pgoff() function
> > takes the offset shifted by PAGE_SHIFT (which is the same as sys_mmap2).
> > 
> > Looking at the other architectures, it makes sense to use a generic
> > sys_mmap() implementation similar to the one above (or the ia-64, seems
> > to be the most complete).
> 
> Why that? The generic sys_mmap_pgoff was specifically added so new 
> architectures
> could just use that instead of having their own wrappers, see f8b72560.

As I understand, sys_mmap_pgoff can be used instead of sys_mmap2 on new
32-bit architectures. But on 64-bit architectures we don't have
sys_mmap2, only sys_mmap with the difference that the last argument is
the offset in bytes (and multiple of PAGE_SIZE) rather than in pages. So
unless we change the meaning of this last argument for sys_mmap, we
cannot just define it to sys_mmap_pgoff.

Since the other 64-bit architectures seem to have a sys_mmap wrapper
that does this:

sys_mmap_pgoff(..., off >> PAGE_SHIFT);

I think AArch64 should also use the same sys_mmap convention. We can
make this wrapper generic.

-- 
Catalin
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] Move kfree outside pde_unload_lock

2012-08-21 Thread Alexey Dobriyan

On Tue, Aug 21, 2012 at 03:54:54PM -0500, Nathan Zimmer wrote:
> I am currently tracking a hot lock reported by a customer on a large,
> 512-core system. I am currently running 3.6.0-rc1, but the issue looks like
> it has been this way for a very long time.
> The offending lock is proc_dir_entry->pde_unload_lock.  
> 
> In proc_reg_release we are doing a kfree under the spinlock which is ok but it
> means we are holding the lock longer than required. Scaling improved when I 
> moved kfree out.

It's OK to move it out.
Acked-by: Alexey Dobriyan 

> Also shouldn't the comment on pde_unload_lock also note that pde_openers and 
> pde_unload_completion are both used under the lock?

Yeah, why not.

> --- a/fs/proc/inode.c
> +++ b/fs/proc/inode.c
> @@ -403,9 +403,11 @@ static int proc_reg_release(struct inode *inode, struct 
> file *file)
>   release = pde->proc_fops->release;
>   if (pdeo) {
>   list_del(&pdeo->lh);
> - kfree(pdeo);
>   }
>   spin_unlock(&pde->pde_unload_lock);
> + if (pdeo) {
> + kfree(pdeo);
> + }
>  
>   if (release)
>   rv = release(inode, file);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] strings: helper for maximum decimal encoding of an unsigned integer

2012-08-21 Thread Al Viro

On Tue, Aug 21, 2012 at 05:22:27PM -0400, Jim Rees wrote:
> J. Bruce Fields wrote:
> 
>   From: "J. Bruce Fields" 
>   
>   I've seen a couple examples recently where we've gotten this wrong.
>   Maybe something like this would help?  Is there some better way?
>   
>   (Approximation due to Jim Rees).
> 
> Please add Suggested-by: Jim Rees .  I'm thinking of
> patenting the algorithm.

Is that a joke?  Patenting the fact that log10(256) is 2.408..., which
is about 2.4, which is 24/10?  I really hope we are Poe'd...  BTW, NAK
the comment - s/36/26/ in there; check it yourself -
$ echo '2^(8*27)' | bc
105312291668557186697918027683670432318895095400549111254310977536
which is 66-digit, not 65 as the estimate would be.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCHv3 3/9] serial: vt8500: Add devicetree support for vt8500-serial

2012-08-21 Thread Alan Cox

On Wed, 22 Aug 2012 08:47:32 +1200
Tony Prisk  wrote:

> Signed-off-by: Tony Prisk 
> ---
>  drivers/tty/serial/vt8500_serial.c |   37 
> 
>  1 file changed, 33 insertions(+), 4 deletions(-)

Can we have a comment attached to a change this size. In particular one
describing why it gone from 4 to 6 ports, and why the port id twiddling.

Is there a reason you can't use the device tree port id ?

What are the regression risks for existing users expecting the pdev->id
binding ?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] strings: helper for maximum decimal encoding of an unsigned integer

2012-08-21 Thread J. Bruce Fields

On Tue, Aug 21, 2012 at 11:06:13PM +0100, Al Viro wrote:
> On Tue, Aug 21, 2012 at 05:22:27PM -0400, Jim Rees wrote:
> > J. Bruce Fields wrote:
> > 
> >   From: "J. Bruce Fields" 
> >   
> >   I've seen a couple examples recently where we've gotten this wrong.
> >   Maybe something like this would help?  Is there some better way?
> >   
> >   (Approximation due to Jim Rees).
> > 
> > Please add Suggested-by: Jim Rees .  I'm thinking of
> > patenting the algorithm.
> 
> Is that a joke?  Patenting the fact that log10(256) is 2.408..., which
> is about 2.4, which is 24/10?  I really hope we are Poe'd...  BTW, NAK
> the comment - s/36/26/ in there; check it yourself -
> $ echo '2^(8*27)' | bc
> 105312291668557186697918027683670432318895095400549111254310977536
> which is 66-digit, not 65 as the estimate would be.

Erp, you're right.

Anyway, does something like base10len(type) seem reasonable?  Or define
macros that enumerate the sizes?  (ULONG_STR_MAX or something?)

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/2] console_lock debug improvements

2012-08-21 Thread Daniel Vetter

Hi all,

After Dave Airlie blew through a few days to track down a deadlock at boot-up
when handing over from the firmware fb to the kms/drm framebuffer driver [1],
I've figured that lockdep /should/ have caught this.

And indeed, by adding proper annotations to the console_lock it complains about
the potential deadlock when exercising the entire driver life-cycle of just one
fb driver (i.e. not even a handover required). While at it, I've replaced the
existing in_interrupt check with the more paranoid might_sleep.

Comments, flames and review highly welcome.

Yours, Daniel

[1]: https://lkml.org/lkml/2012/8/21/36

Daniel Vetter (2):
  console: use might_sleep in console_lock
  console: implement lockdep support for console_lock

 kernel/printk.c |   12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/2] console: implement lockdep support for console_lock

2012-08-21 Thread Daniel Vetter

Dave Airlie recently discovered a locking bug in the fbcon layer,
where a timer_del_sync (for the blinking cursor) deadlocks with the
timer itself, since both (want to) hold the console_lock:

https://lkml.org/lkml/2012/8/21/36

Unfortunately the console_lock isn't a plain mutex and hence has no
lockdep support. Which resulted in a few days wasted of tracking down
this bug (complicated by the fact that printk doesn't show anything
when the console is locked) instead of noticing the bug much earlier
with the lockdep splat.

Hence I've figured I need to fix that for the next deadlock involving
console_lock - and with kms/drm growing ever more complex locking
that'll eventually happen.

Now the console_lock has rather funky semantics, so after a quick irc
discussion with Thomas Gleixner and Dave Airlie I've quickly ditched
the original idea of switching to a real mutex (since it won't work)
and instead opted to annotate the console_lock with lockdep
information manually.

There are a few special cases:
- The console_lock state is protected by the console_sem, and usually
  grabbed/dropped at _lock/_unlock time. But the suspend/resume code
  drops the semaphore without dropping the console_lock (see
  suspend_console/resume_console). But since the same thread that did
  the suspend will do the resume, we don't need to fix up anything.

- In the printk code there's a special trylock, only used to kick off
  the logbuffer printk'ing in console_unlock. But all that happens
  while lockdep is disabled (since printk does a few other evil
  tricks). So no issue there, either.

- The console_lock can also be acquired from irq context (but only
  with a trylock). lockdep already handles that.

This all leaves us with annotating the normal console_lock, _unlock
and _trylock functions.

And yes, it works - simply unloading a drm kms driver resulted in
lockdep complaining about the deadlock in fbcon_deinit:

==
[ INFO: possible circular locking dependency detected ]
3.6.0-rc2+ #552 Not tainted
---
kms-reload/3577 is trying to acquire lock:
 ((&info->queue)){+.+...}, at: [] wait_on_work+0x0/0xa7

but task is already holding lock:
 (console_lock){+.+.+.}, at: [] bind_con_driver+0x38/0x263

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (console_lock){+.+.+.}:
   [] lock_acquire+0x95/0x105
   [] console_lock+0x59/0x5b
   [] fb_flashcursor+0x2e/0x12c
   [] process_one_work+0x1d9/0x3b4
   [] worker_thread+0x1a7/0x24b
   [] kthread+0x7f/0x87
   [] kernel_thread_helper+0x4/0x10

-> #0 ((&info->queue)){+.+...}:
   [] __lock_acquire+0x999/0xcf6
   [] lock_acquire+0x95/0x105
   [] wait_on_work+0x3b/0xa7
   [] __cancel_work_timer+0xbf/0x102
   [] cancel_work_sync+0xb/0xd
   [] fbcon_deinit+0x11c/0x1dc
   [] bind_con_driver+0x145/0x263
   [] unbind_con_driver+0x14f/0x195
   [] store_bind+0x1ad/0x1c1
   [] dev_attr_store+0x13/0x1f
   [] sysfs_write_file+0xe9/0x121
   [] vfs_write+0x9b/0xfd
   [] sys_write+0x3e/0x6b
   [] system_call_fastpath+0x16/0x1b

other info that might help us debug this:

 Possible unsafe locking scenario:

   CPU0CPU1
   
  lock(console_lock);
   lock((&info->queue));
   lock(console_lock);
  lock((&info->queue));

 *** DEADLOCK ***

Cc: Dave Airlie 
Cc: Thomas Gleixner 
Signed-off-by: Daniel Vetter 
---
 kernel/printk.c |9 +
 1 file changed, 9 insertions(+)

diff --git a/kernel/printk.c b/kernel/printk.c
index ed9af6a..ab2ab24 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -87,6 +87,12 @@ static DEFINE_SEMAPHORE(console_sem);
 struct console *console_drivers;
 EXPORT_SYMBOL_GPL(console_drivers);
 
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map console_lock_dep_map = {
+   .name = "console_lock"
+};
+#endif
+
 /*
  * This is used for debugging the mess that is the VT code by
  * keeping track if we have the console semaphore held. It's
@@ -1916,6 +1922,7 @@ void console_lock(void)
return;
console_locked = 1;
console_may_schedule = 1;
+   mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);
 }
 EXPORT_SYMBOL(console_lock);
 
@@ -1937,6 +1944,7 @@ int console_trylock(void)
}
console_locked = 1;
console_may_schedule = 0;
+   mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_);
return 1;
 }
 EXPORT_SYMBOL(console_trylock);
@@ -2097,6 +2105,7 @@ skip:
local_irq_restore(flags);
}
console_locked = 0;
+   mutex_release(&console_lock_dep_map, 1, _RET_IP_);
 
/* Release the exclusive_console once it is used */
if (unlikely(exclusive_console))
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kern

[PATCH 1/2] console: use might_sleep in console_lock

2012-08-21 Thread Daniel Vetter

Instead of BUG_ON(in_interrupt()), since that doesn't check for all
the newfangled stuff like preempt.

Note that this is valid since the console_sem is essentially used like
a real mutex with only two twists:
- we allow trylock from hardirq context
- across suspend/resume we lock the logical console_lock, but drop the
  semaphore protecting the locking state.

Now that doesn't guarantee that no one is playing tricks in
single-thread atomic contexts at suspend/resume/boot time, but
- I couldn't find anything suspicious with some grepping,
- might_sleep shouldn't die,
- and I think the upside of catching more potential issues is worth
  the risk of getting a might_sleep backtrace that would have been
  safe (and then dealing with that fallout).

Cc: Dave Airlie 
Cc: Thomas Gleixner 
Signed-off-by: Daniel Vetter 
---
 kernel/printk.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/printk.c b/kernel/printk.c
index 66a2ea3..ed9af6a 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1909,7 +1909,8 @@ static int __cpuinit console_cpu_notify(struct 
notifier_block *self,
  */
 void console_lock(void)
 {
-   BUG_ON(in_interrupt());
+   might_sleep();
+
down(&console_sem);
if (console_suspended)
return;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH 2/2] mm: Batch page_check_references in shrink_page_list sharing the same i_mmap_mutex

2012-08-21 Thread Tim Chen

On Tue, 2012-08-21 at 09:21 -0400, Matthew Wilcox wrote:

> 
> The only clunky bit would seem to be this bit:
> 
> > if (page_mapped(page) && mapping) {
> > -   switch (try_to_unmap(page, TTU_UNMAP)) {
> > +   switch (try_to_unmap(page, TTU_UNMAP,
> > +   mmap_mutex_locked)) {
> 
> Which I think has to look like this:
> 
>   if (page_mapped(page) && mapping) {
> - switch (try_to_unmap(page, TTU_UNMAP)) {
> + int result;
> + if (i_mmap_mutex)
> + result = __try_to_unmap(page, TTU_UNMAP);
> + else
> + result = try_to_unmap(page, TTU_UNMAP);
> + switch (result) {
> 

I think

-   switch (try_to_unmap(page, TTU_UNMAP)) {
+   switch (__try_to_unmap(page, TTU_UNMAP)) {

should be enough when your changes are adopted.  Because if the page
mmap mutex needs to be locked, we will have locked it here before
__try_to_unmap gets used.  

+   if (needs_page_mmap_mutex(page) &&
+   i_mmap_mutex != &page->mapping->i_mmap_mutex) {
+   if (i_mmap_mutex)
+   mutex_unlock(i_mmap_mutex);
+   i_mmap_mutex = &page->mapping->i_mmap_mutex;
+   mutex_lock(i_mmap_mutex);
+   }


Tim

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH RT 2/2] fix printk flush of messages

2012-08-21 Thread Frank Rowand

added recipients...

On 08/21/12 07:30, Michael Thalmeier wrote:
> Frank Rowand  am.sony.com> writes:
> 
>>
>>
>> Updates console-make-rt-friendly.patch
>>
>> #ifdef CONFIG_PREEMPT_RT_FULL, printk() output is never flushed by
>> printk() because:
>> ...
>>
>> On system boot some printk() output is flushed because register_console()
>> and tty_open() call console_unlock().
>>
>> This change also fixes the problem that was previously fixed by
>> preempt-rt-allow-immediate-magic-sysrq-output-for-preempt_rt_full.patch
>>
>> Signed-off-by: Frank Rowand  am.sony.com>
>>
>> ---
>>  kernel/printk.c |2  1 + 1 - 0 !
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> Index: b/kernel/printk.c
>> ===
>> --- a/kernel/printk.c
>> +++ b/kernel/printk.c
>> @@ -847,7 +847,7 @@ static int console_trylock_for_printk(un
>>  int retval = 0, wake = 0;
>>  #ifdef CONFIG_PREEMPT_RT_FULL
>>  int lock = !early_boot_irqs_disabled && !irqs_disabled_flags(flags) &&
>> -!preempt_count();
>> +(preempt_count() <= 1);
>>  #else
>>  int lock = 1;
>>  #endif
>>
>>
> 
> I have seen that this patch is applied in the 3.4 stable rt series.
> As we are using the 3.0 stable rt kernel I have tested this patch on this
> kernel series (on a Freescale i.MX31 based board) and have not found any
> problems so far.
> Is there something I have missed why this patch has not found its way
> in the 3.0 series ?
> 
> Thanks in advance,
> Michael

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] tools lib traceevent: Modify header to work in C++ programs

2012-08-21 Thread Steven Rostedt


Arnaldo,

As the libtraceevent library is also used by powertop, and that is
written in *cough* C++ *cough*, we need to make sure that the headers do
not have any C++ reserved words. Please apply this patch. Thanks.

-- Steve


Steven Rostedt (1):
  tools lib traceevent: Modify header to work in C++ programs


 tools/lib/traceevent/event-parse.h |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
---
commit 79f481dbd7cb14e33113180b74e09e742cd839f9
Author: Steven Rostedt 
Date:   Sat May 7 19:04:01 2011 -0400

tools lib traceevent: Modify header to work in C++ programs

Replace keyword "private" from event-parse.h to allow it to be
used in C++ programs.

Signed-off-by: Steven Rostedt 

diff --git a/tools/lib/traceevent/event-parse.h 
b/tools/lib/traceevent/event-parse.h
index 5772ad8..c28713b 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -49,7 +49,7 @@ struct pevent_record {
int cpu;
int ref_count;
int locked; /* Do not free, even if 
ref_count is zero */
-   void*private;
+   void*r_private;
 #if DEBUG_RECORD
struct pevent_record*prev;
struct pevent_record*next;
@@ -106,7 +106,7 @@ struct plugin_option {
char*plugin_alias;
char*description;
char*value;
-   void*private;
+   void*p_private;
int set;
 };
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v6 3/4] cgroup: add xattr support

2012-08-21 Thread Hugh Dickins

On Tue, 21 Aug 2012, Tejun Heo wrote:
> On Tue, Aug 21, 2012 at 11:43:44PM +0200, Lennart Poettering wrote:
> > >I'm not against this but unsure whether using kmem is enough for the
> > >suggested use case.  Lennart, would this suit systemd?  How much
> > >metadata are we talking about?
> > 
> > Just small things, like values, PIDs, i.e. a few 100 bytes or so per
> > cgroup should be more than sufficient for our needs.

That is reasonable.

> 
> Alright, then.  I think there's gonna be one more round to address
> Hugh's comments.  Hugh, how should this be routed?  Is there some git
> branch that tmpfs changes can go in so that cgroup tree can pull?

No git tree, but we can easily handle it in one of two ways.

include/linux/shmem_fs.h and mm/shmem.c usually go to Linus from Andrew
from his mmotm tree (which includes and is included in linux-next,
by some magic escaping infinite recursion).

Are we expecting Aristeu+Zefan's simple_xattr patches to go into 3.7?
I don't have anything planned for shmem.c for 3.7 beyond a bugfix,
which shouldn't interact with the simple_xattr changes at all
(I could remove info->lock, but will not do so this time around,
precisely so as not to interfere with those patches).

So it should be perfectly workable for you to take Aristeu+Zefan's
shmem patches into your cgroup tree, then any further mods from
mmotm will get layered on top.

But if you prefer to leave shmem.c changes to Andrew, then it would
also be perfectly workable for Aristeu to split the 1/4 into two:
one for you which updates fs/xattr.c and include/linux/xattr.h with
simple_xattr code stolen from mm/shmem.c and include/linux/shmem_fs.h;
and one for Andrew which updates mm/shmem.c and include/linux/shmem_fs.h
to delete its shmem_xattr stuff and use simple_xattr interfaces instead.

Either approach is fine with me.

Hugh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[GIT] Networking

2012-08-21 Thread David Miller


A couple weeks of bug fixing in there.  The largest chunk is all the
broken crap Amerigo Wang found in the netpoll layer.

1) netpoll and its users have several serious bugs:
   a) uses GFP_KERNEL with locks held
   b) interfaces requiring interrupts disabled are called with them
  enabled
   c) and vice versa
   d) VLAN tag demuxing, as per all other RX packet input paths, is
  not applied

   All from Amerigo Wang.

2) Hopefully cure the ipv4 mapped ipv6 address TCP early demux bugs
   for good, from Neal Cardwell.

3) Unlike AF_UNIX, AF_PACKET sockets don't set a default credentials
   when the user doesn't specify one explicitly during sendmsg().
   Instead we attach an empty (zero) SCM credential block which
   is definitely not what we want.  Fix from Eric Dumazet.

4) IPv6 illegally invokes netdevice notifiers with RCU lock held, fix
   from Ben Hutchings.

5) inet_csk_route_child_sock() checks wrong inet options pointer, fix
   from Christoph Paasch.

6) When AF_PACKET is used for transmit, packet loopback doesn't behave
   properly when a socket fanout is enabled, from Eric Leblond.

7) On bluetooth l2cap channel create failure, we leak the socket, from
   Jaganath Kanakkassery.

8) Fix all the netprio file handling bugs found by Al Viro, from John
   Fastabend.

9) Several error return and NULL deref bug fixes in networking drivers
   from Julia Lawall.

10) A large smattering of struct padding et al. kernel memory leaks
to userspace found by Mathias Krause.

11) Conntrack expectations in netfilter can access an uninitialized timer,
fix from Pablo Neira Ayuso.

12) Several netfilter SIP tracker bug fixes from Patrick McHardy.

13) IPSEC ipv6 routes are not initialized correctly all the time,
resulting in an OOPS in inet_putpeer().  Also from Patrick
McHardy.

14) Bridging does rcu_dereference() outside of RCU protected area,
from Stephen Hemminger.

15) Fix routing cache removal performance regression when looking up
output routes that have a local destination.  From Zheng Yan.

Please pull, thanks a lot!

The following changes since commit ddf343f635fe4440cad528e12f96f28bd50aa099:

  Merge branch 'for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux (2012-08-14 07:58:59 
+0300)

are available in the git repository at:


  git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git master

for you to fetch changes up to e0e3cea46d31d23dc40df0a49a7a2c04fe8edfea:

  af_netlink: force credentials passing [CVE-2012-3520] (2012-08-21 14:53:01 
-0700)


Alexey Khoroshilov (1):
  rndis_wlan: Fix potential memory leak in update_pmkid()

Amerigo Wang (15):
  netpoll: use GFP_ATOMIC in slave_enable_netpoll() and __netpoll_setup()
  netpoll: make __netpoll_cleanup non-block
  netconsole: do not release spin_lock when calling __netpoll_cleanup
  netpoll: take rcu_read_lock_bh() in netpoll_rx()
  netpoll: use netpoll_rx_on() in netpoll_rx()
  netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev()
  bridge: add some comments for NETDEV_RELEASE
  bridge: use list_for_each_entry() in netpoll functions
  netpoll: check netpoll tx status on the right device
  netpoll: convert several functions to bool
  vlan: clean up some variable names
  vlan: clean up vlan_dev_hard_start_xmit()
  netpoll: handle vlan tags in netpoll tx and rx path
  netpoll: re-enable irq in poll_napi()
  netconsole: remove a redundant netconsole_target_put()

Andrei Emeltchenko (1):
  Bluetooth: smp: Fix possible NULL dereference

Ben Hutchings (4):
  llc2: Fix silent failure of llc_station_init()
  llc2: Call llc_station_exit() on llc2_init() failure path
  llc: Fix races between llc2 handler use and (un)registration
  ipv6: addrconf: Avoid calling netdevice notifiers with RCU read-side lock

Bjørn Mork (5):
  net: qmi_wwan: use fixed interface number matching
  net: qmi_wwan: add Sierra Wireless devices
  net: qmi_wwan: compress device_id list using macros
  net: sierra_net: replace whitelist with ifnumber match
  net: qmi_wwan: new devices: UML290 and K5006-Z

Bob Copeland (1):
  ath5k: fix spin_lock_irqsave/spin_lock_bh nesting in mesh

Christoph Paasch (1):
  ipv4: Use newinet->inet_opt in inet_csk_route_child_sock()

Dan Carpenter (1):
  wireless: at76c50x: signedness bug in at76_dfu_get_state()

David S. Miller (2):
  Merge branch 'for-davem' of git://git.kernel.org/.../linville/wireless
  Merge git://1984.lsi.us.es/nf

Dirk Gouders (1):
  netconsole.txt: revision of examples for the receiver of kernel messages

Eric Dumazet (3):
  tcp: fix possible socket refcount problem
  ipv4: fix ip header ident selection in __ip_make_skb()
  af_netlink: force credentials passing [CVE-2012-3520]

Eric Leblond (1):
  af_packet: don't emit packet on orig fanout group

Felix Fietkau (1):

Re: [PATCH v7 2/4] virtio_balloon: introduce migration primitives to balloon pages

2012-08-21 Thread Rusty Russell

On Wed, 15 Aug 2012 14:28:51 +0300, "Michael S. Tsirkin"  
wrote:
> On Wed, Aug 15, 2012 at 12:16:51PM +0100, Mel Gorman wrote:
> > I was thinking of exactly that page->mapping == balloon_mapping check. As I
> > do not know how many active balloon drivers there might be I cannot guess
> > in advance how much of a scalability problem it will be.
> 
> Not at all sure multiple drivers are worth supporting, but multiple
> *devices* is I think worth supporting, if for no other reason than that
> they can work today. For that, we need a device pointer which Rafael
> wants to put into the mapping, this means multiple balloon mappings.

Rafael, please make sure that the balloon driver fails on the second and
subsequent balloon devices.

Michael, we only allow multiple balloon devices because it fell out of
the implementation.  If it causes us even the slightest issue, we should
not support it.  It's not a sensible setup.

Cheers,
Rusty.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 00/25] Crypto keys and module signing

2012-08-21 Thread Rusty Russell

On Thu, 16 Aug 2012 02:34:05 +0100, David Howells  wrote:
> 
> Hi Rusty,
> 
> I've posted new versions of my module signing patches to my GIT trees.

Now I get to punt this discussion to KS.

I knew it was good for something!

Cheers,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/4] PCI/PM: PCI D3cold support fixes for 3.6-rc1

2012-08-21 Thread Bjorn Helgaas

On Sun, Aug 19, 2012 at 6:09 PM, huang ying
 wrote:
> Hi, Bjorn,
>
> Could you please merge this patchset?  They fix real bugs.

I assume you wanted the updated "[PATCH 3/4] PCI/PM:  Fix config reg
access ..." patch posted Aug 15.

I merged these (with the updated 3/4 patch) to my "for-linus" branch.
After it's in linux-next for a couple days, I'll ask Linus to pull it.

> On Sun, Aug 19, 2012 at 6:35 PM, Bjørn Mork  wrote:
>> Huang Ying  writes:
>>
>>> [BUGFIX 1/4] PCI/PM: enable D3/D3cold by default for most devices
>>> [BUGFIX 2/4] PCI/PM: Keep parent bridge active when probing device
>>> [BUGFIX 3/4] PCI/PM: Fix config reg access for D3cold and bridge suspending
>>> [PATCH 4/4] PCI/PM: Add ABI document for sysfs file d3cold_allowed
>>
>> Hello,
>>
>> I am hoping these patches will appear in 3.6?  They fix real problems in
>> 3.6-rc1 for me. If it helps in any way, feel free to add
>>
>> Tested-by: Bjørn Mork 
>>
>> to the 3 bugfix patches, including version 2 of patch #3.
>>
>>
>> Bjørn
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[GIT PULL for v3.6-rc3] media fixes

2012-08-21 Thread Mauro Carvalho Chehab

Hi Linus,

Please pull from:
  git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media 
v4l_for_linus

For bug fixes, at soc_camera, si470x, uvcvideo, iguanaworks IR driver, 
radio_shark Kbuild fixes, and at the V4L2 core (radio fixes).

Thank you!
Mauro

-

The following changes since commit 8762541f067d371320731510669e27f5cc40af38:

  Merge branch 'v4l_for_linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media (2012-07-31 
18:47:44 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media 
v4l_for_linus

for you to fetch changes up to 991b3137f21e13db4711f313edbe67d49bed795b:

  [media] media: soc_camera: don't clear pix->sizeimage in JPEG mode 
(2012-08-15 19:24:28 -0300)


Albert Wang (1):
  [media] media: soc_camera: don't clear pix->sizeimage in JPEG mode

Alex Gershgorin (1):
  [media] media: mx3_camera: buf_init() add buffer state check

Fabio Estevam (2):
  [media] video: mx1_camera: Use clk_prepare_enable/clk_disable_unprepare
  [media] video: mx2_camera: Use clk_prepare_enable/clk_disable_unprepare

Guenter Roeck (1):
  [media] Add USB dependency for IguanaWorks USB IR Transceiver

Hans Verkuil (5):
  [media] DocBook: Remove a spurious character
  [media] si470x: v4l2-compliance fixes
  [media] mem2mem_testdev: fix querycap regression
  [media] VIDIOC_ENUM_FREQ_BANDS fix
  [media] Add missing logging for rangelow/high of hwseek

Hans de Goede (4):
  [media] radio-shark*: Remove work-around for dangling pointer in usb 
intfdata
  [media] radio-shark*: Call cancel_work_sync from disconnect rather then 
release
  [media] radio-shark: Only compile led support when CONFIG_LED_CLASS is set
  [media] radio-shark2: Only compile led support when CONFIG_LED_CLASS is 
set

Javier Martin (1):
  [media] media: mx2_camera: Fix clock handling for i.MX27

Jayakrishnan Memana (1):
  [media] uvcvideo: Reset the bytesused field when recycling an erroneous 
buffer

 Documentation/DocBook/media/v4l/vidioc-g-tuner.xml |   2 +-
 drivers/media/radio/radio-shark.c  | 151 +++--
 drivers/media/radio/radio-shark2.c | 137 ++-
 drivers/media/radio/si470x/radio-si470x-common.c   |   3 +
 drivers/media/radio/si470x/radio-si470x-i2c.c  |   5 +-
 drivers/media/radio/si470x/radio-si470x-usb.c  |   2 +-
 drivers/media/rc/Kconfig   |   1 +
 drivers/media/video/mem2mem_testdev.c  |   2 +-
 drivers/media/video/mx1_camera.c   |   4 +-
 drivers/media/video/mx2_camera.c   |  47 ---
 drivers/media/video/mx3_camera.c   |  22 +--
 drivers/media/video/soc_camera.c   |   3 +-
 drivers/media/video/soc_mediabus.c |   6 +
 drivers/media/video/uvc/uvc_queue.c|   1 +
 drivers/media/video/v4l2-ioctl.c   |  10 +-
 15 files changed, 217 insertions(+), 179 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/1] tcp: Wrong timeout for SYN segments

2012-08-21 Thread Alex Bergmann

Hi David,

I'm not 100% sure, but it looks like I found an RFC mismatch with the 
current default values of the TCP implementation.

Alex

>From 8b854a525eb45f64ad29dfab16f9d9f681e84495 Mon Sep 17 00:00:00 2001
From: Alexander Bergmann 
Date: Wed, 22 Aug 2012 00:29:08 +0200
Subject: [PATCH 1/1] tcp: Wrong timeout for SYN segments

Commit 9ad7c049 changed the initRTO from 3secs to 1sec in accordance with
RFC6298 (former RFC2988bis). This introduced a gap with RFC1122 that
defines a minimum retransmission window for SYN segments of at least
180secs.

Prior to 9ad7c049 the timeout was defined with 189secs. Now we have only
a timeout of 63secs.

((2 << 5) - 1) * 3 secs = 189 secs
((2 << 5) - 1) * 1 secs = 63 secs

To fulfill the MUST constraint in RFC1122 section 4.2.3.5 about R2 for
SYN segments, the values of TCP_SYN_RETRIES and TCP_SYNACK_RETRIES must
be changed to 7 retries.

((2 << 7) - 1) * 1 secs = 255 secs

This would result in an ETIMEDOUT of 4 minutes 15 seconds.

Signed-off-by: Alexander Bergmann 
---
 include/net/tcp.h |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1f000ff..7eaae19 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -98,10 +98,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int 
timeo);
 * 15 is ~13-30min depending on RTO.
 */

-#define TCP_SYN_RETRIES 5  /* number of times to retry active 
opening a
+#define TCP_SYN_RETRIES 7  /* number of times to retry active 
opening a
 * connection: ~180sec is RFC minimum   */

-#define TCP_SYNACK_RETRIES 5   /* number of times to retry passive opening a
+#define TCP_SYNACK_RETRIES 7   /* number of times to retry passive opening a
 * connection: ~180sec is RFC minimum   */

 #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
-- 
1.7.8.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

1 2 3 4 5 6 7 8 >

1 - 100 of 766 matches

Mail list logo