date:20120709

Re: [PATCH firmware 3/4] rtl_nic: add new firmware for RTL8106E

2012-07-09 Thread Ben Hutchings

On Mon, 2012-07-09 at 11:11 +0800, Hayes Wang wrote:
> File: rtl_nic/rtl8106e-1.fw
> Version: 0.0.1
[...]

As someone already pointed out, this message was not correctly encoded.
The character encoding was specified as an empty string
(Content-Type header) or whitespace (Subject):

> Content-Type: text/plain; charset=""
[...]
> Subject: =?
>   
> ?q?=5BPATCH=20firmware=203/4=5D=20rtl=5Fnic=3A=20add=20new=20firmware=20for=20RTL8106E?=

I fixed that up just this once, but please fix whatever causes that
because I don't wish to have to do so again.

Ben.

-- 
Ben Hutchings
The generation of random numbers is too important to be left to chance.
- Robert Coveyou

signature.asc
Description: This is a digitally signed message part

[PATCH 2/3] pstore/ram: Make ECC size configurable

2012-07-09 Thread Anton Vorontsov

This is now pretty straightforward: instead of using bool, just pass
an integer. For backwards compatibility ramoops.ecc=1 means 16 bytes
ECC (using 1 byte for ECC isn't much of use anyway).

Suggested-by: Arve Hjønnevåg 
Signed-off-by: Anton Vorontsov 
---
 fs/pstore/ram.c|   22 ++
 fs/pstore/ram_core.c   |   15 ---
 include/linux/pstore_ram.h |4 ++--
 3 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 58b93fb..b39aebb 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -63,7 +63,9 @@ MODULE_PARM_DESC(dump_oops,
 static int ramoops_ecc;
 module_param_named(ecc, ramoops_ecc, int, 0600);
 MODULE_PARM_DESC(ramoops_ecc,
-   "set to 1 to enable ECC support");
+   "if non-zero, the option enables ECC support and specifies "
+   "ECC buffer size in bytes (1 is a special value, means 16 "
+   "bytes ECC)");
 
 struct ramoops_context {
struct persistent_ram_zone **przs;
@@ -73,7 +75,7 @@ struct ramoops_context {
size_t record_size;
size_t console_size;
int dump_oops;
-   bool ecc;
+   int ecc_size;
unsigned int max_dump_cnt;
unsigned int dump_write_cnt;
unsigned int dump_read_cnt;
@@ -288,7 +290,7 @@ static int ramoops_init_przs(struct device *dev, struct 
ramoops_context *cxt,
for (i = 0; i < cxt->max_dump_cnt; i++) {
size_t sz = cxt->record_size;
 
-   cxt->przs[i] = persistent_ram_new(*paddr, sz, cxt->ecc);
+   cxt->przs[i] = persistent_ram_new(*paddr, sz, cxt->ecc_size);
if (IS_ERR(cxt->przs[i])) {
err = PTR_ERR(cxt->przs[i]);
dev_err(dev, "failed to request mem region 
(0x%zx@0x%llx): %d\n",
@@ -314,7 +316,7 @@ static int ramoops_init_prz(struct device *dev, struct 
ramoops_context *cxt,
if (*paddr + sz > *paddr + cxt->size)
return -ENOMEM;
 
-   *prz = persistent_ram_new(*paddr, sz, cxt->ecc);
+   *prz = persistent_ram_new(*paddr, sz, cxt->ecc_size);
if (IS_ERR(*prz)) {
int err = PTR_ERR(*prz);
 
@@ -361,7 +363,7 @@ static int __devinit ramoops_probe(struct platform_device 
*pdev)
cxt->record_size = pdata->record_size;
cxt->console_size = pdata->console_size;
cxt->dump_oops = pdata->dump_oops;
-   cxt->ecc = pdata->ecc;
+   cxt->ecc_size = pdata->ecc_size;
 
paddr = cxt->phys_addr;
 
@@ -411,9 +413,9 @@ static int __devinit ramoops_probe(struct platform_device 
*pdev)
record_size = pdata->record_size;
dump_oops = pdata->dump_oops;
 
-   pr_info("attached 0x%lx@0x%llx, ecc: %s\n",
+   pr_info("attached 0x%lx@0x%llx, ecc: %d\n",
cxt->size, (unsigned long long)cxt->phys_addr,
-   ramoops_ecc ? "on" : "off");
+   cxt->ecc_size);
 
return 0;
 
@@ -478,7 +480,11 @@ static void ramoops_register_dummy(void)
dummy_data->record_size = record_size;
dummy_data->console_size = ramoops_console_size;
dummy_data->dump_oops = dump_oops;
-   dummy_data->ecc = ramoops_ecc;
+   /*
+* For backwards compatibility ramoops.ecc=1 means 16 bytes ECC
+* (using 1 byte for ECC isn't much of use anyway).
+*/
+   dummy_data->ecc_size = ramoops_ecc == 1 ? 16 : ramoops_ecc;
 
dummy = platform_device_register_data(NULL, "ramoops", -1,
dummy_data, sizeof(struct ramoops_platform_data));
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 3f4d6e6..7e5a2a9 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -171,7 +171,8 @@ static void persistent_ram_ecc_old(struct 
persistent_ram_zone *prz)
}
 }
 
-static int persistent_ram_init_ecc(struct persistent_ram_zone *prz)
+static int persistent_ram_init_ecc(struct persistent_ram_zone *prz,
+  int ecc_size)
 {
int numerr;
struct persistent_ram_buffer *buffer = prz->buffer;
@@ -184,7 +185,7 @@ static int persistent_ram_init_ecc(struct 
persistent_ram_zone *prz)
return 0;
 
prz->ecc_block_size = 128;
-   prz->ecc_size = 16;
+   prz->ecc_size = ecc_size;
 
ecc_blocks = DIV_ROUND_UP(prz->buffer_size, prz->ecc_block_size);
ecc_total = (ecc_blocks + 1) * prz->ecc_size;
@@ -390,13 +391,13 @@ static int persistent_ram_buffer_map(phys_addr_t start, 
phys_addr_t size,
 }
 
 static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz,
- bool ecc)
+ int ecc_size)
 {
int ret;
 
-   prz->ecc = ecc;
+   prz->ecc = ecc_size;
 
-   ret = persistent_ram_init_ecc(prz);
+   ret = persistent_ram_init_ecc(prz, ecc_size);
if (ret)
return ret;
 
@@ -444,7 +445,7 @@

[PATCH 3/3] pstore/ram_core: Get rid of prz->ecc enable/disable flag

2012-07-09 Thread Anton Vorontsov

Nowadays we can use prz->ecc_size as a flag, no need for the special
member in the prz struct.

Signed-off-by: Anton Vorontsov 
---
 fs/pstore/ram_core.c   |   10 --
 include/linux/pstore_ram.h |1 -
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 7e5a2a9..4dabbb8 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -114,7 +114,7 @@ static void notrace persistent_ram_update_ecc(struct 
persistent_ram_zone *prz,
int ecc_size = prz->ecc_size;
int size = prz->ecc_block_size;
 
-   if (!prz->ecc)
+   if (!prz->ecc_size)
return;
 
block = buffer->data + (start & ~(ecc_block_size - 1));
@@ -133,7 +133,7 @@ static void persistent_ram_update_header_ecc(struct 
persistent_ram_zone *prz)
 {
struct persistent_ram_buffer *buffer = prz->buffer;
 
-   if (!prz->ecc)
+   if (!prz->ecc_size)
return;
 
persistent_ram_encode_rs8(prz, (uint8_t *)buffer, sizeof(*buffer),
@@ -146,7 +146,7 @@ static void persistent_ram_ecc_old(struct 
persistent_ram_zone *prz)
uint8_t *block;
uint8_t *par;
 
-   if (!prz->ecc)
+   if (!prz->ecc_size)
return;
 
block = buffer->data;
@@ -181,7 +181,7 @@ static int persistent_ram_init_ecc(struct 
persistent_ram_zone *prz,
int ecc_symsize = 8;
int ecc_poly = 0x11d;
 
-   if (!prz->ecc)
+   if (!ecc_size)
return 0;
 
prz->ecc_block_size = 128;
@@ -395,8 +395,6 @@ static int __devinit persistent_ram_post_init(struct 
persistent_ram_zone *prz,
 {
int ret;
 
-   prz->ecc = ecc_size;
-
ret = persistent_ram_init_ecc(prz, ecc_size);
if (ret)
return ret;
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 94b79f1..dcf805f 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -33,7 +33,6 @@ struct persistent_ram_zone {
size_t buffer_size;
 
/* ECC correction */
-   bool ecc;
char *par_buffer;
char *par_header;
struct rs_control *rs_decoder;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/3] pstore/ram_core: Get rid of prz->ecc_symsize and prz->ecc_poly

2012-07-09 Thread Anton Vorontsov

The struct members were never used anywhere outside of
persistent_ram_init_ecc(), so there's actually no need for them
to be in the struct.

If we ever want to make polynomial or symbol size configurable,
it would make more sense to just pass initialized rs_decoder
to the persistent_ram init functions.

Signed-off-by: Anton Vorontsov 
---
 fs/pstore/ram_core.c   |7 +++
 include/linux/pstore_ram.h |2 --
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index a5a7b13..3f4d6e6 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -177,14 +177,14 @@ static int persistent_ram_init_ecc(struct 
persistent_ram_zone *prz)
struct persistent_ram_buffer *buffer = prz->buffer;
int ecc_blocks;
size_t ecc_total;
+   int ecc_symsize = 8;
+   int ecc_poly = 0x11d;
 
if (!prz->ecc)
return 0;
 
prz->ecc_block_size = 128;
prz->ecc_size = 16;
-   prz->ecc_symsize = 8;
-   prz->ecc_poly = 0x11d;
 
ecc_blocks = DIV_ROUND_UP(prz->buffer_size, prz->ecc_block_size);
ecc_total = (ecc_blocks + 1) * prz->ecc_size;
@@ -202,8 +202,7 @@ static int persistent_ram_init_ecc(struct 
persistent_ram_zone *prz)
 * first consecutive root is 0
 * primitive element to generate roots = 1
 */
-   prz->rs_decoder = init_rs(prz->ecc_symsize, prz->ecc_poly, 0, 1,
- prz->ecc_size);
+   prz->rs_decoder = init_rs(ecc_symsize, ecc_poly, 0, 1, prz->ecc_size);
if (prz->rs_decoder == NULL) {
pr_info("persistent_ram: init_rs failed\n");
return -EINVAL;
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index e681af9..a0975c0 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -41,8 +41,6 @@ struct persistent_ram_zone {
int bad_blocks;
int ecc_block_size;
int ecc_size;
-   int ecc_symsize;
-   int ecc_poly;
 
char *old_log;
size_t old_log_size;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2] mm: Warn about costly page allocation

2012-07-09 Thread Minchan Kim

Please ignore,
It is sent by mistake. :(
Sorry for the noise.

On Tue, Jul 10, 2012 at 08:55:53AM +0900, Minchan Kim wrote:
> Since lumpy reclaim was introduced at 2.6.23, it helped higher
> order allocation.
> Recently, we removed it at 3.4 and we didn't enable compaction
> forcingly[1]. The reason makes sense that compaction.o + migration.o
> isn't trivial for system doesn't use higher order allocation.
> But the problem is that we have to enable compaction explicitly
> while lumpy reclaim enabled unconditionally.
> 
> Normally, admin doesn't know his system have used higher order
> allocation and even lumpy reclaim have helped it.
> Admin in embedded system have a tendency to minimise code size so that
> they can disable compaction. In this case, we can see page allocation
> failure we can never see in the past. It's critical on embedded side
> because...
> 
> Let's think this scenario.
> 
> There is QA team in embedded company and they have tested their product.
> In test scenario, they can allocate 100 high order allocation.
> (they don't matter how many high order allocations in kernel are needed
> during test. their concern is just only working well or fail of their
> middleware/application) High order allocation will be serviced well
> by natural buddy allocation without lumpy's help. So they released
> the product and sold out all over the world.
> Unfortunately, in real practice, sometime, 105 high order allocation was
> needed rarely and fortunately, lumpy reclaim could help it so the product
> doesn't have a problem until now.
> 
> If they use latest kernel, they will see the new config CONFIG_COMPACTION
> which is very poor documentation, and they can't know it's replacement of
> lumpy reclaim(even, they don't know lumpy reclaim) so they simply disable
> that option for size optimization. Of course, QA team still test it but they
> can't find the problem if they don't do test stronger than old.
> It ends up release the product and sold out all over the world, again.
> But in this time, we don't have both lumpy and compaction so the problem
> would happen in real practice. A poor engineer from Korea has to fly
> to the USA to fix a ton of products. Otherwise, the products must be recalled
> from all over the world. Maybe he can lose his job. :(
> 
> This patch adds warning for notice. If the system try to allocate
> PAGE_ALLOC_COSTLY_ORDER above page and system enters reclaim path,
> it emits the warning. At least, it gives a chance to look into their
> system before the release.
> 
> Please keep in mind. It's not a good idea to depend lumpy/compaction
> for regular high-order allocations. Both depends on being able to move
> MIGRATE_MOVABLE allocations to satisfy the high-order allocation. If used
> regularly for high-order kernel allocations and they are long-lived,
> the system will eventually be unable to grant these allocations, with or
> without compaction or lumpy reclaim. Hatchet jobs that work around this 
> problem
> include forcing MIGRATE_RESERVE to be only used for high-order allocations
> and tuning its size. It's a major hack though and is unlikely to be merged
> to mainline but might suit an embedded product.
> 
> This patch avoids false positive by alloc_large_system_hash which
> allocates with GFP_ATOMIC and a fallback mechanism so it can make
> this warning useless.
> 
> [1] c53919ad(mm: vmscan: remove lumpy reclaim)
> 
> Signed-off-by: Minchan Kim 
> ---
> Changelog
> 
> * from v1
>  - add more description about warning failure of high-order allocation
>  - use printk_ratelimited/pr_warn and dump stack - [Mel, Andrew]
> 
>  mm/page_alloc.c |   25 +
>  1 file changed, 25 insertions(+)
> 
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index a4d3a19..710d0e90 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2276,6 +2276,29 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
>   return alloc_flags;
>  }
>  
> +#if defined(CONFIG_DEBUG_VM) && !defined(CONFIG_COMPACTION)
> +static inline void check_page_alloc_costly_order(unsigned int order, gfp_t 
> flags)
> +{
> + if (likely(order <= PAGE_ALLOC_COSTLY_ORDER))
> + return;
> +
> + if (!printk_ratelimited())
> + return;
> +
> + pr_warn("%s: page allocation high-order stupidity: "
> + "order:%d, mode:0x%x\n", current->comm, order, flags);
> + pr_warn("Enable compaction if high-order allocations are "
> + "very few and rare.\n");
> + pr_warn("If you need regular high-order allocation, "
> + "compaction wouldn't help it.\n");
> + dump_stack();
> +}
> +#else
> +static inline void check_page_alloc_costly_order(unsigned int order)
> +{
> +}
> +#endif
> +
>  static inline struct page *
>  __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>   struct zonelist *zonelist, enum zone_type high_zoneidx,
> @@ -2353,6 +2376,8 @@ rebalance:
>   if (!wait)
>   goto nopage;
>  
> +

Re: [ 35/48] dummy: fix rcu_sched self-detected stalls

2012-07-09 Thread David Miller

From: Ben Hutchings 
Date: Tue, 10 Jul 2012 00:47:06 +0100

> I built on x86_64...

As did I :-)

> But it's not an important bug in mainline (yet) so perhaps we can treat
> it as only required in stable.

I think this is the best thing to do for now.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH resend 0/3] pstore/ram: Configurable ECC size

2012-07-09 Thread Anton Vorontsov

On Mon, Jul 09, 2012 at 04:45:59PM -0700, Anton Vorontsov wrote:
> Hi all,
> 
> Just a few patches left from the series that used to add configurable
> ECC size for pstore/ram backend. Most patches were merged into -next,
> and this is just a resend of the leftovers.

Oh, I forgot: actually, it's not a plain resend, I fixed Dan Carpenter's
comment (thanks!): added more text about special ecc=1 case, both
as a module param description and as a comment in the code.

-- 
Anton Vorontsov
Email: cbouatmai...@gmail.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH firmware 2/4] rtl_nic: update firmware for RTL8411

2012-07-09 Thread Ben Hutchings

On Mon, 2012-07-09 at 11:11 +0800, Hayes Wang wrote:
> File: rtl_nic/rtl8411-1.fw
> Version: 0.0.3
[...]

Applied.

Ben.

-- 
Ben Hutchings
The generation of random numbers is too important to be left to chance.
- Robert Coveyou


signature.asc
Description: This is a digitally signed message part

Re: [PATCH firmware 1/4] rtl_nic: update firmware for RTL8168F

2012-07-09 Thread Ben Hutchings

On Mon, 2012-07-09 at 11:11 +0800, Hayes Wang wrote:
> File: rtl_nic/rtl8168f-1.fw
> Version: 0.0.5
[...]

Applied.

Ben.

-- 
Ben Hutchings
The generation of random numbers is too important to be left to chance.
- Robert Coveyou


signature.asc
Description: This is a digitally signed message part

Re: [PATCH] mm: hugetlb: flush dcache before returning zeroed huge page to userspace

2012-07-09 Thread Hugh Dickins

On Mon, 9 Jul 2012, Will Deacon wrote:
> On Mon, Jul 09, 2012 at 01:25:23PM +0100, Michal Hocko wrote:
> > On Wed 04-07-12 15:32:56, Will Deacon wrote:
> > > When allocating and returning clear huge pages to userspace as a
> > > response to a fault, we may zero and return a mapping to a previously
> > > dirtied physical region (for example, it may have been written by
> > > a private mapping which was freed as a result of an ftruncate on the
> > > backing file). On architectures with Harvard caches, this can lead to
> > > I/D inconsistency since the zeroed view may not be visible to the
> > > instruction stream.
> > > 
> > > This patch solves the problem by flushing the region after allocating
> > > and clearing a new huge page. Note that PowerPC avoids this issue by
> > > performing the flushing in their clear_user_page implementation to keep
> > > the loader happy, however this is closely tied to the semantics of the
> > > PG_arch_1 page flag which is architecture-specific.
> > > 
> > > Acked-by: Catalin Marinas 
> > > Signed-off-by: Will Deacon 
> > > ---
> > >  mm/hugetlb.c |1 +
> > >  1 files changed, 1 insertions(+), 0 deletions(-)
> > > 
> > > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > > index e198831..b83d026 100644
> > > --- a/mm/hugetlb.c
> > > +++ b/mm/hugetlb.c
> > > @@ -2646,6 +2646,7 @@ retry:
> > >   goto out;
> > >   }
> > >   clear_huge_page(page, address, pages_per_huge_page(h));
> > > + flush_dcache_page(page);
> > >   __SetPageUptodate(page);
> > 
> > Does this have to be explicit in the arch independent code?
> > It seems that ia64 uses flush_dcache_page already in the clear_user_page
> 
> It would match what is done in similar situations by cow_user_page 
> (mm/memory.c)
> and shmem_writepage (mm/shmem.c). Other subsystems also have explicit page
> flushing (DMA bounce, ksm) so I think this is the right place for it.

I am not at all sure if you are right or not:
please let's consult linux-arch about this - now Cc'ed.

If this hugetlb_no_page() were solely mapping the hugepage into that
userspace, I would say you are wrong.  It's the job of clear_huge_page()
to take the mapped address into account, and pass it down to the
architecture-specific implementation, to do whatever flushing is
needed - you should be providing that in your architecture.

In particular, notice how clear_huge_page() goes round a loop of
clear_user_highpage()s: in your patch, you're expecting the implementation
of flush_dcache_page() to notice whether or not this is a hugepage, and
flush the appropriate size.

Perhaps yours is the only architecture to need this on huge, and your
flush_dcache_page() implements it correctly; but it does seem surprising.

If I start to grep the architectures for non-empty flush_dcache_page(),
I soon find things in arch/arm such as v4_mc_copy_user_highpage() doing
if (!test_and_set_bit(PG_dcache_clean,)) __flush_dcache_page() - where
the naming suggests that I'm right, it's the architecture's responsibility
to arrange whatever flushing is needed in its copy and clear page functions.

But... this hugetlb_no_page() has a VM_MAYSHARE case below, which puts
the new page into page cache, making it accessible by other processes:
that may indeed be reason for flush_dcache_page() there - or a loop of
flush_dcache_page()s.  But I worry then that in the !VM_MAYSHARE case
you would be duplicating expensive flushes: perhaps they should be
restricted to the VM_MAYSHARE block.

Hugh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2] mm: Warn about costly page allocation

2012-07-09 Thread Minchan Kim

Since lumpy reclaim was introduced at 2.6.23, it helped higher
order allocation.
Recently, we removed it at 3.4 and we didn't enable compaction
forcingly[1]. The reason makes sense that compaction.o + migration.o
isn't trivial for system doesn't use higher order allocation.
But the problem is that we have to enable compaction explicitly
while lumpy reclaim enabled unconditionally.

Normally, admin doesn't know his system have used higher order
allocation and even lumpy reclaim have helped it.
Admin in embedded system have a tendency to minimise code size so that
they can disable compaction. In this case, we can see page allocation
failure we can never see in the past. It's critical on embedded side
because...

Let's think this scenario.

There is QA team in embedded company and they have tested their product.
In test scenario, they can allocate 100 high order allocation.
(they don't matter how many high order allocations in kernel are needed
during test. their concern is just only working well or fail of their
middleware/application) High order allocation will be serviced well
by natural buddy allocation without lumpy's help. So they released
the product and sold out all over the world.
Unfortunately, in real practice, sometime, 105 high order allocation was
needed rarely and fortunately, lumpy reclaim could help it so the product
doesn't have a problem until now.

If they use latest kernel, they will see the new config CONFIG_COMPACTION
which is very poor documentation, and they can't know it's replacement of
lumpy reclaim(even, they don't know lumpy reclaim) so they simply disable
that option for size optimization. Of course, QA team still test it but they
can't find the problem if they don't do test stronger than old.
It ends up release the product and sold out all over the world, again.
But in this time, we don't have both lumpy and compaction so the problem
would happen in real practice. A poor engineer from Korea has to fly
to the USA to fix a ton of products. Otherwise, the products must be recalled
from all over the world. Maybe he can lose his job. :(

This patch adds warning for notice. If the system try to allocate
PAGE_ALLOC_COSTLY_ORDER above page and system enters reclaim path,
it emits the warning. At least, it gives a chance to look into their
system before the release.

Please keep in mind. It's not a good idea to depend lumpy/compaction
for regular high-order allocations. Both depends on being able to move
MIGRATE_MOVABLE allocations to satisfy the high-order allocation. If used
regularly for high-order kernel allocations and they are long-lived,
the system will eventually be unable to grant these allocations, with or
without compaction or lumpy reclaim. Hatchet jobs that work around this problem
include forcing MIGRATE_RESERVE to be only used for high-order allocations
and tuning its size. It's a major hack though and is unlikely to be merged
to mainline but might suit an embedded product.

This patch avoids false positive by alloc_large_system_hash which
allocates with GFP_ATOMIC and a fallback mechanism so it can make
this warning useless.

[1] c53919ad(mm: vmscan: remove lumpy reclaim)

Signed-off-by: Minchan Kim 
---
Changelog

* from v1
 - add more description about warning failure of high-order allocation
 - use printk_ratelimited/pr_warn and dump stack - [Mel, Andrew]

 mm/page_alloc.c |   25 +
 1 file changed, 25 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a4d3a19..710d0e90 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2276,6 +2276,29 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
return alloc_flags;
 }
 
+#if defined(CONFIG_DEBUG_VM) && !defined(CONFIG_COMPACTION)
+static inline void check_page_alloc_costly_order(unsigned int order, gfp_t 
flags)
+{
+   if (likely(order <= PAGE_ALLOC_COSTLY_ORDER))
+   return;
+
+   if (!printk_ratelimited())
+   return;
+
+   pr_warn("%s: page allocation high-order stupidity: "
+   "order:%d, mode:0x%x\n", current->comm, order, flags);
+   pr_warn("Enable compaction if high-order allocations are "
+   "very few and rare.\n");
+   pr_warn("If you need regular high-order allocation, "
+   "compaction wouldn't help it.\n");
+   dump_stack();
+}
+#else
+static inline void check_page_alloc_costly_order(unsigned int order)
+{
+}
+#endif
+
 static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
@@ -2353,6 +2376,8 @@ rebalance:
if (!wait)
goto nopage;
 
+   check_page_alloc_costly_order(order);
+
/* Avoid recursion of direct reclaim */
if (current->flags & PF_MEMALLOC)
goto nopage;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at

Re: [PATCH 2/3] PCI: reimplement P2P bridge 1K I/O windows (Intel P64H2)

2012-07-09 Thread Yinghai Lu

On Mon, Jul 9, 2012 at 3:21 PM, Bjorn Helgaas  wrote:

> What bad things would happen if I just appended your patch to the end
> of this series?  Would that break bisection in some scenario?

Should be ok. it should not break bisection.
Don't think sizing is working for 1k before.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH resend 0/3] pstore/ram: Configurable ECC size

2012-07-09 Thread Anton Vorontsov

Hi all,

Just a few patches left from the series that used to add configurable
ECC size for pstore/ram backend. Most patches were merged into -next,
and this is just a resend of the leftovers.

(Note that pstore/trace patches go on top of this series.)

Thanks,

---
 fs/pstore/ram.c|   14 +++---
 fs/pstore/ram_core.c   |   30 ++
 include/linux/pstore_ram.h |7 ++-
 3 files changed, 23 insertions(+), 28 deletions(-)

-- 
Anton Vorontsov
Email: cbouatmai...@gmail.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: + checkpatch-add-check-for-use-of-sizeof-without-parenthesis.patch added to -mm tree

2012-07-09 Thread David Rientjes

On Mon, 9 Jul 2012, Joe Perches wrote:

> > So, nack, don't start enforcing your own coding style and preferences in 
> > checkpatch.pl.
> 
> Not just my opinion.
> 
> https://lkml.org/lkml/2008/12/23/138
> 
> Date: Tue, 23 Dec 2008 10:08:50 -0800 (PST)
> From: Linus Torvalds <>
> []
> Another example of this is "sizeof". The kernel universally (I hope) has 
> parenthesis around the sizeof argument, even though it's clearly not 
> required by the C language. 
> 

Well, let's add Linus to the cc then because it's certainly not a C 
standard.  The sizeof operator requires parenthesis for type names, you 
can't do "sizeof unsigned long", for example, it requires 
"sizeof (unsigned long)".  All other unary operators do not need the 
parenthesis by ANY C standard.

Documentation/CodingStyle does not ask for the parenthesis to be added 
just like it doesn't ask for parenthesis to do things like (i++); which is 
another unary operator.  You would complain that (i++) has unnecessary 
parenthesis, and I claim the same for sizeof (*foo).

The fact that the kernel has over 1000 occurrences of "sizeof ..." without 
parenthesis shows that people actually know what _is_ standard, otherwise 
you'd get an error at compile time.  Taking just unary operators with 
pointers as an example, it spans the entire kernel:

$ grep -lr 'sizeof \*' * | cut -f1 -d / | uniq
arch
crypto
Documentation
drivers
fs
include
ipc
kernel
lib
net
security
sound
tools

So, for people like me who don't use checkpatch.pl at all, please modify 
Documentation/CodingStyle and get Linus to merge it to add this as a 
stylistic preference.  Saying it's a standard, though, is wrong.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ 35/48] dummy: fix rcu_sched self-detected stalls

2012-07-09 Thread Ben Hutchings

On Mon, 2012-07-09 at 20:39 -0300, Herton Ronaldo Krzesinski wrote:
> On Mon, Jul 09, 2012 at 03:31:51PM +0100, Ben Hutchings wrote:
> > 3.2-stable review patch.  If anyone has any objections, please let me know.
> > 
> > --
> > 
> > From: Eric Dumazet 
> > 
> > [ Upstream commit 16b0dc29c1af9df341428f4c49ada4f626258082 ]
> > 
> > Trying to "modprobe dummy numdummies=3" triggers :
> > 
> > INFO: rcu_sched self-detected stall on CPU { 8} (t=6 jiffies)
> > 
> > After this splat, RTNL is locked and reboot is needed.
> > 
> > We must call cond_resched() to avoid this, even holding RTNL.
> > 
> > Signed-off-by: Eric Dumazet 
> > Signed-off-by: David S. Miller 
> > Signed-off-by: Ben Hutchings 
> > ---
> >  drivers/net/dummy.c |4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
> > index eeac9ca..87e4632 100644
> > --- a/drivers/net/dummy.c
> > +++ b/drivers/net/dummy.c
> > @@ -186,8 +186,10 @@ static int __init dummy_init_module(void)
> > rtnl_lock();
> > err = __rtnl_link_register(_link_ops);
> >  
> > -   for (i = 0; i < numdummies && !err; i++)
> > +   for (i = 0; i < numdummies && !err; i++) {
> > err = dummy_init_one();
> > +   cond_resched();
> > +   }
> 
> Hi, I got the following build error with this applied to 3.2, on i386
> and arm builds:
> 
> linux-stable/drivers/net/dummy.c: In function 'dummy_init_module':
> linux-stable/drivers/net/dummy.c:191:3: error: implicit declaration of 
> function 'cond_resched' [-Werror=implicit-function-declaration]
> 
> This doesn't happen on mainline so far. Looking here on mainline, sched.h is
> implicitly included through  ->
>  ->  -> 
> 
> But on 3.2 this doesn't happen, and build fails on i386 and arm. On
> x86_64 it works though since sched.h is pulled from netdevice.h ->
> ethtool.h -> compat.h if CONFIG_COMPAT is enabled.

I built on x86_64...

> May be the best solution is for the patch backported to 3.2, to include
> <linux/sched.h> also.
[...]

Not sure.  Really, it ought to be directly included from this file in
mainline as well rather than depending on that long chain of includes.
But it's not an important bug in mainline (yet) so perhaps we can treat
it as only required in stable.

Ben.

-- 
Ben Hutchings
The generation of random numbers is too important to be left to chance.
- Robert Coveyou


signature.asc
Description: This is a digitally signed message part

Re: [PATCH 4/7] tracing/function: Introduce persistent trace option

2012-07-09 Thread Anton Vorontsov

On Wed, Jun 27, 2012 at 08:40:12AM -0400, Steven Rostedt wrote:
> On Tue, 2012-06-26 at 16:23 -0700, Anton Vorontsov wrote:
> > @@ -219,6 +232,8 @@ static int func_set_flag(u32 old_flags, u32 bit, int 
> > set)
> >  
> > return 0;
> > }
> > +   if (bit == TRACE_FUNC_OPT_PSTORE)
> > +   return 0;
> 
> Perhaps this should be a switch() statement, or at least put a
>  } else if () {
> 
> As it can't be bit == TRACE_FUNC_OPT_STACK and bit ==
> TRACE_FUNC_OPT_PSTORE.

Yup, thanks for the idea. I'll start using 'else if' at first, and then
will convert it to a switch statement via a separate patch, that way I'll
not clobber the functional patch with cosmetic changes.

Thanks,

-- 
Anton Vorontsov
Email: cbouatmai...@gmail.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2] fail dentry revalidation after namespace change

2012-07-09 Thread Serge Hallyn

Quoting Andrew Morton (a...@linux-foundation.org):
> On Fri,  6 Jul 2012 13:09:07 +0400
> Glauber Costa  wrote:
> 
> > When we change the namespace tag of a sysfs entry, the associated dentry
> > is still kept around. readdir() will work correctly and not display the
> > old entries, but open() will still succeed, so will reads and writes.
> > 
> > This will no longer happen if sysfs is remounted, hinting that this is a
> > cache-related problem.
> > 
> > I am using the following sequence to demonstrate that:
> > 
> > shell1:
> > ip link add type veth
> > unshare -nm
> > 
> > shell2:
> > ip link set veth1 
> > cat /sys/devices/virtual/net/veth1/ifindex
> > 
> > Before that patch, this will succeed (fail to fail). After it, it will
> > correctly return an error. Differently from a normal rename, which we
> > handle fine, changing the object namespace will keep its path intact.
> > So this check seems necessary as well.
> > 
> > ...
> >
> > --- a/fs/sysfs/dir.c
> > +++ b/fs/sysfs/dir.c
> > @@ -307,6 +307,7 @@ static int sysfs_dentry_revalidate(struct dentry 
> > *dentry, struct nameidata *nd)
> >  {
> > struct sysfs_dirent *sd;
> > int is_dir;
> > +   int type;
> >  
> > if (nd->flags & LOOKUP_RCU)
> > return -ECHILD;
> > @@ -326,6 +327,13 @@ static int sysfs_dentry_revalidate(struct dentry 
> > *dentry, struct nameidata *nd)
> > if (strcmp(dentry->d_name.name, sd->s_name) != 0)
> > goto out_bad;
> >  
> > +   /* The sysfs dirent has been moved to a different namespace */
> > +   type = KOBJ_NS_TYPE_NONE;
> > +   if (sd->s_parent)
> > +   type = sysfs_ns_type(sd->s_parent);
> > +   if (type && (sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns))
> 
> eww, the code is assuming that KOBJ_NS_TYPE_NONE has a value of zero. 
> Don't do that; it smells bad.
> 
> I renamed my version of this patch to "sysfs: fail dentry revalidation
> after namespace change", as carefully explained in section 15 of the
> excellent Documentation/SubmittingPatches, then queued this:
> 
> 
> From: Andrew Morton 
> Subject: sysfs-fail-dentry-revalidation-after-namespace-change-fix
> 
> don't assume that KOBJ_NS_TYPE_NONE==0.  Also save a test-n-branch.
> 
> Cc: Eric W. Biederman 
> Cc: Glauber Costa 
> Cc: Greg Kroah-Hartman 
> Cc: Tejun Heo 

Acked-by: Serge E. Hallyn 

> Signed-off-by: Andrew Morton 
> ---
> 
>  fs/sysfs/dir.c |8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff -puN 
> fs/sysfs/dir.c~sysfs-fail-dentry-revalidation-after-namespace-change-fix 
> fs/sysfs/dir.c
> --- a/fs/sysfs/dir.c~sysfs-fail-dentry-revalidation-after-namespace-change-fix
> +++ a/fs/sysfs/dir.c
> @@ -329,10 +329,12 @@ static int sysfs_dentry_revalidate(struc
>  
>   /* The sysfs dirent has been moved to a different namespace */
>   type = KOBJ_NS_TYPE_NONE;
> - if (sd->s_parent)
> + if (sd->s_parent) {
>   type = sysfs_ns_type(sd->s_parent);
> - if (type && (sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns))
> - goto out_bad;
> + if (type != KOBJ_NS_TYPE_NONE &&
> + sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns)
> + goto out_bad;
> + }
>  
>   mutex_unlock(_mutex);
>  out_valid:
> _
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3] printk: Have printk() never buffer its data

2012-07-09 Thread Joe Perches

On Mon, 2012-07-09 at 16:32 -0700, Joe Perches wrote:
> Then you've changed semantics and I think you need to
> fix it.
> 
> A dev_ call is not guaranteed to be a complete
> message.
> 
> There are dev_ and netdev_ calls
> followed by pr_cont.
> 
> Maybe these could be fixed up and then they could be
> always integral.  There don't look to be too many.
> 
> This may be most (all?) of them:

Nah, there's a bunch more:

$ git grep -E -A10 "\b(netdev|dev)_(info|warn|notice|err|alert|emerg|crit)" 
drivers | \
  grep -B10 -E "\bprintk\s*\(\s*(KERN_CONT|)\s*\""

All of them could be fixed up though.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/3] shmem/tmpfs: three late patches

2012-07-09 Thread Andrew Morton

On Mon, 9 Jul 2012 15:35:26 -0700 (PDT)
Hugh Dickins  wrote:

> Here's three little shmem/tmpfs patches against v3.5-rc6.
> Either the first should go in before v3.5 final, or it should not go
> in at all.  The second and third are independent of it: I'd like them
> in v3.5, but don't have a clinching argument: see what you think.

Thanks, I queued all three for 3.5.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ 35/48] dummy: fix rcu_sched self-detected stalls

2012-07-09 Thread Herton Ronaldo Krzesinski

On Mon, Jul 09, 2012 at 03:31:51PM +0100, Ben Hutchings wrote:
> 3.2-stable review patch.  If anyone has any objections, please let me know.
> 
> --
> 
> From: Eric Dumazet 
> 
> [ Upstream commit 16b0dc29c1af9df341428f4c49ada4f626258082 ]
> 
> Trying to "modprobe dummy numdummies=3" triggers :
> 
> INFO: rcu_sched self-detected stall on CPU { 8} (t=6 jiffies)
> 
> After this splat, RTNL is locked and reboot is needed.
> 
> We must call cond_resched() to avoid this, even holding RTNL.
> 
> Signed-off-by: Eric Dumazet 
> Signed-off-by: David S. Miller 
> Signed-off-by: Ben Hutchings 
> ---
>  drivers/net/dummy.c |4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
> index eeac9ca..87e4632 100644
> --- a/drivers/net/dummy.c
> +++ b/drivers/net/dummy.c
> @@ -186,8 +186,10 @@ static int __init dummy_init_module(void)
>   rtnl_lock();
>   err = __rtnl_link_register(_link_ops);
>  
> - for (i = 0; i < numdummies && !err; i++)
> + for (i = 0; i < numdummies && !err; i++) {
>   err = dummy_init_one();
> + cond_resched();
> + }

Hi, I got the following build error with this applied to 3.2, on i386
and arm builds:

linux-stable/drivers/net/dummy.c: In function 'dummy_init_module':
linux-stable/drivers/net/dummy.c:191:3: error: implicit declaration of function 
'cond_resched' [-Werror=implicit-function-declaration]

This doesn't happen on mainline so far. Looking here on mainline, sched.h is
implicitly included through  ->
 ->  -> 

But on 3.2 this doesn't happen, and build fails on i386 and arm. On
x86_64 it works though since sched.h is pulled from netdevice.h ->
ethtool.h -> compat.h if CONFIG_COMPAT is enabled.

Maybe the best solution is for the patch backported to 3.2 to include
<linux/sched.h> also.

>   if (err < 0)
>   __rtnl_link_unregister(_link_ops);
>   rtnl_unlock();
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe stable" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

-- 
[]'s
Herton
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 01/13] rbtree: reference Documentation/rbtree.txt for usage instructions

2012-07-09 Thread Michel Lespinasse

include/linux/rbtree.h included some basic usage instructions, while
Documentation/rbtree.txt had some more complete and easier to follow
instructions.  Replacing the former with a reference to the latter.

Signed-off-by: Michel Lespinasse 
---
 include/linux/rbtree.h |   67 +---
 1 files changed, 1 insertions(+), 66 deletions(-)

diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 033b507..e6a8077 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -23,72 +23,7 @@
   I know it's not the cleaner way,  but in C (not in C++) to get
   performances and genericity...
 
-  Some example of insert and search follows here. The search is a plain
-  normal search over an ordered tree. The insert instead must be implemented
-  in two steps: First, the code must insert the element in order as a red leaf
-  in the tree, and then the support library function rb_insert_color() must
-  be called. Such function will do the not trivial work to rebalance the
-  rbtree, if necessary.
-

-static inline struct page * rb_search_page_cache(struct inode * inode,
-unsigned long offset)
-{
-   struct rb_node * n = inode->i_rb_page_cache.rb_node;
-   struct page * page;
-
-   while (n)
-   {
-   page = rb_entry(n, struct page, rb_page_cache);
-
-   if (offset < page->offset)
-   n = n->rb_left;
-   else if (offset > page->offset)
-   n = n->rb_right;
-   else
-   return page;
-   }
-   return NULL;
-}
-
-static inline struct page * __rb_insert_page_cache(struct inode * inode,
-  unsigned long offset,
-  struct rb_node * node)
-{
-   struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
-   struct rb_node * parent = NULL;
-   struct page * page;
-
-   while (*p)
-   {
-   parent = *p;
-   page = rb_entry(parent, struct page, rb_page_cache);
-
-   if (offset < page->offset)
-   p = &(*p)->rb_left;
-   else if (offset > page->offset)
-   p = &(*p)->rb_right;
-   else
-   return page;
-   }
-
-   rb_link_node(node, parent, p);
-
-   return NULL;
-}
-
-static inline struct page * rb_insert_page_cache(struct inode * inode,
-unsigned long offset,
-struct rb_node * node)
-{
-   struct page * ret;
-   if ((ret = __rb_insert_page_cache(inode, offset, node)))
-   goto out;
-   rb_insert_color(node, &inode->i_rb_page_cache);
- out:
-   return ret;
-}

+  See Documentation/rbtree.txt for documentation and samples.
 */
 
 #ifndef_LINUX_RBTREE_H
-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 03/13] rbtree: fix incorrect rbtree node insertion in fs/proc/proc_sysctl.c

2012-07-09 Thread Michel Lespinasse

The recently added code to use rbtrees in sysctl did not follow the proper
rbtree interface on insertion - it was calling rb_link_node() which
inserts a new node into the binary tree, but missed the call to
rb_insert_color() which properly balances the rbtree and establishes all
expected rbtree invariants.

I found out about this only because the faulty commit also used rb_init_node(),
which I am removing within this patchset. But I think it's an easy mistake
to make, and it makes me wonder if we should change the rbtree API so that
insertions would be done with a single rb_insert() call (even if its
implementation could still inline the rb_link_node() part and call
a private __rb_insert_color function to do the rebalancing).

Signed-off-by: Michel Lespinasse 
---
 fs/proc/proc_sysctl.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 33aea86..77602c1a 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -142,6 +142,7 @@ static int insert_entry(struct ctl_table_header *head, 
struct ctl_table *entry)
}
 
rb_link_node(node, parent, p);
+   rb_insert_color(node, &head->parent->root);
return 0;
 }
 
-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 05/13] rbtree: performance and correctness test

2012-07-09 Thread Michel Lespinasse

This small module helps measure the performance of rbtree insert and erase.

Additionally, we run a few correctness tests to check that the rbtrees have
all desired properties:
- contains the right number of nodes in the order desired,
- never two consecutive red nodes on any path,
- all paths to leaf nodes have the same number of black nodes,
- root node is black

Signed-off-by: Michel Lespinasse 
---
 tests/rbtree_test.c |  135 +++
 1 files changed, 135 insertions(+), 0 deletions(-)
 create mode 100644 tests/rbtree_test.c

diff --git a/tests/rbtree_test.c b/tests/rbtree_test.c
new file mode 100644
index 000..2e3944d
--- /dev/null
+++ b/tests/rbtree_test.c
@@ -0,0 +1,135 @@
+#include 
+#include 
+#include 
+#include 
+
+#define NODES   100
+#define PERF_LOOPS  10
+#define CHECK_LOOPS 100
+
+struct test_node {
+   struct rb_node rb;
+   u32 key;
+};
+
+static struct rb_root root = RB_ROOT;
+static struct test_node nodes[NODES];
+
+static struct rnd_state rnd;
+
+static void insert(struct test_node *node, struct rb_root *root)
+{
+   struct rb_node **new = &root->rb_node, *parent = NULL;
+
+   while (*new) {
+   parent = *new;
+   if (node->key < rb_entry(parent, struct test_node, rb)->key)
+   new = &parent->rb_left;
+   else
+   new = &parent->rb_right;
+   }
+
+   rb_link_node(&node->rb, parent, new);
+   rb_insert_color(&node->rb, root);
+}
+
+static inline void erase(struct test_node *node, struct rb_root *root)
+{
+   rb_erase(&node->rb, root);
+}
+
+static void init(void)
+{
+   int i;
+   for (i = 0; i < NODES; i++)
+   nodes[i].key = prandom32(&rnd);
+}
+
+static bool is_red(struct rb_node *rb)
+{
+   return rb->rb_parent_color == (unsigned long)rb_parent(rb);
+}
+
+static int black_path_count(struct rb_node *rb)
+{
+   int count;
+   for (count = 0; rb; rb = rb_parent(rb))
+   count += !is_red(rb);
+   return count;
+}
+
+static void check(int nr_nodes)
+{
+   struct rb_node *rb;
+   int count = 0;
+   int blacks;
+   u32 prev_key = 0;
+
+   for (rb = rb_first(&root); rb; rb = rb_next(rb)) {
+   struct test_node *node = rb_entry(rb, struct test_node, rb);
+   WARN_ON_ONCE(node->key < prev_key);
+   WARN_ON_ONCE(is_red(rb) &&
+(!rb_parent(rb) || is_red(rb_parent(rb))));
+   if (!count)
+   blacks = black_path_count(rb);
+   else
+   WARN_ON_ONCE((!rb->rb_left || !rb->rb_right) &&
+blacks != black_path_count(rb));
+   prev_key = node->key;
+   count++;
+   }
+   WARN_ON_ONCE(count != nr_nodes);
+}
+
+static int rbtree_test_init(void)
+{
+   int i, j;
+   cycles_t time1, time2, time;
+
+   printk(KERN_ALERT "rbtree testing");
+
+   prandom32_seed(&rnd, 3141592653589793238);
+   init();
+
+   time1 = get_cycles();
+
+   for (i = 0; i < PERF_LOOPS; i++) {
+   for (j = 0; j < NODES; j++)
+   insert(nodes + j, &root);
+   for (j = 0; j < NODES; j++)
+   erase(nodes + j, &root);
+   }
+
+   time2 = get_cycles();
+   time = time2 - time1;
+
+   time = div_u64(time, PERF_LOOPS);
+   printk(" -> %llu cycles\n", time);
+
+   for (i = 0; i < CHECK_LOOPS; i++) {
+   init();
+   for (j = 0; j < NODES; j++) {
+   check(j);
+   insert(nodes + j, &root);
+   }
+   for (j = 0; j < NODES; j++) {
+   check(NODES - j);
+   erase(nodes + j, &root);
+   }
+   check(0);
+   }
+
+   return -EAGAIN; /* Fail will directly unload the module */
+}
+
+static void rbtree_test_exit(void)
+{
+   printk(KERN_ALERT "test exit\n");
+}
+
+module_init(rbtree_test_init)
+module_exit(rbtree_test_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michel Lespinasse");
+MODULE_DESCRIPTION("Red Black Tree test");
-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 07/13] rbtree: adjust root color in rb_insert_color() only when necessary

2012-07-09 Thread Michel Lespinasse

The root node of an rbtree must always be black. However, rb_insert_color()
only needs to maintain this invariant when it has been broken - that is,
when it exits the loop due to the current (red) node being the root.
In all other cases (exiting after tree rotations, or exiting due to
an existing black parent) the invariant is already satisfied, so there
is no need to adjust the root node color.

Signed-off-by: Michel Lespinasse 
---
 lib/rbtree.c |   19 +++
 1 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/lib/rbtree.c b/lib/rbtree.c
index 19bee6c..0d9d184 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -91,8 +91,21 @@ void rb_insert_color(struct rb_node *node, struct rb_root 
*root)
 {
struct rb_node *parent, *gparent;
 
-   while ((parent = rb_parent(node)) && rb_is_red(parent))
-   {
+   while (true) {
+   /*
+* Loop invariant: node is red
+*
+* If there is a black parent, we are done.
+* Otherwise, take some corrective action as we don't
+* want a red root or two consecutive red nodes.
+*/
+   parent = rb_parent(node);
+   if (!parent) {
+   rb_set_black(node);
+   break;
+   } else if (rb_is_black(parent))
+   break;
+
gparent = rb_parent(parent);
 
if (parent == gparent->rb_left)
@@ -142,8 +155,6 @@ void rb_insert_color(struct rb_node *node, struct rb_root 
*root)
break;
}
}
-
-   rb_set_black(root->rb_node);
 }
 EXPORT_SYMBOL(rb_insert_color);
 
-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 09/13] rbtree: optimize color flips and parent fetching in rb_insert_color()

2012-07-09 Thread Michel Lespinasse

- Use the newly introduced rb_set_parent_color() function to flip the color
  of nodes whose parent is already known.
- Optimize rb_parent() when the node is known to be red - there is no need
  to mask out the color in that case.
- Flipping gparent's color to red requires us to fetch its rb_parent_color
  field, so we can reuse it as the parent value for the next loop iteration.

Signed-off-by: Michel Lespinasse 
---
 lib/rbtree.c |   26 --
 1 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/lib/rbtree.c b/lib/rbtree.c
index f668886..56369d8 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -47,6 +47,11 @@ static inline void rb_set_parent_color(struct rb_node *rb,
rb->rb_parent_color = (unsigned long)p | color;
 }
 
+static inline struct rb_node *rb_red_parent(struct rb_node *red)
+{
+   return (struct rb_node *)red->rb_parent_color;
+}
+
 static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
 {
struct rb_node *right = node->rb_right;
@@ -116,7 +121,7 @@ __rb_rotate_set_parents(struct rb_node *old, struct rb_node 
*new,
 
 void rb_insert_color(struct rb_node *node, struct rb_root *root)
 {
-   struct rb_node *parent, *gparent, *tmp;
+   struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
 
while (true) {
/*
@@ -126,23 +131,23 @@ void rb_insert_color(struct rb_node *node, struct rb_root 
*root)
 * Otherwise, take some corrective action as we don't
 * want a red root or two consecutive red nodes.
 */
-   parent = rb_parent(node);
if (!parent) {
-   rb_set_black(node);
+   rb_set_parent_color(node, NULL, RB_BLACK);
break;
} else if (rb_is_black(parent))
break;
 
-   gparent = rb_parent(parent);
+   gparent = rb_red_parent(parent);
 
if (parent == gparent->rb_left) {
tmp = gparent->rb_right;
if (tmp && rb_is_red(tmp)) {
/* Case 1 - color flips */
-   rb_set_black(tmp);
-   rb_set_black(parent);
-   rb_set_red(gparent);
+   rb_set_parent_color(tmp, gparent, RB_BLACK);
+   rb_set_parent_color(parent, gparent, RB_BLACK);
node = gparent;
+   parent = rb_parent(node);
+   rb_set_parent_color(node, parent, RB_RED);
continue;
}
 
@@ -168,10 +173,11 @@ void rb_insert_color(struct rb_node *node, struct rb_root 
*root)
tmp = gparent->rb_left;
if (tmp && rb_is_red(tmp)) {
/* Case 1 - color flips */
-   rb_set_black(tmp);
-   rb_set_black(parent);
-   rb_set_red(gparent);
+   rb_set_parent_color(tmp, gparent, RB_BLACK);
+   rb_set_parent_color(parent, gparent, RB_BLACK);
node = gparent;
+   parent = rb_parent(node);
+   rb_set_parent_color(node, parent, RB_RED);
continue;
}
 
-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 04/13] rbtree: move some implementation details from rbtree.h to rbtree.c

2012-07-09 Thread Michel Lespinasse

rbtree users must use the documented APIs to manipulate the tree
structure.  Low-level helpers to manipulate node colors and parenthood
are not part of that API, so move them to lib/rbtree.c

Signed-off-by: Michel Lespinasse 
---
 include/linux/rbtree.h |   16 
 lib/rbtree.c   |   18 ++
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 2049087..a06c044 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -35,8 +35,6 @@
 struct rb_node
 {
unsigned long  rb_parent_color;
-#defineRB_RED  0
-#defineRB_BLACK1
struct rb_node *rb_right;
struct rb_node *rb_left;
 } __attribute__((aligned(sizeof(long))));
@@ -49,20 +47,6 @@ struct rb_root
 
 
 #define rb_parent(r)   ((struct rb_node *)((r)->rb_parent_color & ~3))
-#define rb_color(r)   ((r)->rb_parent_color & 1)
-#define rb_is_red(r)   (!rb_color(r))
-#define rb_is_black(r) rb_color(r)
-#define rb_set_red(r)  do { (r)->rb_parent_color &= ~1; } while (0)
-#define rb_set_black(r)  do { (r)->rb_parent_color |= 1; } while (0)
-
-static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
-{
-   rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
-}
-static inline void rb_set_color(struct rb_node *rb, int color)
-{
-   rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
-}
 
 #define RB_ROOT (struct rb_root) { NULL, }
 #define rb_entry(ptr, type, member) container_of(ptr, type, member)
diff --git a/lib/rbtree.c b/lib/rbtree.c
index fe43c8c..d0ec339 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -23,6 +23,24 @@
 #include 
 #include 
 
+#define RB_RED  0
+#define RB_BLACK 1
+
+#define rb_color(r)   ((r)->rb_parent_color & 1)
+#define rb_is_red(r)   (!rb_color(r))
+#define rb_is_black(r) rb_color(r)
+#define rb_set_red(r)  do { (r)->rb_parent_color &= ~1; } while (0)
+#define rb_set_black(r)  do { (r)->rb_parent_color |= 1; } while (0)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+   rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
+}
+static inline void rb_set_color(struct rb_node *rb, int color)
+{
+   rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
+}
+
 static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
 {
struct rb_node *right = node->rb_right;
-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 02/13] rbtree: empty nodes have no color

2012-07-09 Thread Michel Lespinasse

Empty nodes have no color.  We can make use of this property to
simplify the code emitted by the RB_EMPTY_NODE and RB_CLEAR_NODE
macros.  Also, we can get rid of the rb_init_node function which had
been introduced by commit 88d19cf37952a7e1e38b2bf87a00f0e857e63180
to avoid some issue with the empty node's color not being initialized.

I'm not sure what the RB_EMPTY_NODE checks in rb_prev() / rb_next()
are doing there, though. axboe introduced them in commit 10fd48f2376d.
The way I see it, the 'empty node' abstraction is only used by rbtree
users to flag nodes that they haven't inserted in any rbtree, so asking
the predecessor or successor of such nodes doesn't make any sense.

One final rb_init_node() caller was recently added in sysctl code
to implement faster sysctl name lookups. This code doesn't make use
of RB_EMPTY_NODE at all, and from what I could see it only called
rb_init_node() under the mistaken assumption that such initialization
was required before node insertion.

Signed-off-by: Michel Lespinasse 
---
 fs/proc/proc_sysctl.c  |4 +---
 include/linux/rbtree.h |   15 +--
 include/linux/timerqueue.h |2 +-
 lib/rbtree.c   |4 ++--
 4 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 21d836f..33aea86 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -168,10 +168,8 @@ static void init_header(struct ctl_table_header *head,
head->node = node;
if (node) {
struct ctl_table *entry;
-   for (entry = table; entry->procname; entry++, node++) {
-   rb_init_node(>node);
+   for (entry = table; entry->procname; entry++, node++)
node->header = head;
-   }
}
 }
 
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index e6a8077..2049087 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -67,17 +67,12 @@ static inline void rb_set_color(struct rb_node *rb, int 
color)
 #define RB_ROOT(struct rb_root) { NULL, }
 #definerb_entry(ptr, type, member) container_of(ptr, type, member)
 
-#define RB_EMPTY_ROOT(root)((root)->rb_node == NULL)
-#define RB_EMPTY_NODE(node)(rb_parent(node) == node)
-#define RB_CLEAR_NODE(node)(rb_set_parent(node, node))
+#define RB_EMPTY_ROOT(root)  ((root)->rb_node == NULL)
+
+/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
+#define RB_EMPTY_NODE(node)  ((node)->rb_parent_color == (unsigned long)(node))
+#define RB_CLEAR_NODE(node)  ((node)->rb_parent_color = (unsigned long)(node))
 
-static inline void rb_init_node(struct rb_node *rb)
-{
-   rb->rb_parent_color = 0;
-   rb->rb_right = NULL;
-   rb->rb_left = NULL;
-   RB_CLEAR_NODE(rb);
-}
 
 extern void rb_insert_color(struct rb_node *, struct rb_root *);
 extern void rb_erase(struct rb_node *, struct rb_root *);
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index 5088727..a520fd7 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -39,7 +39,7 @@ struct timerqueue_node *timerqueue_getnext(struct 
timerqueue_head *head)
 
 static inline void timerqueue_init(struct timerqueue_node *node)
 {
-   rb_init_node(&node->node);
+   RB_CLEAR_NODE(&node->node);
 }
 
 static inline void timerqueue_init_head(struct timerqueue_head *head)
diff --git a/lib/rbtree.c b/lib/rbtree.c
index d417556..fe43c8c 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -387,7 +387,7 @@ struct rb_node *rb_next(const struct rb_node *node)
 {
struct rb_node *parent;
 
-   if (rb_parent(node) == node)
+   if (RB_EMPTY_NODE(node))
return NULL;
 
/* If we have a right-hand child, go down and then left as far
@@ -416,7 +416,7 @@ struct rb_node *rb_prev(const struct rb_node *node)
 {
struct rb_node *parent;
 
-   if (rb_parent(node) == node)
+   if (RB_EMPTY_NODE(node))
return NULL;
 
/* If we have a left-hand child, go down and then right as far
-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 12/13] rbtree: optimize tree rotations in __rb_erase_color()

2012-07-09 Thread Michel Lespinasse

In __rb_erase_color(), we often already have pointers to the nodes
being rotated and/or know what their colors must be, so we can
generate more efficient code than the generic __rb_rotate_left()
and __rb_rotate_right() functions.

Signed-off-by: Michel Lespinasse 
---
 lib/rbtree.c |  154 -
 1 files changed, 65 insertions(+), 89 deletions(-)

diff --git a/lib/rbtree.c b/lib/rbtree.c
index 597c1b9..c956248 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -36,10 +36,6 @@ static inline void rb_set_parent(struct rb_node *rb, struct 
rb_node *p)
 {
rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
 }
-static inline void rb_set_color(struct rb_node *rb, int color)
-{
-   rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
-}
 
 static inline void rb_set_parent_color(struct rb_node *rb,
   struct rb_node *p, int color)
@@ -52,52 +48,6 @@ static inline struct rb_node *rb_red_parent(struct rb_node 
*red)
return (struct rb_node *)red->rb_parent_color;
 }
 
-static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
-{
-   struct rb_node *right = node->rb_right;
-   struct rb_node *parent = rb_parent(node);
-
-   if ((node->rb_right = right->rb_left))
-   rb_set_parent(right->rb_left, node);
-   right->rb_left = node;
-
-   rb_set_parent(right, parent);
-
-   if (parent)
-   {
-   if (node == parent->rb_left)
-   parent->rb_left = right;
-   else
-   parent->rb_right = right;
-   }
-   else
-   root->rb_node = right;
-   rb_set_parent(node, right);
-}
-
-static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
-{
-   struct rb_node *left = node->rb_left;
-   struct rb_node *parent = rb_parent(node);
-
-   if ((node->rb_left = left->rb_right))
-   rb_set_parent(left->rb_right, node);
-   left->rb_right = node;
-
-   rb_set_parent(left, parent);
-
-   if (parent)
-   {
-   if (node == parent->rb_right)
-   parent->rb_right = left;
-   else
-   parent->rb_left = left;
-   }
-   else
-   root->rb_node = left;
-   rb_set_parent(node, left);
-}
-
 /*
  * Helper function for rotations:
  * - old's parent and color get assigned to new
@@ -207,7 +157,7 @@ EXPORT_SYMBOL(rb_insert_color);
 static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
 struct rb_root *root)
 {
-   struct rb_node *other;
+   struct rb_node *sibling, *tmp1, *tmp2;
 
while (true) {
/*
@@ -225,58 +175,84 @@ static void __rb_erase_color(struct rb_node *node, struct 
rb_node *parent,
} else if (!parent) {
break;
} else if (parent->rb_left == node) {
-   other = parent->rb_right;
-   if (rb_is_red(other))
-   {
-   rb_set_black(other);
-   rb_set_red(parent);
-   __rb_rotate_left(parent, root);
-   other = parent->rb_right;
+   sibling = parent->rb_right;
+   if (rb_is_red(sibling)) {
+   /* Case 1 - left rotate at parent */
+   parent->rb_right = tmp1 = sibling->rb_left;
+   sibling->rb_left = parent;
+   rb_set_parent_color(tmp1, parent, RB_BLACK);
+   __rb_rotate_set_parents(parent, sibling, root,
+   RB_RED);
+   sibling = tmp1;
}
-   if (!other->rb_right || rb_is_black(other->rb_right)) {
-   if (!other->rb_left ||
-   rb_is_black(other->rb_left)) {
-   rb_set_red(other);
+   tmp1 = sibling->rb_right;
+   if (!tmp1 || rb_is_black(tmp1)) {
+   tmp2 = sibling->rb_left;
+   if (!tmp2 || rb_is_black(tmp2)) {
+   /* Case 2 - sibling color flip */
+   rb_set_red(sibling);
node = parent;
parent = rb_parent(node);
continue;
}
-   rb_set_black(other->rb_left);
-   rb_set_red(other);
-   __rb_rotate_right(other, root);
-   other =

[PATCH 13/13] rbtree: optimize color flips in __rb_erase_color()

2012-07-09 Thread Michel Lespinasse

In __rb_erase_color(), when the current node is red or when flipping
the sibling's color, the parent is already known so we can use the
more efficient rb_set_parent_color() function to set the desired color.

Signed-off-by: Michel Lespinasse 
---
 lib/rbtree.c |   10 +-
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/rbtree.c b/lib/rbtree.c
index c956248..f8c1a75 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -29,8 +29,6 @@
 #define rb_color(r)   ((r)->rb_parent_color & 1)
 #define rb_is_red(r)   (!rb_color(r))
 #define rb_is_black(r) rb_color(r)
-#define rb_set_red(r)  do { (r)->rb_parent_color &= ~1; } while (0)
-#define rb_set_black(r)  do { (r)->rb_parent_color |= 1; } while (0)
 
 static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
 {
@@ -170,7 +168,7 @@ static void __rb_erase_color(struct rb_node *node, struct 
rb_node *parent,
 * and tree rotations as per one of the 4 cases below.
 */
if (node && rb_is_red(node)) {
-   rb_set_black(node);
+   rb_set_parent_color(node, parent, RB_BLACK);
break;
} else if (!parent) {
break;
@@ -190,7 +188,8 @@ static void __rb_erase_color(struct rb_node *node, struct 
rb_node *parent,
tmp2 = sibling->rb_left;
if (!tmp2 || rb_is_black(tmp2)) {
/* Case 2 - sibling color flip */
-   rb_set_red(sibling);
+   rb_set_parent_color(sibling, parent,
+   RB_RED);
node = parent;
parent = rb_parent(node);
continue;
@@ -230,7 +229,8 @@ static void __rb_erase_color(struct rb_node *node, struct 
rb_node *parent,
tmp2 = sibling->rb_right;
if (!tmp2 || rb_is_black(tmp2)) {
/* Case 2 - sibling color flip */
-   rb_set_red(sibling);
+   rb_set_parent_color(sibling, parent,
+   RB_RED);
node = parent;
parent = rb_parent(node);
continue;
-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 11/13] rbtree: optimize case selection logic in __rb_erase_color()

2012-07-09 Thread Michel Lespinasse

In __rb_erase_color(), we have to select one of 3 cases depending on the
colors of the 'other' node's children. If both children are black, we flip
a few node colors and iterate. Otherwise, we do either one or two
tree rotations, depending on the color of the 'other' child opposite
to 'node', and then we are done.

The corresponding logic had duplicate checks for the color of the 'other'
child opposite to 'node'. It was checking it first to determine if both
children are black, and then to determine how many tree rotations are
required. Rearrange the logic to avoid that extra check.

Signed-off-by: Michel Lespinasse 
---
 lib/rbtree.c |   68 +
 1 files changed, 30 insertions(+), 38 deletions(-)

diff --git a/lib/rbtree.c b/lib/rbtree.c
index 44cbbd5..597c1b9 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -233,28 +233,24 @@ static void __rb_erase_color(struct rb_node *node, struct 
rb_node *parent,
__rb_rotate_left(parent, root);
other = parent->rb_right;
}
-   if ((!other->rb_left || rb_is_black(other->rb_left)) &&
-   (!other->rb_right || rb_is_black(other->rb_right)))
-   {
-   rb_set_red(other);
-   node = parent;
-   parent = rb_parent(node);
-   }
-   else
-   {
-   if (!other->rb_right || 
rb_is_black(other->rb_right))
-   {
-   rb_set_black(other->rb_left);
+   if (!other->rb_right || rb_is_black(other->rb_right)) {
+   if (!other->rb_left ||
+   rb_is_black(other->rb_left)) {
rb_set_red(other);
-   __rb_rotate_right(other, root);
-   other = parent->rb_right;
+   node = parent;
+   parent = rb_parent(node);
+   continue;
}
-   rb_set_color(other, rb_color(parent));
-   rb_set_black(parent);
-   rb_set_black(other->rb_right);
-   __rb_rotate_left(parent, root);
-   break;
+   rb_set_black(other->rb_left);
+   rb_set_red(other);
+   __rb_rotate_right(other, root);
+   other = parent->rb_right;
}
+   rb_set_color(other, rb_color(parent));
+   rb_set_black(parent);
+   rb_set_black(other->rb_right);
+   __rb_rotate_left(parent, root);
+   break;
} else {
other = parent->rb_left;
if (rb_is_red(other))
@@ -264,28 +260,24 @@ static void __rb_erase_color(struct rb_node *node, struct 
rb_node *parent,
__rb_rotate_right(parent, root);
other = parent->rb_left;
}
-   if ((!other->rb_left || rb_is_black(other->rb_left)) &&
-   (!other->rb_right || rb_is_black(other->rb_right)))
-   {
-   rb_set_red(other);
-   node = parent;
-   parent = rb_parent(node);
-   }
-   else
-   {
-   if (!other->rb_left || 
rb_is_black(other->rb_left))
-   {
-   rb_set_black(other->rb_right);
+   if (!other->rb_left || rb_is_black(other->rb_left)) {
+   if (!other->rb_right ||
+   rb_is_black(other->rb_right)) {
rb_set_red(other);
-   __rb_rotate_left(other, root);
-   other = parent->rb_left;
+   node = parent;
+   parent = rb_parent(node);
+   continue;
}
-   rb_set_color(other, rb_color(parent));
-   rb_set_black(parent);
-   rb_set_black(other->rb_left);
-   __rb_rotate_right(parent, root);
-

[PATCH 08/13] rbtree: optimize tree rotations in rb_insert_color()

2012-07-09 Thread Michel Lespinasse

In rb_insert_color(), we can do better than calling __rb_rotate_left()
and __rb_rotate_right() to handle tree rotations: we already have
pointers to all relevant nodes, and know their colors (either because
we want to adjust it, or because we've tested it, or we can deduce it
as black due to the node proximity to a known red node). So we can
generate more efficient code by making use of the node pointers
we already have, and setting both the parent and color attributes for
nodes all at once. Also in case 2, some node attributes don't have to
be set because we know another tree rotation (case 3) will always follow
and override them.

Signed-off-by: Michel Lespinasse 
---
 lib/rbtree.c |  102 -
 1 files changed, 71 insertions(+), 31 deletions(-)

diff --git a/lib/rbtree.c b/lib/rbtree.c
index 0d9d184..f668886 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -41,6 +41,12 @@ static inline void rb_set_color(struct rb_node *rb, int 
color)
rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
 }
 
+static inline void rb_set_parent_color(struct rb_node *rb,
+  struct rb_node *p, int color)
+{
+   rb->rb_parent_color = (unsigned long)p | color;
+}
+
 static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
 {
struct rb_node *right = node->rb_right;
@@ -87,9 +93,30 @@ static void __rb_rotate_right(struct rb_node *node, struct 
rb_root *root)
rb_set_parent(node, left);
 }
 
+/*
+ * Helper function for rotations:
+ * - old's parent and color get assigned to new
+ * - old gets assigned new as a parent and 'color' as a color.
+ */
+static inline void
+__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
+   struct rb_root *root, int color)
+{
+   struct rb_node *parent = rb_parent(old);
+   new->rb_parent_color = old->rb_parent_color;
+   rb_set_parent_color(old, new, color);
+   if (parent) {
+   if (parent->rb_left == old)
+   parent->rb_left = new;
+   else
+   parent->rb_right = new;
+   } else
+   root->rb_node = new;
+}
+
 void rb_insert_color(struct rb_node *node, struct rb_root *root)
 {
-   struct rb_node *parent, *gparent;
+   struct rb_node *parent, *gparent, *tmp;
 
while (true) {
/*
@@ -108,50 +135,63 @@ void rb_insert_color(struct rb_node *node, struct rb_root 
*root)
 
gparent = rb_parent(parent);
 
-   if (parent == gparent->rb_left)
-   {
-   {
-   register struct rb_node *uncle = 
gparent->rb_right;
-   if (uncle && rb_is_red(uncle))
-   {
-   rb_set_black(uncle);
-   rb_set_black(parent);
-   rb_set_red(gparent);
-   node = gparent;
-   continue;
-   }
+   if (parent == gparent->rb_left) {
+   tmp = gparent->rb_right;
+   if (tmp && rb_is_red(tmp)) {
+   /* Case 1 - color flips */
+   rb_set_black(tmp);
+   rb_set_black(parent);
+   rb_set_red(gparent);
+   node = gparent;
+   continue;
}
 
if (parent->rb_right == node) {
-   __rb_rotate_left(parent, root);
+   /* Case 2 - left rotate at parent */
+   parent->rb_right = tmp = node->rb_left;
+   node->rb_left = parent;
+   if (tmp)
+   rb_set_parent_color(tmp, parent,
+   RB_BLACK);
+   rb_set_parent_color(parent, node, RB_RED);
parent = node;
}
 
-   rb_set_black(parent);
-   rb_set_red(gparent);
-   __rb_rotate_right(gparent, root);
+   /* Case 3 - right rotate at gparent */
+   gparent->rb_left = tmp = parent->rb_right;
+   parent->rb_right = gparent;
+   if (tmp)
+   rb_set_parent_color(tmp, gparent, RB_BLACK);
+   __rb_rotate_set_parents(gparent, parent, root, RB_RED);
break;
} else {
-   {
-   register struct rb_node *uncle = 
gparent->rb_left;
-

[PATCH 10/13] rbtree: adjust node color in __rb_erase_color() only when necessary

2012-07-09 Thread Michel Lespinasse

In __rb_erase_color(), we were always setting a node to black after
exiting the main loop. And in one case, after fixing up the tree to
satisfy all rbtree invariants, we were setting the current node to root
just to guarantee a loop exit, at which point the root would be set to
black. However this is not necessary, as the root of an rbtree is already
known to be black. The only case where the color flip is required is when
we exit the loop due to the current node being red, and it's easiest to
just do the flip at that point instead of doing it after the loop.

Signed-off-by: Michel Lespinasse 
---
 lib/rbtree.c |   28 +---
 1 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/lib/rbtree.c b/lib/rbtree.c
index 56369d8..44cbbd5 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -209,10 +209,22 @@ static void __rb_erase_color(struct rb_node *node, struct 
rb_node *parent,
 {
struct rb_node *other;
 
-   while ((!node || rb_is_black(node)) && node != root->rb_node)
-   {
-   if (parent->rb_left == node)
-   {
+   while (true) {
+   /*
+* Loop invariant: all leaf paths going through node have a
+* black node count that is 1 lower than other leaf paths.
+*
+* If node is red, we can flip it to black to adjust.
+* If node is the root, all leaf paths go through it.
+* Otherwise, we need to adjust the tree through color flips
+* and tree rotations as per one of the 4 cases below.
+*/
+   if (node && rb_is_red(node)) {
+   rb_set_black(node);
+   break;
+   } else if (!parent) {
+   break;
+   } else if (parent->rb_left == node) {
other = parent->rb_right;
if (rb_is_red(other))
{
@@ -241,12 +253,9 @@ static void __rb_erase_color(struct rb_node *node, struct 
rb_node *parent,
rb_set_black(parent);
rb_set_black(other->rb_right);
__rb_rotate_left(parent, root);
-   node = root->rb_node;
break;
}
-   }
-   else
-   {
+   } else {
other = parent->rb_left;
if (rb_is_red(other))
{
@@ -275,13 +284,10 @@ static void __rb_erase_color(struct rb_node *node, struct 
rb_node *parent,
rb_set_black(parent);
rb_set_black(other->rb_left);
__rb_rotate_right(parent, root);
-   node = root->rb_node;
break;
}
}
}
-   if (node)
-   rb_set_black(node);
 }
 
 void rb_erase(struct rb_node *node, struct rb_root *root)
-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 06/13] rbtree: break out of rb_insert_color loop after tree rotation

2012-07-09 Thread Michel Lespinasse

It is a well known property of rbtrees that insertion never requires
more than two tree rotations.  In our implementation, after one loop
iteration identified one or two necessary tree rotations, we would iterate
and look for more.  However at that point the node's parent would always
be black, which would cause us to exit the loop.

We can make the code flow more obvious by just adding a break statement
after the tree rotations, where we know we are done.  Additionally, in the
cases where two tree rotations are necessary, we don't have to update the
'node' pointer as it wouldn't be used until the next loop iteration, which
we now avoid due to this break statement.

Signed-off-by: Michel Lespinasse 
---
 lib/rbtree.c |   14 --
 1 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/lib/rbtree.c b/lib/rbtree.c
index d0ec339..19bee6c 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -109,18 +109,15 @@ void rb_insert_color(struct rb_node *node, struct rb_root 
*root)
}
}
 
-   if (parent->rb_right == node)
-   {
-   register struct rb_node *tmp;
+   if (parent->rb_right == node) {
__rb_rotate_left(parent, root);
-   tmp = parent;
parent = node;
-   node = tmp;
}
 
rb_set_black(parent);
rb_set_red(gparent);
__rb_rotate_right(gparent, root);
+   break;
} else {
{
register struct rb_node *uncle = 
gparent->rb_left;
@@ -134,18 +131,15 @@ void rb_insert_color(struct rb_node *node, struct rb_root 
*root)
}
}
 
-   if (parent->rb_left == node)
-   {
-   register struct rb_node *tmp;
+   if (parent->rb_left == node) {
__rb_rotate_right(parent, root);
-   tmp = parent;
parent = node;
-   node = tmp;
}
 
rb_set_black(parent);
rb_set_red(gparent);
__rb_rotate_left(gparent, root);
+   break;
}
}
 
-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 00/13] rbtree updates

2012-07-09 Thread Michel Lespinasse


I recently started looking at the rbtree code (with an eye towards
improving the augmented rbtree support, but I haven't gotten there
yet). I noticed a lot of possible speed improvements, which I am now
proposing in this patch set.

Patches 1-4 are preparatory: remove internal functions from rbtree.h
so that users won't be tempted to use them instead of the documented
APIs, clean up some incorrect usages I've noticed (in particular, with
the recently added fs/proc/proc_sysctl.c rbtree usage), reference the
documentation so that people have one less excuse to miss it, etc.

Patch 5 is a small module I wrote to check the rbtree performance.
It creates 100 nodes with random keys and repeatedly inserts and erases
them from an rbtree. Additionally, it has code to check for rbtree
invariants after each insert or erase operation.

Patches 6-13 are where the rbtree optimizations are done, and they touch
only that one file, lib/rbtree.c . I am getting good results out of these -
in my small benchmark doing rbtree insertion (including search) and erase,
I'm seeing a 30% runtime reduction on Sandybridge E5, which is more than
I initially thought would be possible. (the results aren't as impressive
on my two other test hosts though, AMD barcelona and Intel Westmere, where
I am seeing 14% runtime reduction only). The code size - both source
(omitting comments) and compiled - is also shorter after these changes.
However, I do admit that the updated code is more arduous to read - one
big reason for that is the removal of the tree rotation helpers, which
added some overhead but also made it easier to reason about things locally.
Overall, I believe this is an acceptable compromise, given that this code
doesn't get modified very often, and that I have good tests for it.

For those people who want to really understand the code, I can only
recommend keeping around a copy of the cormen/leiserson/rivest book, as
the original algorithm seems to be inspired by it and having the rbtrees
drawn up really helps.

This patchset is against v3.4 - I had actually done most of the development
against v3.3 but the rbtree code doesn't change very often so I didn't have
to update it much, save for dealing with the recent rbtree additions in
fs/proc/proc_sysctl.c

My proposal would be to use this as a base to add on the augmented rbtree
support enhancements, which I'd like to do next. Then this could all go in
-mm tree so that various augmented rbtree uses that have been discussed
(such as finding gaps between vmas) can use this.

Michel Lespinasse (13):
  rbtree: reference Documentation/rbtree.txt for usage instructions
  rbtree: empty nodes have no color
  rbtree: fix incorrect rbtree node insertion in fs/proc/proc_sysctl.c
  rbtree: move some implementation details from rbtree.h to rbtree.c
  rbtree: performance and correctness test
  rbtree: break out of rb_insert_color loop after tree rotation
  rbtree: adjust root color in rb_insert_color() only when necessary
  rbtree: optimize tree rotations in rb_insert_color()
  rbtree: optimize color flips and parent fetching in rb_insert_color()
  rbtree: adjust node color in __rb_erase_color() only when necessary
  rbtree: optimize case selection logic in __rb_erase_color()
  rbtree: optimize tree rotations in __rb_erase_color()
  rbtree: optimize color flips in __rb_erase_color()

 fs/proc/proc_sysctl.c  |5 +-
 include/linux/rbtree.h |   98 +
 include/linux/timerqueue.h |2 +-
 lib/rbtree.c   |  349 +---
 tests/rbtree_test.c|  135 +
 5 files changed, 340 insertions(+), 249 deletions(-)
 create mode 100644 tests/rbtree_test.c

-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] [PATCH] asus_oled: Use pr_err(...) rather than printk(KERN_ERR ...)

2012-07-09 Thread yamanetoshi

No problem :-)

On Tue, Jul 10, 2012 at 4:33 AM, Greg KH  wrote:
> On Sun, Jul 08, 2012 at 06:25:47PM +0900, Toshiaki Yamane wrote:
>> This change is inspired by checkpatch.
>>
>> Signed-off-by: Toshiaki Yamane 
>> ---
>>  drivers/staging/asus_oled/asus_oled.c |6 +++---
>>  1 files changed, 3 insertions(+), 3 deletions(-)
>
> Sorry, someone beat you to this very change by a few hours :(
>
> greg k-h



-- 

Regards,


 .
  .
...

Yamane Toshiaki

yamaneto...@gmail.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3] printk: Have printk() never buffer its data

2012-07-09 Thread Joe Perches

On Tue, 2012-07-10 at 00:40 +0200, Kay Sievers wrote:
> On Tue, Jul 10, 2012 at 12:29 AM, Joe Perches  wrote:
> > On Tue, 2012-07-10 at 00:10 +0200, Kay Sievers wrote:
> >> On Mon, Jul 9, 2012 at 11:42 PM, Joe Perches  wrote:
> >> > On Sun, 2012-07-08 at 19:55 +0200, Kay Sievers wrote:
> >> >
> >> >> At the same time the CPU#2 prints the same warning with a continuation
> >> >> line, but the buffer from CPU#1 can not be flushed to the console, nor
> >> >> can the continuation line printk()s from CPU#2 be merged at this point.
> >> >> The consoles are still locked and busy with replaying the old log
> >> >> messages, so the new continuation data is just stored away in the record
> >> >> buffer as it is coming in.
> >> >> If the console would be registered a bit earlier, or the warning would
> >> >> happen a bit later, we would probably not see any of this.
> >> >>
> >> >> I can fake something like this just by holding the console semaphore
> >> >> over a longer time and printing continuation lines with different CPUs
> >> >> in a row.
> >> >>
> >> >> The patch below seems to work for me. It is also here:
> >> >>   
> >> >> http://git.kernel.org/?p=linux/kernel/git/kay/patches.git;a=blob;f=kmsg-merge-cont.patch;hb=HEAD
> >> >>
> >> >> It only applies cleanly on top of this patch:
> >> >>   
> >> >> http://git.kernel.org/?p=linux/kernel/git/kay/patches.git;a=blob;f=kmsg-syslog-1-byte-read.patch;hb=HEAD
> >> >>
> >> >
> >> > Hi Kay.
> >> >
> >> > I just ran a test with what's in Greg's driver-core -for-linus branch.
> >> >
> >> > One of the differences in dmesg is timestamping of consecutive
> >> > pr_("foo...)
> >> > followed directly by
> >> > pr_cont("bar...")
> >> >
> >> > For instance: (dmesg is 3.4, dmesg.0 is 3.5-rc6+)
> >> >
> >> > # grep MAP /var/log/dm* -A1
> >> > dmesg:[0.781687] ata_piix :00:1f.2: MAP [ P0 P2 P1 P3 ]
> >> > dmesg-[0.781707] ata2: port disabled--ignoring
> >> > --
> >> > dmesg.0:[0.948881] ata_piix :00:1f.2: MAP [
> >> > dmesg.0-[0.948883]  P0 P2 P1 P3 ]
> >> >
> >> > These messages originate starting at
> >> > drivers/ata/ata_piix.c:1354
> >> >
> >> > All the continuations are emitted with pr_cont.
> >> >
> >> > I think this output should still be coalesced without
> >> > timestamp deltas.  Perhaps the timestamping code can
> >> > still be reworked to avoid too small a delta producing
> >> > a new timestamp and another dmesg line.
> >>
> >> Hmm, I don't see that.
> >>
> >> If I do:
> >>   pr_info("[");
> >>   for (i = 0; i < 4; i++)
> >>pr_cont("%i ", i);
> >>   pr_cont("]\n");
> >>
> >> I get:
> >>   6,173,0;[0 1 2 3 ]
> >>
> >> And if I fill the cont buffer and forcefully hold the console sem
> >> during all that, and we can't merge anymore, I get:
> >>   6,167,0;[
> >>   4,168,0;0
> >>   4,169,0;1
> >>   4,170,0;2
> >>   4,171,0;3
> >>   4,172,0;]
> >>
> >> But the output is still all fine for both lines:
> >>   [0.00] [0 1 2 3 ]
> >>   [0.00] [0 1 2 3 ]
> >>
> >> What do I miss?
> >
> > In this case the initial line is dev_info not pr_info
> > so there are the additional dict descriptors output to
> > /dev/kmsg as well.
> >
> > Maybe that interferes with continuations.  Dunno.
> 
> Yes, it does. Annotated records dev_printk() must be reliable in the
> data storage and for the consumers. We can not expose them to the racy
> continuation printk()s. We need to be able to trust the data they
> print and not possibly merge unrelated things into it.
> 
> If it's needed, we can try to set the flags accordingly, that they
> *look* like a line in the classic byte-stream output, but the
> interface in /dev/kmsg must not export them that way, because
> continuation lines can never be trusted to be correct.

Then you've changed semantics and I think you need to
fix it.

A dev_<level> call is not guaranteed to be a complete
message.

There are dev_<level> and netdev_<level> calls
followed by pr_cont.

Maybe these could be fixed up and then they could be
always integral.  There don't look to be too many.

This may be most (all?) of them:

 drivers/ata/ata_piix.c   |   16 +-
 drivers/media/rc/redrat3.c   |   36 -
 drivers/net/ethernet/broadcom/bnx2.c |   26 ++--
 3 files changed, 42 insertions(+), 36 deletions(-)

diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 3c809bf..f51962f 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -1346,38 +1346,42 @@ static const int *__devinit piix_init_sata_map(struct 
pci_dev *pdev,
const int *map;
int i, invalid_map = 0;
u8 map_value;
+   char maps[50] = {0};
+   int len = 0;
 
pci_read_config_byte(pdev, ICH5_PMR, _value);
 
map = map_db->map[map_value & map_db->mask];
 
-   dev_info(>dev, "MAP [");
for (i = 0; i < 4; i++) {
switch (map[i]) {
case RV:
invalid_map = 1;
-

Re: + checkpatch-add-check-for-use-of-sizeof-without-parenthesis.patch added to -mm tree

2012-07-09 Thread Joe Perches

On Mon, 2012-07-09 at 15:55 -0700, David Rientjes wrote:
> So, nack, don't start enforcing your own coding style and preferences in 
> checkpatch.pl.

Not just my opinion.

https://lkml.org/lkml/2008/12/23/138

Date: Tue, 23 Dec 2008 10:08:50 -0800 (PST)
From: Linus Torvalds <>
[]
Another example of this is "sizeof". The kernel universally (I hope) has 
parenthesis around the sizeof argument, even though it's clearly not 
required by the C language. 

It's a coding standard. 

cheers, Joe

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RESEND AGAIN][PATCH] pcmcia: move unbind/rebind into dev_pm_ops.complete

2012-07-09 Thread Christian Lamparter

(sorry, keyboard splat)

On Tuesday, July 10, 2012 01:01:31 AM Andrew Morton wrote:
> On Tue, 10 Jul 2012 00:54:54 +0200
> Christian Lamparter  wrote:
> > On Monday, July 09, 2012 11:59:39 PM Andrew Morton wrote:
> > > On Fri, 6 Jul 2012 14:30:16 -0700
> > > Greg KH  wrote:
> > > 
> > > > On Fri, Jul 06, 2012 at 11:23:52PM +0200, Christian Lamparter wrote:
> > > > > The idea of moving rebind procedure into pm.complete
> > > > > was taken from the usb-subsystem, which has similar
> > > > > problems with reattaching devices during/after
> > > > > resume.
> > > > > 
> > > > > Signed-off-by: Christian Lamparter 
> > > > > ---
> > > > > To Greg:
> > > > > 
> > > > > I have submitted this patch back in March and again in May.
> > > > > As far as I can tell it was neither rejected nor was it
> > > > > accepted into linux-pcmcia.git since. So I'm asking you,
> > > > > if you could take the patch instead... please.
> > > > 
> > > > There is a PCMCIA "team" who should be taking these types of patches.
> > > > Why are they not doing so?
> > > > 
> > > 
> > > Things are pretty quiet in pcmcia world, but Dominik does appear to
> > > still be doing stuff.
> > > 
> > > I sometimes queue PCMCIA patches for people, but not this one.  The
> > > changelog is just junk.  What does the patch do?  Why does it do it? 
> > > What problems does it solve?  What are these mysterious "problems with
> > > reattaching devices" to which it refers?  Useless...
> > > 
> > Well, that can be improved, but it is a bit tricky. 
> > AFAICT the usb subsystem dealt with pm in this commit:
> > 
> > "commit 5096aedcd2eb70fbea83f09281f97f9ec973d9de
> > Author: Alan Stern 
> > Date:   Tue Aug 12 14:34:14 2008 -0400
> > 
> > USB: Don't rebind before "complete" callback
> > 
> > [...] We are not allowed to call drivers' probe routines during 
> > a system sleep transition between the "prepare" and "complete"
> > callbacks, but that's exactly what we do when a driver doesn't
> > have full suspend/resume support. [...]"
> > 
> > And on the pcmcia subsystem we have this:
> > 
> > "commit 88b060d6c03fcb9e4d2018b4349954c4242a5c7f
> > Author: Dominik Brodowski 
> > Date:   Sat Jan 2 14:14:23 2010 +0100
> > 
> > pcmcia: improve check for same card in slot after resume
> > 
> > During a suspend/resume cycle, an user may change the card in the
> > PCMCIA/CardBus slot. [...]
> > 
> > For CardBus devices, the PCI hotplug interface doesn't offer a "rescan"
> > facility which also _removes_ devices no longer to be found behind a
> > bridge. Therefore, remove and re-add all devices unconditionally."
> > 
> > Unfortunately, the "re-add" is currently done in the *pm resume* callback
> > (socket_late_resume), but according to "USB: Don't rebind..." is not
> > allowed to have it there, so the patch moves it into the *pm complete*
> > callback. The Documentation/power/* contains mostly informations for
> > drivers developers, but AFAICT it doesn't say much about the subsystem
> > to which the device is connected should behave, so there's a bit of a
> > "citing-gap".
> 
> hm, it does seem a bit of a screwup.
> 
> What's unclear to me is whether your patch fixes any observed runtime
> problems.  Or adds any runtime problems, which looks to be a distinct
> possibility.

well, the patch for the pcmcia subsystem is part of a fix for this
bug-report: 
(Note: the firmware-core changes I talk about there have been fixed
by Rafael with his "firmware_class: Rework usermodehelper check"
series)

But that could very well be a straw-man argument. As I can't really
tell without the same HW. (I have mostly minipci hardware, and the
laptop I have with a cardbus doesn't really suspend reliably).

> > So, my question now: Would you accept the pcmcia patch if I add the
> > "USB: Don't rebind..." as a reference to why the re-add needs to be
> > done in complete? Or do you think that I should bug the pm people
> > (and Alan - since he wrote that it is "not allowed") in this case
> > so I can link their official answer to this patch?
> 
> Well I could grab it and give it a little bit of testing in linux-next.
> But I'd be super-reluctant to send such a patch upstream without
> detailed input from Alan/Greg/Rafael/Dominik/etc.
Alright, I've added them in the 'CC'.

To Alan:
Can you please tell me, if you (still) know the details why the "re-add"
is not allowed in _resume callback (see patch message & commit id from
above: USB: Don't rebind before "complete")?

Regards,
Chr

Good night!
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2] fail dentry revalidation after namespace change

2012-07-09 Thread Andrew Morton

On Fri,  6 Jul 2012 13:09:07 +0400
Glauber Costa  wrote:

> When we change the namespace tag of a sysfs entry, the associated dentry
> is still kept around. readdir() will work correctly and not display the
> old entries, but open() will still succeed, so will reads and writes.
> 
> This will no longer happen if sysfs is remounted, hinting that this is a
> cache-related problem.
> 
> I am using the following sequence to demonstrate that:
> 
> shell1:
> ip link add type veth
> unshare -nm
> 
> shell2:
> ip link set veth1 
> cat /sys/devices/virtual/net/veth1/ifindex
> 
> Before that patch, this will succeed (fail to fail). After it, it will
> correctly return an error. Differently from a normal rename, which we
> handle fine, changing the object namespace will keep it's path intact.
> So this check seems necessary as well.
> 
> ...
>
> --- a/fs/sysfs/dir.c
> +++ b/fs/sysfs/dir.c
> @@ -307,6 +307,7 @@ static int sysfs_dentry_revalidate(struct dentry *dentry, 
> struct nameidata *nd)
>  {
>   struct sysfs_dirent *sd;
>   int is_dir;
> + int type;
>  
>   if (nd->flags & LOOKUP_RCU)
>   return -ECHILD;
> @@ -326,6 +327,13 @@ static int sysfs_dentry_revalidate(struct dentry 
> *dentry, struct nameidata *nd)
>   if (strcmp(dentry->d_name.name, sd->s_name) != 0)
>   goto out_bad;
>  
> + /* The sysfs dirent has been moved to a different namespace */
> + type = KOBJ_NS_TYPE_NONE;
> + if (sd->s_parent)
> + type = sysfs_ns_type(sd->s_parent);
> + if (type && (sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns))

eww, the code is assuming that KOBJ_NS_TYPE_NONE has a value of zero. 
Don't do that; it smells bad.

I renamed my version of this patch to "sysfs: fail dentry revalidation
after namespace change", as carefully explained in section 15 of the
excellent Documentation/SubmittingPatches, then queued this:


From: Andrew Morton 
Subject: sysfs-fail-dentry-revalidation-after-namespace-change-fix

don't assume that KOBJ_NS_TYPE_NONE==0.  Also save a test-n-branch.

Cc: Eric W. Biederman 
Cc: Glauber Costa 
Cc: Greg Kroah-Hartman 
Cc: Tejun Heo 
Signed-off-by: Andrew Morton 
---

 fs/sysfs/dir.c |8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff -puN 
fs/sysfs/dir.c~sysfs-fail-dentry-revalidation-after-namespace-change-fix 
fs/sysfs/dir.c
--- a/fs/sysfs/dir.c~sysfs-fail-dentry-revalidation-after-namespace-change-fix
+++ a/fs/sysfs/dir.c
@@ -329,10 +329,12 @@ static int sysfs_dentry_revalidate(struc
 
/* The sysfs dirent has been moved to a different namespace */
type = KOBJ_NS_TYPE_NONE;
-   if (sd->s_parent)
+   if (sd->s_parent) {
type = sysfs_ns_type(sd->s_parent);
-   if (type && (sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns))
-   goto out_bad;
+   if (type != KOBJ_NS_TYPE_NONE &&
+   sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns)
+   goto out_bad;
+   }
 
mutex_unlock(_mutex);
 out_valid:
_


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RESEND AGAIN][PATCH] pcmcia: move unbind/rebind into dev_pm_ops.complete

2012-07-09 Thread Christian Lamparter

On Tuesday, July 10, 2012 01:01:31 AM Andrew Morton wrote:
> On Tue, 10 Jul 2012 00:54:54 +0200
> Christian Lamparter  wrote:
> 
> > On Monday, July 09, 2012 11:59:39 PM Andrew Morton wrote:
> > > On Fri, 6 Jul 2012 14:30:16 -0700
> > > Greg KH  wrote:
> > > 
> > > > On Fri, Jul 06, 2012 at 11:23:52PM +0200, Christian Lamparter wrote:
> > > > > The idea of moving rebind procedure into pm.complete
> > > > > was taken from the usb-subsystem, which has similar
> > > > > problems with reattaching devices during/after
> > > > > resume.
> > > > > 
> > > > > Signed-off-by: Christian Lamparter 
> > > > > ---
> > > > > To Greg:
> > > > > 
> > > > > I have submitted this patch back in March and again in May.
> > > > > As far as I can tell it was neither rejected nor was it
> > > > > accepted into linux-pcmcia.git since. So I'm asking you,
> > > > > if you could take the patch instead... please.
> > > > 
> > > > There is a PCMCIA "team" who should be taking these types of patches.
> > > > Why are they not doing so?
> > > > 
> > > 
> > > Things are pretty quiet in pcmcia world, but Dominik does appear to
> > > still be doing stuff.
> > > 
> > > I sometimes queue PCMCIA patches for people, but not this one.  The
> > > changelog is just junk.  What does the patch do?  Why does it do it? 
> > > What problems does it solve?  What are these mysterious "problems with
> > > reattaching devices" to which it refers?  Useless...
> > > 
> > Well, that can be improved, but it is a bit tricky. 
> > AFAICT the usb subsystem dealt with pm in this commit:
> > 
> > "commit 5096aedcd2eb70fbea83f09281f97f9ec973d9de
> > Author: Alan Stern 
> > Date:   Tue Aug 12 14:34:14 2008 -0400
> > 
> > USB: Don't rebind before "complete" callback
> > 
> > [...] We are not allowed to call drivers' probe routines during 
> > a system sleep transition between the "prepare" and "complete"
> > callbacks, but that's exactly what we do when a driver doesn't
> > have full suspend/resume support. [...]"
> > 
> > And on the pcmcia subsystem we have this:
> > 
> > "commit 88b060d6c03fcb9e4d2018b4349954c4242a5c7f
> > Author: Dominik Brodowski 
> > Date:   Sat Jan 2 14:14:23 2010 +0100
> > 
> > pcmcia: improve check for same card in slot after resume
> > 
> > During a suspend/resume cycle, an user may change the card in the
> > PCMCIA/CardBus slot. [...]
> > 
> > For CardBus devices, the PCI hotplug interface doesn't offer a "rescan"
> > facility which also _removes_ devices no longer to be found behind a
> > bridge. Therefore, remove and re-add all devices unconditionally."
> > 
> > Unfortunately, the "re-add" is currently done in the *pm resume* callback
> > (socket_late_resume), but according to "USB: Don't rebind..." it is not
> > allowed to be done there, so the patch moves it into the *pm complete*
> > callback. The Documentation/power/* contains mostly information for
> > driver developers, but AFAICT it doesn't say much about how the subsystem
> > to which the device is connected should behave, so there's a bit of a
> > "citing-gap".
> 
> hm, it does seem a bit of a screwup.
> 
> What's unclear to me is whether your patch fixes any observed runtime
> problems.  Or adds any runtime problems, which looks to be a distinct
> possibility.

well, there is this bug

> 
> > So, my question now: Would you accept the pcmcia patch if I add the
> > "USB: Don't rebind..." as a reference to why the re-add needs to be
> > done in complete? Or do you think that I should bug the pm people
> > (and Alan - since he wrote that it is "not allowed") in this case
> > so I can link their official answer to this patch?
> 
> Well I could grab it and give it a little bit of testing in linux-next.
> But I'd be super-reluctant to send such a patch upstream without
> detailed input from Alan/Greg/Rafael/Dominik/etc.
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RESEND AGAIN][PATCH] pcmcia: move unbind/rebind into dev_pm_ops.complete

2012-07-09 Thread Andrew Morton

On Tue, 10 Jul 2012 00:54:54 +0200
Christian Lamparter  wrote:

> On Monday, July 09, 2012 11:59:39 PM Andrew Morton wrote:
> > On Fri, 6 Jul 2012 14:30:16 -0700
> > Greg KH  wrote:
> > 
> > > On Fri, Jul 06, 2012 at 11:23:52PM +0200, Christian Lamparter wrote:
> > > > The idea of moving rebind procedure into pm.complete
> > > > was taken from the usb-subsystem, which has similar
> > > > problems with reattaching devices during/after
> > > > resume.
> > > > 
> > > > Signed-off-by: Christian Lamparter 
> > > > ---
> > > > To Greg:
> > > > 
> > > > I have submitted this patch back in March and again in May.
> > > > As far as I can tell it was neither rejected nor was it
> > > > accepted into linux-pcmcia.git since. So I'm asking you,
> > > > if you could take the patch instead... please.
> > > 
> > > There is a PCMCIA "team" who should be taking these types of patches.
> > > Why are they not doing so?
> > > 
> > 
> > Things are pretty quiet in pcmcia world, but Dominik does appear to
> > still be doing stuff.
> > 
> > I sometimes queue PCMCIA patches for people, but not this one.  The
> > changelog is just junk.  What does the patch do?  Why does it do it? 
> > What problems does it solve?  What are these mysterious "problems with
> > reattaching devices" to which it refers?  Useless...
> > 
> Well, that can be improved, but it is a bit tricky. 
> AFAICT the usb subsystem dealt with pm in this commit:
> 
> "commit 5096aedcd2eb70fbea83f09281f97f9ec973d9de
> Author: Alan Stern 
> Date:   Tue Aug 12 14:34:14 2008 -0400
> 
> USB: Don't rebind before "complete" callback
> 
>   [...] We are not allowed to call drivers' probe routines during 
>   a system sleep transition between the "prepare" and "complete"
>   callbacks, but that's exactly what we do when a driver doesn't
>   have full suspend/resume support. [...]"
> 
> And on the pcmcia subsystem we have this:
> 
> "commit 88b060d6c03fcb9e4d2018b4349954c4242a5c7f
> Author: Dominik Brodowski 
> Date:   Sat Jan 2 14:14:23 2010 +0100
> 
> pcmcia: improve check for same card in slot after resume
> 
> During a suspend/resume cycle, an user may change the card in the
> PCMCIA/CardBus slot. [...]
> 
> For CardBus devices, the PCI hotplug interface doesn't offer a "rescan"
> facility which also _removes_ devices no longer to be found behind a
> bridge. Therefore, remove and re-add all devices unconditionally."
> 
> Unfortunately, the "re-add" is currently done in the *pm resume* callback
> (socket_late_resume), but according to "USB: Don't rebind..." it is not
> allowed to be done there, so the patch moves it into the *pm complete*
> callback. The Documentation/power/* contains mostly information for
> driver developers, but AFAICT it doesn't say much about how the subsystem
> to which the device is connected should behave, so there's a bit of a
> "citing-gap".

hm, it does seem a bit of a screwup.

What's unclear to me is whether your patch fixes any observed runtime
problems.  Or adds any runtime problems, which looks to be a distinct
possibility.

> So, my question now: Would you accept the pcmcia patch if I add the
> "USB: Don't rebind..." as a reference to why the re-add needs to be
> done in complete? Or do you think that I should bug the pm people
> (and Alan - since he wrote that it is "not allowed") in this case
> so I can link their official answer to this patch?

Well I could grab it and give it a little bit of testing in linux-next.
But I'd be super-reluctant to send such a patch upstream without
detailed input from Alan/Greg/Rafael/Dominik/etc.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2] block: do not artificially constrain max_sectors for stacking drivers

2012-07-09 Thread Mike Snitzer

On Mon, Jul 09 2012 at 10:57am -0400,
Mike Snitzer  wrote:

> blk_set_stacking_limits() is intended to allow stacking drivers to build
> up the limits of the stacked device based on the underlying devices'
> limits.  But in the case of 'max_sectors' the default of
> BLK_DEF_MAX_SECTORS (1024) doesn't allow the stacking driver to inherit
> a max_sectors larger than 1024.
> 
> It is now clear that this artificial limit is getting in the way so
> change blk_set_stacking_limits's max_sectors to UINT_MAX (which allows
> stacking drivers like dm-multipath to inherit 'max_sectors' from the
> underlying paths).
> 
> blk_limits_max_hw_sectors() must allow stacking drivers to not have
> max_sectors set to BLK_DEF_MAX_SECTORS as a side-effect.  Move that
> historic constraint to blk_queue_max_hw_sectors().
> 
> Reported-by: Vijay Chauhan 
> Signed-off-by: Mike Snitzer 
> Cc: Martin K. Petersen 
> ---
>  block/blk-settings.c |8 
>  1 files changed, 4 insertions(+), 4 deletions(-)
> 
> v2: tweak blk_limits_max_hw_sectors and blk_queue_max_hw_sectors

As it happens, v2's changes to blk_limits_max_hw_sectors and
blk_queue_max_hw_sectors are not strictly required in order for existing
stacking drivers to have an unconstrained max_sectors.  Dropping
those changes also allows for consistency across both block functions.

So I'd be happy if v1 were to be staged for 3.6.  NetApp: it would be
great if you could confirm that v1 does in fact address the max_sectors
issue you reported.

Thanks,
Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: + checkpatch-add-check-for-use-of-sizeof-without-parenthesis.patch added to -mm tree

2012-07-09 Thread David Rientjes

On Mon, 9 Jul 2012, Joe Perches wrote:

> Huh?  Maybe I misunderstand you.
> 
> $ cat sizeof.c
> #include 
> #include 
> #include 
> 
> struct foo {
>   int bar[20];
>   char *baz;
> };
> 
> int main(int argc, char **argv)
> {
>   struct foo bar;
>   struct foo *baz;
> 
>   printf("1: %zu\n", sizeof(struct foo));

Parentheses are required for type names such as this, and this is normally 
written as "sizeof (struct foo)" in gcc, glibc, the C99 standard, etc., 
but the Linux coding style asks that no space be introduced.

>   printf("2: %zu\n", sizeof bar);

This is a unary expression and no parentheses are required by either the 
C99 standard or the Linux coding style, and there are over 1000 
occurrences where there are no parentheses for unary operators currently 
in the kernel.

So, nack, don't start enforcing your own coding style and preferences in 
checkpatch.pl.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RESEND AGAIN][PATCH] pcmcia: move unbind/rebind into dev_pm_ops.complete

2012-07-09 Thread Christian Lamparter

On Monday, July 09, 2012 11:59:39 PM Andrew Morton wrote:
> On Fri, 6 Jul 2012 14:30:16 -0700
> Greg KH  wrote:
> 
> > On Fri, Jul 06, 2012 at 11:23:52PM +0200, Christian Lamparter wrote:
> > > The idea of moving rebind procedure into pm.complete
> > > was taken from the usb-subsystem, which has similar
> > > problems with reattaching devices during/after
> > > resume.
> > > 
> > > Signed-off-by: Christian Lamparter 
> > > ---
> > > To Greg:
> > > 
> > > I have submitted this patch back in March and again in May.
> > > As far as I can tell it was neither rejected nor was it
> > > accepted into linux-pcmcia.git since. So I'm asking you,
> > > if you could take the patch instead... please.
> > 
> > There is a PCMCIA "team" who should be taking these types of patches.
> > Why are they not doing so?
> > 
> 
> Things are pretty quiet in pcmcia world, but Dominik does appear to
> still be doing stuff.
> 
> I sometimes queue PCMCIA patches for people, but not this one.  The
> changelog is just junk.  What does the patch do?  Why does it do it? 
> What problems does it solve?  What are these mysterious "problems with
> reattaching devices" to which it refers?  Useless...
> 
Well, that can be improved, but it is a bit tricky. 
AFAICT the usb subsystem dealt with pm in this commit:

"commit 5096aedcd2eb70fbea83f09281f97f9ec973d9de
Author: Alan Stern 
Date:   Tue Aug 12 14:34:14 2008 -0400

USB: Don't rebind before "complete" callback

[...] We are not allowed to call drivers' probe routines during 
a system sleep transition between the "prepare" and "complete"
callbacks, but that's exactly what we do when a driver doesn't
have full suspend/resume support. [...]"

And on the pcmcia subsystem we have this:

"commit 88b060d6c03fcb9e4d2018b4349954c4242a5c7f
Author: Dominik Brodowski 
Date:   Sat Jan 2 14:14:23 2010 +0100

pcmcia: improve check for same card in slot after resume

During a suspend/resume cycle, an user may change the card in the
PCMCIA/CardBus slot. [...]

For CardBus devices, the PCI hotplug interface doesn't offer a "rescan"
facility which also _removes_ devices no longer to be found behind a
bridge. Therefore, remove and re-add all devices unconditionally."

Unfortunately, the "re-add" is currently done in the *pm resume* callback
(socket_late_resume), but according to "USB: Don't rebind..." it is not
allowed to be done there, so the patch moves it into the *pm complete*
callback. The Documentation/power/* contains mostly information for
driver developers, but AFAICT it doesn't say much about how the subsystem
to which the device is connected should behave, so there's a bit of a
"citing-gap".

So, my question now: Would you accept the pcmcia patch if I add the
"USB: Don't rebind..." as a reference to why the re-add needs to be
done in complete? Or do you think that I should bug the pm people
(and Alan - since he wrote that it is "not allowed") in this case
so I can link their official answer to this patch?

Regards,
Chr
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/3] shmem: cleanup shmem_add_to_page_cache

2012-07-09 Thread Hugh Dickins

shmem_add_to_page_cache() has three callsites, but only one of them
wants the radix_tree_preload() (an exceptional entry guarantees that
the radix tree node is present in the other cases), and only that site
can achieve mem_cgroup_uncharge_cache_page() (PageSwapCache makes it a
no-op in the other cases).  We did it this way originally to reflect
add_to_page_cache_locked(); but it's confusing now, so move the
radix_tree preloading and mem_cgroup uncharging to that one caller.

Signed-off-by: Hugh Dickins 
Cc: Johannes Weiner 
Cc: KAMEZAWA Hiroyuki 
Cc: Michal Hocko 
---
This is just a cleanup: I'd prefer it to go in along with the fix 2/3,
but it can be delayed to v3.6 if you prefer.

 mm/shmem.c |   58 ---
 1 file changed, 28 insertions(+), 30 deletions(-)

--- 3.5-rc6+/mm/shmem.c 2012-07-07 19:20:52.026952048 -0700
+++ linux/mm/shmem.c2012-07-07 19:21:44.342952082 -0700
@@ -288,40 +288,31 @@ static int shmem_add_to_page_cache(struc
   struct address_space *mapping,
   pgoff_t index, gfp_t gfp, void *expected)
 {
-   int error = 0;
+   int error;
 
VM_BUG_ON(!PageLocked(page));
VM_BUG_ON(!PageSwapBacked(page));
 
+   page_cache_get(page);
+   page->mapping = mapping;
+   page->index = index;
+
+   spin_lock_irq(>tree_lock);
if (!expected)
-   error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+   error = radix_tree_insert(>page_tree, index, page);
+   else
+   error = shmem_radix_tree_replace(mapping, index, expected,
+page);
if (!error) {
-   page_cache_get(page);
-   page->mapping = mapping;
-   page->index = index;
-
-   spin_lock_irq(>tree_lock);
-   if (!expected)
-   error = radix_tree_insert(>page_tree,
-   index, page);
-   else
-   error = shmem_radix_tree_replace(mapping, index,
-   expected, page);
-   if (!error) {
-   mapping->nrpages++;
-   __inc_zone_page_state(page, NR_FILE_PAGES);
-   __inc_zone_page_state(page, NR_SHMEM);
-   spin_unlock_irq(>tree_lock);
-   } else {
-   page->mapping = NULL;
-   spin_unlock_irq(>tree_lock);
-   page_cache_release(page);
-   }
-   if (!expected)
-   radix_tree_preload_end();
+   mapping->nrpages++;
+   __inc_zone_page_state(page, NR_FILE_PAGES);
+   __inc_zone_page_state(page, NR_SHMEM);
+   spin_unlock_irq(>tree_lock);
+   } else {
+   page->mapping = NULL;
+   spin_unlock_irq(>tree_lock);
+   page_cache_release(page);
}
-   if (error)
-   mem_cgroup_uncharge_cache_page(page);
return error;
 }
 
@@ -1202,11 +1193,18 @@ repeat:
__set_page_locked(page);
error = mem_cgroup_cache_charge(page, current->mm,
gfp & GFP_RECLAIM_MASK);
-   if (!error)
-   error = shmem_add_to_page_cache(page, mapping, index,
-   gfp, NULL);
if (error)
goto decused;
+   error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+   if (!error) {
+   error = shmem_add_to_page_cache(page, mapping, index,
+   gfp, NULL);
+   radix_tree_preload_end();
+   }
+   if (error) {
+   mem_cgroup_uncharge_cache_page(page);
+   goto decused;
+   }
lru_cache_add_anon(page);
 
spin_lock(>lock);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] drm: use drm_compat_ioctl for 32-bit apps

2012-07-09 Thread Keith Packard

Most of the DRM drivers appear to be missing the .compat_ioctl file
operation entry necessary for 32-bit application compatibility.

This patch uses drm_compat_ioctl for all drivers which don't have
their own, and which are using drm_ioctl for .unlocked_ioctl.

This leaves drivers/gpu/drm/psb/psb_drv.c unchanged; it has a custom
.unlocked_ioctl and will presumably need a custom .compat_ioctl as
well.

Signed-off-by: Keith Packard 
---
 drivers/gpu/drm/ast/ast_drv.c   |3 +++
 drivers/gpu/drm/cirrus/cirrus_drv.c |3 +++
 drivers/gpu/drm/exynos/exynos_drm_drv.c |3 +++
 drivers/gpu/drm/i810/i810_dma.c |3 +++
 drivers/gpu/drm/i810/i810_drv.c |3 +++
 drivers/gpu/drm/mgag200/mgag200_drv.c   |3 +++
 drivers/gpu/drm/savage/savage_drv.c |3 +++
 drivers/gpu/drm/sis/sis_drv.c   |3 +++
 drivers/gpu/drm/tdfx/tdfx_drv.c |3 +++
 drivers/gpu/drm/udl/udl_drv.c   |3 +++
 drivers/gpu/drm/via/via_drv.c   |3 +++
 11 files changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index d0c4574..3616480 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c
@@ -193,6 +193,9 @@ static const struct file_operations ast_fops = {
.mmap = ast_mmap,
.poll = drm_poll,
.fasync = drm_fasync,
+#ifdef CONFIG_COMPAT
+   .compat_ioctl = drm_compat_ioctl,
+#endif
.read = drm_read,
 };
 
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c 
b/drivers/gpu/drm/cirrus/cirrus_drv.c
index 7053140..b83a2d7 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.c
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.c
@@ -74,6 +74,9 @@ static const struct file_operations cirrus_driver_fops = {
.unlocked_ioctl = drm_ioctl,
.mmap = cirrus_mmap,
.poll = drm_poll,
+#ifdef CONFIG_COMPAT
+   .compat_ioctl = drm_compat_ioctl,
+#endif
.fasync = drm_fasync,
 };
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c 
b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index d6de2e0..6b07cfe 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -240,6 +240,9 @@ static const struct file_operations exynos_drm_driver_fops 
= {
.poll   = drm_poll,
.read   = drm_read,
.unlocked_ioctl = drm_ioctl,
+#ifdef CONFIG_COMPAT
+   .compat_ioctl = drm_compat_ioctl,
+#endif
.release= drm_release,
 };
 
diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c
index fa94391..6e8f935 100644
--- a/drivers/gpu/drm/i810/i810_dma.c
+++ b/drivers/gpu/drm/i810/i810_dma.c
@@ -115,6 +115,9 @@ static const struct file_operations i810_buffer_fops = {
.unlocked_ioctl = drm_ioctl,
.mmap = i810_mmap_buffers,
.fasync = drm_fasync,
+#ifdef CONFIG_COMPAT
+   .compat_ioctl = drm_compat_ioctl,
+#endif
.llseek = noop_llseek,
 };
 
diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c
index ec12f7d..8e50757 100644
--- a/drivers/gpu/drm/i810/i810_drv.c
+++ b/drivers/gpu/drm/i810/i810_drv.c
@@ -51,6 +51,9 @@ static const struct file_operations i810_driver_fops = {
.mmap = drm_mmap,
.poll = drm_poll,
.fasync = drm_fasync,
+#ifdef CONFIG_COMPAT
+   .compat_ioctl = drm_compat_ioctl,
+#endif
.llseek = noop_llseek,
 };
 
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c 
b/drivers/gpu/drm/mgag200/mgag200_drv.c
index 93e832d..895a67e 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.c
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.c
@@ -81,6 +81,9 @@ static const struct file_operations mgag200_driver_fops = {
.mmap = mgag200_mmap,
.poll = drm_poll,
.fasync = drm_fasync,
+#ifdef CONFIG_COMPAT
+   .compat_ioctl = drm_compat_ioctl,
+#endif
.read = drm_read,
 };
 
diff --git a/drivers/gpu/drm/savage/savage_drv.c 
b/drivers/gpu/drm/savage/savage_drv.c
index 89afe0b..c77823a 100644
--- a/drivers/gpu/drm/savage/savage_drv.c
+++ b/drivers/gpu/drm/savage/savage_drv.c
@@ -43,6 +43,9 @@ static const struct file_operations savage_driver_fops = {
.mmap = drm_mmap,
.poll = drm_poll,
.fasync = drm_fasync,
+#ifdef CONFIG_COMPAT
+   .compat_ioctl = drm_compat_ioctl,
+#endif
.llseek = noop_llseek,
 };
 
diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c
index dd14cd1..02cd328 100644
--- a/drivers/gpu/drm/sis/sis_drv.c
+++ b/drivers/gpu/drm/sis/sis_drv.c
@@ -74,6 +74,9 @@ static const struct file_operations sis_driver_fops = {
.mmap = drm_mmap,
.poll = drm_poll,
.fasync = drm_fasync,
+#ifdef CONFIG_COMPAT
+   .compat_ioctl = drm_compat_ioctl,
+#endif
.llseek = noop_llseek,
 };
 
diff --git a/drivers/gpu/drm/tdfx/tdfx_drv.c b/drivers/gpu/drm/tdfx/tdfx_drv.c
index 1613c78..3af8832 100644
--- a/drivers/gpu/drm/tdfx/tdfx_drv.c
+++

[PATCH 2/3] shmem: fix negative rss in memcg memory.stat

2012-07-09 Thread Hugh Dickins

When adding the page_private checks before calling shmem_replace_page(),
I did realize that there is a further race, but thought it too unlikely
to need a hurried fix.

But independently I've been chasing why a mem cgroup's memory.stat
sometimes shows negative rss after all tasks have gone: I expected it
to be a stats gathering bug, but actually it's shmem swapping's fault.

It's an old surprise, that when you lock_page(lookup_swap_cache(swap)),
the page may have been removed from swapcache before getting the lock; 
or it may have been freed and reused and be back in swapcache; and it
can even be using the same swap location as before (page_private same).

The swapoff case is already secure against this (swap cannot be reused
until the whole area has been swapped off, and a new swapped on); and
shmem_getpage_gfp() is protected by shmem_add_to_page_cache()'s check
for the expected radix_tree entry - but a little too late.

By that time, we might have already decided to shmem_replace_page():
I don't know of a problem from that, but I'd feel more at ease not to
do so spuriously.  And we have already done mem_cgroup_cache_charge(),
on perhaps the wrong mem cgroup: and this charge is not then undone on
the error path, because PageSwapCache ends up preventing that.

It's this last case which causes the occasional negative rss in
memory.stat: the page is charged here as cache, but (sometimes) found
to be anon when eventually it's uncharged - and in between, it's an
undeserved charge on the wrong memcg.

Fix this by adding an earlier check on the radix_tree entry: it's
inelegant to descend the tree twice, but swapping is not the fast path,
and a better solution would need a pair (try+commit) of memcg calls,
and a rework of shmem_replace_page() to keep out of the swapcache.

We can use the added shmem_confirm_swap() function to replace the
find_get_page+page_cache_release we were already doing on the error
path.  And add a comment on that -EEXIST: it seems a peculiar errno
to be using, but originates from its use in radix_tree_insert().

[It can be surprising to see positive rss left in a memcg's memory.stat
after all tasks have gone, since it is supposed to count anonymous but
not shmem.  Aside from sharing anon pages via fork with a task in some
other memcg, it often happens after swapping: because a swap page can't
be freed while under writeback, nor while locked.  So it's not an error,
and these residual pages are easily freed once pressure demands.]

Signed-off-by: Hugh Dickins 
Cc: Johannes Weiner 
Cc: KAMEZAWA Hiroyuki 
Cc: Michal Hocko 
---
I'd rather like this to go into v3.5, but it is late, and I don't have
a very strong argument for it: as you prefer.  And I've not marked it
for stable, since the patch won't apply to v3.4 as is; but I'd happily
supply a patch for v3.1 onwards if asked.

 mm/shmem.c |   41 +
 1 file changed, 29 insertions(+), 12 deletions(-)

--- 3.5-rc6/mm/shmem.c  2012-07-07 19:20:02.986950655 -0700
+++ linux/mm/shmem.c2012-07-07 19:20:52.026952048 -0700
@@ -264,6 +264,24 @@ static int shmem_radix_tree_replace(stru
 }
 
 /*
+ * Sometimes, before we decide whether to proceed or to fail, we must check
+ * that an entry was not already brought back from swap by a racing thread.
+ *
+ * Checking page is not enough: by the time a SwapCache page is locked, it
+ * might be reused, and again be SwapCache, using the same swap as before.
+ */
+static bool shmem_confirm_swap(struct address_space *mapping,
+  pgoff_t index, swp_entry_t swap)
+{
+   void *item;
+
+   rcu_read_lock();
+   item = radix_tree_lookup(>page_tree, index);
+   rcu_read_unlock();
+   return item == swp_to_radix_entry(swap);
+}
+
+/*
  * Like add_to_page_cache_locked, but error if expected item has gone.
  */
 static int shmem_add_to_page_cache(struct page *page,
@@ -1124,9 +1142,9 @@ repeat:
/* We have to do this with page locked to prevent races */
lock_page(page);
if (!PageSwapCache(page) || page_private(page) != swap.val ||
-   page->mapping) {
+   !shmem_confirm_swap(mapping, index, swap)) {
error = -EEXIST;/* try again */
-   goto failed;
+   goto unlock;
}
if (!PageUptodate(page)) {
error = -EIO;
@@ -1142,9 +1160,12 @@ repeat:
 
error = mem_cgroup_cache_charge(page, current->mm,
gfp & GFP_RECLAIM_MASK);
-   if (!error)
+   if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
gfp, swp_to_radix_entry(swap));
+   /* We already confirmed swap, and make no allocation */
+   VM_BUG_ON(error);
+   }

[PATCH 1/3] tmpfs: revert SEEK_DATA and SEEK_HOLE

2012-07-09 Thread Hugh Dickins

Revert 4fb5ef089b28 ("tmpfs: support SEEK_DATA and SEEK_HOLE").
I believe it's correct, and it's been nice to have from rc1 to rc6;
but as the original commit said:

I don't know who actually uses SEEK_DATA or SEEK_HOLE, and whether it
would be of any use to them on tmpfs.  This code adds 92 lines and 752
bytes on x86_64 - is that bloat or worthwhile?

Nobody asked for it, so I conclude that it's bloat: let's revert tmpfs
to the dumb generic support for v3.5.  We can always reinstate it later
if useful, and anyone needing it in a hurry can just get it out of git.

Signed-off-by: Hugh Dickins 
Cc: Christoph Hellwig 
Cc: Josef Bacik 
Cc: Andi Kleen 
Cc: Andreas Dilger 
Cc: Dave Chinner 
Cc: Marco Stornelli 
Cc: Jeff liu 
Cc: Chris Mason 
---
But if someone protests at this reversion, of course we can drop it.

 mm/shmem.c |   94 ---
 1 file changed, 1 insertion(+), 93 deletions(-)

--- 3.5-rc6/mm/shmem.c  2012-07-07 18:20:40.635328642 -0700
+++ linux/mm/shmem.c2012-07-07 19:20:02.986950655 -0700
@@ -1692,98 +1692,6 @@ static ssize_t shmem_file_splice_read(st
return error;
 }
 
-/*
- * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
- */
-static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
-   pgoff_t index, pgoff_t end, int origin)
-{
-   struct page *page;
-   struct pagevec pvec;
-   pgoff_t indices[PAGEVEC_SIZE];
-   bool done = false;
-   int i;
-
-   pagevec_init(, 0);
-   pvec.nr = 1;/* start small: we may be there already */
-   while (!done) {
-   pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
-   pvec.nr, pvec.pages, indices);
-   if (!pvec.nr) {
-   if (origin == SEEK_DATA)
-   index = end;
-   break;
-   }
-   for (i = 0; i < pvec.nr; i++, index++) {
-   if (index < indices[i]) {
-   if (origin == SEEK_HOLE) {
-   done = true;
-   break;
-   }
-   index = indices[i];
-   }
-   page = pvec.pages[i];
-   if (page && !radix_tree_exceptional_entry(page)) {
-   if (!PageUptodate(page))
-   page = NULL;
-   }
-   if (index >= end ||
-   (page && origin == SEEK_DATA) ||
-   (!page && origin == SEEK_HOLE)) {
-   done = true;
-   break;
-   }
-   }
-   shmem_deswap_pagevec();
-   pagevec_release();
-   pvec.nr = PAGEVEC_SIZE;
-   cond_resched();
-   }
-   return index;
-}
-
-static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
-{
-   struct address_space *mapping;
-   struct inode *inode;
-   pgoff_t start, end;
-   loff_t new_offset;
-
-   if (origin != SEEK_DATA && origin != SEEK_HOLE)
-   return generic_file_llseek_size(file, offset, origin,
-   MAX_LFS_FILESIZE);
-   mapping = file->f_mapping;
-   inode = mapping->host;
-   mutex_lock(>i_mutex);
-   /* We're holding i_mutex so we can access i_size directly */
-
-   if (offset < 0)
-   offset = -EINVAL;
-   else if (offset >= inode->i_size)
-   offset = -ENXIO;
-   else {
-   start = offset >> PAGE_CACHE_SHIFT;
-   end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-   new_offset = shmem_seek_hole_data(mapping, start, end, origin);
-   new_offset <<= PAGE_CACHE_SHIFT;
-   if (new_offset > offset) {
-   if (new_offset < inode->i_size)
-   offset = new_offset;
-   else if (origin == SEEK_DATA)
-   offset = -ENXIO;
-   else
-   offset = inode->i_size;
-   }
-   }
-
-   if (offset >= 0 && offset != file->f_pos) {
-   file->f_pos = offset;
-   file->f_version = 0;
-   }
-   mutex_unlock(>i_mutex);
-   return offset;
-}
-
 static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 loff_t len)
 {
@@ -2787,7 +2695,7 @@ static const struct address_space_operat
 static const struct file_operations shmem_file_operations = {
.mmap   = shmem_mmap,
 #ifdef CONFIG_TMPFS
-   .llseek = shmem_file_llseek,
+

Re: [PATCH] mm: don't invoke __alloc_pages_direct_compact when order 0

2012-07-09 Thread Andrew Morton

On Mon, 9 Jul 2012 23:13:50 +0900
JoonSoo Kim  wrote:

> >> In my kernel image, __alloc_pages_direct_compact() is not inlined by gcc.

My gcc-4.4.4 doesn't inline it either.

> I think __alloc_pages_direct_compact() can't be inlined by gcc,
> because it is so big and is invoked two times in __alloc_pages_nodemask().

This.  Large function, two callsites.

Making __alloc_pages_direct_compact() __always_inline adds only 26
bytes to my page_alloc.o's .text.  Such is the suckiness of passing
eleven arguments!

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3] printk: Have printk() never buffer its data

2012-07-09 Thread Kay Sievers

On Tue, Jul 10, 2012 at 12:29 AM, Joe Perches  wrote:
> On Tue, 2012-07-10 at 00:10 +0200, Kay Sievers wrote:
>> On Mon, Jul 9, 2012 at 11:42 PM, Joe Perches  wrote:
>> > On Sun, 2012-07-08 at 19:55 +0200, Kay Sievers wrote:
>> >
>> >> At the same time the CPU#2 prints the same warning with a continuation
>> >> line, but the buffer from CPU#1 can not be flushed to the console, nor
>> >> can the continuation line printk()s from CPU#2 be merged at this point.
>> >> The consoles are still locked and busy with replaying the old log
>> >> messages, so the new continuation data is just stored away in the record
>> >> buffer as it is coming in.
>> >> If the console would be registered a bit earlier, or the warning would
>> >> happen a bit later, we would probably not see any of this.
>> >>
>> >> I can fake something like this just by holding the console semaphore
>> >> over a longer time and printing continuation lines with different CPUs
>> >> in a row.
>> >>
>> >> The patch below seems to work for me. It is also here:
>> >>   
>> >> http://git.kernel.org/?p=linux/kernel/git/kay/patches.git;a=blob;f=kmsg-merge-cont.patch;hb=HEAD
>> >>
>> >> It only applies cleanly on top of this patch:
>> >>   
>> >> http://git.kernel.org/?p=linux/kernel/git/kay/patches.git;a=blob;f=kmsg-syslog-1-byte-read.patch;hb=HEAD
>> >>
>> >
>> > Hi Kay.
>> >
>> > I just ran a test with what's in Greg's driver-core -for-linus branch.
>> >
>> > One of the differences in dmesg is timestamping of consecutive
>> > pr_<level>("foo...)
>> > followed directly by
>> > pr_cont("bar...")
>> >
>> > For instance: (dmesg is 3.4, dmesg.0 is 3.5-rc6+)
>> >
>> > # grep MAP /var/log/dm* -A1
>> > dmesg:[0.781687] ata_piix :00:1f.2: MAP [ P0 P2 P1 P3 ]
>> > dmesg-[0.781707] ata2: port disabled--ignoring
>> > --
>> > dmesg.0:[0.948881] ata_piix :00:1f.2: MAP [
>> > dmesg.0-[0.948883]  P0 P2 P1 P3 ]
>> >
>> > These messages originate starting at
>> > drivers/ata/ata_piix.c:1354
>> >
>> > All the continuations are emitted with pr_cont.
>> >
>> > I think this output should still be coalesced without
>> > timestamp deltas.  Perhaps the timestamping code can
>> > still be reworked to avoid too small a delta producing
>> > a new timestamp and another dmesg line.
>>
>> Hmm, I don't see that.
>>
>> If I do:
>>   pr_info("[");
>>   for (i = 0; i < 4; i++)
>>pr_cont("%i ", i);
>>   pr_cont("]\n");
>>
>> I get:
>>   6,173,0;[0 1 2 3 ]
>>
>> And if I fill the cont buffer and forcefully hold the console sem
>> during all that, and we can't merge anymore, I get:
>>   6,167,0;[
>>   4,168,0;0
>>   4,169,0;1
>>   4,170,0;2
>>   4,171,0;3
>>   4,172,0;]
>>
>> But the output is still all fine for both lines:
>>   [0.00] [0 1 2 3 ]
>>   [0.00] [0 1 2 3 ]
>>
>> What do I miss?
>
> In this case the initial line is dev_info not pr_info
> so there are the additional dict descriptors output to
> /dev/kmsg as well.
>
> Maybe that interferes with continuations.  Dunno.

Yes, it does. Annotated records dev_printk() must be reliable in the
data storage and for the consumers. We can not expose them to the racy
continuation printk()s. We need to be able to trust the data they
print and not possibly merge unrelated things into it.

If it's needed, we can try to set the flags accordingly, that they
*look* like a line in the classic byte-stream output, but the
interface in /dev/kmsg must not export them that way, because
continuation lines can never be trusted to be correct.

Kay
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 0/2] [V4] block: Support online resize of disk partitions

2012-07-09 Thread Phillip Susi

-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 07/09/2012 05:34 PM, vgo...@redhat.com wrote:
> Phillip, do let me know if I should put your signed-off-by also in the
> patch.

Sure, kernel side looks good.  My original util-linux patches also added a -u 
update mode to kpartx, which I think is the more useful interface than the 
lower level resizepart command, but I suppose I can rebase it to apply on top 
of this patch.

-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJP+13DAAoJEJrBOlT6nu75cQIIAJvm6ZFxpNNvgkXq0I6blvIj
Q3s5YbzJYecouHPZdy06UXIdfucHKO7WAaMvpmPnDk+JgtltNljVpA50d21NN2lY
k2j2oU9mFHGEKLDnnYobnr6cO2UShaZkrcMtC29S4LaAdgAgPNyD8aTTWS9w0frv
+p2ko+HKvp3neRpOBwfnYXq/rTBLUmOn0k7XsG8QjnNb3aMnMyYp/crV9Kzeb4YX
uCbnIkzN++oDhmnqsLDGt82/VGXZdnA1umISbV9vZw+Q7FfeaiJVMneFxKe5w/FZ
wiCixoCtGhbqtz1hSsUR+rs2ZaDozL0iygSB15Z71aLPim0TLumtRTpfpu+JR3w=
=Vtl+
-END PGP SIGNATURE-
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH RFC 1/2] kvm vcpu: Note down pause loop exit

2012-07-09 Thread Rik van Riel


On 07/09/2012 02:20 AM, Raghavendra K T wrote:


@@ -484,6 +484,13 @@ struct kvm_vcpu_arch {
u64 length;
u64 status;
} osvw;
+
+   /* Pause loop exit optimization */
+   struct {
+   bool pause_loop_exited;
+   bool dy_eligible;
+   } plo;


I know kvm_vcpu_arch is traditionally not a well documented
structure, but it would be really nice if each variable inside
this sub-structure could get some documentation.

Also, do we really want to introduce another acronym here?

Or would we be better off simply calling this struct .ple,
since that is a name people are already familiar with.

--
All rights reversed
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Build regressions/improvements in v3.5-rc6

2012-07-09 Thread Bjorn Helgaas

On Mon, Jul 9, 2012 at 3:31 PM, Geert Uytterhoeven  wrote:
> Hi Bjorn (or Björn?),
>
> On Mon, Jul 9, 2012 at 11:20 PM, Bjorn Helgaas  wrote:
>> There are some PCI-related regressions here that I'd like to fix, but
>> I don't know where to start.  For example, these:
>>
>>>   + drivers/pci/quirks.c: error: implicit declaration of function 
>>> 'ioread32' [-Werror=implicit-function-declaration]:  => 3154:2
>>>   + drivers/pci/quirks.c: error: implicit declaration of function 
>>> 'iowrite32' [-Werror=implicit-function-declaration]:  => 3144:2
>>
>> I don't see these in an x86 build, so I assume these are from some
>> other arch or other config.  Can I tell which one?
>
> xtensa-allmodconfig
>
> http://kisskb.ellerman.id.au/kisskb/buildresult/6668885/

Thanks!

There are a ton of xtensa warnings related to the fact that xtensa
doesn't support the iomap interface (ioread, iowrite, pci_iomap,
pci_iounmap, etc).

I'm going to punt and hope Chris takes care of this.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3] printk: Have printk() never buffer its data

2012-07-09 Thread Michael Neuling

Greg Kroah-Hartman  wrote:

> On Sun, Jul 08, 2012 at 07:55:55PM +0200, Kay Sievers wrote:
> > On Sat, 2012-07-07 at 07:04 +1000, Michael Neuling wrote:
> > > Whole kmsg below.
> > 
> > I guess I have an idea now what's going on.
> > 
> > > 4,47,0;WARNING: at 
> > > /scratch/mikey/src/linux-ozlabs/arch/powerpc/sysdev/xics/xics-common.c:105
> > > 4,51,0;MSR: 90021032   CR: 2442  XER: 
> > > 2200
> > > 4,54,0;TASK = c0b2dd80[0] 'swapper/0' THREAD: c0c24000 
> > > CPU: 0
> > 
> > This is the warning on CPU#1, all fine, all in one line.
> > 
> > > 6,74,0;console [tty0] enabled
> > > 6,75,0;console [hvc0] enabled
> > 
> > Now the boot consoles are registered, which replays the whole buffer
> > that was collected up to this point. During the entire time the console
> > semaphore needs to be held, and this can be quite a while.
> > 
> > > 4,87,24545;WARNING: at 
> > > /scratch/mikey/src/linux-ozlabs/arch/powerpc/sysdev/xics/xics-common.c:105
> > > \4,91,24586;MSR: 90021032 
> > > 4,92,24590;<
> > > 4,93,24594;SF
> > > 4,94,24599;,HV
> > > 4,95,24604;,ME
> > > 4,96,24609;,IR
> > > 4,97,24614;,DR
> > > 4,98,24619;,RI
> > > 4,99,24623;>
> > > 4,104,24661; CPU: 1
> > 
> > At the same time the CPU#2 prints the same warning with a continuation
> > line, but the buffer from CPU#1 can not be flushed to the console, nor
> > can the continuation line printk()s from CPU#2 be merged at this point.
> > The consoles are still locked and busy with replaying the old log
> > messages, so the new continuation data is just stored away in the record
> > buffer as it is coming in.
> > If the console would be registered a bit earlier, or the warning would
> > happen a bit later, we would probably not see any of this.
> > 
> > I can fake something like this just by holding the console semaphore
> > over a longer time and printing continuation lines with different CPUs
> > in a row.
> > 
> > The patch below seems to work for me. It is also here:
> >   
> > http://git.kernel.org/?p=linux/kernel/git/kay/patches.git;a=blob;f=kmsg-merge-cont.patch;hb=HEAD
> > 
> > It only applies cleanly on top of this patch:
> >   
> > http://git.kernel.org/?p=linux/kernel/git/kay/patches.git;a=blob;f=kmsg-syslog-1-byte-read.patch;hb=HEAD
> > 
> > Thanks,
> > Kay
> > 
> > 
> > Subject: kmsg: merge continuation records while printing
> > 
> > In (the unlikely) case our continuation merge buffer is busy, we 
> > unfortunately
> > can not merge further continuation printk()s into a single record and have 
> > to
> > store them separately, which leads to split-up output of these lines when 
> > they
> > are printed.
> > 
> > Add some flags about newlines and prefix existence to these records and try 
> > to
> > reconstruct the full line again, when the separated records are printed.
> > ---
> >  kernel/printk.c |  119 
> > 
> >  1 file changed, 77 insertions(+), 42 deletions(-)
> 
> Michael, did this solve the issue for you?

It didn't but I've been working with Kay offline and what he pushed to
you and is now in your driver-core-linus branch is good.  ie.

  5becfb1 kmsg: merge continuation records while printing

... works for me.

Thanks,
Mikey
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: + checkpatch-add-check-for-use-of-sizeof-without-parenthesis.patch added to -mm tree

2012-07-09 Thread Joe Perches

On Mon, 2012-07-09 at 15:23 -0700, David Rientjes wrote:
> On Mon, 9 Jul 2012, a...@linux-foundation.org wrote:
> 
> > From: Joe Perches 
> > Subject: checkpatch: Add a check for use of sizeof without parenthesis
> > 
> > Kernel style uses parenthesis around sizeof.
> > 
> 
> Nack, there's a difference between "sizeof *task" and 
> "sizeof(struct task_struct)".  The former operates on a unary expression 
> and the latter operates on a type.  There are over 1000 occurrences in the 
> kernel where the sizeof operator, the former, is used on a unary 
> expression.

Huh?  Maybe I misunderstand you.

$ cat sizeof.c
#include 
#include 
#include 

struct foo {
int bar[20];
char *baz;
};

int main(int argc, char **argv)
{
struct foo bar;
struct foo *baz;

printf("1: %zu\n", sizeof(struct foo));
printf("2: %zu\n", sizeof bar);
printf("3: %zu\n", sizeof *baz); 

return 0;
}

$ gcc sizeof.c
$ ./a.out
1: 84
2: 84
3: 84

> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/3] shmem/tmpfs: three late patches

2012-07-09 Thread Hugh Dickins

Here's three little shmem/tmpfs patches against v3.5-rc6.
Either the first should go in before v3.5 final, or it should not go
in at all.  The second and third are independent of it: I'd like them
in v3.5, but don't have a clinching argument: see what you think.

[PATCH 1/3] tmpfs: revert SEEK_DATA and SEEK_HOLE
[PATCH 2/3] shmem: fix negative rss in memcg memory.stat
[PATCH 3/3] shmem: cleanup shmem_add_to_page_cache

 mm/shmem.c |  193 +++
 1 file changed, 58 insertions(+), 135 deletions(-)

Thanks,
Hugh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH RFC 2/2] kvm PLE handler: Choose better candidate for directed yield

2012-07-09 Thread Rik van Riel


On 07/09/2012 02:20 AM, Raghavendra K T wrote:


+bool kvm_arch_vcpu_check_and_update_eligible(struct kvm_vcpu *vcpu)
+{
+   bool eligible;
+
+   eligible = !vcpu->arch.plo.pause_loop_exited ||
+   (vcpu->arch.plo.pause_loop_exited&&
+vcpu->arch.plo.dy_eligible);
+
+   if (vcpu->arch.plo.pause_loop_exited)
+   vcpu->arch.plo.dy_eligible = !vcpu->arch.plo.dy_eligible;
+
+   return eligible;
+}


This is a nice simple mechanism to skip CPUs that were
eligible last time and had pause loop exits recently.

However, it could stand some documentation.  Please
add a good comment explaining how and why the algorithm
works, when arch.plo.pause_loop_exited is cleared, etc...

It would be good to make this heuristic understandable
to people who look at the code for the first time.

--
All rights reversed
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3] printk: Have printk() never buffer its data

2012-07-09 Thread Joe Perches

On Tue, 2012-07-10 at 00:10 +0200, Kay Sievers wrote:
> On Mon, Jul 9, 2012 at 11:42 PM, Joe Perches  wrote:
> > On Sun, 2012-07-08 at 19:55 +0200, Kay Sievers wrote:
> >
> >> At the same time the CPU#2 prints the same warning with a continuation
> >> line, but the buffer from CPU#1 can not be flushed to the console, nor
> >> can the continuation line printk()s from CPU#2 be merged at this point.
> >> The consoles are still locked and busy with replaying the old log
> >> messages, so the new continuation data is just stored away in the record
> >> buffer as it is coming in.
> >> If the console would be registered a bit earlier, or the warning would
> >> happen a bit later, we would probably not see any of this.
> >>
> >> I can fake something like this just by holding the console semaphore
> >> over a longer time and printing continuation lines with different CPUs
> >> in a row.
> >>
> >> The patch below seems to work for me. It is also here:
> >>   
> >> http://git.kernel.org/?p=linux/kernel/git/kay/patches.git;a=blob;f=kmsg-merge-cont.patch;hb=HEAD
> >>
> >> It only applies cleanly on top of this patch:
> >>   
> >> http://git.kernel.org/?p=linux/kernel/git/kay/patches.git;a=blob;f=kmsg-syslog-1-byte-read.patch;hb=HEAD
> >>
> >
> > Hi Kay.
> >
> > I just ran a test with what's in Greg's driver-core -for-linus branch.
> >
> > One of the differences in dmesg is timestamping of consecutive
> > pr_<level>("foo...)
> > followed directly by
> > pr_cont("bar...")
> >
> > For instance: (dmesg is 3.4, dmesg.0 is 3.5-rc6+)
> >
> > # grep MAP /var/log/dm* -A1
> > dmesg:[0.781687] ata_piix :00:1f.2: MAP [ P0 P2 P1 P3 ]
> > dmesg-[0.781707] ata2: port disabled--ignoring
> > --
> > dmesg.0:[0.948881] ata_piix :00:1f.2: MAP [
> > dmesg.0-[0.948883]  P0 P2 P1 P3 ]
> >
> > These messages originate starting at
> > drivers/ata/ata_piix.c:1354
> >
> > All the continuations are emitted with pr_cont.
> >
> > I think this output should still be coalesced without
> > timestamp deltas.  Perhaps the timestamping code can
> > still be reworked to avoid too small a delta producing
> > a new timestamp and another dmesg line.
> 
> Hmm, I don't see that.
> 
> If I do:
>   pr_info("[");
>   for (i = 0; i < 4; i++)
>pr_cont("%i ", i);
>   pr_cont("]\n");
> 
> I get:
>   6,173,0;[0 1 2 3 ]
> 
> And if I fill the cont buffer and forcefully hold the console sem
> during all that, and we can't merge anymore, I get:
>   6,167,0;[
>   4,168,0;0
>   4,169,0;1
>   4,170,0;2
>   4,171,0;3
>   4,172,0;]
> 
> But the output is still all fine for both lines:
>   [0.00] [0 1 2 3 ]
>   [0.00] [0 1 2 3 ]
> 
> What do I miss?

In this case the initial line is dev_info not pr_info
so there are the additional dict descriptors output to
/dev/kmsg as well.

Maybe that interferes with continuations.  Dunno.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH RFC 0/2] kvm: Improving directed yield in PLE handler

2012-07-09 Thread Rik van Riel


On 07/09/2012 02:20 AM, Raghavendra K T wrote:

Currently Pause Loop Exit (PLE) handler is doing directed yield to a
random VCPU on PL exit. Though we already have filtering while choosing
the candidate to yield_to, we can do better.

Problem is, for large vcpu guests, we have more probability of yielding
to a bad vcpu. We are not able to prevent directed yield to same guy who
has done PL exit recently, who perhaps spins again and wastes CPU.

Fix that by keeping track of who has done PL exit. So The Algorithm in series
give chance to a VCPU which has:

  (a) Not done PLE exit at all (probably he is preempted lock-holder)

  (b) VCPU skipped in last iteration because it did PL exit, and probably
  has become eligible now (next eligible lock holder)

Future enhancements:


Your patch series looks good to me. Simple changes with a
significant result.

However, the simple heuristic could use some comments :)

--
All rights reversed
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 6/7] drivers/net/ethernet/broadcom/cnic.c: remove invalid reference to list iterator variable

2012-07-09 Thread David Miller

From: Julia Lawall 
Date: Sun,  8 Jul 2012 13:37:43 +0200

> From: Julia Lawall 
> 
> If list_for_each_entry, etc complete a traversal of the list, the iterator
> variable ends up pointing to an address at an offset from the list head,
> and not a meaningful structure.  Thus this value should not be used after
> the end of the iterator.  There does not seem to be a meaningful value to
> provide to netdev_warn.  Replace with pr_warn, since pr_err is used
> elsewhere.
> 
> This problem was found using Coccinelle (http://coccinelle.lip6.fr/).
> 
> Signed-off-by: Julia Lawall 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/7] net/rxrpc/ar-peer.c: remove invalid reference to list iterator variable

2012-07-09 Thread David Miller

From: Julia Lawall 
Date: Sun,  8 Jul 2012 13:37:39 +0200

> From: Julia Lawall 
> 
> If list_for_each_entry, etc complete a traversal of the list, the iterator
> variable ends up pointing to an address at an offset from the list head,
> and not a meaningful structure.  Thus this value should not be used after
> the end of the iterator.  This seems to be a copy-paste bug from a previous
> debugging message, and so the meaningless value is just deleted.
> 
> This problem was found using Coccinelle (http://coccinelle.lip6.fr/).
> 
> Signed-off-by: Julia Lawall 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/7] drivers/isdn/mISDN/stack.c: remove invalid reference to list iterator variable

2012-07-09 Thread David Miller

From: Julia Lawall 
Date: Sun,  8 Jul 2012 13:37:38 +0200

> From: Julia Lawall 
> 
> If list_for_each_entry, etc complete a traversal of the list, the iterator
> variable ends up pointing to an address at an offset from the list head,
> and not a meaningful structure.  Thus this value should not be used after
> the end of the iterator.  The dereferences are just deleted from the
> debugging statement.
> 
> This problem was found using Coccinelle (http://coccinelle.lip6.fr/).
> 
> Signed-off-by: Julia Lawall 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: + checkpatch-add-check-for-use-of-sizeof-without-parenthesis.patch added to -mm tree

2012-07-09 Thread David Rientjes

On Mon, 9 Jul 2012, a...@linux-foundation.org wrote:

> From: Joe Perches 
> Subject: checkpatch: Add a check for use of sizeof without parenthesis
> 
> Kernel style uses parenthesis around sizeof.
> 

Nack, there's a difference between "sizeof *task" and 
"sizeof(struct task_struct)".  The former operates on a unary expression 
and the latter operates on a type.  There are over 1000 occurrences in the 
kernel where the sizeof operator, the former, is used on a unary 
expression.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/3] PCI: reimplement P2P bridge 1K I/O windows (Intel P64H2)

2012-07-09 Thread Bjorn Helgaas

On Mon, Jul 9, 2012 at 3:43 PM, Yinghai Lu  wrote:
> On Mon, Jul 9, 2012 at 1:32 PM, Bjorn Helgaas  wrote:
>>
>> Note that the bridge window assignment code, e.g., pbus_size_io(), should
>> pay attention to dev->io_window_1k, too, but I didn't fix that.
>
> Please check attached patch that will fix pbus_size_io.

Thanks for posting this again.  I had looked for it, but couldn't find it.

> You may fold the patch in your patch, or could split your patch to
> two. First one
> only add io_window_1k, and second one will use io_window_1k. Then could put my
> patch between them.

What bad things would happen if I just appended your patch to the end
of this series?  Would that break bisection in some scenario?

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] fat (exportfs): reconnect file handles to evicted inodes/dentries

2012-07-09 Thread OGAWA Hirofumi

"Steven J. Magnani"  writes:

>> Hm, not really, if the file handle is including parent ino. ext2 will
>> get the latest parent ino, because it checks parent of inode of file
>> handle.
>
> Can you point me to the code for this? The code I see looks pretty
> congruent to what I think the FAT code would be.
>
>> But if the file handle is including parent ino and we believe it is
>> parent, I think NFS server can be return the old parent. The difference
>> is the result of ->get_parent().
>
> I'm a little confused about which function we're discussing here.
> fat_get_parent() isn't called with a file handle. fat_fh_to_parent() is,
> but it is only called by exportfs_decode_fh() and I am reasonably sure
> that that function is handling the case you're concerned about.

Oh, you are right. I should discuss about it based on the new patch, and
only if there is new usage of file handle (e.g. parent->i_no).
-- 
OGAWA Hirofumi 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] drivers/rtc/rtc-twl.c: fix threaded IRQ to use IRQF_ONESHOT

2012-07-09 Thread Andrew Morton

On Fri,  6 Jul 2012 09:33:54 -0700
Kevin Hilman  wrote:

> Requesting a threaded interrupt without a primary handler and without
> IRQF_ONESHOT is dangerous, and after commit 1c6c6952 (genirq: Reject
> bogus threaded irq requests), these requests are rejected.  This
> causes ->probe() to fail, and the RTC driver not to be available.
> 
> To fix, add IRQF_ONESHOT to the IRQ flags.
> 
> Tested on OMAP3730/OveroSTORM and OMAP4430/Panda board using rtcwake
> to wake from system suspend multiple times.
> 
> Signed-off-by: Kevin Hilman 
> ---
> Resending to broader audience and including Andrew.  Since, I understand
> that drivers/rtc is somewhat orphaned, Andrew, can you queue this fix for
> v3.5.  Thanks.
> 
>  drivers/rtc/rtc-twl.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
> index 258abea..c5d06fe 100644
> --- a/drivers/rtc/rtc-twl.c
> +++ b/drivers/rtc/rtc-twl.c
> @@ -510,7 +510,7 @@ static int __devinit twl_rtc_probe(struct platform_device 
> *pdev)
>   }
>  
>   ret = request_threaded_irq(irq, NULL, twl_rtc_interrupt,
> -IRQF_TRIGGER_RISING,
> +IRQF_TRIGGER_RISING | IRQF_ONESHOT,
>  dev_name(&pdev->dev), rtc);
>   if (ret < 0) {
>   dev_err(&pdev->dev, "IRQ is not free.\n");

OK, this is the second such patch I've seen and it's time to wonder if
we should get grumpy at tglx.  afacit 1c6c6952 broke the following
drivers:


sound/soc/codecs/wm8994.c
sound/soc/codecs/max98095.c
sound/soc/codecs/twl6040.c
drivers/usb/otg/ab8500-usb.c
drivers/usb/otg/twl4030-usb.c
drivers/gpio/gpio-sx150x.c
drivers/gpio/gpio-ab8500.c
drivers/mfd/ab8500-gpadc.c
drivers/mfd/ti-ssp.c
drivers/mfd/twl4030-madc.c
drivers/mfd/htc-i2cpld.c
drivers/mfd/wm831x-auxadc.c
drivers/mfd/twl6040-core.c
drivers/mfd/wm8350-core.c
drivers/extcon/extcon-max8997.c
drivers/mmc/host/of_mmc_spi.c
drivers/mmc/core/cd-gpio.c
drivers/net/can/mcp251x.c
drivers/nfc/pn544_hci.c
drivers/nfc/pn544.c
drivers/power/ab8500_btemp.c
drivers/power/twl4030_charger.c
drivers/power/lp8727_charger.c
drivers/power/smb347-charger.c
drivers/power/max17042_battery.c
drivers/power/wm831x_power.c
drivers/power/ab8500_fg.c
drivers/power/max8903_charger.c
drivers/power/ab8500_charger.c
drivers/regulator/wm831x-isink.c
drivers/regulator/wm831x-ldo.c
drivers/regulator/wm831x-dcdc.c
drivers/staging/ste_rmi4/synaptics_i2c_rmi4.c
drivers/staging/iio/adc/adt7310.c
drivers/staging/iio/adc/adt7410.c
drivers/staging/iio/adc/ad7816.c
drivers/staging/iio/cdc/ad7150.c
drivers/staging/iio/accel/sca3000_core.c
drivers/staging/cptm1217/clearpad_tm1217.c
drivers/input/keyboard/tc3589x-keypad.c
drivers/input/keyboard/twl4030_keypad.c
drivers/input/misc/twl4030-pwrbutton.c
drivers/input/misc/twl6040-vibra.c
drivers/input/misc/wm831x-on.c
drivers/media/radio/si470x/radio-si470x-i2c.c
drivers/base/regmap/regmap-irq.c
drivers/rtc/rtc-wm831x.c
drivers/rtc/rtc-twl.c
drivers/rtc/rtc-ab8500.c
drivers/rtc/rtc-max8998.c
drivers/rtc/rtc-isl1208.c
drivers/platform/x86/intel_mid_powerbtn.c
include/linux/mfd/wm8994/core.h
include/linux/mfd/wm8350/core.h

what am I missing here?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3] printk: Have printk() never buffer its data

2012-07-09 Thread Kay Sievers

On Mon, Jul 9, 2012 at 11:42 PM, Joe Perches  wrote:
> On Sun, 2012-07-08 at 19:55 +0200, Kay Sievers wrote:
>
>> At the same time the CPU#2 prints the same warning with a continuation
>> line, but the buffer from CPU#1 can not be flushed to the console, nor
>> can the continuation line printk()s from CPU#2 be merged at this point.
>> The consoles are still locked and busy with replaying the old log
>> messages, so the new continuation data is just stored away in the record
>> buffer as it is coming in.
>> If the console would be registered a bit earlier, or the warning would
>> happen a bit later, we would probably not see any of this.
>>
>> I can fake something like this just by holding the console semaphore
>> over a longer time and printing continuation lines with different CPUs
>> in a row.
>>
>> The patch below seems to work for me. It is also here:
>>   
>> http://git.kernel.org/?p=linux/kernel/git/kay/patches.git;a=blob;f=kmsg-merge-cont.patch;hb=HEAD
>>
>> It only applies cleanly on top of this patch:
>>   
>> http://git.kernel.org/?p=linux/kernel/git/kay/patches.git;a=blob;f=kmsg-syslog-1-byte-read.patch;hb=HEAD
>>
>
> Hi Kay.
>
> I just ran a test with what's in Greg's driver-core -for-linus branch.
>
> One of the differences in dmesg is timestamping of consecutive
> pr_<level>("foo...)
> followed directly by
> pr_cont("bar...")
>
> For instance: (dmesg is 3.4, dmesg.0 is 3.5-rc6+)
>
> # grep MAP /var/log/dm* -A1
> dmesg:[0.781687] ata_piix :00:1f.2: MAP [ P0 P2 P1 P3 ]
> dmesg-[0.781707] ata2: port disabled--ignoring
> --
> dmesg.0:[0.948881] ata_piix :00:1f.2: MAP [
> dmesg.0-[0.948883]  P0 P2 P1 P3 ]
>
> These messages originate starting at
> drivers/ata/ata_piix.c:1354
>
> All the continuations are emitted with pr_cont.
>
> I think this output should still be coalesced without
> timestamp deltas.  Perhaps the timestamping code can
> still be reworked to avoid too small a delta producing
> a new timestamp and another dmesg line.

Hmm, I don't see that.

If I do:
  pr_info("[");
  for (i = 0; i < 4; i++)
   pr_cont("%i ", i);
  pr_cont("]\n");

I get:
  6,173,0;[0 1 2 3 ]

And if I fill the cont buffer and forcefully hold the console sem
during all that, and we can't merge anymore, I get:
  6,167,0;[
  4,168,0;0
  4,169,0;1
  4,170,0;2
  4,171,0;3
  4,172,0;]

But the output is still all fine for both lines:
  [0.00] [0 1 2 3 ]
  [0.00] [0 1 2 3 ]

What do I miss?

Kay
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RFT] regulator: max8952: Use core GPIO enable support

2012-07-09 Thread Axel Lin

Signed-off-by: Axel Lin 
---
 drivers/regulator/max8952.c |   59 +++
 1 file changed, 4 insertions(+), 55 deletions(-)

diff --git a/drivers/regulator/max8952.c b/drivers/regulator/max8952.c
index ea58a44..355ca7b 100644
--- a/drivers/regulator/max8952.c
+++ b/drivers/regulator/max8952.c
@@ -51,7 +51,6 @@ struct max8952_data {
 
bool vid0;
bool vid1;
-   bool en;
 };
 
 static int max8952_read_reg(struct max8952_data *max8952, u8 reg)
@@ -80,38 +79,6 @@ static int max8952_list_voltage(struct regulator_dev *rdev,
return (max8952->pdata->dvs_mode[selector] * 10 + 770) * 1000;
 }
 
-static int max8952_is_enabled(struct regulator_dev *rdev)
-{
-   struct max8952_data *max8952 = rdev_get_drvdata(rdev);
-   return max8952->en;
-}
-
-static int max8952_enable(struct regulator_dev *rdev)
-{
-   struct max8952_data *max8952 = rdev_get_drvdata(rdev);
-
-   /* If not valid, assume "ALWAYS_HIGH" */
-   if (gpio_is_valid(max8952->pdata->gpio_en))
-   gpio_set_value(max8952->pdata->gpio_en, 1);
-
-   max8952->en = true;
-   return 0;
-}
-
-static int max8952_disable(struct regulator_dev *rdev)
-{
-   struct max8952_data *max8952 = rdev_get_drvdata(rdev);
-
-   /* If not valid, assume "ALWAYS_HIGH" -> not permitted */
-   if (gpio_is_valid(max8952->pdata->gpio_en))
-   gpio_set_value(max8952->pdata->gpio_en, 0);
-   else
-   return -EPERM;
-
-   max8952->en = false;
-   return 0;
-}
-
 static int max8952_get_voltage_sel(struct regulator_dev *rdev)
 {
struct max8952_data *max8952 = rdev_get_drvdata(rdev);
@@ -146,9 +113,6 @@ static int max8952_set_voltage_sel(struct regulator_dev 
*rdev,
 
 static struct regulator_ops max8952_ops = {
.list_voltage   = max8952_list_voltage,
-   .is_enabled = max8952_is_enabled,
-   .enable = max8952_enable,
-   .disable= max8952_disable,
.get_voltage_sel= max8952_get_voltage_sel,
.set_voltage_sel= max8952_set_voltage_sel,
 };
@@ -193,6 +157,10 @@ static int __devinit max8952_pmic_probe(struct i2c_client 
*client,
config.init_data = &pdata->reg_data;
config.driver_data = max8952;
 
+   config.ena_gpio = pdata->gpio_en;
+   if (pdata->reg_data.constraints.boot_on)
+   config.ena_gpio_flags |= GPIOF_OUT_INIT_HIGH;
+
max8952->rdev = regulator_register(&regulator, &config);
 
if (IS_ERR(max8952->rdev)) {
@@ -201,27 +169,9 @@ static int __devinit max8952_pmic_probe(struct i2c_client 
*client,
return ret;
}
 
-   max8952->en = !!(pdata->reg_data.constraints.boot_on);
max8952->vid0 = pdata->default_mode & 0x1;
max8952->vid1 = (pdata->default_mode >> 1) & 0x1;
 
-   if (gpio_is_valid(pdata->gpio_en)) {
-   if (!gpio_request(pdata->gpio_en, "MAX8952 EN"))
-   gpio_direction_output(pdata->gpio_en, max8952->en);
-   else
-   err = 1;
-   } else
-   err = 2;
-
-   if (err) {
-   dev_info(max8952->dev, "EN gpio invalid: assume that EN"
-   "is always High\n");
-   max8952->en = 1;
-   pdata->gpio_en = -1; /* Mark invalid */
-   }
-
-   err = 0;
-
if (gpio_is_valid(pdata->gpio_vid0) &&
gpio_is_valid(pdata->gpio_vid1)) {
if (!gpio_request(pdata->gpio_vid0, "MAX8952 VID0"))
@@ -307,7 +257,6 @@ static int __devexit max8952_pmic_remove(struct i2c_client 
*client)
 
gpio_free(pdata->gpio_vid0);
gpio_free(pdata->gpio_vid1);
-   gpio_free(pdata->gpio_en);
return 0;
 }
 
-- 
1.7.9.5



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 03/19 v2] Staging: xgifb: Remove LCDA detection in xgifb_probe().

2012-07-09 Thread Aaro Koskinen

Hi,

On Mon, Jul 09, 2012 at 11:39:47AM -0700, Greg KH wrote:
> On Fri, Jul 06, 2012 at 12:40:38PM +0200, Miguel Gómez wrote:
> > The piece of code that checks for LCDA in xgifb_probe() just checks for some
> > register values but doesn't really do anything in response to them (the 
> > actions
> > that should be executed are commented).
> > As nothing is really being done, the code can be safely removed.
> 
> Are you sure?  Some devices (foolish ones that is) need registers read
> to work properly as that is part of their start-up logic.  Yeah, it
> doesn't seem to make sense, but hardware designers usually have good
> reasons for doing this type of thing.  Or at least they think they do :)

Those registers (CR 0x30..0x38) are scratch registers, so reading them
should have no side effects in the device side. I think it's safe to
delete that code.

A.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] fat (exportfs): reconnect file handles to evicted inodes/dentries

2012-07-09 Thread Steven J. Magnani

On Tue, 2012-07-10 at 06:34 +0900, OGAWA Hirofumi wrote: 
> "Steven J. Magnani"  writes:
> 
> >> Ah, i_ino. I was talking about i_pos. Well, so, what happens if the
> >> child was renamed to other parent on NFS server machine (not via nfs
> >> client)? The file handle would be including the old i_ino, and the old
> >> i_ino on file handle is still vaild as old parent. So, it returns the
> >> wrong parent?
> >
> > Yes, but I believe exportfs_decode_fh() handles that case:
> >
> >   /*
> >* Now that we've got both a well-connected parent and a
> >* dentry for the inode we're after, make sure that our
> >* inode is actually connected to the parent.
> >*/
> >
> >
> > Really, the FAT NFS code will pretty much parallel that of ext2.
> 
> Hm, not really, if the file handle is including parent ino. ext2 will
> get the latest parent ino, because it checks parent of inode of file
> handle.

Can you point me to the code for this? The code I see looks pretty
congruent to what I think the FAT code would be.

> But if the file handle is including parent ino and we believe it is
> parent, I think NFS server can be return the old parent. The difference
> is the result of ->get_parent().

I'm a little confused about which function we're discussing here.
fat_get_parent() isn't called with a file handle. fat_fh_to_parent() is,
but it is only called by exportfs_decode_fh() and I am reasonably sure
that that function is handling the case you're concerned about.

Steven J. Magnani   "I claim this network for MARS!
www.digidescorp.com  Earthling, return my space modulator!"

#include <standard.disclaimer>



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RESEND AGAIN][PATCH] pcmcia: move unbind/rebind into dev_pm_ops.complete

2012-07-09 Thread Andrew Morton

On Fri, 6 Jul 2012 14:30:16 -0700
Greg KH  wrote:

> On Fri, Jul 06, 2012 at 11:23:52PM +0200, Christian Lamparter wrote:
> > The idea of moving rebind procedure into pm.complete
> > was taken from the usb-subsystem, which has similar
> > problems with reattaching devices during/after
> > resume.
> > 
> > Signed-off-by: Christian Lamparter 
> > ---
> > To Greg:
> > 
> > I have submitted this patch back in March and again in May.
> > As far as I can tell it was neither rejected nor was it
> > accepted into linux-pcmcia.git since. So I'm asking you,
> > if you could take the patch instead... please.
> 
> There is a PCMCIA "team" who should be taking these types of patches.
> Why are they not doing so?
> 

Things are pretty quiet in pcmcia world, but Dominik does appear to
still be doing stuff.

I sometimes queue PCMCIA patches for people, but not this one.  The
changelog is just junk.  What does the patch do?  Why does it do it? 
What problems does it solve?  What are these mysterious "problems with
reattaching devices" to which it refers?  Useless...


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] driver core: fixup reversed deferred probe order

2012-07-09 Thread Grant Likely

On Wed, May 30, 2012 at 2:46 AM, Kuninori Morimoto
 wrote:
> If driver requests probe deferral,
> it will be added to deferred_probe_pending_list
> by driver_deferred_probe_add(), but, it used list_add().
> Because of that, deferred probe will be run as reversed order.
> This patch uses list_add_tail(), and solved this issue.
>
> Signed-off-by: Kuninori Morimoto 

Acked-by: Grant Likely 

(But not tested; I'm not in a position to test anything at the moment)  :-(

g.

> ---
>  drivers/base/dd.c |2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 1b1cbb5..dcb8a6e 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -100,7 +100,7 @@ static void driver_deferred_probe_add(struct device *dev)
> mutex_lock(&deferred_probe_mutex);
> if (list_empty(&dev->p->deferred_probe)) {
> dev_dbg(dev, "Added to deferred list\n");
> -   list_add(&dev->p->deferred_probe, 
> &deferred_probe_pending_list);
> +   list_add_tail(&dev->p->deferred_probe, 
> &deferred_probe_pending_list);
> }
> mutex_unlock(&deferred_probe_mutex);
>  }
> --
> 1.7.5.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/



-- 
Grant Likely, B.Sc., P.Eng.
Secret Lab Technologies Ltd.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] gpiolib: Defer failed gpio requests by default

2012-07-09 Thread Grant Likely

On Mon, Jul 9, 2012 at 9:31 PM, Linus Walleij  wrote:
> On Mon, Jul 9, 2012 at 1:22 PM, Mark Brown
>  wrote:
>
>> Since users must be explicitly provided with a GPIO number in order to
>> request one the overwhelmingly common case for failing to request will
>> be that the required GPIO driver has not yet registered and we should
>> therefore defer until it has registered.
>>
>> In order to avoid having to code this logic in individual drivers have
>> gpio_request() return -EPROBE_DEFER when failing to look up the GPIO.
>> Drivers which don't want this behaviour can override it if they desire.
>>
>> Signed-off-by: Mark Brown 
>
> While this makes perfect sense to me I would *really* like to
> wait for Grants opinion on this one patch, him having devised
> the deferral and being GPIO maintainer.
>
> Is any deferral of this deferral mechanism causing you to
> defer important work right now?

I'm fine with this patch, but the patch that adds the twizzling of the
dpm_list when probing needs some tweaking, and this patch must be
applied after that one.  I'll go and reply to that patch now (and cc
you if you're not already).

g.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] net: cgroup: fix out of bounds accesses

2012-07-09 Thread David Miller

From: Gao feng 
Date: Mon, 09 Jul 2012 16:15:29 +0800

> 于 2012年07月09日 15:45, Eric Dumazet 写道:
>> From: Eric Dumazet 
>> 
>> dev->priomap is allocated by extend_netdev_table() called from
>> update_netdev_tables().
>> And this is only called if write_priomap() is called.
>> 
>> But if write_priomap() is not called, it seems we can have out of bounds
>> accesses in cgrp_destroy(), read_priomap() & skb_update_prio()
>> 
>> With help from Gao Feng
>> 
>> Signed-off-by: Eric Dumazet 
>> Cc: Neil Horman 
>> Cc: Gao feng 
>> ---
>> net/core/dev.c|8 ++--
>> net/core/netprio_cgroup.c |4 ++--
>> 2 files changed, 8 insertions(+), 4 deletions(-)
> 
> Acked-by: Gao feng 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] bonding: debugfs and network namespaces are incompatible

2012-07-09 Thread David Miller

From: ebied...@xmission.com (Eric W. Biederman)
Date: Mon, 09 Jul 2012 13:52:43 -0700

> 
> The bonding debugfs support has been broken in the presence of network
> namespaces since it has been added.  The debugfs support does not handle
> multiple bonding devices with the same name in different network
> namespaces.
> 
> I haven't had any bug reports, and I'm not interested in getting any.
> Disable the debugfs support when network namespaces are enabled.
> 
> Signed-off-by: "Eric W. Biederman" 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/2] bonding: Manage /proc/net/bonding/ entries from the netdev events

2012-07-09 Thread David Miller

From: ebied...@xmission.com (Eric W. Biederman)
Date: Mon, 09 Jul 2012 13:51:45 -0700

> 
> It was recently reported that moving a bonding device between network
> namespaces causes warnings from /proc.  It turns out after the move we
> were trying to add and to remove the /proc/net/bonding entries from the
> wrong network namespace.
> 
> Move the bonding /proc registration code into the NETDEV_REGISTER and
> NETDEV_UNREGISTER events where the proc registration and unregistration
> will always happen at the right time.
> 
> Signed-off-by: "Eric W. Biederman" 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH RFC 0/2] kvm: Improving directed yield in PLE handler

2012-07-09 Thread Andrew Theurer

On Mon, 2012-07-09 at 11:50 +0530, Raghavendra K T wrote:
> Currently Pause Looop Exit (PLE) handler is doing directed yield to a
> random VCPU on PL exit. Though we already have filtering while choosing
> the candidate to yield_to, we can do better.

Hi, Raghu.

> Problem is, for large vcpu guests, we have more probability of yielding
> to a bad vcpu. We are not able to prevent directed yield to same guy who
> has done PL exit recently, who perhaps spins again and wastes CPU.
> 
> Fix that by keeping track of who has done PL exit. So The Algorithm in series
> give chance to a VCPU which has:
> 
>  (a) Not done PLE exit at all (probably he is preempted lock-holder)
> 
>  (b) VCPU skipped in last iteration because it did PL exit, and probably
>  has become eligible now (next eligible lock holder)
> 
> Future enhancemnets:
>   (1) Currently we have a boolean to decide on eligibility of vcpu. It
> would be nice if I get feedback on guest (>32 vcpu) whether we can
> improve better with integer counter. (with counter = say f(log n )).
>   
>   (2) We have not considered system load during iteration of vcpu. With
>that information we can limit the scan and also decide whether schedule()
>is better. [ I am able to use #kicked vcpus to decide on this But may
>be there are better ideas like information from global loadavg.]
> 
>   (3) We can exploit this further with PV patches since it also knows about
>next eligible lock-holder.
> 
> Summary: There is a huge improvement for moderate / no overcommit scenario
>  for kvm based guest on PLE machine (which is difficult ;) ).
> 
> Result:
> Base : kernel 3.5.0-rc5 with Rik's Ple handler fix
> 
> Machine : Intel(R) Xeon(R) CPU X7560  @ 2.27GHz, 4 numa node, 256GB RAM,
>   32 core machine

Is this with HT enabled, therefore 64 CPU threads?

> Host: enterprise linux  gcc version 4.4.6 20120305 (Red Hat 4.4.6-4) (GCC)
>   with test kernels 
> 
> Guest: fedora 16 with 32 vcpus 8GB memory. 

Can you briefly explain the 1x and 2x configs?  This of course is highly
dependent whether or not HT is enabled...

FWIW, I started testing what I would call "0.5x", where I have one 40
vcpu guest running on a host with 40 cores and 80 CPU threads total (HT
enabled, no extra load on the system).  For ebizzy, the results are
quite erratic from run to run, so I am inclined to discard it as a
workload, but maybe I should try "1x" and "2x" cpu over-commit as well.

From initial observations, at least for the ebizzy workload, the
percentage of exits that result in a yield_to() are very low, around 1%,
before these patches.  So, I am concerned that at least for this test,
reducing that number even more has diminishing returns.  I am however
still concerned about the scalability problem with yield_to(), which
shows like this for me (perf):

> 63.56% 282095 qemu-kvm  [kernel.kallsyms][k] 
> _raw_spin_lock  
> 5.42%  24420 qemu-kvm  [kvm][k] 
> kvm_vcpu_yield_to   
> 5.33%  26481 qemu-kvm  [kernel.kallsyms][k] get_pid_task  
>   
> 4.35%  20049 qemu-kvm  [kernel.kallsyms][k] yield_to  
>   
> 2.74%  15652 qemu-kvm  [kvm][k] 
> kvm_apic_present
> 1.70%   8657 qemu-kvm  [kvm][k] 
> kvm_vcpu_on_spin
> 1.45%   7889 qemu-kvm  [kvm][k] 
> vcpu_enter_guest

For the cpu threads in the host that are actually active (in this case
1/2 of them), ~50% of their time is in kernel and ~43% in guest.  This
is for a no-IO workload, so that's just incredible to see so much cpu
wasted.  I feel that 2 important areas to tackle are a more scalable
yield_to() and reducing the number of pause exits itself (hopefully by
just tuning ple_window for the latter).

Honestly, I am not confident addressing this problem will improve the
ebizzy score. That workload is so erratic for me, that I do not trust
the results at all.  I have however seen consistent improvements in
disabling PLE for a http guest workload and a very high IOPS guest
workload, both with much time spent in host in the double runqueue lock
for yield_to(), so that's why I still gravitate toward that issue.

-Andrew Theurer

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/3] PCI: reimplement P2P bridge 1K I/O windows (Intel P64H2)

2012-07-09 Thread Yinghai Lu

On Mon, Jul 9, 2012 at 1:32 PM, Bjorn Helgaas  wrote:
>
> Note that the bridge window assignment code, e.g., pbus_size_io(), should
> pay attention to dev->io_window_1k, too, but I didn't fix that.

Please check attached patch that will fix pbus_size_io.

You may fold the patch in your patch, or could split your patch to
two. First one
only add io_window_1k, and second one will use io_window_1k. Then could put my
patch between them.

Thanks

Yinghai

io_window_1k.patch
Description: Binary data

Re: [PATCH v3] printk: Have printk() never buffer its data

2012-07-09 Thread Joe Perches

On Sun, 2012-07-08 at 19:55 +0200, Kay Sievers wrote:

> At the same time the CPU#2 prints the same warning with a continuation
> line, but the buffer from CPU#1 can not be flushed to the console, nor
> can the continuation line printk()s from CPU#2 be merged at this point.
> The consoles are still locked and busy with replaying the old log
> messages, so the new continuation data is just stored away in the record
> buffer as it is coming in.
> If the console would be registered a bit earlier, or the warning would
> happen a bit later, we would probably not see any of this.
> 
> I can fake something like this just by holding the console semaphore
> over a longer time and printing continuation lines with different CPUs
> in a row.
> 
> The patch below seems to work for me. It is also here:
>   
> http://git.kernel.org/?p=linux/kernel/git/kay/patches.git;a=blob;f=kmsg-merge-cont.patch;hb=HEAD
> 
> It only applies cleanly on top of this patch:
>   
> http://git.kernel.org/?p=linux/kernel/git/kay/patches.git;a=blob;f=kmsg-syslog-1-byte-read.patch;hb=HEAD
> 

Hi Kay.

I just ran a test with what's in Greg's driver-core -for-linus branch.

One of the differences in dmesg is timestamping of consecutive
pr_<level>("foo...)
followed directly by
pr_cont("bar...")

For instance: (dmesg is 3.4, dmesg.0 is 3.5-rc6+)

# grep MAP /var/log/dm* -A1
dmesg:[0.781687] ata_piix :00:1f.2: MAP [ P0 P2 P1 P3 ]
dmesg-[0.781707] ata2: port disabled--ignoring
--
dmesg.0:[0.948881] ata_piix :00:1f.2: MAP [
dmesg.0-[0.948883]  P0 P2 P1 P3 ]

These messages originate starting at
drivers/ata/ata_piix.c:1354

All the continuations are emitted with pr_cont.

I think this output should still be coalesced without
timestamp deltas.  Perhaps the timestamping code can
still be reworked to avoid too small a delta producing
a new timestamp and another dmesg line.

cheers, Joe

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: linux-next: Tree for July 2 (crypto/hifn_795x)

2012-07-09 Thread Randy Dunlap

On 07/09/2012 10:54 AM, Jussi Kivilinna wrote:

> Quoting Randy Dunlap :
> 
>> On 07/02/2012 12:23 AM, Stephen Rothwell wrote:
>>
>>> Hi all,
>>>
>>> Changes since 20120629:
>>>
>>
>>
>> on i386:
>>
>>
>> ERROR: "__divdi3" [drivers/crypto/hifn_795x.ko] undefined!
>>
> 
> This is caused by commit feb7b7ab928afa97a79a9c424e4e0691f49d63be. hifn_795x 
> has "DIV_ROUND_UP(NSEC_PER_SEC, dev->pk_clk_freq)", which should be changed 
> to DIV_ROUND_UP_ULL now that NSEC_PER_SEC is 64bit on 32bit archs. Patch to 
> fix hifn_795x is attached (only compile tested).



Acked-by: Randy Dunlap 

Thanks.


-- 
~Randy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[patch 2/2] util-linux: resizepart: Utility to resize a partition

2012-07-09 Thread vgoyal


A simple user space utility to resize an existing partition. It tries to read
the start of partition from sysfs.

This is a real quick dirty patch I used for my testing. I am sure there
are better and faster ways of getting to partition "start" from device
and partition number.

Signed-off-by: Vivek Goyal 
---
 disk-utils/Makemodule.am |7 +++-
 disk-utils/partx.h   |   19 +
 disk-utils/resizepart.8  |   38 ++
 disk-utils/resizepart.c  |   98 ++
 4 files changed, 161 insertions(+), 1 deletions(-)
 create mode 100644 disk-utils/resizepart.8
 create mode 100644 disk-utils/resizepart.c

diff --git a/disk-utils/Makemodule.am b/disk-utils/Makemodule.am
index 830e8f7..b544e87 100644
--- a/disk-utils/Makemodule.am
+++ b/disk-utils/Makemodule.am
@@ -113,7 +113,7 @@ endif # LINUX
 
 
 if BUILD_PARTX
-usrsbin_exec_PROGRAMS += partx addpart delpart
+usrsbin_exec_PROGRAMS += partx addpart delpart resizepart
 dist_man_MANS += \
disk-utils/addpart.8 \
disk-utils/delpart.8 \
@@ -130,6 +130,11 @@ delpart_SOURCES = \
disk-utils/partx.h
 delpart_LDADD = $(LDADD) libcommon.la
 
+resizepart_SOURCES = \
+   disk-utils/resizepart.c \
+   disk-utils/partx.h
+resizepart_LDADD = $(LDADD) libcommon.la
+
 partx_SOURCES = \
disk-utils/partx.c \
disk-utils/partx.h
diff --git a/disk-utils/partx.h b/disk-utils/partx.h
index ed0fd0a..02e273e 100644
--- a/disk-utils/partx.h
+++ b/disk-utils/partx.h
@@ -41,4 +41,23 @@ static inline int partx_add_partition(int fd, int partno,
return ioctl(fd, BLKPG, &a);
 }
 
+static inline int partx_resize_partition(int fd, int partno,
+   long long start, long long size)
+{
+   struct blkpg_ioctl_arg a;
+   struct blkpg_partition p;
+
+   p.pno = partno;
+   p.start = start << 9;
+   p.length = size << 9;
+   p.devname[0] = 0;
+   p.volname[0] = 0;
+   a.op = BLKPG_RESIZE_PARTITION;
+   a.flags = 0;
+   a.datalen = sizeof(p);
+   a.data = &p;
+
+   return ioctl(fd, BLKPG, &a);
+}
+
 #endif /*  UTIL_LINUX_PARTX_H */
diff --git a/disk-utils/resizepart.8 b/disk-utils/resizepart.8
new file mode 100644
index 000..c009cc3
--- /dev/null
+++ b/disk-utils/resizepart.8
@@ -0,0 +1,38 @@
+.\" resizepart.8 --
+.\" Copyright 2012 Vivek Goyal 
+.\" Copyright 2012 Red Hat, Inc.
+.\" May be distributed under the GNU General Public License
+.TH RESIZEPART 8 "February 2012" "util-linux" "System Administration"
+.SH NAME
+resizepart \-
+simple wrapper around the "resize partition" ioctl
+.SH SYNOPSIS
+.B resizepart
+.I device partition length
+.SH DESCRIPTION
+.B resizepart
+is a program that informs the Linux kernel of new partition size.
+
+This command doesn't manipulate partitions on hard drive.
+
+.SH PARAMETERS
+.TP
+.I device
+Specify the disk device.
+.TP
+.I partition
+Specify the partition number.
+.TP
+.I length
+Specify the length of the partition (in 512-byte sectors).
+
+.SH SEE ALSO
+.BR addpart (8),
+.BR delpart (8),
+.BR fdisk (8),
+.BR parted (8),
+.BR partprobe (8),
+.BR partx (8)
+.SH AVAILABILITY
+The resizepart command is part of the util-linux package and is available from
+ftp://ftp.kernel.org/pub/linux/utils/util-linux/.
diff --git a/disk-utils/resizepart.c b/disk-utils/resizepart.c
new file mode 100644
index 000..4f9e9ce
--- /dev/null
+++ b/disk-utils/resizepart.c
@@ -0,0 +1,98 @@
+#include 
+#include 
+#include 
+#include 
+#include "canonicalize.h"
+#include "sysfs.h"
+#include "partx.h"
+
+char *
+get_devname_from_canonical_path(char *path)
+{
+   struct sysfs_cxt cxt;
+   dev_t devno;
+   char name[PATH_MAX];
+   char *devname;
+
+   devno = sysfs_devname_to_devno(path, NULL);
+   if (!devno) {
+   fprintf(stderr, "failed to read devno. \n");
+   exit(1);
+   }
+
+   if (sysfs_init(&cxt, devno, NULL)) {
+   fprintf(stderr, "failed to initialize sysfs. \n");
+   exit(1);
+   }
+   devname = sysfs_get_devname(&cxt, name, sizeof(name));
+   return strdup(devname);
+}
+
+char *
+get_partname_from_devname(char *devname, int partno)
+{
+   char partname[PATH_MAX];
+
+   if (isdigit(devname[strlen(devname) - 1]))
+   snprintf(partname, PATH_MAX, "%sp%d", devname, partno);
+   else
+   snprintf(partname, PATH_MAX, "%s%d", devname, partno);
+
+   return strdup(partname);
+}
+
+
+int
+main(int argc, char **argv)
+{
+   int fd;
+   char *real_path, *devname, *partname, *pstart;
+   char part_sysfs_path[PATH_MAX], part_start[30];
+   FILE *fp;
+
+   if (argc != 4) {
+   fprintf(stderr,
+   "usage: %s diskdevice partitionnr length\n",
+   argv[0]);
+   exit(1);
+   }
+   if ((fd = open(argv[1], O_RDONLY)) < 0) {
+   perror(argv[1]);
+   exit(1);
+   }
+
+   real_path

[PATCH -v2 0/8] PCI: Add 'pci_fixup_final' quirks into hot-plug paths

2012-07-09 Thread Myron Stowe

PCI's final quirks (pci_fixup_final) are currently invoked by
pci_apply_final_quirk() which traverses the platform's list of PCI
devices.  The calling mechanism, and to some point the use of the device
list, limits the quirk invocations to a single instance during boot.  As
such, hot-plugable devices do not have their associated final quirks
called upon hot-plug events.

This series implements an interim solution to integrate pci_fixup_final
quirks into the various hot-plug event paths[1].

The series basis is
git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git next

  -v2: Re-worked PATCH 1/9 based on Bjorn's suggestion:
   http://marc.info/?l=linux-pci=134074984925361=2
   There is some more opportunity to clean this up even further
   if we re-work the script that gathers initcall data to accept
   'dev_dbg()' structured output.

   Replaced PATCHes 3/9-8/9 with 3/8-7/8 (I don't know what I
   was thinking with the original set).


[1] I intended to come up with a single, uniform, solution that would
satisfy both the boot path and the various hot-plug event paths with
respect to final quirks.  From an architectural perspective, the proper
placement for the final quirks is somewhere just prior to when drivers can
probe and attach which would be the device_add path: pci_bus_add_devices
or pci_bus_add_device.

I originally started with that approach but eventually realized that there
are issues with moving the quirks into the device_add path with respect to
booting.  Using the 'initcall_debug' boot command instrumentation, one
can see that moving the final quirks into the device_add path would cause
the quirks to be called substantially earlier during boot.  While there
may be additional issues, two that were especially concerning were that
the final quirks would be called *before* both 'pci_subsys_init' and
'pcibios_assign_resources'.

Calling the quirks prior to resource assignment seems fraught with
potential issues so I started looking into the various hot-plug paths and
quickly noticed asymmetry with respect to PCI device setup between the
boot path and the hot-plug paths.

Currently, the boot path scans the PCI devices, adds the devices, assigns
resources, and then call the final quirks whereas the hot-plug paths scan,
assign resources, and then add the devices which is better sequencing with
respect to the assignment of resources and the addition of devices (i.e.
resource assignment occurs *before* a driver can probe and attach).

All of this suggests that we should change PCI device setup in the boot
path to be more like hot-plug: scan, assign resources, (final fixups,)
then add.  While I think that is the correct approach, and something that
we should be addressing, it will require a lot of work.  So until that
occurs, this series should serve as a stop-gap solution for the interim.

When the boot path's PCI device setup is addressed we should end up with a
single, uniform, device_add based solution for applying final quirks
after:
  o  removing 'fs_initcall_sync(pci_apply_final_quirks);',
  o  removing the global variable 'pci_fixup_final_inited' and all
 of its usages,
  o  renaming, and moving, the 'pci_cache_line_size' related code
 currently embedded in 'pci_apply_final_quirks()'.

Note: I do not have a cross-compile environment so I have only tested x86.
---

Myron Stowe (8):
  PCI: Integrate 'pci_fixup_final' quirks into hot-plug paths
  PCI: Move final fixup quirks from __init to __devinit
  x86/PCI: Move final fixup quirks from __init to __devinit
  MIPS/PCI: Move final fixup quirks from __init to __devinit
  alpha/PCI: Move final fixup quirks from __init to __devinit
  PCI: Adjust section annotations of various quirks
  PCI: release temporary reference in __nv_msi_ht_cap_quirk()
  PCI: Restructure 'pci_do_fixups()'


 arch/alpha/kernel/pci.c |2 -
 arch/mips/mti-malta/malta-pci.c |2 -
 arch/mips/pci/ops-tx4927.c  |2 -
 arch/mips/txx9/generic/pci.c|4 +
 arch/x86/kernel/quirks.c|2 -
 drivers/pci/bus.c   |4 +
 drivers/pci/quirks.c|  107 ++-
 7 files changed, 81 insertions(+), 42 deletions(-)

-- 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[patch 0/2] [V4] block: Support online resize of disk partitions

2012-07-09 Thread vgoyal

Hi,

Few people have pinged me in recent past about status of this patch, hence,
this is V4 of patch which adds support for online resizing of a partition.
This patch is based on previously posted patches by Phillip Susi. 

There are two patches. Out of which one is kernel patch and other one is
util-linux patch to add support of a user space utility "resizepart" to
allow resizing the partition.

This ioctl only resizes the partition size in kernel and does not change
the size on disk. A user needs to make sure that corresponding changes
are made to disk data structures also using fdisk(or partx), if changes
are to be retained across reboot.

Changes since V3

- Do bdput() in error path as per the Maxim's review comments.

Changes since V2

- Do not ignore the "start" parameter in RESIZE ioctl.
- Change resizepart utility to parse sysfs to get to partition start.

Changes since V1

Following are changes since the version Phillip posted.
- RESIZE ioctl ignores the partition "start" and does not expect user to
  specify one. Caller needs to just specify "device", "partition number" and
  "size" of new partition.

- Got rid of part_nr_sects_write_begin/part_nr_sects_write_end functions
  and replaced these with single part_nr_sects_write().

- Some sequence counter related changes are simply lifted from i_size_write().

- Initialized part->nr_sects_seq using seqcount_init().

Phillip, do let me know if I should put your signed-off-by also in the
patch.

Any review feedback is welcome.

I did following test.

- Create a partition of 10MB on a disk using fdisk.
- Add this partition to a volume group
- Use fdisk to increase the partition size to 20MB. (First delete the
  partition and then create a new one of 20MB size).
- Use resizepart to extend partition size in kernel.
resizepart /dev/sdc 1 40960
- Do pvresize on partition so that physical volume can be increased in
  size online.
pvresize /dev/sda1

pvresize does recognize the new size. Also lsblk and /proc/partitions
report the new size of partition.

Thanks
Vivek
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH -v2 4/8] alpha/PCI: Move final fixup quirks from init to devinit

2012-07-09 Thread Myron Stowe

The PCI subsystem's final fixups are executed once during boot, after the
pci-device is found.  As long as the system does not support hot-plug,
specifying __init is fine.

With hot-plug, either physically based hot-plug events or pseudo hot-plug
events such as "echo 1 > /sys/bus/pci/rescan", it is possible to remove a
PCI bus during run time and have it rediscovered which will require the
call of the fixups again in order for the device to function properly.

This patch prepares specific quirk(s) for use with hot-plug events.

Signed-off-by: Myron Stowe 
---

 arch/alpha/kernel/pci.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 1a62963..a4681fe 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -106,7 +106,7 @@ quirk_cypress(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693, 
quirk_cypress);
 
 /* Called for each device after PCI setup is done. */
-static void __init
+static void __devinit
 pcibios_fixup_final(struct pci_dev *dev)
 {
unsigned int class = dev->class >> 8;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH -v2 8/8] PCI: Integrate 'pci_fixup_final' quirks into hot-plug paths

2012-07-09 Thread Myron Stowe

PCI's final quirks (pci_fixup_final) are currently invoked by
pci_apply_final_quirk() which traverses the platform's list of PCI
devices.  The calling mechanism limits the quirk invocations to a single
instance during boot.  As such, hot-pluggable devices do not have their
associated final quirks called upon hot-plug events.

This series implements an interim solution[1] to integrate pci_fixup_final
quirks into the various hot-plug event paths.

As I intend for the global variable introduced by this patch to be
temporary I purposely chose not to include its 'extern' declaration within
a header file (i.e. include/linux/pci.h).


[1] I intended to come up with a single, uniform, solution that would
satisfy both the boot path and the various hot-plug event paths with
respect to final quirks.  From an architectural perspective, the proper
placement for the final quirks is somewhere just prior to, or within, the
device_add path.

I originally started with that approach but eventually realized that there
are issues with moving the quirks into the device_add path with respect to
the boot path.  Currently, the boot path scans the PCI devices, adds the
devices, assigns resources, and then calls the final quirks whereas the
hot-plug paths scan, assign resources, and then add the devices which is
better sequencing with respect to the assignment of resources and the
addition of devices.

All of this suggests that we should change PCI device setup in the boot
path to be more like hot-plug: scan, assign resources, (final fixups),
then add.  While I think that is the correct approach, and something that
we should be addressing, it will require a lot of work.  So until that
occurs, this series should serve as a stop-gap solution for the interim by
keeping the current boot path sequencing in place and then adding the
final quirk processing into the device_add path for hot-plug events via a
(temporary) global variable qualifier.

When the boot path's PCI device setup is addressed we should end up with a
single, uniform, device_add based solution for applying final quirks
by:
  o  removing 'fs_initcall_sync(pci_apply_final_quirks);',
  o  removing the global variable 'pci_fixup_final_inited' and all
 of its usages,
  o  renaming, and moving, the 'pci_cache_line_size' related code
 currently embedded in 'pci_apply_final_quirks()'.

Signed-off-by: Myron Stowe 
---

 drivers/pci/bus.c|4 
 drivers/pci/quirks.c |   18 ++
 2 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 4ce5ef2..b511bd4 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -164,6 +164,10 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct 
resource *res,
 int pci_bus_add_device(struct pci_dev *dev)
 {
int retval;
+   extern bool pci_fixup_final_inited;
+
+   if (pci_fixup_final_inited)
+   pci_fixup_device(pci_fixup_final, dev);
retval = device_add(&dev->dev);
if (retval)
return retval;
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index d19e522..1850eb5 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3054,6 +3054,22 @@ void pci_fixup_device(enum pci_fixup_pass pass, struct 
pci_dev *dev)
 }
 EXPORT_SYMBOL(pci_fixup_device);
 
+
+/*
+ * The global variable 'pci_fixup_final_inited' is being used as a interim
+ * solution for calling the final quirks only during hot-plug events (not
+ * during boot processing).
+ *
+ * When the boot path's PCI device setup sequencing is addressed, we can
+ * remove the instance, and usages of, 'pci_fixup_final_inited' along with
+ * removing 'fs_initcall_sync(pci_apply_final_quirks);' and end up with a
+ * single, uniform, solution that satisfies both the boot path and the
+ * various hot-plug event paths.
+ *
+ * ToDo: Remove 'pci_fixup_final_inited'
+ */
+bool pci_fixup_final_inited;
+
 static int __init pci_apply_final_quirks(void)
 {
struct pci_dev *dev = NULL;
@@ -3084,6 +3100,8 @@ static int __init pci_apply_final_quirks(void)
pci_cache_line_size = pci_dfl_cache_line_size;
}
}
+   pci_fixup_final_inited = 1;
+
if (!pci_cache_line_size) {
printk(KERN_DEBUG "PCI: CLS %u bytes, default %u\n",
   cls << 2, pci_dfl_cache_line_size << 2);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH -v2 7/8] PCI: Move final fixup quirks from init to devinit

2012-07-09 Thread Myron Stowe

The PCI subsystem's final fixups are executed once during boot, after the
pci-device is found.  As long as the system does not support hot-plug,
specifying __init is fine.

With hot-plug, either physically based hot-plug events or pseudo hot-plug
events such as "echo 1 > /sys/bus/pci/rescan", it is possible to remove a
PCI bus during run time and have it rediscovered which will require the
call of the fixups again in order for the device to function properly.

This patch prepares specific quirk(s) for use with hot-plug events.

Signed-off-by: Myron Stowe 
---

 drivers/pci/quirks.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 8014091..d19e522 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -253,7 +253,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,  
PCI_DEVICE_ID_VIA_82C576,   quirk_vsfx)
  * workaround applied too
  * [Info kindly provided by ALi]
  */
-static void __init quirk_alimagik(struct pci_dev *dev)
+static void __devinit quirk_alimagik(struct pci_dev *dev)
 {
if ((pci_pci_problems&PCIPCI_ALIMAGIK)==0) {
dev_info(&dev->dev, "Limiting direct PCI/PCI transfers\n");
@@ -789,7 +789,7 @@ static void __devinit quirk_amd_ioapic(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7410,   
quirk_amd_ioapic);
 
-static void __init quirk_ioapic_rmw(struct pci_dev *dev)
+static void __devinit quirk_ioapic_rmw(struct pci_dev *dev)
 {
if (dev->devfn == 0 && dev->bus->number == 0)
sis_apic_bug = 1;
@@ -801,7 +801,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI,   PCI_ANY_ID, 
quirk_ioapic_rmw);
  * Some settings of MMRBC can lead to data corruption so block changes.
  * See AMD 8131 HyperTransport PCI-X Tunnel Revision Guide
  */
-static void __init quirk_amd_8131_mmrbc(struct pci_dev *dev)
+static void __devinit quirk_amd_8131_mmrbc(struct pci_dev *dev)
 {
if (dev->subordinate && dev->revision <= 0x12) {
dev_info(&dev->dev, "AMD8131 rev %x detected; "
@@ -2169,7 +2169,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PLX, 0x8624, 
quirk_tile_plx_gen1);
  * aware of it.  Instead of setting the flag on all busses in the
  * machine, simply disable MSI globally.
  */
-static void __init quirk_disable_all_msi(struct pci_dev *dev)
+static void __devinit quirk_disable_all_msi(struct pci_dev *dev)
 {
pci_no_msi();
dev_warn(&dev->dev, "MSI quirk detected; MSI disabled\n");

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH -v2 6/8] x86/PCI: Move final fixup quirks from init to devinit

2012-07-09 Thread Myron Stowe

The PCI subsystem's final fixups are executed once during boot, after the
pci-device is found.  As long as the system does not support hot-plug,
specifying __init is fine.

With hot-plug, either physically based hot-plug events or pseudo hot-plug
events such as "echo 1 > /sys/bus/pci/rescan", it is possible to remove a
PCI bus during run time and have it rediscovered which will require the
call of the fixups again in order for the device to function properly.

This patch prepares specific quirk(s) for use with hot-plug events.

Signed-off-by: Myron Stowe 
---

 arch/x86/kernel/quirks.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 03920a1..1b27de5 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, 
PCI_DEVICE_ID_ATI_SBX00_SMBUS,
 
 #if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
 /* Set correct numa_node information for AMD NB functions */
-static void __init quirk_amd_nb_node(struct pci_dev *dev)
+static void __devinit quirk_amd_nb_node(struct pci_dev *dev)
 {
struct pci_dev *nb_ht;
unsigned int devfn;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH -v2 5/8] MIPS/PCI: Move final fixup quirks from init to devinit

2012-07-09 Thread Myron Stowe

The PCI subsystem's final fixups are executed once during boot, after the
pci-device is found.  As long as the system does not support hot-plug,
specifying __init is fine.

With hot-plug, either physically based hot-plug events or pseudo hot-plug
events such as "echo 1 > /sys/bus/pci/rescan", it is possible to remove a
PCI bus during run time and have it rediscovered which will require the
call of the fixups again in order for the device to function properly.

This patch prepares specific quirk(s) for use with hot-plug events.

Signed-off-by: Myron Stowe 
---

 arch/mips/mti-malta/malta-pci.c |2 +-
 arch/mips/pci/ops-tx4927.c  |2 +-
 arch/mips/txx9/generic/pci.c|4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/mips/mti-malta/malta-pci.c b/arch/mips/mti-malta/malta-pci.c
index bf80921..b663402 100644
--- a/arch/mips/mti-malta/malta-pci.c
+++ b/arch/mips/mti-malta/malta-pci.c
@@ -253,7 +253,7 @@ void __init mips_pcibios_init(void)
 }
 
 /* Enable PCI 2.1 compatibility in PIIX4 */
-static void __init quirk_dlcsetup(struct pci_dev *dev)
+static void __devinit quirk_dlcsetup(struct pci_dev *dev)
 {
u8 odlc, ndlc;
(void) pci_read_config_byte(dev, 0x82, &odlc);
diff --git a/arch/mips/pci/ops-tx4927.c b/arch/mips/pci/ops-tx4927.c
index a1e7e6d..bc13e29 100644
--- a/arch/mips/pci/ops-tx4927.c
+++ b/arch/mips/pci/ops-tx4927.c
@@ -495,7 +495,7 @@ irqreturn_t tx4927_pcierr_interrupt(int irq, void *dev_id)
 }
 
 #ifdef CONFIG_TOSHIBA_FPCIB0
-static void __init tx4927_quirk_slc90e66_bridge(struct pci_dev *dev)
+static void __devinit tx4927_quirk_slc90e66_bridge(struct pci_dev *dev)
 {
struct tx4927_pcic_reg __iomem *pcicptr = pci_bus_to_pcicptr(dev->bus);
 
diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c
index 682efb0..ce1ee50 100644
--- a/arch/mips/txx9/generic/pci.c
+++ b/arch/mips/txx9/generic/pci.c
@@ -256,7 +256,7 @@ static irqreturn_t i8259_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
 }
 
-static int __init
+static int __devinit
 txx9_i8259_irq_setup(int irq)
 {
int err;
@@ -269,7 +269,7 @@ txx9_i8259_irq_setup(int irq)
return err;
 }
 
-static void __init quirk_slc90e66_bridge(struct pci_dev *dev)
+static void __devinit quirk_slc90e66_bridge(struct pci_dev *dev)
 {
int irq;/* PCI/ISA Bridge interrupt */
u8 reg_64;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH -v2 3/8] PCI: Adjust section annotations of various quirks

2012-07-09 Thread Myron Stowe

PCI's quirk types 'pci_fixup_enable', 'pci_fixup_resume',
'pci_fixup_suspend', and 'pci_fixup_resume_early' can not be __init or
__devinit; they must be in normal text because they can be called at any
time.

This patch removes the '__init' section annotation of such quirks.

Signed-off-by: Myron Stowe 
---

 drivers/pci/quirks.c |   28 ++--
 1 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 8b2d553..8014091 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1039,7 +1039,7 @@ static void quirk_disable_pxb(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82454NX,
quirk_disable_pxb);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_82454NX,quirk_disable_pxb);
 
-static void __devinit quirk_amd_ide_mode(struct pci_dev *pdev)
+static void quirk_amd_ide_mode(struct pci_dev *pdev)
 {
/* set SBX00/Hudson-2 SATA in IDE mode to AHCI mode */
u8 tmp;
@@ -2104,7 +2104,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
PCI_DEVICE_ID_NX2_5709S,
quirk_brcm_570x_limit_vpd);
 
-static void __devinit quirk_brcm_5719_limit_mrrs(struct pci_dev *dev)
+static void quirk_brcm_5719_limit_mrrs(struct pci_dev *dev)
 {
u32 rev;
 
@@ -2217,7 +2217,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x9601, 
quirk_amd_780_apc_msi);
 
 /* Go through the list of Hypertransport capabilities and
  * return 1 if a HT MSI capability is found and enabled */
-static int __devinit msi_ht_cap_enabled(struct pci_dev *dev)
+static int msi_ht_cap_enabled(struct pci_dev *dev)
 {
int pos, ttl = 48;
 
@@ -2241,7 +2241,7 @@ static int __devinit msi_ht_cap_enabled(struct pci_dev 
*dev)
 }
 
 /* Check the hypertransport MSI mapping to know whether MSI is enabled or not 
*/
-static void __devinit quirk_msi_ht_cap(struct pci_dev *dev)
+static void quirk_msi_ht_cap(struct pci_dev *dev)
 {
if (dev->subordinate && !msi_ht_cap_enabled(dev)) {
dev_warn(&dev->dev, "MSI quirk detected; "
@@ -2255,7 +2255,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, 
PCI_DEVICE_ID_SERVERWORKS_HT2
 /* The nVidia CK804 chipset may have 2 HT MSI mappings.
  * MSI are supported if the MSI capability set in any of these mappings.
  */
-static void __devinit quirk_nvidia_ck804_msi_ht_cap(struct pci_dev *dev)
+static void quirk_nvidia_ck804_msi_ht_cap(struct pci_dev *dev)
 {
struct pci_dev *pdev;
 
@@ -2279,7 +2279,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 
PCI_DEVICE_ID_NVIDIA_CK804_PCIE,
quirk_nvidia_ck804_msi_ht_cap);
 
 /* Force enable MSI mapping capability on HT bridges */
-static void __devinit ht_enable_msi_mapping(struct pci_dev *dev)
+static void ht_enable_msi_mapping(struct pci_dev *dev)
 {
int pos, ttl = 48;
 
@@ -2359,7 +2359,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
PCI_DEVICE_ID_NVIDIA_MCP55_BRIDGE_V4,
nvbridge_check_legacy_irq_routing);
 
-static int __devinit ht_check_msi_mapping(struct pci_dev *dev)
+static int ht_check_msi_mapping(struct pci_dev *dev)
 {
int pos, ttl = 48;
int found = 0;
@@ -2387,7 +2387,7 @@ static int __devinit ht_check_msi_mapping(struct pci_dev 
*dev)
return found;
 }
 
-static int __devinit host_bridge_with_leaf(struct pci_dev *host_bridge)
+static int host_bridge_with_leaf(struct pci_dev *host_bridge)
 {
struct pci_dev *dev;
int pos;
@@ -2421,7 +2421,7 @@ static int __devinit host_bridge_with_leaf(struct pci_dev 
*host_bridge)
 #define PCI_HT_CAP_SLAVE_CTRL0 4/* link control */
 #define PCI_HT_CAP_SLAVE_CTRL1 8/* link control to */
 
-static int __devinit is_end_of_ht_chain(struct pci_dev *dev)
+static int is_end_of_ht_chain(struct pci_dev *dev)
 {
int pos, ctrl_off;
int end = 0;
@@ -2445,7 +2445,7 @@ out:
return end;
 }
 
-static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev)
+static void nv_ht_enable_msi_mapping(struct pci_dev *dev)
 {
struct pci_dev *host_bridge;
int pos;
@@ -2484,7 +2484,7 @@ out:
pci_dev_put(host_bridge);
 }
 
-static void __devinit ht_disable_msi_mapping(struct pci_dev *dev)
+static void ht_disable_msi_mapping(struct pci_dev *dev)
 {
int pos, ttl = 48;
 
@@ -2504,7 +2504,7 @@ static void __devinit ht_disable_msi_mapping(struct 
pci_dev *dev)
}
 }
 
-static void __devinit __nv_msi_ht_cap_quirk(struct pci_dev *dev, int all)
+static void __nv_msi_ht_cap_quirk(struct pci_dev *dev, int all)
 {
struct pci_dev *host_bridge;
int pos;
@@ -2555,12 +2555,12 @@ out:
pci_dev_put(host_bridge);
 }
 
-static void __devinit nv_msi_ht_cap_quirk_all(struct pci_dev *dev)
+static void nv_msi_ht_cap_quirk_all(struct pci_dev *dev)
 {
return __nv_msi_ht_cap_quirk(dev, 1);
 }
 
-static void

[PATCH -v2 2/8] PCI: release temporary reference in __nv_msi_ht_cap_quirk()

2012-07-09 Thread Myron Stowe

__nv_msi_ht_cap_quirk() acquires a temporary reference via
'pci_get_bus_and_slot()' that is never released.

This patch releases the temporary reference.

Signed-off-by: Myron Stowe 
---

 drivers/pci/quirks.c |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 0f5ca86..8b2d553 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2541,15 +2541,18 @@ static void __devinit __nv_msi_ht_cap_quirk(struct 
pci_dev *dev, int all)
else
nv_ht_enable_msi_mapping(dev);
}
-   return;
+   goto out;
}
 
/* HT MSI is not enabled */
if (found == 1)
-   return;
+   goto out;
 
/* Host bridge is not to HT, disable HT MSI mapping on this device */
ht_disable_msi_mapping(dev);
+
+out:
+   pci_dev_put(host_bridge);
 }
 
 static void __devinit nv_msi_ht_cap_quirk_all(struct pci_dev *dev)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH -v2 1/8] PCI: Restructure 'pci_do_fixups()'

2012-07-09 Thread Myron Stowe

It's a bit ugly that we have two possible call sites for the quirk: either
inside do_one_fixup_debug() or directly in pci_do_fixups().

This patch restructures pci_do_fixups()'s quirk invocations in the style
of initcall_debug_start() and initcall_debug_report().

Signed-off-by: Myron Stowe 
---

 drivers/pci/quirks.c |   46 ++
 1 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 194b243..0f5ca86 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2879,20 +2879,34 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f9, 
quirk_intel_mc_errata);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65fa, quirk_intel_mc_errata);
 
 
-static void do_one_fixup_debug(void (*fn)(struct pci_dev *dev), struct pci_dev 
*dev)
+static ktime_t fixup_debug_start(struct pci_dev *dev,
+void (*fn)(struct pci_dev *dev))
 {
-   ktime_t calltime, delta, rettime;
+   ktime_t calltime = ktime_set(0, 0);
+
+   dev_dbg(&dev->dev, "calling %pF\n", fn);
+   if (initcall_debug) {
+   pr_debug("calling  %pF @ %i for %s\n",
+fn, task_pid_nr(current), dev_name(&dev->dev));
+   calltime = ktime_get();
+   }
+
+   return calltime;
+}
+
+static void fixup_debug_report(struct pci_dev *dev, ktime_t calltime,
+  void (*fn)(struct pci_dev *dev))
+{
+   ktime_t delta, rettime;
unsigned long long duration;
 
-   printk(KERN_DEBUG "calling  %pF @ %i for %s\n",
-   fn, task_pid_nr(current), dev_name(&dev->dev));
-   calltime = ktime_get();
-   fn(dev);
-   rettime = ktime_get();
-   delta = ktime_sub(rettime, calltime);
-   duration = (unsigned long long) ktime_to_ns(delta) >> 10;
-   printk(KERN_DEBUG "pci fixup %pF returned after %lld usecs for %s\n",
-   fn, duration, dev_name(&dev->dev));
+   if (initcall_debug) {
+   rettime = ktime_get();
+   delta = ktime_sub(rettime, calltime);
+   duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+   pr_debug("pci fixup %pF returned after %lld usecs for %s\n",
+fn, duration, dev_name(&dev->dev));
+   }
 }
 
 /*
@@ -2958,6 +2972,8 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c2d, 
asus_ehci_no_d3);
 static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
  struct pci_fixup *end)
 {
+   ktime_t calltime;
+
for (; f < end; f++)
if ((f->class == (u32) (dev->class >> f->class_shift) ||
 f->class == (u32) PCI_ANY_ID) &&
@@ -2965,11 +2981,9 @@ static void pci_do_fixups(struct pci_dev *dev, struct 
pci_fixup *f,
 f->vendor == (u16) PCI_ANY_ID) &&
(f->device == dev->device ||
 f->device == (u16) PCI_ANY_ID)) {
-   dev_dbg(&dev->dev, "calling %pF\n", f->hook);
-   if (initcall_debug)
-   do_one_fixup_debug(f->hook, dev);
-   else
-   f->hook(dev);
+   calltime = fixup_debug_start(dev, f->hook);
+   f->hook(dev);
+   fixup_debug_report(dev, calltime, f->hook);
}
 }
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[patch 1/2] block: add partition resize function to blkpg ioctl

2012-07-09 Thread vgoyal


Add a new operation code (BLKPG_RESIZE_PARTITION) to the BLKPG ioctl that
allows altering the size of an existing partition, even if it is currently
in use.

This patch converts hd_struct->nr_sects to use a sequence counter because
one might extend a partition while IO is happening to it, and the update of
nr_sects can be non-atomic on 32bit machines with a 64bit sector_t. This
can lead to issues like reading an inconsistent size of a partition. A
sequence counter has been used so that readers don't have to take the bdev
mutex lock, as we call sector_in_part() very frequently.

Now all the access to hd_struct->nr_sects should happen using sequence
counter read/update helper functions part_nr_sects_read/part_nr_sects_write.
There is one exception though, set_capacity()/get_capacity(). I think
theoretically a race should exist there too but this patch does not
modify set_capacity()/get_capacity() due to sheer number of call sites
and I am afraid that change might break something. I have left that as a
TODO item. We can handle it later if need be. This patch does not introduce
any new races as such w.r.t set_capacity()/get_capacity().

Signed-off-by: Vivek Goyal 
---
 block/genhd.c |   20 +++
 block/ioctl.c |   59 ++--
 block/partition-generic.c |4 ++-
 include/linux/blkpg.h |1 +
 include/linux/genhd.h |   57 +++
 5 files changed, 132 insertions(+), 9 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 9cf5583..cac7366 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -154,7 +154,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter 
*piter)
part = rcu_dereference(ptbl->part[piter->idx]);
if (!part)
continue;
-   if (!part->nr_sects &&
+   if (!part_nr_sects_read(part) &&
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
  piter->idx == 0))
@@ -191,7 +191,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
 static inline int sector_in_part(struct hd_struct *part, sector_t sector)
 {
return part->start_sect <= sector &&
-   sector < part->start_sect + part->nr_sects;
+   sector < part->start_sect + part_nr_sects_read(part);
 }
 
 /**
@@ -769,8 +769,8 @@ void __init printk_all_partitions(void)
 
printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
   bdevt_str(part_devt(part), devt_buf),
-  (unsigned long long)part->nr_sects >> 1,
-  disk_name(disk, part->partno, name_buf),
+  (unsigned long long)part_nr_sects_read(part) >> 1
+  , disk_name(disk, part->partno, name_buf),
   uuid_buf);
if (is_part0) {
if (disk->driverfs_dev != NULL &&
@@ -862,7 +862,7 @@ static int show_partition(struct seq_file *seqf, void *v)
while ((part = disk_part_iter_next(&piter)))
seq_printf(seqf, "%4d  %7d %10llu %s\n",
   MAJOR(part_devt(part)), MINOR(part_devt(part)),
-  (unsigned long long)part->nr_sects >> 1,
+  (unsigned long long)part_nr_sects_read(part) >> 1,
   disk_name(sgp, part->partno, buf));
disk_part_iter_exit(&piter);
 
@@ -1268,6 +1268,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
}
disk->part_tbl->part[0] = &disk->part0;
 
+   /*
+* set_capacity() and get_capacity() currently don't use
+* seqcounter to read/update the part0->nr_sects. Still init
+* the counter as we can read the sectors in IO submission
+* patch using seqence counters.
+*
+* TODO: Ideally set_capacity() and get_capacity() should be
+* converted to make use of bd_mutex and sequence counters.
+*/
+   seqcount_init(&disk->part0.nr_sects_seq);
hd_ref_init(&disk->part0);
 
disk->minors = minors;
diff --git a/block/ioctl.c b/block/ioctl.c
index ba15b2d..4476e0e8 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct 
blkpg_ioctl_arg __user
 {
struct block_device *bdevp;
struct gendisk *disk;
-   struct hd_struct *part;
+   struct hd_struct *part, *lpart;
struct blkpg_ioctl_arg a;
struct blkpg_partition p;
struct disk_part_iter piter;
@@ -36,8 +36,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct 
blkpg_ioctl_arg __user
case BLKPG_ADD_PARTITION:
start = p.start >> 9;
length

[PATCH] SH: Convert out[bwl] macros to inline functions

2012-07-09 Thread minyard

From: Corey Minyard 

The macros just called BUG(), but that results in unused variable
warnings all over the place, like in the IPMI driver.  The build
regression emails were annoying me, so here's the fix.  I have
not even compile tested this, but it's rather obvious.

Signed-off-by: Corey Minyard 
---
 arch/sh/include/asm/io_noioport.h |   17 ++---
 1 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/arch/sh/include/asm/io_noioport.h 
b/arch/sh/include/asm/io_noioport.h
index e136d28..8cf7084 100644
--- a/arch/sh/include/asm/io_noioport.h
+++ b/arch/sh/include/asm/io_noioport.h
@@ -19,9 +19,20 @@ static inline u32 inl(unsigned long addr)
return -1;
 }
 
-#define outb(x, y) BUG()
-#define outw(x, y) BUG()
-#define outl(x, y) BUG()
+static inline void outb(unsigned char x, unsigned int port)
+{
+   BUG();
+}
+
+static inline void outw(unsigned short x, unsigned int port)
+{
+   BUG();
+}
+
+static inline void outl(unsigned int x, unsigned int port)
+{
+   BUG();
+}
 
 #define inb_p(addr)inb(addr)
 #define inw_p(addr)inw(addr)
-- 
1.7.4.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] fat (exportfs): reconnect file handles to evicted inodes/dentries

2012-07-09 Thread OGAWA Hirofumi

"Steven J. Magnani"  writes:

>> Ah, i_ino. I was talking about i_pos. Well, so, what happens if the
>> child was renamed to other parent on NFS server machine (not via nfs
>> client)? The file handle would be including the old i_ino, and the old
>> i_ino on file handle is still valid as old parent. So, it returns the
>> wrong parent?
>
> Yes, but I believe exportfs_decode_fh() handles that case:
>
>   /*
>* Now that we've got both a well-connected parent and a
>* dentry for the inode we're after, make sure that our
>* inode is actually connected to the parent.
>*/
>
>
> Really, the FAT NFS code will pretty much parallel that of ext2.

Hm, not really, if the file handle is including parent ino. ext2 will
get the latest parent ino, because it checks parent of inode of file
handle.

But if the file handle is including parent ino and we believe it is
parent, I think the NFS server can return the old parent. The difference
is the result of ->get_parent().
-- 
OGAWA Hirofumi 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Build regressions/improvements in v3.5-rc6

2012-07-09 Thread Geert Uytterhoeven

Hi Bjorn (or Björn?),

On Mon, Jul 9, 2012 at 11:20 PM, Bjorn Helgaas  wrote:
> There are some PCI-related regressions here that I'd like to fix, but
> I don't know where to start.  For example, these:
>
>>   + drivers/pci/quirks.c: error: implicit declaration of function 'ioread32' 
>> [-Werror=implicit-function-declaration]:  => 3154:2
>>   + drivers/pci/quirks.c: error: implicit declaration of function 
>> 'iowrite32' [-Werror=implicit-function-declaration]:  => 3144:2
>
> I don't see these in an x86 build, so I assume these are from some
> other arch or other config.  Can I tell which one?

xtensa-allmodconfig

http://kisskb.ellerman.id.au/kisskb/buildresult/6668885/

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: pull request: wireless 2012-07-09

2012-07-09 Thread David Miller

From: "John W. Linville" 
Date: Mon, 9 Jul 2012 15:28:18 -0400

> Please accept these fixes for the 3.5 stream...

Pulled, thanks John.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/3] sparc/PCI: replace pci_cfg_fake_ranges() with pci_read_bridge_bases()

2012-07-09 Thread David Miller

From: Bjorn Helgaas 
Date: Mon, 09 Jul 2012 14:32:14 -0600

> The generic code to read P2P bridge windows is functionally equivalent
> to the sparc-specific pci_cfg_fake_ranges(), so use the generic code.
> 
> The "if (!res->start) res->start = ..." removed from the I/O window code
> here was an artifact of the Intel 1K window support from 9d265124d051 and
> is no longer necessary (it probably was just cloned from x86 and was never
> useful on sparc).
> 
> CC: "David S. Miller" 
> CC: sparcli...@vger.kernel.org
> Signed-off-by: Bjorn Helgaas 

Acked-by: David S. Miller 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] CRIS: Fix I/O macros

2012-07-09 Thread minyard

From: Corey Minyard 

The inb/outb macros for CRIS are broken from a number of points of
view, missing () around parameters and they have an unprotected if
statement in them.  This was breaking the compile of IPMI on CRIS
and thus I was being annoyed by build regressions, so I fixed them.

Plus I don't think they would have worked at all, since the data
values were missing "&" and the outsl had a "3" instead of a "4"
for the size.  From what I can tell, this stuff is not used at all,
so this can't be any more broken than it was before, anyway.

Signed-off-by: Corey Minyard 
---
 arch/cris/include/asm/io.h |   39 +--
 1 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/arch/cris/include/asm/io.h b/arch/cris/include/asm/io.h
index 32567bc..ac12ae2 100644
--- a/arch/cris/include/asm/io.h
+++ b/arch/cris/include/asm/io.h
@@ -133,12 +133,39 @@ static inline void writel(unsigned int b, volatile void __iomem *addr)
 #define insb(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,1,count) : 0)
 #define insw(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,2,count) : 0)
 #define insl(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,4,count) : 0)
-#define outb(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,1,1)
-#define outw(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,2,1)
-#define outl(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,4,1)
-#define outsb(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,1,count)
-#define outsw(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,2,count)
-#define outsl(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,3,count)
+static inline void outb(unsigned char data, unsigned int port)
+{
+   if (cris_iops)
+   cris_iops->write_io(port, (void *) &data, 1, 1);
+}
+static inline void outw(unsigned short data, unsigned int port)
+{
+   if (cris_iops)
+   cris_iops->write_io(port, (void *) &data, 2, 1);
+}
+static inline void outl(unsigned int data, unsigned int port)
+{
+   if (cris_iops)
+   cris_iops->write_io(port, (void *) &data, 4, 1);
+}
+static inline void outsb(unsigned int port, const void *addr,
+unsigned long count)
+{
+   if (cris_iops)
+   cris_iops->write_io(port, (void *)addr, 1, count);
+}
+static inline void outsw(unsigned int port, const void *addr,
+unsigned long count)
+{
+   if (cris_iops)
+   cris_iops->write_io(port, (void *)addr, 2, count);
+}
+static inline void outsl(unsigned int port, const void *addr,
+unsigned long count)
+{
+   if (cris_iops)
+   cris_iops->write_io(port, (void *)addr, 4, count);
+}
 
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
-- 
1.7.4.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 7/9] Input: ab8500-ponkey: Create AB8500 domain IRQ mapping

2012-07-09 Thread Lee Jones


On 09/07/12 23:03, Linus Walleij wrote:

On Mon, Jul 9, 2012 at 10:59 PM, Lee Jones  wrote:


Before we can use any domain allocated IRQ, we need to first create a
map between the Hardware IRQ (hwirq) and the Linux Virtual IRQ (virq).
We do this with a helper function provided by the AB8500 IRQ domain
controller called ab8500_irq_get_virq(). We need to do this for both
IRQs which the Power-On-Key driver uses; one for button press, the other
for button depress.

Signed-off-by: Lee Jones 


Now since I was fooled by the last patch, thinking you had tested it before
submitting (obviously not) - please send a test log of some
cat /dev/input/event* for this one before I ACK it this time.


I did test it, but must have only looked at the one IRQ incrementing.

I can provide a log tomorrow if it makes you feel better. :)

--
Lee Jones
Linaro ST-Ericsson Landing Team Lead
M: +44 77 88 633 515
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Build regressions/improvements in v3.5-rc6

2012-07-09 Thread Bjorn Helgaas

Hi Geert,

There are some PCI-related regressions here that I'd like to fix, but
I don't know where to start.  For example, these:

>   + drivers/pci/quirks.c: error: implicit declaration of function 'ioread32' 
> [-Werror=implicit-function-declaration]:  => 3154:2
>   + drivers/pci/quirks.c: error: implicit declaration of function 'iowrite32' 
> [-Werror=implicit-function-declaration]:  => 3144:2

I don't see these in an x86 build, so I assume these are from some
other arch or other config.  Can I tell which one?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

< 1 2 3 4 5 6 7 8 9 10 >

101 - 200 of 1424 matches

Mail list logo