[REPOST PATCH 2/4] slab: introduce helper functions to get/set free object

2013-09-05 Thread Joonsoo Kim
In the following patches, the way free objects are read from and written
to the freelist changes, so a simple cast no longer works. Therefore,
introduce helper functions.

Signed-off-by: Joonsoo Kim 

diff --git a/mm/slab.c b/mm/slab.c
index 9d4bad5..a0e49bb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2545,9 +2545,15 @@ static struct freelist *alloc_slabmgmt(struct kmem_cache 
*cachep,
return freelist;
 }
 
-static inline unsigned int *slab_freelist(struct page *page)
+static inline unsigned int get_free_obj(struct page *page, unsigned int idx)
 {
-   return (unsigned int *)(page->freelist);
+   return ((unsigned int *)page->freelist)[idx];
+}
+
+static inline void set_free_obj(struct page *page,
+   unsigned int idx, unsigned int val)
+{
+   ((unsigned int *)(page->freelist))[idx] = val;
 }
 
 static void cache_init_objs(struct kmem_cache *cachep,
@@ -2592,7 +2598,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
if (cachep->ctor)
cachep->ctor(objp);
 #endif
-   slab_freelist(page)[i] = i;
+   set_free_obj(page, i, i);
}
 }
 
@@ -2611,7 +2617,7 @@ static void *slab_get_obj(struct kmem_cache *cachep, 
struct page *page,
 {
void *objp;
 
-   objp = index_to_obj(cachep, page, slab_freelist(page)[page->active]);
+   objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
page->active++;
 #if DEBUG
WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
@@ -2632,7 +2638,7 @@ static void slab_put_obj(struct kmem_cache *cachep, 
struct page *page,
 
/* Verify double free bug */
for (i = page->active; i < cachep->num; i++) {
-   if (slab_freelist(page)[i] == objnr) {
+   if (get_free_obj(page, i) == objnr) {
printk(KERN_ERR "slab: double free detected in cache "
"'%s', objp %p\n", cachep->name, objp);
BUG();
@@ -2640,7 +2646,7 @@ static void slab_put_obj(struct kmem_cache *cachep, 
struct page *page,
}
 #endif
page->active--;
-   slab_freelist(page)[page->active] = objnr;
+   set_free_obj(page, page->active, objnr);
 }
 
 /*
@@ -4214,7 +4220,7 @@ static void handle_slab(unsigned long *n, struct 
kmem_cache *c,
 
for (j = page->active; j < c->num; j++) {
/* Skip freed item */
-   if (slab_freelist(page)[j] == i) {
+   if (get_free_obj(page, j) == i) {
active = false;
break;
}
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[REPOST PATCH 1/4] slab: factor out calculate nr objects in cache_estimate

2013-09-05 Thread Joonsoo Kim
This logic is not simple to understand, so moving it into a separate function
helps readability. Additionally, we can use this change in the
following patch, which lets the freelist use a differently sized index
according to the number of objects.

Signed-off-by: Joonsoo Kim 

diff --git a/mm/slab.c b/mm/slab.c
index f3868fe..9d4bad5 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -565,9 +565,31 @@ static inline struct array_cache *cpu_cache_get(struct 
kmem_cache *cachep)
return cachep->array[smp_processor_id()];
 }
 
-static size_t slab_mgmt_size(size_t nr_objs, size_t align)
+static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
+   size_t idx_size, size_t align)
 {
-   return ALIGN(nr_objs * sizeof(unsigned int), align);
+   int nr_objs;
+   size_t freelist_size;
+
+   /*
+* Ignore padding for the initial guess. The padding
+* is at most @align-1 bytes, and @buffer_size is at
+* least @align. In the worst case, this result will
+* be one greater than the number of objects that fit
+* into the memory allocation when taking the padding
+* into account.
+*/
+   nr_objs = slab_size / (buffer_size + idx_size);
+
+   /*
+* This calculated number will be either the right
+* amount, or one greater than what we want.
+*/
+   freelist_size = slab_size - nr_objs * buffer_size;
+   if (freelist_size < ALIGN(nr_objs * idx_size, align))
+   nr_objs--;
+
+   return nr_objs;
 }
 
 /*
@@ -600,28 +622,12 @@ static void cache_estimate(unsigned long gfporder, size_t 
buffer_size,
nr_objs = slab_size / buffer_size;
 
} else {
-   /*
-* Ignore padding for the initial guess. The padding
-* is at most @align-1 bytes, and @buffer_size is at
-* least @align. In the worst case, this result will
-* be one greater than the number of objects that fit
-* into the memory allocation when taking the padding
-* into account.
-*/
-   nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int));
-
-   /*
-* This calculated number will be either the right
-* amount, or one greater than what we want.
-*/
-   if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
-  > slab_size)
-   nr_objs--;
-
-   mgmt_size = slab_mgmt_size(nr_objs, align);
+   nr_objs = calculate_nr_objs(slab_size, buffer_size,
+   sizeof(unsigned int), align);
+   mgmt_size = ALIGN(nr_objs * sizeof(unsigned int), align);
}
*num = nr_objs;
-   *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
+   *left_over = slab_size - (nr_objs * buffer_size) - mgmt_size;
 }
 
 #if DEBUG
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/4] slab: implement byte sized indexes for the freelist of a slab

2013-09-05 Thread Joonsoo Kim
On Thu, Sep 05, 2013 at 02:33:56PM +0000, Christoph Lameter wrote:
> On Thu, 5 Sep 2013, Joonsoo Kim wrote:
> 
> > I think that all patchsets deserve to be merged, since it reduces memory 
> > usage and
> > also improves performance. :)
> 
> Could you clean things up etc and the repost the patchset? This time do
> *not* do this as a response to an earlier email but start the patchset
> with new thread id. I think some people are not seeing this patchset.

Okay. I just did that.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[REPOST PATCH 3/4] slab: introduce byte sized index for the freelist of a slab

2013-09-05 Thread Joonsoo Kim
Currently, the freelist of a slab consists of unsigned int sized indexes.
Most slabs have fewer than 256 objects, since the page order is
restricted to at most 1 in the default configuration. For example,
consider a slab consisting of 32 byte sized objects on two contiguous
pages. In this case, 256 objects are possible, and this number fits in byte
sized indexes. 256 objects is the maximum possible value in the default
configuration, since 32 bytes is the minimum object size in SLAB
(8192 / 32 = 256). Therefore, if we use a byte sized index, we can save
3 bytes for each object.

This introduces one likely branch into the functions used for setting/getting
objects to/from the freelist, but we may get more benefits from
this change.

Signed-off-by: Joonsoo Kim 

diff --git a/mm/slab.c b/mm/slab.c
index a0e49bb..bd366e5 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -565,8 +565,16 @@ static inline struct array_cache *cpu_cache_get(struct 
kmem_cache *cachep)
return cachep->array[smp_processor_id()];
 }
 
-static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
-   size_t idx_size, size_t align)
+static inline bool can_byte_index(int nr_objs)
+{
+   if (likely(nr_objs <= (sizeof(unsigned char) << 8)))
+   return true;
+
+   return false;
+}
+
+static int __calculate_nr_objs(size_t slab_size, size_t buffer_size,
+   unsigned int idx_size, size_t align)
 {
int nr_objs;
size_t freelist_size;
@@ -592,6 +600,29 @@ static int calculate_nr_objs(size_t slab_size, size_t 
buffer_size,
return nr_objs;
 }
 
+static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
+   size_t align)
+{
+   int nr_objs;
+   int byte_nr_objs;
+
+   nr_objs = __calculate_nr_objs(slab_size, buffer_size,
+   sizeof(unsigned int), align);
+   if (!can_byte_index(nr_objs))
+   return nr_objs;
+
+   byte_nr_objs = __calculate_nr_objs(slab_size, buffer_size,
+   sizeof(unsigned char), align);
+   /*
+* nr_objs can be larger when using byte index,
+* so that it cannot be indexed by byte index.
+*/
+   if (can_byte_index(byte_nr_objs))
+   return byte_nr_objs;
+   else
+   return nr_objs;
+}
+
 /*
  * Calculate the number of objects and left-over bytes for a given buffer size.
  */
@@ -618,13 +649,18 @@ static void cache_estimate(unsigned long gfporder, size_t 
buffer_size,
 * correct alignment when allocated.
 */
if (flags & CFLGS_OFF_SLAB) {
-   mgmt_size = 0;
nr_objs = slab_size / buffer_size;
+   mgmt_size = 0;
 
} else {
-   nr_objs = calculate_nr_objs(slab_size, buffer_size,
-   sizeof(unsigned int), align);
-   mgmt_size = ALIGN(nr_objs * sizeof(unsigned int), align);
+   nr_objs = calculate_nr_objs(slab_size, buffer_size, align);
+   if (can_byte_index(nr_objs)) {
+   mgmt_size =
+   ALIGN(nr_objs * sizeof(unsigned char), align);
+   } else {
+   mgmt_size =
+   ALIGN(nr_objs * sizeof(unsigned int), align);
+   }
}
*num = nr_objs;
*left_over = slab_size - (nr_objs * buffer_size) - mgmt_size;
@@ -2012,7 +2048,10 @@ static size_t calculate_slab_order(struct kmem_cache 
*cachep,
 * looping condition in cache_grow().
 */
offslab_limit = size;
-   offslab_limit /= sizeof(unsigned int);
+   if (can_byte_index(num))
+   offslab_limit /= sizeof(unsigned char);
+   else
+   offslab_limit /= sizeof(unsigned int);
 
if (num > offslab_limit)
break;
@@ -2253,8 +2292,13 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned 
long flags)
if (!cachep->num)
return -E2BIG;
 
-   freelist_size =
-   ALIGN(cachep->num * sizeof(unsigned int), cachep->align);
+   if (can_byte_index(cachep->num)) {
+   freelist_size = ALIGN(cachep->num * sizeof(unsigned char),
+   cachep->align);
+   } else {
+   freelist_size = ALIGN(cachep->num * sizeof(unsigned int),
+   cachep->align);
+   }
 
/*
 * If the slab has been placed off-slab, and we have enough space then
@@ -2267,7 +2311,10 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned 
long flags)
 
if (flags & CFLGS_OFF_SLAB) {
   

[REPOST PATCH 4/4] slab: make more slab management structure off the slab

2013-09-05 Thread Joonsoo Kim
Now that the size of the freelist for the slab management has diminished,
the on-slab management structure can waste a large amount of space
if the objects of the slab are large.

Consider a 128 byte sized slab. If on-slab is used, 31 objects can be
in the slab. The size of the freelist for this case would be 31 bytes
so that 97 bytes, that is, more than 75% of object size, are wasted.

In a 64 byte sized slab case, no space is wasted if we use on-slab.
So set off-slab determining constraint to 128 bytes.

Signed-off-by: Joonsoo Kim 

diff --git a/mm/slab.c b/mm/slab.c
index bd366e5..d01a2f0 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2277,7 +2277,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned 
long flags)
 * it too early on. Always use on-slab management when
 * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
 */
-   if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
+   if ((size >= (PAGE_SIZE >> 5)) && !slab_early_init &&
!(flags & SLAB_NOLEAKTRACE))
/*
 * Size is large, assume best to place the slab management obj
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[REPOST PATCH 0/4] slab: implement byte sized indexes for the freelist of a slab

2013-09-05 Thread Joonsoo Kim
* THIS IS JUST REPOSTED ACCORDING TO MAINTAINER'S REQUEST *

* Changes from original post
Correct the position of the results.
Attach more results about cache-misses and elapsed time on a hackbench test.

-
This patchset implements byte sized indexes for the freelist of a slab.

Currently, the freelist of a slab consists of unsigned int sized indexes.
Most slabs have fewer than 256 objects, so much space is wasted.
To reduce this overhead, this patchset implements byte sized indexes for
the freelist of a slab. With it, we can save 3 bytes for each object.

This introduces one likely branch into the functions used for setting/getting
objects to/from the freelist, but we may get more benefits from
this change.

Below is some numbers of 'cat /proc/slabinfo' related to my previous posting
and this patchset.


* Before *
# name
 : tunables [snip...]
kmalloc-512  52760051281 : tunables   54   270 : 
slabdata 75 75  0   
kmalloc-256  210210256   151 : tunables  120   600 : 
slabdata 14 14  0   
kmalloc-192 1040   1040192   201 : tunables  120   600 : 
slabdata 52 52  0   
kmalloc-96   750750128   301 : tunables  120   600 : 
slabdata 25 25  0   
kmalloc-64  2773   2773 64   591 : tunables  120   600 : 
slabdata 47 47  0   
kmalloc-128  660690128   301 : tunables  120   600 : 
slabdata 23 23  0   
kmalloc-32 11200  11200 32  1121 : tunables  120   600 : 
slabdata100100  0   
kmem_cache   197200192   201 : tunables  120   600 : 
slabdata 10 10  0   

* After my previous posting(overload struct slab over struct page) *
# name
 : tunables [snip...]
kmalloc-512  52564051281 : tunables   54   270 : 
slabdata 80 80  0   
kmalloc-256  210210256   151 : tunables  120   600 : 
slabdata 14 14  0   
kmalloc-192 1016   1040192   201 : tunables  120   600 : 
slabdata 52 52  0   
kmalloc-96   560620128   311 : tunables  120   600 : 
slabdata 20 20  0   
kmalloc-64  2148   2280 64   601 : tunables  120   600 : 
slabdata 38 38  0   
kmalloc-128  647682128   311 : tunables  120   600 : 
slabdata 22 22  0   
kmalloc-32 11360  11413 32  1131 : tunables  120   600 : 
slabdata101101  0   
kmem_cache   197200192   201 : tunables  120   600 : 
slabdata 10 10  0   

kmem_caches whose objects are 128 bytes or smaller have one more
object per slab. You can see it in the objperslab column.

We can improve further with this patchset.

* My previous posting + this patchset *
# name
 : tunables [snip...]
kmalloc-512  52164851281 : tunables   54   270 : 
slabdata 81 81  0
kmalloc-256  208208256   161 : tunables  120   600 : 
slabdata 13 13  0
kmalloc-192 1029   1029192   211 : tunables  120   600 : 
slabdata 49 49  0
kmalloc-96   529589128   311 : tunables  120   600 : 
slabdata 19 19  0
kmalloc-64  2142   2142 64   631 : tunables  120   600 : 
slabdata 34 34  0
kmalloc-128  660682128   311 : tunables  120   600 : 
slabdata 22 22  0
kmalloc-32 11716  11780 32  1241 : tunables  120   600 : 
slabdata 95 95  0
kmem_cache   197210192   211 : tunables  120   600 : 
slabdata 10 10  0

kmem_caches whose objects are 256 bytes or smaller have
one or more additional objects per slab than before. In the case of
kmalloc-32, we have 11 more objects, so 352 bytes (11 * 32) are saved and
this is roughly a 9% saving of memory. Of course, this percentage decreases
as the number of objects in a slab decreases.



Here are the performance results on my 4 cpus machine.

* Before *

 Performance counter stats for 'perf bench sched messaging -g 50 -l 1000' (10 
runs):

   238,309,671 cache-misses 
 ( +-  0.40% )

  12.010172090 seconds time elapsed 
 ( +-  0.21% )

* After my previous posting *

 Performance counter stats for 'perf bench sched messaging -g 50 -l 1000' (10 
runs):

   229,945,138 cache-misses 
 ( +-  0.23% )

  11.627897174 seconds time elapsed 
 ( +-  0.14% )

* My previous posting + this patchset *

 Performance counter stats for 'perf 

Re: [PATCH V4] regulator: palmas: add support for external control of rails

2013-09-05 Thread Laxman Dewangan

On Thursday 05 September 2013 09:04 PM, Mark Brown wrote:

* PGP Signed by an unknown key

On Thu, Sep 05, 2013 at 08:27:24PM +0530, Laxman Dewangan wrote:

On Thursday 05 September 2013 08:04 PM, Lee Jones wrote:

It won't go in until v3.12 now, but I have applied the patch.

Thanks Lee for taking care.

If it's going to wait for v3.12 there's no point applying it to MFD as
the dependency will be in mainline after the merge window.
Agreed, it should go through the regulator tree if it is for v3.12. If there
is any issue applying the patch, I will resend it at that time after
rebasing onto that branch.


Thanks,
Laxman

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] perf tools: Free strlist in strlist__delete()

2013-09-05 Thread Namhyung Kim
From: Namhyung Kim 

It seems a strlist is never freed after being allocated.  AFAICS every strlist
is allocated dynamically, so just free it in the _delete() function.

Signed-off-by: Namhyung Kim 
---
 tools/perf/util/strlist.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
index eabdce0a2daa..11593d899eb2 100644
--- a/tools/perf/util/strlist.c
+++ b/tools/perf/util/strlist.c
@@ -155,8 +155,10 @@ out_error:
 
 void strlist__delete(struct strlist *slist)
 {
-   if (slist != NULL)
+   if (slist != NULL) {
rblist__delete(&slist->rblist);
+   free(slist);
+   }
 }
 
 struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx)
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH v3 09/35] mm: Track the freepage migratetype of pages accurately

2013-09-05 Thread Srivatsa S. Bhat
On 09/04/2013 01:53 PM, Yasuaki Ishimatsu wrote:
> (2013/09/03 17:45), Srivatsa S. Bhat wrote:
>> On 09/03/2013 12:08 PM, Yasuaki Ishimatsu wrote:
>>> (2013/08/30 22:16), Srivatsa S. Bhat wrote:
 Due to the region-wise ordering of the pages in the buddy allocator's
 free lists, whenever we want to delete a free pageblock from a free
 list
 (for ex: when moving blocks of pages from one list to the other), we
 need
 to be able to tell the buddy allocator exactly which migratetype it
 belongs
 to. For that purpose, we can use the page's freepage migratetype
 (which is
 maintained in the page's ->index field).

 So, while splitting up higher order pages into smaller ones as part of
 buddy
 operations, keep the new head pages updated with the correct freepage
 migratetype information (because we depend on tracking this info
 accurately,
 as outlined above).

 Signed-off-by: Srivatsa S. Bhat 
 ---

mm/page_alloc.c |7 +++
1 file changed, 7 insertions(+)

 diff --git a/mm/page_alloc.c b/mm/page_alloc.c
 index 398b62c..b4b1275 100644
 --- a/mm/page_alloc.c
 +++ b/mm/page_alloc.c
 @@ -947,6 +947,13 @@ static inline void expand(struct zone *zone,
 struct page *page,
add_to_freelist([size], >free_list[migratetype]);
area->nr_free++;
set_page_order([size], high);
 +
 +/*
 + * Freepage migratetype is tracked using the index field of
 the
 + * first page of the block. So we need to update the new first
 + * page, when changing the page order.
 + */
 +set_freepage_migratetype([size], migratetype);
}
}


>>>
>>> Is this patch a bug fix patch?
>>> If so, I want you to split the patch from the patch-set.
>>>
>>
>> No, it's not a bug-fix. We need to take care of this only when using the
>> sorted-buddy design to maintain the freelists, which is introduced
>> only in
>> this patchset. So mainline doesn't need this patch.
>>
>> In mainline, we can delete a page from a buddy freelist by simply calling
>> list_del() by passing a pointer to page->lru. It doesn't matter which
>> freelist
>> the page was belonging to. However, in the sorted-buddy design introduced
>> in this patchset, we also need to know which particular freelist we are
>> deleting that page from, because apart from breaking the ->lru link from
>> the linked-list, we also need to update certain other things such as the
>> region->page_block pointer etc, which are part of that particular
>> freelist.
>> Thus, it becomes essential to know which freelist we are deleting the
>> page
>> from. And for that, we need this patch to maintain that information
>> accurately
>> even during buddy operations such as splitting buddy pages in expand().
> 
> I may be wrong because I do not know this part clearly.
> 
> Original code is here:
> 
> ---
> static inline void expand(struct zone *zone, struct page *page,
> int low, int high, struct free_area *area,
> int migratetype)
> {
> ...
> list_add(&page[size].lru, &area->free_list[migratetype]);
> area->nr_free++;
> set_page_order([size], high);
> ---
> 
> It seems that the migratetype of the page[size] page is changed. So even
> without applying your patch, I think the migratetype of the page should be
> changed.
> 

Hmm, thinking about this a bit more, I agree with you. Although it's not a
bug-fix for mainline, it is certainly good to have, since it makes things
more consistent by tracking the freepage migratetype properly for pages
split during buddy expansion. I'll separate this patch from the series and
post it as a stand-alone patch. Thank you!

Regards,
Srivatsa S. Bhat

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 06/20] mm, hugetlb: return a reserved page to a reserved pool if failed

2013-09-05 Thread Joonsoo Kim
If we fail with a reserved page, just calling put_page() is not sufficient,
because put_page() invokes free_huge_page() as its last step and it doesn't
know whether a page comes from a reserved pool or not. So it doesn't do
anything related to the reserve count. This makes the reserve count lower
than it needs to be, because the reserve count was already decreased in
dequeue_huge_page_vma(). This patch fixes this situation.

In this patch, PagePrivate() is used for tracking reservation.
When reserved pages are dequeued from the reserved pool, the Private flag is
assigned to the hugepage until it is properly mapped. On the page returning
path, if there is a hugepage with the Private flag, it is considered to be one
returned in a certain error path, so we should restore one
reserve count in order to preserve a certain user's reserved hugepage.

Using Private flag is safe for the hugepage, because it doesn't use the
LRU mechanism so that there is no other user of this page except us.
Therefore we can use this flag safely.

Signed-off-by: Joonsoo Kim 
---
Replenishing commit message only.

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6c8eec2..3f834f1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -572,6 +572,7 @@ retry_cpuset:
if (!vma_has_reserves(vma, chg))
break;
 
+   SetPagePrivate(page);
h->resv_huge_pages--;
break;
}
@@ -626,15 +627,20 @@ static void free_huge_page(struct page *page)
int nid = page_to_nid(page);
struct hugepage_subpool *spool =
(struct hugepage_subpool *)page_private(page);
+   bool restore_reserve;
 
set_page_private(page, 0);
page->mapping = NULL;
BUG_ON(page_count(page));
BUG_ON(page_mapcount(page));
+   restore_reserve = PagePrivate(page);
 
spin_lock(_lock);
hugetlb_cgroup_uncharge_page(hstate_index(h),
 pages_per_huge_page(h), page);
+   if (restore_reserve)
+   h->resv_huge_pages++;
+
if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) {
/* remove the page from active list */
list_del(>lru);
@@ -2616,6 +2622,8 @@ retry_avoidcopy:
spin_lock(>page_table_lock);
ptep = huge_pte_offset(mm, address & huge_page_mask(h));
if (likely(pte_same(huge_ptep_get(ptep), pte))) {
+   ClearPagePrivate(new_page);
+
/* Break COW */
huge_ptep_clear_flush(vma, address, ptep);
set_huge_pte_at(mm, address, ptep,
@@ -2727,6 +2735,7 @@ retry:
goto retry;
goto out;
}
+   ClearPagePrivate(page);
 
spin_lock(>i_lock);
inode->i_blocks += blocks_per_huge_page(h);
@@ -2773,8 +2782,10 @@ retry:
if (!huge_pte_none(huge_ptep_get(ptep)))
goto backout;
 
-   if (anon_rmap)
+   if (anon_rmap) {
+   ClearPagePrivate(page);
hugepage_add_new_anon_rmap(page, vma, address);
+   }
else
page_dup_rmap(page);
new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 03/20] mm, hugetlb: fix subpool accounting handling

2013-09-05 Thread Joonsoo Kim
There is a case that we attempt to allocate a hugepage with chg = 0 and
avoid_reserve = 1. Although chg = 0 means that it has a reserved hugepage,
we wouldn't use it, since avoid_reserve = 1 represents that we don't want
to allocate a hugepage from a reserved pool. This happens when the parent
process that created a MAP_PRIVATE mapping is about to perform a COW due to
a shared page count and it attempts to satisfy the allocation without using
the existing reserves.

In this case, we would not dequeue a reserved hugepage and, instead, try
to allocate a new hugepage. Therefore, we should check subpool counter
for a new hugepage. This patch implements it.

Reviewed-by: Aneesh Kumar K.V 
Signed-off-by: Joonsoo Kim 
---
Replenishing commit message and adding reviewed-by tag.

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 12b6581..ea1ae0a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1144,13 +1144,14 @@ static struct page *alloc_huge_page(struct 
vm_area_struct *vma,
chg = vma_needs_reservation(h, vma, addr);
if (chg < 0)
return ERR_PTR(-ENOMEM);
-   if (chg)
-   if (hugepage_subpool_get_pages(spool, chg))
+   if (chg || avoid_reserve)
+   if (hugepage_subpool_get_pages(spool, 1))
return ERR_PTR(-ENOSPC);
 
ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), _cg);
if (ret) {
-   hugepage_subpool_put_pages(spool, chg);
+   if (chg || avoid_reserve)
+   hugepage_subpool_put_pages(spool, 1);
return ERR_PTR(-ENOSPC);
}
spin_lock(_lock);
@@ -1162,7 +1163,8 @@ static struct page *alloc_huge_page(struct vm_area_struct 
*vma,
hugetlb_cgroup_uncharge_cgroup(idx,
   pages_per_huge_page(h),
   h_cg);
-   hugepage_subpool_put_pages(spool, chg);
+   if (chg || avoid_reserve)
+   hugepage_subpool_put_pages(spool, 1);
return ERR_PTR(-ENOSPC);
}
spin_lock(_lock);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 03/20] mm, hugetlb: fix subpool accounting handling

2013-09-05 Thread Joonsoo Kim
There is a case that we attempt to allocate a hugepage with chg = 0 and
avoid_reserve = 1. Although chg = 0 means that it has a reserved hugepage,
we wouldn't use it, since avoid_reserve = 1 represents that we don't want
to allocate a hugepage from a reserved pool. This happens when the parent
process that created a MAP_PRIVATE mapping is about to perform a COW due to
a shared page count and it attempts to satisfy the allocation without using
the existing reserves.

In this case, we would not dequeue a reserved hugepage and, instead, try
to allocate a new hugepage. Therefore, we should check subpool counter
for a new hugepage. This patch implements it.

Reviewed-by: Aneesh Kumar K.V 
Signed-off-by: Joonsoo Kim 
---
Replenishing commit message and adding reviewed-by tag.

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 12b6581..ea1ae0a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1144,13 +1144,14 @@ static struct page *alloc_huge_page(struct 
vm_area_struct *vma,
chg = vma_needs_reservation(h, vma, addr);
if (chg < 0)
return ERR_PTR(-ENOMEM);
-   if (chg)
-   if (hugepage_subpool_get_pages(spool, chg))
+   if (chg || avoid_reserve)
+   if (hugepage_subpool_get_pages(spool, 1))
return ERR_PTR(-ENOSPC);
 
ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), _cg);
if (ret) {
-   hugepage_subpool_put_pages(spool, chg);
+   if (chg || avoid_reserve)
+   hugepage_subpool_put_pages(spool, 1);
return ERR_PTR(-ENOSPC);
}
spin_lock(_lock);
@@ -1162,7 +1163,8 @@ static struct page *alloc_huge_page(struct vm_area_struct 
*vma,
hugetlb_cgroup_uncharge_cgroup(idx,
   pages_per_huge_page(h),
   h_cg);
-   hugepage_subpool_put_pages(spool, chg);
+   if (chg || avoid_reserve)
+   hugepage_subpool_put_pages(spool, 1);
return ERR_PTR(-ENOSPC);
}
spin_lock(_lock);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 1/4] mm/zswap: bugfix: memory leak when re-swapon

2013-09-05 Thread Weijie Yang
zswap_tree is not freed on swapoff, and it gets kmalloc'ed again on swapon,
so a memory leak occurs.

Modify: free memory of zswap_tree in zswap_frontswap_invalidate_area().

Signed-off-by: Weijie Yang 
---
 mm/zswap.c |4 
 1 file changed, 4 insertions(+)

diff --git a/mm/zswap.c b/mm/zswap.c
index deda2b6..cbd9578 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -816,6 +816,10 @@ static void zswap_frontswap_invalidate_area(unsigned type)
}
tree->rbroot = RB_ROOT;
spin_unlock(>lock);
+
+   zbud_destroy_pool(tree->pool);
+   kfree(tree);
+   zswap_trees[type] = NULL;
 }
 
 static struct zbud_ops zswap_zbud_ops = {
-- 
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 2/4] mm/zswap: bugfix: memory leak when invalidate and reclaim occur concurrently

2013-09-05 Thread Weijie Yang
Consider the following scenario:
thread 0: reclaim entry x (get refcount, but not call zswap_get_swap_cache_page)
thread 1: call zswap_frontswap_invalidate_page to invalidate entry x.
finished, entry x and its zbud is not freed as its refcount != 0
now, the swap_map[x] = 0
thread 0: now call zswap_get_swap_cache_page
swapcache_prepare return -ENOENT because entry x is not used any more
zswap_get_swap_cache_page return ZSWAP_SWAPCACHE_NOMEM
zswap_writeback_entry do nothing except put refcount
Now, the memory of zswap_entry x and its zpage leak.

Modify:
- check the refcount in fail path, free memory if it is not referenced.
- use ZSWAP_SWAPCACHE_FAIL instead of ZSWAP_SWAPCACHE_NOMEM, as the fail path
can be caused not only by nomem but also by invalidate.

Signed-off-by: Weijie Yang 
---
 mm/zswap.c |   21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index cbd9578..1be7b90 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -387,7 +387,7 @@ static void zswap_free_entry(struct zswap_tree *tree, 
struct zswap_entry *entry)
 enum zswap_get_swap_ret {
ZSWAP_SWAPCACHE_NEW,
ZSWAP_SWAPCACHE_EXIST,
-   ZSWAP_SWAPCACHE_NOMEM
+   ZSWAP_SWAPCACHE_FAIL,
 };
 
 /*
@@ -401,9 +401,9 @@ enum zswap_get_swap_ret {
  * added to the swap cache, and returned in retpage.
  *
  * If success, the swap cache page is returned in retpage
- * Returns 0 if page was already in the swap cache, page is not locked
- * Returns 1 if the new page needs to be populated, page is locked
- * Returns <0 on error
+ * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
+ * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated, page is 
locked
+ * Returns ZSWAP_SWAPCACHE_FAIL on error
  */
 static int zswap_get_swap_cache_page(swp_entry_t entry,
struct page **retpage)
@@ -475,7 +475,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
if (new_page)
page_cache_release(new_page);
if (!found_page)
-   return ZSWAP_SWAPCACHE_NOMEM;
+   return ZSWAP_SWAPCACHE_FAIL;
*retpage = found_page;
return ZSWAP_SWAPCACHE_EXIST;
 }
@@ -529,11 +529,11 @@ static int zswap_writeback_entry(struct zbud_pool *pool, 
unsigned long handle)
 
/* try to allocate swap cache page */
switch (zswap_get_swap_cache_page(swpentry, )) {
-   case ZSWAP_SWAPCACHE_NOMEM: /* no memory */
+   case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
ret = -ENOMEM;
goto fail;
 
-   case ZSWAP_SWAPCACHE_EXIST: /* page is unlocked */
+   case ZSWAP_SWAPCACHE_EXIST:
/* page is already in the swap cache, ignore for now */
page_cache_release(page);
ret = -EEXIST;
@@ -591,7 +591,12 @@ static int zswap_writeback_entry(struct zbud_pool *pool, 
unsigned long handle)
 
 fail:
spin_lock(>lock);
-   zswap_entry_put(entry);
+   refcount = zswap_entry_put(entry);
+   if (refcount <= 0) {
+   /* invalidate happened, consider writeback as success */
+   zswap_free_entry(tree, entry);
+   ret = 0;
+   }
spin_unlock(>lock);
return ret;
 }
-- 
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 3/4] mm/zswap: avoid unnecessary page scanning

2013-09-05 Thread Weijie Yang
Add SetPageReclaim before __swap_writepage so that the page can be moved to the
tail of the inactive list. This avoids unnecessary page scanning, as this
page was already reclaimed by the swap subsystem.

Signed-off-by: Weijie Yang 
---
 mm/zswap.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/zswap.c b/mm/zswap.c
index 1be7b90..cc40e6a 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -556,6 +556,9 @@ static int zswap_writeback_entry(struct zbud_pool *pool, 
unsigned long handle)
SetPageUptodate(page);
}
 
+   /* move it to the tail of the inactive list after end_writeback */
+   SetPageReclaim(page);
+
/* start writeback */
__swap_writepage(page, , end_swap_bio_write);
page_cache_release(page);
-- 
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 4/4] mm/zswap: use GFP_NOIO instead of GFP_KERNEL

2013-09-05 Thread Weijie Yang
To avoid the zswap store and reclaim functions being called recursively,
use GFP_NOIO instead of GFP_KERNEL.

Signed-off-by: Weijie Yang 
---
 mm/zswap.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index cc40e6a..3d05ed8 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -427,7 +427,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
 * Get a new page to read into from swap.
 */
if (!new_page) {
-   new_page = alloc_page(GFP_KERNEL);
+   new_page = alloc_page(GFP_NOIO);
if (!new_page)
break; /* Out of memory */
}
@@ -435,7 +435,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
/*
 * call radix_tree_preload() while we can wait.
 */
-   err = radix_tree_preload(GFP_KERNEL);
+   err = radix_tree_preload(GFP_NOIO);
if (err)
break;
 
@@ -636,7 +636,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t 
offset,
}
 
/* allocate entry */
-   entry = zswap_entry_cache_alloc(GFP_KERNEL);
+   entry = zswap_entry_cache_alloc(GFP_NOIO);
if (!entry) {
zswap_reject_kmemcache_fail++;
ret = -ENOMEM;
-- 
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 0/4] mm/zswap bugfix: memory leaks and other problems

2013-09-05 Thread Weijie Yang
This patch series fixes a few bugs in zswap, based on Linux-3.11.

v1 --> v2
- free memory in zswap_frontswap_invalidate_area (in patch 1)
- fix whitespace corruption (line wrapping)

Corresponding mail thread: https://lkml.org/lkml/2013/8/18/59

These issues fixed/optimized are:

 1. memory leaks when re-swapon
 
 2. memory leaks when invalidate and reclaim occur concurrently
 
 3. avoid unnecessary page scanning
 
 4. use GFP_NOIO instead of GFP_KERNEL to avoid zswap store and reclaim 
functions called recursively

Issues discussed in that mail thread that are NOT fixed, as they happen rarely
or are not a big problem:

 1. a "theoretical race condition" when reclaim page
When a handle is allocated from zbud, zbud considers the handle to be validly
in use by the upper layer (zswap) and a candidate for reclaim. But zswap still
has to initialize it, e.g. by setting the swapentry and adding it to the rbtree,
so there is a race condition, such as:
 thread 0: obtain handle x from zbud_alloc
 thread 1: zbud_reclaim_page is called
 thread 1: callback zswap_writeback_entry to reclaim handle x
 thread 1: get swpentry from handle x (it is random value now)
 thread 1: bad thing may happen
 thread 0: initialize handle x with swapentry

2. frontswap_map bitmap not cleared after zswap reclaim
Frontswap uses the frontswap_map bitmap to track pages in the "backend"
implementation; when zswap reclaims a page, the corresponding bitmap record
is not cleared.

 mm/zswap.c |   34 +++---
 1 file changed, 23 insertions(+), 11 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 0/2] ext4: increase mbcache scalability

2013-09-05 Thread Andreas Dilger
On 2013-09-05, at 3:49 AM, Thavatchai Makphaibulchoke wrote:
> On 09/05/2013 02:35 AM, Theodore Ts'o wrote:
>> How did you gather these results?  The mbcache is only used if you
>> are using extended attributes, and only if the extended attributes don't fit 
>> in the inode's extra space.
>> 
>> I checked aim7, and it doesn't do any extended attribute operations.
>> So why are you seeing differences?  Are you doing something like
>> deliberately using 128 byte inodes (which is not the default inode
>> size), and then enabling SELinux, or some such?
> 
> No, I did not do anything special, including changing an inode's size. I just 
> used the profile data, which indicated the mb_cache module as one of the 
> bottlenecks.  Please see below for perf data from one of the new_fserver runs, 
> which also shows some mb_cache activities.
> 
> 
>|--3.51%-- __mb_cache_entry_find
>|  mb_cache_entry_find_first
>|  ext4_xattr_cache_find
>|  ext4_xattr_block_set
>|  ext4_xattr_set_handle
>|  ext4_initxattrs
>|  security_inode_init_security
>|  ext4_init_security

Looks like this is some large security xattr, or enough smaller
xattrs to exceed the ~120 bytes of in-inode xattr storage.  How
big is the SELinux xattr (assuming that is what it is)?

> Looks like it's a bit harder to disable mbcache than I thought.
> I ended up adding code to collect the statistics.
> 
> With selinux enabled, for new_fserver workload of aim7, there
> are a total of 0x7e054201 ext4_xattr_cache_find() calls
> that result in a hit and 0xc100 calls that are not.
> The number does not seem to favor the complete disabling of
> mbcache in this case.

This is about a 65% hit rate, which seems reasonable.

You could try a few different things here:
- disable selinux completely (boot with "selinux=0" on the kernel
  command line) and see how much faster it is
- format your ext4 filesystem with larger inodes (-I 512) and see
  if this is an improvement or not.  That depends on the size of
  the selinux xattrs and if they will fit into the extra 256 bytes
  of xattr space these larger inodes will give you.  The performance
  might also be worse, since there will be more data to read/write
  for each inode, but it would avoid seeking to the xattr blocks.

Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: build warning after merge of the ceph tree

2013-09-05 Thread Stephen Rothwell
Hi Sage,

After merging the ceph tree, today's linux-next build (x86_64
allmodconfig) produced this warning:

In file included from fs/ceph/super.h:4:0,
 from fs/ceph/cache.c:26:
include/linux/ceph/ceph_debug.h:4:0: warning: "pr_fmt" redefined [enabled by 
default]
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 ^
In file included from include/linux/kernel.h:13:0,
 from include/asm-generic/bug.h:13,
 from arch/x86/include/asm/bug.h:38,
 from include/linux/bug.h:4,
 from include/linux/thread_info.h:11,
 from include/linux/preempt.h:9,
 from include/linux/spinlock.h:50,
 from include/linux/wait.h:7,
 from include/linux/fs.h:6,
 from include/linux/fscache.h:21,
 from fs/ceph/cache.c:24:
include/linux/printk.h:206:0: note: this is the location of the previous 
definition
 #define pr_fmt(fmt) fmt
 ^

Probably introduced by commit cb0963fcf836 ("ceph: use fscache as a local
presisent cache").

pr_fmt needs to be defined before printk.h gets included.

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au


pgpepYAR3zjQQ.pgp
Description: PGP signature


Re: [PATCH] ethernet/arc/arc_emac: optimize the Tx/Tx-reclaim paths a bit

2013-09-05 Thread David Miller
From: Vineet Gupta 
Date: Fri, 6 Sep 2013 04:24:39 +

> On 09/05/2013 11:54 PM, David Miller wrote:
>> You should keep the check in the transmit queueing code as a BUG check,
>> almost every driver has code of the form (using NIU as an example):
 ...
>> Otherwise queue management bugs are incredibly hard to diagnose.
>>
>> I'm not applying this patch.
> 
> The check is already there for current BD. What I removed was checking for 
> next BD
> too (please see below). IMHO this is useless since it will be done in next
> iteration anyways. In my tests, the next check never got hit, so it was waste 
> of
> cycles.
> 
> static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev)
> {
> if (unlikely((le32_to_cpu(*info) & OWN_MASK) == FOR_EMAC)) {
> netif_stop_queue(ndev);
> return NETDEV_TX_BUSY;
> }
> 
> ...
> *txbd_curr = (*txbd_curr + 1) % TX_BD_NUM;
> 
> -   /* Get "info" of the next BD */
> -   info = >txbd[*txbd_curr].info;
> -
> -   /* Check if if Tx BD ring is full - next BD is still owned by EMAC */
> -   if (unlikely((le32_to_cpu(*info) & OWN_MASK) == FOR_EMAC))
> -   netif_stop_queue(ndev);
> 
> OTOH, I do see a slight stats update issue - if the queue is stopped (but pkt 
> not
> dropped) we are failing to increment tx_errors. But that would be a separate 
> patch.

It is exactly the correct thing to do.  The driver should _NEVER_
return NETDEV_TX_BUSY under normal circumstances.  The queue should
always be stopped by the ->ndo_start_xmit() method when it fills the
queue.

Again, when ->ndo_start_xmit() is invoked, it should never see the
queue full.  When that happens it is a bug.

You are deleting exactly the correct part of this function, what it is
doing right now is precisely the correct way to manage netif queue
state.

The only valid change you can make here is to make the:

if (unlikely((le32_to_cpu(*info) & OWN_MASK) == FOR_EMAC)) {
netif_stop_queue(ndev);
return NETDEV_TX_BUSY;
}

print out an error message and increment tx_errors.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/9] KGDB/KDB: add new system NMI entry code to KDB

2013-09-05 Thread Jason Wessel
On 09/05/2013 05:50 PM, Mike Travis wrote:
> This patch adds a new "KDB_REASON" code (KDB_REASON_SYSTEM_NMI).  This
> is purely cosmetic to distinguish it from the other various reasons that
> NMI may occur and are usually after an error occurred.  Also the dumping
> of registers is not done to more closely match what is displayed when KDB
> is entered manually via the sysreq 'g' key.


This patch is not quite right.   See below.


> 
> Signed-off-by: Mike Travis 
> Reviewed-by: Dimitri Sivanich 
> Reviewed-by: Hedi Berriche 
> ---
>  include/linux/kdb.h |1 +
>  include/linux/kgdb.h|1 +
>  kernel/debug/debug_core.c   |5 +
>  kernel/debug/kdb/kdb_debugger.c |5 -
>  kernel/debug/kdb/kdb_main.c |3 +++
>  5 files changed, 14 insertions(+), 1 deletion(-)
> 
> --- linux.orig/include/linux/kdb.h
> +++ linux/include/linux/kdb.h
> @@ -109,6 +109,7 @@ typedef enum {
>   KDB_REASON_RECURSE, /* Recursive entry to kdb;
>* regs probably valid */
>   KDB_REASON_SSTEP,   /* Single Step trap. - regs valid */
> + KDB_REASON_SYSTEM_NMI,  /* In NMI due to SYSTEM cmd; regs valid */
>  } kdb_reason_t;
>  
>  extern int kdb_trap_printk;
> --- linux.orig/include/linux/kgdb.h
> +++ linux/include/linux/kgdb.h
> @@ -52,6 +52,7 @@ extern int kgdb_connected;
>  extern int kgdb_io_module_registered;
>  
>  extern atomic_t  kgdb_setting_breakpoint;
> +extern atomic_t  kgdb_system_nmi;


We don't need extra atomics.  You should add another variable to the kgdb_state 
which is processor specific in this case.

Better yet, just set the ks->err_code properly in your kgdb_nmicallin() or in 
the origination call to kgdb_nmicallback() from your nmi handler (remember I 
still have the question pending if we actually need kgdb_nmicallin() in the 
first place).  You already did the work of adding another NMI type to the enum.  
We just need to use the ks->err_code variable as well.


>  extern atomic_t  kgdb_cpu_doing_single_step;
>  
>  extern struct task_struct*kgdb_usethread;
> --- linux.orig/kernel/debug/debug_core.c
> +++ linux/kernel/debug/debug_core.c
> @@ -125,6 +125,7 @@ static atomic_t   masters_in_kgdb;
>  static atomic_t  slaves_in_kgdb;
>  static atomic_t  kgdb_break_tasklet_var;
>  atomic_t kgdb_setting_breakpoint;
> +atomic_t kgdb_system_nmi;
>  
>  struct task_struct   *kgdb_usethread;
>  struct task_struct   *kgdb_contthread;
> @@ -760,7 +761,11 @@ int kgdb_nmicallin(int cpu, int trapnr,
>  
>   /* Indicate there are slaves waiting */
>   kgdb_info[cpu].send_ready = send_ready;
> +
> + /* Use new reason code "SYSTEM_NMI" */
> + atomic_inc(_system_nmi);
>   kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER);
> + atomic_dec(_system_nmi);
>   kgdb_do_roundup = save_kgdb_do_roundup;
>   kgdb_info[cpu].send_ready = NULL;
>  
> --- linux.orig/kernel/debug/kdb/kdb_debugger.c
> +++ linux/kernel/debug/kdb/kdb_debugger.c
> @@ -69,7 +69,10 @@ int kdb_stub(struct kgdb_state *ks)
>   if (atomic_read(_setting_breakpoint))
>   reason = KDB_REASON_KEYBOARD;
>  
> - if (in_nmi())
> + if (atomic_read(_system_nmi))
> + reason = KDB_REASON_SYSTEM_NMI;


This would get changed to if (ks->err_code == KDB_REASON_SYSTEM_NMI &&
ks->signo == SIGTRAP)

Cheers,
Jason.

> +
> + else if (in_nmi())
>   reason = KDB_REASON_NMI;
>  
>   for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) {
> --- linux.orig/kernel/debug/kdb/kdb_main.c
> +++ linux/kernel/debug/kdb/kdb_main.c
> @@ -1200,6 +1200,9 @@ static int kdb_local(kdb_reason_t reason
>  instruction_pointer(regs));
>   kdb_dumpregs(regs);
>   break;
> + case KDB_REASON_SYSTEM_NMI:
> + kdb_printf("due to System NonMaskable Interrupt\n");
> + break;
>   case KDB_REASON_NMI:
>   kdb_printf("due to NonMaskable Interrupt @ "
>  kdb_machreg_fmt "\n",
> 
> -- 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH RESEND v3 3/7] Intel MIC Host Driver, card OS state management.

2013-09-05 Thread Greg Kroah-Hartman
On Thu, Sep 05, 2013 at 04:41:55PM -0700, Sudeep Dutt wrote:
> +What:/sys/class/mic/mic(x)/firmware
> +Date:August 2013
> +KernelVersion:   3.11
> +Contact: Sudeep Dutt 
> +Description:
> + When read, this sysfs entry provides the path name under
> + /lib/firmware/ where the firmware image to be booted on the
> + card can be found. The entry can be written to change the
> + firmware image location under /lib/firmware/.

I don't understand, is the path under the HOST device, or the Client
device's disk?  Why do you need to change the path on the HOST?  What's
wrong with the existing firmware path selection we have in the kernel?

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH RESEND v3 3/7] Intel MIC Host Driver, card OS state management.

2013-09-05 Thread Greg Kroah-Hartman
On Thu, Sep 05, 2013 at 04:41:55PM -0700, Sudeep Dutt wrote:
> +What:/sys/class/mic/mic(x)/cmdline
> +Date:August 2013
> +KernelVersion:   3.11
> +Contact: Sudeep Dutt 
> +Description:
> + An Intel MIC device runs a Linux OS during its operation. Before
> + booting this card OS, it is possible to pass kernel command line
> + options to configure various features in it, similar to
> + self-bootable machines. When read, this entry provides
> + information about the current kernel command line options set to
> + boot the card OS. This entry can be written to change the
> + existing kernel command line options. Typically, the user would
> + want to read the current command line options, append new ones
> + or modify existing ones and then write the whole kernel command
> + line back to this entry.

Is a PAGE_SIZE value going to be big enough for your command line?  I
know some embedded systems have horribly long command lines, hopefully
this will be enough for you.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH RESEND v3 3/7] Intel MIC Host Driver, card OS state management.

2013-09-05 Thread Greg Kroah-Hartman
Again, very minor fixups for later (I can even do them...)

> +static DEVICE_ATTR(state, S_IRUGO|S_IWUSR, mic_show_state, mic_store_state);

DEVICE_ATTR_RW() please.

Same for the other attributes you create in this patch.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 1/1] dcache: Translating dentry into pathname without taking rename_lock

2013-09-05 Thread Linus Torvalds
On Thu, Sep 5, 2013 at 7:01 PM, Waiman Long  wrote:
>
> I am sorry that I misunderstand what you said. I will do what you and Al
> advise me to do.

I'm sorry I shouted at you. I was getting a bit frustrated there..

Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH RESEND v3 1/7] Intel MIC Host Driver for X100 family.

2013-09-05 Thread Greg Kroah-Hartman
On Thu, Sep 05, 2013 at 04:41:31PM -0700, Sudeep Dutt wrote:
>  drivers/misc/mic/common/mic_device.h  |  37 +++
>  drivers/misc/mic/host/mic_device.h| 109 +

Two different files, with the same name?  You are asking for trouble in
the future, getting them confused :)

Please try to pick a unique name, especially when you later do things
like:

> +#include "../common/mic_device.h"
> +#include "mic_device.h"

Which just looks odd.

Again, not a big deal, follow-on patch can fix this.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH RESEND v3 1/7] Intel MIC Host Driver for X100 family.

2013-09-05 Thread Greg Kroah-Hartman
Very minor nits, you can change this in a future add-on patch:

> +static DEVICE_ATTR(family, S_IRUGO, mic_show_family, NULL);

This should use DEVICE_ATTR_RO(), so that we don't have to audit the
permissions of your DEVICE_ATTR() files.

> +static DEVICE_ATTR(stepping, S_IRUGO, mic_show_stepping, NULL);

Same here.

> +static struct attribute *mic_default_attrs[] = {
> + _attr_family.attr,
> + _attr_stepping.attr,
> +
> + NULL
> +};
> +
> +static struct attribute_group mic_attr_group = {
> + .attrs = mic_default_attrs,
> +};
> +
> +static const struct attribute_group *__mic_attr_group[] = {
> + _attr_group,
> + NULL
> +};

These last two structures can be replaced with:
ATTRIBUTE_GROUPS(mic_default);

> +void mic_sysfs_init(struct mic_device *mdev)
> +{
> + mdev->attr_group = __mic_attr_group;
> +}

This is "odd", why not just export the data structure and reference it
in the other code?  The pci core does this, and so do other busses.

Anyway, it's not a big deal, just a bit strange to me.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/8] ceph: fscache support & upstream changes

2013-09-05 Thread Milosz Tanski
David,

After running this for a day on some loaded machines I ran into what
looks like an old issue with the new code. I remember you saw an issue
that manifested itself in a similar way a while back.

[13837253.462779] FS-Cache: Assertion failed
[13837253.462782] 3 == 5 is false
[13837253.462807] [ cut here ]
[13837253.462811] kernel BUG at fs/fscache/operation.c:414!
[13837253.462815] invalid opcode:  [#1] SMP
[13837253.462820] Modules linked in: cachefiles microcode auth_rpcgss
oid_registry nfsv4 nfs lockd ceph sunrpc libceph fscache raid10
raid456 async_pq async_xor async_memcpy async_raid6_recov async_tx
raid1 raid0 multipath linear btrfs raid6_pq lzo_compress xor
zlib_deflate libcrc32c
[13837253.462851] CPU: 1 PID: 1848 Comm: kworker/1:2 Not tainted
3.11.0-rc5-virtual #55
[13837253.462870] Workqueue: ceph-revalidate ceph_revalidate_work [ceph]
[13837253.462875] task: 8804251f16f0 ti: 8804047fa000 task.ti:
8804047fa000
[13837253.462879] RIP: e030:[]  []
fscache_put_operation+0x2ad/0x330 [fscache]
[13837253.462893] RSP: e02b:8804047fbd58  EFLAGS: 00010296
[13837253.462896] RAX: 000f RBX: 880424049d80 RCX:
0006
[13837253.462901] RDX: 0007 RSI: 0007 RDI:
8804047f0218
[13837253.462906] RBP: 8804047fbd68 R08:  R09:

[13837253.462910] R10: 0108 R11: 0107 R12:
8804251cf928
[13837253.462915] R13: 8804253c7370 R14:  R15:

[13837253.462923] FS:  7f5c56e43700()
GS:88044350() knlGS:
[13837253.462928] CS:  e033 DS:  ES:  CR0: 8005003b
[13837253.462932] CR2: 7fc08b7ee000 CR3: 0004259a4000 CR4:
2660
[13837253.462936] Stack:
[13837253.462939]  880424049d80 8804251cf928 8804047fbda8
a016def1
[13837253.462946]  88042b462b20 88040701c750 88040701c730
88040701c3f0
[13837253.462953]  0003  8804047fbde8
a025ba3f
[13837253.462959] Call Trace:
[13837253.462966]  []
__fscache_check_consistency+0x1a1/0x2c0 [fscache]
[13837253.462977]  [] ceph_revalidate_work+0x8f/0x120 [ceph]
[13837253.462987]  [] process_one_work+0x179/0x490
[13837253.462992]  [] worker_thread+0x11b/0x370
[13837253.462998]  [] ? manage_workers.isra.21+0x2e0/0x2e0
[13837253.463004]  [] kthread+0xc0/0xd0
[13837253.463011]  [] ? perf_trace_xen_mmu_pmd_clear+0x50/0xc0
[13837253.463017]  [] ? flush_kthread_worker+0xb0/0xb0
[13837253.463024]  [] ret_from_fork+0x7c/0xb0
[13837253.463029]  [] ? flush_kthread_worker+0xb0/0xb0
[13837253.463033] Code: 31 c0 e8 5d e6 3e e1 48 c7 c7 04 8e 17 a0 31
c0 e8 4f e6 3e e1 8b 73 40 ba 05 00 00 00 48 c7 c7 62 8e 17 a0 31 c0
e8 39 e6 3e e1 <0f> 0b 65 48 8b 34 25 80 c7 00 00 48 c7 c7 4f 8e 17 a0
48 81 c6
[13837253.463071] RIP  []
fscache_put_operation+0x2ad/0x330 [fscache]
[13837253.463079]  RSP 
[13837253.463085] ---[ end trace 2972d68e8efd961e ]---
[13837253.463130] BUG: unable to handle kernel paging request at
ffd8
[13837253.463136] IP: [] kthread_data+0x11/0x20
[13837253.463142] PGD 1a0f067 PUD 1a11067 PMD 0
[13837253.463146] Oops:  [#2] SMP
[13837253.463150] Modules linked in: cachefiles microcode auth_rpcgss
oid_registry nfsv4 nfs lockd ceph sunrpc libceph fscache raid10
raid456 async_pq async_xor async_memcpy async_raid6_recov async_tx
raid1 raid0 multipath linear btrfs raid6_pq lzo_compress xor
zlib_deflate libcrc32c
[13837253.463176] CPU: 1 PID: 1848 Comm: kworker/1:2 Tainted: G  D
 3.11.0-rc5-virtual #55
[13837253.463190] task: 8804251f16f0 ti: 8804047fa000 task.ti:
8804047fa000
[13837253.463194] RIP: e030:[]  []
kthread_data+0x11/0x20
[13837253.463201] RSP: e02b:8804047fba00  EFLAGS: 00010046
[13837253.463204] RAX:  RBX:  RCX:
81c30d00
[13837253.463209] RDX: 0001 RSI: 0001 RDI:
8804251f16f0
[13837253.463213] RBP: 8804047fba18 R08: 27bf1216 R09:

[13837253.463217] R10: 88044360cec0 R11: 000e R12:
0001
[13837253.463222] R13: 8804251f1ac8 R14: 88042c498000 R15:

[13837253.463228] FS:  7f5c56e43700()
GS:88044350() knlGS:
[13837253.463233] CS:  e033 DS:  ES:  CR0: 8005003b
[13837253.463237] CR2: 0028 CR3: 0004259a4000 CR4:
2660
[13837253.463241] Stack:
[13837253.463243]  8107c3d6 880443513fc0 0001
8804047fba98
[13837253.463249]  81568308 0003 8804251f1ce8
8804251f16f0
[13837253.463255]  8804047fbfd8 8804047fbfd8 8804047fbfd8
8804047fba78
[13837253.463261] Call Trace:
[13837253.463265]  [] ? wq_worker_sleeping+0x16/0x90
[13837253.463272]  [] __schedule+0x5c8/0x820
[13837253.463276]  [] schedule+0x29/0x70
[13837253.662186]  [] do_exit+0x6e0/0xa60

Re: [PATCH 2/3] thermal: samsung: change base_common to more meaningful base_second

2013-09-05 Thread amit daniel kachhap
On Wed, Sep 4, 2013 at 9:53 AM, Naveen Krishna Chatradhi
 wrote:
> On Exynos5440 and Exynos5420 there are registers common
> across the TMU channels.
>
> To support that, we introduced a ADDRESS_MULTIPLE flag in the
> driver and the 2nd set of register base and size are provided
> in the "reg" property of the node.
>
> As per Amit's suggestion, this patch changes the base_common
> to base_second and SHARED_MEMORY to ADDRESS_MULTIPLE.
>
> Signed-off-by: Naveen Krishna Chatradhi 
The changes look good. For all the 3 patches in the series,

Acked-by: Amit Daniel Kachhap 
Reviewed-by: Amit Daniel Kachhap

Thanks,
Amit Daniel
> ---
> Changes since v2:
> Changed the flag name from SHARED_MEMORY to ADDRESS_MULTIPLE.
> https://lkml.org/lkml/2013/8/1/38
>
>  .../devicetree/bindings/thermal/exynos-thermal.txt |4 ++--
>  drivers/thermal/samsung/exynos_tmu.c   |   12 ++--
>  drivers/thermal/samsung/exynos_tmu.h   |4 ++--
>  drivers/thermal/samsung/exynos_tmu_data.c  |2 +-
>  4 files changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt 
> b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
> index 284f530..116cca0 100644
> --- a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
> +++ b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
> @@ -11,8 +11,8 @@
>  - reg : Address range of the thermal registers. For soc's which has multiple
> instances of TMU and some registers are shared across all TMU's like
> interrupt related then 2 set of register has to supplied. First set
> -   belongs to each instance of TMU and second set belongs to common TMU
> -   registers.
> +   belongs to each instance of TMU and second set belongs to second set
> +   of common TMU registers.
>  - interrupts : Should contain interrupt for thermal system
>  - clocks : The main clock for TMU device
>  - clock-names : Thermal system clock name
> diff --git a/drivers/thermal/samsung/exynos_tmu.c 
> b/drivers/thermal/samsung/exynos_tmu.c
> index d201ed8..3a55caf 100644
> --- a/drivers/thermal/samsung/exynos_tmu.c
> +++ b/drivers/thermal/samsung/exynos_tmu.c
> @@ -41,7 +41,7 @@
>   * @id: identifier of the one instance of the TMU controller.
>   * @pdata: pointer to the tmu platform/configuration data
>   * @base: base address of the single instance of the TMU controller.
> - * @base_common: base address of the common registers of the TMU controller.
> + * @base_second: base address of the common registers of the TMU controller.
>   * @irq: irq number of the TMU controller.
>   * @soc: id of the SOC type.
>   * @irq_work: pointer to the irq work structure.
> @@ -56,7 +56,7 @@ struct exynos_tmu_data {
> int id;
> struct exynos_tmu_platform_data *pdata;
> void __iomem *base;
> -   void __iomem *base_common;
> +   void __iomem *base_second;
> int irq;
> enum soc_type soc;
> struct work_struct irq_work;
> @@ -297,7 +297,7 @@ skip_calib_data:
> }
> /*Clear the PMIN in the common TMU register*/
> if (reg->tmu_pmin && !data->id)
> -   writel(0, data->base_common + reg->tmu_pmin);
> +   writel(0, data->base_second + reg->tmu_pmin);
>  out:
> clk_disable(data->clk);
> mutex_unlock(>lock);
> @@ -451,7 +451,7 @@ static void exynos_tmu_work(struct work_struct *work)
>
> /* Find which sensor generated this interrupt */
> if (reg->tmu_irqstatus) {
> -   val_type = readl(data->base_common + reg->tmu_irqstatus);
> +   val_type = readl(data->base_second + reg->tmu_irqstatus);
> if (!((val_type >> data->id) & 0x1))
> goto out;
> }
> @@ -582,7 +582,7 @@ static int exynos_map_dt_data(struct platform_device 
> *pdev)
>  * Check if the TMU shares some registers and then try to map the
>  * memory of common registers.
>  */
> -   if (!TMU_SUPPORTS(pdata, SHARED_MEMORY))
> +   if (!TMU_SUPPORTS(pdata, ADDRESS_MULTIPLE))
> return 0;
>
> if (of_address_to_resource(pdev->dev.of_node, 1, )) {
> @@ -590,7 +590,7 @@ static int exynos_map_dt_data(struct platform_device 
> *pdev)
> return -ENODEV;
> }
>
> -   data->base_common = devm_ioremap(>dev, res.start,
> +   data->base_second = devm_ioremap(>dev, res.start,
> resource_size());
> if (!data->base) {
> dev_err(>dev, "Failed to ioremap memory\n");
> diff --git a/drivers/thermal/samsung/exynos_tmu.h 
> b/drivers/thermal/samsung/exynos_tmu.h
> index 7c6c34a..ebd2ec1 100644
> --- a/drivers/thermal/samsung/exynos_tmu.h
> +++ b/drivers/thermal/samsung/exynos_tmu.h
> @@ -59,7 +59,7 @@ enum soc_type {
>   * state(active/idle) can be checked.
>   * TMU_SUPPORT_EMUL_TIME - This 

Re: [PATCH 5/9] KGDB/KDB: add support for external NMI handler to call KGDB/KDB.

2013-09-05 Thread Jason Wessel
On 09/05/2013 05:50 PM, Mike Travis wrote:
> This patch adds a kgdb_nmicallin() interface that can be used by
> external NMI handlers to call the KGDB/KDB handler.  The primary need
> for this is for those types of NMI interrupts where all the CPUs
> have already received the NMI signal.  Therefore no send_IPI(NMI)
> is required, and in fact it will cause a 2nd unhandled NMI to occur.
> This generates the "Dazed and Confuzed" messages.
>
> Since all the CPUs are getting the NMI at roughly the same time, it's not
> guaranteed that the first CPU that hits the NMI handler will manage to
> enter KGDB and set the dbg_master_lock before the slaves start entering.

It should have been ok to have more than one master if this was some kind of 
watch dog.  The raw spin lock for the dbg_master_lock should have ensured that 
only a single CPU is in fact the master.  If it is the case that we cannot send 
a nested IPI at this point, the UV machine type should have replaced the 
kgdb_roundup_cpus() routine with something that will work, such as looking at 
the exception type on the way in and perhaps skipping the IPI send.

Also if there is no possibility of restarting the machine from this state it 
would have been possible to simply turn off kgdb_do_roundup in the custom 
kgdb_roundup_cpus().

It appears that the patch you created will work, but it comes at the cost of 
some complexity because you are also checking on the state of 
"kgdb_info[cpu].send_ready" in some other location in the NMI handler.  It 
might be better to consider not sending a nested NMI if all the CPUs are going 
to enter anyway in the master state.


>
> The new argument "send_ready" was added for KGDB to signal the NMI handler
> to release the slave CPUs for entry into KGDB.
>
> Signed-off-by: Mike Travis 
> Reviewed-by: Dimitri Sivanich 
> Reviewed-by: Hedi Berriche 
> ---
>  include/linux/kgdb.h  |1 +
>  kernel/debug/debug_core.c |   41 +
>  kernel/debug/debug_core.h |1 +
>  3 files changed, 43 insertions(+)
>
> --- linux.orig/include/linux/kgdb.h
> +++ linux/include/linux/kgdb.h
> @@ -310,6 +310,7 @@ extern int
>  kgdb_handle_exception(int ex_vector, int signo, int err_code,
>struct pt_regs *regs);
>  extern int kgdb_nmicallback(int cpu, void *regs);
> +extern int kgdb_nmicallin(int cpu, int trapnr, void *regs, atomic_t 
> *snd_rdy);
>  extern void gdbstub_exit(int status);
> 
>  extern intkgdb_single_step;
> --- linux.orig/kernel/debug/debug_core.c
> +++ linux/kernel/debug/debug_core.c
> @@ -578,6 +578,10 @@ return_normal:
>  /* Signal the other CPUs to enter kgdb_wait() */
>  if ((!kgdb_single_step) && kgdb_do_roundup)
>  kgdb_roundup_cpus(flags);
> +
> +/* If optional send ready pointer, signal CPUs to proceed */
> +if (kgdb_info[cpu].send_ready)
> +atomic_set(kgdb_info[cpu].send_ready, 1);
>  #endif
> 
>  /*
> @@ -729,6 +733,43 @@ int kgdb_nmicallback(int cpu, void *regs
>  return 0;
>  }
>  #endif
> +return 1;
> +}
> +
> +int kgdb_nmicallin(int cpu, int trapnr, void *regs, atomic_t *send_ready)
> +{
> +#ifdef CONFIG_SMP
> +if (!kgdb_io_ready(0))
> +return 1;
> +
> +if (kgdb_info[cpu].enter_kgdb == 0) {
> +struct kgdb_state kgdb_var;
> +struct kgdb_state *ks = _var;
> +int save_kgdb_do_roundup = kgdb_do_roundup;
> +
> +memset(ks, 0, sizeof(struct kgdb_state));
> +ks->cpu= cpu;
> +ks->ex_vector= trapnr;
> +ks->signo= SIGTRAP;
> +ks->err_code= 0;
> +ks->kgdb_usethreadid= 0;
> +ks->linux_regs= regs;
> +
> +/* Do not broadcast NMI */
> +kgdb_do_roundup = 0;
> +
> +/* Indicate there are slaves waiting */
> +kgdb_info[cpu].send_ready = send_ready;
> +kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER);

This is the one part of the patch I don't quite understand.  Why does the 
kgdb_nmicallin() desire to be the master core?

The circumstances under which this is called are not obvious.  Is it some kind 
of watchdog where you really do want to enter the debugger, or is it more to 
deal with nested slave interrupts where the round up would possibly have hung on 
this hardware?  If it is the latter, I would have thought this should be a slave 
and not the master.

Perhaps a comment in the code can clear this up?

Thanks,
Jason.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ethernet/arc/arc_emac: optimize the Tx/Tx-reclaim paths a bit

2013-09-05 Thread Vineet Gupta
Hi David,

On 09/05/2013 11:54 PM, David Miller wrote:
> From: Vineet Gupta 
> Date: Wed, 4 Sep 2013 18:33:11 +0530
>
>> This came out of staring at code due to recent performance fix.
>>
>> * TX BD reclaim can call netif_wake_queue() once, outside the loop if
>>   one/more BDs were freed, NO need to do this each iteration.
>>
>> * TX need not look at next BD to stop the netif queue. It rather be done
>>   in the next tx call, when it actually fails as the queue seldom gets
>>   full but the check nevertheless needs to be done for each packet Tx.
>>   Profiled this under heavy traffic (big tar file cp, LMBench networking
>>   tests) and saw not a single hit to that code.
>>
>> Signed-off-by: Vineet Gupta 
> You should keep the check in the transmit queueing code as a BUG check,
> almost every driver has code of the form (using NIU as an example):
>
>   if (niu_tx_avail(rp) <= (skb_shinfo(skb)->nr_frags + 1)) {
>   netif_tx_stop_queue(txq);
>   dev_err(np->device, "%s: BUG! Tx ring full when queue 
> awake!\n", dev->name);
>   rp->tx_errors++;
>   return NETDEV_TX_BUSY;
>   }
>
> and arc_emac should too.
>
> Otherwise queue management bugs are incredibly hard to diagnose.
>
> I'm not applying this patch.

The check is already there for current BD. What I removed was checking for next 
BD
too (please see below). IMHO this is useless since it will be done in next
iteration anyways. In my tests, the next check never got hit, so it was a waste of
cycles.

static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev)
{
if (unlikely((le32_to_cpu(*info) & OWN_MASK) == FOR_EMAC)) {
netif_stop_queue(ndev);
return NETDEV_TX_BUSY;
}

...
*txbd_curr = (*txbd_curr + 1) % TX_BD_NUM;

-   /* Get "info" of the next BD */
-   info = >txbd[*txbd_curr].info;
-
-   /* Check if if Tx BD ring is full - next BD is still owned by EMAC */
-   if (unlikely((le32_to_cpu(*info) & OWN_MASK) == FOR_EMAC))
-   netif_stop_queue(ndev);

OTOH, I do see a slight stats update issue - if the queue is stopped (but pkt 
not
dropped) we are failing to increment tx_errors. But that would be a separate 
patch.

-Vineet
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] net: stmmac: fix bad merge conflict resolution

2013-09-05 Thread Stephen Rothwell
Hi all,

On Thu, 05 Sep 2013 22:58:17 -0400 (EDT) David Miller  
wrote:
>
> From: Olof Johansson 
> Date: Thu,  5 Sep 2013 18:01:41 -0700
> 
> > Merge commit 06c54055bebf919249aa1eb68312887c3cfe77b4 did a bad conflict
> > resolution accidentally leaving out a closing brace. Add it back.
> > 
> > Signed-off-by: Olof Johansson 
> > ---
> > 
> > This breaks a handful of defconfigs on ARM, so it'd be good to see it
> > applied pretty quickly. Thanks!
> 
> Looks like Linus applied this, thanks Olof.

And I cherry-picked it into linux-next for today.

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au


pgputkdoVt9ya.pgp
Description: PGP signature


[PATCH 0/5] Squashfs: extra sanity checks and sanity check fixes

2013-09-05 Thread Phillip Lougher
Hi,

Following on from the "Squashfs: sanity check information from disk"
patch from Dan Carpenter, I have added a couple more sanity checks,
and fixed a couple of existing sanity checks (including the patch from
Dan Carpenter).

These sanity checks mainly exist to trap maliciously corrupted
filesystems either through using a deliberately modified mksquashfs,
or where the user has deliberately chosen to generate uncompressed
metadata and then corrupted it.

Normally metadata in Squashfs filesystems is compressed, which means
corruption (either accidental or malicious) is detected when
trying to decompress the metadata.  So corrupted data does not normally
get as far as the code paths in question here.

Phillip Lougher (5):
  Squashfs: fix corruption check in get_dir_index_using_name()
  Squashfs: fix corruption checks in squashfs_lookup()
  Squashfs: fix corruption checks in squashfs_readdir()
  Squashfs: add corruption check in get_dir_index_using_offset()
  Squashfs: add corruption check for type in squashfs_readdir()

 fs/squashfs/dir.c | 17 +
 fs/squashfs/namei.c   |  7 +++
 fs/squashfs/squashfs_fs.h |  5 -
 3 files changed, 20 insertions(+), 9 deletions(-)

-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/5] Squashfs: fix corruption checks in squashfs_lookup()

2013-09-05 Thread Phillip Lougher
The dir_count and size fields when read from disk are sanity
checked for correctness.  However, the sanity checks only check the
values are not greater than expected.  As dir_count and size were
incorrectly defined as signed ints, this can lead to corrupted values
appearing as negative which are not trapped.

Signed-off-by: Phillip Lougher 
---
 fs/squashfs/namei.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 342a5aa..67cad77 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -147,7 +147,8 @@ static struct dentry *squashfs_lookup(struct inode *dir, 
struct dentry *dentry,
struct squashfs_dir_entry *dire;
u64 block = squashfs_i(dir)->start + msblk->directory_table;
int offset = squashfs_i(dir)->offset;
-   int err, length, dir_count, size;
+   int err, length;
+   unsigned int dir_count, size;
 
TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset);
 
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/5] Squashfs: add corruption check in get_dir_index_using_offset()

2013-09-05 Thread Phillip Lougher
We read the size (of the name) field from disk.  This value should
be sanity checked for correctness to avoid blindly reading
huge amounts of unnecessary data from disk on corruption.

Note, here we're not actually reading the name into a buffer, but
skipping it, and so corruption doesn't cause buffer overflow, merely
large amounts of unnecessary data to be read.

Signed-off-by: Phillip Lougher 
---
 fs/squashfs/dir.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index 1192084..bd7155b 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -54,6 +54,7 @@ static int get_dir_index_using_offset(struct super_block *sb,
 {
struct squashfs_sb_info *msblk = sb->s_fs_info;
int err, i, index, length = 0;
+   unsigned int size;
struct squashfs_dir_index dir_index;
 
TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %lld\n",
@@ -81,8 +82,14 @@ static int get_dir_index_using_offset(struct super_block *sb,
 */
break;
 
+   size = le32_to_cpu(dir_index.size) + 1;
+
+   /* size should never be larger than SQUASHFS_NAME_LEN */
+   if (size > SQUASHFS_NAME_LEN)
+   break;
+
err = squashfs_read_metadata(sb, NULL, _start,
-   _offset, le32_to_cpu(dir_index.size) + 1);
+   _offset, size);
if (err < 0)
break;
 
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/5] Squashfs: fix corruption checks in squashfs_readdir()

2013-09-05 Thread Phillip Lougher
The dir_count and size fields when read from disk are sanity
checked for correctness.  However, the sanity checks only check the
values are not greater than expected.  As dir_count and size were
incorrectly defined as signed ints, this can lead to corrupted values
appearing as negative which are not trapped.

Signed-off-by: Phillip Lougher 
---
 fs/squashfs/dir.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index f7f527b..1192084 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -105,9 +105,8 @@ static int squashfs_readdir(struct file *file, struct 
dir_context *ctx)
struct inode *inode = file_inode(file);
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
u64 block = squashfs_i(inode)->start + msblk->directory_table;
-   int offset = squashfs_i(inode)->offset, length, dir_count, size,
-   type, err;
-   unsigned int inode_number;
+   int offset = squashfs_i(inode)->offset, length, type, err;
+   unsigned int inode_number, dir_count, size;
struct squashfs_dir_header dirh;
struct squashfs_dir_entry *dire;
 
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 5/5] Squashfs: add corruption check for type in squashfs_readdir()

2013-09-05 Thread Phillip Lougher
We read the type field from disk.  This value should be sanity
checked for correctness to avoid an out of bounds access when
reading the squashfs_filetype_table array.

Signed-off-by: Phillip Lougher 
---
 fs/squashfs/dir.c | 7 +--
 fs/squashfs/squashfs_fs.h | 5 -
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index bd7155b..d8c2d74 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -112,8 +112,8 @@ static int squashfs_readdir(struct file *file, struct 
dir_context *ctx)
struct inode *inode = file_inode(file);
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
u64 block = squashfs_i(inode)->start + msblk->directory_table;
-   int offset = squashfs_i(inode)->offset, length, type, err;
-   unsigned int inode_number, dir_count, size;
+   int offset = squashfs_i(inode)->offset, length, err;
+   unsigned int inode_number, dir_count, size, type;
struct squashfs_dir_header dirh;
struct squashfs_dir_entry *dire;
 
@@ -206,6 +206,9 @@ static int squashfs_readdir(struct file *file, struct 
dir_context *ctx)
((short) le16_to_cpu(dire->inode_number));
type = le16_to_cpu(dire->type);
 
+   if (type > SQUASHFS_MAX_DIR_TYPE)
+   goto failed_read;
+
if (!dir_emit(ctx, dire->name, size,
inode_number,
squashfs_filetype_table[type]))
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 9e2349d..4b2beda 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -87,7 +87,7 @@
 #define SQUASHFS_COMP_OPTS(flags)  SQUASHFS_BIT(flags, \
SQUASHFS_COMP_OPT)
 
-/* Max number of types and file types */
+/* Inode types including extended types */
 #define SQUASHFS_DIR_TYPE  1
 #define SQUASHFS_REG_TYPE  2
 #define SQUASHFS_SYMLINK_TYPE  3
@@ -103,6 +103,9 @@
 #define SQUASHFS_LFIFO_TYPE13
 #define SQUASHFS_LSOCKET_TYPE  14
 
+/* Max type value stored in directory entry */
+#define SQUASHFS_MAX_DIR_TYPE  7
+
 /* Xattr types */
 #define SQUASHFS_XATTR_USER 0
 #define SQUASHFS_XATTR_TRUSTED  1
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/5] Squashfs: fix corruption check in get_dir_index_using_name()

2013-09-05 Thread Phillip Lougher
Patch "Squashfs: sanity check information from disk" from
Dan Carpenter adds a missing check for corruption in the
"size" field while reading the directory index from disk.

It, however, sets err to -EINVAL, this value is not used later, and
so setting it is completely redundant.  So remove it.

Errors in reading the index are deliberately non-fatal.  If we
get an error in reading the index we just return the part of the
index we have managed to read - the index isn't essential,
just quicker.

Signed-off-by: Phillip Lougher 
---
 fs/squashfs/namei.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index f866d42..342a5aa 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -104,10 +104,8 @@ static int get_dir_index_using_name(struct super_block *sb,
 
 
size = le32_to_cpu(index->size) + 1;
-   if (size > SQUASHFS_NAME_LEN) {
-   err = -EINVAL;
+   if (size > SQUASHFS_NAME_LEN)
break;
-   }
 
err = squashfs_read_metadata(sb, index->name, _start,
_offset, size);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: ftrace 'failed to modify' bug when loading reiserfs.ko

2013-09-05 Thread Dave Jones
On Thu, Sep 05, 2013 at 09:51:54PM -0400, Steven Rostedt wrote:
 > On Thu, 5 Sep 2013 21:48:59 -0400
 > Dave Jones  wrote:
 > 
 > > On Thu, Sep 05, 2013 at 09:44:55PM -0400, Steven Rostedt wrote:
 > >  > On Thu, 5 Sep 2013 21:34:55 -0400
 > >  > Dave Jones  wrote:
 > >  > 
 > >  > > On Thu, Sep 05, 2013 at 09:28:34PM -0400, Steven Rostedt wrote:
 > 
 > >  > Did you change a config option, or update your gcc?
 > > 
 > > Yeah, changed CONFIG_DEBUG_KOBJECT, which rebuilt the world.
 > 
 > Still doesn't explain why it gave you that splat there.
 > 
 > Do you still have that binary module, and can you show me what's at
 > reiserfs_init_bitmap_cache+0x0 with objdump?

I didn't, but it turns out I can recreate this. A little convoluted but..

disable DEBUG_KOBJECT_RELEASE
build, install and boot into kernel

enable DEBUG_KOBJECT_RELEASE
build kernel
install -> boom


28b0 :

return bh;
}

int reiserfs_init_bitmap_cache(struct super_block *sb)
{
28b0:   e8 00 00 00 00  callq  28b5 

28b5:   55  push   %rbp

/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
 * which overflows on large file systems. */
static inline __u32 reiserfs_bmap_count(struct super_block *sb)
{
return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
28b6:   31 d2   xor%edx,%edx
28b8:   48 89 e5mov%rsp,%rbp
28bb:   41 54   push   %r12
28bd:   53  push   %rbx
28be:   48 89 fbmov%rdi,%rbx
28c1:   48 8b 87 50 07 00 00mov0x750(%rdi),%rax
28c8:   48 8b 77 18 mov0x18(%rdi),%rsi
28cc:   48 8b 40 08 mov0x8(%rax),%rax
28d0:   48 8d 0c f5 00 00 00lea0x0(,%rsi,8),%rcx
28d7:   00 
28d8:   8b 00   mov(%rax),%eax
28da:   83 e8 01sub$0x1,%eax
28dd:   48 f7 f1div%rcx

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] mfd: rtsx: Modify rts5249_optimize_phy

2013-09-05 Thread wei_wang
From: Wei WANG 

On some platforms, especially the Thinkpad series, rts5249 won't be
initialized properly. So we need to adjust some phy parameters to
improve compatibility.

Signed-off-by: Wei WANG 
---
 drivers/mfd/rts5249.c|   35 --
 include/linux/mfd/rtsx_pci.h |   43 ++
 2 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/drivers/mfd/rts5249.c b/drivers/mfd/rts5249.c
index 3b835f5..7653638 100644
--- a/drivers/mfd/rts5249.c
+++ b/drivers/mfd/rts5249.c
@@ -130,13 +130,44 @@ static int rts5249_optimize_phy(struct rtsx_pcr *pcr)
 {
int err;
 
-   err = rtsx_pci_write_phy_register(pcr, PHY_REG_REV, 0xFE46);
+   err = rtsx_pci_write_phy_register(pcr, PHY_REG_REV, REG_REV_RESV |
+   RXIDLE_LATCHED | P1_EN | RXIDLE_EN | RX_PWST |
+   CLKREQ_DLY_TIMER_1_0 | STOP_CLKRD | STOP_CLKWR);
if (err < 0)
return err;
 
msleep(1);
 
-   return rtsx_pci_write_phy_register(pcr, PHY_BPCR, 0x05C0);
+   err = rtsx_pci_write_phy_register(pcr, PHY_BPCR, IBRXSEL | IBTXSEL |
+   IB_FILTER | CMIRROR_EN);
+   if (err < 0)
+   return err;
+   err = rtsx_pci_write_phy_register(pcr, PHY_PCR, FORCE_CODE |
+   OOBS_CALI_50 | OOBS_VCM_08 | OOBS_SEN_90 | RSSI_EN);
+   if (err < 0)
+   return err;
+   err = rtsx_pci_write_phy_register(pcr, PHY_RCR2, EMPHASE_EN | NADJR |
+   CDR_CP_10 | CDR_SR_2 | FREQSEL_12 | CPADJEN |
+   CDR_SC_8 | CALIB_LATE);
+   if (err < 0)
+   return err;
+   err = rtsx_pci_write_phy_register(pcr, PHY_FLD4, FLDEN_SEL | REQ_REF |
+   RXAMP_OFF | REQ_ADDA | BER_COUNT |
+   BER_TIMER | BER_CHK_EN);
+   if (err < 0)
+   return err;
+   err = rtsx_pci_write_phy_register(pcr, PHY_RDR, RXDSEL_1_9);
+   if (err < 0)
+   return err;
+   err = rtsx_pci_write_phy_register(pcr, PHY_RCR1, ADP_TIME | VCO_COARSE);
+   if (err < 0)
+   return err;
+   err = rtsx_pci_write_phy_register(pcr, PHY_FLD3, TIMER_4 | TIMER_6 |
+   RXDELINK);
+   if (err < 0)
+   return err;
+   return rtsx_pci_write_phy_register(pcr, PHY_TUNE, TUNEREF_1_0 |
+   VBGSEL_1252 | SDBUS_33 | TUNED18 | TUNED12);
 }
 
 static int rts5249_turn_on_led(struct rtsx_pcr *pcr)
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index d1382df..de20538 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -719,16 +719,41 @@
 
 /* Phy register */
 #define PHY_PCR0x00
+#define  FORCE_CODE0xB000
+#define  OOBS_CALI_50  0x0800
+#define  OOBS_VCM_08   0x0200
+#define  OOBS_SEN_90   0x0040
+#define  RSSI_EN   0x0002
 #define PHY_RCR0   0x01
 #define PHY_RCR1   0x02
+#define  ADP_TIME  0x0100
+#define  VCO_COARSE0x001F
 #define PHY_RCR2   0x03
+#define  EMPHASE_EN0x8000
+#define  NADJR 0x4000
+#define  CDR_CP_10 0x0400
+#define  CDR_SR_2  0x0100
+#define  FREQSEL_120x0040
+#define  CPADJEN   0x0020
+#define  CDR_SC_8  0x0008
+#define  CALIB_LATE0x0002
 #define PHY_RTCR   0x04
 #define PHY_RDR0x05
+#define  RXDSEL_1_90x4000
 #define PHY_TCR0   0x06
 #define PHY_TCR1   0x07
 #define PHY_TUNE   0x08
+#define  TUNEREF_1_0   0x4000
+#define  VBGSEL_1252   0x0C00
+#define  SDBUS_33  0x0200
+#define  TUNED18   0x01C0
+#define  TUNED12   0X0020
 #define PHY_IMR0x09
 #define PHY_BPCR   0x0A
+#define  IBRXSEL   0x0400
+#define  IBTXSEL   0x0100
+#define  IB_FILTER 0x0080
+#define  CMIRROR_EN0x0040
 #define PHY_BIST   0x0B
 #define PHY_RAW_L  0x0C
 #define PHY_RAW_H  0x0D
@@ -744,11 +769,29 @@
 #define PHY_BRNR2  0x17
 #define PHY_BENR   0x18
 #define PHY_REG_REV0x19
+#define  REG_REV_RESV  0xE000
+#define  RXIDLE_LATCHED0x1000
+#define  P1_EN 0x0800
+#define  RXIDLE_EN 0x0400
+#define  CLKREQ_DLY_TIMER_1_0  0x0040
+#define  

Re: [PATCH] perf mem: add priv level filtering support

2013-09-05 Thread Andi Kleen
> But my worry here is about consistency across tools for the single
> letter options, so perhaps if you could use:
> 
>  -U   collect only user level samples
>  -K   collect only kernel level samples

Support for this would be nice for perf stat too, to use with 
the implicit events (used by -d, soon -T etc.)

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] mfd: rtsx: Modify rts5249_optimize_phy

2013-09-05 Thread wei_wang
From: Wei WANG 

v2:
Name those new-added register values

Wei WANG (1):
  mfd: rtsx: Modify rts5249_optimize_phy

 drivers/mfd/rts5249.c|   35 --
 include/linux/mfd/rtsx_pci.h |   43 ++
 2 files changed, 76 insertions(+), 2 deletions(-)

-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/4] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v5

2013-09-05 Thread Andi Kleen
From: Andi Kleen 

With checkpointed counters there can be a situation where the counter
overflows, aborts the transaction, is set back to a non-overflowing
checkpoint, and causes an interrupt. The interrupt doesn't see the overflow
because it has been checkpointed.  This is then a spurious PMI, typically with
an ugly NMI message.  It can also lead to excessive aborts.

Avoid this problem by:
- Using the full counter width for counting counters (earlier patch)
- Forbid sampling for checkpointed counters. It's not too useful anyways,
checkpointing is mainly for counting. The check is approximate
(to still handle KVM), but should catch the majority of cases.
- On a PMI always set back checkpointed counters to zero.

v2: Add unlikely. Add comment
v3: Allow large sampling periods with CP for KVM
v4: Use event_is_checkpointed. Use EOPNOTSUPP. (Stephane Eranian)
v5: Remove comment.
Signed-off-by: Andi Kleen 
---
 arch/x86/kernel/cpu/perf_event_intel.c | 37 ++
 1 file changed, 37 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c 
b/arch/x86/kernel/cpu/perf_event_intel.c
index a45d8d4..91e3f8c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1134,6 +1134,11 @@ static void intel_pmu_enable_event(struct perf_event 
*event)
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
+static inline bool event_is_checkpointed(struct perf_event *event)
+{
+   return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
 /*
  * Save and restart an expired event. Called by NMI contexts,
  * so it has to be careful about preempting normal event ops:
@@ -1141,6 +1146,17 @@ static void intel_pmu_enable_event(struct perf_event 
*event)
 int intel_pmu_save_and_restart(struct perf_event *event)
 {
x86_perf_event_update(event);
+   /*
+* For a checkpointed counter always reset back to 0.  This
+* avoids a situation where the counter overflows, aborts the
+* transaction and is then set back to shortly before the
+* overflow, and overflows and aborts again.
+*/
+   if (unlikely(event_is_checkpointed(event))) {
+   /* No race with NMIs because the counter should not be armed */
+   wrmsrl(event->hw.event_base, 0);
+   local64_set(>hw.prev_count, 0);
+   }
return x86_perf_event_set_period(event);
 }
 
@@ -1224,6 +1240,13 @@ again:
x86_pmu.drain_pebs(regs);
}
 
+   /*
+* To avoid spurious interrupts with perf stat always reset checkpointed
+* counters.
+*/
+   if (cpuc->events[2] && event_is_checkpointed(cpuc->events[2]))
+   status |= (1ULL << 2);
+
for_each_set_bit(bit, (unsigned long *), X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
 
@@ -1689,6 +1712,20 @@ static int hsw_hw_config(struct perf_event *event)
  event->attr.precise_ip > 0))
return -EOPNOTSUPP;
 
+   if (event_is_checkpointed(event)) {
+   /*
+* Sampling of checkpointed events can cause situations where
+* the CPU constantly aborts because of a overflow, which is
+* then checkpointed back and ignored. Forbid checkpointing
+* for sampling.
+*
+* But still allow a long sampling period, so that perf stat
+* from KVM works.
+*/
+   if (event->attr.sample_period > 0 &&
+   event->attr.sample_period < 0x7fff)
+   return -EOPNOTSUPP;
+   }
return 0;
 }
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/4] perf, x86: Report TSX transaction abort cost as weight v3

2013-09-05 Thread Andi Kleen
From: Andi Kleen 

Use the existing weight reporting facility to report the transaction
abort cost, that is the number of cycles wasted in aborts.
Haswell reports this in the PEBS record.

This was in fact the original user for weight.

This is a very useful sort key to concentrate on the most
costly aborts and a good metric for TSX tuning.

v2: Add Peter's changes with minor modifications. More comments.
v3: Adjust white space.
Signed-off-by: Andi Kleen 
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 55 +++
 1 file changed, 42 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 3065c57..d4ed99f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -182,16 +182,29 @@ struct pebs_record_nhm {
  * Same as pebs_record_nhm, with two additional fields.
  */
 struct pebs_record_hsw {
-   struct pebs_record_nhm nhm;
-   /*
-* Real IP of the event. In the Intel documentation this
-* is called eventingrip.
-*/
-   u64 real_ip;
-   /*
-* TSX tuning information field: abort cycles and abort flags.
-*/
-   u64 tsx_tuning;
+   u64 flags, ip;
+   u64 ax, bx, cx, dx;
+   u64 si, di, bp, sp;
+   u64 r8,  r9,  r10, r11;
+   u64 r12, r13, r14, r15;
+   u64 status, dla, dse, lat;
+   u64 real_ip; /* the actual eventing ip */
+   u64 tsx_tuning; /* TSX abort cycles and flags */
+};
+
+union hsw_tsx_tuning {
+   struct {
+   u32 cycles_last_block : 32,
+   hle_abort : 1,
+   rtm_abort : 1,
+   instruction_abort : 1,
+   non_instruction_abort : 1,
+   retry : 1,
+   data_conflict : 1,
+   capacity_writes   : 1,
+   capacity_reads: 1;
+   };
+   u64 value;
 };
 
 void init_debug_store_on_cpu(int cpu)
@@ -759,16 +772,26 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
return 0;
 }
 
+static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
+{
+   if (pebs->tsx_tuning) {
+   union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+   return tsx.cycles_last_block;
+   }
+   return 0;
+}
+
 static void __intel_pmu_pebs_event(struct perf_event *event,
   struct pt_regs *iregs, void *__pebs)
 {
/*
 * We cast to pebs_record_nhm to get the load latency data
 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
+* We cast to the biggest PEBS record are careful not
+* to access out-of-bounds members.
 */
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-   struct pebs_record_nhm *pebs = __pebs;
-   struct pebs_record_hsw *pebs_hsw = __pebs;
+   struct pebs_record_hsw *pebs = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
u64 sample_type;
@@ -827,7 +850,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.sp = pebs->sp;
 
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-   regs.ip = pebs_hsw->real_ip;
+   regs.ip = pebs->real_ip;
regs.flags |= PERF_EFLAGS_EXACT;
} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip())
regs.flags |= PERF_EFLAGS_EXACT;
@@ -838,6 +861,12 @@ static void __intel_pmu_pebs_event(struct perf_event 
*event,
x86_pmu.intel_cap.pebs_format >= 1)
data.addr = pebs->dla;
 
+   /* Only set the TSX weight when no memory weight was requested. */
+   if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
+   !fll &&
+   (x86_pmu.intel_cap.pebs_format >= 2))
+   data.weight = intel_hsw_weight(pebs);
+
if (has_branch_stack(event))
data.br_stack = >lbr_stack;
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/4] perf, x86: Add Haswell TSX event aliases v6

2013-09-05 Thread Andi Kleen
From: Andi Kleen 

Add TSX event aliases, and export them from the kernel to perf.

These are used by perf stat -T and to allow
more user friendly access to events. The events are designed to
be fairly generic and may also apply to other architectures
implementing HTM.  They all cover common situations that
happen during tuning of transactional code.

For Haswell we have to separate the HLE and RTM events,
as they are separate in the PMU.

This adds the following events.

tx-startCount start transaction (used by perf stat -T)
tx-commit   Count commit of transaction
tx-abortCount all aborts
tx-conflict Count aborts due to conflict with another CPU.
tx-capacity Count capacity aborts (transaction too large)

Then matching el-* events for HLE

cycles-tTransactional cycles (used by perf stat -T)
* also exists on POWER8
cycles-ct   Transactional cycles committed (used by perf stat -T)
* according to Michael Ellerman POWER8 has a cycles-transactional-committed,
* perf stat -T handles both cases

Note for useful abort profiling often precise has to be set,
as Haswell can only report the point inside the transaction
with precise=2.

(I had another patchkit to allow exporting precise too, but Vince
Weaver pointed out it violates the ABI, so dropped now)

For some classes of aborts, like conflicts, this is not needed,
as it makes more sense to look at the complete critical section.

This gives a clean set of generalized events to examine transaction
success and aborts. Haswell has additional events for TSX, but those are more
specialized for very specific situations.

v2: Move to new sysfs infrastructure
v3: Use own sysfs functions now
v4: Add tx/el-abort-return for better conflict sampling
v5: Different white space.
v6: Cut down events, rewrite description.
Signed-off-by: Andi Kleen 
---
 arch/x86/kernel/cpu/perf_event_intel.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c 
b/arch/x86/kernel/cpu/perf_event_intel.c
index 91e3f8c..da58663 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2074,7 +2074,34 @@ static __init void intel_nehalem_quirk(void)
 EVENT_ATTR_STR(mem-loads,  mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
 EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
 
+/* Haswell special events */
+EVENT_ATTR_STR(tx-start,tx_start,   "event=0xc9,umask=0x1");
+EVENT_ATTR_STR(tx-commit,   tx_commit,  "event=0xc9,umask=0x2");
+EVENT_ATTR_STR(tx-abort,tx_abort,  "event=0xc9,umask=0x4");
+EVENT_ATTR_STR(tx-capacity, tx_capacity,   "event=0x54,umask=0x2");
+EVENT_ATTR_STR(tx-conflict, tx_conflict,   "event=0x54,umask=0x1");
+EVENT_ATTR_STR(el-start,el_start,   "event=0xc8,umask=0x1");
+EVENT_ATTR_STR(el-commit,   el_commit,  "event=0xc8,umask=0x2");
+EVENT_ATTR_STR(el-abort,el_abort,  "event=0xc8,umask=0x4");
+EVENT_ATTR_STR(el-capacity, el_capacity,"event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-conflict, el_conflict,"event=0x54,umask=0x1");
+EVENT_ATTR_STR(cycles-t,cycles_t,   "event=0x3c,in_tx=1");
+EVENT_ATTR_STR(cycles-ct,   cycles_ct,
+   "event=0x3c,in_tx=1,in_tx_cp=1");
+
 static struct attribute *hsw_events_attrs[] = {
+   EVENT_PTR(tx_start),
+   EVENT_PTR(tx_commit),
+   EVENT_PTR(tx_abort),
+   EVENT_PTR(tx_capacity),
+   EVENT_PTR(tx_conflict),
+   EVENT_PTR(el_start),
+   EVENT_PTR(el_commit),
+   EVENT_PTR(el_abort),
+   EVENT_PTR(el_capacity),
+   EVENT_PTR(el_conflict),
+   EVENT_PTR(cycles_t),
+   EVENT_PTR(cycles_ct),
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
NULL
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


perf, x86: Add parts of the remaining haswell PMU functionality v5

2013-09-05 Thread Andi Kleen
I hope this version is ok for everyone now.

[v2: Added Peter's changes to the PEBS handler]
[v3: Addressed Arnaldo's feedback for the perf stat -T change
 and avoid conflict]
[v4: Remove XXX comment in checkpoint patch.
 Add Arnaldo's ack for tools patch]
[v5: Some white space adjustments]

Add some more TSX functionality to the basic Haswell PMU.

A lot of the infrastructure needed for these patches has
been merged earlier, so it is all quite straight forward
now.

- Add the checkpointed counter workaround.
(Parts of this have been already merged earlier)
- Add support for reporting PEBS transaction abort cost as weight.
This is useful to judge the cost of aborts and concentrate
on expensive ones first.
(Large parts of this have been already merged earlier,
this is just adding the final few lines to the PEBS handler)
- Add TSX event aliases, needed for perf stat -T and general
usability.
(Infrastructure also already in)
- Add perf stat -T support to give a user friendly highlevel
counting frontend for transactions.
This version should also be usable for POWER8 eventually.

Not included:

Support for transaction flags and TSX LBR flags.

-Andi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/4] perf, tools: Add perf stat --transaction v5

2013-09-05 Thread Andi Kleen
From: Andi Kleen 

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transactions
using the in_tx and in_tx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction
length.

This is a reasonable overview over the success of the transactions.

Also support architectures that have a transaction aborted cycles
counter like POWER8. Since that is awkward to handle in the kernel
abstract handle both cases here.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

This is implemented by using TM sysfs events exported by the kernel

v2: Only print the extended statistics when the option is enabled.
This avoids negative output when the user specifies the -T events
in separate groups.
v3: Port to latest tree
v4: Remove merge error. Avoid linear walks for comparisons. Check
transaction_run earlier. Minor fixes.
v5: Move option to avoid conflict. Improve description.
Acked-by: Arnaldo Carvalho de Melo 
Signed-off-by: Andi Kleen 
---
 tools/perf/Documentation/perf-stat.txt |   5 ++
 tools/perf/builtin-stat.c  | 144 -
 tools/perf/util/evsel.h|   6 ++
 tools/perf/util/pmu.c  |  16 
 tools/perf/util/pmu.h  |   1 +
 5 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb..40bc65a 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical 
cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical 
processor.
 
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
 EXAMPLES
 
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7..6bd90e4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
+#include "util/pmu.h"
 #include "util/event.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, 
char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
+/* Default events used for perf stat -T */
+static const char * const transaction_attrs[] = {
+   "task-clock",
+   "{"
+   "instructions,"
+   "cycles,"
+   "cpu/cycles-t/,"
+   "cpu/tx-start/,"
+   "cpu/el-start/,"
+   "cpu/cycles-ct/"
+   "}"
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char * const transaction_limited_attrs[] = {
+   "task-clock",
+   "{"
+   "instructions,"
+   "cycles,"
+   "cpu/cycles-t/,"
+   "cpu/tx-start/"
+   "}"
+};
+
+/* must match transaction_attrs and the beginning limited_attrs */
+enum {
+   T_TASK_CLOCK,
+   T_INSTRUCTIONS,
+   T_CYCLES,
+   T_CYCLES_IN_TX,
+   T_TRANSACTION_START,
+   T_ELISION_START,
+   T_CYCLES_IN_TX_CP,
+};
+
 static struct perf_evlist  *evsel_list;
 
 static struct perf_target  target = {
@@ -90,6 +126,7 @@ static enum aggr_modeaggr_mode   
= AGGR_GLOBAL;
 static volatile pid_t  child_pid   = -1;
 static boolnull_run=  false;
 static int detailed_run=  0;
+static booltransaction_run;
 static boolbig_num =  true;
 static int big_num_opt =  -1;
 static const char  *csv_sep= NULL;
@@ -213,7 +250,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
@@ -235,6 +275,11 @@ static void perf_stat__reset_stats(struct perf_evlist 
*evlist)
memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+   

Re: soft lockup in sysvipc code.

2013-09-05 Thread Lin Ming
On Thu, Sep 5, 2013 at 5:50 AM, Dave Jones  wrote:
> Haven't seen this before.
> Tree based on v3.11-3104-gf357a82
>
> BUG: soft lockup - CPU#0 stuck for 22s! [trinity-child0:25479]

Can't imagine how it could happen.
In my understanding, "soft lockup" happens when code is stuck
somewhere with preemption disabled.

Look at the code, preemption disabled at:
sysvipc_proc_next -> sysvipc_find_ipc -> ipc_lock_by_ptr

enabled at:
sysvipc_proc_next -> ipc_unlock
or
sysvipc_proc_stop -> ipc_unlock

And I didn't find any code that may get stuck in that path.
I may be missing something ..

Regards,
Lin Ming

> Modules linked in: sctp snd_seq_dummy fuse dlci rfcomm tun bnep hidp ipt_ULOG 
> nfnetlink can_raw can_bcm scsi_transport_iscsi nfc caif_socket caif af_802154 
> phonet af_rxrpc bluetooth rfkill can llc2 pppoe pppox ppp_generic slhc irda 
> crc_ccitt rds af_key rose x25 atm netrom appletalk ipx p8023 psnap p8022 llc 
> ax25 xfs snd_hda_codec_realtek libcrc32c snd_hda_intel snd_hda_codec 
> snd_hwdep snd_seq snd_seq_device snd_pcm snd_page_alloc snd_timer snd 
> soundcore pcspkr usb_debug e1000e ptp pps_core
> irq event stamp: 1143030
> hardirqs last  enabled at (1143029): [] 
> restore_args+0x0/0x30
> hardirqs last disabled at (1143030): [] 
> apic_timer_interrupt+0x6a/0x80
> softirqs last  enabled at (1143028): [] 
> __do_softirq+0x198/0x460
> softirqs last disabled at (1143023): [] irq_exit+0x135/0x150
> CPU: 0 PID: 25479 Comm: trinity-child0 Not tainted 3.11.0+ #44
> task: 88022c013f90 ti: 88022bd8c000 task.ti: 88022bd8c000
> RIP: 0010:[]  [] 
> idr_find_slowpath+0x9b/0x150
> RSP: 0018:88022bd8dc88  EFLAGS: 0206
> RAX: 0006 RBX: 000a6c0a RCX: 0008
> RDX: 0008 RSI: 81c41040 RDI: 88022c014668
> RBP: 88022bd8dca0 R08:  R09: 
> R10: 0001 R11: 0001 R12: 88023831a290
> R13: 0001 R14: 88022bd8dbe8 R15: 8802449d
> FS:  7fcfcad2c740() GS:88024480() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: 7fcfc84cb968 CR3: 0001de93f000 CR4: 001407f0
> DR0:  DR1:  DR2: 
> DR3:  DR6: fffe0ff0 DR7: 0400
> Stack:
>  0260 2dba 81c7e258 88022bd8dcf8
>  812b1131 88022c013f90 8801d37174c0 88022bd8dd38
>  81c7e2f0 88022bd8dd38 8801e065cec8 880241d86ca8
> Call Trace:
>  [] sysvipc_find_ipc+0x61/0x300
>  [] sysvipc_proc_next+0x46/0xd0
>  [] traverse.isra.7+0xc9/0x260
>  [] ? lock_release_non_nested+0x308/0x350
>  [] seq_read+0x3e1/0x450
>  [] ? proc_reg_write+0x80/0x80
>  [] proc_reg_read+0x3d/0x80
>  [] do_loop_readv_writev+0x63/0x90
>  [] do_readv_writev+0x21d/0x240
>  [] ? local_clock+0x3f/0x50
>  [] ? context_tracking_user_exit+0x46/0x1a0
>  [] vfs_readv+0x35/0x60
>  [] SyS_preadv+0xa2/0xd0
>  [] tracesys+0xdd/0xe2
> Code: 7e 6e 41 8b 84 24 2c 08 00 00 83 eb 08 c1 e0 03 39 c3 0f 85 c1 00 00 00 
> 89 d9 44 89 e8 d3 f8 0f b6 c0 48 83 c0 04 4d 8b 64 c4 08  80 b4 d6 ff 85 
> c0 74 c4 80 3d f7 2f 9d 00 00 75 bb e8 6e b4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] mm/mmap.c: Remove unnecessary pgoff assignment

2013-09-05 Thread Zhang Yanfei
We never access variable pgoff later, so the assignment is
redundant. Remove it. 

Signed-off-by: Zhang Yanfei 
---
 mm/mmap.c |1 - 
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index f9c97d1..db44f6a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1570,7 +1570,6 @@ munmap_back:
WARN_ON_ONCE(addr != vma->vm_start);
 
addr = vma->vm_start;
-   pgoff = vma->vm_pgoff;
vm_flags = vma->vm_flags;
} else if (vm_flags & VM_SHARED) {
if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP)))
-- 
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux-next: back merge of Linus' tree into the vfio tree

2013-09-05 Thread Stephen Rothwell
Hi Alex,

On Thu, 05 Sep 2013 17:14:29 -0600 Alex Williamson  
wrote:
>
> On Fri, 2013-09-06 at 09:08 +1000, Stephen Rothwell wrote:
> > 
> > I noticed that you have back merged Linus' tree into yours.  Linus
> > usually takes a dim view of that - especially when there is no
> > explanation in the merge commit message.  i.e. you shouldn't do that
> > unless you really need to - and then you should explain why you did it.
> 
> Hmm, I was hoping that wouldn't be a problem, especially with no
> conflicts in the merge.  I did it because the first commit after the
> merge in my next tree depends on PCI changes that have already been
> merged by Linus.  Re-basing is an even bigger sin and I felt it better
> to do a merge than ask for two pulls or add an unbuild-able commit to my
> next tree.  How do you suggest that I resolve this?

See above ... you should have said all that in the merge commit message.
I guess that you should just own it now and explain it to Linus when you
ask him to pull your tree.

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au


pgp4IpFrSiJMD.pgp
Description: PGP signature


Re: [PATCH 10/11] x86, mem-hotplug: Support initialize page tables from low to high.

2013-09-05 Thread Tang Chen

Hi Wanpeng,

On 09/06/2013 10:16 AM, Wanpeng Li wrote:
..

+#ifdef CONFIG_MOVABLE_NODE
+   unsigned long kernel_end;
+
+   if (movablenode_enable_srat&&
+   memblock.current_order == MEMBLOCK_ORDER_LOW_TO_HIGH) {


I think memblock.current_order == MEMBLOCK_ORDER_LOW_TO_HIGH is always
true if config MOVABLE_NODE and movablenode_enable_srat == true if PATCH
11/11 is applied.


memblock.current_order == MEMBLOCK_ORDER_LOW_TO_HIGH is true here if
MOVABLE_NODE
is configured, and it will be reset after SRAT is parsed. But
movablenode_enable_srat
could only be true when users specify movablenode boot option in the
kernel commandline.


You are right.

I mean the change should be:

+#ifdef CONFIG_MOVABLE_NODE
+   unsigned long kernel_end;
+
+   if (movablenode_enable_srat) {

It is unnecessary to check memblock.current_order since it is always true
if movable_node is configured and movablenode_enable_srat is true.



But I think, memblock.current_order is set outside init_mem_mapping(). And
the path in the if statement could only be run when current order is from
low to high. So I think it is safe to check it here.

I prefer to keep it at least in the next version patch-set. If others also
think it is unnecessary, I'm OK with removing the checking. :)

Thanks. :)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] net: stmmac: fix bad merge conflict resolution

2013-09-05 Thread David Miller
From: Olof Johansson 
Date: Thu,  5 Sep 2013 18:01:41 -0700

> Merge commit 06c54055bebf919249aa1eb68312887c3cfe77b4 did a bad conflict
> resolution accidentally leaving out a closing brace. Add it back.
> 
> Signed-off-by: Olof Johansson 
> ---
> 
> This breaks a handful of defconfigs on ARM, so it'd be good to see it
> applied pretty quickly. Thanks!

Looks like Linus applied this, thanks Olof.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [GIT] Sparc

2013-09-05 Thread David Miller
From: Sergei Shtylyov 
Date: Fri, 06 Sep 2013 02:32:51 +0400

> Hello.
> 
> On 09/06/2013 12:44 AM, David Miller wrote:
> 
>> Several bug fixes (from Kirill Tkhai, Geert Uytterhoeven, and Alexey
>> Dobriyan) and some support for Fujitsu sparc64x chips (from Allen
>> Pais).
> 
>> Please pull, thanks a lot!
> 
>You meant that for 'linux-sparc', not 'linux-ide', right? :-)

Yes, sparclinux is the intended destination, and I forwarded it there
once I realized my mistake :-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[git pull] Please pull powerpc.git next branch

2013-09-05 Thread Benjamin Herrenschmidt
Hi Linus !

Here's the powerpc batch for this merge window. Some of the highlights are:

 * A bunch of endian fixes ! We don't have full LE support yet in that
release but this contains a lot of fixes all over arch/powerpc to use the
proper accessors, call the firmware with the right endian mode, etc...

 * A few updates to our "powernv" platform (non-virtualized, the one
to run KVM on), among other, support for bridging the P8 LPC bus for UARTs,
support and some EEH fixes.
 
 * Some mpc51xx clock API cleanups in preparation for a clock API overhaul

 * A pile of cleanups of our old math emulation code, including better
support for using it to emulate optional FP instructions on embedded
chips that otherwise have a HW FPU.

 * Some infrastructure in selftest, for powerpc now, but could be generalized,
initially used by some tests for our perf instruction counting code.

 * A pile of fixes for hotplug on pseries (that was seriously bitrotting)

 * The usual slew of freescale embedded updates, new boards, 64-bit hibernation
support, e6500 core PMU support, etc...

Cheers,
Ben.

The following changes since commit d4e4ab86bcba5a72779c43dc1459f71fea3d89c8:

  Linux 3.11-rc5 (2013-08-11 18:04:20 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git next

for you to fetch changes up to 9f24b0c9ef9b6b1292579c9e2cd7ff07ddc372b7:

  powerpc: Correct FSCR bit definitions (2013-09-05 17:29:20 +1000)


Alistair Popple (4):
  powerpc: More little endian fixes for prom.c
  powerpc: More little endian fixes for setup-common.c
  powerpc: Little endian fixes for legacy_serial.c
  powerpc: Make NUMA device node code endian safe

Andy Fleming (2):
  powerpc: Add smp_generic_cpu_bootable
  powerpc: Convert platforms to smp_generic_cpu_bootable

Anton Blanchard (29):
  powerpc: Align p_toc
  powerpc: Handle unaligned ldbrx/stdbrx
  powerpc: Wrap MSR macros with parentheses
  powerpc: Remove SAVE_VSRU and REST_VSRU macros
  powerpc: Simplify logic in include/uapi/asm/elf.h
  powerpc/pseries: Simplify H_GET_TERM_CHAR
  powerpc: Fix a number of sparse warnings
  powerpc/pci: Don't use bitfield for force_32bit_msi
  powerpc: Stop using non-architected shared_proc field in lppaca
  powerpc: Make RTAS device tree accesses endian safe
  powerpc: Make cache info device tree accesses endian safe
  powerpc: Make RTAS calls endian safe
  powerpc: Make logical to real cpu mapping code endian safe
  powerpc: Add some endian annotations to time and xics code
  powerpc: Fix some endian issues in xics code
  powerpc: of_parse_dma_window should take a __be32 *dma_window
  powerpc: Make device tree accesses in cache info code endian safe
  powerpc: Make device tree accesses in HVC VIO console endian safe
  powerpc: Make device tree accesses in VIO subsystem endian safe
  powerpc: Make OF PCI device tree accesses endian safe
  powerpc: Make PCI device node device tree accesses endian safe
  powerpc: Add endian annotations to lppaca, slb_shadow and dtl_entry
  powerpc: Fix little endian lppaca, slb_shadow and dtl_entry
  powerpc: Emulate instructions in little endian mode
  powerpc: Little endian SMP IPI demux
  powerpc/pseries: Fix endian issues in H_GET_TERM_CHAR/H_PUT_TERM_CHAR
  powerpc: Fix little endian coredumps
  powerpc: Make rwlocks endian safe
  powerpc: Never handle VSX alignment exceptions from kernel

Benjamin Herrenschmidt (21):
  Merge remote-tracking branch 'scott/next' into next
  powerpc/pmac: Early debug output on screen on 64-bit macs
  powerpc: Better split CONFIG_PPC_INDIRECT_PIO and CONFIG_PPC_INDIRECT_MMIO
  powerpc/powernv: Update opal.h to add new LPC and XSCOM functions
  powerpc/powernv: Add helper to get ibm,chip-id of a node
  powerpc/powernv: Add PIO accessors for Power8 LPC bus
  powerpc: Cleanup udbg_16550 and add support for LPC PIO-only UARTs
  powerpc: Check "status" property before adding legacy ISA serial ports
  powerpc/powernv: Don't crash if there are no OPAL consoles
  powerpc/powernv: Enable detection of legacy UARTs
  Revert "powerpc/e500: Update compilation flags with core specific options"
  powerpc: Make prom_init.c endian safe
  powerpc/wsp: Fix early debug build
  Merge remote-tracking branch 'scott/next' into next
  Merge branch 'merge' into next
  powerpc/btext: Fix CONFIG_PPC_EARLY_DEBUG_BOOTX on ppc32
  powerpc: Don't Oops when accessing /proc/powerpc/lparcfg without 
hypervisor
  powerpc/powernv: Return secondary CPUs to firmware on kexec
  Merge branch 'merge' into next
  powerpc/pseries: Move lparcfg.c to platforms/pseries
  Merge remote-tracking branch 'agust/next' into next

Catalin Udma (2):
  powerpc/perf: increase the perf HW events to 6

Re: [PATCH v2 4/4] kernel: add support for init_array constructors

2013-09-05 Thread Rusty Russell
Frantisek Hrbata  writes:
> This adds the .init_array section as yet another section with constructors. 
> This
> is needed because gcc could add __gcov_init calls to .init_array or .ctors
> section, depending on gcc version.
>
> v2: - reuse mod->ctors for .init_array section for modules, because gcc uses
>   .ctors or .init_array, but not both at the same time
>
> Signed-off-by: Frantisek Hrbata 

Might be nice to document which gcc version changed this, so people can
choose whether to cherry-pick this change?

Acked-by: Rusty Russell 

> ---
>  include/asm-generic/vmlinux.lds.h | 1 +
>  kernel/module.c   | 3 +++
>  2 files changed, 4 insertions(+)
>
> diff --git a/include/asm-generic/vmlinux.lds.h 
> b/include/asm-generic/vmlinux.lds.h
> index 69732d2..c55d8d9 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -468,6 +468,7 @@
>  #define KERNEL_CTORS()   . = ALIGN(8);  \
>   VMLINUX_SYMBOL(__ctors_start) = .; \
>   *(.ctors)  \
> + *(.init_array) \
>   VMLINUX_SYMBOL(__ctors_end) = .;
>  #else
>  #define KERNEL_CTORS()
> diff --git a/kernel/module.c b/kernel/module.c
> index 2069158..bbbd953 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -2760,6 +2760,9 @@ static void find_module_sections(struct module *mod, 
> struct load_info *info)
>  #ifdef CONFIG_CONSTRUCTORS
>   mod->ctors = section_objs(info, ".ctors",
> sizeof(*mod->ctors), >num_ctors);
> + if (!mod->ctors)
> + mod->ctors = section_objs(info, ".init_array",
> + sizeof(*mod->ctors), >num_ctors);
>  #endif
>  
>  #ifdef CONFIG_TRACEPOINTS
> -- 
> 1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 1/1] dcache: Translating dentry into pathname without taking rename_lock

2013-09-05 Thread Waiman Long

On 09/05/2013 04:42 PM, Linus Torvalds wrote:

On Thu, Sep 5, 2013 at 1:29 PM, Waiman Long  wrote:

It is not as simple as doing a strncpy().

Yes it damn well is.

Stop the f*cking stupid arguments, and instead listen to what I say.

Here. Let me bold-face the most important part for you, so that you
don't miss it in all the other crap:

MAKE prepend() JUST USE "strncpy()" INSTEAD OF "memcpy()".

Nothing else. Seriously. Your "you can't do it because we copy
backwards" arguments are pure and utter garbage, exactly BECAUSE YOU
DON'T CHANGE ANY OF THAT. You can actually use the unreliable length
variable BUT YOU MUST STILL STOP AT A ZERO.

Get it?

You're complicating the whole thing for no good reason. I'm telling
you (and HAVE BEEN telling you multiple times) that you cannot use
"memcpy()" because the length may not be reliable, so you need to
check for zero in the middle and stop early. All your arguments have
been totally pointless, because you don't seem to see that simple and
fundamental issue. You don't change ANYTHING else. But you damn well
not do a "memcpy", you do something that stops when it hits a NUL
character.

We call that function "strncpy()". I'd actually prefer to write it out
by hand (because somebody could implement "strncpy()" as a
questionable function that accesses past the NUL as long as it's
within the 'n'), and because I think we might want to do that
word-at-a-time version of it, but for a first approximation, just do
that one-liner version.

Don't do anything else. Don't do locking. Don't do memchr. Just make
sure that you stop at a NUL character, and don't trust the length,
because the length may not match the pointer. That was always ALL
you needed to do.

   Linus
I am sorry that I misunderstood what you said. I will do what you and Al 
advise me to do.


-Longman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Hope and your long-term cooperation

2013-09-05 Thread jenny
Dear Manager: 

Glad to write to you. 

We are manufacturer of  Stearic acid from China,
We have Zinc stearate, calcium stearate, magnesium stearate, etc.
If you need such chemicals, please do not hesitate to contact me.

 
Best Regards,





  
Shijiazhuang Shinearly Chemicals Co.,Ltd

No. 105 Yellow River Road, Hightech Zone, Shijiazhuang City, Hebei Province, 
China

Tel:  0086-311-89809275

Fax:  
0086-311-67795015

Re: ftrace 'failed to modify' bug when loading reiserfs.ko

2013-09-05 Thread Steven Rostedt
On Thu, 5 Sep 2013 21:48:59 -0400
Dave Jones  wrote:

> On Thu, Sep 05, 2013 at 09:44:55PM -0400, Steven Rostedt wrote:
>  > On Thu, 5 Sep 2013 21:34:55 -0400
>  > Dave Jones  wrote:
>  > 
>  > > On Thu, Sep 05, 2013 at 09:28:34PM -0400, Steven Rostedt wrote:

>  > Did you change a config option, or update your gcc?
> 
> Yeah, changed CONFIG_DEBUG_KOBJECT, which rebuilt the world.

Still doesn't explain why it gave you that splat there.

Do you still have that binary module, and can you show me what's at
reiserfs_init_bitmap_cache+0x0 with objdump?

-- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[for-next][PATCH 4/4] ftrace/rcu: Do not trace debug_lockdep_rcu_enabled()

2013-09-05 Thread Steven Rostedt
From: "Steven Rostedt (Red Hat)" 

The function debug_lockdep_rcu_enabled() is part of the RCU lockdep
debugging, and is called very frequently. I found that if I enable
a lot of debugging and run the function graph tracer, this
function can cause a live lock of the system.

We don't usually trace lockdep infrastructure, no need to trace
this either.

Reviewed-by: Paul E. McKenney 
Signed-off-by: Steven Rostedt 
---
 kernel/rcupdate.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index cce6ba8..4f20c6c 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -122,7 +122,7 @@ struct lockdep_map rcu_sched_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", _sched_lock_key);
 EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
 
-int debug_lockdep_rcu_enabled(void)
+int notrace debug_lockdep_rcu_enabled(void)
 {
return rcu_scheduler_active && debug_locks &&
   current->lockdep_recursion == 0;
-- 
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[for-next][PATCH 1/4] tracing: Make tracing_cpumask available for all instances

2013-09-05 Thread Steven Rostedt
From: Alexander Z Lam 

Allow tracer instances to disable tracing by cpu by moving
the static global tracing_cpumask into trace_array.

Link: 
http://lkml.kernel.org/r/921622317f239bfc2283cac2242647801ef584f2.1375980149.git@google.com

Cc: Vaibhav Nagarnaik 
Cc: David Sharp 
Cc: Alexander Z Lam 
Signed-off-by: Alexander Z Lam 
Signed-off-by: Steven Rostedt 
---
 kernel/trace/trace.c |   37 -
 kernel/trace/trace.h |1 +
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 496f94d..7974ba2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3166,11 +3166,6 @@ static const struct file_operations show_traces_fops = {
 };
 
 /*
- * Only trace on a CPU if the bitmask is set:
- */
-static cpumask_var_t tracing_cpumask;
-
-/*
  * The tracer itself will not take this lock, but still we want
  * to provide a consistent cpumask to user-space:
  */
@@ -3186,11 +3181,12 @@ static ssize_t
 tracing_cpumask_read(struct file *filp, char __user *ubuf,
 size_t count, loff_t *ppos)
 {
+   struct trace_array *tr = file_inode(filp)->i_private;
int len;
 
mutex_lock(_cpumask_update_lock);
 
-   len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
+   len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
if (count - len < 2) {
count = -EINVAL;
goto out_err;
@@ -3208,7 +3204,7 @@ static ssize_t
 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
  size_t count, loff_t *ppos)
 {
-   struct trace_array *tr = filp->private_data;
+   struct trace_array *tr = file_inode(filp)->i_private;
cpumask_var_t tracing_cpumask_new;
int err, cpu;
 
@@ -3228,12 +3224,12 @@ tracing_cpumask_write(struct file *filp, const char 
__user *ubuf,
 * Increase/decrease the disabled counter if we are
 * about to flip a bit in the cpumask:
 */
-   if (cpumask_test_cpu(cpu, tracing_cpumask) &&
+   if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_inc(_cpu_ptr(tr->trace_buffer.data, 
cpu)->disabled);
ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, 
cpu);
}
-   if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
+   if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_dec(_cpu_ptr(tr->trace_buffer.data, 
cpu)->disabled);
ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, 
cpu);
@@ -3242,7 +3238,7 @@ tracing_cpumask_write(struct file *filp, const char 
__user *ubuf,
arch_spin_unlock(_max_lock);
local_irq_enable();
 
-   cpumask_copy(tracing_cpumask, tracing_cpumask_new);
+   cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
 
mutex_unlock(_cpumask_update_lock);
free_cpumask_var(tracing_cpumask_new);
@@ -3256,9 +3252,10 @@ err_unlock:
 }
 
 static const struct file_operations tracing_cpumask_fops = {
-   .open   = tracing_open_generic,
+   .open   = tracing_open_generic_tr,
.read   = tracing_cpumask_read,
.write  = tracing_cpumask_write,
+   .release= tracing_release_generic_tr,
.llseek = generic_file_llseek,
 };
 
@@ -5938,6 +5935,11 @@ static int new_instance_create(const char *name)
if (!tr->name)
goto out_free_tr;
 
+   if (!alloc_cpumask_var(>tracing_cpumask, GFP_KERNEL))
+   goto out_free_tr;
+
+   cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
+
raw_spin_lock_init(>start_lock);
 
tr->current_trace = _trace;
@@ -5969,6 +5971,7 @@ static int new_instance_create(const char *name)
  out_free_tr:
if (tr->trace_buffer.buffer)
ring_buffer_free(tr->trace_buffer.buffer);
+   free_cpumask_var(tr->tracing_cpumask);
kfree(tr->name);
kfree(tr);
 
@@ -6098,6 +6101,9 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry 
*d_tracer)
 {
int cpu;
 
+   trace_create_file("tracing_cpumask", 0644, d_tracer,
+ tr, _cpumask_fops);
+
trace_create_file("trace_options", 0644, d_tracer,
  tr, _iter_fops);
 
@@ -6147,9 +6153,6 @@ static __init int tracer_init_debugfs(void)
 
init_tracer_debugfs(_trace, d_tracer);
 
-   trace_create_file("tracing_cpumask", 0644, d_tracer,
-   _trace, _cpumask_fops);
-
trace_create_file("available_tracers", 0444, d_tracer,
_trace, _traces_fops);
 
@@ -6371,7 +6374,7 @@ __init static int tracer_alloc_buffers(void)
if 

[for-next][PATCH 3/4] x86-32, ftrace: Fix static ftrace when early microcode is enabled

2013-09-05 Thread Steven Rostedt
From: "H. Peter Anvin" 

Early microcode loading runs C code before paging is enabled on 32
bits.  Since ftrace puts a hook into every function, that hook needs
to be safe to execute in the pre-paging environment.  This is
currently true for dynamic ftrace but not for static ftrace.

Static ftrace is obsolescent and assumed to not be
performance-critical, so we can simply test that the stack pointer
falls within the valid range of kernel addresses.

Reported-by: Jan Kiszka 
Tested-by: Jan Kiszka 
Signed-off-by: H. Peter Anvin 
Signed-off-by: Steven Rostedt 
---
 arch/x86/kernel/entry_32.S |3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 2cfbc3a..f0dcb0c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1176,6 +1176,9 @@ ftrace_restore_flags:
 #else /* ! CONFIG_DYNAMIC_FTRACE */
 
 ENTRY(mcount)
+   cmpl $__PAGE_OFFSET, %esp
+   jb ftrace_stub  /* Paging not enabled yet? */
+
cmpl $0, function_trace_stop
jne  ftrace_stub
 
-- 
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[for-next][PATCH 2/4] ftrace: Fix a slight race in modifying what function callback gets traced

2013-09-05 Thread Steven Rostedt
From: "Steven Rostedt (Red Hat)" 

There's a slight race when going from a list function to a non list
function. That is, when only one callback is registered to the function
tracer, it gets called directly by the mcount trampoline. But if this
function has filters, it may be called by the wrong functions.

As the list ops callback handles multiple callbacks that are
registered to ftrace, it also handles what functions they call. While
the transaction is taking place, use the list function always, and
after all the updates are finished (only the functions that should be
traced are being traced), then we can update the trampoline to call
the function directly.

Signed-off-by: Steven Rostedt 
---
 kernel/trace/ftrace.c |   17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a6d098c..03cf44a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1978,12 +1978,27 @@ int __weak ftrace_arch_code_modify_post_process(void)
 
 void ftrace_modify_all_code(int command)
 {
+   int update = command & FTRACE_UPDATE_TRACE_FUNC;
+
+   /*
+* If the ftrace_caller calls a ftrace_ops func directly,
+* we need to make sure that it only traces functions it
+* expects to trace. When doing the switch of functions,
+* we need to update to the ftrace_ops_list_func first
+* before the transition between old and new calls are set,
+* as the ftrace_ops_list_func will check the ops hashes
+* to make sure the ops are having the right functions
+* traced.
+*/
+   if (update)
+   ftrace_update_ftrace_func(ftrace_ops_list_func);
+
if (command & FTRACE_UPDATE_CALLS)
ftrace_replace_code(1);
else if (command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
 
-   if (command & FTRACE_UPDATE_TRACE_FUNC)
+   if (update && ftrace_trace_function != ftrace_ops_list_func)
ftrace_update_ftrace_func(ftrace_trace_function);
 
if (command & FTRACE_START_FUNC_RET)
-- 
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[for-next][PATCH 0/4] tracing: Updated changes for 3.12

2013-09-05 Thread Steven Rostedt
I'm holding off on the rcu unsafe changes with perf and function tracing.
We'll still get bug splats with unsafe rcu usage, but we need to work
out a better solution than I was going to push for 3.12. It's too late
to get things smooth, thus we need to wait till 3.13 to get something
that is decent.

For now, root needs to be careful in how they trace functions with perf.

  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
for-next

Head SHA1: a0a5a0561f63905fe94c49bc567615829f42ce1e


Alexander Z Lam (1):
  tracing: Make tracing_cpumask available for all instances

H. Peter Anvin (1):
  x86-32, ftrace: Fix static ftrace when early microcode is enabled

Steven Rostedt (Red Hat) (2):
  ftrace: Fix a slight race in modifying what function callback gets traced
  ftrace/rcu: Do not trace debug_lockdep_rcu_enabled()


 arch/x86/kernel/entry_32.S |3 +++
 kernel/rcupdate.c  |2 +-
 kernel/trace/ftrace.c  |   17 -
 kernel/trace/trace.c   |   37 -
 kernel/trace/trace.h   |1 +
 5 files changed, 41 insertions(+), 19 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: ftrace 'failed to modify' bug when loading reiserfs.ko

2013-09-05 Thread Dave Jones
On Thu, Sep 05, 2013 at 09:44:55PM -0400, Steven Rostedt wrote:
 > On Thu, 5 Sep 2013 21:34:55 -0400
 > Dave Jones  wrote:
 > 
 > > On Thu, Sep 05, 2013 at 09:28:34PM -0400, Steven Rostedt wrote:
 > >  > On Thu, 5 Sep 2013 21:19:24 -0400
 > >  > Dave Jones  wrote:
 > >  > 
 > >  > > For whatever dumb reason, when running 'make install' on a Fedora 
 > > system,
 > >  > > os-prober tries to figure out what filesystems are needed by loading 
 > > filesystems,
 > >  > > and seeing what sticks..  Today it blew up spectacularly when it got 
 > > to
 > >  > > loading reiserfs..  System wedged entirely afterwards.
 > >  > 
 > >  > Could it be that the reiserfs module was compiled differently than the
 > >  > running kernel?
 > >  
 > > o... it was probably installing the just-built version over the same 
 > > '3.11+'
 > > modules tree that was running.  This has never been a problem before 
 > > though..
 > > 
 > 
 > Did you change a config option, or update your gcc?

Yeah, changed CONFIG_DEBUG_KOBJECT, which rebuilt the world.

Dave
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: ftrace 'failed to modify' bug when loading reiserfs.ko

2013-09-05 Thread Steven Rostedt
On Thu, 5 Sep 2013 21:34:55 -0400
Dave Jones  wrote:

> On Thu, Sep 05, 2013 at 09:28:34PM -0400, Steven Rostedt wrote:
>  > On Thu, 5 Sep 2013 21:19:24 -0400
>  > Dave Jones  wrote:
>  > 
>  > > For whatever dumb reason, when running 'make install' on a Fedora system,
>  > > os-prober tries to figure out what filesystems are needed by loading 
> filesystems,
>  > > and seeing what sticks..  Today it blew up spectacularly when it got to
>  > > loading reiserfs..  System wedged entirely afterwards.
>  > 
>  > Could it be that the reiserfs module was compiled differently than the
>  > running kernel?
>  
> o... it was probably installing the just-built version over the same 
> '3.11+'
> modules tree that was running.  This has never been a problem before though..
> 

Did you change a config option, or update your gcc?

Although, it doesn't really explain why the location would have
something that it doesn't expect. As the mcount/fentry table is created
in the module itself.

 -- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT PULL] security subsystem changes for 3.12

2013-09-05 Thread James Morris
Nothing major for this kernel, just maintenance updates.

Please pull.



The following changes since commit 2e032852245b3dcfe5461d7353e34eb6da095ccf:

  Merge branch 'for-linus' of git://git.linaro.org/people/rmk/linux-arm 
(2013-09-05 18:07:32 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git next

Casey Schaufler (1):
  Smack: network label match fix

James Morris (2):
  Merge branch 'linus-master'; commit 'v3.11-rc2' into ra-next
  Merge branch 'smack-for-3.12' of 
git://git.gitorious.org/smack-next/kernel into ra-next

John Johansen (14):
  apparmor: enable users to query whether apparmor is enabled
  apparmor: add a features/policy dir to interface
  apparmor: provide base for multiple profiles to be replaced at once
  apparmor: convert profile lists to RCU based locking
  apparmor: change how profile replacement update is done
  apparmor: update how unconfined is handled
  apparmor: rework namespace free path
  apparmor: make free_profile available outside of policy.c
  apparmor: allow setting any profile into the unconfined state
  apparmor: add interface files for profiles and namespaces
  apparmor: add an optional profile attachment string for profiles
  apparmor: add the profile introspection file to interface
  apparmor: export set of capabilities supported by the apparmor module
  apparmor: add the ability to report a sha1 hash of loaded policy

Rafal Krypa (1):
  Smack: parse multiple rules per write to load2, up to PAGE_SIZE-1 bytes

Tetsuo Handa (2):
  xattr: Constify ->name member of "struct xattr".
  apparmor: remove minimum size check for vmalloc()

Tomasz Stanislawski (2):
  security: smack: fix memleak in smk_write_rules_list()
  security: smack: add a hash table to quicken smk_find_entry()

 fs/ocfs2/xattr.h  |2 +-
 include/linux/security.h  |8 +-
 include/linux/xattr.h |2 +-
 include/uapi/linux/reiserfs_xattr.h   |2 +-
 security/apparmor/Kconfig |   12 +
 security/apparmor/Makefile|7 +-
 security/apparmor/apparmorfs.c|  636 -
 security/apparmor/capability.c|5 +
 security/apparmor/context.c   |   16 +-
 security/apparmor/crypto.c|   97 +
 security/apparmor/domain.c|   24 +-
 security/apparmor/include/apparmor.h  |6 +
 security/apparmor/include/apparmorfs.h|   40 ++
 security/apparmor/include/audit.h |1 -
 security/apparmor/include/capability.h|4 +
 security/apparmor/include/context.h   |   15 +-
 security/apparmor/include/crypto.h|   36 ++
 security/apparmor/include/policy.h|  218 +++---
 security/apparmor/include/policy_unpack.h |   21 +-
 security/apparmor/lib.c   |5 -
 security/apparmor/lsm.c   |   22 +-
 security/apparmor/policy.c|  609 
 security/apparmor/policy_unpack.c |  135 +--
 security/apparmor/procattr.c  |2 +-
 security/capability.c |2 +-
 security/integrity/evm/evm_main.c |2 +-
 security/security.c   |8 +-
 security/selinux/hooks.c  |   17 +-
 security/smack/smack.h|   13 +-
 security/smack/smack_access.c |   29 ++-
 security/smack/smack_lsm.c|   51 ++-
 security/smack/smackfs.c  |  184 -
 32 files changed, 1675 insertions(+), 556 deletions(-)
 create mode 100644 security/apparmor/crypto.c
 create mode 100644 security/apparmor/include/crypto.h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 10/11] x86, mem-hotplug: Support initialize page tables from low to high.

2013-09-05 Thread Tang Chen

Hi Wanpeng,

Thank you for reviewing. See below, please.

On 09/05/2013 09:30 PM, Wanpeng Li wrote:
..

+#ifdef CONFIG_MOVABLE_NODE
+   unsigned long kernel_end;
+
+   if (movablenode_enable_srat&&
+   memblock.current_order == MEMBLOCK_ORDER_LOW_TO_HIGH) {


I think memblock.current_order == MEMBLOCK_ORDER_LOW_TO_HIGH is always
true if config MOVABLE_NODE and movablenode_enable_srat == true if PATCH
11/11 is applied.


memblock.current_order == MEMBLOCK_ORDER_LOW_TO_HIGH is true here if
MOVABLE_NODE is configured, and it will be reset after SRAT is parsed. But
movablenode_enable_srat can only be true when users specify the movablenode
boot option on the kernel command line.


Please refer to patch 9/11.




+   kernel_end = round_up(__pa_symbol(_end), PMD_SIZE);
+
+   memory_map_from_low(kernel_end, end);
+   memory_map_from_low(ISA_END_ADDRESS, kernel_end);


Why split ISA_END_ADDRESS ~ end?


The first 5 pages for the page tables are from brk; please refer to
alloc_low_pages(). They are able to map about 2MB of memory, and this 2MB
of memory will be used to store page tables for the next mapped pages.

Here, we split [ISA_END_ADDRESS, end) into [ISA_END_ADDRESS, _end) and
[_end, end), and map [_end, end) first. This is because memory in
[ISA_END_ADDRESS, _end) may already be in use, in which case we would not
have enough memory for the upcoming page tables. We should map [_end, end)
first because this memory is highly likely to be unused.




..


I think the variables sorted by address is:
ISA_END_ADDRESS ->  _end ->  real_end ->  end


Yes.




+   memory_map_from_high(ISA_END_ADDRESS, real_end);


Does this overlap with the work done between #ifdef CONFIG_MOVABLE_NODE and
#endif?



I don't think so. Looking at my code, if the work between #ifdef
CONFIG_MOVABLE_NODE and #endif is done, it will goto out, right?

Thanks.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH RESEND v3 0/7] Enable Drivers for Intel MIC X100 Coprocessors.

2013-09-05 Thread Joe Perches
Whitespace neatening...

Multiline statement argument alignment.
Argument wrapping.
Use kmalloc_array instead of kmalloc.

---

 drivers/misc/mic/card/mic_virtio.c  | 17 ---
 drivers/misc/mic/card/mic_x100.c|  4 +-
 drivers/misc/mic/host/mic_debugfs.c | 91 ++---
 drivers/misc/mic/host/mic_fops.c|  6 +--
 drivers/misc/mic/host/mic_intr.c| 37 ---
 drivers/misc/mic/host/mic_smpt.c| 17 +++
 drivers/misc/mic/host/mic_sysfs.c   | 18 
 drivers/misc/mic/host/mic_virtio.c  | 34 ++
 drivers/misc/mic/host/mic_x100.c| 29 ++--
 9 files changed, 122 insertions(+), 131 deletions(-)

diff --git a/drivers/misc/mic/card/mic_virtio.c 
b/drivers/misc/mic/card/mic_virtio.c
index 38275c1..6071aec 100644
--- a/drivers/misc/mic/card/mic_virtio.c
+++ b/drivers/misc/mic/card/mic_virtio.c
@@ -103,7 +103,7 @@ static void mic_finalize_features(struct virtio_device 
*vdev)
for (i = 0; i < bits; i++) {
if (test_bit(i, vdev->features))
iowrite8(ioread8(_features[i / 8]) | (1 << (i % 8)),
-   _features[i / 8]);
+_features[i / 8]);
}
 }
 
@@ -197,10 +197,9 @@ static void mic_notify(struct virtqueue *vq)
 static void mic_del_vq(struct virtqueue *vq, int n)
 {
struct mic_vdev *mvdev = to_micvdev(vq->vdev);
-   struct vring *vr = (struct vring *) (vq + 1);
+   struct vring *vr = (struct vring *)(vq + 1);
 
-   free_pages((unsigned long) vr->used,
-   get_order(mvdev->used_size[n]));
+   free_pages((unsigned long) vr->used, get_order(mvdev->used_size[n]));
vring_del_virtqueue(vq);
mic_card_unmap(mvdev->mdev, mvdev->vr[n]);
mvdev->vr[n] = NULL;
@@ -274,8 +273,8 @@ static struct virtqueue *mic_find_vq(struct virtio_device 
*vdev,
/* Allocate and reassign used ring now */
mvdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 +
sizeof(struct vring_used_elem) * config.num);
-   used = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-   get_order(mvdev->used_size[index]));
+   used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+   get_order(mvdev->used_size[index]));
if (!used) {
err = -ENOMEM;
dev_err(mic_dev(mvdev), "%s %d err %d\n",
@@ -291,7 +290,7 @@ static struct virtqueue *mic_find_vq(struct virtio_device 
*vdev,
 * vring_new_virtqueue() would ensure that
 *  (>vring == (struct vring *) (>vq + 1));
 */
-   vr = (struct vring *) (vq + 1);
+   vr = (struct vring *)(vq + 1);
vr->used = used;
 
vq->priv = mvdev;
@@ -544,7 +543,7 @@ static void mic_scan_devices(struct mic_driver *mdrv, bool 
remove)
if (dev) {
if (remove)
iowrite8(MIC_VIRTIO_PARAM_DEV_REMOVE,
-   >config_change);
+>config_change);
put_device(dev);
mic_handle_config_change(d, i, mdrv);
ret = mic_remove_device(d, i, mdrv);
@@ -559,7 +558,7 @@ static void mic_scan_devices(struct mic_driver *mdrv, bool 
remove)
 
/* new device */
dev_dbg(mdrv->dev, "%s %d Adding new virtio device %p\n",
-   __func__, __LINE__, d);
+   __func__, __LINE__, d);
if (!remove)
mic_add_device(d, i, mdrv);
}
diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c
index 7cb3469..e54dfcb 100644
--- a/drivers/misc/mic/card/mic_x100.c
+++ b/drivers/misc/mic/card/mic_x100.c
@@ -66,8 +66,8 @@ void mic_send_intr(struct mic_device *mdev, int doorbell)
/* Ensure that the interrupt is ordered w.r.t previous stores. */
wmb();
mic_mmio_write(mw, MIC_X100_SBOX_SDBIC0_DBREQ_BIT,
-   MIC_X100_SBOX_BASE_ADDRESS +
-   (MIC_X100_SBOX_SDBIC0 + (4 * doorbell)));
+  MIC_X100_SBOX_BASE_ADDRESS +
+  (MIC_X100_SBOX_SDBIC0 + (4 * doorbell)));
 }
 
 /**
diff --git a/drivers/misc/mic/host/mic_debugfs.c 
b/drivers/misc/mic/host/mic_debugfs.c
index e22fb7b..002faa5 100644
--- a/drivers/misc/mic/host/mic_debugfs.c
+++ b/drivers/misc/mic/host/mic_debugfs.c
@@ -103,7 +103,7 @@ static int mic_smpt_show(struct seq_file *s, void *pos)
unsigned long flags;
 
seq_printf(s, "MIC %-2d |%-10s| %-14s %-10s\n",
-   mdev->id, "SMPT entry", "SW DMA addr", "RefCount");
+  mdev->id, "SMPT entry", "SW DMA addr", "RefCount");
seq_puts(s, "\n");
 
if (mdev->smpt) {
@@ -111,8 +111,8 @@ static int mic_smpt_show(struct 

Re: ftrace 'failed to modify' bug when loading reiserfs.ko

2013-09-05 Thread Dave Jones
On Thu, Sep 05, 2013 at 09:28:34PM -0400, Steven Rostedt wrote:
 > On Thu, 5 Sep 2013 21:19:24 -0400
 > Dave Jones  wrote:
 > 
 > > For whatever dumb reason, when running 'make install' on a Fedora system,
 > > os-prober tries to figure out what filesystems are needed by loading 
 > > filesystems,
 > > and seeing what sticks..  Today it blew up spectacularly when it got to
 > > loading reiserfs..  System wedged entirely afterwards.
 > 
 > Could it be that the reiserfs module was compiled differently than the
 > running kernel?
 
o... it was probably installing the just-built version over the same '3.11+'
modules tree that was running.  This has never been a problem before though..

Dave
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: ftrace 'failed to modify' bug when loading reiserfs.ko

2013-09-05 Thread Steven Rostedt
On Thu, 5 Sep 2013 21:19:24 -0400
Dave Jones  wrote:

> For whatever dumb reason, when running 'make install' on a Fedora system,
> os-prober tries to figure out what filesystems are needed by loading 
> filesystems,
> and seeing what sticks..  Today it blew up spectacularly when it got to
> loading reiserfs..  System wedged entirely afterwards.

Could it be that the reiserfs module was compiled differently than the
running kernel?

> 
>   Dave
> 
> [ cut here ]
> WARNING: CPU: 2 PID: 30566 at kernel/trace/ftrace.c:1694 
> ftrace_bug+0x25d/0x270()
> Modules linked in: reiserfs(+) snd_hda_codec_hdmi snd_hda_codec_realtek 
> snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm 
> snd_page_alloc xfs snd_timer libcrc32c snd e1000e ptp usb_debug pps_core 
> pcspkr soundcore
> CPU: 2 PID: 30566 Comm: modprobe Not tainted 3.11.0+ #57 
>  81a2809d 88008de19c30 817171e9 
>  88008de19c68 81053dad 0010 a02738b0
>  8802419e3518  8801ab16e100 88008de19c78
> Call Trace:
>  [] dump_stack+0x54/0x74
>  [] warn_slowpath_common+0x7d/0xa0
>  [] warn_slowpath_null+0x1a/0x20
>  [] ftrace_bug+0x25d/0x270
>  [] ftrace_process_locs+0x308/0x630
>  [] ftrace_module_notify_enter+0x3c/0x40
>  [] notifier_call_chain+0x66/0x150
>  [] __blocking_notifier_call_chain+0x67/0xc0
>  [] blocking_notifier_call_chain+0x16/0x20
>  [] load_module+0x1f7d/0x2680
>  [] ? store_uevent+0x40/0x40
>  [] ? reiserfs_xattr_register_handlers+0xf9f/0xf9f 
> [reiserfs]
>  [] ? reiserfs_xattr_register_handlers+0xf9f/0xf9f 
> [reiserfs]
>  [] SyS_finit_module+0x86/0xb0
>  [] tracesys+0xdd/0xe2
> ---[ end trace 956db59f53237fe4 ]---
> ftrace failed to modify [] 
> reiserfs_init_bitmap_cache+0x0/0x5750 [reiserfs]
>  actual: 14:00:00:00:00

Hmm, where it expected to see a call to mcount, instead it sees the
instruction:

 0x14 00 00 00 00


Can you do an objdump of that same binary, and show me what's located
at: reiserfs_init_bitmap_cache+0x0

-- Steve

> [ cut here ]
> WARNING: CPU: 2 PID: 30566 at arch/x86/mm/pageattr.c:677 
> __cpa_process_fault+0x91/0xa0()
> CPA: called for zero pte. vaddr = a0249000 cpa->vaddr = 
> a0249000
> Modules linked in: reiserfs(+) snd_hda_codec_hdmi snd_hda_codec_realtek 
> snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm 
> snd_page_alloc xfs snd_timer libcrc32c snd e1000e ptp usb_debug pps_core 
> pcspkr soundcore
> CPU: 2 PID: 30566 Comm: modprobe Tainted: GW3.11.0+ #57 
>  81a0ba44 88008de19b40 817171e9 88008de19b88
>  88008de19b78 81053dad 88008de19d08 fff2
>  a0249000 880238646248 88008de19d08 88008de19bd8
> Call Trace:
>  [] dump_stack+0x54/0x74
>  [] warn_slowpath_common+0x7d/0xa0
>  [] ? reiserfs_xattr_register_handlers+0x9f9f/0x29f9f 
> [reiserfs]
>  [] warn_slowpath_fmt+0x4c/0x50
>  [] ? reiserfs_xattr_register_handlers+0x8f9f/0xf9f 
> [reiserfs]
>  [] ? reiserfs_xattr_register_handlers+0x9f9f/0x29f9f 
> [reiserfs]
>  [] ? reiserfs_xattr_register_handlers+0x9f9f/0x29f9f 
> [reiserfs]
>  [] __cpa_process_fault+0x91/0xa0
>  [] __change_page_attr_set_clr+0x392/0xab0
>  [] ? 0xa023efff
>  [] change_page_attr_set_clr+0x123/0x460
>  [] ? 0xa023efff
>  [] set_memory_ro+0x2f/0x40
>  [] ? reiserfs_xattr_register_handlers+0x9f9f/0x29f9f 
> [reiserfs]
>  [] set_section_ro_nx+0x3a/0x71
>  [] load_module+0x1f9e/0x2680
>  [] ? store_uevent+0x40/0x40
>  [] ? reiserfs_xattr_register_handlers+0xf9f/0xf9f 
> [reiserfs]
>  [] ? reiserfs_xattr_register_handlers+0xf9f/0xf9f 
> [reiserfs]
>  [] SyS_finit_module+0x86/0xb0
>  [] tracesys+0xdd/0xe2
> ---[ end trace 956db59f53237fe5 ]---
> Oops: 0003 [#1] SMP 
> Modules linked in: reiserfs snd_hda_codec_hdmi snd_hda_codec_realtek 
> snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm 
> snd_page_alloc xfs snd_timer libcrc32c snd e1000e ptp usb_debug pps_core 
> pcspkr soundcore
> CPU: 1 PID: 30571 Comm: modprobe Tainted: GW3.11.0+ #57 
> task: 8801238a ti: 8801ab314000 task.ti: 8801ab314000
> RIP: 0010:[]  [] load_module+0x161b/0x2680
> RSP: 0018:8801ab315dc0  EFLAGS: 00010202
> RAX: a009c000 RBX: 8801ab315ef8 RCX: a00c2000
> RDX: a00c2000 RSI: 0055 RDI: a00c3f98
> RBP: 8801ab315ee8 R08: a009fa68 R09: a009c000
> R10: a00c3f98 R11: 0002 R12: a02d2838
> R13: 0001 R14:  R15: a02d2820
> FS:  7f6f48b51740() GS:88024580() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: a00c2000 CR3: 0002211e9000 CR4: 001407e0
> DR0:  DR1:  DR2: 
> DR3:  DR6: fffe0ff0 DR7: 0400
> 

[PATCH V2] arm: LLVMLinux: use static inline in ARM ftrace.h

2013-09-05 Thread behanw
From: Behan Webster 

With compilers which follow the C99 standard (like modern versions of gcc and
clang), "extern inline" does the wrong thing (emits code for an externally
linkable version of the inline function). In this case using static inline
and removing the NULL version of return_address in return_address.c does
the right thing.

Signed-off-by: Behan Webster 
---
 arch/arm/include/asm/ftrace.h| 2 +-
 arch/arm/kernel/return_address.c | 5 -
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index f89515a..2bb8cac 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -45,7 +45,7 @@ void *return_address(unsigned int);
 
 #else
 
-extern inline void *return_address(unsigned int level)
+static inline void *return_address(unsigned int level)
 {
return NULL;
 }
diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c
index fafedd8..f6aa84d 100644
--- a/arch/arm/kernel/return_address.c
+++ b/arch/arm/kernel/return_address.c
@@ -63,11 +63,6 @@ void *return_address(unsigned int level)
 #warning "TODO: return_address should use unwind tables"
 #endif
 
-void *return_address(unsigned int level)
-{
-   return NULL;
-}
-
 #endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) / 
else */
 
 EXPORT_SYMBOL_GPL(return_address);
-- 
1.8.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


ftrace 'failed to modify' bug when loading reiserfs.ko

2013-09-05 Thread Dave Jones
For whatever dumb reason, when running 'make install' on a Fedora system,
os-prober tries to figure out what filesystems are needed by loading 
filesystems,
and seeing what sticks..  Today it blew up spectacularly when it got to
loading reiserfs..  System wedged entirely afterwards.

Dave

[ cut here ]
WARNING: CPU: 2 PID: 30566 at kernel/trace/ftrace.c:1694 
ftrace_bug+0x25d/0x270()
Modules linked in: reiserfs(+) snd_hda_codec_hdmi snd_hda_codec_realtek 
snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm 
snd_page_alloc xfs snd_timer libcrc32c snd e1000e ptp usb_debug pps_core pcspkr 
soundcore
CPU: 2 PID: 30566 Comm: modprobe Not tainted 3.11.0+ #57 
 81a2809d 88008de19c30 817171e9 
 88008de19c68 81053dad 0010 a02738b0
 8802419e3518  8801ab16e100 88008de19c78
Call Trace:
 [] dump_stack+0x54/0x74
 [] warn_slowpath_common+0x7d/0xa0
 [] warn_slowpath_null+0x1a/0x20
 [] ftrace_bug+0x25d/0x270
 [] ftrace_process_locs+0x308/0x630
 [] ftrace_module_notify_enter+0x3c/0x40
 [] notifier_call_chain+0x66/0x150
 [] __blocking_notifier_call_chain+0x67/0xc0
 [] blocking_notifier_call_chain+0x16/0x20
 [] load_module+0x1f7d/0x2680
 [] ? store_uevent+0x40/0x40
 [] ? reiserfs_xattr_register_handlers+0xf9f/0xf9f [reiserfs]
 [] ? reiserfs_xattr_register_handlers+0xf9f/0xf9f [reiserfs]
 [] SyS_finit_module+0x86/0xb0
 [] tracesys+0xdd/0xe2
---[ end trace 956db59f53237fe4 ]---
ftrace failed to modify [] 
reiserfs_init_bitmap_cache+0x0/0x5750 [reiserfs]
 actual: 14:00:00:00:00
[ cut here ]
WARNING: CPU: 2 PID: 30566 at arch/x86/mm/pageattr.c:677 
__cpa_process_fault+0x91/0xa0()
CPA: called for zero pte. vaddr = a0249000 cpa->vaddr = a0249000
Modules linked in: reiserfs(+) snd_hda_codec_hdmi snd_hda_codec_realtek 
snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm 
snd_page_alloc xfs snd_timer libcrc32c snd e1000e ptp usb_debug pps_core pcspkr 
soundcore
CPU: 2 PID: 30566 Comm: modprobe Tainted: GW3.11.0+ #57 
 81a0ba44 88008de19b40 817171e9 88008de19b88
 88008de19b78 81053dad 88008de19d08 fff2
 a0249000 880238646248 88008de19d08 88008de19bd8
Call Trace:
 [] dump_stack+0x54/0x74
 [] warn_slowpath_common+0x7d/0xa0
 [] ? reiserfs_xattr_register_handlers+0x9f9f/0x29f9f 
[reiserfs]
 [] warn_slowpath_fmt+0x4c/0x50
 [] ? reiserfs_xattr_register_handlers+0x8f9f/0xf9f [reiserfs]
 [] ? reiserfs_xattr_register_handlers+0x9f9f/0x29f9f 
[reiserfs]
 [] ? reiserfs_xattr_register_handlers+0x9f9f/0x29f9f 
[reiserfs]
 [] __cpa_process_fault+0x91/0xa0
 [] __change_page_attr_set_clr+0x392/0xab0
 [] ? 0xa023efff
 [] change_page_attr_set_clr+0x123/0x460
 [] ? 0xa023efff
 [] set_memory_ro+0x2f/0x40
 [] ? reiserfs_xattr_register_handlers+0x9f9f/0x29f9f 
[reiserfs]
 [] set_section_ro_nx+0x3a/0x71
 [] load_module+0x1f9e/0x2680
 [] ? store_uevent+0x40/0x40
 [] ? reiserfs_xattr_register_handlers+0xf9f/0xf9f [reiserfs]
 [] ? reiserfs_xattr_register_handlers+0xf9f/0xf9f [reiserfs]
 [] SyS_finit_module+0x86/0xb0
 [] tracesys+0xdd/0xe2
---[ end trace 956db59f53237fe5 ]---
Oops: 0003 [#1] SMP 
Modules linked in: reiserfs snd_hda_codec_hdmi snd_hda_codec_realtek 
snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm 
snd_page_alloc xfs snd_timer libcrc32c snd e1000e ptp usb_debug pps_core pcspkr 
soundcore
CPU: 1 PID: 30571 Comm: modprobe Tainted: GW3.11.0+ #57 
task: 8801238a ti: 8801ab314000 task.ti: 8801ab314000
RIP: 0010:[]  [] load_module+0x161b/0x2680
RSP: 0018:8801ab315dc0  EFLAGS: 00010202
RAX: a009c000 RBX: 8801ab315ef8 RCX: a00c2000
RDX: a00c2000 RSI: 0055 RDI: a00c3f98
RBP: 8801ab315ee8 R08: a009fa68 R09: a009c000
R10: a00c3f98 R11: 0002 R12: a02d2838
R13: 0001 R14:  R15: a02d2820
FS:  7f6f48b51740() GS:88024580() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: a00c2000 CR3: 0002211e9000 CR4: 001407e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Stack:
 003fa26b 8801238a 8801ab315e48 8801238a
 a009c000 a02d2a58 a02d2838 3a80
 a009c000 a00c2000 003a94a10969 a00c3f98
Call Trace:
 [] ? xfs_setattr_nonsize+0x240/0x5d0 [xfs]
 [] ? xfs_inumbers+0x248/0x420 [xfs]
 [] ? copy_module_from_fd.isra.48+0x12a/0x190
 [] SyS_finit_module+0x86/0xb0
 [] tracesys+0xdd/0xe2
Code: 48 83 7a 38 00 78 6a 48 8b 30 44 89 ea 4c 89 d7 48 8d 14 52 4c 89 4c 24 
40 41 83 c5 01 48 8d 14 d1 48 89 4c 24 48 4c 89 54 24 58 <48> 89 32 48 8b 70 08 
48 89 72 

Re: [PATCH v4 0/3] cleanup of gpio_pcf857x.c

2013-09-05 Thread Kuninori Morimoto

Hi

> This patch series
> - removes the irq_demux_work
> - Uses devm_request_threaded_irq
> - Call the user handler iff gpio_to_irq is done.
> 
> v1 --> v2
> Split v1 to 3 patches
> v2 --> v3
>   Remove the unnecessary dts patches.
> v3 --> v4
>   Remove gpio->irq (in patch 2)
> 
> Note: these patches were made after applying [1].
> [1] - [PATCH v5] gpio: pcf857x: Add OF support - 
> https://lkml.org/lkml/2013/8/27/70
> 
> George Cherian (3):
>   gpio: pcf857x: change to devm_request_threaded_irq
>   gpio: pcf857x: remove the irq_demux_work and gpio->irq
>   gpio: pcf857x: call the gpio user handler iff gpio_to_irq is done
> 
>  drivers/gpio/gpio-pcf857x.c | 53 
> ++---
>  1 file changed, 26 insertions(+), 27 deletions(-)

For all patches

Acked-by: Kuninori Morimoto 

Best regards
---
Kuninori Morimoto
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: kobject: delayed kobject release: help find buggy drivers

2013-09-05 Thread Dave Jones
On Thu, Sep 05, 2013 at 10:44:34PM +0100, Russell King - ARM Linux wrote:
 > On Thu, Sep 05, 2013 at 05:26:06PM -0400, Dave Jones wrote:
 > > On Thu, Sep 05, 2013 at 05:11:13PM -0400, Dave Jones wrote:
 > >  >  > Trying without serial console next..
 > >  > 
 > >  > rebuilt with all serial turned off.
 > >  > 
 > >  > no luck, then it oopses somewhere else. I'm suspecting something isn't
 > >  > right with that debug patch, as the next trace is also in 
 > > kobject_release
 > 
 > You're right about that - I had assumed that it was necessary for all
 > kobjects to be 'added' before they're released, but that is not so.
 > Mea culpa.  They just need to be initialised - the problem with putting
 > a kmalloc into kobject_init() is that if it fails, we have no way to
 > report that failure...
 > 
 > Updated patch attached... though it sounds like you got it working
 > anyway.
 > 
 > > I managed to get to userspace on one boot, and got this.
 > > 
 > > kobject 88023d93f518 has been corrupted (magic 0x6b6b6b6b).  Please
 > > enable kobject debugging for full debug.
 > 
 > Okay, so this is definitely a case that someone has kfree'd the kobject
 > without waiting for the ->release function to be called.
 > 
 > > That looks like SLAB_POISON. Incompatibility between the two options ?
 > > 
 > > For some reason, even though I have DEBUG_KOBJECT on, I didn't get
 > > extra messages output.
 > 
 > Hmm.
 > 
 > ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 > CFLAGS_kobject.o += -DDEBUG
 > CFLAGS_kobject_uevent.o += -DDEBUG
 > endif
 > 
 > should enable the pr_debug()'s in lib/kobject.c... which should at least
 > appear in the dmesg log.  Being debug level, of course, they won't appear
 > during normal kernel boot unless 'debug' is passed on the kernel command
 > line.
 > 
 > It seems to work for me - but... produces rather a lot of debug messages,
 > so you may also wish to ensure that you have LOG_BUF_SHIFT set to
 > something large.

As discussed on irc, with this updated patch it hangs *really* early in boot. 

With earlyprintk=vga I can see some of the trace..

kobject_init
firmware_map_add_entry
firmware_map_add_early
e820_reserve_resources
setup_arch
start_kernel
x86_64_start_reservations
x86_64_start_kernel

 kmem_cache_alloc_trace

Dave

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ARM: LLVMLinux: Change "extern inline" to "gnu_inline" in ARM ftrace.h

2013-09-05 Thread Behan Webster
Sorry for the delay. A mistake in my email filters ate all your replies. 
Doh!


On 08/14/13 18:45, Russell King - ARM Linux wrote:

On Wed, Aug 14, 2013 at 05:37:41PM -0400, beh...@converseincode.com wrote:

-extern inline void *return_address(unsigned int level)
+extern inline __attribute__((gnu_inline))
+void *return_address(unsigned int level)

Well, that should be static inline, not extern inline in any case.  Does
clang work if that's static inline?


Actually, neither gcc nor clang work with it merely changed to "static 
inline".


Which is why we left it with the explicit GNU89 meaning of "extern 
inline" which is gnu_inline. C99 changed the meaning of what "extern 
inline" means. One of the major issues we've had with the clang kernel 
port is that clang defaults to gnu99 (which is mostly just C99) while 
until recently gcc defaulted to gnu89.


For recent versions of gcc:

http://gcc.gnu.org/onlinedocs/gcc/Standards.html

"The default, if no C language dialect options are given, is -std=gnu90; 
this will change to -std=gnu99 or -std=gnu11 in some future release when 
the C99 or C11 support is complete. Some features that are part of the 
C99 standard are accepted as extensions in C90 mode, and some features 
that are part of the C11 standard are accepted as extensions in C90 and 
C99 modes."


However, having said all that, it seems if I remove the corresponding 
NULL definition for return_address in arch/arm/kernel/return_address.c, 
I can make it "static inline" and it seems to work for both gcc and clang.


I'll send a new patch. :)

Incidentally the LLVMLinux project tests all the project's patches with 
both gcc and clang. The idea is to make it work with both compilers 
after all.


Behan

--
Behan Webster
beh...@converseincode.com

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] net: stmmac: fix bad merge conflict resolution

2013-09-05 Thread Olof Johansson
Merge commit 06c54055bebf919249aa1eb68312887c3cfe77b4 did a bad conflict
resolution accidentally leaving out a closing brace. Add it back.

Signed-off-by: Olof Johansson 
---

This breaks a handful of defconfigs on ARM, so it'd be good to see it
applied pretty quickly. Thanks!


-Olof

 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 7a00720..51c9069 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -87,6 +87,7 @@ static int stmmac_probe_config_dt(struct platform_device 
*pdev,
if (plat->force_thresh_dma_mode) {
plat->force_sf_dma_mode = 0;
pr_warn("force_sf_dma_mode is ignored if force_thresh_dma_mode 
is set.");
+   }
 
return 0;
 }
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [git pull] drm tree for 3.12-rc1

2013-09-05 Thread Linus Torvalds
On Thu, Sep 5, 2013 at 4:19 PM, Linus Torvalds
 wrote:
>
> So I've decided I'm going to try to bisect this after all. I've done
> enough pulls for today anyway, I guess. Let's see if I can bisect it
> by just trying to boot many times each try.

Ok, it's not the recent drm pull at all. I can't find a good kernel in
the bunch - they all fail eventually.

It may have been going on for as long as I've had this Haswell
machine, and I was just lucky (and not rebooting a lot until in the
merge window - and 4/5 boots work fine).

It may also be user-space and have come in with the mesa update I got
through yum yesterday. So there might be multiple reasons why I saw it
today after the drm pull for the first time.

The black screen - when it happens - happens after the fedora logo has
flashed, and gdm is supposed to start up. I tried reproducing it by
logging out and back in again (to restart X), but that doesn't do it.
Maybe timing-related with boot or just demand-loading of binaries the
first time, whatever.. Or maybe it's something special that gdm does
at startup?

  Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Avoid useless inodes and dentries reclamation

2013-09-05 Thread Dave Chinner
On Tue, Sep 03, 2013 at 11:38:27AM -0700, Tim Chen wrote:
> On Sat, 2013-08-31 at 19:00 +1000, Dave Chinner wrote:
> > On Fri, Aug 30, 2013 at 09:21:34AM -0700, Tim Chen wrote:
> > >
> > > 
> > > Signed-off-by: Tim Chen 
> > > ---
> > > diff --git a/fs/super.c b/fs/super.c
> > > index 73d0952..4df1fab 100644
> > > --- a/fs/super.c
> > > +++ b/fs/super.c
> > > @@ -112,9 +112,6 @@ static unsigned long super_cache_count(struct 
> > > shrinker *shrink,
> > >  
> > >   sb = container_of(shrink, struct super_block, s_shrink);
> > >  
> > > - if (!grab_super_passive(sb))
> > > - return 0;
> > > -
> > 
> > I think the function needs a comment explaining why we aren't
> > grabbing the sb here, otherwise people are going to read the code
> > and ask why it's different to the scanning callout.
> > 
> > >   if (sb->s_op && sb->s_op->nr_cached_objects)
> > >   total_objects = sb->s_op->nr_cached_objects(sb,
> > >sc->nid);
> > 
> 
> Yes, those comments are needed.
> I also need to remove the corresponding
>   drop_super(sb);
> 
> So probably something like:
> 
> ---
> diff --git a/fs/super.c b/fs/super.c
> index 73d0952..7b5a6e5 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -112,9 +112,14 @@ static unsigned long super_cache_count(struct shrinker 
> *shrink,
>  
>   sb = container_of(shrink, struct super_block, s_shrink);
>  
> - if (!grab_super_passive(sb))
> - return 0;
> -
> + /*
> +  * Don't call grab_super_passive as it is a potential 
> +  * scalability bottleneck. The counts could get updated 
> +  * between super_cache_count and super_cache_scan anyway.
> +  * Call to super_cache_count with shrinker_rwsem held
> +  * ensures the safety of call to list_lru_count_node() and 
> +  * s_op->nr_cached_objects().
> +  */

Well, that's not true of s_op->nr_cached_objects() right now. It's
only going to be true if the shrinker deregistration is moved before
->kill_sb()

> > Let me have a bit more of a think about this - the solution may
> > simply be unregistering the shrinker before we call ->kill_sb() so
> > the shrinker can't get called while we are tearing down the fs.
> > First, though, I need to go back and remind myself of why I put that
> > after ->kill_sb() in the first place.  
> 
> Seems very reasonable as I haven't found a case where the shrinker 
> is touched in ->kill_sb() yet. It looks like unregistering the
> shrinker before ->kill_sb() should be okay.

Having looked at it some more, I have to agree. I think the original
reason for unregistering the shrinker there was to avoid problems
with locking - the shrinker callouts are run holding the
shrinker_rwsem in read mode, and then we lock the sb->s_umount in
read mode. In the unmount case, we currently take the sb->s_umount
lock in write mode (thereby locking out the shrinker) but we drop it
before deregistering the shrinker and so there is no inverted
locking order.

The thing is, grab_super_passive does a try-lock on the sb->s_umount
now, and so if we are in the unmount process, it won't ever block.
That means what used to be a deadlock and races we were avoiding
by using grab_super_passive() is now:

shrinker                                umount

down_read(shrinker_rwsem)
                                        down_write(sb->s_umount)
                                        shrinker_unregister
                                          down_write(shrinker_rwsem)

grab_super_passive(sb)
  down_read_trylock(sb->s_umount)




up_read(shrinker_rwsem)


                                          up_write(shrinker_rwsem)
                                        ->kill_sb()


And so it appears to be safe to deregister the shrinker before
->kill_sb().

Can you do this as two patches? The first moves the shrinker
deregistration to before ->kill_sb(), then second is the above patch
that drops the grab_super_passive() calls from the ->count_objects
function?

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT PULL] NTB bug fixes and features for v3.12

2013-09-05 Thread Jon Mason
Hi Linus,
Below are a number of NTB bug fixes and features for v3.12.  Please
consider pulling them.

Thanks,
Jon

The following changes since commit 6e4664525b1db28f8c4e1130957f70a94c19213e:

  Linux 3.11 (2013-09-02 13:46:10 -0700)

are available in the git repository at:

  git://github.com/jonmason/ntb.git tags/ntb-3.12

for you to fetch changes up to 73f47cadfe29f98c48d7fbac4dd79d3e7979ee0b:

  ntb: clean up unnecessary MSI/MSI-X capability find (2013-09-05 11:08:01 
-0700)


NTB driver bug fixes to address issues in NTB-RP enablement, spad,
debugfs, and USD/DSD identification.  Add a workaround on Xeon NTB
devices for b2bdoorbell errata.  Also, add new NTB driver features to
support 32bit x86, DMA engine support, and NTB-RP support.  Finally, a
few clean-ups and update to MAINTAINERS for the NTB git tree and wiki
location.


Jon Mason (16):
  NTB: Add Error Handling in ntb_device_setup
  NTB: Correct Number of Scratch Pad Registers
  NTB: Correct USD/DSD Identification
  NTB: Correct debugfs to work with more than 1 NTB Device
  NTB: Xeon Errata Workaround
  NTB: BWD Link Recovery
  NTB: Update Device IDs
  NTB: Enable 32bit Support
  NTB: Use DMA Engine to Transmit and Receive
  NTB: Rename Variables for NTB-RP
  NTB: NTB-RP support
  NTB: Remove References of non-B2B BWD HW
  NTB: Remove unused variable
  NTB: Comment Fix
  NTB: Update Version
  MAINTAINERS: Add Website and Git Tree for NTB

Yijing Wang (1):
  ntb: clean up unnecessary MSI/MSI-X capability find

 MAINTAINERS |2 +
 drivers/ntb/Kconfig |2 +-
 drivers/ntb/ntb_hw.c|  501 ++-
 drivers/ntb/ntb_hw.h|  105 +++--
 drivers/ntb/ntb_regs.h  |   50 +++--
 drivers/ntb/ntb_transport.c |  422 
 6 files changed, 855 insertions(+), 227 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] perf mem: add priv level filtering support

2013-09-05 Thread Sukadev Bhattiprolu
Arnaldo Carvalho de Melo [a...@redhat.com] wrote:
| Em Wed, Aug 28, 2013 at 03:38:28PM +0200, Stephane Eranian escreveu:
| > On Wed, Aug 28, 2013 at 3:27 PM, Arnaldo Carvalho de Melo  
wrote:
| > > So perhaps we should change both to (and add this to 'report' as well):
| > >
| > > -U, --hide_kernel_symbols   hide kernel symbols
| > > -K, --hide_user_symbols hide user symbols
| > >
| > 
| > Well, I don't know what perf top does here but I don't want to hide
| > the samples. I simply don't want to collect them (do not appear
| > in the perf.data file). If that's what is happening in perf top, then
| > I'll be glad to use the same options.
| 
| Indeed, it's for different purposes, 'perf top' when used with one of
| those options will still collect samples for all priv levels and will
| just toggle a flag to not zap the ones asked not to show when decaying
| the samples.
| 
| When the user presses 'U' or 'K' on the UI, the flag gets toggled and
| samples start being considered/zapped.
| 
| But my worry here is about consistency across tools for the single
| letter options, so perhaps if you could use:
| 
|  -U   collect only user level samples
|  -K   collect only kernel level samples
| 
| I think it would stay consistent and clear, what do you think?

But, we use lower case qualifiers :u, :k to select user or kernel mode
monitoring. 

perf record -e cycles   # both kernel and user
perf record -e cycles:u ... # just user

(tools/perf/util/parse-events.c:

struct event_modifier {
int eu;
int ek;
int eh;
int eH;
int eG;
int precise;
int exclude_GH;
};

Will we ever need hypervisor and host monitoring for 'perf mem' ?

Or can we add a '-e' option to 'perf mem' so user can specify the events
and qualifiers same as they do for 'perf record' ?

perf mem -e mem-loads:u record .

(this would of course expose the mem-loads and mem-stores events to
the user)

Sukadev

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] usb: gadget LLVMLinux: Removing the use of VLAIS from the gadget driver

2013-09-05 Thread Behan Webster

Replying to my patch email just in case it was missed before.

Thanks,

Behan

On 08/01/13 21:35, beh...@converseincode.com wrote:

From: Behan Webster 

The use of variable length arrays in structs (VLAIS) in the Linux Kernel code
precludes the use of compilers which don't implement VLAIS (for instance the
Clang compiler). This patch removes the use of VLAIS in the gadget driver.

Signed-off-by: Mark Charlebois 
Signed-off-by: Behan Webster 
---
  drivers/usb/gadget/f_fs.c | 128 +++---
  1 file changed, 76 insertions(+), 52 deletions(-)

diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c
index f394f29..4b872c4 100644
--- a/drivers/usb/gadget/f_fs.c
+++ b/drivers/usb/gadget/f_fs.c
@@ -30,7 +30,6 @@
  
  #define FUNCTIONFS_MAGIC	0xa647361 /* Chosen by a honest dice roll ;) */
  
-

  /* Debugging /
  
  #ifdef VERBOSE_DEBUG

@@ -214,6 +213,8 @@ struct ffs_data {
/* ids in stringtabs are set in functionfs_bind() */
const void  *raw_strings;
struct usb_gadget_strings   **stringtabs;
+   struct usb_gadget_strings   *stringtab;
+   struct usb_string   *strings;
  
  	/*

 * File system's super block, write once when file system is
@@ -263,7 +264,10 @@ struct ffs_function {
  
  	struct ffs_ep			*eps;

u8  eps_revmap[16];
+   struct usb_descriptor_header**fs_descs;
+   struct usb_descriptor_header**hs_descs;
short   *interfaces_nums;
+   char*raw_descs;
  
  	struct usb_function		function;

  };
@@ -1345,6 +1349,8 @@ static void ffs_data_clear(struct ffs_data *ffs)
kfree(ffs->raw_descs);
kfree(ffs->raw_strings);
kfree(ffs->stringtabs);
+   kfree(ffs->stringtab);
+   kfree(ffs->strings);
  }
  
  static void ffs_data_reset(struct ffs_data *ffs)

@@ -1357,6 +1363,8 @@ static void ffs_data_reset(struct ffs_data *ffs)
ffs->raw_descs = NULL;
ffs->raw_strings = NULL;
ffs->stringtabs = NULL;
+   ffs->stringtab = NULL;
+   ffs->strings = NULL;
  
  	ffs->raw_descs_length = 0;

ffs->raw_fs_descs_length = 0;
@@ -1528,12 +1536,10 @@ static void ffs_func_free(struct ffs_function *func)
ffs_data_put(func->ffs);
  
  	kfree(func->eps);

-   /*
-* eps and interfaces_nums are allocated in the same chunk so
-* only one free is required.  Descriptors are also allocated
-* in the same chunk.
-*/
-
+   kfree(func->fs_descs);
+   kfree(func->hs_descs);
+   kfree(func->interfaces_nums);
+   kfree(func->raw_descs);
kfree(func);
  }
  
@@ -1907,33 +1913,35 @@ static int __ffs_data_got_strings(struct ffs_data *ffs,

return 0;
}
  
-	/* Allocate everything in one chunk so there's less maintenance. */

{
-   struct {
-   struct usb_gadget_strings *stringtabs[lang_count + 1];
-   struct usb_gadget_strings stringtab[lang_count];
-   struct usb_string strings[lang_count*(needed_count+1)];
-   } *d;
unsigned i = 0;
-
-   d = kmalloc(sizeof *d, GFP_KERNEL);
-   if (unlikely(!d)) {
+   usb_gadget_strings **stringtabs = NULL;
+   usb_gadget_strings *stringtab = NULL;
+   usb_string *strings = NULL;
+
+   stringtabs = kmalloc(sizeof(*stringtabs)*(lang_count + 1),
+   GFP_KERNEL);
+   stringtab = kmalloc(sizeof(*stringtab)*(lang_count),
+   GFP_KERNEL);
+   strings = kmalloc(sizeof(*strings)
+   * (lang_count * (needed_count + 1)), GFP_KERNEL);
+   if (unlikely(!stringtabs || !stringtab || !strings)) {
+   kfree(stringtabs);
+   kfree(stringtab);
+   kfree(strings);
kfree(_data);
return -ENOMEM;
}
-
-   stringtabs = d->stringtabs;
-   t = d->stringtab;
+   b = stringtabs;
+   t = stringtab;
i = lang_count;
do {
-   *stringtabs++ = t++;
+   *b++ = t++;
} while (--i);
-   *stringtabs = NULL;
+   *b = NULL;
  
-		stringtabs = d->stringtabs;

-   t = d->stringtab;
-   s = d->strings;
-   strings = s;
+   t = stringtab;
+   s = strings;
}
  
  	/* For each language */

@@ -1991,12 +1999,16 @@ static int __ffs_data_got_strings(struct ffs_data *ffs,
  
  	/* Done! */

ffs->stringtabs = stringtabs;
+   ffs->stringtab = stringtab;
+   ffs->strings = 

[PATCHv2 1/2] ARM: msm: Add support for MSM8974 Dragonboard

2013-09-05 Thread Rohit Vaswani
This patch adds basic board support for MSM8974 Dragonboard
which belongs to the Snapdragon 800 family.
For now, just support a basic machine with device tree.

Signed-off-by: Rohit Vaswani 
---
 arch/arm/boot/dts/Makefile|  3 ++-
 arch/arm/boot/dts/msm8974-db.dts  |  7 +++
 arch/arm/boot/dts/msm8974.dtsi| 30 ++
 arch/arm/mach-msm/Kconfig | 20 ++--
 arch/arm/mach-msm/Makefile|  1 +
 arch/arm/mach-msm/board-dt-8974.c | 23 +++
 6 files changed, 81 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm/boot/dts/msm8974-db.dts
 create mode 100644 arch/arm/boot/dts/msm8974.dtsi
 create mode 100644 arch/arm/mach-msm/board-dt-8974.c

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 69193be..95ace01 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -103,7 +103,8 @@ dtb-$(CONFIG_ARCH_KIRKWOOD) += kirkwood-cloudbox.dtb \
kirkwood-openblocks_a6.dtb
 dtb-$(CONFIG_ARCH_MARCO) += marco-evb.dtb
 dtb-$(CONFIG_ARCH_MSM) += msm8660-surf.dtb \
-   msm8960-cdp.dtb
+   msm8960-cdp.dtb \
+   msm8974-db.dtb
 dtb-$(CONFIG_ARCH_MVEBU) += armada-370-db.dtb \
armada-370-mirabox.dtb \
armada-370-netgear-rn102.dtb \
diff --git a/arch/arm/boot/dts/msm8974-db.dts b/arch/arm/boot/dts/msm8974-db.dts
new file mode 100644
index 000..74106a8
--- /dev/null
+++ b/arch/arm/boot/dts/msm8974-db.dts
@@ -0,0 +1,7 @@
+/include/ "msm8974.dtsi"
+
+/ {
+   model = "Qualcomm MSM8974 Dragonboard";
+   compatible = "qcom,msm8974-db", "qcom,msm8974";
+};
+
diff --git a/arch/arm/boot/dts/msm8974.dtsi b/arch/arm/boot/dts/msm8974.dtsi
new file mode 100644
index 000..aa3bb2f
--- /dev/null
+++ b/arch/arm/boot/dts/msm8974.dtsi
@@ -0,0 +1,30 @@
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+   model = "Qualcomm MSM8974";
+   compatible = "qcom,msm8974";
+   interrupt-parent = <>;
+
+   soc: soc { };
+};
+
+ {
+   intc: interrupt-controller@f900 {
+   compatible = "qcom,msm-qgic2";
+   interrupt-controller;
+   #interrupt-cells = <3>;
+   reg = < 0xf900 0x1000 >,
+ < 0xf9002000 0x1000 >;
+   };
+
+   timer {
+   compatible = "arm,armv7-timer";
+   interrupts = <1 2 0xf08>,
+<1 3 0xf08>,
+<1 4 0xf08>,
+<1 1 0xf08>;
+   clock-frequency = <1920>;
+   };
+};
diff --git a/arch/arm/mach-msm/Kconfig b/arch/arm/mach-msm/Kconfig
index 905efc8..499e8fe 100644
--- a/arch/arm/mach-msm/Kconfig
+++ b/arch/arm/mach-msm/Kconfig
@@ -1,12 +1,12 @@
 if ARCH_MSM
 
 comment "Qualcomm MSM SoC Type"
-   depends on (ARCH_MSM8X60 || ARCH_MSM8960)
+   depends on ARCH_MSM_DT
 
 choice
prompt "Qualcomm MSM SoC Type"
default ARCH_MSM7X00A
-   depends on !(ARCH_MSM8X60 || ARCH_MSM8960)
+   depends on !ARCH_MSM_DT
 
 config ARCH_MSM7X00A
bool "MSM7x00A / MSM7x01A"
@@ -60,6 +60,19 @@ config ARCH_MSM8960
select MSM_SCM if SMP
select USE_OF
 
+config ARCH_MSM8974
+   bool "MSM8974"
+   select ARM_GIC
+   select CPU_V7
+   select HAVE_ARM_ARCH_TIMER
+   select HAVE_SMP
+   select MSM_SCM if SMP
+   select USE_OF
+
+config ARCH_MSM_DT
+   def_bool y
+   depends on (ARCH_MSM8X60 || ARCH_MSM8960 || ARCH_MSM8974)
+
 config MSM_HAS_DEBUG_UART_HS
bool
 
@@ -68,6 +81,7 @@ config MSM_SOC_REV_A
 
 config  ARCH_MSM_ARM11
bool
+
 config  ARCH_MSM_SCORPION
bool
 
@@ -75,6 +89,7 @@ config  MSM_VIC
bool
 
 menu "Qualcomm MSM Board Type"
+   depends on !ARCH_MSM_DT
 
 config MACH_HALIBUT
depends on ARCH_MSM
@@ -122,6 +137,7 @@ config MSM_SMD
 
 config MSM_GPIOMUX
bool
+   depends on !ARCH_MSM_DT
help
  Support for MSM V1 TLMM GPIOMUX architecture.
 
diff --git a/arch/arm/mach-msm/Makefile b/arch/arm/mach-msm/Makefile
index d872634..28e5c21 100644
--- a/arch/arm/mach-msm/Makefile
+++ b/arch/arm/mach-msm/Makefile
@@ -28,5 +28,6 @@ obj-$(CONFIG_ARCH_MSM7X30) += board-msm7x30.o 
devices-msm7x30.o
 obj-$(CONFIG_ARCH_QSD8X50) += board-qsd8x50.o devices-qsd8x50.o
 obj-$(CONFIG_ARCH_MSM8X60) += board-dt-8660.o
 obj-$(CONFIG_ARCH_MSM8960) += board-dt-8960.o
+obj-$(CONFIG_ARCH_MSM8974) += board-dt-8974.o
 obj-$(CONFIG_MSM_GPIOMUX) += gpiomux.o
 obj-$(CONFIG_ARCH_QSD8X50) += gpiomux-8x50.o
diff --git a/arch/arm/mach-msm/board-dt-8974.c 
b/arch/arm/mach-msm/board-dt-8974.c
new file mode 100644
index 000..697623e
--- /dev/null
+++ b/arch/arm/mach-msm/board-dt-8974.c
@@ -0,0 +1,23 @@
+/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the 

[PATCH RESEND v3 3/7] Intel MIC Host Driver, card OS state management.

2013-09-05 Thread Sudeep Dutt
This patch enables the following features:
a) Boots and shuts down the card via sysfs entries.
b) Allocates and maps a device page for communication with the
   card driver and updates the device page address via scratchpad
   registers.
c) Provides sysfs entries for shutdown status, kernel command line,
   ramdisk and log buffer information.

Co-author: Dasaratharaman Chandramouli 
Signed-off-by: Ashutosh Dixit 
Signed-off-by: Caz Yokoyama 
Signed-off-by: Dasaratharaman Chandramouli 

Signed-off-by: Harshavardhan R Kharche 
Signed-off-by: Nikhil Rao 
Signed-off-by: Sudeep Dutt 
Acked-by: Yaozu (Eddie) Dong 
Reviewed-by: Peter P Waskiewicz Jr 
---
 Documentation/ABI/testing/sysfs-class-mic.txt | 113 
 drivers/misc/mic/common/mic_device.h  |   7 +
 drivers/misc/mic/host/Makefile|   2 +
 drivers/misc/mic/host/mic_boot.c  | 184 +
 drivers/misc/mic/host/mic_debugfs.c   | 355 +
 drivers/misc/mic/host/mic_device.h|  60 +
 drivers/misc/mic/host/mic_main.c  | 129 -
 drivers/misc/mic/host/mic_sysfs.c | 369 ++
 drivers/misc/mic/host/mic_x100.c  | 251 ++
 drivers/misc/mic/host/mic_x100.h  |  12 +
 include/uapi/linux/Kbuild |   1 +
 include/uapi/linux/mic_common.h   |  74 ++
 12 files changed, 1553 insertions(+), 4 deletions(-)
 create mode 100644 drivers/misc/mic/host/mic_boot.c
 create mode 100644 drivers/misc/mic/host/mic_debugfs.c
 create mode 100644 include/uapi/linux/mic_common.h

diff --git a/Documentation/ABI/testing/sysfs-class-mic.txt 
b/Documentation/ABI/testing/sysfs-class-mic.txt
index 09eb3c6..82cdad3 100644
--- a/Documentation/ABI/testing/sysfs-class-mic.txt
+++ b/Documentation/ABI/testing/sysfs-class-mic.txt
@@ -32,3 +32,116 @@ Contact:Sudeep Dutt 
 Description:
Provides information about the silicon stepping for an Intel
MIC device. For example - "A0" or "B0"
+
+What:  /sys/class/mic/mic(x)/state
+Date:  August 2013
+KernelVersion: 3.11
+Contact:   Sudeep Dutt 
+Description:
+   When read, this entry provides the current state of an Intel
+   MIC device in the context of the card OS. Possible values that
+   will be read are:
+   "offline" - The MIC device is ready to boot the card OS.
+   "online" - The MIC device has initiated booting a card OS.
+   "shutting_down" - The card OS is shutting down.
+   "reset_failed" - The MIC device has failed to reset.
+
+   When written, this sysfs entry triggers different state change
+   operations depending upon the current state of the card OS.
+   Acceptable values are:
+   "boot" - Boot the card OS image specified by the combination
+of firmware, ramdisk, cmdline and bootmode
+   sysfs entries.
+   "reset" - Initiates device reset.
+   "shutdown" - Initiates card OS shutdown.
+
+What:  /sys/class/mic/mic(x)/shutdown_status
+Date:  August 2013
+KernelVersion: 3.11
+Contact:   Sudeep Dutt 
+Description:
+   An Intel MIC device runs a Linux OS during its operation. This
+   OS can shutdown because of various reasons. When read, this
+   entry provides the status on why the card OS was shutdown.
+   Possible values are:
+   "nop" -  shutdown status is not applicable, when the card OS is
+   "online"
+   "crashed" - Shutdown because of a HW or SW crash.
+   "halted" - Shutdown because of a halt command.
+   "poweroff" - Shutdown because of a poweroff command.
+   "restart" - Shutdown because of a restart command.
+
+What:  /sys/class/mic/mic(x)/cmdline
+Date:  August 2013
+KernelVersion: 3.11
+Contact:   Sudeep Dutt 
+Description:
+   An Intel MIC device runs a Linux OS during its operation. Before
+   booting this card OS, it is possible to pass kernel command line
+   options to configure various features in it, similar to
+   self-bootable machines. When read, this entry provides
+   information about the current kernel command line options set to
+   boot the card OS. This entry can be written to change the
+   existing kernel command line options. Typically, the user would
+   want to read the current command line options, append new ones
+   or modify existing ones and then write the whole kernel command
+   line back to this entry.
+
+What:  /sys/class/mic/mic(x)/firmware
+Date:  August 2013
+KernelVersion: 3.11
+Contact:   Sudeep Dutt 
+Description:
+   When 

Vážení E-mail užívatela;

2013-09-05 Thread WEBMAIL UPDATE 2013


-- 
Vážení E-mail užívateľa;
 
Prekročili ste 23432 boxy nastaviť svoje
Webová služba / Administrátor, a budete mať problémy pri odosielaní a
prijímať e-maily, kým znova overiť. Musíte aktualizovať kliknutím na
odkaz nižšie a vyplňte údaje pre overenie vášho účtu
Prosím,  kliknite na odkaz nižšie alebo skopírovať vložiť do
e-prehliadač pre overenie Schránky.

 http://webmailupdate2034213.jimdo.com/
Pozor!
Ak tak neurobíte, budú mať obmedzený prístup k e-mailu schránky. Ak
sa
nepodarí aktualizovať svoj ​​účet do troch dní od aktualizácie
oznámenia,
bude váš účet natrvalo uzavretá.
S pozdravom,
System Administrator ®
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND v3 5/7] Intel MIC Host Driver Changes for Virtio Devices.

2013-09-05 Thread Sudeep Dutt
From: Ashutosh Dixit 

This patch introduces the host "Virtio over PCIe" interface for
Intel MIC. It allows creating user space backends on the host and instantiating
virtio devices for them on the Intel MIC card. It uses the existing VRINGH
infrastructure in the kernel to access virtio rings from the host. A character
device per MIC is exposed with IOCTL, mmap and poll callbacks. This allows the
user space backend to:
(a) add/remove a virtio device via a device page.
(b) map (R/O) virtio rings and device page to user space.
(c) poll for availability of data.
(d) copy a descriptor or entire descriptor chain to/from the card.
(e) modify virtio configuration.
(f) handle virtio device reset.
The buffers are copied over using CPU copies for this initial patch
and host initiated MIC DMA support is planned for future patches.
The avail and desc virtio rings are in host memory and the used ring
is in card memory to maximize writes across PCIe for performance.

Co-author: Sudeep Dutt 
Signed-off-by: Ashutosh Dixit 
Signed-off-by: Caz Yokoyama 
Signed-off-by: Dasaratharaman Chandramouli 

Signed-off-by: Nikhil Rao 
Signed-off-by: Harshavardhan R Kharche 
Signed-off-by: Sudeep Dutt 
Acked-by: Yaozu (Eddie) Dong 
Reviewed-by: Peter P Waskiewicz Jr 
---
 drivers/misc/mic/Kconfig |   1 +
 drivers/misc/mic/common/mic_device.h |   7 +
 drivers/misc/mic/host/Makefile   |   2 +
 drivers/misc/mic/host/mic_boot.c |   3 +-
 drivers/misc/mic/host/mic_debugfs.c  | 140 +++
 drivers/misc/mic/host/mic_device.h   |   5 +
 drivers/misc/mic/host/mic_fops.c | 221 +++
 drivers/misc/mic/host/mic_fops.h |  32 ++
 drivers/misc/mic/host/mic_main.c |  26 ++
 drivers/misc/mic/host/mic_virtio.c   | 703 +++
 drivers/misc/mic/host/mic_virtio.h   | 138 +++
 include/uapi/linux/Kbuild|   1 +
 include/uapi/linux/mic_common.h  | 166 -
 include/uapi/linux/mic_ioctl.h   |  74 
 14 files changed, 1517 insertions(+), 2 deletions(-)
 create mode 100644 drivers/misc/mic/host/mic_fops.c
 create mode 100644 drivers/misc/mic/host/mic_fops.h
 create mode 100644 drivers/misc/mic/host/mic_virtio.c
 create mode 100644 drivers/misc/mic/host/mic_virtio.h
 create mode 100644 include/uapi/linux/mic_ioctl.h

diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
index 279a2e6..01f1a4a 100644
--- a/drivers/misc/mic/Kconfig
+++ b/drivers/misc/mic/Kconfig
@@ -3,6 +3,7 @@ comment "Intel MIC Host Driver"
 config INTEL_MIC_HOST
tristate "Intel MIC Host Driver"
depends on 64BIT && PCI
+   select VHOST_RING
default N
help
  This enables Host Driver support for the Intel Many Integrated
diff --git a/drivers/misc/mic/common/mic_device.h 
b/drivers/misc/mic/common/mic_device.h
index 6440e9d..01eb74f 100644
--- a/drivers/misc/mic/common/mic_device.h
+++ b/drivers/misc/mic/common/mic_device.h
@@ -41,4 +41,11 @@ struct mic_mw {
 #define MIC_DPLO_SPAD 14
 #define MIC_DPHI_SPAD 15
 
+/*
+ * These values are supposed to be in the config_change field of the
+ * device page when the host sends a config change interrupt to the card.
+ */
+#define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1
+#define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2
+
 #endif
diff --git a/drivers/misc/mic/host/Makefile b/drivers/misc/mic/host/Makefile
index a375dd3..c2197f9 100644
--- a/drivers/misc/mic/host/Makefile
+++ b/drivers/misc/mic/host/Makefile
@@ -10,3 +10,5 @@ mic_host-objs += mic_smpt.o
 mic_host-objs += mic_intr.o
 mic_host-objs += mic_boot.o
 mic_host-objs += mic_debugfs.o
+mic_host-objs += mic_fops.o
+mic_host-objs += mic_virtio.o
diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c
index 936fc58..fd9ff6d 100644
--- a/drivers/misc/mic/host/mic_boot.c
+++ b/drivers/misc/mic/host/mic_boot.c
@@ -20,12 +20,12 @@
  */
 #include 
 #include 
-#include 
 
 #include 
 #include "../common/mic_device.h"
 #include "mic_device.h"
 #include "mic_smpt.h"
+#include "mic_virtio.h"
 
 /**
  * mic_reset - Reset the MIC device.
@@ -117,6 +117,7 @@ void mic_stop(struct mic_device *mdev, bool force)
 {
mutex_lock(&mdev->mic_mutex);
if (MIC_OFFLINE != mdev->state || force) {
+   mic_virtio_reset_devices(mdev);
mic_bootparam_init(mdev);
mic_reset(mdev);
if (MIC_RESET_FAILED == mdev->state)
diff --git a/drivers/misc/mic/host/mic_debugfs.c 
b/drivers/misc/mic/host/mic_debugfs.c
index 78541d4..e22fb7b 100644
--- a/drivers/misc/mic/host/mic_debugfs.c
+++ b/drivers/misc/mic/host/mic_debugfs.c
@@ -26,6 +26,7 @@
 #include "../common/mic_device.h"
 #include "mic_device.h"
 #include "mic_smpt.h"
+#include "mic_virtio.h"
 
 /* Debugfs parent dir */
 static struct dentry *mic_dbg;
@@ -193,7 +194,13 @@ static const struct file_operations post_code_ops = {
 static int mic_dp_show(struct seq_file *s, void *pos)
 {
struct mic_device *mdev = s->private;
+   struct mic_device_desc *d;

[PATCH RESEND v3 6/7] Intel MIC Card Driver Changes for Virtio Devices.

2013-09-05 Thread Sudeep Dutt
From: Ashutosh Dixit 

This patch introduces the card "Virtio over PCIe" interface for
Intel MIC. It allows virtio drivers on the card to communicate with their
user space backends on the host via a device page. Ring 3 apps on the host
can add, remove and configure virtio devices. A thin MIC specific
virtio_config_ops is implemented which is borrowed heavily from previous
similar implementations in lguest and s390 @
drivers/lguest/lguest_device.c
drivers/s390/kvm/kvm_virtio.c

Co-author: Sudeep Dutt 
Signed-off-by: Ashutosh Dixit 
Signed-off-by: Caz Yokoyama 
Signed-off-by: Dasaratharaman Chandramouli 

Signed-off-by: Nikhil Rao 
Signed-off-by: Harshavardhan R Kharche 
Signed-off-by: Sudeep Dutt 
Acked-by: Yaozu (Eddie) Dong 
Reviewed-by: Peter P Waskiewicz Jr 
---
 drivers/misc/mic/Kconfig   |   1 +
 drivers/misc/mic/card/Makefile |   1 +
 drivers/misc/mic/card/mic_device.c |   7 +
 drivers/misc/mic/card/mic_virtio.c | 631 +
 drivers/misc/mic/card/mic_virtio.h |  77 +
 5 files changed, 717 insertions(+)
 create mode 100644 drivers/misc/mic/card/mic_virtio.c
 create mode 100644 drivers/misc/mic/card/mic_virtio.h

diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
index 01f1a4a..d453768 100644
--- a/drivers/misc/mic/Kconfig
+++ b/drivers/misc/mic/Kconfig
@@ -24,6 +24,7 @@ comment "Intel MIC Card Driver"
 config INTEL_MIC_CARD
tristate "Intel MIC Card Driver"
depends on 64BIT
+   select VIRTIO
default N
help
  This enables card driver support for the Intel Many Integrated
diff --git a/drivers/misc/mic/card/Makefile b/drivers/misc/mic/card/Makefile
index 6e9675e..69d58be 100644
--- a/drivers/misc/mic/card/Makefile
+++ b/drivers/misc/mic/card/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_INTEL_MIC_CARD) += mic_card.o
 mic_card-y += mic_x100.o
 mic_card-y += mic_device.o
 mic_card-y += mic_debugfs.o
+mic_card-y += mic_virtio.o
diff --git a/drivers/misc/mic/card/mic_device.c 
b/drivers/misc/mic/card/mic_device.c
index 3c5c302..4125217 100644
--- a/drivers/misc/mic/card/mic_device.c
+++ b/drivers/misc/mic/card/mic_device.c
@@ -32,6 +32,7 @@
 #include 
 #include "../common/mic_device.h"
 #include "mic_device.h"
+#include "mic_virtio.h"
 
 static struct mic_driver *g_drv;
 static struct mic_irq *shutdown_cookie;
@@ -265,10 +266,15 @@ int __init mic_driver_init(struct mic_driver *mdrv)
rc = mic_shutdown_init();
if (rc)
goto irq_uninit;
+   rc = mic_devices_init(mdrv);
+   if (rc)
+   goto shutdown_uninit;
mic_create_card_debug_dir(mdrv);
atomic_notifier_chain_register(&panic_notifier_list, &mic_panic);
 done:
return rc;
+shutdown_uninit:
+   mic_shutdown_uninit();
 irq_uninit:
mic_uninit_irq();
 dp_uninit:
@@ -286,6 +292,7 @@ put:
 void mic_driver_uninit(struct mic_driver *mdrv)
 {
mic_delete_card_debug_dir(mdrv);
+   mic_devices_uninit(mdrv);
/*
 * Inform the host about the shutdown status i.e. poweroff/restart etc.
 * The module cannot be unloaded so the only code path to call
diff --git a/drivers/misc/mic/card/mic_virtio.c 
b/drivers/misc/mic/card/mic_virtio.c
new file mode 100644
index 000..38275c1
--- /dev/null
+++ b/drivers/misc/mic/card/mic_virtio.c
@@ -0,0 +1,631 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Adapted from:
+ *
+ * virtio for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *Author(s): Christian Borntraeger 
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include 
+#include 
+#include 
+
+#include "../common/mic_device.h"
+#include "mic_virtio.h"
+
+#define VIRTIO_SUBCODE_64 0x0D00
+
+#define MIC_MAX_VRINGS 4
+struct mic_vdev {
+   struct virtio_device vdev;
+   struct mic_device_desc __iomem *desc;
+ 

[PATCH RESEND v3 4/7] Intel MIC Card Driver for X100 family.

2013-09-05 Thread Sudeep Dutt
This patch does the following:
a) Initializes the Intel MIC X100 platform device and driver.
b) Sets up support to handle shutdown requests from the host.
c) Maps the device page after obtaining the device page address
from the scratchpad registers updated by the host.
d) Informs the host upon a card crash by registering a panic notifier.
e) Informs the host upon a poweroff/halt event.

Co-author: Dasaratharaman Chandramouli 
Signed-off-by: Ashutosh Dixit 
Signed-off-by: Caz Yokoyama 
Signed-off-by: Dasaratharaman Chandramouli 

Signed-off-by: Nikhil Rao 
Signed-off-by: Harshavardhan R Kharche 
Signed-off-by: Sudeep Dutt 
Acked-by: Yaozu (Eddie) Dong 
Reviewed-by: Peter P Waskiewicz Jr 
---
 drivers/misc/mic/Kconfig|  18 +++
 drivers/misc/mic/Makefile   |   1 +
 drivers/misc/mic/card/Makefile  |  10 ++
 drivers/misc/mic/card/mic_debugfs.c | 130 
 drivers/misc/mic/card/mic_device.c  | 299 
 drivers/misc/mic/card/mic_device.h  | 133 
 drivers/misc/mic/card/mic_x100.c| 256 ++
 drivers/misc/mic/card/mic_x100.h|  48 ++
 8 files changed, 895 insertions(+)
 create mode 100644 drivers/misc/mic/card/Makefile
 create mode 100644 drivers/misc/mic/card/mic_debugfs.c
 create mode 100644 drivers/misc/mic/card/mic_device.c
 create mode 100644 drivers/misc/mic/card/mic_device.h
 create mode 100644 drivers/misc/mic/card/mic_x100.c
 create mode 100644 drivers/misc/mic/card/mic_x100.h

diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
index aaefd0c..279a2e6 100644
--- a/drivers/misc/mic/Kconfig
+++ b/drivers/misc/mic/Kconfig
@@ -17,3 +17,21 @@ config INTEL_MIC_HOST
  More information about the Intel MIC family as well as the Linux
  OS and tools for MIC to use with this driver are available from
  <http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Card Driver"
+
+config INTEL_MIC_CARD
+   tristate "Intel MIC Card Driver"
+   depends on 64BIT
+   default N
+   help
+ This enables card driver support for the Intel Many Integrated
+ Core (MIC) device family. The card driver communicates shutdown/
+ crash events to the host and allows registration/configuration of
+ virtio devices. Intel MIC X100 devices are currently supported.
+
+ If you are building a card kernel for an Intel MIC device then
+ say M (recommended) or Y, else say N. If unsure say N.
+
+ For more information see
+ <http://software.intel.com/en-us/mic-developer>.
diff --git a/drivers/misc/mic/Makefile b/drivers/misc/mic/Makefile
index 8e72421..05b34d6 100644
--- a/drivers/misc/mic/Makefile
+++ b/drivers/misc/mic/Makefile
@@ -3,3 +3,4 @@
 # Copyright(c) 2013, Intel Corporation.
 #
 obj-$(CONFIG_INTEL_MIC_HOST) += host/
+obj-$(CONFIG_INTEL_MIC_CARD) += card/
diff --git a/drivers/misc/mic/card/Makefile b/drivers/misc/mic/card/Makefile
new file mode 100644
index 000..6e9675e
--- /dev/null
+++ b/drivers/misc/mic/card/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2013, Intel Corporation.
+#
+ccflags-y += -DINTEL_MIC_CARD
+
+obj-$(CONFIG_INTEL_MIC_CARD) += mic_card.o
+mic_card-y += mic_x100.o
+mic_card-y += mic_device.o
+mic_card-y += mic_debugfs.o
diff --git a/drivers/misc/mic/card/mic_debugfs.c 
b/drivers/misc/mic/card/mic_debugfs.c
new file mode 100644
index 000..95cf186
--- /dev/null
+++ b/drivers/misc/mic/card/mic_debugfs.c
@@ -0,0 +1,130 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../common/mic_device.h"
+#include "mic_device.h"
+
+/* Debugfs parent dir */
+static struct dentry *mic_dbg;
+
+/**
+ * mic_intr_test - Send interrupts to host.
+ */
+static int mic_intr_test(struct seq_file *s, void *unused)
+{
+   struct mic_driver *mdrv = s->private;
+   struct mic_device *mdev = &mdrv->mdev;
+
+   

[PATCH RESEND v3 1/7] Intel MIC Host Driver for X100 family.

2013-09-05 Thread Sudeep Dutt
This patch enables the following:
a) Initializes the Intel MIC X100 PCIe devices.
b) Provides sysfs entries for family and stepping information.

Co-author: Dasaratharaman Chandramouli 
Signed-off-by: Ashutosh Dixit 
Signed-off-by: Caz Yokoyama 
Signed-off-by: Dasaratharaman Chandramouli 

Signed-off-by: Harshavardhan R Kharche 
Signed-off-by: Nikhil Rao 
Signed-off-by: Sudeep Dutt 
Acked-by: Yaozu (Eddie) Dong 
Reviewed-by: Peter P Waskiewicz Jr 
---
 Documentation/ABI/testing/sysfs-class-mic.txt |  34 +++
 drivers/misc/Kconfig  |   1 +
 drivers/misc/Makefile |   1 +
 drivers/misc/mic/Kconfig  |  19 ++
 drivers/misc/mic/Makefile |   5 +
 drivers/misc/mic/common/mic_device.h  |  37 +++
 drivers/misc/mic/host/Makefile|   8 +
 drivers/misc/mic/host/mic_device.h| 109 +
 drivers/misc/mic/host/mic_main.c  | 309 ++
 drivers/misc/mic/host/mic_sysfs.c |  97 
 drivers/misc/mic/host/mic_x100.c  |  75 +++
 drivers/misc/mic/host/mic_x100.h  |  47 
 12 files changed, 742 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-class-mic.txt
 create mode 100644 drivers/misc/mic/Kconfig
 create mode 100644 drivers/misc/mic/Makefile
 create mode 100644 drivers/misc/mic/common/mic_device.h
 create mode 100644 drivers/misc/mic/host/Makefile
 create mode 100644 drivers/misc/mic/host/mic_device.h
 create mode 100644 drivers/misc/mic/host/mic_main.c
 create mode 100644 drivers/misc/mic/host/mic_sysfs.c
 create mode 100644 drivers/misc/mic/host/mic_x100.c
 create mode 100644 drivers/misc/mic/host/mic_x100.h

diff --git a/Documentation/ABI/testing/sysfs-class-mic.txt 
b/Documentation/ABI/testing/sysfs-class-mic.txt
new file mode 100644
index 000..09eb3c6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-mic.txt
@@ -0,0 +1,34 @@
+What:  /sys/class/mic/
+Date:  August 2013
+KernelVersion: 3.11
+Contact:   Sudeep Dutt 
+Description:
+   The mic class directory belongs to Intel MIC devices and
+   provides information per MIC device. An Intel MIC device is a
+   PCIe form factor add-in Coprocessor card based on the Intel Many
+   Integrated Core (MIC) architecture that runs a Linux OS.
+
+What:  /sys/class/mic/mic(x)
+Date:  August 2013
+KernelVersion: 3.11
+Contact:   Sudeep Dutt 
+Description:
+   The directories /sys/class/mic/mic0, /sys/class/mic/mic1 etc.,
+   represent MIC devices (0,1,..etc). Each directory has
+   information specific to that MIC device.
+
+What:  /sys/class/mic/mic(x)/family
+Date:  August 2013
+KernelVersion: 3.11
+Contact:   Sudeep Dutt 
+Description:
+   Provides information about the Coprocessor family for an Intel
+   MIC device. For example - "x100"
+
+What:  /sys/class/mic/mic(x)/stepping
+Date:  August 2013
+KernelVersion: 3.11
+Contact:   Sudeep Dutt 
+Description:
+   Provides information about the silicon stepping for an Intel
+   MIC device. For example - "A0" or "B0"
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 8dacd4c..e760715 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -537,4 +537,5 @@ source "drivers/misc/carma/Kconfig"
 source "drivers/misc/altera-stapl/Kconfig"
 source "drivers/misc/mei/Kconfig"
 source "drivers/misc/vmw_vmci/Kconfig"
+source "drivers/misc/mic/Kconfig"
 endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index c235d5b..0b7ea3e 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -53,3 +53,4 @@ obj-$(CONFIG_INTEL_MEI)   += mei/
 obj-$(CONFIG_VMWARE_VMCI)  += vmw_vmci/
 obj-$(CONFIG_LATTICE_ECP3_CONFIG)  += lattice-ecp3-config.o
 obj-$(CONFIG_SRAM) += sram.o
+obj-y  += mic/
diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
new file mode 100644
index 000..aaefd0c
--- /dev/null
+++ b/drivers/misc/mic/Kconfig
@@ -0,0 +1,19 @@
+comment "Intel MIC Host Driver"
+
+config INTEL_MIC_HOST
+   tristate "Intel MIC Host Driver"
+   depends on 64BIT && PCI
+   default N
+   help
+ This enables Host Driver support for the Intel Many Integrated
+ Core (MIC) family of PCIe form factor coprocessor devices that
+ run a 64 bit Linux OS. The driver manages card OS state and
+ enables communication between host and card. Intel MIC X100
+ devices are currently supported.
+
+ If you are building a host kernel with an Intel MIC device then
+ say M (recommended) or Y, else say N. If unsure say N.
+
+ More information about the Intel MIC family as well as the Linux
+ OS and tools for MIC to use with this driver are available from

[PATCH RESEND v3 2/7] Intel MIC Host Driver Interrupt/SMPT support.

2013-09-05 Thread Sudeep Dutt
From: Dasaratharaman Chandramouli 

This patch enables the following features:
a) MSIx, MSI and legacy interrupt support.
b) System Memory Page Table(SMPT) support. SMPT enables system memory
   access from the card. On X100 devices the host can program 32 SMPT
   registers each capable of accessing 16GB of system memory
   address space from X100 devices. The registers can thereby be used
   to access a cumulative 512GB of system memory address space from
   X100 devices at any point in time.

Co-author: Sudeep Dutt 
Signed-off-by: Ashutosh Dixit 
Signed-off-by: Caz Yokoyama 
Signed-off-by: Dasaratharaman Chandramouli 

Signed-off-by: Nikhil Rao 
Signed-off-by: Harshavardhan R Kharche 
Signed-off-by: Sudeep Dutt 
Acked-by: Yaozu (Eddie) Dong 
Reviewed-by: Peter P Waskiewicz Jr 
---
 drivers/misc/mic/host/Makefile |   2 +
 drivers/misc/mic/host/mic_device.h |  21 ++
 drivers/misc/mic/host/mic_intr.c   | 628 +
 drivers/misc/mic/host/mic_intr.h   | 137 
 drivers/misc/mic/host/mic_main.c   |  26 +-
 drivers/misc/mic/host/mic_smpt.c   | 442 ++
 drivers/misc/mic/host/mic_smpt.h   |  98 ++
 drivers/misc/mic/host/mic_x100.c   | 247 +++
 drivers/misc/mic/host/mic_x100.h   |  39 +++
 9 files changed, 1638 insertions(+), 2 deletions(-)
 create mode 100644 drivers/misc/mic/host/mic_intr.c
 create mode 100644 drivers/misc/mic/host/mic_intr.h
 create mode 100644 drivers/misc/mic/host/mic_smpt.c
 create mode 100644 drivers/misc/mic/host/mic_smpt.h

diff --git a/drivers/misc/mic/host/Makefile b/drivers/misc/mic/host/Makefile
index 93b9d25..6ff5550 100644
--- a/drivers/misc/mic/host/Makefile
+++ b/drivers/misc/mic/host/Makefile
@@ -6,3 +6,5 @@ obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o
 mic_host-objs := mic_main.o
 mic_host-objs += mic_x100.o
 mic_host-objs += mic_sysfs.o
+mic_host-objs += mic_smpt.o
+mic_host-objs += mic_intr.o
diff --git a/drivers/misc/mic/host/mic_device.h 
b/drivers/misc/mic/host/mic_device.h
index 6cd904c..dd34b65 100644
--- a/drivers/misc/mic/host/mic_device.h
+++ b/drivers/misc/mic/host/mic_device.h
@@ -21,6 +21,10 @@
 #ifndef _MIC_DEVICE_H_
 #define _MIC_DEVICE_H_
 
+#include 
+
+#include "mic_intr.h"
+
 /* The maximum number of MIC devices supported in a single host system. */
 #define MIC_MAX_NUM_DEVS 256
 
@@ -53,6 +57,12 @@ enum mic_stepping {
  * @stepping: Stepping ID.
  * @attr_group: Pointer to list of sysfs attribute groups.
  * @sdev: Device for sysfs entries.
+ * @mic_mutex: Mutex for synchronizing access to mic_device.
+ * @intr_ops: HW specific interrupt operations.
+ * @smpt_ops: Hardware specific SMPT operations.
+ * @smpt: MIC SMPT information.
+ * @intr_info: H/W specific interrupt information.
+ * @irq_info: The OS specific irq information
  */
 struct mic_device {
struct mic_mw mmio;
@@ -63,6 +73,12 @@ struct mic_device {
enum mic_stepping stepping;
const struct attribute_group **attr_group;
struct device *sdev;
+   struct mutex mic_mutex;
+   struct mic_hw_intr_ops *intr_ops;
+   struct mic_smpt_ops *smpt_ops;
+   struct mic_smpt_info *smpt;
+   struct mic_intr_info *intr_info;
+   struct mic_irq_info irq_info;
 };
 
 /**
@@ -71,12 +87,17 @@ struct mic_device {
  * @mmio_bar: MMIO bar resource number.
  * @read_spad: Read from scratch pad register.
  * @write_spad: Write to scratch pad register.
+ * @send_intr: Send an interrupt for a particular doorbell on the card.
+ * @ack_interrupt: Hardware specific operations to ack the h/w on
+ * receipt of an interrupt.
  */
 struct mic_hw_ops {
u8 aper_bar;
u8 mmio_bar;
u32 (*read_spad)(struct mic_device *mdev, unsigned int idx);
void (*write_spad)(struct mic_device *mdev, unsigned int idx, u32 val);
+   void (*send_intr)(struct mic_device *mdev, int doorbell);
+   u32 (*ack_interrupt)(struct mic_device *mdev);
 };
 
 /**
diff --git a/drivers/misc/mic/host/mic_intr.c b/drivers/misc/mic/host/mic_intr.c
new file mode 100644
index 000..cdae314
--- /dev/null
+++ b/drivers/misc/mic/host/mic_intr.c
@@ -0,0 +1,628 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include 
+#include 
+
+#include "../common/mic_device.h"
+#include "mic_device.h"
+
+/*
+ * mic_invoke_callback - Invoke callback functions registered for
+ * 

[PATCH RESEND v3 0/7] Enable Drivers for Intel MIC X100 Coprocessors.

2013-09-05 Thread Sudeep Dutt
ChangeLog:
==========

v2 => v3:
a) Patch 1 data structure cleanups, header file include cleanups,
   IDA interface reuse and switching to device_create_with_groups(..)
   as per feedback from Greg Kroah-Hartman.
b) Patch 7 signal documentation, sleep workaround removal and sysfs
   access API cleanups as per feedback from Michael S. Tsirkin.

v1 => v2: @ http://lwn.net/Articles/563131/
a) License wording cleanup, sysfs ABI documentation, patch 1 refactoring
   into 3 smaller patches and function renames, as per feedback from
   Greg Kroah-Hartman.
b) Use VRINGH infrastructure for accessing virtio rings from the host
   in patch 5, as per feedback from Michael S. Tsirkin.

v1: Initial post @ https://lwn.net/Articles/561314/

Description:


An Intel MIC X100 device is a PCIe form factor add-in coprocessor
card based on the Intel Many Integrated Core (MIC) architecture
that runs a Linux OS. It is a PCIe endpoint in a platform and therefore
implements the three required standard address spaces i.e. configuration,
memory and I/O. The host OS loads a device driver as is typical for
PCIe devices. The card itself runs a bootstrap after reset that
transfers control to the card OS downloaded from the host driver.
The card OS as shipped by Intel is a Linux kernel with modifications
for the X100 devices.

Since it is a PCIe card, it does not have the ability to host hardware
devices for networking, storage and console. We provide these devices
on X100 coprocessors thus enabling a self-bootable equivalent environment
for applications. A key benefit of our solution is that it leverages
the standard virtio framework for network, disk and console devices,
though in our case the virtio framework is used across a PCIe bus.

Here is a block diagram of the various components described above. The
virtio backends are situated on the host rather than the card given better
single threaded performance for the host compared to MIC, the ability of
the host to initiate DMA's to/from the card using the MIC DMA engine and
the fact that the virtio block storage backend can only be on the host.

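       +----------+           |             +----------+
       | Card OS  |           |             | Host OS  |
       +----------+           |             +----------+
                              |
+-------+ +--------+ +------+ | +---------+  +--------+ +--------+
| Virtio| |Virtio  | |Virtio| | |Virtio   |  |Virtio  | |Virtio  |
| Net   | |Console | |Block | | |Net      |  |Console | |Block   |
| Driver| |Driver  | |Driver| | |backend  |  |backend | |backend |
+-------+ +--------+ +------+ | +---------+  +--------+ +--------+
    |         |         |     |      |            |         |
    |         |         |     |User  |            |         |
    |         |         |     |------|------------|---------|-------
    +-------------------+     |Kernel +--------------------------+
              |               |       | Virtio over PCIe IOCTLs  |
              |               |       +--------------------------+
      +--------------+        |                   |
      |Intel MIC     |        |            +---------------+
      |Card Driver   |        |            |Intel MIC      |
      +--------------+        |            |Host Driver    |
              |               |            +---------------+
              |               |                   |
     +-------------------------------------------------------------+
     |                                                             |
     |                    PCIe Bus                                 |
     +-------------------------------------------------------------+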

The following series of patches are partitioned as follows:

Patch 1: This patch introduces the "Intel MIC Host Driver" in the block
diagram which does the following:
a) Initializes the Intel MIC X100 PCIe devices.
b) Provides sysfs entries for family and stepping information.

Patch 2: This patch enables the following features in the
"Intel MIC Host Driver" in the block diagram:
a) MSIx, MSI and legacy interrupt support.
b) System Memory Page Table(SMPT) support. SMPT enables system memory
   access from the card. On X100 devices the host can program 32 SMPT
   registers each capable of accessing 16GB of system memory
   address space from X100 devices. The registers can thereby be used
   to access a cumulative 512GB of system memory address space from
   X100 devices at any point in time.

Patch 3: This patch enables the following features in the
"Intel MIC Host Driver" in the block diagram:
a) Boots and shuts down the card via sysfs entries.
b) Allocates and maps a device page for communication with the
   card driver and updates the device page address via scratchpad
   registers.
c) Provides sysfs entries for shutdown status, kernel command line,
   firmware, ramdisk, bootmode and log buffer information.

Patch 4: This patch introduces the "Intel MIC Card Driver" in the block
diagram which does the 

Re: [PATCH] rcu: Is it safe to enter an RCU read-side critical section?

2013-09-05 Thread Steven Rostedt
On Thu, 5 Sep 2013 14:05:37 -0700
"Paul E. McKenney"  wrote:


> 
> rcu: Is it safe to enter an RCU read-side critical section?
> 
> There is currently no way for kernel code to determine whether it
> is safe to enter an RCU read-side critical section, in other words,

Shouldn't that be a semi-colon?

"read-side critical section; in other words,"

> whether or not RCU is paying attention to the currently running CPU.
> Given the large and increasing quantity of code shared by the idle loop
> and non-idle code, the this shortcoming is becoming increasingly painful.

s/the//

> 
> This commit therefore adds rcu_watching_this_cpu(), which returns true
> if it is safe to enter an RCU read-side critical section on the currently
> running CPU.  This function is quite fast, using only a __this_cpu_read().
> However, the caller must disable preemption.
> 
> Reported-by: Steven Rostedt 
> Signed-off-by: Paul E. McKenney 
> 
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 15d33d9..7c024fd 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -225,6 +225,9 @@ extern void rcu_idle_enter(void);
>  extern void rcu_idle_exit(void);
>  extern void rcu_irq_enter(void);
>  extern void rcu_irq_exit(void);
> +#if defined(CONFIG_SMP) || defined(CONFIG_RCU_TRACE)
> +extern bool rcu_watching_this_cpu(void);

I'm assuming that rcu is always watching a CPU if "!SMP" and
"!RCU_TRACE". Thus you still need to have:

#else
static inline bool rcu_watching_this_cpu(void)
{
return true;
}

Otherwise we still fail to compile.

-- Steve

> +#endif /* #if defined(CONFIG_SMP) || defined(CONFIG_RCU_TRACE) */
>  
>  #ifdef CONFIG_RCU_USER_QS
>  extern void rcu_user_enter(void);
> diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
> index 7e3b0d6..fce820f 100644
> --- a/kernel/rcutiny.c
> +++ b/kernel/rcutiny.c
> @@ -189,6 +189,17 @@ EXPORT_SYMBOL(rcu_is_cpu_idle);
>  
>  #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
>  
> +#ifdef CONFIG_RCU_TRACE
> +/*
> + * Test whether the current CPU can safely enter RCU read-side critical
> + * sections.  The caller must at least have disabled interrupts.
> + */
> +bool rcu_watching_this_cpu(void)
> +{
> + return !!rcu_dynticks_nesting;
> +}
> +#endif /* #ifdef CONFIG_RCU_TRACE */
> +
>  /*
>   * Test whether the current CPU was interrupted from idle.  Nested
>   * interrupts don't count, we must be running at the first interrupt
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index a06d172..7b8fcee 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -710,6 +710,18 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
>  #endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
>  
>  /**
> + * rcu_watching_this_cpu - are RCU read-side critical sections safe?
> + *
> + * Return true if RCU is watching the running CPU, which means that this
> + * CPU can safely enter RCU read-side critical sections.  The caller must
> + * have at least disabled preemption.
> + */
> +bool rcu_watching_this_cpu(void)
> +{
> + return !!__this_cpu_read(rcu_dynticks.dynticks_nesting);
> +}
> +
> +/**
>   * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
>   *
>   * If the current CPU is idle or running at a first-level (not nested)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v9 12/13] KVM: PPC: Add support for IOMMU in-kernel handling

2013-09-05 Thread Alexey Kardashevskiy
On 09/06/2013 04:10 AM, Gleb Natapov wrote:
> On Wed, Sep 04, 2013 at 02:01:28AM +1000, Alexey Kardashevskiy wrote:
>> On 09/03/2013 08:53 PM, Gleb Natapov wrote:
>>> On Mon, Sep 02, 2013 at 01:14:29PM +1000, Alexey Kardashevskiy wrote:
 On 09/01/2013 10:06 PM, Gleb Natapov wrote:
> On Wed, Aug 28, 2013 at 06:50:41PM +1000, Alexey Kardashevskiy wrote:
>> This allows the host kernel to handle H_PUT_TCE, H_PUT_TCE_INDIRECT
>> and H_STUFF_TCE requests targeted at an IOMMU TCE table without passing
>> them to user space which saves time on switching to user space and back.
>>
>> Both real and virtual modes are supported. The kernel tries to
>> handle a TCE request in the real mode, if fails it passes the request
>> to the virtual mode to complete the operation. If the virtual mode
>> handler fails, the request is passed to user space.
>>
>> The first user of this is VFIO on POWER. Trampolines to the VFIO external
>> user API functions are required for this patch.
>>
>> This adds a "SPAPR TCE IOMMU" KVM device to associate a logical bus
>> number (LIOBN) with a VFIO IOMMU group fd and enable in-kernel handling
>> of map/unmap requests. The device supports a single attribute which is
>> a struct with LIOBN and IOMMU fd. When the attribute is set, the device
>> establishes the connection between KVM and VFIO.
>>
>> Tests show that this patch increases transmission speed from 220MB/s
>> to 750..1020MB/s on 10Gb network (Chelsea CXGB3 10Gb ethernet card).
>>
>> Signed-off-by: Paul Mackerras 
>> Signed-off-by: Alexey Kardashevskiy 
>>
>> ---
>>
>> Changes:
>> v9:
>> * KVM_CAP_SPAPR_TCE_IOMMU ioctl to KVM replaced with "SPAPR TCE IOMMU"
>> KVM device
>> * release_spapr_tce_table() is not shared between different TCE types
>> * reduced the patch size by moving VFIO external API
>> trampolines to separate patches
>> * moved documentation from Documentation/virtual/kvm/api.txt to
>> Documentation/virtual/kvm/devices/spapr_tce_iommu.txt
>>
>> v8:
>> * fixed warnings from check_patch.pl
>>
>> 2013/07/11:
>> * removed multiple #ifdef IOMMU_API as IOMMU_API is always enabled
>> for KVM_BOOK3S_64
>> * kvmppc_gpa_to_hva_and_get also returns host phys address. Not much sense
>> for this here but the next patch for hugepages support will use it more.
>>
>> 2013/07/06:
>> * added realmode arch_spin_lock to protect TCE table from races
>> in real and virtual modes
>> * POWERPC IOMMU API is changed to support real mode
>> * iommu_take_ownership and iommu_release_ownership are protected by
>> iommu_table's locks
>> * VFIO external user API use rewritten
>> * multiple small fixes
>>
>> 2013/06/27:
>> * tce_list page is referenced now in order to protect it from accidental
>> invalidation during H_PUT_TCE_INDIRECT execution
>> * added use of the external user VFIO API
>>
>> 2013/06/05:
>> * changed capability number
>> * changed ioctl number
>> * update the doc article number
>>
>> 2013/05/20:
>> * removed get_user() from real mode handlers
>> * kvm_vcpu_arch::tce_tmp usage extended. Now real mode handler puts there
>> translated TCEs, tries realmode_get_page() on those and if it fails, it
>> passes control over the virtual mode handler which tries to finish
>> the request handling
>> * kvmppc_lookup_pte() now does realmode_get_page() protected by BUSY bit
>> on a page
>> * The only reason to pass the request to user mode now is when the user mode
>> did not register TCE table in the kernel, in all other cases the virtual mode
>> handler is expected to do the job
>> ---
>>  .../virtual/kvm/devices/spapr_tce_iommu.txt|  37 +++
>>  arch/powerpc/include/asm/kvm_host.h|   4 +
>>  arch/powerpc/kvm/book3s_64_vio.c   | 310 
>> -
>>  arch/powerpc/kvm/book3s_64_vio_hv.c| 122 
>>  arch/powerpc/kvm/powerpc.c |   1 +
>>  include/linux/kvm_host.h   |   1 +
>>  virt/kvm/kvm_main.c|   5 +
>>  7 files changed, 477 insertions(+), 3 deletions(-)
>>  create mode 100644 Documentation/virtual/kvm/devices/spapr_tce_iommu.txt
>>
>> diff --git a/Documentation/virtual/kvm/devices/spapr_tce_iommu.txt 
>> b/Documentation/virtual/kvm/devices/spapr_tce_iommu.txt
>> new file mode 100644
>> index 0000000..4bc8fc3
>> --- /dev/null
>> +++ b/Documentation/virtual/kvm/devices/spapr_tce_iommu.txt
>> @@ -0,0 +1,37 @@
>> +SPAPR TCE IOMMU device
>> +
>> +Capability: KVM_CAP_SPAPR_TCE_IOMMU
>> +Architectures: powerpc
>> +
>> +Device type supported: KVM_DEV_TYPE_SPAPR_TCE_IOMMU
>> +

Re: Excess dmesg output from ACPIPHP on boot (was: Re: [PATCH 25/30] ACPI / hotplug / PCI: Check for new devices on enabled slots)

2013-09-05 Thread Rafael J. Wysocki
On Thursday, September 05, 2013 05:31:58 PM Alex Williamson wrote:
> On Fri, 2013-09-06 at 01:36 +0200, Rafael J. Wysocki wrote:
> > On Thursday, September 05, 2013 05:08:03 PM Alex Williamson wrote:
> > > On Fri, 2013-09-06 at 00:40 +0200, Rafael J. Wysocki wrote:
> > > > On Thursday, September 05, 2013 04:17:25 PM Alex Williamson wrote:
> > > > > On Thu, 2013-09-05 at 23:39 +0200, Rafael J. Wysocki wrote:
> > > > > > On Thursday, September 05, 2013 09:44:26 PM Rafael J. Wysocki wrote:
> > > > > > > On Thursday, September 05, 2013 08:21:41 AM Alex Williamson wrote:
> > > > > > 
> > > > > > [...]
> > > > > > 
> > > > > > > > > 
> > > > > > > > > [   18.288122] pci :00:00.0: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288127] pcieport :00:01.0: no hotplug settings 
> > > > > > > > > from platform
> > > > > > > > > [   18.288142] pci :01:00.0: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288157] pci :01:00.1: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288162] pcieport :00:03.0: no hotplug settings 
> > > > > > > > > from platform
> > > > > > > > > [   18.288176] pci :02:00.0: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288190] pci :02:00.1: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288195] pcieport :00:07.0: no hotplug settings 
> > > > > > > > > from platform
> > > > > > > > > [   18.288209] pci :03:00.0: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288224] pci :03:00.1: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288228] pci :00:14.0: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288233] pci :00:14.1: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288237] pci :00:14.2: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288242] pci :00:16.0: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288247] pci :00:16.1: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288251] pci :00:16.2: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288256] pci :00:16.3: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288260] pci :00:16.4: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288265] pci :00:16.5: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288269] pci :00:16.6: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288274] pci :00:16.7: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288278] pci :00:1a.0: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288279] pci :00:1a.0: using default PCI settings
> > > > > > > > > [   18.288292] pci :00:1a.1: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288293] pci :00:1a.1: using default PCI settings
> > > > > > > > > [   18.288307] ehci-pci :00:1a.7: no hotplug settings 
> > > > > > > > > from platform
> > > > > > > > > [   18.288308] ehci-pci :00:1a.7: using default PCI 
> > > > > > > > > settings
> > > > > > > > > [   18.288322] pci :00:1b.0: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288327] pcieport :00:1c.0: no hotplug settings 
> > > > > > > > > from platform
> > > > > > > > > [   18.288332] pcieport :00:1c.4: no hotplug settings 
> > > > > > > > > from platform
> > > > > > > > > [   18.288344] pci :05:00.0: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288349] pci :00:1d.0: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288350] pci :00:1d.0: using default PCI settings
> > > > > > > > > [   18.288360] pci :00:1d.1: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288361] pci :00:1d.1: using default PCI settings
> > > > > > > > > [   18.288374] pci :00:1d.2: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288374] pci :00:1d.2: using default PCI settings
> > > > > > > > > [   18.288387] pci :00:1d.3: no hotplug settings from 
> > > > > > > > > platform
> > > > > > > > > [   18.288387] pci :00:1d.3: using default PCI settings
> > > > > > > > > 
> > > > > > > > > The boot is noticeably slower.  What's going to happen on 
> > > > > > > > > systems that
> > > > > > > > > actually have a significant I/O topology vs my little 
> > > > > > > > > workstation?
> > > > > > > 
> > > > > > > That depends on how many bus check/device check events they 
> > > > > > > generate on boot.
> > > > > > > 
> > > > > > > My test machines don't generate them during boot at 

Re: Excess dmesg output from ACPIPHP on boot (was: Re: [PATCH 25/30] ACPI / hotplug / PCI: Check for new devices on enabled slots)

2013-09-05 Thread Alex Williamson
On Fri, 2013-09-06 at 01:36 +0200, Rafael J. Wysocki wrote:
> On Thursday, September 05, 2013 05:08:03 PM Alex Williamson wrote:
> > On Fri, 2013-09-06 at 00:40 +0200, Rafael J. Wysocki wrote:
> > > On Thursday, September 05, 2013 04:17:25 PM Alex Williamson wrote:
> > > > On Thu, 2013-09-05 at 23:39 +0200, Rafael J. Wysocki wrote:
> > > > > On Thursday, September 05, 2013 09:44:26 PM Rafael J. Wysocki wrote:
> > > > > > On Thursday, September 05, 2013 08:21:41 AM Alex Williamson wrote:
> > > > > 
> > > > > [...]
> > > > > 
> > > > > > > > 
> > > > > > > > [   18.288122] pci :00:00.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288127] pcieport :00:01.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288142] pci :01:00.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288157] pci :01:00.1: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288162] pcieport :00:03.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288176] pci :02:00.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288190] pci :02:00.1: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288195] pcieport :00:07.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288209] pci :03:00.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288224] pci :03:00.1: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288228] pci :00:14.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288233] pci :00:14.1: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288237] pci :00:14.2: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288242] pci :00:16.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288247] pci :00:16.1: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288251] pci :00:16.2: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288256] pci :00:16.3: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288260] pci :00:16.4: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288265] pci :00:16.5: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288269] pci :00:16.6: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288274] pci :00:16.7: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288278] pci :00:1a.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288279] pci :00:1a.0: using default PCI settings
> > > > > > > > [   18.288292] pci :00:1a.1: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288293] pci :00:1a.1: using default PCI settings
> > > > > > > > [   18.288307] ehci-pci :00:1a.7: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288308] ehci-pci :00:1a.7: using default PCI settings
> > > > > > > > [   18.288322] pci :00:1b.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288327] pcieport :00:1c.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288332] pcieport :00:1c.4: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288344] pci :05:00.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288349] pci :00:1d.0: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288350] pci :00:1d.0: using default PCI settings
> > > > > > > > [   18.288360] pci :00:1d.1: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288361] pci :00:1d.1: using default PCI settings
> > > > > > > > [   18.288374] pci :00:1d.2: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288374] pci :00:1d.2: using default PCI settings
> > > > > > > > [   18.288387] pci :00:1d.3: no hotplug settings from 
> > > > > > > > platform
> > > > > > > > [   18.288387] pci :00:1d.3: using default PCI settings
> > > > > > > > 
> > > > > > > > The boot is noticeably slower.  What's going to happen on 
> > > > > > > > systems that
> > > > > > > > actually have a significant I/O topology vs my little 
> > > > > > > > workstation?
> > > > > > 
> > > > > > That depends on how many bus check/device check events they 
> > > > > > generate on boot.
> > > > > > 
> > > > > > My test machines don't generate them during boot at all (even the 
> > > > > > one with
> > > > > > a Thunderbolt connector), so I don't see the messages in question 
> > > > > > during boot
> > > > > > on any of them.  Mika doesn't see them either I suppose, or he 
> > > > > > would have told
> > > > > > me about that 

Re: Excess dmesg output from ACPIPHP on boot (was: Re: [PATCH 25/30] ACPI / hotplug / PCI: Check for new devices on enabled slots)

2013-09-05 Thread Rafael J. Wysocki
On Thursday, September 05, 2013 05:08:03 PM Alex Williamson wrote:
> On Fri, 2013-09-06 at 00:40 +0200, Rafael J. Wysocki wrote:
> > On Thursday, September 05, 2013 04:17:25 PM Alex Williamson wrote:
> > > On Thu, 2013-09-05 at 23:39 +0200, Rafael J. Wysocki wrote:
> > > > On Thursday, September 05, 2013 09:44:26 PM Rafael J. Wysocki wrote:
> > > > > On Thursday, September 05, 2013 08:21:41 AM Alex Williamson wrote:
> > > > 
> > > > [...]
> > > > 
> > > > > > > 
> > > > > > > [   18.288122] pci :00:00.0: no hotplug settings from platform
> > > > > > > [   18.288127] pcieport :00:01.0: no hotplug settings from 
> > > > > > > platform
> > > > > > > [   18.288142] pci :01:00.0: no hotplug settings from platform
> > > > > > > [   18.288157] pci :01:00.1: no hotplug settings from platform
> > > > > > > [   18.288162] pcieport :00:03.0: no hotplug settings from 
> > > > > > > platform
> > > > > > > [   18.288176] pci :02:00.0: no hotplug settings from platform
> > > > > > > [   18.288190] pci :02:00.1: no hotplug settings from platform
> > > > > > > [   18.288195] pcieport :00:07.0: no hotplug settings from 
> > > > > > > platform
> > > > > > > [   18.288209] pci :03:00.0: no hotplug settings from platform
> > > > > > > [   18.288224] pci :03:00.1: no hotplug settings from platform
> > > > > > > [   18.288228] pci :00:14.0: no hotplug settings from platform
> > > > > > > [   18.288233] pci :00:14.1: no hotplug settings from platform
> > > > > > > [   18.288237] pci :00:14.2: no hotplug settings from platform
> > > > > > > [   18.288242] pci :00:16.0: no hotplug settings from platform
> > > > > > > [   18.288247] pci :00:16.1: no hotplug settings from platform
> > > > > > > [   18.288251] pci :00:16.2: no hotplug settings from platform
> > > > > > > [   18.288256] pci :00:16.3: no hotplug settings from platform
> > > > > > > [   18.288260] pci :00:16.4: no hotplug settings from platform
> > > > > > > [   18.288265] pci :00:16.5: no hotplug settings from platform
> > > > > > > [   18.288269] pci :00:16.6: no hotplug settings from platform
> > > > > > > [   18.288274] pci :00:16.7: no hotplug settings from platform
> > > > > > > [   18.288278] pci :00:1a.0: no hotplug settings from platform
> > > > > > > [   18.288279] pci :00:1a.0: using default PCI settings
> > > > > > > [   18.288292] pci :00:1a.1: no hotplug settings from platform
> > > > > > > [   18.288293] pci :00:1a.1: using default PCI settings
> > > > > > > [   18.288307] ehci-pci :00:1a.7: no hotplug settings from 
> > > > > > > platform
> > > > > > > [   18.288308] ehci-pci :00:1a.7: using default PCI settings
> > > > > > > [   18.288322] pci :00:1b.0: no hotplug settings from platform
> > > > > > > [   18.288327] pcieport :00:1c.0: no hotplug settings from 
> > > > > > > platform
> > > > > > > [   18.288332] pcieport :00:1c.4: no hotplug settings from 
> > > > > > > platform
> > > > > > > [   18.288344] pci :05:00.0: no hotplug settings from platform
> > > > > > > [   18.288349] pci :00:1d.0: no hotplug settings from platform
> > > > > > > [   18.288350] pci :00:1d.0: using default PCI settings
> > > > > > > [   18.288360] pci :00:1d.1: no hotplug settings from platform
> > > > > > > [   18.288361] pci :00:1d.1: using default PCI settings
> > > > > > > [   18.288374] pci :00:1d.2: no hotplug settings from platform
> > > > > > > [   18.288374] pci :00:1d.2: using default PCI settings
> > > > > > > [   18.288387] pci :00:1d.3: no hotplug settings from platform
> > > > > > > [   18.288387] pci :00:1d.3: using default PCI settings
> > > > > > > 
> > > > > > > The boot is noticeably slower.  What's going to happen on systems 
> > > > > > > that
> > > > > > > actually have a significant I/O topology vs my little workstation?
> > > > > 
> > > > > That depends on how many bus check/device check events they generate 
> > > > > on boot.
> > > > > 
> > > > > My test machines don't generate them during boot at all (even the one 
> > > > > with
> > > > > a Thunderbolt connector), so I don't see the messages in question 
> > > > > during boot
> > > > > on any of them.  Mika doesn't see them either I suppose, or he would 
> > > > > have told
> > > > > me about that before.
> > > > > 
> > > > > And let's just make it clear that it is not usual or even OK to 
> > > > > generate bus
> > > > > checks or device checks during boot like this.  And since the changes 
> > > > > in
> > > > > question have been in linux-next since right after the 3.11 merge 
> > > > > window, I
> > > > > think that someone would have complained already had that been a 
> > > > > common issue.
> > > > > 
> > > > > Of course, we need to deal with that somehow nevertheless. :-)
> > > > > 
> > > > > > Just to give you an idea:
> > > > > > 
> > > > > > CONFIG_HOTPLUG_PCI_ACPI=y
> > > > > > 
> > > > > > $ dmesg | wc
> > > > > >   5697  49935 

Re: [git pull] drm tree for 3.12-rc1

2013-09-05 Thread Linus Torvalds
On Thu, Sep 5, 2013 at 3:32 PM, Jesse Barnes  wrote:
> On Thu, 5 Sep 2013 12:18:32 -0700
>>
>> The first time I booted this, I just got a black screen on my Haswell
>> desktop when X11 started up.  I could ctrl-alt-BS and ctrl-alt-del to
>> reboot the machine, and neither the Xorg.0.log nor the dmesg contained
>> anything interesting.
>
> Did the console come back after ctl-alt-bs?  Or was it just a blind
> reboot?  Troubling that it doesn't happen again...

Blind reboot.

And Dave's theory that it is a "boot from old kernel to show the
problem in case it's some missing hw setup" is a good one, but doesn't
match my experience: I did boot the old kernel in between (to see what
went wrong), so both the working and nonworking setups were from
warm-booting from an old kernel..

I've booted a few times since (it's the merge window, so I boot fairly
frequently), and it hasn't happened again...

Looking more closely at the log-file, I notice that the

> AFAIK we have libva support out there for HSW.  The trick is getting
> your stack to actually use it.  Gwenole or Sean may be able to help.
>
>> Because my shiny new 65W haswell is really nice and does a "make
>> allmodconfig" in half the time of my old machine, but the GPU side has
>> been something of a step backwards...
>
> Well we definitely don't want that...

There's another thing I've noticed with Haswell - while putting the
screen to sleep works fine with DP, it comes back with odd corruption.
Pressing enter to get the actual password prompt correctly repaints
things, so it's not always noticeable - but using something other than
the enter key to wake things up seems to show it consistently (also
happens with "xset dpms force off", you don't have to wait for
locking)

Of course, this may be old user-land, again. It's current F19, the
intel module says "compiled for 1.14.2, module version = 2.21.12".

Maybe the problem I had with HDMI/DVI was just a different expression
of this same bug.

 Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Excess dmesg output from ACPIPHP on boot (was: Re: [PATCH 25/30] ACPI / hotplug / PCI: Check for new devices on enabled slots)

2013-09-05 Thread Alex Williamson
On Fri, 2013-09-06 at 00:40 +0200, Rafael J. Wysocki wrote:
> On Thursday, September 05, 2013 04:17:25 PM Alex Williamson wrote:
> > On Thu, 2013-09-05 at 23:39 +0200, Rafael J. Wysocki wrote:
> > > On Thursday, September 05, 2013 09:44:26 PM Rafael J. Wysocki wrote:
> > > > On Thursday, September 05, 2013 08:21:41 AM Alex Williamson wrote:
> > > 
> > > [...]
> > > 
> > > > > > 
> > > > > > [   18.288122] pci :00:00.0: no hotplug settings from platform
> > > > > > [   18.288127] pcieport :00:01.0: no hotplug settings from 
> > > > > > platform
> > > > > > [   18.288142] pci :01:00.0: no hotplug settings from platform
> > > > > > [   18.288157] pci :01:00.1: no hotplug settings from platform
> > > > > > [   18.288162] pcieport :00:03.0: no hotplug settings from 
> > > > > > platform
> > > > > > [   18.288176] pci :02:00.0: no hotplug settings from platform
> > > > > > [   18.288190] pci :02:00.1: no hotplug settings from platform
> > > > > > [   18.288195] pcieport :00:07.0: no hotplug settings from 
> > > > > > platform
> > > > > > [   18.288209] pci :03:00.0: no hotplug settings from platform
> > > > > > [   18.288224] pci :03:00.1: no hotplug settings from platform
> > > > > > [   18.288228] pci :00:14.0: no hotplug settings from platform
> > > > > > [   18.288233] pci :00:14.1: no hotplug settings from platform
> > > > > > [   18.288237] pci :00:14.2: no hotplug settings from platform
> > > > > > [   18.288242] pci :00:16.0: no hotplug settings from platform
> > > > > > [   18.288247] pci :00:16.1: no hotplug settings from platform
> > > > > > [   18.288251] pci :00:16.2: no hotplug settings from platform
> > > > > > [   18.288256] pci :00:16.3: no hotplug settings from platform
> > > > > > [   18.288260] pci :00:16.4: no hotplug settings from platform
> > > > > > [   18.288265] pci :00:16.5: no hotplug settings from platform
> > > > > > [   18.288269] pci :00:16.6: no hotplug settings from platform
> > > > > > [   18.288274] pci :00:16.7: no hotplug settings from platform
> > > > > > [   18.288278] pci :00:1a.0: no hotplug settings from platform
> > > > > > [   18.288279] pci :00:1a.0: using default PCI settings
> > > > > > [   18.288292] pci :00:1a.1: no hotplug settings from platform
> > > > > > [   18.288293] pci :00:1a.1: using default PCI settings
> > > > > > [   18.288307] ehci-pci :00:1a.7: no hotplug settings from 
> > > > > > platform
> > > > > > [   18.288308] ehci-pci :00:1a.7: using default PCI settings
> > > > > > [   18.288322] pci :00:1b.0: no hotplug settings from platform
> > > > > > [   18.288327] pcieport :00:1c.0: no hotplug settings from 
> > > > > > platform
> > > > > > [   18.288332] pcieport :00:1c.4: no hotplug settings from 
> > > > > > platform
> > > > > > [   18.288344] pci :05:00.0: no hotplug settings from platform
> > > > > > [   18.288349] pci :00:1d.0: no hotplug settings from platform
> > > > > > [   18.288350] pci :00:1d.0: using default PCI settings
> > > > > > [   18.288360] pci :00:1d.1: no hotplug settings from platform
> > > > > > [   18.288361] pci :00:1d.1: using default PCI settings
> > > > > > [   18.288374] pci :00:1d.2: no hotplug settings from platform
> > > > > > [   18.288374] pci :00:1d.2: using default PCI settings
> > > > > > [   18.288387] pci :00:1d.3: no hotplug settings from platform
> > > > > > [   18.288387] pci :00:1d.3: using default PCI settings
> > > > > > 
> > > > > > The boot is noticeably slower.  What's going to happen on systems 
> > > > > > that
> > > > > > actually have a significant I/O topology vs my little workstation?
> > > > 
> > > > That depends on how many bus check/device check events they generate on 
> > > > boot.
> > > > 
> > > > My test machines don't generate them during boot at all (even the one 
> > > > with
> > > > a Thunderbolt connector), so I don't see the messages in question 
> > > > during boot
> > > > on any of them.  Mika doesn't see them either I suppose, or he would 
> > > > have told
> > > > me about that before.
> > > > 
> > > > And let's just make it clear that it is not usual or even OK to 
> > > > generate bus
> > > > checks or device checks during boot like this.  And since the changes in
> > > > question have been in linux-next since right after the 3.11 merge 
> > > > window, I
> > > > think that someone would have complained already had that been a common 
> > > > issue.
> > > > 
> > > > Of course, we need to deal with that somehow nevertheless. :-)
> > > > 
> > > > > Just to give you an idea:
> > > > > 
> > > > > CONFIG_HOTPLUG_PCI_ACPI=y
> > > > > 
> > > > > $ dmesg | wc
> > > > >   5697  49935 384368
> > > > > 
> > > > > $ dmesg | tail --lines=1
> > > > > [   53.137123] Ebtables v2.0 registered
> > > > > 
> > > > > -- vs --
> > > > > 
> > > > > # CONFIG_HOTPLUG_PCI_ACPI is not set
> > > > > 
> > > > > $ dmesg | wc
> > > > >  1053  9176 71652
> 

linux-next: problem fetching the watchdog tree

2013-09-05 Thread Stephen Rothwell
Hi Wim,

Fetching the watchdog tree yesterday and today produced this error:

fatal: unable to connect to www.linux-watchdog.org:
www.linux-watchdog.org[0: 83.149.101.17]: errno=Connection refused

-- 
Cheers,
Stephen Rothwell    s...@canb.auug.org.au


pgpQhf_3Ty84e.pgp
Description: PGP signature


[PATCH 0/9] x86/UV/KDB/NMI: Updates for NMI/KDB handler for SGI UV

2013-09-05 Thread Mike Travis

V2:  Split KDB updates from NMI updates.  Broke up the big patch to
 uv_nmi.c into smaller patches.  Updated to the latest linux
 kernel version.

The current UV NMI handler has not been updated for the changes in the
system NMI handler and the perf operations.  The UV NMI handler reads
an MMR in the UV Hub to check to see if the NMI event was caused by
the external 'system NMI' that the operator can initiate on the System
Mgmt Controller.

The problem arises when the perf tools are running, causing millions of
perf events per second on very large CPU count systems.  Previously this
was okay because the perf NMI handler ran at a higher priority on the
NMI call chain and if the NMI was a perf event, it would stop calling
other NMI handlers remaining on the NMI call chain.

Now the system NMI handler calls all the handlers on the NMI call
chain including the UV NMI handler.  This causes the UV NMI handler
to read the MMRs at the same millions per second rate.  This can lead
to significant performance loss and possible system failures.  It also
can cause thousands of 'Dazed and Confused' messages to be sent to the
system console.  This effectively makes perf tools unusable on UV systems.

This patch set addresses this problem and allows the perf tools to run on
UV without impacting performance and causing system failures.

-- 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [ 00/36] 3.10.11-stable review

2013-09-05 Thread Guenter Roeck
On Thu, Sep 05, 2013 at 01:27:29PM -0700, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 3.10.11 release.
> There are 36 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Sep  7 20:26:25 UTC 2013.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   kernel.org/pub/linux/kernel/v3.0/stable-review/patch-3.10.11-rc1.gz
> and the diffstat can be found below.
> 
buildbot says:
total: 100 pass: 100 skipped: 0 fail: 0
Just like the previous release, this is a perfect result. Time to add some
failing builds to raise the challenge?

qemu test runs for arm, microblaze, mips, mips64, ppc, x86, and x86_64 are fine.

Details are available at http://server.roeck-us.net:8010/builders.

Guenter
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


  1   2   3   4   5   6   7   8   9   10   >