Re: linux-next: Signed-off-by missing for commit in the pci tree

2018-01-17 Thread Bjorn Helgaas
On Wed, Jan 17, 2018 at 2:53 PM, Stephen Rothwell  wrote:
> Hi Bjorn,
>
> Commit
>
>   209930d809fa ("PCI/ASPM: Add pci_enable_link_state()")
>
> is missing a Signed-off-by from its author and committer.

Oh, thanks!  I didn't intend to include that commit.  I dropped it and
updated my "next" branch.


[PATCH v6 32/99] mm: Convert truncate to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This is essentially xa_cmpxchg() with the locking handled above us,
and it doesn't have to handle replacing a NULL entry.
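
A minimal sketch of the two forms being compared, assuming the xa_cmpxchg()
and xas_*() signatures introduced earlier in this series:

    /* Unlocked form: compare-and-exchange in a single call */
    xa_cmpxchg(&mapping->pages, index, entry, NULL, GFP_NOWAIT);

    /* Locked form used here: the caller already holds the xa_lock */
    XA_STATE(xas, &mapping->pages, index);
    if (xas_load(&xas) == entry)
            xas_store(&xas, NULL);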

Signed-off-by: Matthew Wilcox 
---
 mm/truncate.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/mm/truncate.c b/mm/truncate.c
index 69bb743dd7e5..70323c347298 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -33,15 +33,12 @@
 static inline void __clear_shadow_entry(struct address_space *mapping,
pgoff_t index, void *entry)
 {
-   struct radix_tree_node *node;
-   void **slot;
+   XA_STATE(xas, &mapping->pages, index);
 
-   if (!__radix_tree_lookup(&mapping->pages, index, &node, &slot))
+   xas_set_update(&xas, workingset_update_node);
+   if (xas_load(&xas) != entry)
return;
-   if (*slot != entry)
-   return;
-   __radix_tree_replace(&mapping->pages, node, slot, NULL,
-workingset_update_node);
+   xas_store(&xas, NULL);
mapping->nrexceptional--;
 }
 
@@ -746,10 +743,10 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
index++;
}
/*
-* For DAX we invalidate page tables after invalidating radix tree.  We
+* For DAX we invalidate page tables after invalidating page cache.  We
 * could invalidate page tables while invalidating each entry however
 * that would be expensive. And doing range unmapping before doesn't
-* work as we have no cheap way to find whether radix tree entry didn't
+* work as we have no cheap way to find whether page cache entry didn't
 * get remapped later.
 */
if (dax_mapping(mapping)) {
-- 
2.15.1



[PATCH v6 33/99] mm: Convert add_to_swap_cache to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Combine __add_to_swap_cache and add_to_swap_cache into one function
since there is no more need to preload.
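
Without preloading, allocation failures are handled by retrying: xas_nomem()
allocates memory for the XArray while no lock is held and signals that the
operation should be attempted again. A reduced sketch of that pattern, using
the same names as the function below:

    XA_STATE(xas, &address_space->pages, idx);

    do {
            xas_lock_irq(&xas);
            xas_store(&xas, page);  /* may record -ENOMEM in the state */
            xas_unlock_irq(&xas);
            /* on -ENOMEM, allocate with 'gfp' while unlocked, then retry */
    } while (xas_nomem(&xas, gfp));

    return xas_error(&xas);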

Signed-off-by: Matthew Wilcox 
---
 mm/swap_state.c | 93 ++---
 1 file changed, 29 insertions(+), 64 deletions(-)

diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3f95e8fc4cb2..a57b5ad4c503 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -107,14 +107,15 @@ void show_swap_cache_info(void)
 }
 
 /*
- * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
+ * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-int __add_to_swap_cache(struct page *page, swp_entry_t entry)
+int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
 {
-   int error, i, nr = hpage_nr_pages(page);
-   struct address_space *address_space;
+   struct address_space *address_space = swap_address_space(entry);
pgoff_t idx = swp_offset(entry);
+   XA_STATE(xas, &address_space->pages, idx);
+   unsigned long i, nr = 1UL << compound_order(page);
 
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapCache(page), page);
@@ -123,50 +124,30 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
page_ref_add(page, nr);
SetPageSwapCache(page);
 
-   address_space = swap_address_space(entry);
-   xa_lock_irq(&address_space->pages);
-   for (i = 0; i < nr; i++) {
-   set_page_private(page + i, entry.val + i);
-   error = radix_tree_insert(&address_space->pages,
- idx + i, page + i);
-   if (unlikely(error))
-   break;
-   }
-   if (likely(!error)) {
+   do {
+   xas_lock_irq(&xas);
+   xas_create_range(&xas, idx + nr - 1);
+   if (xas_error(&xas))
+   goto unlock;
+   for (i = 0; i < nr; i++) {
+   VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
+   set_page_private(page + i, entry.val + i);
+   xas_store(&xas, page + i);
+   xas_next(&xas);
+   }
address_space->nrpages += nr;
__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
ADD_CACHE_INFO(add_total, nr);
-   } else {
-   /*
-* Only the context which have set SWAP_HAS_CACHE flag
-* would call add_to_swap_cache().
-* So add_to_swap_cache() doesn't returns -EEXIST.
-*/
-   VM_BUG_ON(error == -EEXIST);
-   set_page_private(page + i, 0UL);
-   while (i--) {
-   radix_tree_delete(&address_space->pages, idx + i);
-   set_page_private(page + i, 0UL);
-   }
-   ClearPageSwapCache(page);
-   page_ref_sub(page, nr);
-   }
-   xa_unlock_irq(&address_space->pages);
+unlock:
+   xas_unlock_irq(&xas);
+   } while (xas_nomem(&xas, gfp));
 
-   return error;
-}
-
-
-int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
-{
-   int error;
+   if (!xas_error(&xas))
+   return 0;
 
-   error = radix_tree_maybe_preload_order(gfp_mask, compound_order(page));
-   if (!error) {
-   error = __add_to_swap_cache(page, entry);
-   radix_tree_preload_end();
-   }
-   return error;
+   ClearPageSwapCache(page);
+   page_ref_sub(page, nr);
+   return xas_error(&xas);
 }
 
 /*
@@ -220,7 +201,7 @@ int add_to_swap(struct page *page)
goto fail;
 
/*
-* Radix-tree node allocations from PF_MEMALLOC contexts could
+* XArray node allocations from PF_MEMALLOC contexts could
 * completely exhaust the page allocator. __GFP_NOMEMALLOC
 * stops emergency reserves from being allocated.
 *
@@ -232,7 +213,6 @@ int add_to_swap(struct page *page)
 */
err = add_to_swap_cache(page, entry,
__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
-   /* -ENOMEM radix-tree allocation failure */
if (err)
/*
 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
@@ -400,19 +380,11 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
break;  /* Out of memory */
}
 
-   /*
-* call radix_tree_preload() while we can wait.
-*/
-   err = radix_tree_maybe_preload(gfp_mask & GFP_KERNEL);
-   if (err)
-   break;
-
/*
 * Swap entry may have been freed since our caller observed it.
 */
err = swapcache_prepare(entry);

[PATCH v6 37/99] mm: Convert huge_memory to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Quite a straightforward conversion.
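
The only non-mechanical spot is in split_huge_page_to_list(): the old slot
lookup plus lock-protected dereference collapses into one xas_load() under
the xa_lock. Condensed from the hunk below:

    XA_STATE(xas, &mapping->pages, page_index(head));

    xa_lock(&mapping->pages);
    if (xas_load(&xas) != head)     /* replaces lookup_slot + deref */
            goto fail;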

Signed-off-by: Matthew Wilcox 
---
 mm/huge_memory.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f71dd3e7d8cd..5c275295bbd3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2379,7 +2379,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
if (PageAnon(head) && !PageSwapCache(head)) {
page_ref_inc(page_tail);
} else {
-   /* Additional pin to radix tree */
+   /* Additional pin to page cache */
page_ref_add(page_tail, 2);
}
 
@@ -2450,13 +2450,13 @@ static void __split_huge_page(struct page *page, struct list_head *list,
ClearPageCompound(head);
/* See comment in __split_huge_page_tail() */
if (PageAnon(head)) {
-   /* Additional pin to radix tree of swap cache */
+   /* Additional pin to swap cache */
if (PageSwapCache(head))
page_ref_add(head, 2);
else
page_ref_inc(head);
} else {
-   /* Additional pin to radix tree */
+   /* Additional pin to page cache */
page_ref_add(head, 2);
xa_unlock(&head->mapping->pages);
}
@@ -2568,7 +2568,7 @@ bool can_split_huge_page(struct page *page, int *pextra_pins)
 {
int extra_pins;
 
-   /* Additional pins from radix tree */
+   /* Additional pins from page cache */
if (PageAnon(page))
extra_pins = PageSwapCache(page) ? HPAGE_PMD_NR : 0;
else
@@ -2664,17 +2664,14 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
spin_lock_irqsave(zone_lru_lock(page_zone(head)), flags);
 
if (mapping) {
-   void **pslot;
+   XA_STATE(xas, &mapping->pages, page_index(head));
 
-   xa_lock(&mapping->pages);
-   pslot = radix_tree_lookup_slot(&mapping->pages,
-   page_index(head));
/*
-* Check if the head page is present in radix tree.
+* Check if the head page is present in page cache.
 * We assume all tail are present too, if head is there.
 */
-   if (radix_tree_deref_slot_protected(pslot,
-   &mapping->pages.xa_lock) != head)
+   xa_lock(&mapping->pages);
+   if (xas_load(&xas) != head)
goto fail;
}
 
-- 
2.15.1



[RFC PATCH v3 04/13] bootsplash: Add corner positioning

2018-01-17 Thread Max Staudt
This allows showing multiple logos, each in its own position,
relative to the eight screen corners.
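
The position field combines a corner id with SPLASH_POS_FLAG_CORNER, and
position_offset then nudges the picture inward from that corner. As a
condensed restatement of one case from the switch below (bottom-right,
same names as in the patch):

    dst_xoff = info->var.xres - ph->width - ph->position_offset;
    dst_yoff = info->var.yres - ph->height - ph->position_offset;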

Signed-off-by: Max Staudt 
---
 drivers/video/fbdev/core/bootsplash_render.c | 136 ++-
 include/uapi/linux/bootsplash_file.h |  45 -
 2 files changed, 178 insertions(+), 3 deletions(-)

diff --git a/drivers/video/fbdev/core/bootsplash_render.c b/drivers/video/fbdev/core/bootsplash_render.c
index 8c09c306ff67..07e3a4eab811 100644
--- a/drivers/video/fbdev/core/bootsplash_render.c
+++ b/drivers/video/fbdev/core/bootsplash_render.c
@@ -155,6 +155,7 @@ void bootsplash_do_render_pictures(struct fb_info *info,
for (i = 0; i < fp->header->num_pics; i++) {
struct splash_blob_priv *bp;
struct splash_pic_priv *pp = &fp->pics[i];
+   const struct splash_pic_header *ph = pp->pic_header;
long dst_xoff, dst_yoff;
 
if (pp->blobs_loaded < 1)
@@ -165,8 +166,139 @@ void bootsplash_do_render_pictures(struct fb_info *info,
if (!bp || bp->blob_header->type != 0)
continue;
 
-   dst_xoff = (info->var.xres - pp->pic_header->width) / 2;
-   dst_yoff = (info->var.yres - pp->pic_header->height) / 2;
+   switch (ph->position) {
+   case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_TOP_LEFT:
+   dst_xoff = 0;
+   dst_yoff = 0;
+
+   dst_xoff += ph->position_offset;
+   dst_yoff += ph->position_offset;
+   break;
+   case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_TOP:
+   dst_xoff = info->var.xres - pp->pic_header->width;
+   dst_xoff /= 2;
+   dst_yoff = 0;
+
+   dst_yoff += ph->position_offset;
+   break;
+   case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_TOP_RIGHT:
+   dst_xoff = info->var.xres - pp->pic_header->width;
+   dst_yoff = 0;
+
+   dst_xoff -= ph->position_offset;
+   dst_yoff += ph->position_offset;
+   break;
+   case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_RIGHT:
+   dst_xoff = info->var.xres - pp->pic_header->width;
+   dst_yoff = info->var.yres - pp->pic_header->height;
+   dst_yoff /= 2;
+
+   dst_xoff -= ph->position_offset;
+   break;
+   case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_BOTTOM_RIGHT:
+   dst_xoff = info->var.xres - pp->pic_header->width;
+   dst_yoff = info->var.yres - pp->pic_header->height;
+
+   dst_xoff -= ph->position_offset;
+   dst_yoff -= ph->position_offset;
+   break;
+   case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_BOTTOM:
+   dst_xoff = info->var.xres - pp->pic_header->width;
+   dst_xoff /= 2;
+   dst_yoff = info->var.yres - pp->pic_header->height;
+
+   dst_yoff -= ph->position_offset;
+   break;
+   case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_BOTTOM_LEFT:
+   dst_xoff = 0 + ph->position_offset;
+   dst_yoff = info->var.yres - pp->pic_header->height
+ - ph->position_offset;
+   break;
+   case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_LEFT:
+   dst_xoff = 0;
+   dst_yoff = info->var.yres - pp->pic_header->height;
+   dst_yoff /= 2;
+
+   dst_xoff += ph->position_offset;
+   break;
+
+   case SPLASH_CORNER_TOP_LEFT:
+   dst_xoff = info->var.xres - pp->pic_header->width;
+   dst_xoff /= 2;
+   dst_yoff = info->var.yres - pp->pic_header->height;
+   dst_yoff /= 2;
+
+   dst_xoff -= ph->position_offset;
+   dst_yoff -= ph->position_offset;
+   break;
+   case SPLASH_CORNER_TOP:
+   dst_xoff = info->var.xres - pp->pic_header->width;
+   dst_xoff /= 2;
+   dst_yoff = info->var.yres - pp->pic_header->height;
+   dst_yoff /= 2;
+
+   dst_yoff -= ph->position_offset;
+   break;
+   case SPLASH_CORNER_TOP_RIGHT:
+   dst_xoff = info->var.xres - pp->pic_header->width;
+   dst_xoff /= 2;
+   dst_yoff = info->var.yres - pp->pic_header->height;
+   dst_yoff /= 2;
+

[RFC PATCH v3 02/13] bootsplash: Add file reading and picture rendering

2018-01-17 Thread Max Staudt
Load logo(s) from a file and render them in the center of the screen.

This removes the "black screen" functionality, which can now be emulated
by providing a splash file with no pictures and a black background.

To enable the bootsplash at boot, provide a theme file *in the initramfs*
and tell the kernel to use it as follows:

  bootsplash.bootfile=mypath/myfile

Since the splash code is using request_firmware() to load the file,
the path has to be beneath /lib/firmware.
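
For reference, a minimal sketch of that load path; bootsplash_load_firmware()
below wraps roughly this pattern (error handling trimmed, dev being the
splash platform device as in bootsplash_init()):

    const struct firmware *fw;

    /* resolves to /lib/firmware/bootsplash/mytheme (or initramfs copy) */
    if (request_firmware(&fw, "bootsplash/mytheme", dev))
            return NULL;

    /* fw->data and fw->size describe the raw theme file; parse, then: */
    release_firmware(fw);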

Signed-off-by: Max Staudt 
---
 MAINTAINERS|   1 +
 drivers/video/fbdev/core/Makefile  |   2 +-
 drivers/video/fbdev/core/bootsplash.c  |  36 +++-
 drivers/video/fbdev/core/bootsplash_internal.h |  45 -
 drivers/video/fbdev/core/bootsplash_load.c | 225 +
 drivers/video/fbdev/core/bootsplash_render.c   | 103 ++-
 include/uapi/linux/bootsplash_file.h   | 118 +
 7 files changed, 522 insertions(+), 8 deletions(-)
 create mode 100644 drivers/video/fbdev/core/bootsplash_load.c
 create mode 100644 include/uapi/linux/bootsplash_file.h

diff --git a/MAINTAINERS b/MAINTAINERS
index b5633b56391e..5c237445761e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2712,6 +2712,7 @@ S: Maintained
 F: drivers/video/fbdev/core/bootsplash*.*
 F: drivers/video/fbdev/core/dummycon.c
 F: include/linux/bootsplash.h
+F: include/uapi/linux/bootsplash_file.h
 
 BPF (Safe dynamic programs and tools)
 M: Alexei Starovoitov 
diff --git a/drivers/video/fbdev/core/Makefile b/drivers/video/fbdev/core/Makefile
index 66895321928e..6a8d1bab8a01 100644
--- a/drivers/video/fbdev/core/Makefile
+++ b/drivers/video/fbdev/core/Makefile
@@ -31,4 +31,4 @@ obj-$(CONFIG_FB_SVGALIB)   += svgalib.o
 obj-$(CONFIG_FB_DDC)   += fb_ddc.o
 
 obj-$(CONFIG_BOOTSPLASH)   += bootsplash.o bootsplash_render.o \
-  dummyblit.o
+  bootsplash_load.o dummyblit.o
diff --git a/drivers/video/fbdev/core/bootsplash.c b/drivers/video/fbdev/core/bootsplash.c
index e449755af268..843c5400fefc 100644
--- a/drivers/video/fbdev/core/bootsplash.c
+++ b/drivers/video/fbdev/core/bootsplash.c
@@ -32,6 +32,7 @@
 #include 
 
 #include "bootsplash_internal.h"
+#include "uapi/linux/bootsplash_file.h"
 
 
 /*
@@ -102,10 +103,17 @@ static bool is_fb_compatible(const struct fb_info *info)
  */
 void bootsplash_render_full(struct fb_info *info)
 {
+   mutex_lock(&splash_state.data_lock);
+
if (!is_fb_compatible(info))
-   return;
+   goto out;
+
+   bootsplash_do_render_background(info, splash_state.file);
+
+   bootsplash_do_render_pictures(info, splash_state.file);
 
-   bootsplash_do_render_background(info);
+out:
+   mutex_unlock(&splash_state.data_lock);
 }
 
 
@@ -116,6 +124,7 @@ bool bootsplash_would_render_now(void)
 {
return !oops_in_progress
&& !console_blanked
+   && splash_state.file
&& bootsplash_is_enabled();
 }
 
@@ -252,6 +261,7 @@ static struct platform_driver splash_driver = {
 void bootsplash_init(void)
 {
int ret;
+   struct splash_file_priv *fp;
 
/* Initialized already? */
if (splash_state.splash_device)
@@ -280,8 +290,26 @@ void bootsplash_init(void)
}
 
 
+   mutex_init(&splash_state.data_lock);
+   set_bit(0, &splash_state.enabled);
 
INIT_WORK(&splash_state.work_redraw_vc, splash_callback_redraw_vc);
 
+
+   if (!splash_state.bootfile || !strlen(splash_state.bootfile))
+   return;
+
+   fp = bootsplash_load_firmware(&splash_state.splash_device->dev,
+ splash_state.bootfile);
+
+   if (!fp)
+   goto err;
+
+   mutex_lock(&splash_state.data_lock);
+   splash_state.splash_fb = NULL;
+   splash_state.file = fp;
+   mutex_unlock(&splash_state.data_lock);
+
return;
 
 err_device:
@@ -292,3 +320,7 @@ void bootsplash_init(void)
 err:
pr_err("Failed to initialize.\n");
 }
+
+
+module_param_named(bootfile, splash_state.bootfile, charp, 0444);
+MODULE_PARM_DESC(bootfile, "Bootsplash file to load on boot");
diff --git a/drivers/video/fbdev/core/bootsplash_internal.h b/drivers/video/fbdev/core/bootsplash_internal.h
index b11da5cb90bf..71e2a27ac0b8 100644
--- a/drivers/video/fbdev/core/bootsplash_internal.h
+++ b/drivers/video/fbdev/core/bootsplash_internal.h
@@ -15,15 +15,43 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 
+#include "uapi/linux/bootsplash_file.h"
+
 
 /*
  * Runtime types
  */
+struct splash_blob_priv {
+   struct splash_blob_header *blob_header;
+   const void *data;
+};
+
+
+struct splash_pic_priv {
+   const struct splash_pic_header *pic_header;
+
+   struct splash_blob_priv *blobs;
+   u16 blobs_loaded;
+};
+
+
+struct splash_file_priv {
+   const struct firmware *fw;
+   const struct splash_file_header *header;
+

[RFC PATCH v3 07/13] vt: Add keyboard hook to disable bootsplash

2018-01-17 Thread Max Staudt
Let's disable the splash if the user presses ESC or F1-F12 on a VT.

The F1-F12 check is to disable the splash on VT switches.

Signed-off-by: Max Staudt 
---
 drivers/tty/vt/keyboard.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index f4166263bb3a..a248429194bb 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -47,6 +47,8 @@
 
 #include 
 
+#include <linux/bootsplash.h>
+
 extern void ctrl_alt_del(void);
 
 /*
@@ -1353,6 +1355,28 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
}
 #endif
 
+   /* Trap keys when bootsplash is shown */
+   if (bootsplash_would_render_now()) {
+   /* Deactivate bootsplash on ESC or Alt+Fxx VT switch */
+   if (keycode >= KEY_F1 && keycode <= KEY_F12) {
+   bootsplash_disable();
+
+   /*
+* No return here since we want to actually
+* perform the VT switch.
+*/
+   } else {
+   if (keycode == KEY_ESC)
+   bootsplash_disable();
+
+   /*
+* Just drop any other keys.
+* Their effect would be hidden by the splash.
+*/
+   return;
+   }
+   }
+
if (kbd->kbdmode == VC_MEDIUMRAW) {
/*
 * This is extended medium raw mode, with keys above 127
-- 
2.12.3



[RFC PATCH v3 09/13] fbcon: Disable bootsplash on oops

2018-01-17 Thread Max Staudt
Signed-off-by: Max Staudt 
Reviewed-by: Oliver Neukum 
---
 drivers/video/fbdev/core/fbcon.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 9a39a6fcfe98..8a9c67e1c5d8 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -1343,6 +1343,16 @@ static void fbcon_cursor(struct vc_data *vc, int mode)
int y;
int c = scr_readw((u16 *) vc->vc_pos);
 
+   /*
+* Disable the splash here so we don't have to hook into
+* vt_console_print() in drivers/tty/vt/vt.c
+*
+* We'd disable the splash just before the call to
+* hide_cursor() anyway, so this spot is just fine.
+*/
+   if (oops_in_progress)
+   bootsplash_disable();
+
ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms);
 
if (fbcon_is_inactive(vc, info) || vc->vc_deccm != 1)
-- 
2.12.3



[RFC PATCH v3 01/13] bootsplash: Initial implementation showing black screen

2018-01-17 Thread Max Staudt
This is the initial prototype for a lean Linux kernel bootsplash.

It works by replacing fbcon's FB manipulation routines (such as
bitblit, tileblit) with dummy functions, effectively disabling text
output, and drawing the splash directly onto the FB device.
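
As an illustration, such a dummy routine is just an empty body behind
fbcon's blit hooks; the signature below matches fbcon's putcs op, the
function name is only an example in the style of dummyblit.c:

    static void dummy_putcs(struct vc_data *vc, struct fb_info *info,
                            const unsigned short *s, int count,
                            int yy, int xx, int fg, int bg)
    {
        /* swallow console text while the splash owns the screen */
    }

While the splash is active, fbcon points its blitting ops at such stubs,
so text output is suppressed without touching the VT layer.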

There is a userland API via sysfs, to show/hide the splash on request
by dracut, systemd, or other init systems.

As of this commit, the code will show a black screen rather than a
logo, and only if manually enabled via sysfs by writing:

  echo 1 > /sys/devices/platform/bootsplash.0/enabled

The reasons for implementing a bootsplash in kernel space are:

 - Quieting things more and nicer than with the quiet boot option:
   Currently the 'quiet' boot option does not remove the blinking
   cursor and errors are still printed. There are use cases where this
   is not desirable (such as embedded and desktop systems, digital
   signage, etc.) and a vendor logo is preferable.

 - Showing graphics, and never text, when the GUI crashes:
   This is an extension of the above use case, where recovery is meant
   to happen as invisibly to the user as possible. A system integrator
   needs the flexibility to hide "scary text" from users in all cases
   other than a panic.
   This is especially desirable in embedded systems such as digital
   signage.

 - Racy VT API:
   Userspace bootsplashes and GUIs (e.g. plymouth and X) tend to kick
   each other out via the non-exclusive KDSETMODE ioctl. This can
   result in situations such as the user being stuck in X with chvt
   and Ctrl-Alt-Fx no longer working.

 - Mode switching from FB to KMS:
   We cannot switch from a generic framebuffer (vesafb, efifb) to a
   KMS driver while a userspace splash keeps /dev/fb0 open. The device
   will vanish, but the address space is still busy, so the KMS driver
   cannot reserve its VRAM.

 - Simplification of userspace integration:
   Right now, hooking up a splash screen in userspace is quite complex.
   Having it in the kernel makes this a breeze, as hooks for
   switch_root, remounting r/w, etc. become obsolete.

Signed-off-by: Max Staudt 
---
 MAINTAINERS|   8 +
 drivers/video/console/Kconfig  |  24 ++
 drivers/video/fbdev/core/Makefile  |   3 +
 drivers/video/fbdev/core/bootsplash.c  | 294 +
 drivers/video/fbdev/core/bootsplash_internal.h |  55 +
 drivers/video/fbdev/core/bootsplash_render.c   |  93 
 drivers/video/fbdev/core/dummyblit.c   |  89 
 drivers/video/fbdev/core/fbcon.c   |  22 ++
 drivers/video/fbdev/core/fbcon.h   |   5 +
 include/linux/bootsplash.h |  43 
 10 files changed, 636 insertions(+)
 create mode 100644 drivers/video/fbdev/core/bootsplash.c
 create mode 100644 drivers/video/fbdev/core/bootsplash_internal.h
 create mode 100644 drivers/video/fbdev/core/bootsplash_render.c
 create mode 100644 drivers/video/fbdev/core/dummyblit.c
 create mode 100644 include/linux/bootsplash.h

diff --git a/MAINTAINERS b/MAINTAINERS
index a74227ad082e..b5633b56391e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2705,6 +2705,14 @@ S:   Supported
 F: drivers/net/bonding/
 F: include/uapi/linux/if_bonding.h
 
+BOOTSPLASH
+M: Max Staudt 
+L: linux-fb...@vger.kernel.org
+S: Maintained
+F: drivers/video/fbdev/core/bootsplash*.*
+F: drivers/video/fbdev/core/dummycon.c
+F: include/linux/bootsplash.h
+
 BPF (Safe dynamic programs and tools)
 M: Alexei Starovoitov 
 M: Daniel Borkmann 
diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index 7f1f1fbcef9e..f3ff976266fe 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -151,6 +151,30 @@ config FRAMEBUFFER_CONSOLE_ROTATION
  such that other users of the framebuffer will remain normally
  oriented.
 
+config BOOTSPLASH
+   bool "Bootup splash screen"
+   depends on FRAMEBUFFER_CONSOLE
+   ---help---
+ This option enables the Linux bootsplash screen.
+
+ The bootsplash is a full-screen logo or animation indicating a
+ booting system. It replaces the classic scrolling text with a
+ graphical alternative, similar to other systems.
+
+ Since this is technically implemented as a hook on top of fbcon,
+ it can only work if the FRAMEBUFFER_CONSOLE is enabled and a
+ framebuffer driver is active. Thus, to get a text-free boot,
+ the system needs to boot with vesafb, efifb, or similar.
+
+ Once built into the kernel, the bootsplash needs to be enabled
+ with bootsplash.enabled=1 and a splash file needs to be supplied.
+
+ Further documentation can be found in:
+   Documentation/fb/bootsplash.txt
+
+ If unsure, say N.
+ This is typically used by distributors and system integrators.
+
 config STI_CONSOLE
 bool "STI text console"

[RFC PATCH v3 05/13] bootsplash: Add animation support

2018-01-17 Thread Max Staudt
Each 'picture' in the splash file can consist of multiple 'blobs'.

If animation is enabled, these blobs become the frames of an animation,
in the order in which they are stored in the file.

Note: There is only one global timer, so all animations happen at
  the same frame rate. It doesn't really make sense to animate
  more than one object at a time anyway.

Furthermore, this patch introduces a check for reusing a framebuffer
where the splash has recently been painted on - in this case, we only
redraw the objects that are animated.
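
Condensed, the resulting render path looks like this (same names as in
bootsplash_render_full() below); a framebuffer seen on the previous pass
skips the compatibility checks and the background fill:

    bool is_update = (splash_state.splash_fb == info);

    if (!is_update) {
            if (!is_fb_compatible(info))
                    return;
            bootsplash_do_render_background(info, splash_state.file);
            splash_state.splash_fb = info;  /* remember this FB */
    }
    bootsplash_do_render_pictures(info, splash_state.file, is_update);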

Signed-off-by: Max Staudt 
---
 drivers/video/fbdev/core/bootsplash.c  | 62 +++---
 drivers/video/fbdev/core/bootsplash_internal.h | 13 +-
 drivers/video/fbdev/core/bootsplash_load.c | 21 +
 drivers/video/fbdev/core/bootsplash_render.c   | 30 -
 include/uapi/linux/bootsplash_file.h   | 35 ++-
 5 files changed, 151 insertions(+), 10 deletions(-)

diff --git a/drivers/video/fbdev/core/bootsplash.c b/drivers/video/fbdev/core/bootsplash.c
index 815b007f81ca..c8642142cfea 100644
--- a/drivers/video/fbdev/core/bootsplash.c
+++ b/drivers/video/fbdev/core/bootsplash.c
@@ -53,6 +53,14 @@ static void splash_callback_redraw_vc(struct work_struct *ignored)
console_unlock();
 }
 
+static void splash_callback_animation(struct work_struct *ignored)
+{
+   if (bootsplash_would_render_now()) {
+   /* This will also re-schedule this delayed worker */
+   splash_callback_redraw_vc(ignored);
+   }
+}
+
 
 static bool is_fb_compatible(const struct fb_info *info)
 {
@@ -103,17 +111,44 @@ static bool is_fb_compatible(const struct fb_info *info)
  */
 void bootsplash_render_full(struct fb_info *info)
 {
+   bool is_update = false;
+
mutex_lock(_state.data_lock);
 
-   if (!is_fb_compatible(info))
-   goto out;
+   /*
+* If we've painted on this FB recently, we don't have to do
+* the sanity checks and background drawing again.
+*/
+   if (splash_state.splash_fb == info)
+   is_update = true;
+
+
+   if (!is_update) {
+   /* Check whether we actually support this FB. */
+   splash_state.splash_fb = NULL;
+
+   if (!is_fb_compatible(info))
+   goto out;
+
+   /* Draw the background only once */
+   bootsplash_do_render_background(info, splash_state.file);
 
-   bootsplash_do_render_background(info, splash_state.file);
+   /* Mark this FB as last seen */
+   splash_state.splash_fb = info;
+   }
 
-   bootsplash_do_render_pictures(info, splash_state.file);
+   bootsplash_do_render_pictures(info, splash_state.file, is_update);
 
bootsplash_do_render_flush(info);
 
+   bootsplash_do_step_animations(splash_state.file);
+
+   /* Schedule update for animated splash screens */
+   if (splash_state.file->frame_ms > 0)
+   schedule_delayed_work(&splash_state.dwork_animation,
+ msecs_to_jiffies(
+ splash_state.file->frame_ms));
+
 out:
mutex_unlock(_state.data_lock);
 }
@@ -169,8 +204,14 @@ void bootsplash_enable(void)
 
was_enabled = test_and_set_bit(0, &splash_state.enabled);
 
-   if (!was_enabled)
+   if (!was_enabled) {
+   /* Force a full redraw when the splash is re-activated */
+   mutex_lock(&splash_state.data_lock);
+   splash_state.splash_fb = NULL;
+   mutex_unlock(&splash_state.data_lock);
+
schedule_work(&splash_state.work_redraw_vc);
+   }
 }
 
 
@@ -227,6 +268,14 @@ ATTRIBUTE_GROUPS(splash_dev);
  */
 static int splash_resume(struct device *device)
 {
+   /*
+* Force full redraw on resume since we've probably lost the
+* framebuffer's contents meanwhile
+*/
+   mutex_lock(&splash_state.data_lock);
+   splash_state.splash_fb = NULL;
+   mutex_unlock(&splash_state.data_lock);
+
if (bootsplash_would_render_now())
schedule_work(&splash_state.work_redraw_vc);
 
@@ -235,6 +284,7 @@ static int splash_resume(struct device *device)
 
 static int splash_suspend(struct device *device)
 {
+   cancel_delayed_work_sync(&splash_state.dwork_animation);
cancel_work_sync(&splash_state.work_redraw_vc);
 
return 0;
@@ -296,6 +346,8 @@ void bootsplash_init(void)
set_bit(0, &splash_state.enabled);
 
INIT_WORK(&splash_state.work_redraw_vc, splash_callback_redraw_vc);
+   INIT_DELAYED_WORK(&splash_state.dwork_animation,
+ splash_callback_animation);
 
 
if (!splash_state.bootfile || !strlen(splash_state.bootfile))
diff --git a/drivers/video/fbdev/core/bootsplash_internal.h b/drivers/video/fbdev/core/bootsplash_internal.h
index 0acb383aa4e3..b3a74835d90f 100644
--- a/drivers/video/fbdev/core/bootsplash_internal.h
+++ b/drivers/video/fbdev/core/bootsplash_internal.h

[RFC PATCH v3 10/13] Documentation: Add bootsplash main documentation

2018-01-17 Thread Max Staudt
Signed-off-by: Max Staudt 
---
 .../ABI/testing/sysfs-platform-bootsplash  |  11 +
 Documentation/bootsplash.rst   | 285 +
 MAINTAINERS|   2 +
 3 files changed, 298 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-platform-bootsplash
 create mode 100644 Documentation/bootsplash.rst

diff --git a/Documentation/ABI/testing/sysfs-platform-bootsplash b/Documentation/ABI/testing/sysfs-platform-bootsplash
new file mode 100644
index ..742c7b035ded
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-bootsplash
@@ -0,0 +1,11 @@
+What:  /sys/devices/platform/bootsplash.0/enabled
+Date:  Oct 2017
+KernelVersion: 4.14
+Contact:   Max Staudt 
+Description:
+   Can be set and read.
+
+   0: Splash is disabled.
+   1: Splash is shown whenever fbcon would show a text console
+  (i.e. no graphical application is running), and a splash
+  file is loaded.
diff --git a/Documentation/bootsplash.rst b/Documentation/bootsplash.rst
new file mode 100644
index ..611f0c558925
--- /dev/null
+++ b/Documentation/bootsplash.rst
@@ -0,0 +1,285 @@
+
+The Linux bootsplash
+
+
+:Date: November, 2017
+:Author: Max Staudt 
+
+
+The Linux bootsplash is a graphical replacement for the '``quiet``' boot
+option, typically showing a logo and a spinner animation as the system starts.
+
+Currently, it is a part of the Framebuffer Console support, and can be found
+as ``CONFIG_BOOTSPLASH`` in the kernel configuration. This means that as long
+as it is enabled, it hijacks fbcon's output and draws a splash screen instead.
+
+Purely compiling in the bootsplash will not render it functional - to actually
+render a splash, you will also need a splash theme file. See the example
+utility and script in ``tools/bootsplash`` for a live demo.
+
+
+
+Motivation
+==
+
+- The '``quiet``' boot option only suppresses most messages during boot, but
+  errors are still shown.
+
+- A user space implementation can only show a logo once user space has been
+  initialized far enough to allow this. A kernel splash can display a splash
+  immediately as soon as fbcon can be displayed.
+
+- Implementing a splash screen in user space (e.g. Plymouth) is problematic
+  due to resource conflicts.
+
+  For example, if Plymouth is keeping ``/dev/fb0`` (provided via vesafb/efifb)
+  open, then most DRM drivers can't replace it because the address space is
+  still busy - thus leading to a VRAM reservation error.
+
+  See: https://bugzilla.opensuse.org/show_bug.cgi?id=980750
+
+
+
+Command line arguments
+==
+
+``bootsplash.bootfile``
+  Which file in the initramfs to load.
+
+  The splash theme is loaded via request_firmware(), thus to load
+  ``/lib/firmware/bootsplash/mytheme`` pass the command line:
+
+  ``bootsplash.bootfile=bootsplash/mytheme``
+
+  Note: The splash file *has to be* in the initramfs, as it needs to be
+  available when the splash is initialized early on.
+
+  Default: none, i.e. a non-functional splash, falling back to showing text.
+
+
+
+sysfs run-time configuration
+
+
+``/sys/devices/platform/bootsplash.0/enabled``
+  Enable/disable the bootsplash.
+  The system boots with this set to 1, but will not show a splash unless
+  a splash theme file is also loaded.
+
+
+
+Kconfig
+===
+
+``BOOTSPLASH``
+  Whether to compile in bootsplash support
+  (depends on fbcon compiled in, i.e. ``FRAMEBUFFER_CONSOLE=y``)
+
+
+
+Bootsplash file format
+==
+
+A file specified in the kernel configuration as ``CONFIG_BOOTSPLASH_FILE``
+or specified on the command line as ``bootsplash.bootfile`` will be loaded
+and displayed as soon as fbcon is initialized.
+
+
+Main blocks
+---
+
+There are 3 main blocks in each file:
+
+  - one File header
+  -   n Picture headers
+  -   m (Blob header + payload) blocks
+
+
+Structures
+--
+
+The on-disk structures are defined in
+``drivers/video/fbdev/core/bootsplash_file.h`` and represent these blocks:
+
+  - ``struct splash_file_header``
+
+Represents the file header, with splash-wide information including:
+
+  - The magic string "``Linux bootsplash``" on big-endian platforms
+(the reverse on little endian)
+  - The file format version (for incompatible updates, hopefully never)
+  - The background color
+  - Number of picture and blob blocks
+  - Animation speed (we only allow one delay for all animations)
+
+The file header is followed by the first picture header.
+
+
+  - ``struct splash_picture_header``
+
+Represents an object (picture) drawn on screen, including its immutable
+properties:
+  - Width, height
+  - Positioning relative to screen corners or in the center
+  

[RFC PATCH v3 10/13] Documentation: Add bootsplash main documentation

2018-01-17 Thread Max Staudt
Signed-off-by: Max Staudt 
---
 .../ABI/testing/sysfs-platform-bootsplash  |  11 +
 Documentation/bootsplash.rst   | 285 +
 MAINTAINERS|   2 +
 3 files changed, 298 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-platform-bootsplash
 create mode 100644 Documentation/bootsplash.rst

diff --git a/Documentation/ABI/testing/sysfs-platform-bootsplash 
b/Documentation/ABI/testing/sysfs-platform-bootsplash
new file mode 100644
index ..742c7b035ded
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-bootsplash
@@ -0,0 +1,11 @@
+What:  /sys/devices/platform/bootsplash.0/enabled
+Date:  Oct 2017
+KernelVersion: 4.14
+Contact:   Max Staudt 
+Description:
+   Can be set and read.
+
+   0: Splash is disabled.
+   1: Splash is shown whenever fbcon would show a text console
+  (i.e. no graphical application is running), and a splash
+  file is loaded.
diff --git a/Documentation/bootsplash.rst b/Documentation/bootsplash.rst
new file mode 100644
index ..611f0c558925
--- /dev/null
+++ b/Documentation/bootsplash.rst
@@ -0,0 +1,285 @@
+
+The Linux bootsplash
+
+
+:Date: November, 2017
+:Author: Max Staudt 
+
+
+The Linux bootsplash is a graphical replacement for the '``quiet``' boot
+option, typically showing a logo and a spinner animation as the system starts.
+
+Currently, it is a part of the Framebuffer Console support, and can be found
+as ``CONFIG_BOOTSPLASH`` in the kernel configuration. This means that as long
+as it is enabled, it hijacks fbcon's output and draws a splash screen instead.
+
+Purely compiling in the bootsplash will not render it functional - to actually
+render a splash, you will also need a splash theme file. See the example
+utility and script in ``tools/bootsplash`` for a live demo.
+
+
+
+Motivation
+==
+
+- The '``quiet``' boot option only suppresses most messages during boot, but
+  errors are still shown.
+
+- A user space implementation can only show a logo once user space has been
+  initialized far enough to allow this. A kernel splash, by contrast, can be
+  shown as soon as fbcon is available.
+
+- Implementing a splash screen in user space (e.g. Plymouth) is problematic
+  due to resource conflicts.
+
+  For example, if Plymouth is keeping ``/dev/fb0`` (provided via vesafb/efifb)
+  open, then most DRM drivers can't replace it because the address space is
+  still busy - thus leading to a VRAM reservation error.
+
+  See: https://bugzilla.opensuse.org/show_bug.cgi?id=980750
+
+
+
+Command line arguments
+==
+
+``bootsplash.bootfile``
+  Which file in the initramfs to load.
+
+  The splash theme is loaded via request_firmware(), thus to load
+  ``/lib/firmware/bootsplash/mytheme`` pass the command line:
+
+  ``bootsplash.bootfile=bootsplash/mytheme``
+
+  Note: The splash file *has to be* in the initramfs, as it needs to be
+  available when the splash is initialized early on.
+
+  Default: none, i.e. a non-functional splash, falling back to showing text.
+
+
+
+sysfs run-time configuration
+
+
+``/sys/devices/platform/bootsplash.0/enabled``
+  Enable/disable the bootsplash.
+  The system boots with this set to 1, but will not show a splash unless
+  a splash theme file is also loaded.
+
+
+
+Kconfig
+===
+
+``BOOTSPLASH``
+  Whether to compile in bootsplash support
+  (depends on fbcon compiled in, i.e. ``FRAMEBUFFER_CONSOLE=y``)
+
+
+
+Bootsplash file format
+==
+
+A file specified in the kernel configuration as ``CONFIG_BOOTSPLASH_FILE``
+or specified on the command line as ``bootsplash.bootfile`` will be loaded
+and displayed as soon as fbcon is initialized.
+
+
+Main blocks
+---
+
+There are 3 main blocks in each file:
+
+  - one File header
+  -   n Picture headers
+  -   m (Blob header + payload) blocks
+
+
+Structures
+--
+
+The on-disk structures are defined in
+``drivers/video/fbdev/core/bootsplash_file.h`` and represent these blocks:
+
+  - ``struct splash_file_header``
+
+Represents the file header, with splash-wide information including:
+
+  - The magic string "``Linux bootsplash``" on big-endian platforms
+(the reverse on little endian)
+  - The file format version (for incompatible updates, hopefully never)
+  - The background color
+  - Number of picture and blob blocks
+  - Animation speed (we only allow one delay for all animations)
+
+The file header is followed by the first picture header.
+
+
+  - ``struct splash_picture_header``
+
+Represents an object (picture) drawn on screen, including its immutable
+properties:
+  - Width, height
+  - Positioning relative to screen corners or in the center
+  - Animation, if any
+  - Animation type
+  
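
For illustration: the file header described above maps onto a C struct
roughly as follows. This is a minimal sketch - the field names are taken
from the description and from the packer tool later in this series, while
the exact types and ordering are assumptions; the authoritative definition
is the one added in include/uapi/linux/bootsplash_file.h.

#include <stdint.h>

/* Sketch only - types and field order are assumptions, not the UAPI. */
struct splash_file_header {
	uint8_t  id[16];       /* "Linux bootsplash" on BE, reversed on LE */
	uint16_t version;      /* file format version */

	uint8_t  bg_red;       /* background color */
	uint8_t  bg_green;
	uint8_t  bg_blue;
	uint8_t  bg_reserved;  /* (do not use) */

	uint16_t num_pics;     /* number of picture headers */
	uint16_t num_blobs;    /* number of blob blocks */
	uint16_t frame_ms;     /* one minimum delay for all animations */
} __attribute__((__packed__));
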

[RFC PATCH v3 06/13] vt: Redraw bootsplash fully on console_unblank

2018-01-17 Thread Max Staudt
After exiting a KD_GRAPHICS program and falling back to the text
console, a previously enabled splash needs to be fully redrawn.

This corner case was introduced with selective re-drawing while
implementing animations.

Without this patch, the following happens:

1. Switch to a text console
2. Enable splash
3. Start X (or any other KD_GRAPHICS program)
4. Exit X
5. Splash is not seen, apart from animations

Signed-off-by: Max Staudt 
Reviewed-by: Oliver Neukum 
---
 drivers/tty/vt/vt.c   |  2 ++
 drivers/video/fbdev/core/bootsplash.c | 15 +--
 include/linux/bootsplash.h|  4 
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 2ebaba16f785..416735ab6dc1 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -102,6 +102,7 @@
 #include 
 #include 
 #include 
+#include <linux/bootsplash.h>
 
 #define MAX_NR_CON_DRIVER 16
 
@@ -3903,6 +3904,7 @@ void do_unblank_screen(int leaving_gfx)
}
 
console_blanked = 0;
+   bootsplash_mark_dirty();
if (vc->vc_sw->con_blank(vc, 0, leaving_gfx) || 
vt_force_oops_output(vc))
/* Low-level driver cannot restore -> do it ourselves */
update_screen(vc);
diff --git a/drivers/video/fbdev/core/bootsplash.c 
b/drivers/video/fbdev/core/bootsplash.c
index c8642142cfea..13fcaabbc2ca 100644
--- a/drivers/video/fbdev/core/bootsplash.c
+++ b/drivers/video/fbdev/core/bootsplash.c
@@ -165,6 +165,13 @@ bool bootsplash_would_render_now(void)
&& bootsplash_is_enabled();
 }
 
+void bootsplash_mark_dirty(void)
+{
+   mutex_lock(&splash_state.data_lock);
+   splash_state.splash_fb = NULL;
+   mutex_unlock(&splash_state.data_lock);
+}
+
 bool bootsplash_is_enabled(void)
 {
bool was_enabled;
@@ -206,9 +213,7 @@ void bootsplash_enable(void)
 
if (!was_enabled) {
/* Force a full redraw when the splash is re-activated */
-   mutex_lock(&splash_state.data_lock);
-   splash_state.splash_fb = NULL;
-   mutex_unlock(&splash_state.data_lock);
+   bootsplash_mark_dirty();
 
schedule_work(&splash_state.work_redraw_vc);
}
@@ -272,9 +277,7 @@ static int splash_resume(struct device *device)
 * Force full redraw on resume since we've probably lost the
 * framebuffer's contents meanwhile
 */
-   mutex_lock(&splash_state.data_lock);
-   splash_state.splash_fb = NULL;
-   mutex_unlock(&splash_state.data_lock);
+   bootsplash_mark_dirty();
 
if (bootsplash_would_render_now())
schedule_work(&splash_state.work_redraw_vc);
diff --git a/include/linux/bootsplash.h b/include/linux/bootsplash.h
index c6dd0b43180d..4075098aaadd 100644
--- a/include/linux/bootsplash.h
+++ b/include/linux/bootsplash.h
@@ -19,6 +19,8 @@ extern void bootsplash_render_full(struct fb_info *info);
 
 extern bool bootsplash_would_render_now(void);
 
+extern void bootsplash_mark_dirty(void);
+
 extern bool bootsplash_is_enabled(void);
 extern void bootsplash_disable(void);
 extern void bootsplash_enable(void);
@@ -31,6 +33,8 @@ extern void bootsplash_init(void);
 
 #define bootsplash_would_render_now() (false)
 
+#define bootsplash_mark_dirty()
+
 #define bootsplash_is_enabled() (false)
 #define bootsplash_disable()
 #define bootsplash_enable()
-- 
2.12.3



[RFC PATCH v3 13/13] tools/bootsplash: Add script and data to create sample file

2018-01-17 Thread Max Staudt
Also, mention this in the bootsplash documentation.

Signed-off-by: Max Staudt 
---
 Documentation/bootsplash.rst   |  10 ++
 tools/bootsplash/.gitignore|   3 ++
 tools/bootsplash/ajax-loader.gif   | Bin 0 -> 3208 bytes
 tools/bootsplash/bootsplash-tux.sh |  66 +
 4 files changed, 79 insertions(+)
 create mode 100644 tools/bootsplash/ajax-loader.gif
 create mode 100755 tools/bootsplash/bootsplash-tux.sh

diff --git a/Documentation/bootsplash.rst b/Documentation/bootsplash.rst
index b35aba5093e8..d4f132eca615 100644
--- a/Documentation/bootsplash.rst
+++ b/Documentation/bootsplash.rst
@@ -195,6 +195,16 @@ Hooks - how the bootsplash is integrated
 
 
 
+Creating a bootsplash theme file
+===
+
+A simple tool for theme file creation is included in ``tools/bootsplash``.
+
+An example shell script is included as well; it shows how to use the tool
+and generates a reference bootsplash file.
+
+
+
 FAQ: Frequently Asked Questions
 ===
 
diff --git a/tools/bootsplash/.gitignore b/tools/bootsplash/.gitignore
index 091b99a17567..5dfced41ba82 100644
--- a/tools/bootsplash/.gitignore
+++ b/tools/bootsplash/.gitignore
@@ -1 +1,4 @@
 bootsplash-packer
+bootsplash
+logo.rgb
+throbber*.rgb
diff --git a/tools/bootsplash/ajax-loader.gif b/tools/bootsplash/ajax-loader.gif
new file mode 100644
index 
..3288d1035d70bb86517e2c233f1a904e41f06b29
GIT binary patch
literal 3208
[base85-encoded binary data, truncated in the list archive]

[RFC PATCH v3 11/13] bootsplash: sysfs entries to load and unload files

2018-01-17 Thread Max Staudt
Users can replace the splash screen at runtime by writing a path and
filename to /sys/devices/platform/bootsplash.0/load_file and making sure
the splash is enabled.

Notes:
  - The path has to be a path in /lib/firmware since request_firmware()
is used to fetch the data.
  - When setting the splash from the shell, echo -n has to be used as
any trailing '\n' newline will be interpreted as part of the path.

Writes to /sys/devices/platform/bootsplash.0/drop_splash will cause the
current splash theme to be freed and the console to switch to text mode.

Signed-off-by: Max Staudt 
---
 .../ABI/testing/sysfs-platform-bootsplash  | 32 +
 Documentation/bootsplash.rst   |  8 
 drivers/video/fbdev/core/bootsplash.c  | 54 ++
 3 files changed, 94 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-platform-bootsplash 
b/Documentation/ABI/testing/sysfs-platform-bootsplash
index 742c7b035ded..f8f4b259220e 100644
--- a/Documentation/ABI/testing/sysfs-platform-bootsplash
+++ b/Documentation/ABI/testing/sysfs-platform-bootsplash
@@ -9,3 +9,35 @@ Description:
1: Splash is shown whenever fbcon would show a text console
   (i.e. no graphical application is running), and a splash
   file is loaded.
+
+What:  /sys/devices/platform/bootsplash.0/drop_splash
+Date:  Oct 2017
+KernelVersion: 4.14
+Contact:   Max Staudt 
+Description:
+   Can only be set.
+
+   Any value written will cause the current splash theme file
+   to be unloaded and the text console to be redrawn.
+
+What:  /sys/devices/platform/bootsplash.0/load_file
+Date:  Oct 2017
+KernelVersion: 4.14
+Contact:   Max Staudt 
+Description:
+   Can only be set.
+
+   A firmware path written will cause a new theme file to be
+   loaded and the current bootsplash to be replaced.
+   The current enabled/disabled status is not touched.
+   If the splash is already active, it will be redrawn.
+
+   The path has to be a path in /lib/firmware since
+   request_firmware() is used to fetch the data.
+
+   When setting the splash from the shell, echo -n has to be
+   used as any trailing '\n' newline will be interpreted as
+   part of the path.
diff --git a/Documentation/bootsplash.rst b/Documentation/bootsplash.rst
index 611f0c558925..b35aba5093e8 100644
--- a/Documentation/bootsplash.rst
+++ b/Documentation/bootsplash.rst
@@ -67,6 +67,14 @@ sysfs run-time configuration
   a splash theme file is also loaded.
 
 
+``/sys/devices/platform/bootsplash.0/drop_splash``
+  Unload splash data and free memory.
+
+``/sys/devices/platform/bootsplash.0/load_file``
+  Load a splash file from ``/lib/firmware/``.
+  Note that trailing newlines will be interpreted as part of the file name.
+
+
 
 Kconfig
 ===
diff --git a/drivers/video/fbdev/core/bootsplash.c 
b/drivers/video/fbdev/core/bootsplash.c
index 13fcaabbc2ca..16cb0493629d 100644
--- a/drivers/video/fbdev/core/bootsplash.c
+++ b/drivers/video/fbdev/core/bootsplash.c
@@ -251,11 +251,65 @@ static ssize_t splash_store_enabled(struct device *device,
return count;
 }
 
+static ssize_t splash_store_drop_splash(struct device *device,
+   struct device_attribute *attr,
+   const char *buf, size_t count)
+{
+   struct splash_file_priv *fp;
+
+   if (!buf || !count || !splash_state.file)
+   return count;
+
+   mutex_lock(&splash_state.data_lock);
+   fp = splash_state.file;
+   splash_state.file = NULL;
+   mutex_unlock(&splash_state.data_lock);
+
+   /* Redraw the text console */
+   schedule_work(&splash_state.work_redraw_vc);
+
+   bootsplash_free_file(fp);
+
+   return count;
+}
+
+static ssize_t splash_store_load_file(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+   struct splash_file_priv *fp, *fp_old;
+
+   if (!count)
+   return 0;
+
+   fp = bootsplash_load_firmware(&splash_state.splash_device->dev,
+ buf);
+
+   if (!fp)
+   return -ENXIO;
+
+   mutex_lock(&splash_state.data_lock);
+   fp_old = splash_state.file;
+   splash_state.splash_fb = NULL;
+   splash_state.file = fp;
+   mutex_unlock(&splash_state.data_lock);
+
+   /* Update the splash or text console */
+   schedule_work(&splash_state.work_redraw_vc);
+
+   bootsplash_free_file(fp_old);
+   return count;
+}
+
 static DEVICE_ATTR(enabled, 0644, splash_show_enabled, splash_store_enabled);
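
As a concrete illustration of the trailing-newline caveat above, here is a
minimal user-space sketch that loads a new theme through this interface.
The sysfs path and the firmware path are the ones from the documentation;
the program itself is illustrative only, with error handling kept short:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Firmware path below /lib/firmware - written WITHOUT a trailing
	 * '\n', since the kernel treats a newline as part of the path. */
	const char *theme = "bootsplash/mytheme";
	int fd = open("/sys/devices/platform/bootsplash.0/load_file",
		      O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, theme, strlen(theme)) < 0)
		perror("write");
	close(fd);
	return 0;
}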

[RFC PATCH v3 03/13] bootsplash: Flush framebuffer after drawing

2018-01-17 Thread Max Staudt
Framebuffers with deferred I/O need to be flushed to the screen
explicitly, since we use neither the mmap nor the file I/O abstractions
that handle this for userspace FB clients.

Example: xenfb

Some framebuffer drivers implement lazy access to the screen without
actually exposing a fbdefio interface - we also match some known ones,
currently:
 - ast
 - cirrus
 - mgag200

Signed-off-by: Max Staudt 
Reviewed-by: Oliver Neukum 
---
 drivers/video/fbdev/core/bootsplash.c  |  2 ++
 drivers/video/fbdev/core/bootsplash_internal.h |  1 +
 drivers/video/fbdev/core/bootsplash_render.c   | 33 ++
 3 files changed, 36 insertions(+)

diff --git a/drivers/video/fbdev/core/bootsplash.c 
b/drivers/video/fbdev/core/bootsplash.c
index 843c5400fefc..815b007f81ca 100644
--- a/drivers/video/fbdev/core/bootsplash.c
+++ b/drivers/video/fbdev/core/bootsplash.c
@@ -112,6 +112,8 @@ void bootsplash_render_full(struct fb_info *info)
 
bootsplash_do_render_pictures(info, splash_state.file);
 
+   bootsplash_do_render_flush(info);
+
 out:
mutex_unlock(&splash_state.data_lock);
 }
diff --git a/drivers/video/fbdev/core/bootsplash_internal.h 
b/drivers/video/fbdev/core/bootsplash_internal.h
index 71e2a27ac0b8..0acb383aa4e3 100644
--- a/drivers/video/fbdev/core/bootsplash_internal.h
+++ b/drivers/video/fbdev/core/bootsplash_internal.h
@@ -89,6 +89,7 @@ void bootsplash_do_render_background(struct fb_info *info,
 const struct splash_file_priv *fp);
 void bootsplash_do_render_pictures(struct fb_info *info,
   const struct splash_file_priv *fp);
+void bootsplash_do_render_flush(struct fb_info *info);
 
 
 void bootsplash_free_file(struct splash_file_priv *fp);
diff --git a/drivers/video/fbdev/core/bootsplash_render.c 
b/drivers/video/fbdev/core/bootsplash_render.c
index 2ae36949d0e3..8c09c306ff67 100644
--- a/drivers/video/fbdev/core/bootsplash_render.c
+++ b/drivers/video/fbdev/core/bootsplash_render.c
@@ -186,3 +186,36 @@ void bootsplash_do_render_pictures(struct fb_info *info,
pp->pic_header->width, pp->pic_header->height);
}
 }
+
+
+void bootsplash_do_render_flush(struct fb_info *info)
+{
+   /*
+* FB drivers using deferred_io (such as Xen) need to sync the
+* screen after modifying its contents. When the FB is mmap()ed
+* from userspace, this happens via a dirty pages callback, but
+* when modifying the FB from the kernel, there is no such thing.
+*
+* So let's issue a fake fb_copyarea (copying the FB onto itself)
+* to trick the FB driver into syncing the screen.
+*
+* A few DRM drivers' FB implementations are broken by not using
+* deferred_io when they really should - we match on the known
+* bad ones manually for now.
+*/
+   if (info->fbdefio
+   || !strcmp(info->fix.id, "astdrmfb")
+   || !strcmp(info->fix.id, "cirrusdrmfb")
+   || !strcmp(info->fix.id, "mgadrmfb")) {
+   struct fb_copyarea area;
+
+   area.dx = 0;
+   area.dy = 0;
+   area.width = info->var.xres;
+   area.height = info->var.yres;
+   area.sx = 0;
+   area.sy = 0;
+
+   info->fbops->fb_copyarea(info, &area);
+   }
+}
-- 
2.12.3
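
For context on the info->fbdefio check above: a framebuffer driver that
uses deferred I/O registers a struct fb_deferred_io whose callback pushes
dirty pages out to the device. A minimal registration sketch (the driver
names are hypothetical), showing why the presence of info->fbdefio is a
reliable telltale for "this driver needs an explicit flush":

#include <linux/fb.h>

static void mydrv_deferred_io(struct fb_info *info,
			      struct list_head *pagelist)
{
	/* push the dirty pages on @pagelist out to the device */
}

static struct fb_deferred_io mydrv_defio = {
	.delay		= HZ / 20,	/* batch updates, flush 20x/sec */
	.deferred_io	= mydrv_deferred_io,
};

static void mydrv_init_defio(struct fb_info *info)
{
	info->fbdefio = &mydrv_defio;	/* the field bootsplash tests */
	fb_deferred_io_init(info);
}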



[RFC PATCH v3 12/13] tools/bootsplash: Add a basic splash file creation tool

2018-01-17 Thread Max Staudt
Signed-off-by: Max Staudt 
---
 MAINTAINERS  |   1 +
 tools/bootsplash/.gitignore  |   1 +
 tools/bootsplash/Makefile|   9 +
 tools/bootsplash/bootsplash-packer.c | 471 +++
 4 files changed, 482 insertions(+)
 create mode 100644 tools/bootsplash/.gitignore
 create mode 100644 tools/bootsplash/Makefile
 create mode 100644 tools/bootsplash/bootsplash-packer.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 7ffac272434e..ddff07cd794c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2715,6 +2715,7 @@ F:drivers/video/fbdev/core/bootsplash*.*
 F: drivers/video/fbdev/core/dummycon.c
 F: include/linux/bootsplash.h
 F: include/uapi/linux/bootsplash_file.h
+F: tools/bootsplash/*
 
 BPF (Safe dynamic programs and tools)
 M: Alexei Starovoitov 
diff --git a/tools/bootsplash/.gitignore b/tools/bootsplash/.gitignore
new file mode 100644
index ..091b99a17567
--- /dev/null
+++ b/tools/bootsplash/.gitignore
@@ -0,0 +1 @@
+bootsplash-packer
diff --git a/tools/bootsplash/Makefile b/tools/bootsplash/Makefile
new file mode 100644
index ..0ad8e8a84942
--- /dev/null
+++ b/tools/bootsplash/Makefile
@@ -0,0 +1,9 @@
+CC := $(CROSS_COMPILE)gcc
+CFLAGS := -I../../usr/include
+
+PROGS := bootsplash-packer
+
+all: $(PROGS)
+
+clean:
+   rm -fr $(PROGS)
diff --git a/tools/bootsplash/bootsplash-packer.c 
b/tools/bootsplash/bootsplash-packer.c
new file mode 100644
index ..ffb6a8b69885
--- /dev/null
+++ b/tools/bootsplash/bootsplash-packer.c
@@ -0,0 +1,471 @@
+/*
+ * Kernel based bootsplash.
+ *
+ * (Splash file packer tool)
+ *
+ * Authors:
+ * Max Staudt 
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+
+static void print_help(char *progname)
+{
+   printf("Usage: %s [OPTIONS] outfile\n", progname);
+   printf("\n"
+  "Options, executed in order given:\n"
+  "  -h, --help   Print this help message\n"
+  "\n"
+  "  --bg_red Background color (red part)\n"
+  "  --bg_green   Background color (green part)\n"
+  "  --bg_blueBackground color (blue part)\n"
+  "  --bg_reserved(do not use)\n"
+  "  --frame_ms  Minimum milliseconds between 
animation steps\n"
+  "\n"
+  "  --pictureStart describing the next 
picture\n"
+  "  --pic_width Picture width in pixels\n"
+  "  --pic_heightPicture height in pixels\n"
+  "  --pic_position  Coarse picture placement:\n"
+  "  0x00 - Top left\n"
+  "  0x01 - Top\n"
+  "  0x02 - Top right\n"
+  "  0x03 - Right\n"
+  "  0x04 - Bottom right\n"
+  "  0x05 - Bottom\n"
+  "  0x06 - Bottom left\n"
+  "  0x07 - Left\n"
+  "\n"
+  "Flags:\n"
+  " 0x10 - Calculate offset from 
corner towards center,\n"
+  " rather than from 
center towards corner\n"
+  "  --pic_position_offset   Distance from base position in 
pixels\n"
+  "  --pic_anim_type  Animation type:\n"
+  " 0 - None\n"
+  " 1 - Forward loop\n"
+  "  --pic_anim_loop  Loop point for animation\n"
+  "\n"
+  "  --blob Include next data stream\n"
+  "  --blob_type Type of data\n"
+  "  --blob_picture_idPicture to associate this blob 
with, starting at 0\n"
+  " (default: number of last 
--picture)\n"
+  "\n");
+   printf("This tool will write %s files.\n\n",
+#if __BYTE_ORDER == __BIG_ENDIAN
+  "Big Endian (BE)");
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+  "Little Endian (LE)");
+#else
+#error
+#endif
+}
+
+
+struct blob_entry {
+   struct blob_entry *next;
+
+   char *fn;
+
+   struct splash_blob_header header;
+};
+
+
+static void dump_file_header(struct splash_file_header *h)
+{
+   printf(" --- File header ---\n");
+   printf("\n");
+   printf("  version: %5u\n", h->version);
+   printf("\n");
+   printf("  bg_red:  %5u\n", h->bg_red);
+   printf("  bg_green:%5u\n", h->bg_green);
+   printf("  bg_blue: 

[RFC PATCH v3 00/13] Kernel based bootsplash

2018-01-17 Thread Max Staudt
Dear fbdev/fbcon/dri developers,

Thanks for all the valuable feedback.

I've looked into the suggestions you made, and found that it doesn't
currently make sense to continue working on the splash code, given the
low practical interest I've received on LKML. The code is, and always
has been, intended primarily as a study of what can be done, and at
this point it has fulfilled this requirement.

Please find attached my latest version of the patchset in which I've
clarified the documentation a bit, as well as added a FAQ and To-Do
section for anyone wishing to pick up the code.

The code is still based on v4.14-rc5, sorry about that.

In particular, I hope to have clarified in the FAQ why I'm building on
top of fbdev and fbcon, as I think I haven't made myself clear enough
in the previous discussion. If you still think that my reasoning is
wrong, I'd be thankful for pointers towards a better solution.


I'll be happy to rebase it and continue to work on it if interest
arises.


This project has been a valuable experience - so huge thanks to everyone
involved in any way, from user feedback to code reviews and architectural
discussion.


Max




[RFC PATCH v3 08/13] sysrq: Disable bootsplash on SAK

2018-01-17 Thread Max Staudt
When the user requests a clean TTY via the SAK SysRq, that means he
really wants to use the console.

Let's disable the bootsplash, even if the request is not on a VT, as
the user probably knows what he's doing and it's more helpful to get
out of his way.

Signed-off-by: Max Staudt 
Reviewed-by: Oliver Neukum 
---
 drivers/tty/sysrq.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 3ffc1ce29023..bc6a24c9dfa8 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -49,6 +49,7 @@
 #include 
 #include 
 #include 
+#include <linux/bootsplash.h>
 
 #include 
 #include 
@@ -104,6 +105,8 @@ static void sysrq_handle_SAK(int key)
 {
struct work_struct *SAK_work = &vc_cons[fg_console].SAK_work;
schedule_work(SAK_work);
+
+   bootsplash_disable();
 }
 static struct sysrq_key_op sysrq_SAK_op = {
.handler= sysrq_handle_SAK,
-- 
2.12.3



[PATCH v6 36/99] mm: Convert page migration to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Signed-off-by: Matthew Wilcox 
---
 mm/migrate.c | 41 -
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 75d19904dd9a..7122fec9b075 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -322,7 +322,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t 
*ptep,
page = migration_entry_to_page(entry);
 
/*
-* Once radix-tree replacement of page migration started, page_count
+* Once page cache replacement of page migration started, page_count
 * *must* be zero. And, we don't want to call wait_on_page_locked()
 * against a page without get_page().
 * So, we use get_page_unless_zero(), here. Even failed, page fault
@@ -437,10 +437,10 @@ int migrate_page_move_mapping(struct address_space 
*mapping,
struct buffer_head *head, enum migrate_mode mode,
int extra_count)
 {
+   XA_STATE(xas, &mapping->pages, page_index(page));
struct zone *oldzone, *newzone;
int dirty;
int expected_count = 1 + extra_count;
-   void **pslot;
 
/*
 * Device public or private pages have an extra refcount as they are
@@ -466,21 +466,16 @@ int migrate_page_move_mapping(struct address_space 
*mapping,
oldzone = page_zone(page);
newzone = page_zone(newpage);
 
-   xa_lock_irq(&mapping->pages);
-
-   pslot = radix_tree_lookup_slot(&mapping->pages,
-   page_index(page));
+   xas_lock_irq(&xas);
 
expected_count += 1 + page_has_private(page);
-   if (page_count(page) != expected_count ||
-   radix_tree_deref_slot_protected(pslot,
-   &mapping->pages.xa_lock) != page) {
-   xa_unlock_irq(&mapping->pages);
+   if (page_count(page) != expected_count || xas_load(&xas) != page) {
+   xas_unlock_irq(&xas);
return -EAGAIN;
}
 
if (!page_ref_freeze(page, expected_count)) {
-   xa_unlock_irq(&mapping->pages);
+   xas_unlock_irq(&xas);
return -EAGAIN;
}
 
@@ -494,7 +489,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (mode == MIGRATE_ASYNC && head &&
!buffer_migrate_lock_buffers(head, mode)) {
page_ref_unfreeze(page, expected_count);
-   xa_unlock_irq(&mapping->pages);
+   xas_unlock_irq(&xas);
return -EAGAIN;
}
 
@@ -522,7 +517,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
SetPageDirty(newpage);
}
 
-   radix_tree_replace_slot(&mapping->pages, pslot, newpage);
+   xas_store(&xas, newpage);
 
/*
 * Drop cache reference from old page by unfreezing
@@ -531,7 +526,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 */
page_ref_unfreeze(page, expected_count - 1);
 
-   xa_unlock(&mapping->pages);
+   xas_unlock(&xas);
/* Leave irq disabled to prevent preemption while updating stats */
 
/*
@@ -571,22 +566,18 @@ EXPORT_SYMBOL(migrate_page_move_mapping);
 int migrate_huge_page_move_mapping(struct address_space *mapping,
   struct page *newpage, struct page *page)
 {
+   XA_STATE(xas, &mapping->pages, page_index(page));
int expected_count;
-   void **pslot;
-
-   xa_lock_irq(&mapping->pages);
-
-   pslot = radix_tree_lookup_slot(&mapping->pages, page_index(page));

+   xas_lock_irq(&xas);
expected_count = 2 + page_has_private(page);
-   if (page_count(page) != expected_count ||
-   radix_tree_deref_slot_protected(pslot, &mapping->pages.xa_lock) != page) {
-   xa_unlock_irq(&mapping->pages);
+   if (page_count(page) != expected_count || xas_load(&xas) != page) {
+   xas_unlock_irq(&xas);
return -EAGAIN;
}
 
if (!page_ref_freeze(page, expected_count)) {
-   xa_unlock_irq(&mapping->pages);
+   xas_unlock_irq(&xas);
return -EAGAIN;
}
 
@@ -595,11 +586,11 @@ int migrate_huge_page_move_mapping(struct address_space 
*mapping,
 
get_page(newpage);
 
-   radix_tree_replace_slot(&mapping->pages, pslot, newpage);
+   xas_store(&xas, newpage);
 
page_ref_unfreeze(page, expected_count - 1);
 
-   xa_unlock_irq(&mapping->pages);
+   xas_unlock_irq(&xas);
 
return MIGRATEPAGE_SUCCESS;
 }
-- 
2.15.1
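
For readers new to the XArray API used above: the conversion replaces the
radix_tree_lookup_slot()/radix_tree_replace_slot() pair with an XA_STATE
cursor that is loaded and stored under the array lock. A minimal sketch of
that pattern in isolation (the xarray and entries here are hypothetical,
not the page-cache specifics of the patch):

#include <linux/xarray.h>

/*
 * Replace @old with @new at @index, but only if @old is still there.
 * Returns 0 on success, -EAGAIN if the entry changed under us.
 */
static int replace_entry(struct xarray *xa, unsigned long index,
			 void *old, void *new)
{
	XA_STATE(xas, xa, index);	/* cursor: array + index */
	int ret = 0;

	xas_lock_irq(&xas);
	if (xas_load(&xas) != old)	/* re-check under the lock */
		ret = -EAGAIN;
	else
		xas_store(&xas, new);	/* store at the cursor position */
	xas_unlock_irq(&xas);

	return ret;
}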



Re: [PATCH v2 2/2] dts: Probe efuse for CI20

2018-01-17 Thread James Hogan
On Thu, Dec 28, 2017 at 10:29:53PM +0100, Mathieu Malaterre wrote:
> MIPS Creator CI20 comes with JZ4780 SoC. Provides access to the efuse block
> using jz4780 efuse driver.
> 
> Signed-off-by: Mathieu Malaterre 
> ---
>  arch/mips/configs/ci20_defconfig | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/arch/mips/configs/ci20_defconfig 
> b/arch/mips/configs/ci20_defconfig
> index b5f4ad8f2c45..62c63617e97a 100644
> --- a/arch/mips/configs/ci20_defconfig
> +++ b/arch/mips/configs/ci20_defconfig
> @@ -171,3 +171,5 @@ CONFIG_STACKTRACE=y
>  # CONFIG_FTRACE is not set
>  CONFIG_CMDLINE_BOOL=y
>  CONFIG_CMDLINE="earlycon console=ttyS4,115200 clk_ignore_unused"
> +CONFIG_NVMEM=y
> +CONFIG_JZ4780_EFUSE=y

NVMEM is already implied by RTC_CLASS (which turns on RTC_NVMEM by
default).

I would suggest loading the defconfig:
make ARCH=mips ci20_defconfig

Then enabling the extra configuration options you need, then create a
new minimal defconfig:
make ARCH=mips savedefconfig

Then look at the new file called "defconfig" or copy it over
arch/mips/configs/ci20_defconfig, and see what changes it adds. That way
you'll get the minimum you need and in the right order in the defconfig.

Don't feel like you have to submit other random changes due to config
changes since the defconfig was added, but if you do please do it as a
separate patch to bring the defconfig up to date (i.e. just load
defconfig and save it) before the patch which actually enables the
EFUSE.

Cheers
James

> -- 
> 2.11.0
> 
> 


signature.asc
Description: Digital signature


[PATCH v6 35/99] mm: Convert __do_page_cache_readahead to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This one is trivial.

Signed-off-by: Matthew Wilcox 
---
 mm/readahead.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mm/readahead.c b/mm/readahead.c
index f64b31b3a84a..66bcaffd47f0 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -174,9 +174,7 @@ int __do_page_cache_readahead(struct address_space 
*mapping, struct file *filp,
if (page_offset > end_index)
break;
 
-   rcu_read_lock();
-   page = radix_tree_lookup(&mapping->pages, page_offset);
-   rcu_read_unlock();
+   page = xa_load(&mapping->pages, page_offset);
if (page && !xa_is_value(page))
continue;
 
-- 
2.15.1



[PATCH v6 39/99] mm: Convert khugepaged_scan_shmem to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Slightly shorter and easier to read code.
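
For reference, here is the iteration idiom in isolation, as a minimal
sketch (the helper name and bounds are illustrative, not part of the
patch; it assumes the XA_STATE()/xas_for_each() API introduced earlier
in this series):

	/* Walk present entries under RCU; a retry entry means we raced
	 * with a concurrent modification and should reload the slot. */
	static void scan_range(struct address_space *mapping, pgoff_t start,
				pgoff_t max)
	{
		XA_STATE(xas, &mapping->pages, start);
		struct page *page;

		rcu_read_lock();
		xas_for_each(&xas, page, max) {		/* max is inclusive */
			if (xas_retry(&xas, page))
				continue;
			if (xa_is_value(page))
				continue;	/* exceptional (swap) entry */
			/* ... examine page ... */
		}
		rcu_read_unlock();
	}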

Signed-off-by: Matthew Wilcox 
---
 mm/khugepaged.c | 17 +
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 9f49d0cd61c2..15f1b2d81a69 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1534,8 +1534,7 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
pgoff_t start, struct page **hpage)
 {
struct page *page = NULL;
-   struct radix_tree_iter iter;
-   void **slot;
+   XA_STATE(xas, &mapping->pages, start);
int present, swap;
int node = NUMA_NO_NODE;
int result = SCAN_SUCCEED;
@@ -1544,17 +1543,11 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
swap = 0;
memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
rcu_read_lock();
-   radix_tree_for_each_slot(slot, &mapping->pages, &iter, start) {
-   if (iter.index >= start + HPAGE_PMD_NR)
-   break;
-
-   page = radix_tree_deref_slot(slot);
-   if (radix_tree_deref_retry(page)) {
-   slot = radix_tree_iter_retry(&iter);
+   xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) {
+   if (xas_retry(&xas, page))
continue;
-   }
 
-   if (radix_tree_exception(page)) {
+   if (xa_is_value(page)) {
if (++swap > khugepaged_max_ptes_swap) {
result = SCAN_EXCEED_SWAP_PTE;
break;
@@ -1593,7 +1586,7 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
present++;
 
if (need_resched()) {
-   slot = radix_tree_iter_resume(slot, &iter);
+   xas_pause(&xas);
cond_resched_rcu();
}
}
-- 
2.15.1



[PATCH v6 38/99] mm: Convert collapse_shmem to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

I found another victim of the radix tree being hard to use.  Because
there was no call to radix_tree_preload(), khugepaged was allocating
radix_tree_nodes using GFP_ATOMIC.

I also converted a local_irq_save()/restore() pair to
local_irq_disable()/local_irq_enable().
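
The allocation fix boils down to the xas_nomem() idiom: let the
operation fail under the lock, drop the lock, allocate node memory
with GFP_KERNEL, and retry. A minimal sketch of that idiom under the
assumptions of this series (the stored page is illustrative):

	XA_STATE(xas, &mapping->pages, index);

	do {
		xas_lock_irq(&xas);
		xas_store(&xas, page);
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, GFP_KERNEL));	/* allocates outside the lock */
	if (xas_error(&xas))
		return xas_error(&xas);		/* e.g. -ENOMEM */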

Signed-off-by: Matthew Wilcox 
---
 mm/khugepaged.c | 158 +++-
 1 file changed, 65 insertions(+), 93 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 55ade70c33bb..9f49d0cd61c2 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1282,17 +1282,17 @@ static void retract_page_tables(struct address_space 
*mapping, pgoff_t pgoff)
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and freeze a new huge page;
- *  - scan over radix tree replacing old pages the new one
+ *  - scan page cache replacing old pages with the new one
  *+ swap in pages if necessary;
  *+ fill in gaps;
- *+ keep old pages around in case if rollback is required;
- *  - if replacing succeed:
+ *+ keep old pages around in case rollback is required;
+ *  - if replacing succeeds:
  *+ copy data over;
  *+ free old pages;
  *+ unfreeze huge page;
  *  - if replacing failed;
  *+ put all pages back and unfreeze them;
- *+ restore gaps in the radix-tree;
+ *+ restore gaps in the page cache;
  *+ free huge page;
  */
 static void collapse_shmem(struct mm_struct *mm,
@@ -1300,12 +1300,11 @@ static void collapse_shmem(struct mm_struct *mm,
struct page **hpage, int node)
 {
gfp_t gfp;
-   struct page *page, *new_page, *tmp;
+   struct page *new_page;
struct mem_cgroup *memcg;
pgoff_t index, end = start + HPAGE_PMD_NR;
LIST_HEAD(pagelist);
-   struct radix_tree_iter iter;
-   void **slot;
+   XA_STATE(xas, &mapping->pages, start);
int nr_none = 0, result = SCAN_SUCCEED;
 
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
@@ -1330,48 +1329,48 @@ static void collapse_shmem(struct mm_struct *mm,
__SetPageLocked(new_page);
BUG_ON(!page_ref_freeze(new_page, 1));
 
-
/*
-* At this point the new_page is 'frozen' (page_count() is zero), locked
-* and not up-to-date. It's safe to insert it into radix tree, because
-* nobody would be able to map it or use it in other way until we
-* unfreeze it.
+* At this point the new_page is 'frozen' (page_count() is zero),
+* locked and not up-to-date. It's safe to insert it into the page
+* cache, because nobody would be able to map it or use it in other
+* way until we unfreeze it.
 */
 
-   index = start;
-   xa_lock_irq(&mapping->pages);
-   radix_tree_for_each_slot(slot, &mapping->pages, &iter, start) {
-   int n = min(iter.index, end) - index;
-
-   /*
-* Handle holes in the radix tree: charge it from shmem and
-* insert relevant subpage of new_page into the radix-tree.
-*/
-   if (n && !shmem_charge(mapping->host, n)) {
-   result = SCAN_FAIL;
+   /* This will be less messy when we use multi-index entries */
+   do {
+   xas_lock_irq(&xas);
+   xas_create_range(&xas, end - 1);
+   if (!xas_error(&xas))
break;
-   }
-   nr_none += n;
-   for (; index < min(iter.index, end); index++) {
-   radix_tree_insert(&mapping->pages, index,
-   new_page + (index % HPAGE_PMD_NR));
-   }
+   xas_unlock_irq(&xas);
+   if (!xas_nomem(&xas, GFP_KERNEL))
+   goto out;
+   } while (1);
 
-   /* We are done. */
-   if (index >= end)
-   break;
+   for (index = start; index < end; index++) {
+   struct page *page = xas_next(&xas);
+
+   VM_BUG_ON(index != xas.xa_index);
+   if (!page) {
+   if (!shmem_charge(mapping->host, 1)) {
+   result = SCAN_FAIL;
+   break;
+   }
+   xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+   nr_none++;
+   continue;
+   }
 
-   page = radix_tree_deref_slot_protected(slot,
-   &mapping->pages.xa_lock);
if (xa_is_value(page) || !PageUptodate(page)) {
-   xa_unlock_irq(>pages);
+   xas_unlock_irq(&xas);
/* swap in or instantiate fallocated page */
if (shmem_getpage(mapping->host, index, &page,
SGP_NOHUGE)) {
result = SCAN_FAIL;
-   goto tree_unlocked;
+   goto 

[PATCH v6 42/99] shmem: Convert shmem_confirm_swap to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

xa_load has its own RCU locking, so we can eliminate it here.
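
That is, the open-coded rcu_read_lock()/rcu_read_unlock() pair moves
inside xa_load(), and comparing its result against an encoded swap
entry needs no protection of its own, since a value entry is not a
pointer to freeable memory. A sketch (the helper name is illustrative):

	static bool entry_is(struct address_space *mapping, pgoff_t index,
				void *expected)
	{
		/* xa_load() takes rcu_read_lock() internally. */
		return xa_load(&mapping->pages, index) == expected;
	}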

Signed-off-by: Matthew Wilcox 
---
 mm/shmem.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index fad6c9e7402e..654f367aca90 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -348,12 +348,7 @@ static int shmem_xa_replace(struct address_space *mapping,
 static bool shmem_confirm_swap(struct address_space *mapping,
   pgoff_t index, swp_entry_t swap)
 {
-   void *item;
-
-   rcu_read_lock();
-   item = radix_tree_lookup(&mapping->pages, index);
-   rcu_read_unlock();
-   return item == swp_to_radix_entry(swap);
+   return xa_load(&mapping->pages, index) == swp_to_radix_entry(swap);
 }
 
 /*
-- 
2.15.1



linux-next: Signed-off-by missing for commit in the pci tree

2018-01-17 Thread Stephen Rothwell
Hi Bjorn,

Commit

  209930d809fa ("PCI/ASPM: Add pci_enable_link_state()")

is missing a Signed-off-by from its author and committer.

-- 
Cheers,
Stephen Rothwell


[PATCH v6 43/99] shmem: Convert find_swap_entry to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This is a 1:1 conversion.

Signed-off-by: Matthew Wilcox 
---
 mm/shmem.c | 23 +++
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 654f367aca90..ce285ae635ea 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1076,28 +1076,27 @@ static void shmem_evict_inode(struct inode *inode)
clear_inode(inode);
 }
 
-static unsigned long find_swap_entry(struct radix_tree_root *root, void *item)
+static unsigned long find_swap_entry(struct xarray *xa, void *item)
 {
-   struct radix_tree_iter iter;
-   void **slot;
-   unsigned long found = -1;
+   XA_STATE(xas, xa, 0);
unsigned int checked = 0;
+   void *entry;
 
rcu_read_lock();
-   radix_tree_for_each_slot(slot, root, &iter, 0) {
-   if (*slot == item) {
-   found = iter.index;
+   xas_for_each(&xas, entry, ULONG_MAX) {
+   if (xas_retry(&xas, entry))
+   continue;
+   if (entry == item)
break;
-   }
checked++;
-   if ((checked % 4096) != 0)
+   if ((checked % XA_CHECK_SCHED) != 0)
continue;
-   slot = radix_tree_iter_resume(slot, &iter);
+   xas_pause(&xas);
cond_resched_rcu();
}
-
rcu_read_unlock();
-   return found;
+
+   return xas_invalid(&xas) ? -1 : xas.xa_index;
 }
 
 /*
-- 
2.15.1



[PATCH v6 40/99] pagevec: Use xa_tag_t

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Removes sparse warnings.
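
The warnings come from mixing plain ints with a restricted type:
xa_tag_t is a sparse __bitwise type, declared along these lines (a
sketch of the idea, not a quote of xarray.h):

	typedef unsigned __bitwise xa_tag_t;

	#define XA_TAG_0	((__force xa_tag_t)0U)
	#define XA_TAG_1	((__force xa_tag_t)1U)
	#define XA_TAG_2	((__force xa_tag_t)2U)

With that, sparse flags every declaration that still uses int for a
tag, which is what this patch cleans up.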

Signed-off-by: Matthew Wilcox 
---
 fs/btrfs/extent_io.c| 4 ++--
 fs/ext4/inode.c | 2 +-
 fs/f2fs/data.c  | 2 +-
 fs/gfs2/aops.c  | 2 +-
 include/linux/pagevec.h | 8 +---
 mm/swap.c   | 4 ++--
 6 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 22948f4febe7..4301cbf4e31f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3795,7 +3795,7 @@ int btree_write_cache_pages(struct address_space *mapping,
pgoff_t index;
pgoff_t end;/* Inclusive */
int scanned = 0;
-   int tag;
+   xa_tag_t tag;
 
pagevec_init(&pvec);
if (wbc->range_cyclic) {
@@ -3922,7 +3922,7 @@ static int extent_write_cache_pages(struct address_space 
*mapping,
pgoff_t done_index;
int range_whole = 0;
int scanned = 0;
-   int tag;
+   xa_tag_t tag;
 
/*
 * We have to hold onto the inode so that ordered extents can do their
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 534a9130f625..4b7c10853928 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2614,7 +2614,7 @@ static int mpage_prepare_extent_to_map(struct 
mpage_da_data *mpd)
long left = mpd->wbc->nr_to_write;
pgoff_t index = mpd->first_page;
pgoff_t end = mpd->last_page;
-   int tag;
+   xa_tag_t tag;
int i, err = 0;
int blkbits = mpd->inode->i_blkbits;
ext4_lblk_t lblk;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8f51ac47b77f..c8f6d9806896 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1640,7 +1640,7 @@ static int f2fs_write_cache_pages(struct address_space 
*mapping,
pgoff_t last_idx = ULONG_MAX;
int cycled;
int range_whole = 0;
-   int tag;
+   xa_tag_t tag;
 
pagevec_init(&pvec);
 
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 1daf15a1f00c..c78ecd008191 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -369,7 +369,7 @@ static int gfs2_write_cache_jdata(struct address_space 
*mapping,
pgoff_t done_index;
int cycled;
int range_whole = 0;
-   int tag;
+   xa_tag_t tag;
 
pagevec_init(&pvec);
if (wbc->range_cyclic) {
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 5fb6580f7f23..5168901bf06d 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -9,6 +9,8 @@
 #ifndef _LINUX_PAGEVEC_H
 #define _LINUX_PAGEVEC_H
 
+#include <linux/xarray.h>
+
 /* 14 pointers + two long's align the pagevec structure to a power of two */
 #define PAGEVEC_SIZE   14
 
@@ -40,12 +42,12 @@ static inline unsigned pagevec_lookup(struct pagevec *pvec,
 
 unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
struct address_space *mapping, pgoff_t *index, pgoff_t end,
-   int tag);
+   xa_tag_t tag);
 unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec,
struct address_space *mapping, pgoff_t *index, pgoff_t end,
-   int tag, unsigned max_pages);
+   xa_tag_t tag, unsigned max_pages);
 static inline unsigned pagevec_lookup_tag(struct pagevec *pvec,
-   struct address_space *mapping, pgoff_t *index, int tag)
+   struct address_space *mapping, pgoff_t *index, xa_tag_t tag)
 {
return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag);
 }
diff --git a/mm/swap.c b/mm/swap.c
index 8d7773cb2c3f..31d79479dacf 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -991,7 +991,7 @@ EXPORT_SYMBOL(pagevec_lookup_range);
 
 unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
struct address_space *mapping, pgoff_t *index, pgoff_t end,
-   int tag)
+   xa_tag_t tag)
 {
pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
PAGEVEC_SIZE, pvec->pages);
@@ -1001,7 +1001,7 @@ EXPORT_SYMBOL(pagevec_lookup_range_tag);
 
 unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec,
struct address_space *mapping, pgoff_t *index, pgoff_t end,
-   int tag, unsigned max_pages)
+   xa_tag_t tag, unsigned max_pages)
 {
pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
min_t(unsigned int, max_pages, PAGEVEC_SIZE), pvec->pages);
-- 
2.15.1



[PATCH v6 41/99] shmem: Convert replace to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

shmem_radix_tree_replace() is renamed to shmem_xa_replace() and
converted to use the XArray API.
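
Worth noting: replacing an entry which is already present never
allocates memory, which is why shmem_xa_replace() can run entirely
under xa_lock with no allocation/retry loop. The core of the pattern,
as a sketch (assuming the xas_load()/xas_store() semantics from this
series):

	XA_STATE(xas, &mapping->pages, index);

	/* Caller holds xa_lock_irq(&mapping->pages). */
	if (xas_load(&xas) != expected)
		return -ENOENT;
	xas_store(&xas, replacement);	/* slot exists: no -ENOMEM possible */
	return 0;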

Signed-off-by: Matthew Wilcox 
---
 mm/shmem.c | 22 --
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index c5731bb954a1..fad6c9e7402e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -321,24 +321,20 @@ void shmem_uncharge(struct inode *inode, long pages)
 }
 
 /*
- * Replace item expected in radix tree by a new item, while holding tree lock.
+ * Replace item expected in xarray by a new item, while holding xa_lock.
  */
-static int shmem_radix_tree_replace(struct address_space *mapping,
+static int shmem_xa_replace(struct address_space *mapping,
pgoff_t index, void *expected, void *replacement)
 {
-   struct radix_tree_node *node;
-   void **pslot;
+   XA_STATE(xas, &mapping->pages, index);
void *item;
 
VM_BUG_ON(!expected);
VM_BUG_ON(!replacement);
-   item = __radix_tree_lookup(&mapping->pages, index, &node, &pslot);
-   if (!item)
-   return -ENOENT;
+   item = xas_load(&xas);
if (item != expected)
return -ENOENT;
-   __radix_tree_replace(&mapping->pages, node, pslot,
-replacement, NULL);
+   xas_store(&xas, replacement);
return 0;
 }
 
@@ -605,8 +601,7 @@ static int shmem_add_to_page_cache(struct page *page,
} else if (!expected) {
error = radix_tree_insert(&mapping->pages, index, page);
} else {
-   error = shmem_radix_tree_replace(mapping, index, expected,
-page);
+   error = shmem_xa_replace(mapping, index, expected, page);
}
 
if (!error) {
@@ -635,7 +630,7 @@ static void shmem_delete_from_page_cache(struct page *page, 
void *radswap)
VM_BUG_ON_PAGE(PageCompound(page), page);
 
xa_lock_irq(&mapping->pages);
-   error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
+   error = shmem_xa_replace(mapping, page->index, page, radswap);
page->mapping = NULL;
mapping->nrpages--;
__dec_node_page_state(page, NR_FILE_PAGES);
@@ -1550,8 +1545,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t 
gfp,
 * a nice clean interface for us to replace oldpage by newpage there.
 */
xa_lock_irq(&swap_mapping->pages);
-   error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
-  newpage);
+   error = shmem_xa_replace(swap_mapping, swap_index, oldpage, newpage);
if (!error) {
__inc_node_page_state(newpage, NR_FILE_PAGES);
__dec_node_page_state(oldpage, NR_FILE_PAGES);
-- 
2.15.1



[PATCH v6 44/99] shmem: Convert shmem_tag_pins to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Simplify the locking by taking the spinlock while we walk the tree on
the assumption that many acquires and releases of the lock will be
worse than holding the lock for a (potentially) long time.

We could replicate the same locking behaviour with the xarray, but would
have to be careful that the xa_node wasn't RCU-freed under us before we
took the lock.
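
The batching idiom that makes the long hold tolerable, pulled out as
a sketch (XA_CHECK_SCHED bounds the work done per lock hold; the rest
mirrors the loop in the patch):

	unsigned int processed = 0;

	xas_lock_irq(&xas);
	xas_for_each(&xas, page, ULONG_MAX) {
		/* ... inspect and tag the entry ... */
		if (++processed % XA_CHECK_SCHED)
			continue;
		xas_pause(&xas);	/* park the state so the lock can drop */
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);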

Signed-off-by: Matthew Wilcox 
---
 mm/shmem.c | 39 ---
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index ce285ae635ea..2f41c7ceea18 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2601,35 +2601,28 @@ static loff_t shmem_file_llseek(struct file *file, 
loff_t offset, int whence)
 
 static void shmem_tag_pins(struct address_space *mapping)
 {
-   struct radix_tree_iter iter;
-   void **slot;
-   pgoff_t start;
+   XA_STATE(xas, &mapping->pages, 0);
struct page *page;
+   unsigned int tagged = 0;
 
lru_add_drain();
-   start = 0;
-   rcu_read_lock();
 
-   radix_tree_for_each_slot(slot, &mapping->pages, &iter, start) {
-   page = radix_tree_deref_slot(slot);
-   if (!page || radix_tree_exception(page)) {
-   if (radix_tree_deref_retry(page)) {
-   slot = radix_tree_iter_retry(&iter);
-   continue;
-   }
-   } else if (page_count(page) - page_mapcount(page) > 1) {
-   xa_lock_irq(&mapping->pages);
-   radix_tree_tag_set(&mapping->pages, iter.index,
-  SHMEM_TAG_PINNED);
-   xa_unlock_irq(&mapping->pages);
-   }
+   xas_lock_irq(&xas);
+   xas_for_each(&xas, page, ULONG_MAX) {
+   if (xa_is_value(page))
+   continue;
+   if (page_count(page) - page_mapcount(page) > 1)
+   xas_set_tag(&xas, SHMEM_TAG_PINNED);
 
-   if (need_resched()) {
-   slot = radix_tree_iter_resume(slot, &iter);
-   cond_resched_rcu();
-   }
+   if (++tagged % XA_CHECK_SCHED)
+   continue;
+
+   xas_pause(&xas);
+   xas_unlock_irq(&xas);
+   cond_resched();
+   xas_lock_irq(&xas);
}
-   rcu_read_unlock();
+   xas_unlock_irq(&xas);
 }
 
 /*
-- 
2.15.1



[PATCH v6 45/99] shmem: Convert shmem_wait_for_pins to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

As with shmem_tag_pins(), hold the lock around the entire loop instead
of acquiring & dropping it for each entry we're going to untag.

Signed-off-by: Matthew Wilcox 
---
 mm/shmem.c | 59 ---
 1 file changed, 24 insertions(+), 35 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 2f41c7ceea18..e4a2eb1336be 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2636,9 +2636,7 @@ static void shmem_tag_pins(struct address_space *mapping)
  */
 static int shmem_wait_for_pins(struct address_space *mapping)
 {
-   struct radix_tree_iter iter;
-   void **slot;
-   pgoff_t start;
+   XA_STATE(xas, &mapping->pages, 0);
struct page *page;
int error, scan;
 
@@ -2646,7 +2644,9 @@ static int shmem_wait_for_pins(struct address_space 
*mapping)
 
error = 0;
for (scan = 0; scan <= LAST_SCAN; scan++) {
-   if (!radix_tree_tagged(&mapping->pages, SHMEM_TAG_PINNED))
+   unsigned int tagged = 0;
+
+   if (!xas_tagged(&xas, SHMEM_TAG_PINNED))
break;
 
if (!scan)
@@ -2654,45 +2654,34 @@ static int shmem_wait_for_pins(struct address_space 
*mapping)
else if (schedule_timeout_killable((HZ << scan) / 200))
scan = LAST_SCAN;
 
-   start = 0;
-   rcu_read_lock();
-   radix_tree_for_each_tagged(slot, &mapping->pages, &iter,
-  start, SHMEM_TAG_PINNED) {
-
-   page = radix_tree_deref_slot(slot);
-   if (radix_tree_exception(page)) {
-   if (radix_tree_deref_retry(page)) {
-   slot = radix_tree_iter_retry(&iter);
-   continue;
-   }
-
-   page = NULL;
-   }
-
-   if (page &&
-   page_count(page) - page_mapcount(page) != 1) {
-   if (scan < LAST_SCAN)
-   goto continue_resched;
-
+   xas_set(&xas, 0);
+   xas_lock_irq(&xas);
+   xas_for_each_tag(&xas, page, ULONG_MAX, SHMEM_TAG_PINNED) {
+   bool clear = true;
+   if (xa_is_value(page))
+   continue;
+   if (page_count(page) - page_mapcount(page) != 1) {
/*
 * On the last scan, we clean up all those tags
 * we inserted; but make a note that we still
 * found pages pinned.
 */
-   error = -EBUSY;
+   if (scan == LAST_SCAN)
+   error = -EBUSY;
+   else
+   clear = false;
}
+   if (clear)
+   xas_clear_tag(&xas, SHMEM_TAG_PINNED);
+   if (++tagged % XA_CHECK_SCHED)
+   continue;
 
-   xa_lock_irq(&mapping->pages);
-   radix_tree_tag_clear(&mapping->pages,
-iter.index, SHMEM_TAG_PINNED);
-   xa_unlock_irq(&mapping->pages);
-continue_resched:
-   if (need_resched()) {
-   slot = radix_tree_iter_resume(slot, &iter);
-   cond_resched_rcu();
-   }
+   xas_pause(&xas);
+   xas_unlock_irq(&xas);
+   cond_resched();
+   xas_lock_irq(&xas);
}
-   rcu_read_unlock();
+   xas_unlock_irq(&xas);
}
 
return error;
-- 
2.15.1



[PATCH v6 49/99] shmem: Convert shmem_partial_swap_usage to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Simpler code because the xarray takes care of things like the limit and
dereferencing the slot.
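
Two details do the simplifying: xas_for_each() takes an inclusive
maximum, so the caller's exclusive 'end' becomes 'end - 1' and the
manual bound check disappears; and the iterator hands back the entry
itself, so there is no slot to dereference by hand. In sketch form:

	XA_STATE(xas, &mapping->pages, start);
	struct page *page;

	xas_for_each(&xas, page, end - 1) {	/* end is exclusive here */
		if (xa_is_value(page))
			swapped++;
	}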

Signed-off-by: Matthew Wilcox 
---
 mm/shmem.c | 18 +++---
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 5a2226e06f8c..4dbcfb436bd1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -658,29 +658,17 @@ static int shmem_free_swap(struct address_space *mapping,
 unsigned long shmem_partial_swap_usage(struct address_space *mapping,
pgoff_t start, pgoff_t end)
 {
-   struct radix_tree_iter iter;
-   void **slot;
+   XA_STATE(xas, &mapping->pages, start);
struct page *page;
unsigned long swapped = 0;
 
rcu_read_lock();
-
-   radix_tree_for_each_slot(slot, &mapping->pages, &iter, start) {
-   if (iter.index >= end)
-   break;
-
-   page = radix_tree_deref_slot(slot);
-
-   if (radix_tree_deref_retry(page)) {
-   slot = radix_tree_iter_retry(&iter);
-   continue;
-   }
-
+   xas_for_each(&xas, page, end - 1) {
if (xa_is_value(page))
swapped++;
 
if (need_resched()) {
-   slot = radix_tree_iter_resume(slot, &iter);
+   xas_pause(&xas);
cond_resched_rcu();
}
}
-- 
2.15.1



[PATCH v6 48/99] shmem: Convert shmem_free_swap to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This is a perfect use for xa_cmpxchg().  Note the use of 0 for GFP
flags; we won't be allocating memory.
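
In sketch form, the function reduces to a single compare-and-exchange;
xa_cmpxchg() takes its own lock and returns the previous entry, and
since the replacement is NULL no node can need allocating:

	void *old = xa_cmpxchg(&mapping->pages, index, expected, NULL, 0);
	if (old != expected)
		return -ENOENT;		/* the entry changed under us */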

Signed-off-by: Matthew Wilcox 
---
 mm/shmem.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index e8233cb7ab5c..5a2226e06f8c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -635,16 +635,13 @@ static void shmem_delete_from_page_cache(struct page 
*page, void *radswap)
 }
 
 /*
- * Remove swap entry from radix tree, free the swap and its page cache.
+ * Remove swap entry from page cache, free the swap and its page cache.
  */
 static int shmem_free_swap(struct address_space *mapping,
   pgoff_t index, void *radswap)
 {
-   void *old;
+   void *old = xa_cmpxchg(&mapping->pages, index, radswap, NULL, 0);
 
-   xa_lock_irq(&mapping->pages);
-   old = radix_tree_delete_item(&mapping->pages, index, radswap);
-   xa_unlock_irq(&mapping->pages);
if (old != radswap)
return -ENOENT;
free_swap_and_cache(radix_to_swp_entry(radswap));
-- 
2.15.1



[PATCH v6 47/99] shmem: Convert shmem_alloc_hugepage to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

xa_find() is a slightly easier API to use than
radix_tree_gang_lookup_slot() because it contains its own RCU locking.
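
xa_find() takes the starting index by reference (so it can report
where a match was found), an inclusive maximum and a filter;
XA_PRESENT matches any present entry. A sketch of the probe used
below (locking is internal to xa_find()):

	pgoff_t index = hindex;

	/* Anything already in [hindex, hindex + HPAGE_PMD_NR - 1]? */
	if (xa_find(&mapping->pages, &index, hindex + HPAGE_PMD_NR - 1,
			XA_PRESENT))
		return NULL;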

Signed-off-by: Matthew Wilcox 
---
 mm/shmem.c | 14 --
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 0f49edae05e4..e8233cb7ab5c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1413,23 +1413,17 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
 {
struct vm_area_struct pvma;
-   struct inode *inode = &info->vfs_inode;
-   struct address_space *mapping = inode->i_mapping;
-   pgoff_t idx, hindex;
-   void __rcu **results;
+   struct address_space *mapping = info->vfs_inode.i_mapping;
+   pgoff_t hindex;
struct page *page;
 
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
return NULL;
 
hindex = round_down(index, HPAGE_PMD_NR);
-   rcu_read_lock();
-   if (radix_tree_gang_lookup_slot(&mapping->pages, &results, &idx,
-   hindex, 1) && idx < hindex + HPAGE_PMD_NR) {
-   rcu_read_unlock();
+   if (xa_find(&mapping->pages, &hindex, hindex + HPAGE_PMD_NR - 1,
+   XA_PRESENT))
return NULL;
-   }
-   rcu_read_unlock();
 
shmem_pseudo_vma_init(&pvma, info, hindex);
page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-- 
2.15.1



[PATCH v6 46/99] shmem: Convert shmem_add_to_page_cache to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This removes the last caller of radix_tree_maybe_preload_order().
Simpler code, unless we run out of memory for new xa_nodes partway through
inserting entries into the xarray.  Hopefully we can support multi-index
entries in the page cache soon and all the awful code goes away.

Signed-off-by: Matthew Wilcox 
---
 mm/shmem.c | 87 --
 1 file changed, 39 insertions(+), 48 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index e4a2eb1336be..0f49edae05e4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -558,9 +558,10 @@ static unsigned long shmem_unused_huge_shrink(struct 
shmem_sb_info *sbinfo,
  */
 static int shmem_add_to_page_cache(struct page *page,
   struct address_space *mapping,
-  pgoff_t index, void *expected)
+  pgoff_t index, void *expected, gfp_t gfp)
 {
-   int error, nr = hpage_nr_pages(page);
+   XA_STATE(xas, &mapping->pages, index);
+   unsigned long i, nr = 1UL << compound_order(page);
 
VM_BUG_ON_PAGE(PageTail(page), page);
VM_BUG_ON_PAGE(index != round_down(index, nr), page);
@@ -569,49 +570,47 @@ static int shmem_add_to_page_cache(struct page *page,
VM_BUG_ON(expected && PageTransHuge(page));
 
page_ref_add(page, nr);
-   page->mapping = mapping;
page->index = index;
+   page->mapping = mapping;
 
-   xa_lock_irq(>pages);
-   if (PageTransHuge(page)) {
-   void __rcu **results;
-   pgoff_t idx;
-   int i;
-
-   error = 0;
-   if (radix_tree_gang_lookup_slot(&mapping->pages,
-   &results, &idx, index, 1) &&
-   idx < index + HPAGE_PMD_NR) {
-   error = -EEXIST;
+   do {
+   xas_lock_irq(&xas);
+   xas_create_range(&xas, index + nr - 1);
+   if (xas_error(&xas))
+   goto unlock;
+   for (i = 0; i < nr; i++) {
+   void *entry = xas_load(&xas);
+   if (entry != expected)
+   xas_set_err(&xas, -ENOENT);
+   if (xas_error(&xas))
+   goto undo;
+   xas_store(&xas, page + i);
+   xas_next(&xas);
}
-
-   if (!error) {
-   for (i = 0; i < HPAGE_PMD_NR; i++) {
-   error = radix_tree_insert(&mapping->pages,
-   index + i, page + i);
-   VM_BUG_ON(error);
-   }
+   if (PageTransHuge(page)) {
count_vm_event(THP_FILE_ALLOC);
+   __inc_node_page_state(page, NR_SHMEM_THPS);
}
-   } else if (!expected) {
-   error = radix_tree_insert(&mapping->pages, index, page);
-   } else {
-   error = shmem_xa_replace(mapping, index, expected, page);
-   }
-
-   if (!error) {
mapping->nrpages += nr;
-   if (PageTransHuge(page))
-   __inc_node_page_state(page, NR_SHMEM_THPS);
__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
__mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
-   xa_unlock_irq(&mapping->pages);
-   } else {
+   goto unlock;
+undo:
+   while (i-- > 0) {
+   xas_store(&xas, NULL);
+   xas_prev(&xas);
+   }
+unlock:
+   xas_unlock_irq(&xas);
+   } while (xas_nomem(&xas, gfp));
+
+   if (xas_error(&xas)) {
page->mapping = NULL;
-   xa_unlock_irq(&mapping->pages);
page_ref_sub(page, nr);
+   return xas_error(&xas);
}
-   return error;
+
+   return 0;
 }
 
 /*
@@ -1159,7 +1158,7 @@ static int shmem_unuse_inode(struct shmem_inode_info 
*info,
 */
if (!error)
error = shmem_add_to_page_cache(*pagep, mapping, index,
-   radswap);
+   radswap, gfp);
if (error != -ENOMEM) {
/*
 * Truncation and eviction use free_swap_and_cache(), which
@@ -1677,7 +1676,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t 
index,
false);
if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
-   swp_to_radix_entry(swap));
+   swp_to_radix_entry(swap), gfp);
/*
 * We already confirmed swap under page lock, and make
 * no memory allocation here, so usually no possibility
@@ -1783,13 +1782,8 @@ 

[PATCH v6 50/99] shmem: Comment fixups

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Remove the last mentions of radix tree from various comments.

Signed-off-by: Matthew Wilcox 
---
 mm/shmem.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 4dbcfb436bd1..5110848885d4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -743,7 +743,7 @@ void shmem_unlock_mapping(struct address_space *mapping)
 }
 
 /*
- * Remove range of pages and swap entries from radix tree, and free them.
+ * Remove range of pages and swap entries from page cache, and free them.
  * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
  */
 static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
@@ -1118,10 +1118,10 @@ static int shmem_unuse_inode(struct shmem_inode_info 
*info,
 * We needed to drop mutex to make that restrictive page
 * allocation, but the inode might have been freed while we
 * dropped it: although a racing shmem_evict_inode() cannot
-* complete without emptying the radix_tree, our page lock
+* complete without emptying the page cache, our page lock
 * on this swapcache page is not enough to prevent that -
 * free_swap_and_cache() of our swap entry will only
-* trylock_page(), removing swap from radix_tree whatever.
+* trylock_page(), removing swap from page cache whatever.
 *
 * We must not proceed to shmem_add_to_page_cache() if the
 * inode has been freed, but of course we cannot rely on
@@ -1187,7 +1187,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
false);
if (error)
goto out;
-   /* No radix_tree_preload: swap entry keeps a place for page in tree */
+   /* No memory allocation: swap entry occupies the slot for the page */
error = -EAGAIN;
 
mutex_lock(_swaplist_mutex);
@@ -1863,7 +1863,7 @@ alloc_nohuge: page = 
shmem_alloc_and_acct_page(gfp, inode,
spin_unlock_irq(&info->lock);
goto repeat;
}
-   if (error == -EEXIST)   /* from above or from radix_tree_insert */
+   if (error == -EEXIST)
goto repeat;
return error;
 }
@@ -2475,7 +2475,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, 
struct iov_iter *to)
 }
 
 /*
- * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
+ * llseek SEEK_DATA or SEEK_HOLE through the page cache.
  */
 static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
pgoff_t index, pgoff_t end, int whence)
@@ -2563,7 +2563,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t 
offset, int whence)
 }
 
 /*
- * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
+ * We need a tag: a new tag would expand every xa_node by 8 bytes,
  * so reuse a tag which we firmly believe is never set or cleared on shmem.
  */
#define SHMEM_TAG_PINNED	PAGECACHE_TAG_TOWRITE
-- 
2.15.1



[PATCH v6 51/99] btrfs: Convert page cache to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Signed-off-by: Matthew Wilcox 
---
 fs/btrfs/compression.c | 4 +---
 fs/btrfs/extent_io.c   | 6 ++
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index e687d06cd97c..4174b166e235 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -449,9 +449,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
if (pg_index > end_index)
break;
 
-   rcu_read_lock();
-   page = radix_tree_lookup(&mapping->pages, pg_index);
-   rcu_read_unlock();
+   page = xa_load(&mapping->pages, pg_index);
if (page && !xa_is_value(page)) {
misses++;
if (misses > 4)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4301cbf4e31f..fd5e9d887328 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5197,11 +5197,9 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
 
clear_page_dirty_for_io(page);
xa_lock_irq(&page->mapping->pages);
-   if (!PageDirty(page)) {
-   radix_tree_tag_clear(&page->mapping->pages,
-   page_index(page),
+   if (!PageDirty(page))
+   __xa_clear_tag(&page->mapping->pages, page_index(page),
PAGECACHE_TAG_DIRTY);
-   }
xa_unlock_irq(&page->mapping->pages);
ClearPageError(page);
unlock_page(page);
-- 
2.15.1

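A note on the lookup hunk above: the explicit rcu_read_lock()/rcu_read_unlock()
pair can go away because xa_load() enters and leaves an RCU read-side critical
section internally.  A minimal sketch of the resulting pattern (hypothetical
helper, not part of the patch):

    /* Look up a page; xa_load() provides its own RCU protection. */
    static struct page *sketch_lookup(struct address_space *mapping,
                                      pgoff_t index)
    {
            struct page *page = xa_load(&mapping->pages, index);

            if (xa_is_value(page))  /* shadow/exceptional entry, not a page */
                    return NULL;
            return page;            /* NULL if nothing is stored at index */
    }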


[PATCH v6 55/99] f2fs: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This is a straightforward conversion.

Signed-off-by: Matthew Wilcox 
---
 fs/f2fs/data.c   |  3 +--
 fs/f2fs/dir.c|  5 +
 fs/f2fs/inline.c |  6 +-
 fs/f2fs/node.c   | 10 ++
 4 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c8f6d9806896..1f3f192f152f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2175,8 +2175,7 @@ void f2fs_set_page_dirty_nobuffers(struct page *page)
xa_lock_irqsave(&mapping->pages, flags);
WARN_ON_ONCE(!PageUptodate(page));
account_page_dirtied(page, mapping);
-   radix_tree_tag_set(&mapping->pages,
-   page_index(page), PAGECACHE_TAG_DIRTY);
+   __xa_set_tag(&mapping->pages, page_index(page), PAGECACHE_TAG_DIRTY);
xa_unlock_irqrestore(&mapping->pages, flags);
unlock_page_memcg(page);
 
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b5515ea6bb2f..296070016ec9 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -708,7 +708,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, 
struct page *page,
unsigned int bit_pos;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
struct address_space *mapping = page_mapping(page);
-   unsigned long flags;
int i;
 
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
@@ -739,10 +738,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, 
struct page *page,
 
if (bit_pos == NR_DENTRY_IN_BLOCK &&
!truncate_hole(dir, page->index, page->index + 1)) {
-   xa_lock_irqsave(&mapping->pages, flags);
-   radix_tree_tag_clear(&mapping->pages, page_index(page),
+   xa_clear_tag(&mapping->pages, page_index(page),
 PAGECACHE_TAG_DIRTY);
-   xa_unlock_irqrestore(&mapping->pages, flags);
 
clear_page_dirty_for_io(page);
ClearPagePrivate(page);
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 7858b8e15f33..d3c3f84beca9 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -204,7 +204,6 @@ int f2fs_write_inline_data(struct inode *inode, struct page 
*page)
void *src_addr, *dst_addr;
struct dnode_of_data dn;
struct address_space *mapping = page_mapping(page);
-   unsigned long flags;
int err;
 
	set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -226,10 +225,7 @@ int f2fs_write_inline_data(struct inode *inode, struct 
page *page)
kunmap_atomic(src_addr);
set_page_dirty(dn.inode_page);
 
-   xa_lock_irqsave(&mapping->pages, flags);
-   radix_tree_tag_clear(&mapping->pages, page_index(page),
-PAGECACHE_TAG_DIRTY);
-   xa_unlock_irqrestore(&mapping->pages, flags);
+   xa_clear_tag(&mapping->pages, page_index(page), PAGECACHE_TAG_DIRTY);
 
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 6b64a3009d55..0a6d5c2f996e 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -88,14 +88,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int 
type)
 static void clear_node_page_dirty(struct page *page)
 {
struct address_space *mapping = page->mapping;
-   unsigned int long flags;
 
if (PageDirty(page)) {
-   xa_lock_irqsave(&mapping->pages, flags);
-   radix_tree_tag_clear(&mapping->pages,
-   page_index(page),
+   xa_clear_tag(&mapping->pages, page_index(page),
PAGECACHE_TAG_DIRTY);
-   xa_unlock_irqrestore(&mapping->pages, flags);
 
clear_page_dirty_for_io(page);
dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
@@ -1142,9 +1138,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
return;
f2fs_bug_on(sbi, check_nid_range(sbi, nid));
 
-   rcu_read_lock();
-   apage = radix_tree_lookup(&NODE_MAPPING(sbi)->pages, nid);
-   rcu_read_unlock();
+   apage = xa_load(&NODE_MAPPING(sbi)->pages, nid);
if (apage)
return;
 
-- 
2.15.1

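One detail worth calling out in this conversion: the double-underscore tag
helpers assume the caller already holds xa_lock, while the plain ones take the
lock themselves.  That is why the xa_lock_irqsave()/xa_unlock_irqrestore()
pairs (and the flags variables) disappear around xa_clear_tag() but stay
around __xa_set_tag().  A sketch of the two shapes, following the API as used
in this series:

    /* Caller-locked: the tag update shares a critical section. */
    xa_lock_irqsave(&mapping->pages, flags);
    account_page_dirtied(page, mapping);
    __xa_set_tag(&mapping->pages, page_index(page), PAGECACHE_TAG_DIRTY);
    xa_unlock_irqrestore(&mapping->pages, flags);

    /* Self-locking: the tag update is the only locked operation. */
    xa_clear_tag(&mapping->pages, page_index(page), PAGECACHE_TAG_DIRTY);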


Re: dangers of bots on the mailing lists was Re: divide error in ___bpf_prog_run

2018-01-17 Thread Theodore Ts'o
On Wed, Jan 17, 2018 at 12:09:18PM +0100, Dmitry Vyukov wrote:
> On Wed, Jan 17, 2018 at 10:49 AM, Daniel Borkmann  
> wrote:
> > Don't know if there's such a possibility, but it would be nice if we could
> > target fuzzing for specific subsystems in related subtrees directly (e.g.
> > for bpf in bpf and bpf-next trees as one example). Dmitry?
> 
> Hi Daniel,
> 
> It's doable.
> Let's start with one bpf tree. Will it be bpf or bpf-next? Which one
> contains more ongoing work? What's the exact git repo address/branch,
> so that I don't second guess?

As a suggestion, until the bpf subsystem is free from problems that
can be found by Syzkaller in Linus's upstream tree, maybe it's not
worth trying to test individual subsystem trees such as the bpf tree?
After all, there's no point trying to bisect our way checking to see
if the problem is with a newly added commit in a development tree, if
it turns out the problem was first introduced years ago in the 4.1 or
3.19 timeframe.

After all, finding these older problems is going to have much higher
value, since these are the sorts of potential security problems that
are worth backporting to real device kernels for Android/ChromeOS, and
for enterprise distro kernels.  So from an "impact to the industry"
perspective, focusing on Linus's tree is going to be far more
productive.  That's a win for the community, and it's a win for those
people on the Syzkaller team who might be going up for promo or
listing their achievements at performance review time.  :-)

This will also give the Syzkaller team more time to make the
automation more intelligent in terms of being able to do the automatic
bisection to find the first guilty commit, labelling the report with
the specific subsystem tree that it came from, etc., etc.

Cheers,

- Ted

P.S.  Something that might be *really* interesting is for those cases
where Syzkaller can find a repro, to test that repro on various stable
4.4, 4.9, 3.18, et al. LTS kernels.  This will take fewer resources
than a full bisection, but it will add real value since knowledge that
it will trigger on a LTS kernel will help prioritize which reports
developers might be more interested in focusing upon, and it will give
them a head start in determining which fixes needed to be backported
to which stable kernels.


[PATCH v6 54/99] nilfs2: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

I'm not 100% convinced that the rewrite of nilfs_copy_back_pages is
correct, but it will at least have different bugs from the current
version.

Signed-off-by: Matthew Wilcox 
---
 fs/nilfs2/btnode.c | 37 +++-
 fs/nilfs2/page.c   | 72 +++---
 2 files changed, 56 insertions(+), 53 deletions(-)

diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 9e2a00207436..b5997e8c5441 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -177,42 +177,36 @@ int nilfs_btnode_prepare_change_key(struct address_space 
*btnc,
ctxt->newbh = NULL;
 
if (inode->i_blkbits == PAGE_SHIFT) {
-   lock_page(obh->b_page);
-   /*
-* We cannot call radix_tree_preload for the kernels older
-* than 2.6.23, because it is not exported for modules.
-*/
+   void *entry;
+   struct page *opage = obh->b_page;
+   lock_page(opage);
 retry:
-   err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
-   if (err)
-   goto failed_unlock;
/* BUG_ON(oldkey != obh->b_page->index); */
-   if (unlikely(oldkey != obh->b_page->index))
-   NILFS_PAGE_BUG(obh->b_page,
+   if (unlikely(oldkey != opage->index))
+   NILFS_PAGE_BUG(opage,
   "invalid oldkey %lld (newkey=%lld)",
   (unsigned long long)oldkey,
   (unsigned long long)newkey);
 
-   xa_lock_irq(&btnc->pages);
-   err = radix_tree_insert(&btnc->pages, newkey, obh->b_page);
-   xa_unlock_irq(&btnc->pages);
+   entry = xa_cmpxchg(&btnc->pages, newkey, NULL, opage, GFP_NOFS);
/*
 * Note: page->index will not change to newkey until
 * nilfs_btnode_commit_change_key() will be called.
 * To protect the page in intermediate state, the page lock
 * is held.
 */
-   radix_tree_preload_end();
-   if (!err)
+   if (!entry)
return 0;
-   else if (err != -EEXIST)
+   if (xa_is_err(entry)) {
+   err = xa_err(entry);
goto failed_unlock;
+   }
 
err = invalidate_inode_pages2_range(btnc, newkey, newkey);
if (!err)
goto retry;
/* fallback to copy mode */
-   unlock_page(obh->b_page);
+   unlock_page(opage);
}
 
nbh = nilfs_btnode_create_block(btnc, newkey);
@@ -252,9 +246,8 @@ void nilfs_btnode_commit_change_key(struct address_space 
*btnc,
mark_buffer_dirty(obh);
 
xa_lock_irq(&btnc->pages);
-   radix_tree_delete(&btnc->pages, oldkey);
-   radix_tree_tag_set(&btnc->pages, newkey,
-  PAGECACHE_TAG_DIRTY);
+   __xa_erase(&btnc->pages, oldkey);
+   __xa_set_tag(&btnc->pages, newkey, PAGECACHE_TAG_DIRTY);
xa_unlock_irq(&btnc->pages);
 
opage->index = obh->b_blocknr = newkey;
@@ -283,9 +276,7 @@ void nilfs_btnode_abort_change_key(struct address_space 
*btnc,
return;
 
if (nbh == NULL) {  /* blocksize == pagesize */
-   xa_lock_irq(&btnc->pages);
-   radix_tree_delete(&btnc->pages, newkey);
-   xa_unlock_irq(&btnc->pages);
+   xa_erase(&btnc->pages, newkey);
unlock_page(ctxt->bh->b_page);
} else
brelse(nbh);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 1c6703efde9e..31d20f624971 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -304,10 +304,10 @@ int nilfs_copy_dirty_pages(struct address_space *dmap,
 void nilfs_copy_back_pages(struct address_space *dmap,
   struct address_space *smap)
 {
+   XA_STATE(xas, &dmap->pages, 0);
struct pagevec pvec;
unsigned int i, n;
pgoff_t index = 0;
-   int err;
 
	pagevec_init(&pvec);
 repeat:
@@ -317,43 +317,56 @@ void nilfs_copy_back_pages(struct address_space *dmap,
 
	for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i], *dpage;
-   pgoff_t offset = page->index;
+   xas_set(&xas, page->index);
 
 lock_page(page);
-   dpage = find_lock_page(dmap, offset);
+   do {
+   xas_lock_irq(&xas);
+   dpage = xas_create(&xas);
+   if (!xas_error(&xas))
+   break;
+   xas_unlock_irq(&xas);
+   if (!xas_nomem(&xas, GFP_NOFS)) {
+   unlock_page(page);
+   

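For readers following the btnode hunk: xa_cmpxchg() folds the old
preload/insert/check sequence into one call.  It returns NULL on success, an
internal error entry on allocation failure, or the entry that was already
present (the old -EEXIST case).  A hedged sketch of the result handling,
mirroring the hunk above:

    entry = xa_cmpxchg(&btnc->pages, newkey, NULL, opage, GFP_NOFS);
    if (!entry)                     /* slot was empty; opage installed */
            return 0;
    if (xa_is_err(entry)) {         /* e.g. -ENOMEM */
            err = xa_err(entry);
            goto failed_unlock;
    }
    /* otherwise another entry occupies newkey; fall back to copying */
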
[PATCH v6 53/99] fs: Convert writeback to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

A couple of short loops.

Signed-off-by: Matthew Wilcox 
---
 fs/fs-writeback.c | 25 +
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e2c1ca667d9a..897a89489fe9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -339,9 +339,9 @@ static void inode_switch_wbs_work_fn(struct work_struct 
*work)
struct address_space *mapping = inode->i_mapping;
struct bdi_writeback *old_wb = inode->i_wb;
struct bdi_writeback *new_wb = isw->new_wb;
-   struct radix_tree_iter iter;
+   XA_STATE(xas, &mapping->pages, 0);
+   struct page *page;
bool switched = false;
-   void **slot;
 
/*
 * By the time control reaches here, RCU grace period has passed
@@ -375,25 +375,18 @@ static void inode_switch_wbs_work_fn(struct work_struct 
*work)
 * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
 * pages actually under writeback.
 */
-   radix_tree_for_each_tagged(slot, &mapping->pages, &iter, 0,
-  PAGECACHE_TAG_DIRTY) {
-   struct page *page = radix_tree_deref_slot_protected(slot,
-   &mapping->pages.xa_lock);
-   if (likely(page) && PageDirty(page)) {
+   xas_for_each_tag(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) {
+   if (PageDirty(page)) {
dec_wb_stat(old_wb, WB_RECLAIMABLE);
inc_wb_stat(new_wb, WB_RECLAIMABLE);
}
}
 
-   radix_tree_for_each_tagged(slot, &mapping->pages, &iter, 0,
-  PAGECACHE_TAG_WRITEBACK) {
-   struct page *page = radix_tree_deref_slot_protected(slot,
-   &mapping->pages.xa_lock);
-   if (likely(page)) {
-   WARN_ON_ONCE(!PageWriteback(page));
-   dec_wb_stat(old_wb, WB_WRITEBACK);
-   inc_wb_stat(new_wb, WB_WRITEBACK);
-   }
+   xas_set(&xas, 0);
+   xas_for_each_tag(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) {
+   WARN_ON_ONCE(!PageWriteback(page));
+   dec_wb_stat(old_wb, WB_WRITEBACK);
+   inc_wb_stat(new_wb, WB_WRITEBACK);
}
 
wb_get(new_wb);
-- 
2.15.1

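The shape of the new loops, for reference: xas_for_each_tag() only delivers
non-NULL entries, so the radix_tree_deref_slot_protected() dance and the
likely(page) checks become unnecessary.  A sketch of a tagged walk
(hypothetical counter, not the writeback code; the caller is assumed to hold
xa_lock_irq as inode_switch_wbs_work_fn() does):

    XA_STATE(xas, &mapping->pages, 0);
    struct page *page;
    unsigned long dirty = 0;

    xas_for_each_tag(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) {
            if (PageDirty(page))    /* tag and page flag can disagree */
                    dirty++;
    }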


[PATCH v6 52/99] fs: Convert buffer to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Mostly comment fixes, but one use of __xa_set_tag.

Signed-off-by: Matthew Wilcox 
---
 fs/buffer.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 1a6ae530156b..e1d18307d5c8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -592,7 +592,7 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct 
inode *inode)
 EXPORT_SYMBOL(mark_buffer_dirty_inode);
 
 /*
- * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
+ * Mark the page dirty, and set it dirty in the page cache, and mark the inode
  * dirty.
  *
  * If warn is true, then emit a warning if the page is not uptodate and has
@@ -609,8 +609,8 @@ void __set_page_dirty(struct page *page, struct 
address_space *mapping,
if (page->mapping) {/* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
account_page_dirtied(page, mapping);
-   radix_tree_tag_set(&mapping->pages,
-   page_index(page), PAGECACHE_TAG_DIRTY);
+   __xa_set_tag(&mapping->pages, page_index(page),
+   PAGECACHE_TAG_DIRTY);
}
xa_unlock_irqrestore(&mapping->pages, flags);
 }
@@ -1072,7 +1072,7 @@ __getblk_slow(struct block_device *bdev, sector_t block,
  * The relationship between dirty buffers and dirty pages:
  *
  * Whenever a page has any dirty buffers, the page's dirty bit is set, and
- * the page is tagged dirty in its radix tree.
+ * the page is tagged dirty in the page cache.
  *
  * At all times, the dirtiness of the buffers represents the dirtiness of
  * subsections of the page.  If the page has buffers, the page dirty bit is
@@ -1095,9 +1095,9 @@ __getblk_slow(struct block_device *bdev, sector_t block,
  * mark_buffer_dirty - mark a buffer_head as needing writeout
  * @bh: the buffer_head to mark dirty
  *
- * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
- * backing page dirty, then tag the page as dirty in its address_space's radix
- * tree and then attach the address_space's inode to its superblock's dirty
+ * mark_buffer_dirty() will set the dirty bit against the buffer, then set
+ * its backing page dirty, then tag the page as dirty in the page cache
+ * and then attach the address_space's inode to its superblock's dirty
  * inode list.
  *
  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
-- 
2.15.1



[PATCH v6 56/99] lustre: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Signed-off-by: Matthew Wilcox 
---
 drivers/staging/lustre/lustre/llite/glimpse.c   | 12 +---
 drivers/staging/lustre/lustre/mdc/mdc_request.c | 16 
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c 
b/drivers/staging/lustre/lustre/llite/glimpse.c
index 5f2843da911c..25232fdf5797 100644
--- a/drivers/staging/lustre/lustre/llite/glimpse.c
+++ b/drivers/staging/lustre/lustre/llite/glimpse.c
@@ -57,7 +57,7 @@ static const struct cl_lock_descr whole_file = {
 };
 
 /*
- * Check whether file has possible unwriten pages.
+ * Check whether file has possible unwritten pages.
  *
  * \retval 1file is mmap-ed or has dirty pages
  *  0otherwise
@@ -66,16 +66,14 @@ blkcnt_t dirty_cnt(struct inode *inode)
 {
blkcnt_t cnt = 0;
struct vvp_object *vob = cl_inode2vvp(inode);
-   void  *results[1];
 
-   if (inode->i_mapping)
-   cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->pages,
- results, 0, 1,
- PAGECACHE_TAG_DIRTY);
+   if (inode->i_mapping && xa_tagged(&inode->i_mapping->pages,
+   PAGECACHE_TAG_DIRTY))
+   cnt = 1;
if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
cnt = 1;
 
-   return (cnt > 0) ? 1 : 0;
+   return cnt;
 }
 
 int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c 
b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index 2ec79a6b17da..ea23247e9e02 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -934,17 +934,18 @@ static struct page *mdc_page_locate(struct address_space 
*mapping, __u64 *hash,
 * hash _smaller_ than one we are looking for.
 */
unsigned long offset = hash_x_index(*hash, hash64);
+   XA_STATE(xas, &mapping->pages, offset);
struct page *page;
-   int found;
 
-   xa_lock_irq(&mapping->pages);
-   found = radix_tree_gang_lookup(&mapping->pages,
-  (void **)&page, offset, 1);
-   if (found > 0 && !xa_is_value(page)) {
+   xas_lock_irq(&xas);
+   page = xas_find(&xas, ULONG_MAX);
+   if (xa_is_value(page))
+   page = NULL;
+   if (page) {
struct lu_dirpage *dp;
 
get_page(page);
-   xa_unlock_irq(&mapping->pages);
+   xas_unlock_irq(&xas);
/*
 * In contrast to find_lock_page() we are sure that directory
 * page cannot be truncated (while DLM lock is held) and,
@@ -992,8 +993,7 @@ static struct page *mdc_page_locate(struct address_space 
*mapping, __u64 *hash,
page = ERR_PTR(-EIO);
}
} else {
-   xa_unlock_irq(&mapping->pages);
-   page = NULL;
+   xas_unlock_irq(&xas);
}
return page;
 }
-- 
2.15.1

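The dirty_cnt() change is a nice example of asking a cheaper question: the old
code performed a gang lookup just to learn whether any dirty entry exists,
while xa_tagged() inspects the tag bits kept at the root of the array without
walking it (as I read the API in this series).  A minimal sketch (hypothetical
helper):

    /* "Does this mapping have any dirty pages?" without a lookup. */
    static bool sketch_has_dirty(struct address_space *mapping)
    {
            return xa_tagged(&mapping->pages, PAGECACHE_TAG_DIRTY);
    }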


[PATCH v6 59/99] dax: More XArray conversion

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This time, we want to convert get_unlocked_mapping_entry() to use the
XArray.  That has a ripple effect, causing us to change the waitqueues
to hash on the address of the xarray rather than the address of the
mapping (functionally equivalent), and create a number of on-the-stack
xa_state structures which are used only as containers for passing the
xarray and the index down to deeper function calls.

Also rename dax_wake_mapping_entry_waiter() to dax_wake_entry().

Signed-off-by: Matthew Wilcox 
---
 fs/dax.c | 72 +---
 1 file changed, 33 insertions(+), 39 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 8eab0b56f7f9..d3fe61b95216 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -120,7 +120,7 @@ static int dax_is_empty_entry(void *entry)
  * DAX radix tree locking
  */
 struct exceptional_entry_key {
-   struct address_space *mapping;
+   struct xarray *xa;
pgoff_t entry_start;
 };
 
@@ -129,9 +129,10 @@ struct wait_exceptional_entry_queue {
struct exceptional_entry_key key;
 };
 
-static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
-   pgoff_t index, void *entry, struct exceptional_entry_key *key)
+static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
+   void *entry, struct exceptional_entry_key *key)
 {
+   unsigned long index = xas->xa_index;
unsigned long hash;
 
/*
@@ -142,10 +143,10 @@ static wait_queue_head_t *dax_entry_waitqueue(struct 
address_space *mapping,
if (dax_is_pmd_entry(entry))
index &= ~PG_PMD_COLOUR;
 
-   key->mapping = mapping;
+   key->xa = xas->xa;
key->entry_start = index;
 
-   hash = hash_long((unsigned long)mapping ^ index, DAX_WAIT_TABLE_BITS);
+   hash = hash_long((unsigned long)xas->xa ^ index, DAX_WAIT_TABLE_BITS);
return wait_table + hash;
 }
 
@@ -156,7 +157,7 @@ static int wake_exceptional_entry_func(wait_queue_entry_t 
*wait, unsigned int mo
struct wait_exceptional_entry_queue *ewait =
container_of(wait, struct wait_exceptional_entry_queue, wait);
 
-   if (key->mapping != ewait->key.mapping ||
+   if (key->xa != ewait->key.xa ||
key->entry_start != ewait->key.entry_start)
return 0;
return autoremove_wake_function(wait, mode, sync, NULL);
@@ -167,13 +168,12 @@ static int wake_exceptional_entry_func(wait_queue_entry_t 
*wait, unsigned int mo
  * The important information it's conveying is whether the entry at
  * this index used to be a PMD entry.
  */
-static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
-   pgoff_t index, void *entry, bool wake_all)
+static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
 {
struct exceptional_entry_key key;
wait_queue_head_t *wq;
 
-   wq = dax_entry_waitqueue(mapping, index, entry, &key);
+   wq = dax_entry_waitqueue(xas, entry, &key);
 
/*
 * Checking for locked entry and prepare_to_wait_exclusive() happens
@@ -205,10 +205,9 @@ static inline void *lock_slot(struct xa_state *xas)
  *
  * Must be called with xa_lock held.
  */
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
-   pgoff_t index, void ***slotp)
+static void *get_unlocked_mapping_entry(struct xa_state *xas)
 {
-   void *entry, **slot;
+   void *entry;
struct wait_exceptional_entry_queue ewait;
wait_queue_head_t *wq;
 
@@ -216,22 +215,19 @@ static void *get_unlocked_mapping_entry(struct 
address_space *mapping,
ewait.wait.func = wake_exceptional_entry_func;
 
for (;;) {
-   entry = __radix_tree_lookup(&mapping->pages, index, NULL,
- &slot);
-   if (!entry ||
-   WARN_ON_ONCE(!xa_is_value(entry)) || !dax_locked(entry)) {
-   if (slotp)
-   *slotp = slot;
+   entry = xas_load(xas);
+   if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) ||
+   !dax_locked(entry))
return entry;
-   }
 
-   wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
+   wq = dax_entry_waitqueue(xas, entry, &ewait.key);
prepare_to_wait_exclusive(wq, &ewait.wait,
  TASK_UNINTERRUPTIBLE);
-   xa_unlock_irq(&mapping->pages);
+   xas_pause(xas);
+   xas_unlock_irq(xas);
schedule();
finish_wait(wq, &ewait.wait);
-   xa_lock_irq(&mapping->pages);
+   xas_lock_irq(xas);
}
 }
 
@@ -251,7 +247,7 @@ static void dax_unlock_mapping_entry(struct address_space 
*mapping,
xas_store(&xas, entry);
/* Safe to not call xas_pause here -- we don't touch the array after */
xas_unlock_irq(&xas);
-   
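
The "container" use of xa_state mentioned in the changelog looks like this
from a caller's perspective: one XA_STATE on the stack stands in for the old
(mapping, index) pair all the way down.  A hedged sketch of a caller (names
from the hunks above; error handling omitted):

    XA_STATE(xas, &mapping->pages, index);
    void *entry;

    xas_lock_irq(&xas);
    entry = get_unlocked_mapping_entry(&xas);   /* may sleep and relock */
    /* ... operate on the entry ... */
    xas_unlock_irq(&xas);
    dax_wake_entry(&xas, entry, false);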

[PATCH v6 58/99] dax: Convert lock_slot to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Signed-off-by: Matthew Wilcox 
---
 fs/dax.c | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index f3463d93a6ce..8eab0b56f7f9 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -188,12 +188,11 @@ static void dax_wake_mapping_entry_waiter(struct 
address_space *mapping,
 /*
  * Mark the given slot as locked.  Must be called with xa_lock held.
  */
-static inline void *lock_slot(struct address_space *mapping, void **slot)
+static inline void *lock_slot(struct xa_state *xas)
 {
-   unsigned long v = xa_to_value(
-   radix_tree_deref_slot_protected(slot, &mapping->pages.xa_lock));
+   unsigned long v = xa_to_value(xas_load(xas));
void *entry = xa_mk_value(v | DAX_ENTRY_LOCK);
-   radix_tree_replace_slot(&mapping->pages, slot, entry);
+   xas_store(xas, entry);
return entry;
 }
 
@@ -244,7 +243,7 @@ static void dax_unlock_mapping_entry(struct address_space 
*mapping,
 
xas_lock_irq(&xas);
entry = xas_load(&xas);
-   if (WARN_ON_ONCE(!entry || !xa_is_value(entry) || !dax_locked(entry))) {
+   if (WARN_ON_ONCE(!xa_is_value(entry) || !dax_locked(entry))) {
xas_unlock_irq(&xas);
return;
}
@@ -303,6 +302,7 @@ static void put_unlocked_mapping_entry(struct address_space 
*mapping,
 static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
unsigned long size_flag)
 {
+   XA_STATE(xas, &mapping->pages, index);
bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */
void *entry, **slot;
 
@@ -341,7 +341,7 @@ static void *grab_mapping_entry(struct address_space 
*mapping, pgoff_t index,
 * Make sure 'entry' remains valid while we drop
 * xa_lock.
 */
-   entry = lock_slot(mapping, slot);
+   entry = lock_slot(&xas);
}
 
xa_unlock_irq(&mapping->pages);
@@ -408,7 +408,7 @@ static void *grab_mapping_entry(struct address_space 
*mapping, pgoff_t index,
xa_unlock_irq(&mapping->pages);
return entry;
}
-   entry = lock_slot(mapping, slot);
+   entry = lock_slot(&xas);
 out_unlock:
xa_unlock_irq(&mapping->pages);
return entry;
@@ -639,6 +639,7 @@ static int dax_writeback_one(struct block_device *bdev,
pgoff_t index, void *entry)
 {
struct radix_tree_root *pages = &mapping->pages;
+   XA_STATE(xas, pages, index);
void *entry2, **slot, *kaddr;
long ret = 0, id;
sector_t sector;
@@ -675,7 +676,7 @@ static int dax_writeback_one(struct block_device *bdev,
if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE))
goto put_unlocked;
/* Lock the entry to serialize with page faults */
-   entry = lock_slot(mapping, slot);
+   entry = lock_slot(&xas);
/*
 * We can clear the tag now but we have to be careful so that concurrent
 * dax_writeback_one() calls for the same index cannot finish before we
@@ -1500,8 +1501,9 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
  pfn_t pfn)
 {
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
-   void *entry, **slot;
pgoff_t index = vmf->pgoff;
+   XA_STATE(xas, &mapping->pages, index);
+   void *entry, **slot;
int vmf_ret, error;
 
xa_lock_irq(&mapping->pages);
@@ -1517,7 +1519,7 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
return VM_FAULT_NOPAGE;
}
radix_tree_tag_set(&mapping->pages, index, PAGECACHE_TAG_DIRTY);
-   entry = lock_slot(mapping, slot);
+   entry = lock_slot(&xas);
xa_unlock_irq(&mapping->pages);
switch (pe_size) {
case PE_SIZE_PTE:
-- 
2.15.1

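With lock_slot() taking an xa_state, the caller's load and the locked store go
through the same cursor, so there is no separate 'slot' pointer to keep valid
across the operation.  A usage sketch under the assumptions of this series:

    XA_STATE(xas, &mapping->pages, index);

    xa_lock_irq(&mapping->pages);
    entry = xas_load(&xas);
    if (xa_is_value(entry))
            entry = lock_slot(&xas);  /* rewrites the slot xas_load() found */
    xa_unlock_irq(&mapping->pages);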


Re: [PATCH] net: fs_enet: do not call phy_stop() in interrupts

2018-01-17 Thread David Miller
From: Christophe Leroy 
Date: Tue, 16 Jan 2018 10:33:05 +0100 (CET)

> In case of TX timeout, fs_timeout() calls phy_stop(), which
> triggers the following BUG_ON() as we are in interrupt.
 ...
> This patch moves fs_timeout() actions into an async worker.
> 
> Fixes: commit 48257c4f168e5 ("Add fs_enet ethernet network driver, for 
> several embedded platforms")
> Signed-off-by: Christophe Leroy 

Applied, thank you.


[PATCH v6 57/99] dax: Convert dax_unlock_mapping_entry to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Replace slot_locked() with dax_locked() and inline unlock_slot() into
its only caller.

Signed-off-by: Matthew Wilcox 
---
 fs/dax.c | 48 
 1 file changed, 16 insertions(+), 32 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 5097a606da1a..f3463d93a6ce 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -73,6 +73,11 @@ fs_initcall(init_dax_wait_table);
 #define DAX_ZERO_PAGE  (1UL << 2)
 #define DAX_EMPTY  (1UL << 3)
 
+static bool dax_locked(void *entry)
+{
+   return xa_to_value(entry) & DAX_ENTRY_LOCK;
+}
+
 static unsigned long dax_radix_sector(void *entry)
 {
return xa_to_value(entry) >> DAX_SHIFT;
@@ -180,16 +185,6 @@ static void dax_wake_mapping_entry_waiter(struct 
address_space *mapping,
__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 }
 
-/*
- * Check whether the given slot is locked.  Must be called with xa_lock held.
- */
-static inline int slot_locked(struct address_space *mapping, void **slot)
-{
-   unsigned long entry = xa_to_value(
-   radix_tree_deref_slot_protected(slot, &mapping->pages.xa_lock));
-   return entry & DAX_ENTRY_LOCK;
-}
-
 /*
  * Mark the given slot as locked.  Must be called with xa_lock held.
  */
@@ -202,18 +197,6 @@ static inline void *lock_slot(struct address_space 
*mapping, void **slot)
return entry;
 }
 
-/*
- * Mark the given slot as unlocked.  Must be called with xa_lock held.
- */
-static inline void *unlock_slot(struct address_space *mapping, void **slot)
-{
-   unsigned long v = xa_to_value(
-   radix_tree_deref_slot_protected(slot, &mapping->pages.xa_lock));
-   void *entry = xa_mk_value(v & ~DAX_ENTRY_LOCK);
-   radix_tree_replace_slot(&mapping->pages, slot, entry);
-   return entry;
-}
-
 /*
  * Lookup entry in radix tree, wait for it to become unlocked if it is
  * a DAX entry and return it. The caller must call
@@ -237,8 +220,7 @@ static void *get_unlocked_mapping_entry(struct 
address_space *mapping,
entry = __radix_tree_lookup(&mapping->pages, index, NULL,
  &slot);
if (!entry ||
-   WARN_ON_ONCE(!xa_is_value(entry)) ||
-   !slot_locked(mapping, slot)) {
+   WARN_ON_ONCE(!xa_is_value(entry)) || !dax_locked(entry)) {
if (slotp)
*slotp = slot;
return entry;
@@ -257,17 +239,19 @@ static void *get_unlocked_mapping_entry(struct 
address_space *mapping,
 static void dax_unlock_mapping_entry(struct address_space *mapping,
 pgoff_t index)
 {
-   void *entry, **slot;
+   XA_STATE(xas, &mapping->pages, index);
+   void *entry;
 
-   xa_lock_irq(&mapping->pages);
-   entry = __radix_tree_lookup(&mapping->pages, index, NULL, &slot);
-   if (WARN_ON_ONCE(!entry || !xa_is_value(entry) ||
-!slot_locked(mapping, slot))) {
-   xa_unlock_irq(&mapping->pages);
+   xas_lock_irq(&xas);
+   entry = xas_load(&xas);
+   if (WARN_ON_ONCE(!entry || !xa_is_value(entry) || !dax_locked(entry))) {
+   xas_unlock_irq(&xas);
return;
}
-   unlock_slot(mapping, slot);
-   xa_unlock_irq(&mapping->pages);
+   entry = xa_mk_value(xa_to_value(entry) & ~DAX_ENTRY_LOCK);
+   xas_store(&xas, entry);
+   /* Safe to not call xas_pause here -- we don't touch the array after */
+   xas_unlock_irq(&xas);
dax_wake_mapping_entry_waiter(mapping, index, entry, false);
 }
 
-- 
2.15.1

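dax_locked() works because DAX stores value entries rather than page pointers:
the sector lives in the high bits and flag bits such as the lock bit occupy
the low bits.  A sketch of the encoding implied by the helpers above (assuming
the DAX_SHIFT/DAX_ENTRY_LOCK layout in this file):

    /* Build a locked value entry for a sector. */
    void *entry = xa_mk_value((sector << DAX_SHIFT) | DAX_ENTRY_LOCK);

    /* dax_locked() is then just a bit test on the decoded value. */
    bool locked = xa_to_value(entry) & DAX_ENTRY_LOCK;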


[PATCH v6 61/99] dax: Convert dax_writeback_one to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Likewise, an easy conversion.
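
The tag operations map one-for-one: where an xa_state is already in hand,
the xas_* forms are used, and the standalone xa_clear_tag() collapses a
lock/clear/unlock triple into one call. The correspondence, as a sketch
(not part of the diff below):

	/* with an xa_state: */
	xas_get_tag(&xas, PAGECACHE_TAG_TOWRITE);	/* was radix_tree_tag_get() */
	xas_clear_tag(&xas, PAGECACHE_TAG_TOWRITE);	/* was radix_tree_tag_clear() */

	/* without one, the array-level helper takes the lock itself: */
	xa_clear_tag(&mapping->pages, index, PAGECACHE_TAG_DIRTY);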

Signed-off-by: Matthew Wilcox 
---
 fs/dax.c | 17 +++--
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 9a30224da4d6..b66b8c896ed8 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -632,8 +632,7 @@ static int dax_writeback_one(struct block_device *bdev,
struct dax_device *dax_dev, struct address_space *mapping,
pgoff_t index, void *entry)
 {
-   struct radix_tree_root *pages = &mapping->pages;
-   XA_STATE(xas, pages, index);
+   XA_STATE(xas, &mapping->pages, index);
void *entry2, *kaddr;
long ret = 0, id;
sector_t sector;
@@ -648,7 +647,7 @@ static int dax_writeback_one(struct block_device *bdev,
if (WARN_ON(!xa_is_value(entry)))
return -EIO;
 
-   xa_lock_irq(&mapping->pages);
+   xas_lock_irq(&xas);
entry2 = get_unlocked_mapping_entry(&xas);
/* Entry got punched out / reallocated? */
if (!entry2 || WARN_ON_ONCE(!xa_is_value(entry2)))
@@ -667,7 +666,7 @@ static int dax_writeback_one(struct block_device *bdev,
}
 
/* Another fsync thread may have already written back this entry */
-   if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE))
+   if (!xas_get_tag(&xas, PAGECACHE_TAG_TOWRITE))
goto put_unlocked;
/* Lock the entry to serialize with page faults */
entry = lock_slot(&xas);
@@ -678,8 +677,8 @@ static int dax_writeback_one(struct block_device *bdev,
 * at the entry only under xa_lock and once they do that they will
 * see the entry locked and wait for it to unlock.
 */
-   radix_tree_tag_clear(pages, index, PAGECACHE_TAG_TOWRITE);
-   xa_unlock_irq(&mapping->pages);
+   xas_clear_tag(&xas, PAGECACHE_TAG_TOWRITE);
+   xas_unlock_irq(&xas);
 
/*
 * Even if dax_writeback_mapping_range() was given a wbc->range_start
@@ -717,9 +716,7 @@ static int dax_writeback_one(struct block_device *bdev,
 * the pfn mappings are writeprotected and fault waits for mapping
 * entry lock.
 */
-   xa_lock_irq(&mapping->pages);
-   radix_tree_tag_clear(pages, index, PAGECACHE_TAG_DIRTY);
-   xa_unlock_irq(&mapping->pages);
+   xa_clear_tag(&mapping->pages, index, PAGECACHE_TAG_DIRTY);
trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
  dax_unlock:
dax_read_unlock(id);
@@ -728,7 +725,7 @@ static int dax_writeback_one(struct block_device *bdev,
 
  put_unlocked:
put_unlocked_mapping_entry(&xas, entry2);
-   xa_unlock_irq(&mapping->pages);
+   xas_unlock_irq(&xas);
return ret;
 }
 
-- 
2.15.1



Re: [PATCH v2 1/3] Revert "do_SAK: Don't recursively take the tasklist_lock"

2018-01-17 Thread Oleg Nesterov
On 01/17, Eric W. Biederman wrote:
>
> Oleg Nesterov  writes:
>
> >> To operate correctly, do_SAK() needs to kill everything that has the tty
> >> open.  Unless we can make that guarantee I don't see the point of
> >> changing do_SAK.
> >
> > OK, but how this connects to this change?
> >
> > Again, this force_sig() doesn't match other send_sig()'s in __do_SAK(),
> > and Kirill is going to turn them all into send_sig_info(SEND_SIG_FORCED).
> > Just we need to discuss whether we need to skip the global init or not
> > but this is another story.
> >
> > So why do you dislike this change?
> >
> > force_sig() should die anyway. At least in its current form, it should not
> > be used unless task == current. But this is off-topic.
>
> I see that as a fair criticism of force_sig,
> and a good argument to use send_sig(SIGKILL, SEND_SIG_FORCED).
>
> Which will kill the global init.

and iiuc you think this is right. I won't argue, but again, this needs some
discussion imo.

And in fact Kirill was going to do this before anything else; it was me who
(rightly or not) suggested doing this after the cleanups, because this is the
user-visible change.

> What I don't like is a bunch of patches to introduce races and make
> something more racy

Why? I do not see how this series can add new problems or races; technically
it looks correct to me.

> So we either need to say do_SAK is broken.  In which case the proper fix
> is to just delete the thing.

I personally never used it so I am fine with your suggestion ;)

> Or we need to ensure the final
> implementation is an atomic kill of everything that has the tty open.

Then I think we need some changes in drivers/tty/, with or without Kirill's
changes.

Maybe some flag, set/cleared by __do_SAK(), which should make tty_open() fail.
Not sure about ttys passed via unix sockets... and actually I have no idea
how much paranoia __do_SAK() needs.
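
Something like this, purely as a sketch -- TTY_SAK_IN_PROGRESS is a
hypothetical flag, it does not exist today:

	/* in __do_SAK(), around the kill loop: */
	set_bit(TTY_SAK_IN_PROGRESS, &tty->flags);
	...
	clear_bit(TTY_SAK_IN_PROGRESS, &tty->flags);

	/* in tty_open(), refuse opens racing with the sweep: */
	if (test_bit(TTY_SAK_IN_PROGRESS, &tty->flags))
		return -EBUSY;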

Oleg.



[PATCH v6 60/99] dax: Convert __dax_invalidate_mapping_entry to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Simple now that we already have an xa_state!
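
With the xa_state already set up, deletion is just a store of NULL through
it; no separate radix_tree_delete() walk is needed. In essence (a sketch):

	xas_store(&xas, NULL);		/* removes the entry at the state's index */
	mapping->nrexceptional--;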

Signed-off-by: Matthew Wilcox 
---
 fs/dax.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index d3fe61b95216..9a30224da4d6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -413,24 +413,24 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
XA_STATE(xas, &mapping->pages, index);
int ret = 0;
void *entry;
-   struct radix_tree_root *pages = &mapping->pages;

xa_lock_irq(&mapping->pages);
entry = get_unlocked_mapping_entry(&xas);
if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
goto out;
if (!trunc &&
-   (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
-radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE)))
+   (xas_get_tag(&xas, PAGECACHE_TAG_DIRTY) ||
+    xas_get_tag(&xas, PAGECACHE_TAG_TOWRITE)))
goto out;
-   radix_tree_delete(pages, index);
+   xas_store(&xas, NULL);
mapping->nrexceptional--;
ret = 1;
 out:
put_unlocked_mapping_entry(&xas, entry);
-   xa_unlock_irq(&mapping->pages);
+   xas_unlock_irq(&xas);
return ret;
 }
+
 /*
  * Delete DAX entry at @index from @mapping.  Wait for it
  * to be unlocked before deleting it.
-- 
2.15.1



[PATCH v6 25/99] page cache: Convert page deletion to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

The code is slightly shorter and simpler.
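
A compound page occupies consecutive indices, and the xa_state steps to the
next slot with xas_next() instead of restarting a lookup for each index.
The shape of the loop, distilled (a sketch, assuming nr slots starting at
page->index):

	XA_STATE(xas, &mapping->pages, page->index);
	unsigned int i = nr;

	do {
		xas_store(&xas, shadow);	/* replace the page pointer */
		xas_init_tags(&xas);		/* clear any tags on the slot */
		if (--i)
			xas_next(&xas);		/* step to the adjacent index */
	} while (i);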

Signed-off-by: Matthew Wilcox 
---
 mm/filemap.c | 30 ++
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index e6371b551de1..ed30d5310e50 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -112,30 +112,28 @@
  *   ->tasklist_lock(memory_failure, collect_procs_ao)
  */
 
-static void page_cache_tree_delete(struct address_space *mapping,
+static void page_cache_delete(struct address_space *mapping,
   struct page *page, void *shadow)
 {
-   int i, nr;
+   XA_STATE(xas, &mapping->pages, page->index);
+   unsigned int i, nr;
 
-   /* hugetlb pages are represented by one entry in the radix tree */
+   mapping_set_update(&xas, mapping);
+
+   /* hugetlb pages are represented by a single entry in the xarray */
nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
 
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageTail(page), page);
VM_BUG_ON_PAGE(nr != 1 && shadow, page);
 
-   for (i = 0; i < nr; i++) {
-   struct radix_tree_node *node;
-   void **slot;
-
-   __radix_tree_lookup(&mapping->pages, page->index + i,
-   &node, &slot);
-
-   VM_BUG_ON_PAGE(!node && nr != 1, page);
-
-   radix_tree_clear_tags(&mapping->pages, node, slot);
-   __radix_tree_replace(&mapping->pages, node, slot, shadow,
-   workingset_lookup_update(mapping));
+   i = nr;
+repeat:
+   xas_store(&xas, shadow);
+   xas_init_tags(&xas);
+   if (--i) {
+   xas_next(&xas);
+   goto repeat;
}
 
page->mapping = NULL;
@@ -235,7 +233,7 @@ void __delete_from_page_cache(struct page *page, void *shadow)
trace_mm_filemap_delete_from_page_cache(page);
 
unaccount_page_cache_page(mapping, page);
-   page_cache_tree_delete(mapping, page, shadow);
+   page_cache_delete(mapping, page, shadow);
 }
 
 static void page_cache_free_page(struct address_space *mapping,
-- 
2.15.1


