[PATCH v6 63/99] dax: Convert dax_insert_mapping_entry to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Signed-off-by: Matthew Wilcox 
---
 fs/dax.c | 18 ++
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index e6b25ef112f2..494e8fb7a98f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -498,9 +498,9 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
  void *entry, sector_t sector,
  unsigned long flags, bool dirty)
 {
-   struct radix_tree_root *pages = &mapping->pages;
void *new_entry;
pgoff_t index = vmf->pgoff;
+   XA_STATE(xas, &mapping->pages, index);
 
if (dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -516,7 +516,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
PAGE_SIZE, 0);
}
 
-   xa_lock_irq(&mapping->pages);
+   xas_lock_irq(&xas);
new_entry = dax_radix_locked_entry(sector, flags);
 
if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
@@ -528,21 +528,15 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 * existing entry is a PMD, we will just leave the PMD in the
 * tree and dirty it if necessary.
 */
-   struct radix_tree_node *node;
-   void **slot;
-   void *ret;
-
-   ret = __radix_tree_lookup(pages, index, &node, &slot);
-   WARN_ON_ONCE(ret != entry);
-   __radix_tree_replace(pages, node, slot,
-new_entry, NULL);
+   void *prev = xas_store(&xas, new_entry);
+   WARN_ON_ONCE(prev != entry);
entry = new_entry;
}
 
if (dirty)
-   radix_tree_tag_set(pages, index, PAGECACHE_TAG_DIRTY);
+   xas_set_tag(&xas, PAGECACHE_TAG_DIRTY);
 
-   xa_unlock_irq(&mapping->pages);
+   xas_unlock_irq(&xas);
return entry;
 }
 
-- 
2.15.1



[PATCH v6 62/99] dax: Convert dax_insert_pfn_mkwrite to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Signed-off-by: Matthew Wilcox 
---
 fs/dax.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index b66b8c896ed8..e6b25ef112f2 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1497,21 +1497,21 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
void *entry;
int vmf_ret, error;
 
-   xa_lock_irq(&mapping->pages);
+   xas_lock_irq(&xas);
entry = get_unlocked_mapping_entry(&xas);
/* Did we race with someone splitting entry or so? */
if (!entry ||
(pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
(pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
put_unlocked_mapping_entry(&xas, entry);
-   xa_unlock_irq(&mapping->pages);
+   xas_unlock_irq(&xas);
trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
  VM_FAULT_NOPAGE);
return VM_FAULT_NOPAGE;
}
-   radix_tree_tag_set(&mapping->pages, index, PAGECACHE_TAG_DIRTY);
+   xas_set_tag(&xas, PAGECACHE_TAG_DIRTY);
entry = lock_slot(&xas);
-   xa_unlock_irq(&mapping->pages);
+   xas_unlock_irq(&xas);
switch (pe_size) {
case PE_SIZE_PTE:
error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
-- 
2.15.1



[PATCH v6 64/99] dax: Convert grab_mapping_entry to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Signed-off-by: Matthew Wilcox 
---
 fs/dax.c | 98 +---
 1 file changed, 26 insertions(+), 72 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 494e8fb7a98f..3eb0cf176d69 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -44,6 +44,7 @@
 
 /* The 'colour' (ie low bits) within a PMD of a page offset.  */
 #define PG_PMD_COLOUR  ((PMD_SIZE >> PAGE_SHIFT) - 1)
+#define PMD_ORDER  (PMD_SHIFT - PAGE_SHIFT)
 
 static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];
 
@@ -89,10 +90,10 @@ static void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
DAX_ENTRY_LOCK);
 }
 
-static unsigned int dax_radix_order(void *entry)
+static unsigned int dax_entry_order(void *entry)
 {
if (xa_to_value(entry) & DAX_PMD)
-   return PMD_SHIFT - PAGE_SHIFT;
+   return PMD_ORDER;
return 0;
 }
 
@@ -299,10 +300,11 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
 {
XA_STATE(xas, &mapping->pages, index);
bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */
-   void *entry, **slot;
+   void *entry;
 
+   xas_set_order(&xas, index, size_flag ? PMD_ORDER : 0);
 restart:
-   xa_lock_irq(&mapping->pages);
+   xas_lock_irq(&xas);
entry = get_unlocked_mapping_entry(&xas);
 
if (WARN_ON_ONCE(entry && !xa_is_value(entry))) {
@@ -326,84 +328,36 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
}
}
 
-   /* No entry for given index? Make sure radix tree is big enough. */
-   if (!entry || pmd_downgrade) {
-   int err;
-
-   if (pmd_downgrade) {
-   /*
-* Make sure 'entry' remains valid while we drop
-* xa_lock.
-*/
-   entry = lock_slot(&xas);
-   }
-
-   xa_unlock_irq(&mapping->pages);
+   if (pmd_downgrade) {
+   entry = lock_slot(&xas);
/*
 * Besides huge zero pages the only other thing that gets
 * downgraded are empty entries which don't need to be
 * unmapped.
 */
-   if (pmd_downgrade && dax_is_zero_entry(entry))
+   if (dax_is_zero_entry(entry)) {
+   xas_pause(&xas);
+   xas_unlock_irq(&xas);
unmap_mapping_range(mapping,
(index << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
-
-   err = radix_tree_preload(
-   mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
-   if (err) {
-   if (pmd_downgrade)
-   put_locked_mapping_entry(mapping, index);
-   return ERR_PTR(err);
+   xas_lock_irq(&xas);
}
-   xa_lock_irq(&mapping->pages);
-
-   if (!entry) {
-   /*
-* We needed to drop the pages lock while calling
-* radix_tree_preload() and we didn't have an entry to
-* lock.  See if another thread inserted an entry at
-* our index during this time.
-*/
-   entry = __radix_tree_lookup(&mapping->pages, index,
-   NULL, &slot);
-   if (entry) {
-   radix_tree_preload_end();
-   xa_unlock_irq(&mapping->pages);
-   goto restart;
-   }
-   }
-
-   if (pmd_downgrade) {
-   radix_tree_delete(&mapping->pages, index);
-   mapping->nrexceptional--;
-   dax_wake_entry(&xas, entry, true);
-   }
-
+   xas_store(&xas, NULL);
+   mapping->nrexceptional--;
+   dax_wake_entry(&xas, entry, true);
+   }
+   if (!entry || pmd_downgrade) {
entry = dax_radix_locked_entry(0, size_flag | DAX_EMPTY);
-
-   err = __radix_tree_insert(&mapping->pages, index,
-   dax_radix_order(entry), entry);
-   radix_tree_preload_end();
-   if (err) {
-   xa_unlock_irq(&mapping->pages);
-   /*
-* Our insertion of a DAX entry failed, most likely
-* because we were inserting a PMD entry and it
-* collided with a PTE sized entry at a different
-* index in the PMD range.  We haven't inserted
-* anything into the radix tree and have no waiters to
-* wake.
-*/
-   return ERR_PTR(err);
-   }

[PATCH v6 65/99] dax: Fix sparse warning

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

sparse doesn't know that follow_pte_pmd() conditionally acquires the ptl,
because it's in a separate compilation unit.  Move follow_pte_pmd() to
mm.h where sparse can see it.
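For reference, __cond_lock() only expands to an annotation when sparse is
running (__CHECKER__); from memory, its definition in compiler.h is roughly:

    #ifdef __CHECKER__
    # define __cond_lock(x, c)  ((c) ? ({ __acquire(x); 1; }) : 0)
    #else
    # define __cond_lock(x, c)  (c)
    #endif

Because the macro is expanded in the caller's translation unit, moving the
wrapper into mm.h lets sparse see the conditional __acquire() at every call
site and pair it with the later unlock.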

Signed-off-by: Matthew Wilcox 
---
 include/linux/mm.h | 15 ++-
 mm/memory.c| 16 +---
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index fe1ee4313add..9c384c486edf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1314,7 +1314,7 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen, int even_cows);
-int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 unsigned long *start, unsigned long *end,
 pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
@@ -1324,6 +1324,19 @@ int follow_phys(struct vm_area_struct *vma, unsigned long address,
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write);
 
+static inline int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+unsigned long *start, unsigned long *end,
+pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
+{
+   int res;
+
+   /* (void) is needed to make gcc happy */
+   (void) __cond_lock(*ptlp,
+  !(res = __follow_pte_pmd(mm, address, start, end,
+   ptepp, pmdpp, ptlp)));
+   return res;
+}
+
 static inline void unmap_shared_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen)
 {
diff --git a/mm/memory.c b/mm/memory.c
index ca5674cbaff2..66184601ac03 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4201,7 +4201,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
unsigned long *start, unsigned long *end,
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
 {
@@ -4278,20 +4278,6 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address,
return res;
 }
 
-int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
-unsigned long *start, unsigned long *end,
-pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
-{
-   int res;
-
-   /* (void) is needed to make gcc happy */
-   (void) __cond_lock(*ptlp,
-  !(res = __follow_pte_pmd(mm, address, start, end,
-   ptepp, pmdpp, ptlp)));
-   return res;
-}
-EXPORT_SYMBOL(follow_pte_pmd);
-
 /**
  * follow_pfn - look up PFN at a user virtual address
  * @vma: memory mapping
-- 
2.15.1



Re: [PATCH RFC v1] arm64: Handle traps from accessing CNTVCT/CNTFRQ for CONFIG_COMPAT

2018-01-17 Thread Nicolin Chen
On Wed, Jan 17, 2018 at 09:03:48AM +, Marc Zyngier wrote:

> > So ignoring a condition for a Thumb instruction may cause its IT
> > scope shifting. For ARM mode, the only penalty could be two Rts
> > getting written -- which shouldn't corrupt userspace execution.
> > 
> > Please correct me if I am wrong or not thorough.
> 
> Consider the following:
>   
>   mov r0, #0
>   mov r1, #0
>   cmp r1, #3
>   mrrceq  r0, r1, cntvct // simplified version
> 
> Oh look, you've corrupted r0 and r1, which should never have been changed.
> Whatever uses the contents of r0 and r1 after the mrrc will misbehave. How
> is that an acceptable behaviour? How do you expect userspace to cope
> with such brain damage?
> 
> If you intend to emulate the CPU, you must emulate it fully, to the
> letter of the architecture. No ifs, no buts.

Thanks for the explanation. I see the point here.

I saw that your arm64 compat version doesn't check if (rt != 31)
as the MRS handler does. Is there any reason for that?

Thank you
Nicolin


[PATCH v6 66/99] page cache: Finish XArray conversion

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

With no more radix tree API users left, we can drop the GFP flags
and use xa_init() instead of INIT_RADIX_TREE().
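A minimal sketch of the difference (illustration only; 'index' and 'page'
below are placeholders, not code from this patch): the XArray takes
behavioural flags at init time and the allocation mask at each call instead.

    struct xarray pages;

    xa_init_flags(&pages, XA_FLAGS_LOCK_IRQ);   /* no GFP flags at init */
    /* the gfp mask is supplied per operation from now on: */
    xa_store(&pages, index, page, GFP_KERNEL);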

Signed-off-by: Matthew Wilcox 
---
 fs/inode.c | 2 +-
 include/linux/fs.h | 2 +-
 mm/swap_state.c| 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index c7b00573c10d..f5680b805336 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -348,7 +348,7 @@ EXPORT_SYMBOL(inc_nlink);
 void address_space_init_once(struct address_space *mapping)
 {
memset(mapping, 0, sizeof(*mapping));
-   INIT_RADIX_TREE(&mapping->pages, GFP_ATOMIC | __GFP_ACCOUNT);
+   xa_init_flags(&mapping->pages, XA_FLAGS_LOCK_IRQ);
init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c58bc3c619bf..b459bf4ddb62 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -410,7 +410,7 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
  */
 struct address_space {
struct inode*host;
-   struct radix_tree_root  pages;
+   struct xarray   pages;
gfp_t   gfp_mask;
atomic_ti_mmap_writable;
struct rb_root_cached   i_mmap;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 219e3b4f09e6..25f027d0bb00 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -573,7 +573,7 @@ int init_swap_address_space(unsigned int type, unsigned long nr_pages)
return -ENOMEM;
for (i = 0; i < nr; i++) {
space = spaces + i;
-   INIT_RADIX_TREE(&space->pages, GFP_ATOMIC|__GFP_NOWARN);
+   xa_init_flags(&space->pages, XA_FLAGS_LOCK_IRQ);
atomic_set(&space->i_mmap_writable, 0);
space->a_ops = &swap_aops;
/* swap cache doesn't use writeback related tags */
-- 
2.15.1



[PATCH v6 67/99] mm: Convert cgroup writeback to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This is a fairly naive conversion, leaving in place the GFP_ATOMIC
allocation.  By switching the locking around, we could use GFP_KERNEL
and probably simplify the error handling.
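To make the trade-off concrete (editor's sketch of the insert path below,
nothing new): xa_insert() only succeeds when the index is vacant, which is
what lets the "did we race?" comment go away, and GFP_ATOMIC stays because
the store still happens under the cgwb_lock spinlock; using GFP_KERNEL
would mean moving the allocation out from under that lock.

    spin_lock_irqsave(&cgwb_lock, flags);
    /* fails, leaving the array untouched, if another wb won the race */
    ret = xa_insert(&bdi->cgwb_xa, memcg_css->id, wb, GFP_ATOMIC);
    spin_unlock_irqrestore(&cgwb_lock, flags);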

Signed-off-by: Matthew Wilcox 
---
 include/linux/backing-dev-defs.h |  2 +-
 include/linux/backing-dev.h  |  2 +-
 mm/backing-dev.c | 22 ++
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index bfe86b54f6c1..074a54aad33c 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -187,7 +187,7 @@ struct backing_dev_info {
struct bdi_writeback wb;  /* the root writeback info for this bdi */
struct list_head wb_list; /* list of all wbs */
 #ifdef CONFIG_CGROUP_WRITEBACK
-   struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
+   struct xarray cgwb_xa;  /* radix tree of active cgroup wbs */
struct rb_root cgwb_congested_tree; /* their congested states */
 #else
struct bdi_writeback_congested *wb_congested;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 3df0d20e23f3..27e7b31bd802 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -271,7 +271,7 @@ static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi
if (!memcg_css->parent)
return &bdi->wb;
 
-   wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
+   wb = xa_load(&bdi->cgwb_xa, memcg_css->id);
 
/*
 * %current's blkcg equals the effective blkcg of its memcg.  No
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index b5f940ce0143..aa0f85df0928 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -417,8 +417,8 @@ static void wb_exit(struct bdi_writeback *wb)
 #include 
 
 /*
- * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
- * blkcg->cgwb_list, and memcg->cgwb_list.  bdi->cgwb_tree is also RCU
+ * cgwb_lock protects bdi->cgwb_xa, bdi->cgwb_congested_tree,
+ * blkcg->cgwb_list, and memcg->cgwb_list.  bdi->cgwb_xa is also RCU
  * protected.
  */
 static DEFINE_SPINLOCK(cgwb_lock);
@@ -539,7 +539,7 @@ static void cgwb_kill(struct bdi_writeback *wb)
 {
lockdep_assert_held(&cgwb_lock);
 
-   WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id));
+   WARN_ON(xa_erase(&wb->bdi->cgwb_xa, wb->memcg_css->id) != wb);
list_del(&wb->memcg_node);
list_del(&wb->blkcg_node);
percpu_ref_kill(&wb->refcnt);
@@ -571,7 +571,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 
/* look up again under lock and discard on blkcg mismatch */
spin_lock_irqsave(&cgwb_lock, flags);
-   wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
+   wb = xa_load(&bdi->cgwb_xa, memcg_css->id);
if (wb && wb->blkcg_css != blkcg_css) {
cgwb_kill(wb);
wb = NULL;
@@ -614,8 +614,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
spin_lock_irqsave(&cgwb_lock, flags);
if (test_bit(WB_registered, &bdi->wb.state) &&
blkcg_cgwb_list->next && memcg_cgwb_list->next) {
-   /* we might have raced another instance of this function */
-   ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
+   ret = xa_insert(&bdi->cgwb_xa, memcg_css->id, wb, GFP_ATOMIC);
if (!ret) {
list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
list_add(&wb->memcg_node, memcg_cgwb_list);
@@ -682,7 +681,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
 
do {
rcu_read_lock();
-   wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
+   wb = xa_load(&bdi->cgwb_xa, memcg_css->id);
if (wb) {
struct cgroup_subsys_state *blkcg_css;
 
@@ -704,7 +703,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 {
int ret;
 
-   INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
+   xa_init(&bdi->cgwb_xa);
bdi->cgwb_congested_tree = RB_ROOT;
 
ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
@@ -717,15 +716,14 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 
 static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
 {
-   struct radix_tree_iter iter;
-   void **slot;
+   XA_STATE(xas, &bdi->cgwb_xa, 0);
struct bdi_writeback *wb;
 
WARN_ON(test_bit(WB_registered, &bdi->wb.state));
 
spin_lock_irq(&cgwb_lock);
-   radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
-   cgwb_kill(*slot);
+   xas_for_each(&xas, wb, ULONG_MAX)
+   cgwb_kill(wb);
 
while (!list_empty(&bdi->wb_list)) {
wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
-- 
2.15.1



Re: [PATCH v2] r8152: disable RX aggregation on Dell TB16 dock

2018-01-17 Thread David Miller
From: Kai-Heng Feng 
Date: Tue, 16 Jan 2018 16:46:27 +0800

> r8153 on Dell TB15/16 dock corrupts rx packets.
> 
> This change is suggested by Realtek. They guess that the XHCI controller
> doesn't have enough buffer, and their guess is correct: once RX
> aggregation is disabled, the issue is gone.
> 
> ASMedia is currently working on a real solution for this issue.
> 
> Dell and the ODM confirm that the bcdDevice and iSerialNumber are unique for TB16.
> 
> Note that TB15 has different bcdDevice and iSerialNumber, which are not
> unique values. If you still have TB15, please contact Dell to replace it
> with TB16.
> 
> BugLink: https://bugs.launchpad.net/bugs/1729674
> Cc: Mario Limonciello 
> Signed-off-by: Kai-Heng Feng 
> ---
> v2:
> - Disable RX aggregation instead of disable RX checksum
> - Use bcdDevice and iSerialNumber to uniquely identify Dell TB16

Ok, since this is very restricted it's an acceptable way to deal with
this problem.

Applied, thanks.


[PATCH v6 26/99] page cache: Convert page cache lookups to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Introduce page_cache_pin() to factor out the common logic between the
various lookup routines:

find_get_entry
find_get_entries
find_get_pages_range
find_get_pages_contig
find_get_pages_range_tag
find_get_entries_tag
filemap_map_pages

By using the xa_state to control the iteration, we can remove most of
the gotos and just use the normal break/continue loop control flow.
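As a rough sketch of the resulting shape (editor's illustration only;
'start', 'end', 'nr_pages' and 'pages' are stand-in parameters, and the
real functions differ in how they treat value entries):

    static unsigned sketch_find_get_pages(struct address_space *mapping,
                    pgoff_t start, pgoff_t end,
                    unsigned int nr_pages, struct page **pages)
    {
            XA_STATE(xas, &mapping->pages, start);
            struct page *page;
            unsigned ret = 0;

            rcu_read_lock();
            xas_for_each(&xas, page, end) {
                    if (xas_retry(&xas, page))
                            continue;       /* retry entry or dead node */
                    if (xa_is_value(page))
                            continue;       /* shadow or swap entry */
                    if (!page_cache_pin(&xas, page))
                            continue;       /* lost the race; state was reset */
                    pages[ret++] = page;
                    if (ret == nr_pages)
                            break;
            }
            rcu_read_unlock();
            return ret;
    }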

Also convert the regression1 read-side to XArray since that simulates
the functions being modified here.

Signed-off-by: Matthew Wilcox 
---
 include/linux/pagemap.h|   6 +-
 mm/filemap.c   | 380 +
 tools/testing/radix-tree/regression1.c |  68 +++---
 3 files changed, 129 insertions(+), 325 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 34d4fa3ad1c5..1a59f4a5424a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -365,17 +365,17 @@ static inline unsigned find_get_pages(struct address_space *mapping,
 unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
   unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
-   pgoff_t end, int tag, unsigned int nr_pages,
+   pgoff_t end, xa_tag_t tag, unsigned int nr_pages,
struct page **pages);
 static inline unsigned find_get_pages_tag(struct address_space *mapping,
-   pgoff_t *index, int tag, unsigned int nr_pages,
+   pgoff_t *index, xa_tag_t tag, unsigned int nr_pages,
struct page **pages)
 {
return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag,
nr_pages, pages);
 }
 unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
-   int tag, unsigned int nr_entries,
+   xa_tag_t tag, unsigned int nr_entries,
struct page **entries, pgoff_t *indices);
 
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
diff --git a/mm/filemap.c b/mm/filemap.c
index ed30d5310e50..317a89df1945 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1401,6 +1401,32 @@ bool page_cache_range_empty(struct address_space *mapping, pgoff_t index,
 }
 EXPORT_SYMBOL_GPL(page_cache_range_empty);
 
+/*
+ * page_cache_pin() - Try to pin a page in the page cache.
+ * @xas: The XArray operation state.
+ * @pagep: The page which has been previously found at this location.
+ *
+ * On success, the page has an elevated refcount, but is not locked.
+ * This implements the lockless pagecache protocol as described in
+ * include/linux/pagemap.h; see page_cache_get_speculative().
+ *
+ * Return: True if the page is still in the cache.
+ */
+static bool page_cache_pin(struct xa_state *xas, struct page *page)
+{
+   struct page *head = compound_head(page);
+   bool got = page_cache_get_speculative(head);
+
+   if (likely(got && (xas_reload(xas) == page) &&
+   (compound_head(page) == head)))
+   return true;
+
+   if (got)
+   put_page(head);
+   xas_retry(xas, XA_RETRY_ENTRY);
+   return false;
+}
+
 /**
  * find_get_entry - find and get a page cache entry
  * @mapping: the address_space to search
@@ -1416,51 +1442,21 @@ EXPORT_SYMBOL_GPL(page_cache_range_empty);
  */
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 {
-   void **pagep;
-   struct page *head, *page;
+   XA_STATE(xas, &mapping->pages, offset);
+   struct page *page;
 
rcu_read_lock();
-repeat:
-   page = NULL;
-   pagep = radix_tree_lookup_slot(&mapping->pages, offset);
-   if (pagep) {
-   page = radix_tree_deref_slot(pagep);
-   if (unlikely(!page))
-   goto out;
-   if (radix_tree_exception(page)) {
-   if (radix_tree_deref_retry(page))
-   goto repeat;
-   /*
-* A shadow entry of a recently evicted page,
-* or a swap entry from shmem/tmpfs.  Return
-* it without attempting to raise page count.
-*/
-   goto out;
-   }
-
-   head = compound_head(page);
-   if (!page_cache_get_speculative(head))
-   goto repeat;
-
-   /* The page was split under us? */
-   if (compound_head(page) != head) {
-   put_page(head);
-   goto repeat;
-   }
+   do {
+   page = xas_load(&xas);
+   if (xas_retry(&xas, page))
+   continue;
+   if (!page || xa_is_value(page))

[PATCH v6 69/99] brd: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Convert brd_pages from a radix tree to an XArray.  Simpler and smaller
code; in particular another user of radix_tree_preload is eliminated.
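The preload goes away because the insert collapses into a single
xa_cmpxchg(); a condensed sketch of that path (restating the hunk below,
nothing new):

    curr = xa_cmpxchg(&brd->brd_pages, idx, NULL, page, GFP_NOIO);
    if (curr) {             /* another inserter won, or allocation error */
            __free_page(page);
            page = xa_err(curr) ? NULL : curr;
    }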

Signed-off-by: Matthew Wilcox 
---
 drivers/block/brd.c | 93 -
 1 file changed, 28 insertions(+), 65 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 8028a3a7e7fd..59a1af7aaa79 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -17,7 +17,7 @@
 #include 
 #include 
 #include 
-#include <linux/radix-tree.h>
+#include <linux/xarray.h>
 #include 
 #include 
 #include 
@@ -29,9 +29,9 @@
 #define PAGE_SECTORS   (1 << PAGE_SECTORS_SHIFT)
 
 /*
- * Each block ramdisk device has a radix_tree brd_pages of pages that stores
- * the pages containing the block device's contents. A brd page's ->index is
- * its offset in PAGE_SIZE units. This is similar to, but in no way connected
+ * Each block ramdisk device has an xarray brd_pages that stores the pages
+ * containing the block device's contents. A brd page's ->index is its
+ * offset in PAGE_SIZE units. This is similar to, but in no way connected
  * with, the kernel's pagecache or buffer cache (which sit above our block
  * device).
  */
@@ -41,13 +41,7 @@ struct brd_device {
struct request_queue*brd_queue;
struct gendisk  *brd_disk;
struct list_headbrd_list;
-
-   /*
-* Backing store of pages and lock to protect it. This is the contents
-* of the block device.
-*/
-   spinlock_t  brd_lock;
-   struct radix_tree_root  brd_pages;
+   struct xarray   brd_pages;
 };
 
 /*
@@ -62,17 +56,9 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
 * The page lifetime is protected by the fact that we have opened the
 * device node -- brd pages will never be deleted under us, so we
 * don't need any further locking or refcounting.
-*
-* This is strictly true for the radix-tree nodes as well (ie. we
-* don't actually need the rcu_read_lock()), however that is not a
-* documented feature of the radix-tree API so it is better to be
-* safe here (we don't have total exclusion from radix tree updates
-* here, only deletes).
 */
-   rcu_read_lock();
idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
-   page = radix_tree_lookup(&brd->brd_pages, idx);
-   rcu_read_unlock();
+   page = xa_load(&brd->brd_pages, idx);
 
BUG_ON(page && page->index != idx);
 
@@ -87,7 +73,7 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
 static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
 {
pgoff_t idx;
-   struct page *page;
+   struct page *curr, *page;
gfp_t gfp_flags;
 
page = brd_lookup_page(brd, sector);
@@ -108,62 +94,40 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
if (!page)
return NULL;
 
-   if (radix_tree_preload(GFP_NOIO)) {
-   __free_page(page);
-   return NULL;
-   }
-
-   spin_lock(&brd->brd_lock);
idx = sector >> PAGE_SECTORS_SHIFT;
page->index = idx;
-   if (radix_tree_insert(&brd->brd_pages, idx, page)) {
+   curr = xa_cmpxchg(&brd->brd_pages, idx, NULL, page, GFP_NOIO);
+   if (curr) {
__free_page(page);
-   page = radix_tree_lookup(&brd->brd_pages, idx);
-   BUG_ON(!page);
-   BUG_ON(page->index != idx);
+   if (xa_err(curr)) {
+   page = NULL;
+   } else {
+   page = curr;
+   BUG_ON(!page);
+   BUG_ON(page->index != idx);
+   }
}
-   spin_unlock(&brd->brd_lock);
-
-   radix_tree_preload_end();
 
return page;
 }
 
 /*
- * Free all backing store pages and radix tree. This must only be called when
+ * Free all backing store pages and xarray.  This must only be called when
  * there are no other users of the device.
  */
-#define FREE_BATCH 16
 static void brd_free_pages(struct brd_device *brd)
 {
-   unsigned long pos = 0;
-   struct page *pages[FREE_BATCH];
-   int nr_pages;
-
-   do {
-   int i;
-
-   nr_pages = radix_tree_gang_lookup(&brd->brd_pages,
-   (void **)pages, pos, FREE_BATCH);
-
-   for (i = 0; i < nr_pages; i++) {
-   void *ret;
-
-   BUG_ON(pages[i]->index < pos);
-   pos = pages[i]->index;
-   ret = radix_tree_delete(&brd->brd_pages, pos);
-   BUG_ON(!ret || ret != pages[i]);
-   __free_page(pages[i]);
-   }
-
-   pos++;
+   XA_STATE(xas, &brd->brd_pages, 0);
+   struct page *page;

Re: [PATCH net-next V3 0/2] tun: allow to attach eBPF filter

2018-01-17 Thread David Miller
From: Jason Wang 
Date: Tue, 16 Jan 2018 16:31:00 +0800

> Hi all:
> 
> This series tries to implement an eBPF socket filter for tun. This could
> be used for implementing an efficient virtio-net receive filter for
> vhost-net.
> 
> Thanks
> 
> Changes from V2:
> - fix typo
> - remove unnecessary double check

Series applied, thanks!


[PATCH v6 68/99] vmalloc: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

The radix tree of vmap blocks is simpler to express as an XArray.
Saves a couple of hundred bytes of text and eliminates a user of the
radix tree preload API.
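The preload user disappears because xa_store() takes the gfp mask itself
and reports allocation failure through its return value; a minimal sketch
of the new insert path (editor's restatement of the hunk below):

    void *old = xa_store(&vmap_block_tree, vb_idx, vb, gfp_mask);

    if (xa_is_err(old)) {           /* tree node allocation failed */
            kfree(vb);
            free_vmap_area(va);
            return ERR_PTR(xa_err(old));
    }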

Signed-off-by: Matthew Wilcox 
---
 mm/vmalloc.c | 39 +--
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 673942094328..b6c138633592 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -23,7 +23,7 @@
 #include 
 #include 
 #include 
-#include <linux/radix-tree.h>
+#include <linux/xarray.h>
 #include 
 #include 
 #include 
@@ -821,12 +821,11 @@ struct vmap_block {
 static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
 
 /*
- * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
+ * XArray of vmap blocks, indexed by address, to quickly find a vmap block
  * in the free path. Could get rid of this if we change the API to return a
  * "cookie" from alloc, to be passed to free. But no big deal yet.
  */
-static DEFINE_SPINLOCK(vmap_block_tree_lock);
-static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
+static DEFINE_XARRAY(vmap_block_tree);
 
 /*
  * We should probably have a fallback mechanism to allocate virtual memory
@@ -865,8 +864,8 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
struct vmap_block *vb;
struct vmap_area *va;
unsigned long vb_idx;
-   int node, err;
-   void *vaddr;
+   int node;
+   void *ret, *vaddr;
 
node = numa_node_id();
 
@@ -883,13 +882,6 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
return ERR_CAST(va);
}
 
-   err = radix_tree_preload(gfp_mask);
-   if (unlikely(err)) {
-   kfree(vb);
-   free_vmap_area(va);
-   return ERR_PTR(err);
-   }
-
vaddr = vmap_block_vaddr(va->va_start, 0);
spin_lock_init(&vb->lock);
vb->va = va;
@@ -902,11 +894,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
INIT_LIST_HEAD(&vb->free_list);
 
vb_idx = addr_to_vb_idx(va->va_start);
-   spin_lock(&vmap_block_tree_lock);
-   err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
-   spin_unlock(&vmap_block_tree_lock);
-   BUG_ON(err);
-   radix_tree_preload_end();
+   ret = xa_store(&vmap_block_tree, vb_idx, vb, gfp_mask);
+   if (xa_is_err(ret)) {
+   kfree(vb);
+   free_vmap_area(va);
+   return ERR_PTR(xa_err(ret));
+   }
 
vbq = &get_cpu_var(vmap_block_queue);
spin_lock(&vbq->lock);
@@ -923,9 +916,7 @@ static void free_vmap_block(struct vmap_block *vb)
unsigned long vb_idx;
 
vb_idx = addr_to_vb_idx(vb->va->va_start);
-   spin_lock(&vmap_block_tree_lock);
-   tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
-   spin_unlock(&vmap_block_tree_lock);
+   tmp = xa_erase(&vmap_block_tree, vb_idx);
BUG_ON(tmp != vb);
 
free_vmap_area_noflush(vb->va);
@@ -1031,7 +1022,6 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
 static void vb_free(const void *addr, unsigned long size)
 {
unsigned long offset;
-   unsigned long vb_idx;
unsigned int order;
struct vmap_block *vb;
 
@@ -1045,10 +1035,7 @@ static void vb_free(const void *addr, unsigned long size)
offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
offset >>= PAGE_SHIFT;
 
-   vb_idx = addr_to_vb_idx((unsigned long)addr);
-   rcu_read_lock();
-   vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
-   rcu_read_unlock();
+   vb = xa_load(&vmap_block_tree, addr_to_vb_idx((unsigned long)addr));
BUG_ON(!vb);
 
vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
-- 
2.15.1



[PATCH v6 70/99] xfs: Convert m_perag_tree to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Getting rid of the m_perag_lock lets us also get rid of the call to
radix_tree_preload().  This is a relatively naive conversion; we could
improve performance over the radix tree implementation by passing around
xa_state pointers instead of indices, possibly at the expense of extending
rcu_read_lock() periods.
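
To make the index-versus-xa_state trade-off concrete, here is a minimal
sketch (not part of the patch, modelled on the existing reclaim walk) of
how callers iterate over every tagged AG with the interface this
conversion keeps.  Each pass restarts the lookup from a bare agno index;
carrying an xa_state around instead is the optimisation hinted at above.

	struct xfs_perag	*pag;
	xfs_agnumber_t		agno = 0;

	while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_RECLAIM_TAG)) != NULL) {
		agno = pag->pag_agno + 1;	/* resume after this AG next time */
		/* ... do per-AG work here ... */
		xfs_perag_put(pag);		/* drop the reference from _get_tag */
	}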

Signed-off-by: Matthew Wilcox 
---
 fs/xfs/libxfs/xfs_sb.c |  9 -
 fs/xfs/xfs_icache.c| 35 +--
 fs/xfs/xfs_icache.h|  6 +++---
 fs/xfs/xfs_mount.c | 19 ---
 fs/xfs/xfs_mount.h |  3 +--
 5 files changed, 21 insertions(+), 51 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 9b5aae2bcc0b..3b0b65eb8224 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -59,7 +59,7 @@ xfs_perag_get(
int ref = 0;
 
rcu_read_lock();
-   pag = radix_tree_lookup(>m_perag_tree, agno);
+   pag = xa_load(>m_perag_xa, agno);
if (pag) {
ASSERT(atomic_read(>pag_ref) >= 0);
ref = atomic_inc_return(>pag_ref);
@@ -78,14 +78,13 @@ xfs_perag_get_tag(
xfs_agnumber_t  first,
int tag)
 {
+   XA_STATE(xas, >m_perag_xa, first);
struct xfs_perag*pag;
-   int found;
int ref;
 
rcu_read_lock();
-   found = radix_tree_gang_lookup_tag(>m_perag_tree,
-   (void **), first, 1, tag);
-   if (found <= 0) {
+   pag = xas_find_tag(, ULONG_MAX, tag);
+   if (!pag) {
rcu_read_unlock();
return NULL;
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 3861d61fb265..65a8b91b2e70 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -156,13 +156,10 @@ static void
 xfs_reclaim_work_queue(
struct xfs_mount*mp)
 {
-
-   rcu_read_lock();
-   if (radix_tree_tagged(>m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+   if (xa_tagged(>m_perag_xa, XFS_ICI_RECLAIM_TAG)) {
queue_delayed_work(mp->m_reclaim_workqueue, >m_reclaim_work,
msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
}
-   rcu_read_unlock();
 }
 
 /*
@@ -194,10 +191,7 @@ xfs_perag_set_reclaim_tag(
return;
 
/* propagate the reclaim tag up into the perag radix tree */
-   spin_lock(>m_perag_lock);
-   radix_tree_tag_set(>m_perag_tree, pag->pag_agno,
-  XFS_ICI_RECLAIM_TAG);
-   spin_unlock(>m_perag_lock);
+   xa_set_tag(>m_perag_xa, pag->pag_agno, XFS_ICI_RECLAIM_TAG);
 
/* schedule periodic background inode reclaim */
xfs_reclaim_work_queue(mp);
@@ -216,10 +210,7 @@ xfs_perag_clear_reclaim_tag(
return;
 
/* clear the reclaim tag from the perag radix tree */
-   spin_lock(>m_perag_lock);
-   radix_tree_tag_clear(>m_perag_tree, pag->pag_agno,
-XFS_ICI_RECLAIM_TAG);
-   spin_unlock(>m_perag_lock);
+   xa_clear_tag(>m_perag_xa, pag->pag_agno, XFS_ICI_RECLAIM_TAG);
trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
 }
 
@@ -847,12 +838,10 @@ void
 xfs_queue_eofblocks(
struct xfs_mount *mp)
 {
-   rcu_read_lock();
-   if (radix_tree_tagged(>m_perag_tree, XFS_ICI_EOFBLOCKS_TAG))
+   if (xa_tagged(>m_perag_xa, XFS_ICI_EOFBLOCKS_TAG))
queue_delayed_work(mp->m_eofblocks_workqueue,
   >m_eofblocks_work,
   msecs_to_jiffies(xfs_eofb_secs * 1000));
-   rcu_read_unlock();
 }
 
 void
@@ -874,12 +863,10 @@ void
 xfs_queue_cowblocks(
struct xfs_mount *mp)
 {
-   rcu_read_lock();
-   if (radix_tree_tagged(>m_perag_tree, XFS_ICI_COWBLOCKS_TAG))
+   if (xa_tagged(>m_perag_xa, XFS_ICI_COWBLOCKS_TAG))
queue_delayed_work(mp->m_eofblocks_workqueue,
   >m_cowblocks_work,
   msecs_to_jiffies(xfs_cowb_secs * 1000));
-   rcu_read_unlock();
 }
 
 void
@@ -1557,7 +1544,7 @@ __xfs_inode_set_blocks_tag(
void(*execute)(struct xfs_mount *mp),
void(*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
  int error, unsigned long caller_ip),
-   int tag)
+   xa_tag_ttag)
 {
struct xfs_mount *mp = ip->i_mount;
struct xfs_perag *pag;
@@ -1581,11 +1568,9 @@ __xfs_inode_set_blocks_tag(
   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), tag);
if (!tagged) {
/* propagate the eofblocks tag up into the perag radix tree */
-   spin_lock(>i_mount->m_perag_lock);
-   radix_tree_tag_set(>i_mount->m_perag_tree,
+   xa_set_tag(>i_mount->m_perag_xa,
   

[PATCH v6 71/99] xfs: Convert pag_ici_root to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Rename pag_ici_root to pag_ici_xa and use XArray APIs instead of radix
tree APIs.  Shorter code, typechecking on tag numbers, better error
checking in xfs_reclaim_inode(), and eliminates a call to
radix_tree_preload().
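
To illustrate why the preload disappears (a sketch only; the real insert
hunk is cut off further down in this archive, and the helper name and
locals below are stand-ins for what xfs_iget_cache_miss() does):
xa_cmpxchg() takes gfp flags and allocates internally, so there is no
separate preload/commit window left to manage.

	/* Sketch: insert a freshly built inode into the per-AG cache. */
	static int
	xfs_icache_insert_sketch(struct xfs_perag *pag, xfs_agino_t agino,
				 struct xfs_inode *ip)
	{
		void	*curr;

		curr = xa_cmpxchg(&pag->pag_ici_xa, agino, NULL, ip, GFP_NOFS);
		if (xa_is_err(curr))
			return xa_err(curr);	/* e.g. -ENOMEM */
		return curr ? -EAGAIN : 0;	/* -EAGAIN: lost the insert race */
	}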

Signed-off-by: Matthew Wilcox 
---
 fs/xfs/libxfs/xfs_sb.c |   2 +-
 fs/xfs/libxfs/xfs_sb.h |   2 +-
 fs/xfs/xfs_icache.c| 111 +++--
 fs/xfs/xfs_icache.h|   5 +--
 fs/xfs/xfs_inode.c |  24 ---
 fs/xfs/xfs_mount.c |   3 +-
 fs/xfs/xfs_mount.h |   3 +-
 7 files changed, 56 insertions(+), 94 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 3b0b65eb8224..8fb7c216c761 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -76,7 +76,7 @@ struct xfs_perag *
 xfs_perag_get_tag(
struct xfs_mount*mp,
xfs_agnumber_t  first,
-   int tag)
+   xa_tag_ttag)
 {
XA_STATE(xas, >m_perag_xa, first);
struct xfs_perag*pag;
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 961e6475a309..d2de90b8f39c 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -23,7 +23,7 @@
  */
 extern struct xfs_perag *xfs_perag_get(struct xfs_mount *, xfs_agnumber_t);
 extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t,
-  int tag);
+  xa_tag_t tag);
 extern voidxfs_perag_put(struct xfs_perag *pag);
 extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t);
 
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 65a8b91b2e70..10c76209227b 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -186,7 +186,7 @@ xfs_perag_set_reclaim_tag(
 {
struct xfs_mount*mp = pag->pag_mount;
 
-   lockdep_assert_held(>pag_ici_lock);
+   lockdep_assert_held(>pag_ici_xa.xa_lock);
if (pag->pag_ici_reclaimable++)
return;
 
@@ -205,7 +205,7 @@ xfs_perag_clear_reclaim_tag(
 {
struct xfs_mount*mp = pag->pag_mount;
 
-   lockdep_assert_held(>pag_ici_lock);
+   lockdep_assert_held(>pag_ici_xa.xa_lock);
if (--pag->pag_ici_reclaimable)
return;
 
@@ -228,16 +228,16 @@ xfs_inode_set_reclaim_tag(
struct xfs_perag*pag;
 
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-   spin_lock(>pag_ici_lock);
+   xa_lock(>pag_ici_xa);
spin_lock(>i_flags_lock);
 
-   radix_tree_tag_set(>pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino),
+   __xa_set_tag(>pag_ici_xa, XFS_INO_TO_AGINO(mp, ip->i_ino),
   XFS_ICI_RECLAIM_TAG);
xfs_perag_set_reclaim_tag(pag);
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
 
spin_unlock(>i_flags_lock);
-   spin_unlock(>pag_ici_lock);
+   xa_unlock(>pag_ici_xa);
xfs_perag_put(pag);
 }
 
@@ -246,7 +246,7 @@ xfs_inode_clear_reclaim_tag(
struct xfs_perag*pag,
xfs_ino_t   ino)
 {
-   radix_tree_tag_clear(>pag_ici_root,
+   __xa_clear_tag(>pag_ici_xa,
 XFS_INO_TO_AGINO(pag->pag_mount, ino),
 XFS_ICI_RECLAIM_TAG);
xfs_perag_clear_reclaim_tag(pag);
@@ -367,8 +367,8 @@ xfs_iget_cache_hit(
/*
 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
 * from stomping over us while we recycle the inode.  We can't
-* clear the radix tree reclaimable tag yet as it requires
-* pag_ici_lock to be held exclusive.
+* clear the xarray reclaimable tag yet as it requires
+* pag_ici_xa.xa_lock to be held exclusive.
 */
ip->i_flags |= XFS_IRECLAIM;
 
@@ -393,7 +393,7 @@ xfs_iget_cache_hit(
goto out_error;
}
 
-   spin_lock(>pag_ici_lock);
+   xa_lock(>pag_ici_xa);
spin_lock(>i_flags_lock);
 
/*
@@ -410,7 +410,7 @@ xfs_iget_cache_hit(
init_rwsem(>i_rwsem);
 
spin_unlock(>i_flags_lock);
-   spin_unlock(>pag_ici_lock);
+   xa_unlock(>pag_ici_xa);
} else {
/* If the VFS inode is being torn down, pause and try again. */
if (!igrab(inode)) {
@@ -471,17 +471,6 @@ xfs_iget_cache_miss(
goto out_destroy;
}
 
-   /*
-* Preload the radix tree so we can insert safely under the
-* write spinlock. Note that we cannot sleep inside the preload
-* region. Since we can be called from transaction context, don't
-* recurse into the file system.
-*/
-   if (radix_tree_preload(GFP_NOFS)) {
-   error = -EAGAIN;
-   goto out_destroy;
-   }
-

[PATCH v6 74/99] usb: Convert xhci-mem to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

The XArray API is a slightly better fit for xhci_insert_segment_mapping()
than the radix tree API was.
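
The better fit is mostly xa_cmpxchg(): it stores only when the slot is
currently empty and hands back whatever was already there, which is
exactly the "skip segments that were already added" behaviour the radix
tree version spelled out as lookup + preload + insert.  A commented
sketch of that call (same semantics as the one-liner in the diff below):

	void *old;

	/* Map the segment's index to its ring, but only if nothing is mapped yet. */
	old = xa_cmpxchg(trb_address_map, trb_index(seg->dma), NULL, ring, gfp);
	if (xa_is_err(old))
		return xa_err(old);		/* allocation failure */
	/* old == NULL: newly inserted; old != NULL: segment already mapped. */
	return 0;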

Signed-off-by: Matthew Wilcox 
---
 drivers/usb/host/xhci-mem.c | 68 +++--
 drivers/usb/host/xhci.h |  6 ++--
 2 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 3a29b32a3bd0..a2e15a9abc30 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -149,70 +149,60 @@ static void xhci_link_rings(struct xhci_hcd *xhci, struct xhci_ring *ring,
 }
 
 /*
- * We need a radix tree for mapping physical addresses of TRBs to which stream
- * ID they belong to.  We need to do this because the host controller won't tell
+ * We need to map physical addresses of TRBs to the stream ID they belong to.
+ * We need to do this because the host controller won't tell
  * us which stream ring the TRB came from.  We could store the stream ID in an
  * event data TRB, but that doesn't help us for the cancellation case, since the
  * endpoint may stop before it reaches that event data TRB.
  *
- * The radix tree maps the upper portion of the TRB DMA address to a ring
+ * The xarray maps the upper portion of the TRB DMA address to a ring
  * segment that has the same upper portion of DMA addresses.  For example, say I
  * have segments of size 1KB, that are always 1KB aligned.  A segment may
  * start at 0x10c91000 and end at 0x10c913f0.  If I use the upper 10 bits, the
- * key to the stream ID is 0x43244.  I can use the DMA address of the TRB to
- * pass the radix tree a key to get the right stream ID:
+ * index of the stream ID is 0x43244.  I can use the DMA address of the TRB as
+ * the xarray index to get the right stream ID:
  *
  * 0x10c90fff >> 10 = 0x43243
  * 0x10c912c0 >> 10 = 0x43244
  * 0x10c91400 >> 10 = 0x43245
  *
  * Obviously, only those TRBs with DMA addresses that are within the segment
- * will make the radix tree return the stream ID for that ring.
+ * will make the xarray return the stream ID for that ring.
  *
- * Caveats for the radix tree:
+ * Caveats for the xarray:
  *
- * The radix tree uses an unsigned long as a key pair.  On 32-bit systems, an
+ * The xarray uses an unsigned long for the index.  On 32-bit systems, an
  * unsigned long will be 32-bits; on a 64-bit system an unsigned long will be
  * 64-bits.  Since we only request 32-bit DMA addresses, we can use that as the
- * key on 32-bit or 64-bit systems (it would also be fine if we asked for 64-bit
- * PCI DMA addresses on a 64-bit system).  There might be a problem on 32-bit
- * extended systems (where the DMA address can be bigger than 32-bits),
+ * index on 32-bit or 64-bit systems (it would also be fine if we asked for
+ * 64-bit PCI DMA addresses on a 64-bit system).  There might be a problem on
+ * 32-bit extended systems (where the DMA address can be bigger than 32-bits),
  * if we allow the PCI dma mask to be bigger than 32-bits.  So don't do that.
  */
-static int xhci_insert_segment_mapping(struct radix_tree_root *trb_address_map,
+
+static unsigned long trb_index(dma_addr_t dma)
+{
+   return (unsigned long)(dma >> TRB_SEGMENT_SHIFT);
+}
+
+static int xhci_insert_segment_mapping(struct xarray *trb_address_map,
struct xhci_ring *ring,
struct xhci_segment *seg,
-   gfp_t mem_flags)
+   gfp_t gfp)
 {
-   unsigned long key;
-   int ret;
-
-   key = (unsigned long)(seg->dma >> TRB_SEGMENT_SHIFT);
/* Skip any segments that were already added. */
-   if (radix_tree_lookup(trb_address_map, key))
-   return 0;
-
-   ret = radix_tree_maybe_preload(mem_flags);
-   if (ret)
-   return ret;
-   ret = radix_tree_insert(trb_address_map,
-   key, ring);
-   radix_tree_preload_end();
-   return ret;
+   return xa_err(xa_cmpxchg(trb_address_map, trb_index(seg->dma), NULL,
+   ring, gfp));
 }
 
-static void xhci_remove_segment_mapping(struct radix_tree_root *trb_address_map,
+static void xhci_remove_segment_mapping(struct xarray *trb_address_map,
struct xhci_segment *seg)
 {
-   unsigned long key;
-
-   key = (unsigned long)(seg->dma >> TRB_SEGMENT_SHIFT);
-   if (radix_tree_lookup(trb_address_map, key))
-   radix_tree_delete(trb_address_map, key);
+   xa_erase(trb_address_map, trb_index(seg->dma));
 }
 
 static int xhci_update_stream_segment_mapping(
-   struct radix_tree_root *trb_address_map,
+   struct xarray *trb_address_map,
struct xhci_ring *ring,
struct xhci_segment *first_seg,
struct xhci_segment *last_seg,
@@ -574,8 +564,8 @@ struct xhci_ring *xhci_dma_to_transfer_ring(
u64 address)
 {
if (ep->ep_state & 

[PATCH] arm64: dts: hikey: Enable HS200 mode on eMMC

2018-01-17 Thread oscardagrach
According to the hi6220 datasheet, the MMC controller is JEDEC eMMC 4.5
compliant, in addition to supporting a clock of up to 150MHz. The Hikey
schematic also indicates the device utilizes 1.8v signaling. Define these
parameters in the device tree to enable HS200 mode.

Signed-off-by: Ryan Grachek 
---
 arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index 3aee6123d161..964e43e05ac6 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -299,7 +299,9 @@
/* GPIO blocks 16 thru 19 do not appear to be routed to pins */
 
dwmmc_0: dwmmc0@f723d000 {
+   max-frequency = <150000000>;
cap-mmc-highspeed;
+   mmc-hs200-1_8v;
non-removable;
bus-width = <0x8>;
vmmc-supply = <>;
-- 
2.11.0



Re: [REGRESSION][v4.14.y][v4.15] x86/intel_rdt/cqm: Improve limbo list processing

2018-01-17 Thread Joseph Salisbury
On 01/16/2018 01:59 PM, Thomas Gleixner wrote:
> On Tue, 16 Jan 2018, Yu, Fenghua wrote:
>>> From: Thomas Gleixner [mailto:t...@linutronix.de]
>> Is this a Haswell specific issue?
>>
>> I run the following test forever without issue on Broadwell and 4.15.0-rc6 
>> with rdt mounted:
>> for ((;;)) do
>> for ((i=1;i<88;i++)) do
>> echo 0 >/sys/devices/system/cpu/cpu$i/online
>> done
>> echo "online cpus:"
>> grep processor /proc/cpuinfo |wc
>> for ((i=1;i<88;i++)) do
>> echo 1 >/sys/devices/system/cpu/cpu$i/online
>> done
>> echo "online cpus:"
>> grep processor /proc/cpuinfo|wc
>> done
>>
>> I'm finding a Haswell to reproduce the issue.
> Come on. This is crystal clear from the KASAN trace. And the fix is simple 
> enough.
>
> You simply do not run into it because on your machine
>
> is_llc_occupancy_enabled() is false...
>
> Thanks,
>
>   tglx
>   
> 8<
>
> diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
> index 88dcf8479013..99442370de40 100644
> --- a/arch/x86/kernel/cpu/intel_rdt.c
> +++ b/arch/x86/kernel/cpu/intel_rdt.c
> @@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct 
> rdt_resource *r)
>*/
>   if (static_branch_unlikely(_mon_enable_key))
>   rmdir_mondata_subdir_allrdtgrp(r, d->id);
> - kfree(d->ctrl_val);
> - kfree(d->rmid_busy_llc);
> - kfree(d->mbm_total);
> - kfree(d->mbm_local);
>   list_del(>list);
>   if (is_mbm_enabled())
>   cancel_delayed_work(>mbm_over);
> @@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct 
> rdt_resource *r)
>   cancel_delayed_work(>cqm_limbo);
>   }
>  
> + kfree(d->ctrl_val);
> + kfree(d->rmid_busy_llc);
> + kfree(d->mbm_total);
> + kfree(d->mbm_local);
>   kfree(d);
>   return;
>   }

Thanks, Thomas.  I'll build some test kernels and have your patch tested
out.


Thanks,


Joe




[PATCH v6 72/99] xfs: Convert xfs dquot to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This is a pretty straight-forward conversion.
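
One behavioural nicety worth calling out, sketched here with comments
(locking with qi_xa_lock elided for brevity): xa_cmpxchg() returns the
entry that was already present, so when two threads race to instantiate
the same dquot the loser adopts the winner's dquot directly (the new
"goto found") instead of throwing everything away and restarting the
lookup as the radix tree code did.

	duplicate = xa_cmpxchg(xa, id, NULL, dqp, GFP_NOFS);
	if (xa_is_err(duplicate))
		return xa_err(duplicate);	/* allocation failed */
	if (duplicate) {
		xfs_qm_dqdestroy(dqp);		/* discard our freshly built dquot */
		dqp = duplicate;		/* adopt the one that won the race */
		goto found;			/* continue as if xa_load() had hit */
	}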

Signed-off-by: Matthew Wilcox 
---
 fs/xfs/xfs_dquot.c | 38 +-
 fs/xfs/xfs_qm.c| 36 ++--
 fs/xfs/xfs_qm.h| 18 +-
 3 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index e2a466df5dd1..c6832db23ca8 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -44,7 +44,7 @@
  * Lock order:
  *
  * ip->i_lock
- *   qi->qi_tree_lock
+ *   qi->qi_xa_lock
  * dquot->q_qlock (xfs_dqlock() and friends)
  *   dquot->q_flush (xfs_dqflock() and friends)
  *   qi->qi_lru_lock
@@ -752,8 +752,8 @@ xfs_qm_dqget(
xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */
 {
struct xfs_quotainfo*qi = mp->m_quotainfo;
-   struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
-   struct xfs_dquot*dqp;
+   struct xarray   *xa = xfs_dquot_xa(qi, type);
+   struct xfs_dquot*dqp, *duplicate;
int error;
 
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -772,23 +772,24 @@ xfs_qm_dqget(
}
 
 restart:
-   mutex_lock(>qi_tree_lock);
-   dqp = radix_tree_lookup(tree, id);
+   mutex_lock(>qi_xa_lock);
+   dqp = xa_load(xa, id);
+found:
if (dqp) {
xfs_dqlock(dqp);
if (dqp->dq_flags & XFS_DQ_FREEING) {
xfs_dqunlock(dqp);
-   mutex_unlock(>qi_tree_lock);
+   mutex_unlock(>qi_xa_lock);
trace_xfs_dqget_freeing(dqp);
delay(1);
goto restart;
}
 
-   /* uninit / unused quota found in radix tree, keep looking  */
+   /* uninit / unused quota found, keep looking  */
if (flags & XFS_QMOPT_DQNEXT) {
if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
xfs_dqunlock(dqp);
-   mutex_unlock(>qi_tree_lock);
+   mutex_unlock(>qi_xa_lock);
error = xfs_dq_get_next_id(mp, type, );
if (error)
return error;
@@ -797,14 +798,14 @@ xfs_qm_dqget(
}
 
dqp->q_nrefs++;
-   mutex_unlock(>qi_tree_lock);
+   mutex_unlock(>qi_xa_lock);
 
trace_xfs_dqget_hit(dqp);
XFS_STATS_INC(mp, xs_qm_dqcachehits);
*O_dqpp = dqp;
return 0;
}
-   mutex_unlock(>qi_tree_lock);
+   mutex_unlock(>qi_xa_lock);
XFS_STATS_INC(mp, xs_qm_dqcachemisses);
 
/*
@@ -854,20 +855,23 @@ xfs_qm_dqget(
}
}
 
-   mutex_lock(>qi_tree_lock);
-   error = radix_tree_insert(tree, id, dqp);
-   if (unlikely(error)) {
-   WARN_ON(error != -EEXIST);
+   mutex_lock(>qi_xa_lock);
+   duplicate = xa_cmpxchg(xa, id, NULL, dqp, GFP_NOFS);
+   if (unlikely(duplicate)) {
+   if (xa_is_err(duplicate)) {
+   mutex_unlock(>qi_xa_lock);
+   return xa_err(duplicate);
+   }
 
/*
 * Duplicate found. Just throw away the new dquot and start
 * over.
 */
-   mutex_unlock(>qi_tree_lock);
trace_xfs_dqget_dup(dqp);
xfs_qm_dqdestroy(dqp);
XFS_STATS_INC(mp, xs_qm_dquot_dups);
-   goto restart;
+   dqp = duplicate;
+   goto found;
}
 
/*
@@ -877,7 +881,7 @@ xfs_qm_dqget(
dqp->q_nrefs = 1;
 
qi->qi_dquots++;
-   mutex_unlock(>qi_tree_lock);
+   mutex_unlock(>qi_xa_lock);
 
/* If we are asked to find next active id, keep looking */
if (flags & XFS_QMOPT_DQNEXT) {
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index b897b11afb2c..000b207762d6 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -67,7 +67,7 @@ xfs_qm_dquot_walk(
void*data)
 {
struct xfs_quotainfo*qi = mp->m_quotainfo;
-   struct radix_tree_root  *tree = xfs_dquot_tree(qi, type);
+   struct xarray   *xa = xfs_dquot_xa(qi, type);
uint32_tnext_index;
int last_error = 0;
int skipped;
@@ -83,11 +83,11 @@ xfs_qm_dquot_walk(
int error = 0;
int i;
 
-   mutex_lock(>qi_tree_lock);
-   nr_found = radix_tree_gang_lookup(tree, (void **)batch,
-   next_index, XFS_DQ_LOOKUP_BATCH);
+   mutex_lock(>qi_xa_lock);
+   nr_found = xa_extract(xa, (void **)batch, next_index,
+

Re: [GIT PULL] x86 fixes

2018-01-17 Thread Linus Torvalds
On Wed, Jan 17, 2018 at 7:41 AM, Ingo Molnar  wrote:
>
>  - A kdump fix
>
>   out-of-topic modifications in x86-urgent-for-linus:
>   -
>   include/linux/crash_core.h # 9f15b9120f56: kdump: Write the correct 
> add
>   kernel/crash_core.c# 9f15b9120f56: kdump: Write the correct 
> add

This came through Andrew too. It all merged fine since there were no
other modifications, but it's a bit odd how this was in the x86 tree,
and even if that part makes sense it's a sign of lack of communication
at some point.

Oh well. Not a big deal. I just thought I'd mention it.

   Linus


[PATCH v6 75/99] md: Convert raid5-cache to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This is the first user of the radix tree I've converted which was
storing numbers rather than pointers.  I'm fairly pleased with how
well it came out.  There's less boiler-plate involved than there was
with the radix tree, so that's a win.  It does use the advanced API,
and I think that's a signal that there needs to be a separate API for
using the XArray for only integers.
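
For readers who have not seen integers stored in an XArray before, a
minimal sketch of the idea (not the patch itself, and assuming the
xa_mk_value()/xa_to_value() helpers for encoding small integers as
entries): the helpers replace the hand-rolled R5C_RADIX_COUNT_SHIFT
shifting, and the advanced API keeps the read-modify-write of the
counter under the array's own lock.

	/* Sketch: bump the per-big_stripe counter stored as a value entry. */
	static void big_stripe_inc(struct xarray *xa, unsigned long index)
	{
		XA_STATE(xas, xa, index);
		void *entry;
		unsigned long count;

		xas_lock(&xas);
		entry = xas_load(&xas);
		count = entry ? xa_to_value(entry) : 0;
		/* An allocation failure here is tolerable, just as the old
		 * code tolerated radix_tree_insert() failing. */
		xas_store(&xas, xa_mk_value(count + 1));
		xas_unlock(&xas);
	}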

Signed-off-by: Matthew Wilcox 
---
 drivers/md/raid5-cache.c | 119 ---
 1 file changed, 40 insertions(+), 79 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 39f31f07ffe9..2c8ad0ed9b48 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -158,9 +158,8 @@ struct r5l_log {
/* to disable write back during in degraded mode */
struct work_struct disable_writeback_work;
 
-   /* to for chunk_aligned_read in writeback mode, details below */
-   spinlock_t tree_lock;
-   struct radix_tree_root big_stripe_tree;
+   /* for chunk_aligned_read in writeback mode, details below */
+   struct xarray big_stripe;
 };
 
 /*
@@ -170,9 +169,8 @@ struct r5l_log {
  * chunk contains 64 4kB-page, so this chunk contain 64 stripes). For
  * chunk_aligned_read, these stripes are grouped into one "big_stripe".
  * For each big_stripe, we count how many stripes of this big_stripe
- * are in the write back cache. These data are tracked in a radix tree
- * (big_stripe_tree). We use radix_tree item pointer as the counter.
- * r5c_tree_index() is used to calculate keys for the radix tree.
+ * are in the write back cache. This counter is tracked in an xarray
+ * (big_stripe). r5c_index() is used to calculate the index.
  *
  * chunk_aligned_read() calls r5c_big_stripe_cached() to look up
  * big_stripe of each chunk in the tree. If this big_stripe is in the
@@ -180,9 +178,9 @@ struct r5l_log {
  * rcu_read_lock().
  *
  * It is necessary to remember whether a stripe is counted in
- * big_stripe_tree. Instead of adding new flag, we reuses existing flags:
+ * big_stripe. Instead of adding new flag, we reuses existing flags:
  * STRIPE_R5C_PARTIAL_STRIPE and STRIPE_R5C_FULL_STRIPE. If either of these
- * two flags are set, the stripe is counted in big_stripe_tree. This
+ * two flags are set, the stripe is counted in big_stripe. This
  * requires moving set_bit(STRIPE_R5C_PARTIAL_STRIPE) to
  * r5c_try_caching_write(); and moving clear_bit of
  * STRIPE_R5C_PARTIAL_STRIPE and STRIPE_R5C_FULL_STRIPE to
@@ -190,23 +188,13 @@ struct r5l_log {
  */
 
 /*
- * radix tree requests lowest 2 bits of data pointer to be 2b'00.
- * So it is necessary to left shift the counter by 2 bits before using it
- * as data pointer of the tree.
- */
-#define R5C_RADIX_COUNT_SHIFT 2
-
-/*
- * calculate key for big_stripe_tree
+ * calculate key for big_stripe
  *
  * sect: align_bi->bi_iter.bi_sector or sh->sector
  */
-static inline sector_t r5c_tree_index(struct r5conf *conf,
- sector_t sect)
+static inline sector_t r5c_index(struct r5conf *conf, sector_t sect)
 {
-   sector_t offset;
-
-   offset = sector_div(sect, conf->chunk_sectors);
+   sector_div(sect, conf->chunk_sectors);
return sect;
 }
 
@@ -2646,10 +2634,6 @@ int r5c_try_caching_write(struct r5conf *conf,
int i;
struct r5dev *dev;
int to_cache = 0;
-   void **pslot;
-   sector_t tree_index;
-   int ret;
-   uintptr_t refcount;
 
BUG_ON(!r5c_is_writeback(log));
 
@@ -2697,39 +2681,29 @@ int r5c_try_caching_write(struct r5conf *conf,
}
}
 
-   /* if the stripe is not counted in big_stripe_tree, add it now */
+   /* if the stripe is not counted in big_stripe, add it now */
if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, >state) &&
!test_bit(STRIPE_R5C_FULL_STRIPE, >state)) {
-   tree_index = r5c_tree_index(conf, sh->sector);
-   spin_lock(>tree_lock);
-   pslot = radix_tree_lookup_slot(>big_stripe_tree,
-  tree_index);
-   if (pslot) {
-   refcount = (uintptr_t)radix_tree_deref_slot_protected(
-   pslot, >tree_lock) >>
-   R5C_RADIX_COUNT_SHIFT;
-   radix_tree_replace_slot(
-   >big_stripe_tree, pslot,
-   (void *)((refcount + 1) << R5C_RADIX_COUNT_SHIFT));
-   } else {
-   /*
-* this radix_tree_insert can fail safely, so no
-* need to call radix_tree_preload()
-*/
-   ret = radix_tree_insert(
-   >big_stripe_tree, tree_index,
-   (void *)(1 << R5C_RADIX_COUNT_SHIFT));
-   if (ret) {
-  

[PATCH v6 73/99] xfs: Convert mru cache to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This eliminates a call to radix_tree_preload().
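
For flavour, a sketch of the preload-free insert loop (the hunk is
truncated below in this archive, so the loop's closing condition here is
an assumption rather than the exact patch text): xas_nomem() is the
XArray helper that, when the store failed for lack of memory, allocates
with the given gfp flags and asks the caller to retry, which is the job
radix_tree_preload() used to do up front.

	do {
		xas_lock(&xas);
		xas_store(&xas, elem);			/* insert at elem->key */
		error = xas_error(&xas);		/* 0 or e.g. -ENOMEM */
		if (!error)
			_xfs_mru_cache_list_insert(mru, elem);
		xas_unlock(&xas);
	} while (xas_nomem(&xas, GFP_NOFS));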

Signed-off-by: Matthew Wilcox 
---
 fs/xfs/xfs_mru_cache.c | 72 +++---
 1 file changed, 33 insertions(+), 39 deletions(-)

diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index f8a674d7f092..2179bede5396 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -101,10 +101,9 @@
  * an infinite loop in the code.
  */
 struct xfs_mru_cache {
-   struct radix_tree_root  store; /* Core storage data structure.  */
+   struct xarray   store; /* Core storage data structure.  */
struct list_head*lists;/* Array of lists, one per grp.  */
struct list_headreap_list; /* Elements overdue for reaping. */
-   spinlock_t  lock;  /* Lock to protect this struct.  */
unsigned intgrp_count; /* Number of discrete groups.*/
unsigned intgrp_time;  /* Time period spanned by grps.  */
unsigned intlru_grp;   /* Group containing time zero.   */
@@ -232,22 +231,21 @@ _xfs_mru_cache_list_insert(
  * data store, removing it from the reap list, calling the client's free
  * function and deleting the element from the element zone.
  *
- * We get called holding the mru->lock, which we drop and then reacquire.
- * Sparse need special help with this to tell it we know what we are doing.
+ * We get called holding the mru->store lock, which we drop and then reacquire.
+ * Sparse needs special help with this to tell it we know what we are doing.
  */
 STATIC void
 _xfs_mru_cache_clear_reap_list(
struct xfs_mru_cache*mru)
-   __releases(mru->lock) __acquires(mru->lock)
+   __releases(mru->store) __acquires(mru->store)
 {
struct xfs_mru_cache_elem *elem, *next;
struct list_headtmp;
 
INIT_LIST_HEAD();
list_for_each_entry_safe(elem, next, >reap_list, list_node) {
-
/* Remove the element from the data store. */
-   radix_tree_delete(>store, elem->key);
+   __xa_erase(>store, elem->key);
 
/*
 * remove to temp list so it can be freed without
@@ -255,14 +253,14 @@ _xfs_mru_cache_clear_reap_list(
 */
list_move(>list_node, );
}
-   spin_unlock(>lock);
+   xa_unlock(>store);
 
list_for_each_entry_safe(elem, next, , list_node) {
list_del_init(>list_node);
mru->free_func(elem);
}
 
-   spin_lock(>lock);
+   xa_lock(>store);
 }
 
 /*
@@ -284,7 +282,7 @@ _xfs_mru_cache_reap(
if (!mru || !mru->lists)
return;
 
-   spin_lock(>lock);
+   xa_lock(>store);
next = _xfs_mru_cache_migrate(mru, jiffies);
_xfs_mru_cache_clear_reap_list(mru);
 
@@ -298,7 +296,7 @@ _xfs_mru_cache_reap(
queue_delayed_work(xfs_mru_reap_wq, >work, next);
}
 
-   spin_unlock(>lock);
+   xa_unlock(>store);
 }
 
 int
@@ -358,13 +356,8 @@ xfs_mru_cache_create(
for (grp = 0; grp < mru->grp_count; grp++)
INIT_LIST_HEAD(mru->lists + grp);
 
-   /*
-* We use GFP_KERNEL radix tree preload and do inserts under a
-* spinlock so GFP_ATOMIC is appropriate for the radix tree itself.
-*/
-   INIT_RADIX_TREE(>store, GFP_ATOMIC);
+   xa_init(>store);
INIT_LIST_HEAD(>reap_list);
-   spin_lock_init(>lock);
INIT_DELAYED_WORK(>work, _xfs_mru_cache_reap);
 
mru->grp_time  = grp_time;
@@ -394,17 +387,17 @@ xfs_mru_cache_flush(
if (!mru || !mru->lists)
return;
 
-   spin_lock(>lock);
+   xa_lock(>store);
if (mru->queued) {
-   spin_unlock(>lock);
+   xa_unlock(>store);
cancel_delayed_work_sync(>work);
-   spin_lock(>lock);
+   xa_lock(>store);
}
 
_xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time);
_xfs_mru_cache_clear_reap_list(mru);
 
-   spin_unlock(>lock);
+   xa_unlock(>store);
 }
 
 void
@@ -431,24 +424,24 @@ xfs_mru_cache_insert(
unsigned long   key,
struct xfs_mru_cache_elem *elem)
 {
+   XA_STATE(xas, >store, key);
int error;
 
ASSERT(mru && mru->lists);
if (!mru || !mru->lists)
return -EINVAL;
 
-   if (radix_tree_preload(GFP_NOFS))
-   return -ENOMEM;
-
INIT_LIST_HEAD(>list_node);
elem->key = key;
 
-   spin_lock(>lock);
-   error = radix_tree_insert(>store, key, elem);
-   radix_tree_preload_end();
-   if (!error)
-   _xfs_mru_cache_list_insert(mru, elem);
-   spin_unlock(>lock);
+   do {
+   xas_lock();
+   xas_store(, elem);
+   error = xas_error();
+   

[PATCH v6 77/99] fscache: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Removes another user of radix_tree_preload().
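
One example of how the lookups simplify: xa_load() takes the RCU read lock
itself, so the open-coded rcu_read_lock()/radix_tree_lookup() pairs collapse
to a single call, as in __fscache_check_page_write() below.  A minimal sketch
of that shape (the helper name here is invented):

        #include <linux/xarray.h>

        /* Sketch: presence test; xa_load() handles the RCU locking internally. */
        static bool example_page_is_pending(struct xarray *stores,
                                            unsigned long index)
        {
                return xa_load(stores, index) != NULL;
        }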

Signed-off-by: Matthew Wilcox 
---
 fs/fscache/cookie.c |   6 +-
 fs/fscache/internal.h   |   2 +-
 fs/fscache/object.c |   2 +-
 fs/fscache/page.c   | 152 +---
 fs/fscache/stats.c  |   6 +-
 include/linux/fscache.h |   8 +--
 6 files changed, 76 insertions(+), 100 deletions(-)

diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index e9054e0c1a49..6d45134d609e 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -109,9 +109,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
cookie->netfs_data  = netfs_data;
cookie->flags   = (1 << FSCACHE_COOKIE_NO_DATA_YET);
 
-   /* radix tree insertion won't use the preallocation pool unless it's
-* told it may not wait */
-   INIT_RADIX_TREE(>stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+   xa_init(>stores);
 
switch (cookie->def->type) {
case FSCACHE_COOKIE_TYPE_INDEX:
@@ -608,7 +606,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie 
*cookie, bool retire)
/* Clear pointers back to the netfs */
cookie->netfs_data  = NULL;
cookie->def = NULL;
-   BUG_ON(!radix_tree_empty(>stores));
+   BUG_ON(!xa_empty(>stores));
 
if (cookie->parent) {
ASSERTCMP(atomic_read(>parent->usage), >, 0);
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 0ff4b49a0037..468d9bd7f8c3 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -200,7 +200,7 @@ extern atomic_t fscache_n_stores_oom;
 extern atomic_t fscache_n_store_ops;
 extern atomic_t fscache_n_store_calls;
 extern atomic_t fscache_n_store_pages;
-extern atomic_t fscache_n_store_radix_deletes;
+extern atomic_t fscache_n_store_xarray_deletes;
 extern atomic_t fscache_n_store_pages_over_limit;
 
 extern atomic_t fscache_n_store_vmscan_not_storing;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index aa0e71f02c33..ed165736a358 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -956,7 +956,7 @@ static const struct fscache_state 
*_fscache_invalidate_object(struct fscache_obj
 * retire the object instead.
 */
if (!fscache_use_cookie(object)) {
-   ASSERT(radix_tree_empty(>cookie->stores));
+   ASSERT(xa_empty(>cookie->stores));
set_bit(FSCACHE_OBJECT_RETIRED, >flags);
_leave(" [no cookie]");
return transit_to(KILL_OBJECT);
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 961029e04027..315e2745f822 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -22,13 +22,7 @@
  */
 bool __fscache_check_page_write(struct fscache_cookie *cookie, struct page 
*page)
 {
-   void *val;
-
-   rcu_read_lock();
-   val = radix_tree_lookup(>stores, page->index);
-   rcu_read_unlock();
-
-   return val != NULL;
+   return xa_load(>stores, page->index) != NULL;
 }
 EXPORT_SYMBOL(__fscache_check_page_write);
 
@@ -64,15 +58,15 @@ bool __fscache_maybe_release_page(struct fscache_cookie 
*cookie,
  struct page *page,
  gfp_t gfp)
 {
+   XA_STATE(xas, >stores, page->index);
struct page *xpage;
-   void *val;
 
_enter("%p,%p,%x", cookie, page, gfp);
 
 try_again:
rcu_read_lock();
-   val = radix_tree_lookup(>stores, page->index);
-   if (!val) {
+   xpage = xas_load();
+   if (!xpage) {
rcu_read_unlock();
fscache_stat(_n_store_vmscan_not_storing);
__fscache_uncache_page(cookie, page);
@@ -81,31 +75,32 @@ bool __fscache_maybe_release_page(struct fscache_cookie 
*cookie,
 
/* see if the page is actually undergoing storage - if so we can't get
 * rid of it till the cache has finished with it */
-   if (radix_tree_tag_get(>stores, page->index,
-  FSCACHE_COOKIE_STORING_TAG)) {
+   if (xas_get_tag(, FSCACHE_COOKIE_STORING_TAG)) {
rcu_read_unlock();
+   xas_retry(, XA_RETRY_ENTRY);
goto page_busy;
}
 
/* the page is pending storage, so we attempt to cancel the store and
 * discard the store request so that the page can be reclaimed */
-   spin_lock(>stores_lock);
+   xas_retry(, XA_RETRY_ENTRY);
+   xas_lock();
rcu_read_unlock();
 
-   if (radix_tree_tag_get(>stores, page->index,
-  FSCACHE_COOKIE_STORING_TAG)) {
+   xpage = xas_load();
+   if (xas_get_tag(, FSCACHE_COOKIE_STORING_TAG)) {
/* the page started to undergo storage whilst we were looking,
 * so now we can only wait or return */
spin_unlock(>stores_lock);
goto page_busy;
}
 
-   xpage = radix_tree_delete(>stores, page->index);
+   

[PATCH v6 76/99] irqdomain: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

In a non-critical path, irqdomain wants to know how many entries are
stored in the xarray, so add xa_count().  This is a pretty straightforward
conversion; mostly just removing now-redundant locking.  The only thing
of note is just how much simpler irq_domain_fix_revmap() becomes.
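
xa_count() is new API added by this patch (declared in the xarray.h hunk
below).  A hypothetical caller, counting how many hwirqs ended up outside the
linear map, might look like the following sketch (the helper is illustrative
and not part of the patch):

        #include <linux/irqdomain.h>
        #include <linux/xarray.h>

        /* Sketch only: number of reverse-map entries stored in the XArray. */
        static unsigned long example_revmap_entries(struct irq_domain *domain)
        {
                return xa_count(&domain->revmap_array);
        }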

Signed-off-by: Matthew Wilcox 
Acked-by: Marc Zyngier 
---
 include/linux/irqdomain.h | 10 --
 include/linux/xarray.h|  1 +
 kernel/irq/irqdomain.c| 39 ++-
 lib/xarray.c  | 25 +
 4 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 48c7e86bb556..6c69d9141709 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -33,8 +33,7 @@
 #include 
 #include 
 #include 
-#include 
-#include 
+#include 
 
 struct device_node;
 struct irq_domain;
@@ -151,7 +150,7 @@ struct irq_domain_chip_generic;
  * @revmap_direct_max_irq: The largest hwirq that can be set for controllers 
that
  * support direct mapping
  * @revmap_size: Size of the linear map table @linear_revmap[]
- * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
+ * @revmap_array: hwirqs that don't fit in the linear map
  * @linear_revmap: Linear table of hwirq->virq reverse mappings
  */
 struct irq_domain {
@@ -177,8 +176,7 @@ struct irq_domain {
irq_hw_number_t hwirq_max;
unsigned int revmap_direct_max_irq;
unsigned int revmap_size;
-   struct radix_tree_root revmap_tree;
-   struct mutex revmap_tree_mutex;
+   struct xarray revmap_array;
unsigned int linear_revmap[];
 };
 
@@ -378,7 +376,7 @@ extern void irq_dispose_mapping(unsigned int virq);
  * This is a fast path alternative to irq_find_mapping() that can be
  * called directly by irq controller code to save a handful of
  * instructions. It is always safe to call, but won't find irqs mapped
- * using the radix tree.
+ * using the xarray.
  */
 static inline unsigned int irq_linear_revmap(struct irq_domain *domain,
 irq_hw_number_t hwirq)
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index c3f7405c5517..892288fe9595 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -269,6 +269,7 @@ void *xa_find_after(struct xarray *xa, unsigned long *index,
unsigned long max, xa_tag_t) __attribute__((nonnull(2)));
 unsigned int xa_extract(struct xarray *, void **dst, unsigned long start,
unsigned long max, unsigned int n, xa_tag_t);
+unsigned long xa_count(struct xarray *);
 void xa_destroy(struct xarray *);
 
 /**
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 62068ad46930..d6da3a8eadd2 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -114,7 +114,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
 /**
  * __irq_domain_add() - Allocate a new irq_domain data structure
  * @fwnode: firmware node for the interrupt controller
- * @size: Size of linear map; 0 for radix mapping only
+ * @size: Size of linear map; 0 for xarray mapping only
  * @hwirq_max: Maximum number of interrupts supported by controller
  * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
  *  direct mapping
@@ -209,8 +209,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle 
*fwnode, int size,
of_node_get(of_node);
 
/* Fill structure */
-   INIT_RADIX_TREE(>revmap_tree, GFP_KERNEL);
-   mutex_init(>revmap_tree_mutex);
+   xa_init(>revmap_array);
domain->ops = ops;
domain->host_data = host_data;
domain->hwirq_max = hwirq_max;
@@ -241,7 +240,7 @@ void irq_domain_remove(struct irq_domain *domain)
mutex_lock(_domain_mutex);
debugfs_remove_domain_dir(domain);
 
-   WARN_ON(!radix_tree_empty(>revmap_tree));
+   WARN_ON(!xa_empty(>revmap_array));
 
list_del(>link);
 
@@ -462,9 +461,7 @@ static void irq_domain_clear_mapping(struct irq_domain 
*domain,
if (hwirq < domain->revmap_size) {
domain->linear_revmap[hwirq] = 0;
} else {
-   mutex_lock(>revmap_tree_mutex);
-   radix_tree_delete(>revmap_tree, hwirq);
-   mutex_unlock(>revmap_tree_mutex);
+   xa_erase(>revmap_array, hwirq);
}
 }
 
@@ -475,9 +472,7 @@ static void irq_domain_set_mapping(struct irq_domain 
*domain,
if (hwirq < domain->revmap_size) {
domain->linear_revmap[hwirq] = irq_data->irq;
} else {
-   mutex_lock(>revmap_tree_mutex);
-   radix_tree_insert(>revmap_tree, hwirq, irq_data);
-   mutex_unlock(>revmap_tree_mutex);
+   xa_store(>revmap_array, hwirq, irq_data, GFP_KERNEL);
}
 }
 
@@ -585,7 +580,7 @@ EXPORT_SYMBOL_GPL(irq_domain_associate_many);
  * This routine is used for irq 

[PATCH v6 79/99] blk-cgroup: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This call to radix_tree_preload is awkward.  At the point of allocation,
we're under not only a local lock, but also under the queue lock.  So we
can't back out, drop the lock and retry the allocation.  Replace this
preload call with a call to xa_reserve() which will ensure the memory is
allocated.
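
The shape of the fix, as a sketch (the function names are illustrative;
xa_reserve() pre-allocates the slot while sleeping is still allowed, so the
later store under the lock cannot fail with ENOMEM):

        #include <linux/xarray.h>

        /* Step 1: in a context that may sleep, reserve the slot for this id. */
        static int example_prepare_slot(struct xarray *xa, unsigned long id)
        {
                return xa_reserve(xa, id, GFP_KERNEL);
        }

        /* Step 2: later, under xa_lock (nested inside the queue lock), store
         * with GFP_NOWAIT; the node already exists, so this cannot run out
         * of memory. */
        static int example_commit_slot(struct xarray *xa, unsigned long id,
                                       void *blkg)
        {
                int ret;

                xa_lock(xa);
                ret = xa_err(__xa_store(xa, id, blkg, GFP_NOWAIT));
                xa_unlock(xa);
                return ret;
        }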

Signed-off-by: Matthew Wilcox 
---
 block/bfq-cgroup.c |  4 ++--
 block/blk-cgroup.c | 52 ++
 block/cfq-iosched.c|  4 ++--
 include/linux/blk-cgroup.h |  5 ++---
 4 files changed, 31 insertions(+), 34 deletions(-)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index da1525ec4c87..0648aaa6498b 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -860,7 +860,7 @@ static int bfq_io_set_weight_legacy(struct 
cgroup_subsys_state *css,
return ret;
 
ret = 0;
-   spin_lock_irq(>lock);
+   xa_lock_irq(>blkg_array);
bfqgd->weight = (unsigned short)val;
hlist_for_each_entry(blkg, >blkg_list, blkcg_node) {
struct bfq_group *bfqg = blkg_to_bfqg(blkg);
@@ -894,7 +894,7 @@ static int bfq_io_set_weight_legacy(struct 
cgroup_subsys_state *css,
bfqg->entity.prio_changed = 1;
}
}
-   spin_unlock_irq(>lock);
+   xa_unlock_irq(>blkg_array);
 
return ret;
 }
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 4117524ca45b..37962d52f1a8 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -146,12 +146,12 @@ struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
struct blkcg_gq *blkg;
 
/*
-* Hint didn't match.  Look up from the radix tree.  Note that the
+* Hint didn't match.  Fetch from the xarray.  Note that the
 * hint can only be updated under queue_lock as otherwise @blkg
-* could have already been removed from blkg_tree.  The caller is
+* could have already been removed from blkg_array.  The caller is
 * responsible for grabbing queue_lock if @update_hint.
 */
-   blkg = radix_tree_lookup(>blkg_tree, q->id);
+   blkg = xa_load(>blkg_array, q->id);
if (blkg && blkg->q == q) {
if (update_hint) {
lockdep_assert_held(q->queue_lock);
@@ -223,8 +223,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
}
 
/* insert */
-   spin_lock(>lock);
-   ret = radix_tree_insert(>blkg_tree, q->id, blkg);
+   xa_lock(>blkg_array);
+   ret = xa_err(__xa_store(>blkg_array, q->id, blkg, GFP_NOWAIT));
if (likely(!ret)) {
hlist_add_head_rcu(>blkcg_node, >blkg_list);
list_add(>q_node, >blkg_list);
@@ -237,7 +237,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
}
}
blkg->online = true;
-   spin_unlock(>lock);
+   xa_unlock(>blkg_array);
 
if (!ret)
return blkg;
@@ -314,7 +314,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
int i;
 
lockdep_assert_held(blkg->q->queue_lock);
-   lockdep_assert_held(>lock);
+   lockdep_assert_held(>blkg_array.xa_lock);
 
/* Something wrong if we are trying to remove same group twice */
WARN_ON_ONCE(list_empty(>q_node));
@@ -334,7 +334,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 
blkg->online = false;
 
-   radix_tree_delete(>blkg_tree, blkg->q->id);
+   xa_erase(>blkg_array, blkg->q->id);
list_del_init(>q_node);
hlist_del_init_rcu(>blkcg_node);
 
@@ -368,9 +368,9 @@ static void blkg_destroy_all(struct request_queue *q)
list_for_each_entry_safe(blkg, n, >blkg_list, q_node) {
struct blkcg *blkcg = blkg->blkcg;
 
-   spin_lock(>lock);
+   xa_lock(>blkg_array);
blkg_destroy(blkg);
-   spin_unlock(>lock);
+   xa_unlock(>blkg_array);
}
 
q->root_blkg = NULL;
@@ -443,7 +443,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state 
*css,
int i;
 
mutex_lock(_pol_mutex);
-   spin_lock_irq(>lock);
+   xa_lock_irq(>blkg_array);
 
/*
 * Note that stat reset is racy - it doesn't synchronize against
@@ -462,7 +462,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state 
*css,
}
}
 
-   spin_unlock_irq(>lock);
+   xa_unlock_irq(>blkg_array);
mutex_unlock(_pol_mutex);
return 0;
 }
@@ -1012,7 +1012,7 @@ static void blkcg_css_offline(struct cgroup_subsys_state 
*css)
 {
struct blkcg *blkcg = css_to_blkcg(css);
 
-   spin_lock_irq(>lock);
+   xa_lock_irq(>blkg_array);
 
while (!hlist_empty(>blkg_list)) {
struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
@@ -1023,13 +1023,13 @@ static void blkcg_css_offline(struct 
cgroup_subsys_state *css)

[PATCH v6 78/99] sh: intc: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

The radix tree was being protected by a raw spinlock.  I believe that
was not necessary, and the new internal regular spinlock will be
adequate for this array.
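
A compact sketch of the tagging idiom the driver ends up with (identifiers are
illustrative; the tag calls use the xa_set_tag()/xa_get_tag()/XA_TAG_0 names
proposed by this series):

        #include <linux/xarray.h>

        #define EXAMPLE_NEEDS_ALLOC     XA_TAG_0

        /* Sketch: store a mapping and flag it as not yet backed by an IRQ. */
        static void example_add(struct xarray *xa, unsigned long enum_id,
                                void *entry)
        {
                xa_store(xa, enum_id, entry, GFP_ATOMIC);
                xa_set_tag(xa, enum_id, EXAMPLE_NEEDS_ALLOC);
        }

        /* Sketch: lookups skip entries still waiting for an allocation. */
        static void *example_lookup(struct xarray *xa, unsigned long enum_id)
        {
                if (xa_get_tag(xa, enum_id, EXAMPLE_NEEDS_ALLOC))
                        return NULL;
                return xa_load(xa, enum_id);
        }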

Signed-off-by: Matthew Wilcox 
---
 drivers/sh/intc/core.c  |  9 ++
 drivers/sh/intc/internals.h |  5 ++--
 drivers/sh/intc/virq.c  | 72 +
 3 files changed, 25 insertions(+), 61 deletions(-)

diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c
index 8e72bcbd3d6d..356a423d9dcb 100644
--- a/drivers/sh/intc/core.c
+++ b/drivers/sh/intc/core.c
@@ -30,7 +30,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include "internals.h"
@@ -78,11 +77,8 @@ static void __init intc_register_irq(struct intc_desc *desc,
struct intc_handle_int *hp;
struct irq_data *irq_data;
unsigned int data[2], primary;
-   unsigned long flags;
 
-   raw_spin_lock_irqsave(_big_lock, flags);
-   radix_tree_insert(>tree, enum_id, intc_irq_xlate_get(irq));
-   raw_spin_unlock_irqrestore(_big_lock, flags);
+   xa_store(>array, enum_id, intc_irq_xlate_get(irq), GFP_ATOMIC);
 
/*
 * Prefer single interrupt source bitmap over other combinations:
@@ -196,8 +192,7 @@ int __init register_intc_controller(struct intc_desc *desc)
INIT_LIST_HEAD(>list);
list_add_tail(>list, _list);
 
-   raw_spin_lock_init(>lock);
-   INIT_RADIX_TREE(>tree, GFP_ATOMIC);
+   xa_init(>array);
 
d->index = nr_intc_controllers;
 
diff --git a/drivers/sh/intc/internals.h b/drivers/sh/intc/internals.h
index fa73c173b56a..9b6fd07e99a6 100644
--- a/drivers/sh/intc/internals.h
+++ b/drivers/sh/intc/internals.h
@@ -5,7 +5,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 
 #define _INTC_MK(fn, mode, addr_e, addr_d, width, shift) \
@@ -54,8 +54,7 @@ struct intc_subgroup_entry {
 struct intc_desc_int {
struct list_head list;
struct device dev;
-   struct radix_tree_root tree;
-   raw_spinlock_t lock;
+   struct xarray array;
unsigned int index;
unsigned long *reg;
 #ifdef CONFIG_SMP
diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c
index a638c3048207..801c9c8b7556 100644
--- a/drivers/sh/intc/virq.c
+++ b/drivers/sh/intc/virq.c
@@ -12,7 +12,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include "internals.h"
@@ -27,10 +26,7 @@ struct intc_virq_list {
 #define for_each_virq(entry, head) \
for (entry = head; entry; entry = entry->next)
 
-/*
- * Tags for the radix tree
- */
-#define INTC_TAG_VIRQ_NEEDS_ALLOC  0
+#define INTC_TAG_VIRQ_NEEDS_ALLOC  XA_TAG_0
 
 void intc_irq_xlate_set(unsigned int irq, intc_enum id, struct intc_desc_int 
*d)
 {
@@ -54,23 +50,18 @@ int intc_irq_lookup(const char *chipname, intc_enum enum_id)
int irq = -1;
 
list_for_each_entry(d, _list, list) {
-   int tagged;
-
if (strcmp(d->chip.name, chipname) != 0)
continue;
 
/*
 * Catch early lookups for subgroup VIRQs that have not
-* yet been allocated an IRQ. This already includes a
-* fast-path out if the tree is untagged, so there is no
-* need to explicitly test the root tree.
+* yet been allocated an IRQ.
 */
-   tagged = radix_tree_tag_get(>tree, enum_id,
-   INTC_TAG_VIRQ_NEEDS_ALLOC);
-   if (unlikely(tagged))
+   if (unlikely(xa_get_tag(>array, enum_id,
+   INTC_TAG_VIRQ_NEEDS_ALLOC)))
break;
 
-   ptr = radix_tree_lookup(>tree, enum_id);
+   ptr = xa_load(>array, enum_id);
if (ptr) {
irq = ptr - intc_irq_xlate;
break;
@@ -148,22 +139,16 @@ static void __init intc_subgroup_init_one(struct 
intc_desc *desc,
 {
struct intc_map_entry *mapped;
unsigned int pirq;
-   unsigned long flags;
int i;
 
-   mapped = radix_tree_lookup(>tree, subgroup->parent_id);
-   if (!mapped) {
-   WARN_ON(1);
+   mapped = xa_load(>array, subgroup->parent_id);
+   if (WARN_ON(!mapped))
return;
-   }
 
pirq = mapped - intc_irq_xlate;
 
-   raw_spin_lock_irqsave(>lock, flags);
-
for (i = 0; i < ARRAY_SIZE(subgroup->enum_ids); i++) {
struct intc_subgroup_entry *entry;
-   int err;
 
if (!subgroup->enum_ids[i])
continue;
@@ -176,15 +161,14 @@ static void __init intc_subgroup_init_one(struct 
intc_desc *desc,
entry->enum_id = subgroup->enum_ids[i];
entry->handle = intc_subgroup_data(subgroup, d, i);
 
-   err = radix_tree_insert(>tree, 

[PATCH v6 85/99] btrfs: Remove unused spinlock

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

The reada_lock in struct btrfs_device was only initialised, and not
actually used.  That's good because there's another lock also called
reada_lock in the btrfs_fs_info that was quite heavily used.  Remove
this one.

Signed-off-by: Matthew Wilcox 
---
 fs/btrfs/volumes.c | 1 -
 fs/btrfs/volumes.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a25684287501..cba286183ff9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -244,7 +244,6 @@ static struct btrfs_device *__alloc_device(void)
 
spin_lock_init(>io_lock);
 
-   spin_lock_init(>reada_lock);
atomic_set(>reada_in_flight, 0);
atomic_set(>dev_stats_ccnt, 0);
btrfs_device_data_ordered_init(dev);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index ff15208344a7..335fd1590458 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -136,7 +136,6 @@ struct btrfs_device {
struct work_struct rcu_work;
 
/* readahead state */
-   spinlock_t reada_lock;
atomic_t reada_in_flight;
u64 reada_next;
struct reada_zone *reada_curr_zone;
-- 
2.15.1



[PATCH v6 84/99] btrfs: Convert fs_roots_radix to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Most of the gang lookups being done can be expressed just as efficiently
and somewhat more naturally as xa_for_each() loops.  I opted not to
change the one in btrfs_cleanup_fs_roots() as it's using SRCU which is
subtle and quick to anger.
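
The gang-lookup replacement in miniature (a sketch under the loop signature
this series proposes, where xa_for_each() takes the entry, a cursor index, an
upper bound and a filter; the helper name is invented):

        #include "ctree.h"      /* btrfs types assumed from fs/btrfs */

        /* Sketch: visit every fs root still present in the array and drop it,
         * replacing the old fixed-size radix_tree_gang_lookup() batches. */
        static void example_drop_all_roots(struct btrfs_fs_info *fs_info)
        {
                struct btrfs_root *root;
                unsigned long index = 0;

                xa_for_each(&fs_info->fs_roots, root, index, ULONG_MAX,
                            XA_PRESENT)
                        btrfs_drop_and_free_fs_root(fs_info, root);
        }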

Signed-off-by: Matthew Wilcox 
---
 fs/btrfs/ctree.h |  3 +-
 fs/btrfs/disk-io.c   | 65 +++--
 fs/btrfs/tests/btrfs-tests.c |  3 +-
 fs/btrfs/transaction.c   | 87 ++--
 4 files changed, 59 insertions(+), 99 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 13c260b525a1..173d72dfaab6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -741,8 +741,7 @@ struct btrfs_fs_info {
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
 
-   spinlock_t fs_roots_radix_lock;
-   struct radix_tree_root fs_roots_radix;
+   struct xarray fs_roots;
 
/* block group cache stuff */
spinlock_t block_group_cache_lock;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a8ecccfc36de..62995a55d112 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1519,13 +1519,7 @@ int btrfs_init_fs_root(struct btrfs_root *root)
 struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
u64 root_id)
 {
-   struct btrfs_root *root;
-
-   spin_lock(_info->fs_roots_radix_lock);
-   root = radix_tree_lookup(_info->fs_roots_radix,
-(unsigned long)root_id);
-   spin_unlock(_info->fs_roots_radix_lock);
-   return root;
+   return xa_load(_info->fs_roots, (unsigned long)root_id);
 }
 
 int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
@@ -1533,18 +1527,13 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
 {
int ret;
 
-   ret = radix_tree_preload(GFP_NOFS);
-   if (ret)
-   return ret;
-
-   spin_lock(_info->fs_roots_radix_lock);
-   ret = radix_tree_insert(_info->fs_roots_radix,
+   xa_lock(_info->fs_roots);
+   ret = __xa_insert(_info->fs_roots,
(unsigned long)root->root_key.objectid,
-   root);
+   root, GFP_NOFS);
if (ret == 0)
set_bit(BTRFS_ROOT_IN_RADIX, >state);
-   spin_unlock(_info->fs_roots_radix_lock);
-   radix_tree_preload_end();
+   xa_unlock(_info->fs_roots);
 
return ret;
 }
@@ -2079,33 +2068,25 @@ static void free_root_pointers(struct btrfs_fs_info 
*info, int chunk_root)
 
 void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
 {
-   int ret;
-   struct btrfs_root *gang[8];
-   int i;
+   struct btrfs_root *root;
+   unsigned long i = 0;
 
while (!list_empty(_info->dead_roots)) {
-   gang[0] = list_entry(fs_info->dead_roots.next,
+   root = list_entry(fs_info->dead_roots.next,
 struct btrfs_root, root_list);
-   list_del([0]->root_list);
+   list_del(>root_list);
 
-   if (test_bit(BTRFS_ROOT_IN_RADIX, [0]->state)) {
-   btrfs_drop_and_free_fs_root(fs_info, gang[0]);
+   if (test_bit(BTRFS_ROOT_IN_RADIX, >state)) {
+   btrfs_drop_and_free_fs_root(fs_info, root);
} else {
-   free_extent_buffer(gang[0]->node);
-   free_extent_buffer(gang[0]->commit_root);
-   btrfs_put_fs_root(gang[0]);
+   free_extent_buffer(root->node);
+   free_extent_buffer(root->commit_root);
+   btrfs_put_fs_root(root);
}
}
 
-   while (1) {
-   ret = radix_tree_gang_lookup(_info->fs_roots_radix,
-(void **)gang, 0,
-ARRAY_SIZE(gang));
-   if (!ret)
-   break;
-   for (i = 0; i < ret; i++)
-   btrfs_drop_and_free_fs_root(fs_info, gang[i]);
-   }
+   xa_for_each(_info->fs_roots, root, i, ULONG_MAX, XA_PRESENT)
+   btrfs_drop_and_free_fs_root(fs_info, root);
 
if (test_bit(BTRFS_FS_STATE_ERROR, _info->fs_state)) {
btrfs_free_log_root_tree(NULL, fs_info);
@@ -2447,7 +2428,7 @@ int open_ctree(struct super_block *sb,
goto fail_delalloc_bytes;
}
 
-   INIT_RADIX_TREE(_info->fs_roots_radix, GFP_ATOMIC);
+   xa_init(_info->fs_roots);
INIT_RADIX_TREE(_info->buffer_radix, GFP_ATOMIC);
INIT_LIST_HEAD(_info->trans_list);
INIT_LIST_HEAD(_info->dead_roots);
@@ -2456,7 +2437,6 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(_info->caching_block_groups);

[PATCH v6 28/99] page cache: Remove stray radix comment

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Signed-off-by: Matthew Wilcox 
---
 mm/filemap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index d2a0031d61f5..2536fcacb5bc 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2606,7 +2606,7 @@ static struct page *do_read_cache_page(struct 
address_space *mapping,
put_page(page);
if (err == -EEXIST)
goto repeat;
-   /* Presumably ENOMEM for radix tree node */
+   /* Presumably ENOMEM for xarray node */
return ERR_PTR(err);
}
 
-- 
2.15.1



Re: [RFC 1/2] softirq: Defer net rx/tx processing to ksoftirqd context

2018-01-17 Thread David Miller
From: Mike Galbraith 
Date: Fri, 12 Jan 2018 19:44:30 +0100

> On Fri, 2018-01-12 at 19:15 +0100, Mike Galbraith wrote:
>> On Fri, 2018-01-12 at 09:51 -0800, Linus Torvalds wrote:
>> > On Fri, Jan 12, 2018 at 9:44 AM, Mike Galbraith  wrote:
>> > >
>> > > Nah, a misunderstanding happened.  RT that still offers full threading
>> > > creates per-softirq threads per cpu.  The regular trees split ksoftirqd
>> > > into only two threads per cpu, one processes timer/hrtimer softriqs,
>> > > the other processes the rest.
>> > 
>> > Ok, that sounds like it should work, but it also sounds like it's very
>> > specific to RT itself.
>> > 
>> > For example, the dvb issue was not about the timer softirqs, but about
>> > the tasklet ones.
>> > 
>> > So maybe we wouldn't need to split it for _every_ softirq, but we'd
>> > need to split it more than just along the timer case.
>> > 
>> > And it does sound a bit excessive to have ten fixed threads for every
>> > CPU. The days when tens of CPU's meant "huge system" are gone. These
>> > days it can be a phone.
>> 
>> Yeah, it is excessive more often than not.  You get to prioritize, and
>> segregate, which is nice, but you pay for it.
> 
> BTW, much of the softirq load in RT is processed by the raising task.
> 
>   tbench_srv-6985  [000] d...112   293.902511: softirq_raise: vec=3 
> [action=NET_RX]
>   tbench_srv-6985  [000] .13   293.902511: softirq_entry: vec=3 
> [action=NET_RX]
>   tbench_srv-6985  [000] .13   293.902515: softirq_exit: vec=3 
> [action=NET_RX]
>   tbench-6984  [003] d...112   293.902520: softirq_raise: vec=3 
> [action=NET_RX]
>   tbench-6984  [003] .13   293.902520: softirq_entry: vec=3 
> [action=NET_RX]
>   tbench-6984  [003] .13   293.902523: softirq_exit: vec=3 
> [action=NET_RX]

And this is because tbench runs over loopback, which triggers softirqs in
the context of whatever generates the loopback packet transmit.  Which in
this case is the tbench process calling sendmsg().

I wanted to chime in about this earlier, and make it clear that it isn't
just IRQs that can trigger softirqs.  User context actions in the kernel
can trigger softirqs too.


[PATCH v6 83/99] hwspinlock: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

I had to mess with the locking a bit as I converted the code from a
mutex to the xa_lock.
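
The registration path reduces to a compare-and-exchange plus a tag, roughly as
follows (a sketch only; the array and function names are illustrative and the
tag API follows this series):

        #include <linux/xarray.h>

        static DEFINE_XARRAY(example_xa);

        /* Sketch: insert only if the id is free, then mark it as available. */
        static int example_register(unsigned int id, void *hwlock)
        {
                void *curr = xa_cmpxchg(&example_xa, id, NULL, hwlock,
                                        GFP_KERNEL);

                if (curr)
                        return xa_is_err(curr) ? xa_err(curr) : -EEXIST;

                xa_set_tag(&example_xa, id, XA_TAG_0);
                return 0;
        }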

Signed-off-by: Matthew Wilcox 
---
 drivers/hwspinlock/hwspinlock_core.c | 151 ---
 1 file changed, 52 insertions(+), 99 deletions(-)

diff --git a/drivers/hwspinlock/hwspinlock_core.c 
b/drivers/hwspinlock/hwspinlock_core.c
index 4074441444fe..acb6e315925f 100644
--- a/drivers/hwspinlock/hwspinlock_core.c
+++ b/drivers/hwspinlock/hwspinlock_core.c
@@ -23,43 +23,32 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
-#include 
 #include 
 
 #include "hwspinlock_internal.h"
 
-/* radix tree tags */
-#define HWSPINLOCK_UNUSED  (0) /* tags an hwspinlock as unused */
+#define HWSPINLOCK_UNUSED  XA_TAG_0
 
 /*
- * A radix tree is used to maintain the available hwspinlock instances.
- * The tree associates hwspinlock pointers with their integer key id,
+ * An xarray is used to maintain the available hwspinlock instances.
+ * The array associates hwspinlock pointers with their integer key id,
  * and provides easy-to-use API which makes the hwspinlock core code simple
  * and easy to read.
  *
- * Radix trees are quick on lookups, and reasonably efficient in terms of
+ * The XArray is quick on lookups, and reasonably efficient in terms of
  * storage, especially with high density usages such as this framework
  * requires (a continuous range of integer keys, beginning with zero, is
  * used as the ID's of the hwspinlock instances).
  *
- * The radix tree API supports tagging items in the tree, which this
- * framework uses to mark unused hwspinlock instances (see the
- * HWSPINLOCK_UNUSED tag above). As a result, the process of querying the
- * tree, looking for an unused hwspinlock instance, is now reduced to a
- * single radix tree API call.
+ * The xarray API supports tagging items, which this framework uses to mark
+ * unused hwspinlock instances (see the HWSPINLOCK_UNUSED tag above). As a
+ * result, the process of querying the array, looking for an unused
+ * hwspinlock instance, is reduced to a single call.
  */
-static RADIX_TREE(hwspinlock_tree, GFP_KERNEL);
-
-/*
- * Synchronization of access to the tree is achieved using this mutex,
- * as the radix-tree API requires that users provide all synchronisation.
- * A mutex is needed because we're using non-atomic radix tree allocations.
- */
-static DEFINE_MUTEX(hwspinlock_tree_lock);
-
+static DEFINE_XARRAY(hwspinlock_xa);
 
 /**
  * __hwspin_trylock() - attempt to lock a specific hwspinlock
@@ -294,10 +283,9 @@ of_hwspin_lock_simple_xlate(const struct of_phandle_args 
*hwlock_spec)
  */
 int of_hwspin_lock_get_id(struct device_node *np, int index)
 {
+   XA_STATE(xas, _xa, 0);
struct of_phandle_args args;
struct hwspinlock *hwlock;
-   struct radix_tree_iter iter;
-   void **slot;
int id;
int ret;
 
@@ -309,22 +297,15 @@ int of_hwspin_lock_get_id(struct device_node *np, int 
index)
/* Find the hwspinlock device: we need its base_id */
ret = -EPROBE_DEFER;
rcu_read_lock();
-   radix_tree_for_each_slot(slot, _tree, , 0) {
-   hwlock = radix_tree_deref_slot(slot);
-   if (unlikely(!hwlock))
-   continue;
-   if (radix_tree_deref_retry(hwlock)) {
-   slot = radix_tree_iter_retry();
+   xas_for_each(, hwlock, ULONG_MAX) {
+   if (xas_retry(, hwlock))
continue;
-   }
 
-   if (hwlock->bank->dev->of_node == args.np) {
-   ret = 0;
+   if (hwlock->bank->dev->of_node == args.np)
break;
-   }
}
rcu_read_unlock();
-   if (ret < 0)
+   if (!hwlock)
goto out;
 
id = of_hwspin_lock_simple_xlate();
@@ -332,6 +313,7 @@ int of_hwspin_lock_get_id(struct device_node *np, int index)
ret = -EINVAL;
goto out;
}
+   ret = 0;
id += hwlock->bank->base_id;
 
 out:
@@ -342,26 +324,19 @@ EXPORT_SYMBOL_GPL(of_hwspin_lock_get_id);
 
 static int hwspin_lock_register_single(struct hwspinlock *hwlock, int id)
 {
-   struct hwspinlock *tmp;
-   int ret;
+   void *curr;
 
-   mutex_lock(_tree_lock);
-
-   ret = radix_tree_insert(_tree, id, hwlock);
-   if (ret) {
-   if (ret == -EEXIST)
+   curr = xa_cmpxchg(_xa, id, NULL, hwlock, GFP_KERNEL);
+   if (curr) {
+   if (!xa_is_err(curr))
pr_err("hwspinlock id %d already exists!\n", id);
goto out;
}
 
/* mark this hwspinlock as available */
-   tmp = radix_tree_tag_set(_tree, id, HWSPINLOCK_UNUSED);
-
-   /* self-sanity check which should never fail */
-   WARN_ON(tmp != hwlock);
+   xa_set_tag(_xa, id, HWSPINLOCK_UNUSED);
 
 out:
-   mutex_unlock(_tree_lock);
  

Re: [RFC 1/2] softirq: Defer net rx/tx processing to ksoftirqd context

2018-01-17 Thread David Miller
From: Mike Galbraith 
Date: Fri, 12 Jan 2018 19:44:30 +0100

> On Fri, 2018-01-12 at 19:15 +0100, Mike Galbraith wrote:
>> On Fri, 2018-01-12 at 09:51 -0800, Linus Torvalds wrote:
>> > On Fri, Jan 12, 2018 at 9:44 AM, Mike Galbraith  wrote:
>> > >
>> > > Nah, a misunderstanding happened.  RT that still offers full threading
>> > > creates per-softirq threads per cpu.  The regular trees split ksoftirqd
>> > > into only two threads per cpu, one processes timer/hrtimer softriqs,
>> > > the other processes the rest.
>> > 
>> > Ok, that sounds like it should work, but it also sounds like it's very
>> > specific to RT itself.
>> > 
>> > For example, the dvb issue was not about the timer softirqs, but about
>> > the tasklet ones.
>> > 
>> > So maybe we wouldn't need to split it for _every_ softirq, but we'd
>> > need to split it more than just along the timer case.
>> > 
>> > And it does sound a bit excessive to have ten fixed threads for every
>> > CPU. The days when tens of CPU's meant "huge system" are gone. These
>> > days it can be a phone.
>> 
>> Yeah, it is excessive more often than not.  You get to prioritize, and
>> segregate, which is nice, but you pay for it.
> 
> BTW, much of the softirq load in RT is processed by the raising task.
> 
>   tbench_srv-6985  [000] d...112   293.902511: softirq_raise: vec=3 
> [action=NET_RX]
>   tbench_srv-6985  [000] .13   293.902511: softirq_entry: vec=3 
> [action=NET_RX]
>   tbench_srv-6985  [000] .13   293.902515: softirq_exit: vec=3 
> [action=NET_RX]
>   tbench-6984  [003] d...112   293.902520: softirq_raise: vec=3 
> [action=NET_RX]
>   tbench-6984  [003] .13   293.902520: softirq_entry: vec=3 
> [action=NET_RX]
>   tbench-6984  [003] .13   293.902523: softirq_exit: vec=3 
> [action=NET_RX]

And this is because tbench runs over loopback, which triggers softirqs in
the context of whatever generates the loopback packet transmit.  Which in
this case is the tbench process calling sendmsg().

I wanted to chime in about this earlier, and make it clear that it isn't
just IRQs that can trigger softirqs.  User context actions in the kernel
can trigger softirqs too.


[PATCH v6 88/99] btrfs: Convert reada_tree to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Rename reada_tree to reada_array.  Use the xa_lock in reada_array to
replace reada_lock.  This lock has to be taken with spin_lock_nested()
because we acquire the xa_lock of the reada_extents and reada_zones
xarrays while already holding reada_array's xa_lock.
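
The locking pattern this relies on - reusing the xarray's internal
spinlock as the subsystem lock, with lockdep nesting - can be sketched
as below.  Illustration only, not the patch: struct my_info and
my_lookup() are stand-ins, and the field and API names follow this series.

#include <linux/spinlock.h>
#include <linux/xarray.h>

struct my_info {
	struct xarray array;	/* replaces the tree + external spinlock */
};

/* Take the xarray's own xa_lock; nested because other xa_locks are
 * taken while this one is held. */
static inline void my_lock(struct my_info *info)
{
	spin_lock_nested(&info->array.xa_lock, SINGLE_DEPTH_NESTING);
}

static inline void my_unlock(struct my_info *info)
{
	spin_unlock(&info->array.xa_lock);
}

/* Lookup under the lock, as btree_readahead_hook() does after this
 * patch (the caller would take a reference before dropping the lock). */
static void *my_lookup(struct my_info *info, unsigned long index)
{
	void *entry;

	my_lock(info);
	entry = xa_load(&info->array, index);
	my_unlock(info);
	return entry;
}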

Signed-off-by: Matthew Wilcox 
---
 fs/btrfs/ctree.h   |  15 +--
 fs/btrfs/disk-io.c |   3 +-
 fs/btrfs/reada.c   | 119 +
 3 files changed, 70 insertions(+), 67 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 173d72dfaab6..272d099bed7e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1052,9 +1052,8 @@ struct btrfs_fs_info {
 
struct btrfs_delayed_root *delayed_root;
 
-   /* readahead tree */
-   spinlock_t reada_lock;
-   struct radix_tree_root reada_tree;
+   /* readahead extents */
+   struct xarray reada_array;
 
/* readahead works cnt */
atomic_t reada_works_cnt;
@@ -1102,6 +1101,16 @@ struct btrfs_fs_info {
 #endif
 };
 
+static inline void reada_lock(struct btrfs_fs_info *fs_info)
+{
+   spin_lock_nested(_info->reada_array.xa_lock, SINGLE_DEPTH_NESTING);
+}
+
+static inline void reada_unlock(struct btrfs_fs_info *fs_info)
+{
+   spin_unlock(_info->reada_array.xa_lock);
+}
+
 static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
 {
return sb->s_fs_info;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 62995a55d112..1eae29045d43 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2478,8 +2478,7 @@ int open_ctree(struct super_block *sb,
fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
/* readahead state */
-   INIT_RADIX_TREE(_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
-   spin_lock_init(_info->reada_lock);
+   xa_init(_info->reada_array);
btrfs_init_ref_verify(fs_info);
 
fs_info->thread_pool_size = min_t(unsigned long,
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 8100f1565250..89ba0063903f 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -215,12 +215,11 @@ int btree_readahead_hook(struct extent_buffer *eb, int 
err)
struct reada_extent *re;
 
/* find extent */
-   spin_lock(_info->reada_lock);
-   re = radix_tree_lookup(_info->reada_tree,
-  eb->start >> PAGE_SHIFT);
+   reada_lock(fs_info);
+   re = xa_load(_info->reada_array, eb->start >> PAGE_SHIFT);
if (re)
re->refcnt++;
-   spin_unlock(_info->reada_lock);
+   reada_unlock(fs_info);
if (!re) {
ret = -1;
goto start_machine;
@@ -246,15 +245,15 @@ static struct reada_zone *reada_find_zone(struct 
btrfs_device *dev, u64 logical,
unsigned long index = logical >> PAGE_SHIFT;
int i;
 
-   spin_lock(_info->reada_lock);
+   reada_lock(fs_info);
zone = xa_find(>reada_zones, , ULONG_MAX, XA_PRESENT);
if (zone && logical >= zone->start && logical <= zone->end) {
kref_get(>refcnt);
-   spin_unlock(_info->reada_lock);
+   reada_unlock(fs_info);
return zone;
}
 
-   spin_unlock(_info->reada_lock);
+   reada_unlock(fs_info);
 
cache = btrfs_lookup_block_group(fs_info, logical);
if (!cache)
@@ -289,7 +288,7 @@ static struct reada_zone *reada_find_zone(struct 
btrfs_device *dev, u64 logical,
}
zone->ndevs = bbio->num_stripes;
 
-   spin_lock(_info->reada_lock);
+   reada_lock(fs_info);
curr = xa_cmpxchg(>reada_zones,
(unsigned long)(zone->end >> PAGE_SHIFT),
NULL, zone, GFP_NOWAIT | __GFP_NOWARN);
@@ -301,7 +300,7 @@ static struct reada_zone *reada_find_zone(struct 
btrfs_device *dev, u64 logical,
else
zone = NULL;
}
-   spin_unlock(_info->reada_lock);
+   reada_unlock(fs_info);
 
return zone;
 }
@@ -323,11 +322,11 @@ static struct reada_extent *reada_find_extent(struct 
btrfs_fs_info *fs_info,
int dev_replace_is_ongoing;
int have_zone = 0;
 
-   spin_lock(_info->reada_lock);
-   re = radix_tree_lookup(_info->reada_tree, index);
+   reada_lock(fs_info);
+   re = xa_load(_info->reada_array, index);
if (re)
re->refcnt++;
-   spin_unlock(_info->reada_lock);
+   reada_unlock(fs_info);
 
if (re)
return re;
@@ -378,38 +377,32 @@ static struct reada_extent *reada_find_extent(struct 
btrfs_fs_info *fs_info,
kref_get(>refcnt);
++zone->elems;
spin_unlock(>lock);
-   spin_lock(_info->reada_lock);
+   reada_lock(fs_info);
kref_put(>refcnt, reada_zone_release);
-   spin_unlock(_info->reada_lock);
+   

[PATCH v6 87/99] btrfs: Convert reada_extents to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Straightforward conversion.
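
The gang-lookup-of-one calls become a cursor-style xa_find() loop.  A
minimal sketch of that idiom (my_extents and my_dump_all() are invented
names; the xa_find() signature is the one used in this series):

#include <linux/xarray.h>

static DEFINE_XARRAY(my_extents);

static void my_dump_all(void)
{
	unsigned long index = 0;
	void *entry;

	/* xa_find() returns the first present entry at or after *index
	 * and updates index to the slot it found. */
	while ((entry = xa_find(&my_extents, &index, ULONG_MAX, XA_PRESENT))) {
		/* ... process entry ... */
		index++;	/* step past it, like dump_devs() does */
	}
}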

Signed-off-by: Matthew Wilcox 
---
 fs/btrfs/reada.c   | 32 +---
 fs/btrfs/volumes.c |  2 +-
 fs/btrfs/volumes.h |  2 +-
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index ef8e84ff2012..8100f1565250 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -438,13 +438,14 @@ static struct reada_extent *reada_find_extent(struct 
btrfs_fs_info *fs_info,
continue;
}
prev_dev = dev;
-   ret = radix_tree_insert(>reada_extents, index, re);
+   ret = xa_insert(>reada_extents, index, re,
+   GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
if (ret) {
while (--nzones >= 0) {
dev = re->zones[nzones]->device;
BUG_ON(dev == NULL);
/* ignore whether the entry was inserted */
-   radix_tree_delete(>reada_extents, index);
+   xa_erase(>reada_extents, index);
}
radix_tree_delete(_info->reada_tree, index);
spin_unlock(_info->reada_lock);
@@ -504,7 +505,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
for (i = 0; i < re->nzones; ++i) {
struct reada_zone *zone = re->zones[i];
 
-   radix_tree_delete(>device->reada_extents, index);
+   xa_erase(>device->reada_extents, index);
}
 
spin_unlock(_info->reada_lock);
@@ -644,6 +645,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev)
int mirror_num = 0;
struct extent_buffer *eb = NULL;
u64 logical;
+   unsigned long index;
int ret;
int i;
 
@@ -660,19 +662,19 @@ static int reada_start_machine_dev(struct btrfs_device 
*dev)
 * a contiguous block of extents, we could also coagulate them or use
 * plugging to speed things up
 */
-   ret = radix_tree_gang_lookup(>reada_extents, (void **),
-dev->reada_next >> PAGE_SHIFT, 1);
-   if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
+   index = dev->reada_next >> PAGE_SHIFT;
+   re = xa_find(>reada_extents, , ULONG_MAX, XA_PRESENT);
+   if (!re || re->logical > dev->reada_curr_zone->end) {
ret = reada_pick_zone(dev);
if (!ret) {
spin_unlock(_info->reada_lock);
return 0;
}
-   re = NULL;
-   ret = radix_tree_gang_lookup(>reada_extents, (void **),
-   dev->reada_next >> PAGE_SHIFT, 1);
+   index = dev->reada_next >> PAGE_SHIFT;
+   re = xa_find(>reada_extents, , ULONG_MAX,
+   XA_PRESENT);
}
-   if (ret == 0) {
+   if (!re) {
spin_unlock(_info->reada_lock);
return 0;
}
@@ -828,11 +830,11 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int 
all)
cnt = 0;
index = 0;
while (all) {
-   struct reada_extent *re = NULL;
+   struct reada_extent *re;
 
-   ret = radix_tree_gang_lookup(>reada_extents,
-(void **), index, 1);
-   if (ret == 0)
+   re = xa_find(>reada_extents, , ULONG_MAX,
+   XA_PRESENT);
+   if (!re)
break;
pr_debug("  re: logical %llu size %u empty %d scheduled 
%d",
re->logical, fs_info->nodesize,
@@ -848,7 +850,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int 
all)
}
}
pr_cont("\n");
-   index = (re->logical >> PAGE_SHIFT) + 1;
+   index++;
if (++cnt > 15)
break;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8e683799b436..304c2ef4c557 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -248,7 +248,7 @@ static struct btrfs_device *__alloc_device(void)
atomic_set(>dev_stats_ccnt, 0);
btrfs_device_data_ordered_init(dev);
xa_init(>reada_zones);
-   INIT_RADIX_TREE(>reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+   xa_init(>reada_extents);
 
return dev;
 }
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index aeabe03d3e44..0e0c04e2613c 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ 

[PATCH v6 86/99] btrfs: Convert reada_zones to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

The use of the reada_lock means we have to use the xa_reserve() API.
If we can avoid using reada_lock to protect this xarray, we can drop
the use of that function.
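
The reserve-then-store dance referred to above looks roughly like this.
A sketch under the assumptions of this series only: my_zones, my_lock
and my_insert_zone() are invented, and it mirrors what reada_find_zone()
does after the patch (allocate the slot while sleeping is allowed, then
store under the external spinlock without allocating).

#include <linux/spinlock.h>
#include <linux/xarray.h>

static DEFINE_XARRAY(my_zones);
static DEFINE_SPINLOCK(my_lock);	/* external lock, like reada_lock */

/* Insert @zone at @index; returns the entry that ends up in the array
 * (ours, or one that raced in first), or NULL on allocation failure. */
static void *my_insert_zone(unsigned long index, void *zone)
{
	void *curr;

	/* Reserve the slot while we may still sleep. */
	if (xa_reserve(&my_zones, index, GFP_KERNEL))
		return NULL;

	spin_lock(&my_lock);
	/* The reservation is meant to keep this NOWAIT store from having
	 * to allocate; a racing real entry is still possible. */
	curr = xa_cmpxchg(&my_zones, index, NULL, zone,
			  GFP_NOWAIT | __GFP_NOWARN);
	spin_unlock(&my_lock);

	if (xa_is_err(curr))
		return NULL;
	return curr ? curr : zone;	/* an existing entry wins the race */
}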

Signed-off-by: Matthew Wilcox 
---
 fs/btrfs/reada.c   | 54 +++---
 fs/btrfs/volumes.c |  2 +-
 fs/btrfs/volumes.h |  2 +-
 3 files changed, 21 insertions(+), 37 deletions(-)

diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index ab852b8e3e37..ef8e84ff2012 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -239,17 +239,16 @@ static struct reada_zone *reada_find_zone(struct 
btrfs_device *dev, u64 logical,
 {
struct btrfs_fs_info *fs_info = dev->fs_info;
int ret;
-   struct reada_zone *zone;
+   struct reada_zone *curr, *zone;
struct btrfs_block_group_cache *cache = NULL;
u64 start;
u64 end;
+   unsigned long index = logical >> PAGE_SHIFT;
int i;
 
-   zone = NULL;
spin_lock(_info->reada_lock);
-   ret = radix_tree_gang_lookup(>reada_zones, (void **),
-logical >> PAGE_SHIFT, 1);
-   if (ret == 1 && logical >= zone->start && logical <= zone->end) {
+   zone = xa_find(>reada_zones, , ULONG_MAX, XA_PRESENT);
+   if (zone && logical >= zone->start && logical <= zone->end) {
kref_get(>refcnt);
spin_unlock(_info->reada_lock);
return zone;
@@ -269,7 +268,8 @@ static struct reada_zone *reada_find_zone(struct 
btrfs_device *dev, u64 logical,
if (!zone)
return NULL;
 
-   ret = radix_tree_preload(GFP_KERNEL);
+   ret = xa_reserve(>reada_zones,
+(unsigned long)(end >> PAGE_SHIFT), GFP_KERNEL);
if (ret) {
kfree(zone);
return NULL;
@@ -290,21 +290,18 @@ static struct reada_zone *reada_find_zone(struct 
btrfs_device *dev, u64 logical,
zone->ndevs = bbio->num_stripes;
 
spin_lock(_info->reada_lock);
-   ret = radix_tree_insert(>reada_zones,
+   curr = xa_cmpxchg(>reada_zones,
(unsigned long)(zone->end >> PAGE_SHIFT),
-   zone);
-
-   if (ret == -EEXIST) {
+   NULL, zone, GFP_NOWAIT | __GFP_NOWARN);
+   if (curr) {
kfree(zone);
-   ret = radix_tree_gang_lookup(>reada_zones, (void **),
-logical >> PAGE_SHIFT, 1);
-   if (ret == 1 && logical >= zone->start && logical <= zone->end)
+   zone = curr;
+   if (logical >= zone->start && logical <= zone->end)
kref_get(>refcnt);
else
zone = NULL;
}
spin_unlock(_info->reada_lock);
-   radix_tree_preload_end();
 
return zone;
 }
@@ -537,9 +534,7 @@ static void reada_zone_release(struct kref *kref)
 {
struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
 
-   radix_tree_delete(>device->reada_zones,
- zone->end >> PAGE_SHIFT);
-
+   xa_erase(>device->reada_zones, zone->end >> PAGE_SHIFT);
kfree(zone);
 }
 
@@ -592,7 +587,7 @@ static void reada_peer_zones_set_lock(struct reada_zone 
*zone, int lock)
 
for (i = 0; i < zone->ndevs; ++i) {
struct reada_zone *peer;
-   peer = radix_tree_lookup(>devs[i]->reada_zones, index);
+   peer = xa_load(>devs[i]->reada_zones, index);
if (peer && peer->device != zone->device)
peer->locked = lock;
}
@@ -603,12 +598,11 @@ static void reada_peer_zones_set_lock(struct reada_zone 
*zone, int lock)
  */
 static int reada_pick_zone(struct btrfs_device *dev)
 {
-   struct reada_zone *top_zone = NULL;
+   struct reada_zone *zone, *top_zone = NULL;
struct reada_zone *top_locked_zone = NULL;
u64 top_elems = 0;
u64 top_locked_elems = 0;
unsigned long index = 0;
-   int ret;
 
if (dev->reada_curr_zone) {
reada_peer_zones_set_lock(dev->reada_curr_zone, 0);
@@ -616,14 +610,7 @@ static int reada_pick_zone(struct btrfs_device *dev)
dev->reada_curr_zone = NULL;
}
/* pick the zone with the most elements */
-   while (1) {
-   struct reada_zone *zone;
-
-   ret = radix_tree_gang_lookup(>reada_zones,
-(void **), index, 1);
-   if (ret == 0)
-   break;
-   index = (zone->end >> PAGE_SHIFT) + 1;
+   xa_for_each(>reada_zones, zone, index, ULONG_MAX, XA_PRESENT) {
if (zone->locked) {
if (zone->elems > top_locked_elems) {
top_locked_elems = zone->elems;
@@ -819,15 +806,13 @@ static void dump_devs(struct btrfs_fs_info 

[PATCH v6 90/99] btrfs: Convert delayed_nodes_tree to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Rename it to just 'delayed_nodes' and remove it from the protection of
btrfs_root->inode_lock.
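
The preload-free insert-once pattern used here can be sketched as
below.  Illustration only: my_nodes and my_insert_once() are invented
names, and the return-value handling follows how this series encodes
errors in the pointer returned by __xa_cmpxchg().

#include <linux/xarray.h>

static DEFINE_XARRAY(my_nodes);

/* Insert @node at @ino unless an entry already exists.  Returns 0 on
 * success, -EEXIST if we lost a race, or another negative errno. */
static int my_insert_once(unsigned long ino, void *node)
{
	void *exists;
	int ret = 0;

	xa_lock(&my_nodes);
	exists = __xa_cmpxchg(&my_nodes, ino, NULL, node, GFP_NOFS);
	if (exists)
		ret = xa_is_err(exists) ? xa_err(exists) : -EEXIST;
	xa_unlock(&my_nodes);

	return ret;
}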

Signed-off-by: Matthew Wilcox 
---
 fs/btrfs/ctree.h |  8 +++---
 fs/btrfs/delayed-inode.c | 65 
 fs/btrfs/disk-io.c   |  2 +-
 fs/btrfs/inode.c |  2 +-
 4 files changed, 27 insertions(+), 50 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 87984ce3a4c2..9acfdc623d15 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1219,11 +1219,9 @@ struct btrfs_root {
/* red-black tree that keeps track of in-memory inodes */
struct rb_root inode_tree;
 
-   /*
-* radix tree that keeps track of delayed nodes of every inode,
-* protected by inode_lock
-*/
-   struct radix_tree_root delayed_nodes_tree;
+   /* track delayed nodes of every inode */
+   struct xarray delayed_nodes;
+
/*
 * right now this just gets used so that a root has its own devid
 * for stat.  It may be used for more later
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 056276101c63..156a762f3809 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -86,7 +86,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
}
 
spin_lock(>inode_lock);
-   node = radix_tree_lookup(>delayed_nodes_tree, ino);
+   node = xa_load(>delayed_nodes, ino);
 
if (node) {
if (btrfs_inode->delayed_node) {
@@ -131,10 +131,9 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
struct btrfs_inode *btrfs_inode)
 {
-   struct btrfs_delayed_node *node;
+   struct btrfs_delayed_node *node, *exists;
struct btrfs_root *root = btrfs_inode->root;
u64 ino = btrfs_ino(btrfs_inode);
-   int ret;
 
 again:
node = btrfs_get_delayed_node(btrfs_inode);
@@ -149,23 +148,18 @@ static struct btrfs_delayed_node 
*btrfs_get_or_create_delayed_node(
/* cached in the btrfs inode and can be accessed */
refcount_set(>refs, 2);
 
-   ret = radix_tree_preload(GFP_NOFS);
-   if (ret) {
+   xa_lock(>delayed_nodes);
+   exists = __xa_cmpxchg(>delayed_nodes, ino, NULL, node, GFP_NOFS);
+   if (unlikely(exists)) {
+   int ret = xa_err(exists);
+   xa_unlock(>delayed_nodes);
kmem_cache_free(delayed_node_cache, node);
+   if (ret == -EEXIST)
+   goto again;
return ERR_PTR(ret);
}
-
-   spin_lock(>inode_lock);
-   ret = radix_tree_insert(>delayed_nodes_tree, ino, node);
-   if (ret == -EEXIST) {
-   spin_unlock(>inode_lock);
-   kmem_cache_free(delayed_node_cache, node);
-   radix_tree_preload_end();
-   goto again;
-   }
btrfs_inode->delayed_node = node;
-   spin_unlock(>inode_lock);
-   radix_tree_preload_end();
+   xa_unlock(>delayed_nodes);
 
return node;
 }
@@ -278,15 +272,12 @@ static void __btrfs_release_delayed_node(
if (refcount_dec_and_test(_node->refs)) {
struct btrfs_root *root = delayed_node->root;
 
-   spin_lock(>inode_lock);
/*
 * Once our refcount goes to zero, nobody is allowed to bump it
 * back up.  We can delete it now.
 */
ASSERT(refcount_read(_node->refs) == 0);
-   radix_tree_delete(>delayed_nodes_tree,
- delayed_node->inode_id);
-   spin_unlock(>inode_lock);
+   xa_erase(>delayed_nodes, delayed_node->inode_id);
kmem_cache_free(delayed_node_cache, delayed_node);
}
 }
@@ -1926,31 +1917,19 @@ void btrfs_kill_delayed_inode_items(struct btrfs_inode 
*inode)
 
 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
 {
-   u64 inode_id = 0;
-   struct btrfs_delayed_node *delayed_nodes[8];
-   int i, n;
-
-   while (1) {
-   spin_lock(>inode_lock);
-   n = radix_tree_gang_lookup(>delayed_nodes_tree,
-  (void **)delayed_nodes, inode_id,
-  ARRAY_SIZE(delayed_nodes));
-   if (!n) {
-   spin_unlock(>inode_lock);
-   break;
-   }
-
-   inode_id = delayed_nodes[n - 1]->inode_id + 1;
-
-   for (i = 0; i < n; i++)
-   refcount_inc(_nodes[i]->refs);
-   spin_unlock(>inode_lock);
+   struct btrfs_delayed_node *node;
+   unsigned long inode_id = 0;
 
-   for (i = 0; i < n; i++) {
-   __btrfs_kill_delayed_node(delayed_nodes[i]);
-   btrfs_release_delayed_node(delayed_nodes[i]);

[PATCH v6 89/99] btrfs: Convert buffer_radix to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Eliminate the buffer_lock as the internal xa_lock provides all the
necessary protection.  We can remove the radix_tree_preload calls, but
I can't find a good way to use the 'exists' result from xa_cmpxchg().
We could resort to the advanced API to improve this, but it's a really
unlikely case (nothing in the xarray when we first look; something there
when we try to add the newly-allocated extent buffer), so I think it's
not worth optimising for.
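
For reference, the lookup-or-insert loop that alloc_test_extent_buffer()
ends up with boils down to the pattern below.  A trimmed sketch, not the
patch: my_buffers and my_find_or_insert() are invented, and the real
code also has to manage reference counts on both paths.

#include <linux/xarray.h>

static DEFINE_XARRAY(my_buffers);

/* Return the entry at @index, inserting @new if the slot was empty.
 * On error the caller keeps ownership of @new. */
static void *my_find_or_insert(unsigned long index, void *new)
{
	void *exists;

again:
	exists = xa_load(&my_buffers, index);
	if (exists)
		return exists;			/* someone else won */

	exists = xa_cmpxchg(&my_buffers, index, NULL, new, GFP_NOFS);
	if (!exists)
		return new;			/* we inserted it */
	if (xa_is_err(exists))
		return NULL;			/* e.g. -ENOMEM */
	goto again;				/* raced; redo the lookup */
}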

Signed-off-by: Matthew Wilcox 
---
 fs/btrfs/ctree.h |  5 ++-
 fs/btrfs/disk-io.c   |  3 +-
 fs/btrfs/extent_io.c | 82 ++--
 fs/btrfs/tests/btrfs-tests.c | 26 +++---
 4 files changed, 40 insertions(+), 76 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 272d099bed7e..87984ce3a4c2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1058,9 +1058,8 @@ struct btrfs_fs_info {
/* readahead works cnt */
atomic_t reada_works_cnt;
 
-   /* Extent buffer radix tree */
-   spinlock_t buffer_lock;
-   struct radix_tree_root buffer_radix;
+   /* Extent buffer array */
+   struct xarray buffer_array;
 
/* next backup root to be overwritten */
int backup_root_index;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1eae29045d43..650d1350b64d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2429,7 +2429,7 @@ int open_ctree(struct super_block *sb,
}
 
xa_init(_info->fs_roots);
-   INIT_RADIX_TREE(_info->buffer_radix, GFP_ATOMIC);
+   xa_init(_info->buffer_array);
INIT_LIST_HEAD(_info->trans_list);
INIT_LIST_HEAD(_info->dead_roots);
INIT_LIST_HEAD(_info->delayed_iputs);
@@ -2442,7 +2442,6 @@ int open_ctree(struct super_block *sb,
spin_lock_init(_info->tree_mod_seq_lock);
spin_lock_init(_info->super_lock);
spin_lock_init(_info->qgroup_op_lock);
-   spin_lock_init(_info->buffer_lock);
spin_lock_init(_info->unused_bgs_lock);
rwlock_init(_info->tree_mod_log_lock);
mutex_init(_info->unused_bg_unpin_mutex);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fd5e9d887328..2b43fa11c9e2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4884,8 +4884,7 @@ struct extent_buffer *find_extent_buffer(struct 
btrfs_fs_info *fs_info,
struct extent_buffer *eb;
 
rcu_read_lock();
-   eb = radix_tree_lookup(_info->buffer_radix,
-  start >> PAGE_SHIFT);
+   eb = xa_load(_info->buffer_array, start >> PAGE_SHIFT);
if (eb && atomic_inc_not_zero(>refs)) {
rcu_read_unlock();
/*
@@ -4919,31 +4918,24 @@ struct extent_buffer *find_extent_buffer(struct 
btrfs_fs_info *fs_info,
 struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start)
 {
-   struct extent_buffer *eb, *exists = NULL;
-   int ret;
+   struct extent_buffer *exists, *eb = NULL;
 
-   eb = find_extent_buffer(fs_info, start);
-   if (eb)
-   return eb;
-   eb = alloc_dummy_extent_buffer(fs_info, start);
-   if (!eb)
-   return NULL;
-   eb->fs_info = fs_info;
 again:
-   ret = radix_tree_preload(GFP_NOFS);
-   if (ret)
+   exists = find_extent_buffer(fs_info, start);
+   if (exists)
goto free_eb;
-   spin_lock(_info->buffer_lock);
-   ret = radix_tree_insert(_info->buffer_radix,
-   start >> PAGE_SHIFT, eb);
-   spin_unlock(_info->buffer_lock);
-   radix_tree_preload_end();
-   if (ret == -EEXIST) {
-   exists = find_extent_buffer(fs_info, start);
-   if (exists)
+   if (!eb)
+   eb = alloc_dummy_extent_buffer(fs_info, start);
+   if (!eb)
+   return NULL;
+   exists = xa_cmpxchg(_info->buffer_array, start >> PAGE_SHIFT,
+   NULL, eb, GFP_NOFS);
+   if (unlikely(exists)) {
+   if (xa_is_err(exists)) {
+   exists = NULL;
goto free_eb;
-   else
-   goto again;
+   }
+   goto again;
}
check_buffer_tree_ref(eb);
set_bit(EXTENT_BUFFER_IN_TREE, >bflags);
@@ -4957,7 +4949,8 @@ struct extent_buffer *alloc_test_extent_buffer(struct 
btrfs_fs_info *fs_info,
atomic_inc(>refs);
return eb;
 free_eb:
-   btrfs_release_extent_buffer(eb);
+   if (eb)
+   btrfs_release_extent_buffer(eb);
return exists;
 }
 #endif
@@ -4969,22 +4962,24 @@ struct extent_buffer *alloc_extent_buffer(struct 
btrfs_fs_info *fs_info,
unsigned long num_pages = num_extent_pages(start, len);
unsigned long i;
unsigned long index = start >> PAGE_SHIFT;
-   struct extent_buffer *eb;
+   

[PATCH v6 92/99] f2fs: Convert pids radix tree to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

The XArray API works out rather well for this user.
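
The old lookup/compare/delete/insert sequence collapses into a single
call because xa_store() hands back the previous entry at that index.  A
minimal sketch of the idiom (my_pids and my_note_pid() are invented):

#include <linux/xarray.h>

static DEFINE_XARRAY(my_pids);

/* Remember the last task seen for @pid; report true only when the
 * owner actually changed, as f2fs_trace_pid() now does. */
static bool my_note_pid(unsigned long pid, void *task)
{
	void *old = xa_store(&my_pids, pid, task, GFP_NOFS);

	return old != task;
}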

Signed-off-by: Matthew Wilcox 
---
 fs/f2fs/super.c |  2 --
 fs/f2fs/trace.c | 60 -
 fs/f2fs/trace.h |  2 --
 3 files changed, 4 insertions(+), 60 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 708155d9c2e4..d608edffe69e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2831,8 +2831,6 @@ static int __init init_f2fs_fs(void)
 {
int err;
 
-   f2fs_build_trace_ios();
-
err = init_inodecache();
if (err)
goto fail;
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
index bccbbf2616d2..f316a42c547f 100644
--- a/fs/f2fs/trace.c
+++ b/fs/f2fs/trace.c
@@ -16,8 +16,7 @@
 #include "f2fs.h"
 #include "trace.h"
 
-static RADIX_TREE(pids, GFP_ATOMIC);
-static spinlock_t pids_lock;
+static DEFINE_XARRAY(pids);
 static struct last_io_info last_io;
 
 static inline void __print_last_io(void)
@@ -57,28 +56,13 @@ void f2fs_trace_pid(struct page *page)
 {
struct inode *inode = page->mapping->host;
pid_t pid = task_pid_nr(current);
-   void *p;
 
set_page_private(page, (unsigned long)pid);
 
-   if (radix_tree_preload(GFP_NOFS))
-   return;
-
-   spin_lock(_lock);
-   p = radix_tree_lookup(, pid);
-   if (p == current)
-   goto out;
-   if (p)
-   radix_tree_delete(, pid);
-
-   f2fs_radix_tree_insert(, pid, current);
-
-   trace_printk("%3x:%3x %4x %-16s\n",
+   if (xa_store(, pid, current, GFP_NOFS) != current)
+   trace_printk("%3x:%3x %4x %-16s\n",
MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
pid, current->comm);
-out:
-   spin_unlock(_lock);
-   radix_tree_preload_end();
 }
 
 void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
@@ -120,43 +104,7 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
return;
 }
 
-void f2fs_build_trace_ios(void)
-{
-   spin_lock_init(_lock);
-}
-
-#define PIDVEC_SIZE128
-static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index,
-   unsigned int max_items)
-{
-   struct radix_tree_iter iter;
-   void **slot;
-   unsigned int ret = 0;
-
-   if (unlikely(!max_items))
-   return 0;
-
-   radix_tree_for_each_slot(slot, , , first_index) {
-   results[ret] = iter.index;
-   if (++ret == max_items)
-   break;
-   }
-   return ret;
-}
-
 void f2fs_destroy_trace_ios(void)
 {
-   pid_t pid[PIDVEC_SIZE];
-   pid_t next_pid = 0;
-   unsigned int found;
-
-   spin_lock(_lock);
-   while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) {
-   unsigned idx;
-
-   next_pid = pid[found - 1] + 1;
-   for (idx = 0; idx < found; idx++)
-   radix_tree_delete(, pid[idx]);
-   }
-   spin_unlock(_lock);
+   xa_destroy();
 }
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
index 67db24ac1e85..157e4564e48b 100644
--- a/fs/f2fs/trace.h
+++ b/fs/f2fs/trace.h
@@ -34,12 +34,10 @@ struct last_io_info {
 
 extern void f2fs_trace_pid(struct page *);
 extern void f2fs_trace_ios(struct f2fs_io_info *, int);
-extern void f2fs_build_trace_ios(void);
 extern void f2fs_destroy_trace_ios(void);
 #else
 #define f2fs_trace_pid(p)
 #define f2fs_trace_ios(i, n)
-#define f2fs_build_trace_ios()
 #define f2fs_destroy_trace_ios()
 
 #endif
-- 
2.15.1



[PATCH v6 91/99] btrfs: Convert name_cache to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This is a very straightforward conversion.  The handling of collisions
in the namecache could be better handled with an hlist, but that's a
patch for another day.
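
The collision handling mentioned above keeps a list_head bucket per
inode number in the xarray.  A rough sketch of that shape (my_cache and
my_cache_add() are invented; error handling mirrors name_cache_insert()):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/xarray.h>

static DEFINE_XARRAY(my_cache);		/* ino -> struct list_head bucket */

static int my_cache_add(unsigned long ino, struct list_head *entry)
{
	struct list_head *bucket;
	int ret;

	bucket = xa_load(&my_cache, ino);
	if (!bucket) {
		bucket = kmalloc(sizeof(*bucket), GFP_KERNEL);
		if (!bucket)
			return -ENOMEM;
		INIT_LIST_HEAD(bucket);
		/* xa_insert() fails with -EEXIST if another inserter won. */
		ret = xa_insert(&my_cache, ino, bucket, GFP_KERNEL);
		if (ret < 0) {
			kfree(bucket);
			return ret;
		}
	}
	list_add_tail(entry, bucket);
	return 0;
}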

Signed-off-by: Matthew Wilcox 
---
 fs/btrfs/send.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 20d3300bd268..3891a8e958fa 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -23,7 +23,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -118,7 +118,7 @@ struct send_ctx {
struct list_head new_refs;
struct list_head deleted_refs;
 
-   struct radix_tree_root name_cache;
+   struct xarray name_cache;
struct list_head name_cache_list;
int name_cache_size;
 
@@ -2021,8 +2021,7 @@ static int name_cache_insert(struct send_ctx *sctx,
int ret = 0;
struct list_head *nce_head;
 
-   nce_head = radix_tree_lookup(>name_cache,
-   (unsigned long)nce->ino);
+   nce_head = xa_load(>name_cache, (unsigned long)nce->ino);
if (!nce_head) {
nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
if (!nce_head) {
@@ -2031,7 +2030,8 @@ static int name_cache_insert(struct send_ctx *sctx,
}
INIT_LIST_HEAD(nce_head);
 
-   ret = radix_tree_insert(>name_cache, nce->ino, nce_head);
+   ret = xa_insert(>name_cache, nce->ino, nce_head,
+   GFP_KERNEL);
if (ret < 0) {
kfree(nce_head);
kfree(nce);
@@ -2050,8 +2050,7 @@ static void name_cache_delete(struct send_ctx *sctx,
 {
struct list_head *nce_head;
 
-   nce_head = radix_tree_lookup(>name_cache,
-   (unsigned long)nce->ino);
+   nce_head = xa_load(>name_cache, (unsigned long)nce->ino);
if (!nce_head) {
btrfs_err(sctx->send_root->fs_info,
  "name_cache_delete lookup failed ino %llu cache size %d, leaking 
memory",
@@ -2066,7 +2065,7 @@ static void name_cache_delete(struct send_ctx *sctx,
 * We may not get to the final release of nce_head if the lookup fails
 */
if (nce_head && list_empty(nce_head)) {
-   radix_tree_delete(>name_cache, (unsigned long)nce->ino);
+   xa_erase(>name_cache, (unsigned long)nce->ino);
kfree(nce_head);
}
 }
@@ -2077,7 +2076,7 @@ static struct name_cache_entry *name_cache_search(struct 
send_ctx *sctx,
struct list_head *nce_head;
struct name_cache_entry *cur;
 
-   nce_head = radix_tree_lookup(>name_cache, (unsigned long)ino);
+   nce_head = xa_load(>name_cache, (unsigned long)ino);
if (!nce_head)
return NULL;
 
@@ -6526,7 +6525,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct 
btrfs_ioctl_send_args *arg)
 
INIT_LIST_HEAD(>new_refs);
INIT_LIST_HEAD(>deleted_refs);
-   INIT_RADIX_TREE(>name_cache, GFP_KERNEL);
+   xa_init(>name_cache);
INIT_LIST_HEAD(>name_cache_list);
 
sctx->flags = arg->flags;
-- 
2.15.1



Re: [PATCH v5 00/17] ASoC: fsl_ssi: Clean up - program flow level

2018-01-17 Thread Maciej S. Szmigiero
On 17.01.2018 21:02, Nicolin Chen wrote:
> On Wed, Jan 17, 2018 at 08:38:48PM +0100, Maciej S. Szmigiero wrote:
> 
>> However, I have a small nitpick regarding a comment newly added in
>> this version of patch 16:
>> +/*
>> + * Do not set SSI dev as the parent of AC97 CODEC device since
>> + * it does not have a DT node. Otherwise ASoC core will assume
>> + * CODEC has the same DT node as the SSI, so it may return a
>> + * NULL pointer of CODEC when asked for SSI via the DT node
>>
>> The second part of the last sentence isn't really true, the ASoC core
>> will return a (valid, non-NULL) CODEC object pointer when asked for
>> the SSI one if we set the SSI as the parent device of a AC'97 CODEC
>> platform device.
>>
>> The NULL pointer dereference when starting a playback that I wrote
>> about in my previous message happens because in this situation the SSI
>> DAI probe callback won't ever get called and so won't setup DMA data
>> pointers (they will remain NULL).
> 
> Well, somehow the DMA data pointer of CODEC could be described
> as "a NULL pointer of CODEC" reluctantly...it confuses people
> though.
> 
>> And this in turn will cause the ASoC DMA code to dereference these
>> NULL pointers when starting a playback (the same will probably happen
>> also when starting a capture).
>>
>> Sorry if I wasn't 100% clear about these details in my previous
>> message describing this issue.
> 
> I would prefer to send an incremental patch later to update it,
> if there are no new comments against this version; Otherwise, I
> will update it in a next version once there is a need to send a
> v6 anyway.

IMHO it is such a tiny thing that it isn't worth respinning a
17-patch series just for it; it can be easily improved later via
a separate patch.

> Thanks
> 

Thanks,
Maciej


[PATCH v6 92/99] f2fs: Convert pids radix tree to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

The XArray API works out rather well for this user.

Signed-off-by: Matthew Wilcox 
---
 fs/f2fs/super.c |  2 --
 fs/f2fs/trace.c | 60 -
 fs/f2fs/trace.h |  2 --
 3 files changed, 4 insertions(+), 60 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 708155d9c2e4..d608edffe69e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2831,8 +2831,6 @@ static int __init init_f2fs_fs(void)
 {
int err;
 
-   f2fs_build_trace_ios();
-
err = init_inodecache();
if (err)
goto fail;
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
index bccbbf2616d2..f316a42c547f 100644
--- a/fs/f2fs/trace.c
+++ b/fs/f2fs/trace.c
@@ -16,8 +16,7 @@
 #include "f2fs.h"
 #include "trace.h"
 
-static RADIX_TREE(pids, GFP_ATOMIC);
-static spinlock_t pids_lock;
+static DEFINE_XARRAY(pids);
 static struct last_io_info last_io;
 
 static inline void __print_last_io(void)
@@ -57,28 +56,13 @@ void f2fs_trace_pid(struct page *page)
 {
struct inode *inode = page->mapping->host;
pid_t pid = task_pid_nr(current);
-   void *p;
 
set_page_private(page, (unsigned long)pid);
 
-   if (radix_tree_preload(GFP_NOFS))
-   return;
-
-   spin_lock(_lock);
-   p = radix_tree_lookup(, pid);
-   if (p == current)
-   goto out;
-   if (p)
-   radix_tree_delete(, pid);
-
-   f2fs_radix_tree_insert(, pid, current);
-
-   trace_printk("%3x:%3x %4x %-16s\n",
+   if (xa_store(, pid, current, GFP_NOFS) != current)
+   trace_printk("%3x:%3x %4x %-16s\n",
MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
pid, current->comm);
-out:
-   spin_unlock(_lock);
-   radix_tree_preload_end();
 }
 
 void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
@@ -120,43 +104,7 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
return;
 }
 
-void f2fs_build_trace_ios(void)
-{
-   spin_lock_init(_lock);
-}
-
-#define PIDVEC_SIZE128
-static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index,
-   unsigned int max_items)
-{
-   struct radix_tree_iter iter;
-   void **slot;
-   unsigned int ret = 0;
-
-   if (unlikely(!max_items))
-   return 0;
-
-   radix_tree_for_each_slot(slot, , , first_index) {
-   results[ret] = iter.index;
-   if (++ret == max_items)
-   break;
-   }
-   return ret;
-}
-
 void f2fs_destroy_trace_ios(void)
 {
-   pid_t pid[PIDVEC_SIZE];
-   pid_t next_pid = 0;
-   unsigned int found;
-
-   spin_lock(_lock);
-   while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) {
-   unsigned idx;
-
-   next_pid = pid[found - 1] + 1;
-   for (idx = 0; idx < found; idx++)
-   radix_tree_delete(, pid[idx]);
-   }
-   spin_unlock(_lock);
+   xa_destroy();
 }
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
index 67db24ac1e85..157e4564e48b 100644
--- a/fs/f2fs/trace.h
+++ b/fs/f2fs/trace.h
@@ -34,12 +34,10 @@ struct last_io_info {
 
 extern void f2fs_trace_pid(struct page *);
 extern void f2fs_trace_ios(struct f2fs_io_info *, int);
-extern void f2fs_build_trace_ios(void);
 extern void f2fs_destroy_trace_ios(void);
 #else
 #define f2fs_trace_pid(p)
 #define f2fs_trace_ios(i, n)
-#define f2fs_build_trace_ios()
 #define f2fs_destroy_trace_ios()
 
 #endif
-- 
2.15.1



[PATCH v6 91/99] btrfs: Convert name_cache to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This is a very straightforward conversion.  Collisions in the name cache
could be handled better with an hlist, but that's a patch for another day.
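
The scheme being preserved is worth spelling out: each index holds a
small kmalloc'd bucket (a list_head), and colliding cache entries are
chained off that bucket.  A rough sketch of the same shape using the
xa_load()/xa_insert() calls from the diff below; struct cache_entry and
bucket_insert() are invented names, not btrfs code:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/xarray.h>

struct cache_entry {                    /* stand-in for name_cache_entry */
        struct list_head node;
        unsigned long ino;
};

static int bucket_insert(struct xarray *xa, struct cache_entry *ce)
{
        struct list_head *bucket;
        int ret;

        bucket = xa_load(xa, ce->ino);
        if (!bucket) {
                bucket = kmalloc(sizeof(*bucket), GFP_KERNEL);
                if (!bucket)
                        return -ENOMEM;
                INIT_LIST_HEAD(bucket);
                /* xa_insert() returns a negative errno if it cannot store */
                ret = xa_insert(xa, ce->ino, bucket, GFP_KERNEL);
                if (ret < 0) {
                        kfree(bucket);
                        return ret;
                }
        }
        list_add_tail(&ce->node, bucket);
        return 0;
}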

Signed-off-by: Matthew Wilcox 
---
 fs/btrfs/send.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 20d3300bd268..3891a8e958fa 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -23,7 +23,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -118,7 +118,7 @@ struct send_ctx {
struct list_head new_refs;
struct list_head deleted_refs;
 
-   struct radix_tree_root name_cache;
+   struct xarray name_cache;
struct list_head name_cache_list;
int name_cache_size;
 
@@ -2021,8 +2021,7 @@ static int name_cache_insert(struct send_ctx *sctx,
int ret = 0;
struct list_head *nce_head;
 
-   nce_head = radix_tree_lookup(>name_cache,
-   (unsigned long)nce->ino);
+   nce_head = xa_load(>name_cache, (unsigned long)nce->ino);
if (!nce_head) {
nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
if (!nce_head) {
@@ -2031,7 +2030,8 @@ static int name_cache_insert(struct send_ctx *sctx,
}
INIT_LIST_HEAD(nce_head);
 
-   ret = radix_tree_insert(>name_cache, nce->ino, nce_head);
+   ret = xa_insert(>name_cache, nce->ino, nce_head,
+   GFP_KERNEL);
if (ret < 0) {
kfree(nce_head);
kfree(nce);
@@ -2050,8 +2050,7 @@ static void name_cache_delete(struct send_ctx *sctx,
 {
struct list_head *nce_head;
 
-   nce_head = radix_tree_lookup(>name_cache,
-   (unsigned long)nce->ino);
+   nce_head = xa_load(>name_cache, (unsigned long)nce->ino);
if (!nce_head) {
btrfs_err(sctx->send_root->fs_info,
  "name_cache_delete lookup failed ino %llu cache size %d, leaking 
memory",
@@ -2066,7 +2065,7 @@ static void name_cache_delete(struct send_ctx *sctx,
 * We may not get to the final release of nce_head if the lookup fails
 */
if (nce_head && list_empty(nce_head)) {
-   radix_tree_delete(>name_cache, (unsigned long)nce->ino);
+   xa_erase(>name_cache, (unsigned long)nce->ino);
kfree(nce_head);
}
 }
@@ -2077,7 +2076,7 @@ static struct name_cache_entry *name_cache_search(struct 
send_ctx *sctx,
struct list_head *nce_head;
struct name_cache_entry *cur;
 
-   nce_head = radix_tree_lookup(>name_cache, (unsigned long)ino);
+   nce_head = xa_load(>name_cache, (unsigned long)ino);
if (!nce_head)
return NULL;
 
@@ -6526,7 +6525,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct 
btrfs_ioctl_send_args *arg)
 
INIT_LIST_HEAD(>new_refs);
INIT_LIST_HEAD(>deleted_refs);
-   INIT_RADIX_TREE(>name_cache, GFP_KERNEL);
+   xa_init(>name_cache);
INIT_LIST_HEAD(>name_cache_list);
 
sctx->flags = arg->flags;
-- 
2.15.1



Re: [PATCH v5 00/17] ASoC: fsl_ssi: Clean up - program flow level

2018-01-17 Thread Maciej S. Szmigiero
On 17.01.2018 21:02, Nicolin Chen wrote:
> On Wed, Jan 17, 2018 at 08:38:48PM +0100, Maciej S. Szmigiero wrote:
> 
>> However, I have a small nitpick regarding a comment newly added in
>> this version of patch 16:
>> +/*
>> + * Do not set SSI dev as the parent of AC97 CODEC device since
>> + * it does not have a DT node. Otherwise ASoC core will assume
>> + * CODEC has the same DT node as the SSI, so it may return a
>> + * NULL pointer of CODEC when asked for SSI via the DT node
>>
>> The second part of the last sentence isn't really true, the ASoC core
>> will return a (valid, non-NULL) CODEC object pointer when asked for
>> the SSI one if we set the SSI as the parent device of a AC'97 CODEC
>> platform device.
>>
>> The NULL pointer dereference when starting a playback that I wrote
>> about in my previous message happens because in this situation the SSI
>> DAI probe callback won't ever get called and so won't setup DMA data
>> pointers (they will remain NULL).
> 
> Well, somehow the DMA data pointer of CODEC could be described
> as "a NULL pointer of CODEC" reluctantly...it confuses people
> though.
> 
>> And this in turn will cause the ASoC DMA code to dereference these
>> NULL pointers when starting a playback (the same will probably happen
>> also when starting a capture).
>>
>> Sorry if I wasn't 100% clear about these details in my previous
>> message describing this issue.
> 
> I would prefer to send an incremental patch later to update it,
> if there are no new comments against this version; Otherwise, I
> will update it in a next version once there is a need to send a
> v6 anyway.

IMHO it is such a tiny thing that it isn't worth respinning a 17-patch
series just for it; it can be easily improved later via a separate patch.

> Thanks
> 

Thanks,
Maciej



[PATCH v6 94/99] f2fs: Convert extent_tree_root to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Rename it to extent_array and use the xa_lock in place of the
extent_tree_lock mutex.
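
The point of the rename is that the standalone mutex goes away: every
XArray embeds a spinlock, taken with xa_lock()/xa_unlock(), and the
__xa_* calls used below expect that lock to already be held.  A tiny
sketch of the pattern with invented names, not f2fs code:

#include <linux/xarray.h>

static DEFINE_XARRAY(object_array);     /* illustrative only */

static void drop_object(unsigned long index, void (*free_object)(void *))
{
        void *obj;

        xa_lock(&object_array);                 /* the embedded spinlock */
        obj = __xa_erase(&object_array, index); /* caller holds xa_lock */
        xa_unlock(&object_array);

        if (obj)
                free_object(obj);
}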

Signed-off-by: Matthew Wilcox 
---
 fs/f2fs/extent_cache.c | 59 +-
 fs/f2fs/f2fs.h |  3 +--
 2 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index ff2352a0ed15..da5f3bd1808d 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -250,25 +250,25 @@ static struct extent_tree *__grab_extent_tree(struct 
inode *inode)
struct extent_tree *et;
nid_t ino = inode->i_ino;
 
-   mutex_lock(>extent_tree_lock);
-   et = radix_tree_lookup(>extent_tree_root, ino);
-   if (!et) {
-   et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
-   f2fs_radix_tree_insert(>extent_tree_root, ino, et);
-   memset(et, 0, sizeof(struct extent_tree));
-   et->ino = ino;
-   et->root = RB_ROOT;
-   et->cached_en = NULL;
-   rwlock_init(>lock);
-   INIT_LIST_HEAD(>list);
-   atomic_set(>node_cnt, 0);
-   atomic_inc(>total_ext_tree);
-   } else {
+   et = xa_load(>extent_array, ino);
+   if (et) {
atomic_dec(>total_zombie_tree);
list_del_init(>list);
+   goto out;
}
-   mutex_unlock(>extent_tree_lock);
 
+   et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS | __GFP_ZERO);
+   et->ino = ino;
+   et->root = RB_ROOT;
+   et->cached_en = NULL;
+   rwlock_init(>lock);
+   INIT_LIST_HEAD(>list);
+   atomic_set(>node_cnt, 0);
+
+   xa_store(>extent_array, ino, et, GFP_NOFS);
+   atomic_inc(>total_ext_tree);
+
+out:
/* never died until evict_inode */
F2FS_I(inode)->extent_tree = et;
 
@@ -622,7 +622,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int nr_shrink)
if (!atomic_read(>total_zombie_tree))
goto free_node;
 
-   if (!mutex_trylock(>extent_tree_lock))
+   if (!xa_trylock(>extent_array))
goto out;
 
/* 1. remove unreferenced extent tree */
@@ -634,7 +634,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int nr_shrink)
}
f2fs_bug_on(sbi, atomic_read(>node_cnt));
list_del_init(>list);
-   radix_tree_delete(>extent_tree_root, et->ino);
+   xa_erase(>extent_array, et->ino);
kmem_cache_free(extent_tree_slab, et);
atomic_dec(>total_ext_tree);
atomic_dec(>total_zombie_tree);
@@ -642,13 +642,13 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int nr_shrink)
 
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
-   cond_resched();
+   cond_resched_lock(>extent_array.xa_lock);
}
-   mutex_unlock(>extent_tree_lock);
+   xa_unlock(>extent_array);
 
 free_node:
/* 2. remove LRU extent entries */
-   if (!mutex_trylock(>extent_tree_lock))
+   if (!xa_trylock(>extent_array))
goto out;
 
remained = nr_shrink - (node_cnt + tree_cnt);
@@ -678,7 +678,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int nr_shrink)
spin_unlock(>extent_lock);
 
 unlock_out:
-   mutex_unlock(>extent_tree_lock);
+   xa_unlock(>extent_array);
 out:
trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
 
@@ -725,23 +725,23 @@ void f2fs_destroy_extent_tree(struct inode *inode)
 
if (inode->i_nlink && !is_bad_inode(inode) &&
atomic_read(>node_cnt)) {
-   mutex_lock(>extent_tree_lock);
+   xa_lock(>extent_array);
list_add_tail(>list, >zombie_list);
atomic_inc(>total_zombie_tree);
-   mutex_unlock(>extent_tree_lock);
+   xa_unlock(>extent_array);
return;
}
 
/* free all extent info belong to this extent tree */
node_cnt = f2fs_destroy_extent_node(inode);
 
-   /* delete extent tree entry in radix tree */
-   mutex_lock(>extent_tree_lock);
+   /* delete extent from array */
+   xa_lock(>extent_array);
f2fs_bug_on(sbi, atomic_read(>node_cnt));
-   radix_tree_delete(>extent_tree_root, inode->i_ino);
-   kmem_cache_free(extent_tree_slab, et);
+   __xa_erase(>extent_array, inode->i_ino);
atomic_dec(>total_ext_tree);
-   mutex_unlock(>extent_tree_lock);
+   xa_unlock(>extent_array);
+   kmem_cache_free(extent_tree_slab, et);
 
F2FS_I(inode)->extent_tree = NULL;
 
@@ -787,8 +787,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data 
*dn,
 
 void init_extent_cache_info(struct f2fs_sb_info *sbi)
 {
-   INIT_RADIX_TREE(>extent_tree_root, GFP_NOIO);
-   

[PATCH v6 93/99] f2fs: Convert ino_root to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

I did a fairly major rewrite of __add_ino_entry(); please check carefully.
Also, we can remove ino_list unless it's important to write out orphan
inodes in the order they were orphaned.  It may also make more sense to
combine the array of inode_management structures into a single XArray
with tags, but that would be a job for someone who understands this
filesystem better than I do.
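
For reviewers checking that rewrite, its shape is a fairly common one:
look up under the lock, drop the lock to allocate, then use a
compare-and-exchange against NULL so that a racing inserter wins and
the loser frees its copy.  A condensed sketch of just that pattern;
get_or_create(), alloc_entry() and free_entry() are invented stand-ins
for the slab-cache calls, not f2fs code:

#include <linux/xarray.h>

static void *get_or_create(struct xarray *xa, unsigned long index,
                           void *(*alloc_entry)(void),
                           void (*free_entry)(void *))
{
        void *e, *tmp;

        xa_lock(xa);
        e = xa_load(xa, index);
        if (e)
                goto out;
        xa_unlock(xa);

        tmp = alloc_entry();            /* may sleep: the lock is dropped */

        xa_lock(xa);
        /* Store tmp only if the slot is still empty; returns the old entry. */
        e = __xa_cmpxchg(xa, index, NULL, tmp, GFP_NOFS | __GFP_NOFAIL);
        if (e)
                free_entry(tmp);        /* a racing inserter won; use its entry */
        else
                e = tmp;
out:
        /* further updates to the entry can be made while xa_lock is held */
        xa_unlock(xa);
        return e;
}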

Signed-off-by: Matthew Wilcox 
---
 fs/f2fs/checkpoint.c | 85 +++-
 fs/f2fs/f2fs.h   |  3 +-
 2 files changed, 38 insertions(+), 50 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 4aa69bc1c70a..04d69679da13 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -403,33 +403,30 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, 
nid_t ino,
struct inode_management *im = >im[type];
struct ino_entry *e, *tmp;
 
-   tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
-
-   radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
-
-   spin_lock(>ino_lock);
-   e = radix_tree_lookup(>ino_root, ino);
-   if (!e) {
-   e = tmp;
-   if (unlikely(radix_tree_insert(>ino_root, ino, e)))
-   f2fs_bug_on(sbi, 1);
-
-   memset(e, 0, sizeof(struct ino_entry));
-   e->ino = ino;
-
-   list_add_tail(>list, >ino_list);
-   if (type != ORPHAN_INO)
-   im->ino_num++;
+   xa_lock(>ino_root);
+   e = xa_load(>ino_root, ino);
+   if (e)
+   goto found;
+   xa_unlock(>ino_root);
+
+   tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS | __GFP_ZERO);
+   xa_lock(>ino_root);
+   e = __xa_cmpxchg(>ino_root, ino, NULL, tmp,
+   GFP_NOFS | __GFP_NOFAIL);
+   if (e) {
+   kmem_cache_free(ino_entry_slab, tmp);
+   goto found;
}
+   e = tmp;
 
+   e->ino = ino;
+   list_add_tail(>list, >ino_list);
+   if (type != ORPHAN_INO)
+   im->ino_num++;
+found:
if (type == FLUSH_INO)
f2fs_set_bit(devidx, (char *)>dirty_device);
-
-   spin_unlock(>ino_lock);
-   radix_tree_preload_end();
-
-   if (e != tmp)
-   kmem_cache_free(ino_entry_slab, tmp);
+   xa_unlock(>ino_root);
 }
 
 static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -437,17 +434,14 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, 
nid_t ino, int type)
struct inode_management *im = >im[type];
struct ino_entry *e;
 
-   spin_lock(>ino_lock);
-   e = radix_tree_lookup(>ino_root, ino);
+   xa_lock(>ino_root);
+   e = __xa_erase(>ino_root, ino);
if (e) {
list_del(>list);
-   radix_tree_delete(>ino_root, ino);
im->ino_num--;
-   spin_unlock(>ino_lock);
kmem_cache_free(ino_entry_slab, e);
-   return;
}
-   spin_unlock(>ino_lock);
+   xa_unlock(>ino_root);
 }
 
 void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -466,12 +460,8 @@ void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, 
int type)
 bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
 {
struct inode_management *im = >im[mode];
-   struct ino_entry *e;
 
-   spin_lock(>ino_lock);
-   e = radix_tree_lookup(>ino_root, ino);
-   spin_unlock(>ino_lock);
-   return e ? true : false;
+   return xa_load(>ino_root, ino) ? true : false;
 }
 
 void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
@@ -482,14 +472,14 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
for (i = all ? ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) {
struct inode_management *im = >im[i];
 
-   spin_lock(>ino_lock);
+   xa_lock(>ino_root);
list_for_each_entry_safe(e, tmp, >ino_list, list) {
list_del(>list);
-   radix_tree_delete(>ino_root, e->ino);
+   __xa_erase(>ino_root, e->ino);
kmem_cache_free(ino_entry_slab, e);
im->ino_num--;
}
-   spin_unlock(>ino_lock);
+   xa_unlock(>ino_root);
}
 }
 
@@ -506,11 +496,11 @@ bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
struct ino_entry *e;
bool is_dirty = false;
 
-   spin_lock(>ino_lock);
-   e = radix_tree_lookup(>ino_root, ino);
+   xa_lock(>ino_root);
+   e = xa_load(>ino_root, ino);
if (e && f2fs_test_bit(devidx, (char *)>dirty_device))
is_dirty = true;
-   spin_unlock(>ino_lock);
+   xa_unlock(>ino_root);
return is_dirty;
 }
 
@@ -519,11 +509,11 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
struct 


[PATCH v6 99/99] null_blk: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

We can probably avoid the call to xa_reserve() by changing the locking,
but I didn't feel confident enough to do that.
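
For context on the xa_reserve() remark: elsewhere in this series the
call takes (xarray, index, gfp) and pre-allocates the slot, so that a
later store done from atomic context cannot fail for lack of memory.
A hedged two-step sketch of that idiom with invented function names,
not actual null_blk code:

#include <linux/xarray.h>

/* Step 1: while sleeping is still allowed, reserve the slot. */
static int prepare_slot(struct xarray *xa, unsigned long index)
{
        return xa_reserve(xa, index, GFP_KERNEL);
}

/* Step 2: later, possibly under a spinlock, fill the reserved slot. */
static void commit_slot(struct xarray *xa, unsigned long index, void *item)
{
        /* The slot already exists, so this store does not need to allocate. */
        xa_store(xa, index, item, GFP_ATOMIC);
}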

Signed-off-by: Matthew Wilcox 
---
 drivers/block/null_blk.c | 87 +---
 1 file changed, 38 insertions(+), 49 deletions(-)

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index ad0477ae820f..d90d173b8885 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define SECTOR_SHIFT   9
 #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
@@ -90,8 +91,8 @@ struct nullb_page {
 struct nullb_device {
struct nullb *nullb;
struct config_item item;
-   struct radix_tree_root data; /* data stored in the disk */
-   struct radix_tree_root cache; /* disk cache data */
+   struct xarray data; /* data stored in the disk */
+   struct xarray cache; /* disk cache data */
unsigned long flags; /* device flags */
unsigned int curr_cache;
struct badblocks badblocks;
@@ -558,8 +559,8 @@ static struct nullb_device *null_alloc_dev(void)
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return NULL;
-   INIT_RADIX_TREE(>data, GFP_ATOMIC);
-   INIT_RADIX_TREE(>cache, GFP_ATOMIC);
+   xa_init_flags(>data, XA_FLAGS_LOCK_IRQ);
+   xa_init_flags(>cache, XA_FLAGS_LOCK_IRQ);
if (badblocks_init(>badblocks, 0)) {
kfree(dev);
return NULL;
@@ -752,18 +753,18 @@ static void null_free_sector(struct nullb *nullb, 
sector_t sector,
unsigned int sector_bit;
u64 idx;
struct nullb_page *t_page, *ret;
-   struct radix_tree_root *root;
+   struct xarray *xa;
 
-   root = is_cache ? >dev->cache : >dev->data;
+   xa = is_cache ? >dev->cache : >dev->data;
idx = sector >> PAGE_SECTORS_SHIFT;
sector_bit = (sector & SECTOR_MASK);
 
-   t_page = radix_tree_lookup(root, idx);
+   t_page = xa_load(xa, idx);
if (t_page) {
__clear_bit(sector_bit, _page->bitmap);
 
if (!t_page->bitmap) {
-   ret = radix_tree_delete_item(root, idx, t_page);
+   ret = xa_cmpxchg(xa, idx, t_page, NULL, 0);
WARN_ON(ret != t_page);
null_free_page(ret);
if (is_cache)
@@ -772,47 +773,34 @@ static void null_free_sector(struct nullb *nullb, 
sector_t sector,
}
 }
 
-static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
+static struct nullb_page *null_xa_insert(struct nullb *nullb, u64 idx,
struct nullb_page *t_page, bool is_cache)
 {
-   struct radix_tree_root *root;
+   struct xarray *xa = is_cache ? >dev->cache : >dev->data;
+   struct nullb_page *exist;
 
-   root = is_cache ? >dev->cache : >dev->data;
-
-   if (radix_tree_insert(root, idx, t_page)) {
+   exist = xa_cmpxchg(xa, idx, NULL, t_page, GFP_ATOMIC);
+   if (exist) {
null_free_page(t_page);
-   t_page = radix_tree_lookup(root, idx);
-   WARN_ON(!t_page || t_page->page->index != idx);
+   t_page = exist;
} else if (is_cache)
nullb->dev->curr_cache += PAGE_SIZE;
 
+   WARN_ON(t_page->page->index != idx);
return t_page;
 }
 
 static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
 {
-   unsigned long pos = 0;
-   int nr_pages;
-   struct nullb_page *ret, *t_pages[FREE_BATCH];
-   struct radix_tree_root *root;
-
-   root = is_cache ? >cache : >data;
-
-   do {
-   int i;
-
-   nr_pages = radix_tree_gang_lookup(root,
-   (void **)t_pages, pos, FREE_BATCH);
-
-   for (i = 0; i < nr_pages; i++) {
-   pos = t_pages[i]->page->index;
-   ret = radix_tree_delete_item(root, pos, t_pages[i]);
-   WARN_ON(ret != t_pages[i]);
-   null_free_page(ret);
-   }
+   struct nullb_page *t_page;
+   XA_STATE(xas, is_cache ? >cache : >data, 0);
 
-   pos++;
-   } while (nr_pages == FREE_BATCH);
+   xas_lock();
+   xas_for_each(, t_page, ULONG_MAX) {
+   xas_store(, NULL);
+   null_free_page(t_page);
+   }
+   xas_unlock();
 
if (is_cache)
dev->curr_cache = 0;
@@ -824,13 +812,13 @@ static struct nullb_page *__null_lookup_page(struct nullb 
*nullb,
unsigned int sector_bit;
u64 idx;
struct nullb_page *t_page;
-   struct radix_tree_root *root;
+   struct xarray *xa;
 
idx = sector >> PAGE_SECTORS_SHIFT;
sector_bit = (sector & SECTOR_MASK);
 
-   root = is_cache ? >dev->cache : >dev->data;
-   t_page = 

[PATCH v6 98/99] qrtr: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Moved the kref protection under the xa_lock too.
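
The subtlety here is that kref_put_lock() only takes the lock when the
refcount is about to reach zero, so the release callback runs with the
XArray's xa_lock already held; that is why it can call __xa_erase() and
must unlock before returning.  A stripped-down sketch of that shape
with an invented object type, not qrtr code:

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/xarray.h>

static DEFINE_XARRAY(obj_array);        /* illustrative only */

struct obj {
        struct kref ref;
        unsigned long id;
};

static void obj_release(struct kref *kref)
        __releases(obj_array.xa_lock)
{
        struct obj *o = container_of(kref, struct obj, ref);

        /* Entered with obj_array.xa_lock held by kref_put_lock(). */
        __xa_erase(&obj_array, o->id);
        xa_unlock(&obj_array);

        kfree(o);
}

static void obj_put(struct obj *o)
{
        /* Only takes obj_array.xa_lock if the refcount drops to zero. */
        kref_put_lock(&o->ref, obj_release, &obj_array.xa_lock);
}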

Signed-off-by: Matthew Wilcox 
---
 net/qrtr/qrtr.c | 21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 77ab05e23001..7de9a06d2aa2 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -104,10 +104,10 @@ static inline struct qrtr_sock *qrtr_sk(struct sock *sk)
 static unsigned int qrtr_local_nid = -1;
 
 /* for node ids */
-static RADIX_TREE(qrtr_nodes, GFP_KERNEL);
+static DEFINE_XARRAY(qrtr_nodes);
 /* broadcast list */
 static LIST_HEAD(qrtr_all_nodes);
-/* lock for qrtr_nodes, qrtr_all_nodes and node reference */
+/* lock for qrtr_all_nodes */
 static DEFINE_MUTEX(qrtr_node_lock);
 
 /* local port allocation management */
@@ -148,12 +148,15 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, 
struct sk_buff *skb,
  * kref_put_mutex.  As such, the node mutex is expected to be locked on call.
  */
 static void __qrtr_node_release(struct kref *kref)
+   __releases(qrtr_nodes.xa_lock)
 {
struct qrtr_node *node = container_of(kref, struct qrtr_node, ref);
 
if (node->nid != QRTR_EP_NID_AUTO)
-   radix_tree_delete(_nodes, node->nid);
+   __xa_erase(_nodes, node->nid);
+   xa_unlock(_nodes);
 
+   mutex_lock(_node_lock);
list_del(>item);
mutex_unlock(_node_lock);
 
@@ -174,7 +177,7 @@ static void qrtr_node_release(struct qrtr_node *node)
 {
if (!node)
return;
-   kref_put_mutex(>ref, __qrtr_node_release, _node_lock);
+   kref_put_lock(>ref, __qrtr_node_release, _nodes.xa_lock);
 }
 
 /* Pass an outgoing packet socket buffer to the endpoint driver. */
@@ -217,10 +220,10 @@ static struct qrtr_node *qrtr_node_lookup(unsigned int 
nid)
 {
struct qrtr_node *node;
 
-   mutex_lock(_node_lock);
-   node = radix_tree_lookup(_nodes, nid);
+   xa_lock(_nodes);
+   node = xa_load(_nodes, nid);
node = qrtr_node_acquire(node);
-   mutex_unlock(_node_lock);
+   xa_unlock(_nodes);
 
return node;
 }
@@ -235,10 +238,8 @@ static void qrtr_node_assign(struct qrtr_node *node, 
unsigned int nid)
if (node->nid != QRTR_EP_NID_AUTO || nid == QRTR_EP_NID_AUTO)
return;
 
-   mutex_lock(_node_lock);
-   radix_tree_insert(_nodes, nid, node);
node->nid = nid;
-   mutex_unlock(_node_lock);
+   xa_store(_nodes, nid, node, GFP_KERNEL);
 }
 
 /**
-- 
2.15.1



[PATCH v6 97/99] xen: Convert pvcalls-back to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This is a straightforward conversion.

Signed-off-by: Matthew Wilcox 
---
 drivers/xen/pvcalls-back.c | 51 ++
 1 file changed, 15 insertions(+), 36 deletions(-)

diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index c7822d8078b9..e059d2e777e1 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -15,10 +15,10 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -50,7 +50,7 @@ struct pvcalls_fedata {
struct xen_pvcalls_back_ring ring;
int irq;
struct list_head socket_mappings;
-   struct radix_tree_root socketpass_mappings;
+   struct xarray socketpass_mappings;
struct semaphore socket_lock;
 };
 
@@ -492,10 +492,9 @@ static int pvcalls_back_release(struct xenbus_device *dev,
goto out;
}
}
-   mappass = radix_tree_lookup(>socketpass_mappings,
-   req->u.release.id);
+   mappass = xa_load(>socketpass_mappings, req->u.release.id);
if (mappass != NULL) {
-   radix_tree_delete(>socketpass_mappings, mappass->id);
+   xa_erase(>socketpass_mappings, mappass->id);
up(>socket_lock);
ret = pvcalls_back_release_passive(dev, fedata, mappass);
} else
@@ -650,10 +649,8 @@ static int pvcalls_back_bind(struct xenbus_device *dev,
map->fedata = fedata;
map->id = req->u.bind.id;
 
-   down(>socket_lock);
-   ret = radix_tree_insert(>socketpass_mappings, map->id,
-   map);
-   up(>socket_lock);
+   ret = xa_err(xa_store(>socketpass_mappings, map->id, map,
+   GFP_KERNEL));
if (ret)
goto out;
 
@@ -689,9 +686,7 @@ static int pvcalls_back_listen(struct xenbus_device *dev,
 
fedata = dev_get_drvdata(>dev);
 
-   down(>socket_lock);
-   map = radix_tree_lookup(>socketpass_mappings, req->u.listen.id);
-   up(>socket_lock);
+   map = xa_load(>socketpass_mappings, req->u.listen.id);
if (map == NULL)
goto out;
 
@@ -717,10 +712,7 @@ static int pvcalls_back_accept(struct xenbus_device *dev,
 
fedata = dev_get_drvdata(>dev);
 
-   down(>socket_lock);
-   mappass = radix_tree_lookup(>socketpass_mappings,
-   req->u.accept.id);
-   up(>socket_lock);
+   mappass = xa_load(>socketpass_mappings, req->u.accept.id);
if (mappass == NULL)
goto out_error;
 
@@ -765,10 +757,7 @@ static int pvcalls_back_poll(struct xenbus_device *dev,
 
fedata = dev_get_drvdata(>dev);
 
-   down(>socket_lock);
-   mappass = radix_tree_lookup(>socketpass_mappings,
-   req->u.poll.id);
-   up(>socket_lock);
+   mappass = xa_load(>socketpass_mappings, req->u.poll.id);
if (mappass == NULL)
return -EINVAL;
 
@@ -960,7 +949,7 @@ static int backend_connect(struct xenbus_device *dev)
fedata->dev = dev;
 
INIT_LIST_HEAD(>socket_mappings);
-   INIT_RADIX_TREE(>socketpass_mappings, GFP_KERNEL);
+   xa_init(>socketpass_mappings);
sema_init(>socket_lock, 1);
dev_set_drvdata(>dev, fedata);
 
@@ -984,9 +973,7 @@ static int backend_disconnect(struct xenbus_device *dev)
struct pvcalls_fedata *fedata;
struct sock_mapping *map, *n;
struct sockpass_mapping *mappass;
-   struct radix_tree_iter iter;
-   void **slot;
-
+   unsigned long index = 0;
 
fedata = dev_get_drvdata(>dev);
 
@@ -996,18 +983,10 @@ static int backend_disconnect(struct xenbus_device *dev)
pvcalls_back_release_active(dev, fedata, map);
}
 
-   radix_tree_for_each_slot(slot, >socketpass_mappings, , 0) {
-   mappass = radix_tree_deref_slot(slot);
-   if (!mappass)
-   continue;
-   if (radix_tree_exception(mappass)) {
-   if (radix_tree_deref_retry(mappass))
-   slot = radix_tree_iter_retry();
-   } else {
-   radix_tree_delete(>socketpass_mappings,
- mappass->id);
-   pvcalls_back_release_passive(dev, fedata, mappass);
-   }
+   xa_for_each(>socketpass_mappings, mappass, index, ULONG_MAX,
+   XA_PRESENT) {
+   xa_erase(>socketpass_mappings, index);
+   pvcalls_back_release_passive(dev, fedata, mappass);
}
up(>socket_lock);
 
-- 
2.15.1



[PATCH v6 95/99] f2fs: Convert gclist.iroot to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Straightforward conversion.

Signed-off-by: Matthew Wilcox 
---
 fs/f2fs/gc.c | 14 +++---
 fs/f2fs/gc.h |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index aac1e02f75df..2b33068dc36b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -417,7 +417,7 @@ static struct inode *find_gc_inode(struct gc_inode_list 
*gc_list, nid_t ino)
 {
struct inode_entry *ie;
 
-   ie = radix_tree_lookup(_list->iroot, ino);
+   ie = xa_load(_list->iroot, ino);
if (ie)
return ie->inode;
return NULL;
@@ -434,7 +434,7 @@ static void add_gc_inode(struct gc_inode_list *gc_list, 
struct inode *inode)
new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
new_ie->inode = inode;
 
-   f2fs_radix_tree_insert(_list->iroot, inode->i_ino, new_ie);
+   xa_store(_list->iroot, inode->i_ino, new_ie, GFP_NOFS);
list_add_tail(_ie->list, _list->ilist);
 }
 
@@ -442,7 +442,7 @@ static void put_gc_inode(struct gc_inode_list *gc_list)
 {
struct inode_entry *ie, *next_ie;
list_for_each_entry_safe(ie, next_ie, _list->ilist, list) {
-   radix_tree_delete(_list->iroot, ie->inode->i_ino);
+   xa_erase(_list->iroot, ie->inode->i_ino);
iput(ie->inode);
list_del(>list);
kmem_cache_free(inode_entry_slab, ie);
@@ -989,10 +989,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
int ret = 0;
struct cp_control cpc;
unsigned int init_segno = segno;
-   struct gc_inode_list gc_list = {
-   .ilist = LIST_HEAD_INIT(gc_list.ilist),
-   .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
-   };
+   struct gc_inode_list gc_list;
+
+   xa_init(_list.iroot);
+   INIT_LIST_HEAD(_list.ilist);
 
trace_f2fs_gc_begin(sbi->sb, sync, background,
get_pages(sbi, F2FS_DIRTY_NODES),
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 9325191fab2d..769259b0a4f6 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -41,7 +41,7 @@ struct f2fs_gc_kthread {
 
 struct gc_inode_list {
struct list_head ilist;
-   struct radix_tree_root iroot;
+   struct xarray iroot;
 };
 
 /*
-- 
2.15.1



[PATCH v6 96/99] dma-debug: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This is an unusual way to use the xarray tags.  If any other users
come up, we can add an xas_get_tags() / xas_set_tags() API, but until
then I don't want to encourage this kind of abuse.
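
What makes it unusual is that the per-entry tag bits are used as a tiny
counter rather than as search tags.  Since the diff below is hard to
read in this archive, here is a compressed sketch of the encoding using
the xas_*_tag() calls exactly as the patch does; read_overlap() and
write_overlap() are invented names, not the dma-debug functions:

#include <linux/xarray.h>

/* Decode the integer spread across the entry's tag bits. */
static unsigned int read_overlap(struct xa_state *xas)
{
        unsigned int value = 0;
        xa_tag_t tag;

        for (tag = 0; tag < XA_MAX_TAGS; tag++)
                if (xas_get_tag(xas, tag))
                        value |= 1U << tag;
        return value;
}

/* Encode a small integer, at most (1 << XA_MAX_TAGS) - 1, into the tags. */
static void write_overlap(struct xa_state *xas, unsigned int value)
{
        xa_tag_t tag;

        for (tag = 0; tag < XA_MAX_TAGS; tag++) {
                if (value & (1U << tag))
                        xas_set_tag(xas, tag);
                else
                        xas_clear_tag(xas, tag);
        }
}

The insert path in the hunk below then just wraps this kind of helper
around xas_create() and xas_store() under xas_lock_irqsave().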

Signed-off-by: Matthew Wilcox 
---
 lib/dma-debug.c | 105 +---
 1 file changed, 46 insertions(+), 59 deletions(-)

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index fb4af570ce04..965b3837d060 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -22,7 +22,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -30,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -465,9 +465,8 @@ EXPORT_SYMBOL(debug_dma_dump_mappings);
  * At any time debug_dma_assert_idle() can be called to trigger a
  * warning if any cachelines in the given page are in the active set.
  */
-static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
-static DEFINE_SPINLOCK(radix_lock);
-#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
+static DEFINE_XARRAY_FLAGS(dma_active_cacheline, XA_FLAGS_LOCK_IRQ);
+#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << XA_MAX_TAGS) - 1)
 #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
 #define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
 
@@ -477,37 +476,40 @@ static phys_addr_t to_cacheline_number(struct 
dma_debug_entry *entry)
(entry->offset >> L1_CACHE_SHIFT);
 }
 
-static int active_cacheline_read_overlap(phys_addr_t cln)
+static unsigned int active_cacheline_read_overlap(struct xa_state *xas)
 {
-   int overlap = 0, i;
+   unsigned int tags = 0;
+   xa_tag_t tag;
 
-   for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
-   if (radix_tree_tag_get(_active_cacheline, cln, i))
-   overlap |= 1 << i;
-   return overlap;
+   for (tag = 0; tag < XA_MAX_TAGS; tag++)
+   if (xas_get_tag(xas, tag))
+   tags |= 1U << tag;
+
+   return tags;
 }
 
-static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
+static int active_cacheline_set_overlap(struct xa_state *xas, int overlap)
 {
-   int i;
+   xa_tag_t tag;
 
if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
return overlap;
 
-   for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
-   if (overlap & 1 << i)
-   radix_tree_tag_set(_active_cacheline, cln, i);
+   for (tag = 0; tag < XA_MAX_TAGS; tag++) {
+   if (overlap & (1U << tag))
+   xas_set_tag(xas, tag);
else
-   radix_tree_tag_clear(_active_cacheline, cln, i);
+   xas_clear_tag(xas, tag);
+   }
 
return overlap;
 }
 
-static void active_cacheline_inc_overlap(phys_addr_t cln)
+static void active_cacheline_inc_overlap(struct xa_state *xas)
 {
-   int overlap = active_cacheline_read_overlap(cln);
+   int overlap = active_cacheline_read_overlap(xas);
 
-   overlap = active_cacheline_set_overlap(cln, ++overlap);
+   overlap = active_cacheline_set_overlap(xas, ++overlap);
 
/* If we overflowed the overlap counter then we're potentially
 * leaking dma-mappings.  Otherwise, if maps and unmaps are
@@ -517,21 +519,22 @@ static void active_cacheline_inc_overlap(phys_addr_t cln)
 */
WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
  "DMA-API: exceeded %d overlapping mappings of cacheline 
%pa\n",
- ACTIVE_CACHELINE_MAX_OVERLAP, );
+ ACTIVE_CACHELINE_MAX_OVERLAP, >xa_index);
 }
 
-static int active_cacheline_dec_overlap(phys_addr_t cln)
+static int active_cacheline_dec_overlap(struct xa_state *xas)
 {
-   int overlap = active_cacheline_read_overlap(cln);
+   int overlap = active_cacheline_read_overlap(xas);
 
-   return active_cacheline_set_overlap(cln, --overlap);
+   return active_cacheline_set_overlap(xas, --overlap);
 }
 
 static int active_cacheline_insert(struct dma_debug_entry *entry)
 {
phys_addr_t cln = to_cacheline_number(entry);
+   XA_STATE(xas, _active_cacheline, cln);
unsigned long flags;
-   int rc;
+   struct dma_debug_entry *exists;
 
/* If the device is not writing memory then we don't have any
 * concerns about the cpu consuming stale data.  This mitigates
@@ -540,32 +543,32 @@ static int active_cacheline_insert(struct dma_debug_entry 
*entry)
if (entry->direction == DMA_TO_DEVICE)
return 0;
 
-   spin_lock_irqsave(_lock, flags);
-   rc = radix_tree_insert(_active_cacheline, cln, entry);
-   if (rc == -EEXIST)
-   active_cacheline_inc_overlap(cln);
-   spin_unlock_irqrestore(_lock, flags);
+   xas_lock_irqsave(, flags);
+   exists = xas_create();
+   if (exists)
+   active_cacheline_inc_overlap();
+   else
+   



[PATCH v6 80/99] blk-ioc: Convert to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Skip converting the lock to use xa_lock; I think this code can live with
the double-locking.
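
The double-locking being tolerated is the io_context spinlock on the
outside plus the XArray's internal xa_lock that xa_store() and
xa_erase() take for the brief duration of each update.  A schematic
sketch of that layering with an invented structure, not blk-ioc code:

#include <linux/spinlock.h>
#include <linux/xarray.h>

struct ctx {
        spinlock_t lock;        /* outer lock: protects hint and any lists */
        struct xarray array;
        void *hint;
};

static int ctx_add(struct ctx *c, unsigned long id, void *item)
{
        void *old;

        spin_lock(&c->lock);
        /* xa_store() also takes c->array.xa_lock internally (second lock). */
        old = xa_store(&c->array, id, item, GFP_ATOMIC);
        if (!xa_err(old))
                c->hint = item;
        spin_unlock(&c->lock);

        return xa_err(old);
}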

Signed-off-by: Matthew Wilcox 
---
 block/blk-ioc.c   | 13 +++--
 include/linux/iocontext.h |  6 +++---
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index f23311e4b201..baf83c8ac503 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -68,7 +68,7 @@ static void ioc_destroy_icq(struct io_cq *icq)
 
lockdep_assert_held(>lock);
 
-   radix_tree_delete(>icq_tree, icq->q->id);
+   xa_erase(>icq_array, icq->q->id);
hlist_del_init(>ioc_node);
list_del_init(>q_node);
 
@@ -278,7 +278,7 @@ int create_task_io_context(struct task_struct *task, gfp_t 
gfp_flags, int node)
atomic_set(>nr_tasks, 1);
atomic_set(>active_ref, 1);
spin_lock_init(>lock);
-   INIT_RADIX_TREE(>icq_tree, GFP_ATOMIC | __GFP_HIGH);
+   xa_init_flags(>icq_array, XA_FLAGS_LOCK_IRQ);
INIT_HLIST_HEAD(>icq_list);
INIT_WORK(>release_work, ioc_release_fn);
 
@@ -363,7 +363,7 @@ struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct 
request_queue *q)
if (icq && icq->q == q)
goto out;
 
-   icq = radix_tree_lookup(>icq_tree, q->id);
+   icq = xa_load(>icq_array, q->id);
if (icq && icq->q == q)
rcu_assign_pointer(ioc->icq_hint, icq); /* allowed to race */
else
@@ -398,7 +398,7 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct 
request_queue *q,
if (!icq)
return NULL;
 
-   if (radix_tree_maybe_preload(gfp_mask) < 0) {
+   if (xa_reserve(>icq_array, q->id, gfp_mask)) {
kmem_cache_free(et->icq_cache, icq);
return NULL;
}
@@ -412,7 +412,8 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct 
request_queue *q,
spin_lock_irq(q->queue_lock);
spin_lock(&ioc->lock);
 
-   if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
+   if (likely(!xa_store(&ioc->icq_array, q->id, icq,
+   GFP_ATOMIC | __GFP_HIGH))) {
hlist_add_head(&icq->ioc_node, &ioc->icq_list);
list_add(&icq->q_node, &q->icq_list);
if (et->uses_mq && et->ops.mq.init_icq)
@@ -421,6 +422,7 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct 
request_queue *q,
et->ops.sq.elevator_init_icq_fn(icq);
} else {
kmem_cache_free(et->icq_cache, icq);
+   xa_erase(&ioc->icq_array, q->id);
icq = ioc_lookup_icq(ioc, q);
if (!icq)
printk(KERN_ERR "cfq: icq link failed!\n");
@@ -428,7 +430,6 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct 
request_queue *q,
 
spin_unlock(&ioc->lock);
spin_unlock_irq(q->queue_lock);
-   radix_tree_preload_end();
return icq;
 }
 
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index dba15ca8e60b..e16224f70084 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -2,9 +2,9 @@
 #ifndef IOCONTEXT_H
 #define IOCONTEXT_H
 
-#include <linux/radix-tree.h>
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
+#include <linux/xarray.h>
 
 enum {
ICQ_EXITED  = 1 << 2,
@@ -56,7 +56,7 @@ enum {
  * - ioc->icq_list and icq->ioc_node are protected by ioc lock.
  *   q->icq_list and icq->q_node by q lock.
  *
- * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq
+ * - ioc->icq_array and ioc->icq_hint are protected by ioc lock, while icq
  *   itself is protected by q lock.  However, both the indexes and icq
  *   itself are also RCU managed and lookup can be performed holding only
  *   the q lock.
@@ -111,7 +111,7 @@ struct io_context {
int nr_batch_requests; /* Number of requests left in the batch */
unsigned long last_waited; /* Time last woken after wait for request */
 
-   struct radix_tree_root  icq_tree;
+   struct xarray   icq_array;
struct io_cq __rcu  *icq_hint;
struct hlist_head   icq_list;
 
-- 
2.15.1



[PATCH v6 81/99] i915: Convert handles_vma to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

Straightforward conversion.
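
The interesting part is the iteration in lut_close(): the slot-based
radix tree walk becomes an xas_for_each() loop that clears each entry in
place.  A minimal sketch of that pattern (the helper name is invented
and linux/xarray.h is assumed; it simply mirrors the hunk below):

    /* Illustrative only: mirrors the lut_close() conversion below. */
    static void xa_clear_all_sketch(struct xarray *xa)
    {
            XA_STATE(xas, xa, 0);
            void *entry;

            rcu_read_lock();
            xas_for_each(&xas, entry, ULONG_MAX)
                    xas_store(&xas, NULL);  /* stands in for radix_tree_iter_delete() */
            rcu_read_unlock();
    }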

Signed-off-by: Matthew Wilcox 
---
 drivers/gpu/drm/i915/i915_gem.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_context.c   | 12 +---
 drivers/gpu/drm/i915/i915_gem_context.h   |  4 ++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c|  6 +++---
 drivers/gpu/drm/i915/selftests/mock_context.c |  2 +-
 5 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 25ce7bcf9988..69e944f4dfce 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3351,7 +3351,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, 
struct drm_file *file)
if (ctx->file_priv != fpriv)
continue;
 
-   vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
+   vma = xa_erase(&ctx->handles_vma, lut->handle);
GEM_BUG_ON(vma->obj != obj);
 
/* We allow the process to have multiple handles to the same
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index f782cf2069c1..1aff35ba6e18 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -95,9 +95,9 @@
 
 static void lut_close(struct i915_gem_context *ctx)
 {
+   XA_STATE(xas, &ctx->handles_vma, 0);
struct i915_lut_handle *lut, *ln;
-   struct radix_tree_iter iter;
-   void __rcu **slot;
+   struct i915_vma *vma;
 
list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
list_del(&lut->obj_link);
@@ -105,10 +105,8 @@ static void lut_close(struct i915_gem_context *ctx)
}
 
rcu_read_lock();
-   radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
-   struct i915_vma *vma = rcu_dereference_raw(*slot);
-
-   radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+   xas_for_each(&xas, vma, ULONG_MAX) {
+   xas_store(&xas, NULL);
__i915_gem_object_release_unless_active(vma->obj);
}
rcu_read_unlock();
@@ -276,7 +274,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
ctx->i915 = dev_priv;
ctx->priority = I915_PRIORITY_NORMAL;
 
-   INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+   xa_init(&ctx->handles_vma);
INIT_LIST_HEAD(&ctx->handles_list);
 
/* Default context will never have a file_priv */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h 
b/drivers/gpu/drm/i915/i915_gem_context.h
index 44688e22a5c2..8e3e0d002f77 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -181,11 +181,11 @@ struct i915_gem_context {
/** remap_slice: Bitmask of cache lines that need remapping */
u8 remap_slice;
 
-   /** handles_vma: rbtree to look up our context specific obj/vma for
+   /** handles_vma: lookup our context specific obj/vma for
 * the user handle. (user handles are per fd, but the binding is
 * per vm, which may be one per context or shared with the global GTT)
 */
-   struct radix_tree_root handles_vma;
+   struct xarray handles_vma;
 
/** handles_list: reverse list of all the rbtree entries in use for
 * this context, which allows us to free all the allocations on
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 435ed95df144..828f4b5473ea 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -683,7 +683,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
-   struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
+   struct xarray *handles_vma = &eb->ctx->handles_vma;
struct drm_i915_gem_object *obj;
unsigned int i;
int err;
@@ -702,7 +702,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
struct i915_lut_handle *lut;
struct i915_vma *vma;
 
-   vma = radix_tree_lookup(handles_vma, handle);
+   vma = xa_load(handles_vma, handle);
if (likely(vma))
goto add_vma;
 
@@ -724,7 +724,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
goto err_obj;
}
 
-   err = radix_tree_insert(handles_vma, handle, vma);
+   err = xa_err(xa_store(handles_vma, handle, vma, GFP_KERNEL));
if (unlikely(err)) {
kfree(lut);
goto err_obj;
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c 
b/drivers/gpu/drm/i915/selftests/mock_context.c
index bbf80d42e793..b664a7159242 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -40,7 +40,7 @@ mock_context(struct drm_i915_private *i915,
  

[PATCH v6 82/99] s390: Convert gmap to XArray

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

The three radix trees in gmap are all converted to the XArray.
This is another case where holding multiple locks mandates the use
of the xa_reserve() API.  The gmap_insert_rmap() function is
considerably simplified by using the advanced API;
gmap_radix_tree_free() turns out to just be xa_destroy(), and
gmap_rmap_radix_tree_free() is a nice little iteration followed
by xa_destroy().
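
To spell out the xa_reserve() point: the slot is reserved with a
sleeping allocation before the spinlocks are taken, then filled (or
erased again) once they are held.  A minimal sketch with invented
names, not lifted from the patch:

    /* Illustrative only: reserve first, store under the existing lock. */
    static int insert_locked_sketch(struct xarray *xa, unsigned long index,
                                    void *entry, spinlock_t *lock)
    {
            int err = xa_reserve(xa, index, GFP_KERNEL);    /* may sleep */

            if (err)
                    return err;
            spin_lock(lock);                        /* pre-existing lock(s) */
            xa_store(xa, index, entry, GFP_ATOMIC); /* fills the reserved slot */
            spin_unlock(lock);
            return 0;
    }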

Signed-off-by: Matthew Wilcox 
---
 arch/s390/include/asm/gmap.h |  12 ++--
 arch/s390/mm/gmap.c  | 133 +++
 2 files changed, 51 insertions(+), 94 deletions(-)

diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index e07cce88dfb0..7695a01d19d7 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -14,14 +14,14 @@
  * @list: list head for the mm->context gmap list
  * @crst_list: list of all crst tables used in the guest address space
  * @mm: pointer to the parent mm_struct
- * @guest_to_host: radix tree with guest to host address translation
- * @host_to_guest: radix tree with pointer to segment table entries
+ * @guest_to_host: guest to host address translation
+ * @host_to_guest: pointers to segment table entries
  * @guest_table_lock: spinlock to protect all entries in the guest page table
  * @ref_count: reference counter for the gmap structure
  * @table: pointer to the page directory
  * @asce: address space control element for gmap page table
  * @pfault_enabled: defines if pfaults are applicable for the guest
- * @host_to_rmap: radix tree with gmap_rmap lists
+ * @host_to_rmap: gmap_rmap lists
  * @children: list of shadow gmap structures
  * @pt_list: list of all page tables used in the shadow guest address space
  * @shadow_lock: spinlock to protect the shadow gmap list
@@ -35,8 +35,8 @@ struct gmap {
struct list_head list;
struct list_head crst_list;
struct mm_struct *mm;
-   struct radix_tree_root guest_to_host;
-   struct radix_tree_root host_to_guest;
+   struct xarray guest_to_host;
+   struct xarray host_to_guest;
spinlock_t guest_table_lock;
atomic_t ref_count;
unsigned long *table;
@@ -45,7 +45,7 @@ struct gmap {
void *private;
bool pfault_enabled;
/* Additional data for shadow guest address spaces */
-   struct radix_tree_root host_to_rmap;
+   struct xarray host_to_rmap;
struct list_head children;
struct list_head pt_list;
spinlock_t shadow_lock;
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 05d459b638f5..818a5e80914d 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -60,9 +60,9 @@ static struct gmap *gmap_alloc(unsigned long limit)
INIT_LIST_HEAD(&gmap->crst_list);
INIT_LIST_HEAD(&gmap->children);
INIT_LIST_HEAD(&gmap->pt_list);
-   INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
-   INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
-   INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC);
+   xa_init(&gmap->guest_to_host);
+   xa_init(&gmap->host_to_guest);
+   xa_init(&gmap->host_to_rmap);
spin_lock_init(&gmap->guest_table_lock);
spin_lock_init(&gmap->shadow_lock);
atomic_set(&gmap->ref_count, 1);
@@ -121,55 +121,16 @@ static void gmap_flush_tlb(struct gmap *gmap)
__tlb_flush_global();
 }
 
-static void gmap_radix_tree_free(struct radix_tree_root *root)
-{
-   struct radix_tree_iter iter;
-   unsigned long indices[16];
-   unsigned long index;
-   void __rcu **slot;
-   int i, nr;
-
-   /* A radix tree is freed by deleting all of its entries */
-   index = 0;
-   do {
-   nr = 0;
-   radix_tree_for_each_slot(slot, root, &iter, index) {
-   indices[nr] = iter.index;
-   if (++nr == 16)
-   break;
-   }
-   for (i = 0; i < nr; i++) {
-   index = indices[i];
-   radix_tree_delete(root, index);
-   }
-   } while (nr > 0);
-}
-
-static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
+static void gmap_rmap_free(struct xarray *xa)
 {
struct gmap_rmap *rmap, *rnext, *head;
-   struct radix_tree_iter iter;
-   unsigned long indices[16];
-   unsigned long index;
-   void __rcu **slot;
-   int i, nr;
-
-   /* A radix tree is freed by deleting all of its entries */
-   index = 0;
-   do {
-   nr = 0;
-   radix_tree_for_each_slot(slot, root, &iter, index) {
-   indices[nr] = iter.index;
-   if (++nr == 16)
-   break;
-   }
-   for (i = 0; i < nr; i++) {
-   index = indices[i];
-   head = radix_tree_delete(root, index);
-   gmap_for_each_rmap_safe(rmap, rnext, head)
-   kfree(rmap);
- 

[PATCH v6 01/99] xarray: Add the xa_lock to the radix_tree_root

2018-01-17 Thread Matthew Wilcox
From: Matthew Wilcox 

This results in no change in structure size on 64-bit x86 as it fits in
the padding between the gfp_t and the void *.

Initialising the spinlock requires a name for the benefit of lockdep,
so RADIX_TREE_INIT() now needs to know the name of the radix tree it's
initialising, and so do IDR_INIT() and IDA_INIT().

Also add the xa_lock() and xa_unlock() family of wrappers to make it
easier to use the lock.  If we could rely on -fplan9-extensions in
the compiler, we could avoid all of this syntactic sugar, but that
wasn't added until gcc 4.6.
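
A small usage sketch of the wrappers (not part of the patch; the tree
and function names are invented): because RADIX_TREE() now passes its
own name to RADIX_TREE_INIT(), lockdep reports the lock as
"demo_tree.xa_lock", and callers no longer need to know where the lock
field lives.

    #include <linux/radix-tree.h>
    #include <linux/xarray.h>

    static RADIX_TREE(demo_tree, GFP_ATOMIC);  /* atomic: inserts happen under the lock */

    static int demo_store(unsigned long index, void *item)
    {
            int err;

            xa_lock(&demo_tree);               /* replaces an open-coded spinlock */
            err = radix_tree_insert(&demo_tree, index, item);
            xa_unlock(&demo_tree);
            return err;
    }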

Signed-off-by: Matthew Wilcox 
---
 fs/f2fs/gc.c   |  2 +-
 include/linux/idr.h| 12 ++--
 include/linux/radix-tree.h |  7 +--
 include/linux/xarray.h | 24 
 kernel/pid.c   |  2 +-
 tools/include/linux/spinlock.h |  1 +
 6 files changed, 38 insertions(+), 10 deletions(-)
 create mode 100644 include/linux/xarray.h

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d844dcb80570..aac1e02f75df 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -991,7 +991,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
unsigned int init_segno = segno;
struct gc_inode_list gc_list = {
.ilist = LIST_HEAD_INIT(gc_list.ilist),
-   .iroot = RADIX_TREE_INIT(GFP_NOFS),
+   .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
 
trace_f2fs_gc_begin(sbi->sb, sync, background,
diff --git a/include/linux/idr.h b/include/linux/idr.h
index ed1459d36b9d..11eea38b9629 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -32,11 +32,11 @@ struct idr {
 #define IDR_RT_MARKER  (ROOT_IS_IDR | (__force gfp_t)  \
(1 << (ROOT_TAG_SHIFT + IDR_FREE)))
 
-#define IDR_INIT   \
+#define IDR_INIT(name) \
 {  \
-   .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER)\
+   .idr_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER)  \
 }
-#define DEFINE_IDR(name)   struct idr name = IDR_INIT
+#define DEFINE_IDR(name)   struct idr name = IDR_INIT(name)
 
 /**
  * idr_get_cursor - Return the current position of the cyclic allocator
@@ -195,10 +195,10 @@ struct ida {
struct radix_tree_root  ida_rt;
 };
 
-#define IDA_INIT   {   \
-   .ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT),  \
+#define IDA_INIT(name) {   \
+   .ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT),\
 }
-#define DEFINE_IDA(name)   struct ida name = IDA_INIT
+#define DEFINE_IDA(name)   struct ida name = IDA_INIT(name)
 
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
 int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 6c4e2e716dac..34149e8b5f73 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -110,20 +110,23 @@ struct radix_tree_node {
 #define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT)
 
 struct radix_tree_root {
+   spinlock_t  xa_lock;
gfp_t   gfp_mask;
struct radix_tree_node  __rcu *rnode;
 };
 
-#define RADIX_TREE_INIT(mask)  {   \
+#define RADIX_TREE_INIT(name, mask){   \
+   .xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock),  \
.gfp_mask = (mask), \
.rnode = NULL,  \
 }
 
 #define RADIX_TREE(name, mask) \
-   struct radix_tree_root name = RADIX_TREE_INIT(mask)
+   struct radix_tree_root name = RADIX_TREE_INIT(name, mask)
 
 #define INIT_RADIX_TREE(root, mask)\
 do {   \
+   spin_lock_init(&(root)->xa_lock);   \
(root)->gfp_mask = (mask);  \
(root)->rnode = NULL;   \
 } while (0)
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
new file mode 100644
index ..2dfc8006fe64
--- /dev/null
+++ b/include/linux/xarray.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef _LINUX_XARRAY_H
+#define _LINUX_XARRAY_H
+/*
+ * eXtensible Arrays
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox 
+ */
+
+#include 
+
+#define xa_trylock(xa) spin_trylock(&(xa)->xa_lock)
+#define xa_lock(xa)spin_lock(&(xa)->xa_lock)
+#define xa_unlock(xa)  spin_unlock(&(xa)->xa_lock)
+#define xa_lock_bh(xa) spin_lock_bh(&(xa)->xa_lock)

[PATCH v2 1/4] dt-bindings: firmware: Add bindings for ZynqMP firmware

2018-01-17 Thread Jolly Shah
Add documentation to describe the Xilinx ZynqMP firmware driver
bindings. The firmware driver provides an interface to the firmware
APIs, which any driver can use to communicate with the PMUFW
(Platform Management Unit firmware).

Signed-off-by: Jolly Shah 
Signed-off-by: Rajan Vaja 
---
 .../bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt| 16 
 1 file changed, 16 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt

diff --git 
a/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt 
b/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt
new file mode 100644
index 000..a7aaf56
--- /dev/null
+++ b/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt
@@ -0,0 +1,16 @@
+Xilinx Zynq MPSoC Firmware Device Tree Bindings
+
+The zynqmp-firmware node describes the interface to platform firmware.
+
+Required properties:
+ - compatible: Must contain:  "xlnx,zynqmp-firmware"
+ - method: The method of calling the PM-API firmware layer.
+   Permitted values are:
+- "smc" : To be used in configurations without a hypervisor
+- "hvc" : To be used when hypervisor is present
+
+Examples:
+   zynqmp_firmware: zynqmp-firmware {
+   compatible = "xlnx,zynqmp-firmware";
+   method = "smc";
+   };
--
2.7.4


