[PATCH v2 4/5] mm: use indirect zbud handle and radix tree

2013-09-11 Thread Krzysztof Kozlowski
Add radix tree to zbud pool and use indirect zbud handle as radix tree
index.

This allows migration of zbud pages while the handle used by zswap
remains untouched. Previously zbud handles were virtual addresses. This
imposed a problem when a page was migrated.

This change also exposes and fixes race condition between:
 - zbud_reclaim_page() (called from zswap_frontswap_store())
and
 - zbud_free() (called from zswap_frontswap_invalidate_page()).
This race was already present, but the additional locking and indirect
handle use make it frequent during high memory pressure.

Race typically looks like:
 - thread 1: zbud_reclaim_page()
   - thread 1: zswap_writeback_entry()
 - zbud_map()
 - thread 0: zswap_frontswap_invalidate_page()
   - zbud_free()
 - thread 1: read zswap_entry from memory or call zbud_unmap(), now on
   invalid memory address

The zbud_reclaim_page() calls evict handler (zswap_writeback_entry())
without holding pool lock. The zswap_writeback_entry() reads
zswap_header from memory obtained from zbud_map() without holding
tree's lock. If invalidate happens during this time the zbud_free()
will remove handle from the tree.

The new map_count fields in zbud_header try to address this problem by
protecting handles from freeing.
Also the call to zbud_unmap() in zswap_writeback_entry() was moved
further - when the tree's lock could be obtained.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
---
 include/linux/zbud.h |2 +-
 mm/zbud.c|  313 +-
 mm/zswap.c   |   24 +++-
 3 files changed, 280 insertions(+), 59 deletions(-)

diff --git a/include/linux/zbud.h b/include/linux/zbud.h
index 2571a5c..12d72df 100644
--- a/include/linux/zbud.h
+++ b/include/linux/zbud.h
@@ -16,7 +16,7 @@ int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
 void zbud_free(struct zbud_pool *pool, unsigned long handle);
 int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
 void *zbud_map(struct zbud_pool *pool, unsigned long handle);
-void zbud_unmap(struct zbud_pool *pool, unsigned long handle);
+int zbud_unmap(struct zbud_pool *pool, unsigned long handle);
 u64 zbud_get_pool_size(struct zbud_pool *pool);
 
 #endif /* _ZBUD_H_ */
diff --git a/mm/zbud.c b/mm/zbud.c
index 9be160c..795d56a 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -50,6 +50,7 @@
#include <linux/preempt.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/radix-tree.h>
#include <linux/zbud.h>
 
 /*
@@ -69,6 +70,9 @@
 #define NCHUNKS(PAGE_SIZE >> CHUNK_SHIFT)
 #define ZHDR_SIZE_ALIGNED CHUNK_SIZE
 
+/* Empty handle, not yet allocated */
+#define HANDLE_EMPTY   0
+
 /**
  * struct zbud_pool - stores metadata for each zbud pool
  * @lock:  protects all pool fields and first|last_chunk fields of any
@@ -83,6 +87,10 @@
  * @pages_nr:  number of zbud pages in the pool.
  * @ops:   pointer to a structure of user defined operations specified at
  * pool creation time.
+ * @page_tree: mapping handle->zbud_header for zbud_map and migration;
+ * many pools may exist so do not use the mapping->page_tree
+ * @last_handle: last handle calculated; used as starting point when searching
+ * for next handle in page_tree in zbud_alloc().
  *
  * This structure is allocated at pool creation time and maintains metadata
  * pertaining to a particular zbud pool.
@@ -94,6 +102,8 @@ struct zbud_pool {
struct list_head lru;
u64 pages_nr;
struct zbud_ops *ops;
+   struct radix_tree_root page_tree;
+   unsigned long last_handle;
 };
 
 /*
@@ -103,12 +113,23 @@ struct zbud_pool {
  * @lru:   links the zbud page into the lru list in the pool
  * @first_chunks:  the size of the first buddy in chunks, 0 if free
  * @last_chunks:   the size of the last buddy in chunks, 0 if free
+ * @first_handle:  handle to page stored in first buddy
+ * @last_handle:   handle to page stored in last buddy
+ * @first_map_count:   mapped count of page stored in first buddy
+ * @last_map_count:mapped count of page stored in last buddy
+ *
+ * When map count reaches zero the corresponding handle is removed
+ * from radix tree and cannot be used any longer.
  */
 struct zbud_header {
struct list_head buddy;
struct list_head lru;
+   unsigned long first_handle;
+   unsigned long last_handle;
unsigned int first_chunks;
unsigned int last_chunks;
+   short int first_map_count;
+   short int last_map_count;
 };
 
 /*
@@ -135,38 +156,34 @@ static struct zbud_header *init_zbud_page(struct page 
*page)
struct zbud_header *zhdr = page_address(page);
zhdr->first_chunks = 0;
zhdr->last_chunks = 0;
+   zhdr->first_handle = HANDLE_EMPTY;
+   zhdr->last_handle = HANDLE_EMPTY;
+   zhdr->first_map_count = 0;
+   zhdr->last_map_count = 0;
	INIT_LIST_HEAD(&zhdr->buddy);
	INIT_LIST_HEAD(&zhdr->lru);
return zhdr;
 }
 
 /*
- * Encodes the handle of a particular buddy 

[PATCH v2 4/5] mm: use indirect zbud handle and radix tree

2013-09-11 Thread Krzysztof Kozlowski
Add radix tree to zbud pool and use indirect zbud handle as radix tree
index.

This allows migration of zbud pages while the handle used by zswap
remains untouched. Previously zbud handles were virtual addresses. This
imposed a problem when a page was migrated.

This change also exposes and fixes race condition between:
 - zbud_reclaim_page() (called from zswap_frontswap_store())
and
 - zbud_free() (called from zswap_frontswap_invalidate_page()).
This race was already present, but the additional locking and indirect
handle use make it frequent during high memory pressure.

Race typically looks like:
 - thread 1: zbud_reclaim_page()
   - thread 1: zswap_writeback_entry()
 - zbud_map()
 - thread 0: zswap_frontswap_invalidate_page()
   - zbud_free()
 - thread 1: read zswap_entry from memory or call zbud_unmap(), now on
   invalid memory address

The zbud_reclaim_page() calls evict handler (zswap_writeback_entry())
without holding pool lock. The zswap_writeback_entry() reads
zswap_header from memory obtained from zbud_map() without holding
tree's lock. If invalidate happens during this time the zbud_free()
will remove handle from the tree.

The new map_count fields in zbud_header try to address this problem by
protecting handles from freeing.
Also the call to zbud_unmap() in zswap_writeback_entry() was moved
further - when the tree's lock could be obtained.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
---
 include/linux/zbud.h |2 +-
 mm/zbud.c|  313 +-
 mm/zswap.c   |   24 +++-
 3 files changed, 280 insertions(+), 59 deletions(-)

diff --git a/include/linux/zbud.h b/include/linux/zbud.h
index 2571a5c..12d72df 100644
--- a/include/linux/zbud.h
+++ b/include/linux/zbud.h
@@ -16,7 +16,7 @@ int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
 void zbud_free(struct zbud_pool *pool, unsigned long handle);
 int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
 void *zbud_map(struct zbud_pool *pool, unsigned long handle);
-void zbud_unmap(struct zbud_pool *pool, unsigned long handle);
+int zbud_unmap(struct zbud_pool *pool, unsigned long handle);
 u64 zbud_get_pool_size(struct zbud_pool *pool);
 
 #endif /* _ZBUD_H_ */
diff --git a/mm/zbud.c b/mm/zbud.c
index 9be160c..795d56a 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -50,6 +50,7 @@
#include <linux/preempt.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/radix-tree.h>
#include <linux/zbud.h>
 
 /*
@@ -69,6 +70,9 @@
 #define NCHUNKS(PAGE_SIZE >> CHUNK_SHIFT)
 #define ZHDR_SIZE_ALIGNED CHUNK_SIZE
 
+/* Empty handle, not yet allocated */
+#define HANDLE_EMPTY   0
+
 /**
  * struct zbud_pool - stores metadata for each zbud pool
  * @lock:  protects all pool fields and first|last_chunk fields of any
@@ -83,6 +87,10 @@
  * @pages_nr:  number of zbud pages in the pool.
  * @ops:   pointer to a structure of user defined operations specified at
  * pool creation time.
+ * @page_tree: mapping handle->zbud_header for zbud_map and migration;
+ * many pools may exist so do not use the mapping->page_tree
+ * @last_handle: last handle calculated; used as starting point when searching
+ * for next handle in page_tree in zbud_alloc().
  *
  * This structure is allocated at pool creation time and maintains metadata
  * pertaining to a particular zbud pool.
@@ -94,6 +102,8 @@ struct zbud_pool {
struct list_head lru;
u64 pages_nr;
struct zbud_ops *ops;
+   struct radix_tree_root page_tree;
+   unsigned long last_handle;
 };
 
 /*
@@ -103,12 +113,23 @@ struct zbud_pool {
  * @lru:   links the zbud page into the lru list in the pool
  * @first_chunks:  the size of the first buddy in chunks, 0 if free
  * @last_chunks:   the size of the last buddy in chunks, 0 if free
+ * @first_handle:  handle to page stored in first buddy
+ * @last_handle:   handle to page stored in last buddy
+ * @first_map_count:   mapped count of page stored in first buddy
+ * @last_map_count:mapped count of page stored in last buddy
+ *
+ * When map count reaches zero the corresponding handle is removed
+ * from radix tree and cannot be used any longer.
  */
 struct zbud_header {
struct list_head buddy;
struct list_head lru;
+   unsigned long first_handle;
+   unsigned long last_handle;
unsigned int first_chunks;
unsigned int last_chunks;
+   short int first_map_count;
+   short int last_map_count;
 };
 
 /*
@@ -135,38 +156,34 @@ static struct zbud_header *init_zbud_page(struct page 
*page)
struct zbud_header *zhdr = page_address(page);
	zhdr->first_chunks = 0;
	zhdr->last_chunks = 0;
+   zhdr->first_handle = HANDLE_EMPTY;
+   zhdr->last_handle = HANDLE_EMPTY;
+   zhdr->first_map_count = 0;
+   zhdr->last_map_count = 0;
	INIT_LIST_HEAD(&zhdr->buddy);