On Wed, Jul 18, 2012 at 11:55:56AM -0500, Seth Jennings wrote:
> This patchset provides page mapping via the page table.
> On some archs, most notably ARM, this method has been
> demonstrated to be faster than copying.
> 
> The method selection (copy vs page table) is controlled by the
> definition of USE_PGTABLE_MAPPING, which can be defined for any
> arch that performs better with page table mapping.
> 
> Signed-off-by: Seth Jennings <sjenn...@linux.vnet.ibm.com>
> ---
>  drivers/staging/zsmalloc/zsmalloc-main.c |  182 ++++++++++++++++++++++--------
>  drivers/staging/zsmalloc/zsmalloc_int.h  |    6 -
>  2 files changed, 134 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c
> index b86133f..defe350 100644
> --- a/drivers/staging/zsmalloc/zsmalloc-main.c
> +++ b/drivers/staging/zsmalloc/zsmalloc-main.c
> @@ -89,6 +89,30 @@
>  #define CLASS_IDX_MASK       ((1 << CLASS_IDX_BITS) - 1)
>  #define FULLNESS_MASK        ((1 << FULLNESS_BITS) - 1)
>  
> +/*
> + * By default, zsmalloc uses a copy-based object mapping method to access
> + * allocations that span two pages. However, if a particular architecture
> + * 1) Implements local_flush_tlb_kernel_range() and 2) Performs VM mapping
> + * faster than copying, then it should be added here so that

How about adding your benchmark URL?

> + * USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use page table
> + * mapping rather than copying
> + * for object mapping.

Unnecessary newline.

> +*/
> +#if defined(CONFIG_ARM)
> +#define USE_PGTABLE_MAPPING
> +#endif

I have no better idea, and I would like to get zsmalloc into mainline,
so no objection.
Nitin?
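For what it's worth, another arch could later opt in like this, assuming
it also implements local_flush_tlb_kernel_range() (CONFIG_FOO below is a
hypothetical symbol, purely for illustration):

        #if defined(CONFIG_ARM) || defined(CONFIG_FOO)
        #define USE_PGTABLE_MAPPING
        #endif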

> +
> +struct mapping_area {
> +#ifdef USE_PGTABLE_MAPPING
> +     struct vm_struct *vm; /* vm area for mapping objects that span pages */
> +#else
> +     char *vm_buf; /* copy buffer for objects that span pages */
> +#endif
> +     char *vm_addr; /* address of kmap_atomic()'ed pages */
> +     enum zs_mapmode vm_mm; /* mapping mode */
> +};
> +
> +
>  /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
>  static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
>  
> @@ -471,16 +495,83 @@ static struct page *find_get_zspage(struct size_class *class)
>       return page;
>  }
>  
> -static void zs_copy_map_object(char *buf, struct page *page,
> -                             int off, int size)
> +#ifdef USE_PGTABLE_MAPPING
> +static inline int __zs_cpu_up(struct mapping_area *area)
> +{
> +     /*
> +      * Make sure we don't leak memory if a cpu UP notification
> +      * and zs_init() race and both call zs_cpu_up() on the same cpu
> +      */
> +     if (area->vm)
> +             return 0;
> +     area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
> +     if (!area->vm)
> +             return -ENOMEM;
> +     return 0;
> +}
> +
> +static inline void __zs_cpu_down(struct mapping_area *area)
> +{
> +     if (area->vm)
> +             free_vm_area(area->vm);
> +     area->vm = NULL;
> +}
> +
> +static inline void *__zs_map_object(struct mapping_area *area,
> +                             struct page *pages[2], int off, int size)
> +{
> +     BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
> +     area->vm_addr = area->vm->addr;
> +     return area->vm_addr + off;
> +}
> +
> +static inline void __zs_unmap_object(struct mapping_area *area,
> +                             struct page *pages[2], int off, int size)
> +{
> +     unsigned long addr = (unsigned long)area->vm_addr;
> +     unsigned long end = addr + (PAGE_SIZE * 2);
> +
> +     flush_cache_vunmap(addr, end);
> +     unmap_kernel_range_noflush(addr, PAGE_SIZE * 2);
> +     local_flush_tlb_kernel_range(addr, end);
> +}
> +
> +#else /* USE_PGTABLE_MAPPING */
> +
> +static inline int __zs_cpu_up(struct mapping_area *area)
> +{
> +     /*
> +      * Make sure we don't leak memory if a cpu UP notification
> +      * and zs_init() race and both call zs_cpu_up() on the same cpu
> +      */
> +     if (area->vm_buf)
> +             return 0;
> +     area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
> +     if (!area->vm_buf)
> +             return -ENOMEM;
> +     return 0;
> +}
> +
> +static inline void __zs_cpu_down(struct mapping_area *area)
> +{
> +     if (area->vm_buf)
> +             free_page((unsigned long)area->vm_buf);
> +     area->vm_buf = NULL;
> +}
> +
> +static void *__zs_map_object(struct mapping_area *area,
> +                     struct page *pages[2], int off, int size)
>  {
> -     struct page *pages[2];
>       int sizes[2];
>       void *addr;
> +     char *buf = area->vm_buf;
>  
> -     pages[0] = page;
> -     pages[1] = get_next_page(page);
> -     BUG_ON(!pages[1]);
> +     /* disable page faults to match kmap_atomic() return conditions */
> +     pagefault_disable();
> +
> +     /* no read fastpath */
> +     if (area->vm_mm == ZS_MM_WO)
> +             goto out;
>  
>       sizes[0] = PAGE_SIZE - off;
>       sizes[1] = size - sizes[0];
> @@ -492,18 +583,20 @@ static void zs_copy_map_object(char *buf, struct page *page,
>       addr = kmap_atomic(pages[1]);
>       memcpy(buf + sizes[0], addr, sizes[1]);
>       kunmap_atomic(addr);
> +out:
> +     return area->vm_buf;
>  }
>  
> -static void zs_copy_unmap_object(char *buf, struct page *page,
> -                             int off, int size)
> +static void __zs_unmap_object(struct mapping_area *area,
> +                     struct page *pages[2], int off, int size)
>  {
> -     struct page *pages[2];
>       int sizes[2];
>       void *addr;
> +     char *buf = area->vm_buf;
>  
> -     pages[0] = page;
> -     pages[1] = get_next_page(page);
> -     BUG_ON(!pages[1]);
> +     /* no write fastpath */
> +     if (area->vm_mm == ZS_MM_RO)
> +             goto out;
>  
>       sizes[0] = PAGE_SIZE - off;
>       sizes[1] = size - sizes[0];
> @@ -515,34 +608,31 @@ static void zs_copy_unmap_object(char *buf, struct page *page,
>       addr = kmap_atomic(pages[1]);
>       memcpy(addr, buf + sizes[0], sizes[1]);
>       kunmap_atomic(addr);
> +
> +out:
> +     /* enable page faults to match kunmap_atomic() return conditions */
> +     pagefault_enable();
>  }
>  
> +#endif /* USE_PGTABLE_MAPPING */
> +
>  static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
>                               void *pcpu)
>  {
> -     int cpu = (long)pcpu;
> +     int ret, cpu = (long)pcpu;
>       struct mapping_area *area;
>  
>       switch (action) {
>       case CPU_UP_PREPARE:
>               area = &per_cpu(zs_map_area, cpu);
> -             /*
> -              * Make sure we don't leak memory if a cpu UP notification
> -              * and zs_init() race and both call zs_cpu_up() on the same cpu
> -              */
> -             if (area->vm_buf)
> -                     return 0;
> -             area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
> -             if (!area->vm_buf)
> -                     return -ENOMEM;
> -             return 0;
> +             ret = __zs_cpu_up(area);
> +             if (ret)
> +                     return notifier_from_errno(ret);
>               break;
>       case CPU_DEAD:
>       case CPU_UP_CANCELED:
>               area = &per_cpu(zs_map_area, cpu);
> -             if (area->vm_buf)
> -                     free_page((unsigned long)area->vm_buf);
> -             area->vm_buf = NULL;
> +             __zs_cpu_down(area);
>               break;
>       }
>  
> @@ -759,6 +849,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
>       enum fullness_group fg;
>       struct size_class *class;
>       struct mapping_area *area;
> +     struct page *pages[2];
>  
>       BUG_ON(!handle);
>  
> @@ -775,19 +866,19 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
>       off = obj_idx_to_offset(page, obj_idx, class->size);
>  
>       area = &get_cpu_var(zs_map_area);
> +     area->vm_mm = mm;
>       if (off + class->size <= PAGE_SIZE) {
>               /* this object is contained entirely within a page */
>               area->vm_addr = kmap_atomic(page);
>               return area->vm_addr + off;
>       }
>  
> -     /* disable page faults to match kmap_atomic() return conditions */
> -     pagefault_disable();
> +     /* this object spans two pages */
> +     pages[0] = page;
> +     pages[1] = get_next_page(page);
> +     BUG_ON(!pages[1]);
>  
> -     if (mm != ZS_MM_WO)
> -             zs_copy_map_object(area->vm_buf, page, off, class->size);
> -     area->vm_addr = NULL;
> -     return area->vm_buf;
> +     return __zs_map_object(area, pages, off, class->size);
>  }
>  EXPORT_SYMBOL_GPL(zs_map_object);
>  
> @@ -801,17 +892,6 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
>       struct size_class *class;
>       struct mapping_area *area;
>  
> -     area = &__get_cpu_var(zs_map_area);
> -     /* single-page object fastpath */
> -     if (area->vm_addr) {
> -             kunmap_atomic(area->vm_addr);
> -             goto out;
> -     }
> -
> -     /* no write fastpath */
> -     if (area->vm_mm == ZS_MM_RO)
> -             goto pfenable;
> -
>       BUG_ON(!handle);
>  
>       obj_handle_to_location(handle, &page, &obj_idx);
> @@ -819,12 +899,18 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
>       class = &pool->size_class[class_idx];
>       off = obj_idx_to_offset(page, obj_idx, class->size);
>  
> -     zs_copy_unmap_object(area->vm_buf, page, off, class->size);
> +     area = &__get_cpu_var(zs_map_area);
> +     if (off + class->size <= PAGE_SIZE)
> +             kunmap_atomic(area->vm_addr);
> +     else {
> +             struct page *pages[2];
> +
> +             pages[0] = page;
> +             pages[1] = get_next_page(page);
> +             BUG_ON(!pages[1]);
>  
> -pfenable:
> -     /* enable page faults to match kunmap_atomic() return conditions */
> -     pagefault_enable();
> -out:
> +             __zs_unmap_object(area, pages, off, class->size);
> +     }
>       put_cpu_var(zs_map_area);
>  }
>  EXPORT_SYMBOL_GPL(zs_unmap_object);
> diff --git a/drivers/staging/zsmalloc/zsmalloc_int.h b/drivers/staging/zsmalloc/zsmalloc_int.h
> index 52805176..8c0b344 100644
> --- a/drivers/staging/zsmalloc/zsmalloc_int.h
> +++ b/drivers/staging/zsmalloc/zsmalloc_int.h
> @@ -109,12 +109,6 @@ enum fullness_group {
>   */
>  static const int fullness_threshold_frac = 4;
>  
> -struct mapping_area {
> -     char *vm_buf; /* copy buffer for objects that span pages */
> -     char *vm_addr; /* address of kmap_atomic()'ed pages */
> -     enum zs_mapmode vm_mm; /* mapping mode */
> -};
> -
>  struct size_class {
>       /*
>        * Size of objects stored in this class. Must be multiple
> -- 
> 1.7.9.5
> 
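For context, the caller-visible API is unchanged by this patch. A minimal
usage sketch of the map/unmap contract (based on the zsmalloc.h in this
series; treat the signatures as approximate rather than authoritative):

        unsigned long handle;
        void *dst;

        handle = zs_malloc(pool, len);
        if (!handle)
                return -ENOMEM;

        /* ZS_MM_WO: write-only, so the copy path can skip copying in */
        dst = zs_map_object(pool, handle, ZS_MM_WO);
        memcpy(dst, src, len);
        /* re-enables page faults (copy path) or tears down the mapping */
        zs_unmap_object(pool, handle);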

-- 
Kind regards,
Minchan Kim