On Sat 04-02-17 00:30:05, Hoeun Ryu wrote:
>  Using virtually mapped stacks, kernel stacks are allocated via vmalloc.
> In the current implementation, two stacks per cpu can be cached when
> tasks are freed and the cached stacks are reused in task duplication,
> but the array holding the cached stacks is statically allocated by the
> per-cpu API.
>  In this new implementation, the array for the cached stacks is
> dynamically allocated and freed by cpu hotplug callbacks, and the cached
> stacks are freed when a cpu goes down. The cpu hotplug setup is done in
> fork_init().

Why do we want this? I can see that the follow-up patch makes the number
configurable, but the changelog doesn't describe the motivation for that.
Which workload would benefit from a higher value?

> Signed-off-by: Hoeun Ryu <hoeun....@gmail.com>
> ---
>  kernel/fork.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 64 insertions(+), 17 deletions(-)
> 
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 61284d8..54421a9 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -167,26 +167,71 @@ void __weak arch_release_thread_stack(unsigned long *stack)
>   * flush.  Try to minimize the number of calls by caching stacks.
>   */
>  #define NR_CACHED_STACKS 2
> -static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
> +
> +struct vm_stack_cache {
> +     struct vm_struct **vm_stacks;
> +     int nr;
> +     int cur;
> +};
> +
> +static DEFINE_PER_CPU(struct vm_stack_cache, vm_stacks);
> +
> +static int alloc_vm_stack_cache(unsigned int cpu)
> +{
> +     struct vm_stack_cache *vm_stack_cache = &per_cpu(vm_stacks, cpu);
> +     struct vm_struct **vm_stacks = vm_stack_cache->vm_stacks;
> +     int i;
> +
> +     /* if free_vm_stack_cache() didn't free it */
> +     if (!vm_stacks) {
> +             vm_stacks =
> +                     vzalloc(sizeof(struct vm_struct *) * NR_CACHED_STACKS);
> +             if (!vm_stacks)
> +                     return -ENOMEM;
> +     }
> +
> +     vm_stack_cache->vm_stacks = vm_stacks;
> +     vm_stack_cache->cur = 0;
> +     vm_stack_cache->nr = 0;
> +
> +     return 0;
> +}
> +
> +static int free_vm_stack_cache(unsigned int cpu)
> +{
> +     struct vm_stack_cache *vm_stack_cache = &per_cpu(vm_stacks, cpu);
> +     struct vm_struct **vm_stacks = vm_stack_cache->vm_stacks;
> +     int i;
> +
> +     for (i = 0; i < vm_stack_cache->nr; i++) {
> +             vfree(vm_stacks[i]->addr);
> +             vm_stacks[i] = NULL;
> +     }
> +
> +     vm_stack_cache->nr = 0;
> +     vm_stack_cache->cur = 0;
> +     /* do not free vm_stack[cpu]->vm_stacks itself, reused in allocation */
> +
> +     return 0;
> +}
> +
>  #endif
>  
>  static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
>  {
>  #ifdef CONFIG_VMAP_STACK
> +     struct vm_stack_cache *vm_stack_cache =
> +             &per_cpu(vm_stacks, smp_processor_id());
> +     struct vm_struct **vm_stacks = vm_stack_cache->vm_stacks;
>       void *stack;
> -     int i;
>  
>       local_irq_disable();
> -     for (i = 0; i < NR_CACHED_STACKS; i++) {
> -             struct vm_struct *s = this_cpu_read(cached_stacks[i]);
> -
> -             if (!s)
> -                     continue;
> -             this_cpu_write(cached_stacks[i], NULL);
> -
> -             tsk->stack_vm_area = s;
> +     if (vm_stack_cache->cur > 0) {
> +             struct vm_struct *vm_stack = vm_stacks[--vm_stack_cache->cur];
> +             tsk->stack_vm_area = vm_stack;
>               local_irq_enable();
> -             return s->addr;
> +
> +             return vm_stack->addr;
>       }
>       local_irq_enable();
>  
> @@ -216,15 +261,14 @@ static inline void free_thread_stack(struct task_struct *tsk)
>  {
>  #ifdef CONFIG_VMAP_STACK
>       if (task_stack_vm_area(tsk)) {
> +             struct vm_stack_cache *vm_stack_cache =
> +                     &per_cpu(vm_stacks, smp_processor_id());
> +             struct vm_struct **vm_stacks = vm_stack_cache->vm_stacks;
>               unsigned long flags;
> -             int i;
>  
>               local_irq_save(flags);
> -             for (i = 0; i < NR_CACHED_STACKS; i++) {
> -                     if (this_cpu_read(cached_stacks[i]))
> -                             continue;
> -
> -                     this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
> +             if (vm_stack_cache->cur < vm_stack_cache->nr) {
> +                     vm_stacks[vm_stack_cache->cur++] = tsk->stack_vm_area;
>                       local_irq_restore(flags);
>                       return;
>               }
> @@ -456,6 +500,9 @@ void __init fork_init(void)
>       for (i = 0; i < UCOUNT_COUNTS; i++) {
>               init_user_ns.ucount_max[i] = max_threads/2;
>       }
> +
> +     cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "vm_stack_cache",
> +                       alloc_vm_stack_cache, free_vm_stack_cache);
>  }
>  
>  int __weak arch_dup_task_struct(struct task_struct *dst,
> -- 
> 2.7.4
> 
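For reference, the registration added to fork_init() follows the usual
dynamic hotplug-state pattern. A minimal sketch of that pattern is below;
the callback and state names are made up for illustration and are not
taken from the patch:

#include <linux/cpuhotplug.h>
#include <linux/init.h>

/*
 * Hypothetical per-cpu setup/teardown callbacks; each receives the cpu
 * number and returns 0 on success or a negative errno.
 */
static int example_online(unsigned int cpu)
{
	/* allocate whatever per-cpu state @cpu needs */
	return 0;
}

static int example_offline(unsigned int cpu)
{
	/* release the per-cpu state of @cpu */
	return 0;
}

static int __init example_init(void)
{
	int ret;

	/*
	 * CPUHP_AP_ONLINE_DYN reserves a free dynamic state.  The online
	 * callback is invoked for every CPU that is already up and for
	 * every CPU that comes up later; the offline callback runs when a
	 * CPU goes down.  On success the allocated state number (>= 0) is
	 * returned, a negative errno otherwise.
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example:online",
				example_online, example_offline);
	return ret < 0 ? ret : 0;
}

That is the shape the fork_init() hunk relies on: the startup callback
doubles as the allocator for the per-cpu array and the teardown callback
drops the cached stacks when a CPU is unplugged.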

-- 
Michal Hocko
SUSE Labs
