Hi,

On 19/07/16 11:44, Tommaso Cucinotta wrote:
> This change achieves up to 10% of speed-up for cpudl_set() calls, as
> measured with a randomly generated workload of 1K,10K,100K random heap
> insertions and deletions (75% cpudl_set() calls with is_valid=1 and
> 25% with is_valid=0), and randomly generated cpu IDs, with up to 256
> CPUs, as measured on an Intel Core2 Duo.
> 
> Cc: Peter Zijlstra <pet...@infradead.org>
> Cc: Juri Lelli <juri.le...@arm.com>
> Cc: Luca Abeni <luca.ab...@unitn.it>
> Reviewed-by: Luca Abeni <luca.ab...@unitn.it>
> Signed-off-by: Tommaso Cucinotta <tommaso.cucino...@sssup.it>
> ---
>  kernel/sched/cpudeadline.c | 114 +++++++++++++++++++++++++++------------------
>  1 file changed, 69 insertions(+), 45 deletions(-)
> 
> diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
> index 3c42702..60f933a 100644
> --- a/kernel/sched/cpudeadline.c
> +++ b/kernel/sched/cpudeadline.c
> @@ -31,60 +31,82 @@ static inline int right_child(int i)
>       return (i << 1) + 2;
>  }
>  
> -static void cpudl_exchange(struct cpudl *cp, int a, int b)
> -{
> -     int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;
> -
> -     swap(cp->elements[a].cpu, cp->elements[b].cpu);
> -     swap(cp->elements[a].dl , cp->elements[b].dl );
> -
> -     swap(cp->elements[cpu_a].idx, cp->elements[cpu_b].idx);
> -}
> -
>  static void cpudl_heapify_down(struct cpudl *cp, int idx)
>  {
>       int l, r, largest;
>  
> +     int orig_cpu = cp->elements[idx].cpu;
> +     u64 orig_dl = cp->elements[idx].dl;
> +
> +     if (left_child(idx) >= cp->size)
> +             return;
> +
>       /* adapted from lib/prio_heap.c */
>       while(1) {
> +             u64 largest_dl;
>               l = left_child(idx);
>               r = right_child(idx);
>               largest = idx;
> +             largest_dl = orig_dl;
>  
> -             if ((l < cp->size) && dl_time_before(cp->elements[idx].dl,
> -                                                     cp->elements[l].dl))
> +             if ((l < cp->size) && dl_time_before(orig_dl, 
> cp->elements[l].dl)) {
>                       largest = l;
> -             if ((r < cp->size) && dl_time_before(cp->elements[largest].dl,
> -                                                     cp->elements[r].dl))
> +                     largest_dl = cp->elements[l].dl;

OK, I was a bit puzzled by the usage of largest_dl in this loop, but I
seem to have convinced myself that it is right. It's basically a local tmp
variable that you use to pick left or right child at each iteration, and
you reset it to the deadline of the node you are moving (orig_dl) after
each iteration.

> +             }
> +             if ((r < cp->size) && dl_time_before(largest_dl, 
> cp->elements[r].dl))

Nitpick, this and the above conditions break 80 columns.

>                       largest = r;
> +
>               if (largest == idx)
>                       break;
>  
> -             /* Push idx down the heap one level and bump one up */
> -             cpudl_exchange(cp, largest, idx);
> +             /* pull largest child onto idx */
> +             cp->elements[idx].cpu = cp->elements[largest].cpu;
> +             cp->elements[idx].dl = cp->elements[largest].dl;
> +             cp->elements[cp->elements[idx].cpu].idx = idx;
>               idx = largest;
>       }
> +     /* actual push down of saved original values orig_* */
> +     cp->elements[idx].cpu = orig_cpu;
> +     cp->elements[idx].dl = orig_dl;
> +     cp->elements[cp->elements[idx].cpu].idx = idx;
>  }
>  
>  static void cpudl_heapify_up(struct cpudl *cp, int idx)
>  {
> -     while (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl,
> -                     cp->elements[idx].dl)) {
> -             cpudl_exchange(cp, idx, parent(idx));
> -             idx = parent(idx);
> -     }
> +     int p;
> +
> +     int orig_cpu = cp->elements[idx].cpu;
> +     u64 orig_dl = cp->elements[idx].dl;
> +
> +     if (idx == 0)
> +             return;
> +
> +     do {
> +             p = parent(idx);
> +             if (dl_time_before(cp->elements[idx].dl, cp->elements[p].dl))
> +                     break;
> +             /* pull parent onto idx */
> +             cp->elements[idx].cpu = cp->elements[p].cpu;
> +             cp->elements[idx].dl = cp->elements[p].dl;
> +             cp->elements[cp->elements[idx].cpu].idx = idx;
> +             idx = p;
> +     } while (idx != 0);
> +     /* actual push up of saved original values orig_* */
> +     cp->elements[idx].cpu = orig_cpu;
> +     cp->elements[idx].dl = orig_dl;
> +     cp->elements[cp->elements[idx].cpu].idx = idx;
>  }
>  
> -static void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl)
> +static void cpudl_heapify(struct cpudl *cp, int idx)
>  {
>       WARN_ON(idx == IDX_INVALID || !cpu_present(idx));
> +     if (idx == IDX_INVALID)
> +             return;

Can we actually get here with IDX_INVALID? We bail out in the !is_valid
branch and the other call point is an else branch of old_idx ==
IDX_INVALID. So, it seems that this is more of a BUG_ON().

>  
> -     if (dl_time_before(new_dl, cp->elements[idx].dl)) {
> -             cp->elements[idx].dl = new_dl;
> -             cpudl_heapify_down(cp, idx);
> -     } else {
> -             cp->elements[idx].dl = new_dl;
> +     if (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl, 
> cp->elements[idx].dl)) {

You can wrap after the comma. And brackets are not required.

>               cpudl_heapify_up(cp, idx);
> +     } else {
> +             cpudl_heapify_down(cp, idx);
>       }
>  }
>  
> @@ -153,28 +175,30 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int 
> is_valid)
>                        */
>                       goto out;
>               }
> -             new_cpu = cp->elements[cp->size - 1].cpu;
> -             cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl;
> -             cp->elements[old_idx].cpu = new_cpu;
>               cp->size--;
> -             cp->elements[new_cpu].idx = old_idx;
>               cp->elements[cpu].idx = IDX_INVALID;
> -             cpudl_heapify_up(cp, old_idx);
> -             cpumask_set_cpu(cpu, cp->free_cpus);
> -                cpudl_heapify_down(cp, old_idx);
> -
> -             goto out;
> -     }
> +             if (old_idx != cp->size) {
> +                     new_cpu = cp->elements[cp->size].cpu;
> +                     cp->elements[old_idx].dl = cp->elements[cp->size].dl;
> +                     cp->elements[old_idx].cpu = new_cpu;
> +                     cp->elements[new_cpu].idx = old_idx;
> +                     cpudl_heapify(cp, old_idx);
> +             }
>  
> -     if (old_idx == IDX_INVALID) {
> -             int size1 = cp->size++;
> -             cp->elements[size1].dl = dl;
> -             cp->elements[size1].cpu = cpu;
> -             cp->elements[cpu].idx = size1;
> -             cpudl_heapify_up(cp, size1);
> -             cpumask_clear_cpu(cpu, cp->free_cpus);
> +             cpumask_set_cpu(cpu, cp->free_cpus);
>       } else {
> -             cpudl_change_key(cp, old_idx, dl);
> +             if (old_idx == IDX_INVALID) {
> +                     int size1 = cp->size++;
> +                     cp->elements[size1].dl = dl;
> +                     cp->elements[size1].cpu = cpu;
> +                     cp->elements[cpu].idx = size1;
> +                     cpudl_heapify_up(cp, size1);
> +
> +                     cpumask_clear_cpu(cpu, cp->free_cpus);
> +             } else {
> +                     cp->elements[old_idx].dl = dl;
> +                     cpudl_heapify(cp, old_idx);
> +             }
>       }
>  
>  out:
> -- 

Couldn't spot any problem with this change (apart from the minor things
I pointed out above). Didn't test it extensively, though. But it seems
you did. :)

Best,

- Juri

Reply via email to