I am not going to comment the intent, but to be honest I am skeptical too.

On 05/06, Sultan Alsawaf wrote:
>
> +static unsigned long find_victims(struct victim_info *varr, int *vindex,
> +                               int vmaxlen, int min_adj, int max_adj)
> +{
> +     unsigned long pages_found = 0;
> +     int old_vindex = *vindex;
> +     struct task_struct *tsk;
> +
> +     for_each_process(tsk) {
> +             struct task_struct *vtsk;
> +             unsigned long tasksize;
> +             short oom_score_adj;
> +
> +             /* Make sure there's space left in the victim array */
> +             if (*vindex == vmaxlen)
> +                     break;
> +
> +             /* Don't kill current, kthreads, init, or duplicates */
> +             if (same_thread_group(tsk, current) ||
> +                 tsk->flags & PF_KTHREAD ||
> +                 is_global_init(tsk) ||
> +                 vtsk_is_duplicate(varr, *vindex, tsk))
> +                     continue;
> +
> +             vtsk = find_lock_task_mm(tsk);

Did you test this patch with lockdep enabled?

If I read the patch correctly, lockdep should complain. vtsk_is_duplicate()
ensures that we do not take the same ->alloc_lock twice or more, but lockdep
can't know this.

> +static void scan_and_kill(unsigned long pages_needed)
> +{
> +     static DECLARE_WAIT_QUEUE_HEAD(victim_waitq);
> +     struct victim_info victims[MAX_VICTIMS];
> +     int i, nr_to_kill = 0, nr_victims = 0;
> +     unsigned long pages_found = 0;
> +     atomic_t victim_count;
> +
> +     /*
> +      * Hold the tasklist lock so tasks don't disappear while scanning. This
> +      * is preferred to holding an RCU read lock so that the list of tasks
> +      * is guaranteed to be up to date. Keep preemption disabled until the
> +      * SIGKILLs are sent so the victim kill process isn't interrupted.
> +      */
> +     read_lock(&tasklist_lock);
> +     preempt_disable();

read_lock() disables preemption, every task_lock() too, so this looks
unnecessary.

> +     for (i = 1; i < ARRAY_SIZE(adj_prio); i++) {
> +             pages_found += find_victims(victims, &nr_victims, MAX_VICTIMS,
> +                                         adj_prio[i], adj_prio[i - 1]);
> +             if (pages_found >= pages_needed || nr_victims == MAX_VICTIMS)
> +                     break;
> +     }
> +
> +     /*
> +      * Calculate the number of tasks that need to be killed and quickly
> +      * release the references to those that'll live.
> +      */
> +     for (i = 0, pages_found = 0; i < nr_victims; i++) {
> +             struct victim_info *victim = &victims[i];
> +             struct task_struct *vtsk = victim->tsk;
> +
> +             /* The victims' mm lock is taken in find_victims; release it */
> +             if (pages_found >= pages_needed) {
> +                     task_unlock(vtsk);
> +                     continue;
> +             }
> +
> +             /*
> +              * Grab a reference to the victim so it doesn't disappear after
> +              * the tasklist lock is released.
> +              */
> +             get_task_struct(vtsk);

The comment doesn't look correct. the victim can't dissapear until task_unlock()
below, it can't pass exit_mm().

> +             pages_found += victim->size;
> +             nr_to_kill++;
> +     }
> +     read_unlock(&tasklist_lock);
> +
> +     /* Kill the victims */
> +     victim_count = (atomic_t)ATOMIC_INIT(nr_to_kill);
> +     for (i = 0; i < nr_to_kill; i++) {
> +             struct victim_info *victim = &victims[i];
> +             struct task_struct *vtsk = victim->tsk;
> +
> +             pr_info("Killing %s with adj %d to free %lu kiB\n", vtsk->comm,
> +                     vtsk->signal->oom_score_adj,
> +                     victim->size << (PAGE_SHIFT - 10));
> +
> +             /* Configure the victim's mm to notify us when it's freed */
> +             vtsk->mm->slmk_waitq = &victim_waitq;
> +             vtsk->mm->slmk_counter = &victim_count;
> +
> +             /* Accelerate the victim's death by forcing the kill signal */
> +             do_send_sig_info(SIGKILL, SIG_INFO_TYPE, vtsk, true);
                                                               ^^^^
this should be PIDTYPE_TGID

> +
> +             /* Finally release the victim's mm lock */
> +             task_unlock(vtsk);
> +     }
> +     preempt_enable_no_resched();

See above. And I don't understand how can _no_resched() really help...

> +
> +     /* Try to speed up the death process now that we can schedule again */
> +     for (i = 0; i < nr_to_kill; i++) {
> +             struct task_struct *vtsk = victims[i].tsk;
> +
> +             /* Increase the victim's priority to make it die faster */
> +             set_user_nice(vtsk, MIN_NICE);
> +
> +             /* Allow the victim to run on any CPU */
> +             set_cpus_allowed_ptr(vtsk, cpu_all_mask);
> +
> +             /* Finally release the victim reference acquired earlier */
> +             put_task_struct(vtsk);
> +     }
> +
> +     /* Wait until all the victims die */
> +     wait_event(victim_waitq, !atomic_read(&victim_count));

Can't we avoid the new slmk_waitq/slmk_counter members in mm_struct?

I mean, can't we export victim_waitq and victim_count and, say, set/test
MMF_OOM_VICTIM. In fact I think you should try to re-use mark_oom_victim()
at least.

Oleg.

_______________________________________________
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

Reply via email to