Hi Julien,

On Fri, May 3, 2024 at 11:59 AM Julien Grall <jul...@xen.org> wrote:
>
> Hi Jens,
>
> On 02/05/2024 15:56, Jens Wiklander wrote:
> > -static bool ffa_probe(void)
> > +static int __init ffa_init(void)
> >   {
> >       uint32_t vers;
> >       unsigned int major_vers;
> > @@ -460,16 +511,16 @@ static bool ffa_probe(void)
> >           printk(XENLOG_ERR
> >                  "ffa: unsupported SMCCC version %#x (need at least %#x)\n",
> >                  smccc_ver, ARM_SMCCC_VERSION_1_2);
> > -        return false;
> > +        return 0;
> >       }
> >
> >       if ( !ffa_get_version(&vers) )
> > -        return false;
> > +        return 0;
> >
> >       if ( vers < FFA_MIN_SPMC_VERSION || vers > FFA_MY_VERSION )
> >       {
> >           printk(XENLOG_ERR "ffa: Incompatible version %#x found\n", vers);
> > -        return false;
> > +        return 0;
> >       }
> >
> >       major_vers = (vers >> FFA_VERSION_MAJOR_SHIFT) & 
> > FFA_VERSION_MAJOR_MASK;
> > @@ -492,26 +543,33 @@ static bool ffa_probe(void)
> >            !check_mandatory_feature(FFA_MEM_SHARE_32) ||
> >            !check_mandatory_feature(FFA_MEM_RECLAIM) ||
> >            !check_mandatory_feature(FFA_MSG_SEND_DIRECT_REQ_32) )
> > -        return false;
> > +        return 0;
> >
> >       if ( !ffa_rxtx_init() )
> > -        return false;
> > +        return 0;
> >
> >       ffa_version = vers;
> >
> >       if ( !ffa_partinfo_init() )
> >           goto err_rxtx_destroy;
> >
> > +    ffa_notif_init();
> >       INIT_LIST_HEAD(&ffa_teardown_head);
> >       init_timer(&ffa_teardown_timer, ffa_teardown_timer_callback, NULL, 0);
> >
> > -    return true;
> > +    return 0;
> >
> >   err_rxtx_destroy:
> >       ffa_rxtx_destroy();
> >       ffa_version = 0;
> >
> > -    return false;
> > +    return 0;
> > +}
> > +presmp_initcall(ffa_init);
> I would rather prefer if tee_init() is called from presmp_initcall().
> This would avoid FFA to have to try to initialize itself before all the
> others.

OK, I'll update.

>
> This is what I had in mind with my original request, but I guess I
> wasn't clear enough. Sorry for that.

No worries.

>
> [...]
>
> > +static void notif_irq_handler(int irq, void *data)
> > +{
> > +    const struct arm_smccc_1_2_regs arg = {
> > +        .a0 = FFA_NOTIFICATION_INFO_GET_64,
> > +    };
> > +    struct arm_smccc_1_2_regs resp;
> > +    unsigned int id_pos;
> > +    unsigned int list_count;
> > +    uint64_t ids_count;
> > +    unsigned int n;
> > +    int32_t res;
> > +
> > +    do {
> > +        arm_smccc_1_2_smc(&arg, &resp);
> > +        res = ffa_get_ret_code(&resp);
> > +        if ( res )
> > +        {
> > +            if ( res != FFA_RET_NO_DATA )
> > +                printk(XENLOG_ERR "ffa: notification info get failed: 
> > error %d\n",
> > +                       res);
> > +            return;
> > +        }
> > +
> > +        ids_count = resp.a2 >> FFA_NOTIF_INFO_GET_ID_LIST_SHIFT;
> > +        list_count = ( resp.a2 >> FFA_NOTIF_INFO_GET_ID_COUNT_SHIFT ) &
> > +                     FFA_NOTIF_INFO_GET_ID_COUNT_MASK;
> > +
> > +        id_pos = 0;
> > +        for ( n = 0; n < list_count; n++ )
> > +        {
> > +            unsigned int count = ((ids_count >> 2 * n) & 0x3) + 1;
> > +            uint16_t vm_id = get_id_from_resp(&resp, id_pos);
> > +            struct domain *d;
> > +
> > +            /*
> > +             * vm_id == 0 means a notifications pending for Xen itself, but
> > +             * we don't support that yet.
> > +             */
> > +            if (vm_id)
> > +                d = ffa_rcu_lock_domain_by_vm_id(vm_id);
>
> I am still unconvinced that this is sufficient. This will prevent
> "struct domain *" to be freed. But ...
>
> > +            else
> > +                d = NULL;
> > +
> > +            if ( d )
> > +            {
> > +                struct ffa_ctx *ctx = d->arch.tee;
>
> ... I don't think it will protect d->arch.tee to be freed as this is
> happening during teardorwn. You mostly need some other sort of locking
> to avoid d->arch.tee been freed behind your back.

OK, I'll need to work more on this. The outcome of the [1] thread may
affect this too.

[1] https://lists.xenproject.org/archives/html/xen-devel/2024-05/msg00156.html

>
> > +                struct vcpu *v;
> > +
> > +                ACCESS_ONCE(ctx->notif.secure_pending) = true;
> > +
> > +                /*
> > +                 * Since we're only delivering global notification, always
> > +                 * deliver to the first online vCPU. It doesn't matter
> > +                 * which we chose, as long as it's available.
> > +                 */
> > +                for_each_vcpu(d, v)
> > +                {
> > +                    if ( is_vcpu_online(v) )
> > +                    {
> > +                        vgic_inject_irq(d, v, GUEST_FFA_NOTIF_PEND_INTR_ID,
> > +                                        true);
> > +                        break;
> > +                    }
> > +                }
> > +                if ( !v )
> > +                    printk(XENLOG_ERR "ffa: can't inject NPI, all vCPUs 
> > offline\n");
> > +
> > +                rcu_unlock_domain(d);
> > +            }
> > +
> > +            id_pos += count;
> > +        }
> > +
> > +    } while (resp.a2 & FFA_NOTIF_INFO_GET_MORE_FLAG);
> > +}
>
> [...]
>
> > +static int ffa_setup_irq_callback(struct notifier_block *nfb,
> > +                                  unsigned long action, void *hcpu)
> > +{
> > +    unsigned int cpu = (unsigned long)hcpu;
> > +    struct notif_irq_info irq_info = { };
> > +
> > +    switch ( action )
> > +    {
> > +    case CPU_ONLINE:
>
> Can't you execute the notifier in CPU_STARTING? This will be called on
> the CPU directly, so you should be able to use request_irq(...).

I tried that first but it failed with the ASSERT_ALLOC_CONTEXT() in _xmalloc().

I've also tested a three-step solution with CPU_UP_PREPARE,
CPU_STARTING, and CPU_UP_CANCELED.
My approach here is more direct, but it still suffers from a weakness
in error handling even if it seems quite unlikely to run out of heap
or for setup_irq() to fail at this stage.

Thanks,
Jens

>
> > +        /*
> > +         * The notifier call is done on the primary or initiating CPU when
> > +         * the target CPU have come online, but the SGI must be setup on
> > +         * the target CPU.
> > +         *
> > +         * We make an IPI call to the target CPU to setup the SGI. The call
> > +         * is executed in interrupt context on the target CPU, so we can't
> > +         * call request_irq() directly since it allocates memory.
> > +         *
> > +         * We preallocate the needed irqaction here and pass it via the
> > +         * temporary struct notif_irq_info. The call is synchronous in the
> > +         * sense that when on_selected_cpus() returns the callback
> > +         * notif_irq_enable() has done the same on the target CPU.
> > +         *
> > +         * We deal with two errors, one where notif_irq_enable() hasn't
> > +         * been called for some reason, that error is logged below. The
> > +         * other where setup_irq() fails is logged in the callback. We must
> > +         * free the irqaction in both cases since it has failed to become
> > +         * registered.
> > +         *
> > +         * Failures leads to a problem notifications, the CPUs with failure
> > +         * will trigger on the SGI indicating that there are notifications
> > +         * pending, while the SPMC in the secure world will not notice that
> > +         * the interrupt was lost.
> > +         */
> > +        irq_info.action = xmalloc(struct irqaction);
> > +        if ( !irq_info.action )
> > +        {
> > +            printk(XENLOG_ERR "ffa: setup irq %u failed, out of memory\n",
> > +                   notif_sri_irq);
> > +            return -ENOMEM;
> > +        }
> > +
> > +        *irq_info.action = (struct irqaction){
> > +            .handler = notif_irq_handler,
> > +            .name = "FF-A notif",
> > +            .free_on_release = 1,
> > +        };
> > +
> > +        on_selected_cpus(cpumask_of(cpu), notif_irq_enable, &irq_info, 1);
> > +        if (!irq_info.called)
> > +            printk(XENLOG_ERR "ffa: on_selected_cpus(cpumask_of(%u)) 
> > failed\n",
> > +                   cpu);
> > +        /*
> > +         * The irqaction is unused and must be freed if irq_info.action is
> > +         * non-NULL at this stage.
> > +         */
> > +        XFREE(irq_info.action);
> > +        break;
> > +    default:
> > +        break;
> > +    }
> > +
> > +    return 0;
> > +}
>
> Cheers,
>
> --
> Julien Grall

Reply via email to