Hi,

I want to make a utility wrapper around a core.simd.float4, and have been trying to make the following code work, but have been met with no success.

auto add(float rhs)
{
    return __simd(XMM.ADDPS, lhs, rhs);
}

Then I tried

auto add(float4 lhs, float rhs)
{
    float4 tmp = [rhs, rhs, rhs, rhs];
    return __simd(XMM.ADDPS, lhs, rhs);
}

When that didn't work, I turned to IASM and threw together this:

float4 add(float4 lhs, float rhs)
{
    float4 res;
    float4 rhs_tmp = [rhs, rhs, rhs, rhs];
    auto lhs_addr = &lhs;
    auto rhs_addr = &rhs_tmp;
    asm
    {
        mov RAX, lhs_addr;
        mov RBX, rhs_addr;
        movups XMM0, [RAX];
        movups XMM1, [RBX];

        addps XMM0, XMM1;
        movups res, XMM0;
    }
    return res;
}

and it still didn't work. So, what am I doing wrong?

Reply via email to