Hi,
I want to make a utility wrapper around a core.simd.float4, and
have been trying to make the following code work, but have been
met with no success.
auto add(float rhs)
{
return __simd(XMM.ADDPS, lhs, rhs);
}
Then I tried
auto add(float4 lhs, float rhs)
{
float4 tmp = [rhs, rhs, rhs, rhs];
return __simd(XMM.ADDPS, lhs, rhs);
}
When that didn't work, I turned to IASM and threw together this:
float4 add(float4 lhs, float rhs)
{
float4 res;
float4 rhs_tmp = [rhs, rhs, rhs, rhs];
auto lhs_addr = &lhs;
auto rhs_addr = &rhs_tmp;
asm
{
mov RAX, lhs_addr;
mov RBX, rhs_addr;
movups XMM0, [RAX];
movups XMM1, [RBX];
addps XMM0, XMM1;
movups res, XMM0;
}
return res;
}
and it still didn't work. So, what am I doing wrong?