It's better to put a FIXME tag in the code to indicate that there should be
a better implementation than this unoptimized version.
The optimized version could leverage Gen's register region/indirect
addressing to optimize the byte swap operations. Let's defer this
type of optimization to the next release.

Thanks for the patch. Will push later.

On Tue, Nov 04, 2014 at 06:42:35AM +0800, xionghu....@intel.com wrote:
> From: Luo Xionghu <xionghu....@intel.com>
> 
> This intrinsic call is currently implemented at the GEN IR level;
> it should be optimized later.
> 
> Signed-off-by: Luo Xionghu <xionghu....@intel.com>
> ---
>  backend/src/llvm/llvm_gen_backend.cpp |   87 
> +++++++++++++++++++++++++++++++++
>  1 file changed, 87 insertions(+)
> 
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp 
> b/backend/src/llvm/llvm_gen_backend.cpp
> index feb881d..4b6cfe7 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2827,6 +2827,93 @@ namespace gbe
>            NOT_IMPLEMENTED;
>            break;
>            case Intrinsic::bswap:
> +          {
> +            Type *llvmDstType = I.getType();
> +            uint32_t elementSize = getTypeByteSize(unit, llvmDstType);
> +
> +            const ir::Register dst0  = this->getRegister(&I);
> +            const ir::Register src0 = this->getRegister(I.getOperand(0));
> +            switch(elementSize)
> +            {
> +              case 2:
> +                {
> +                  ir::Type srcType = getUnsignedType(ctx, llvmDstType);
> +                  ir::Register tmp1 = ctx.reg(getFamily(srcType));
> +                  ir::Register tmp2 = ctx.reg(getFamily(srcType));
> +
> +                  ir::Register regWMask = ctx.reg( ir::FAMILY_WORD );
> +                  const ir::ImmediateIndex wMask = 
> ctx.newIntegerImmediate(0x00FF, ir::TYPE_S16);
> +                  ir::Register regShift = ctx.reg( ir::FAMILY_WORD );
> +                  const ir::ImmediateIndex shift = 
> ctx.newIntegerImmediate(8, ir::TYPE_S16);
> +
> +                  ctx.LOADI(ir::TYPE_S16, regWMask, wMask);
> +                  ctx.AND(srcType, tmp1, src0, regWMask);
> +
> +                  ctx.LOADI(ir::TYPE_S16, regShift, shift);
> +                  ctx.SHL(srcType, tmp2, tmp1, regShift);
> +
> +                  ir::Register tmp3 = ctx.reg( getFamily(srcType) );
> +                  ctx.SHR(srcType, tmp3, src0, regShift);
> +
> +                  ctx.OR(srcType, dst0, tmp2, tmp3);
> +                }
> +                break;
> +              case 4:
> +                {
> +                  ir::Type srcType = getUnsignedType(ctx, llvmDstType);
> +                  ir::Register tmp1 = ctx.reg(getFamily(srcType));
> +                  ir::Register tmp2 = ctx.reg(getFamily(srcType));
> +                  ir::Register tmp3 = ctx.reg(getFamily(srcType));
> +                  ir::Register tmp4 = ctx.reg(getFamily(srcType));
> +                  ir::Register tmp5 = ctx.reg(getFamily(srcType));
> +                  ir::Register tmp6 = ctx.reg(getFamily(srcType));
> +                  ir::Register tmp7 = ctx.reg(getFamily(srcType));
> +                  ir::Register tmp8 = ctx.reg(getFamily(srcType));
> +
> +                  ir::Register regDWMask = ctx.reg( ir::FAMILY_DWORD );
> +                  ir::Register regShift = ctx.reg( ir::FAMILY_DWORD );
> +                  ir::ImmediateIndex wMask = 
> ctx.newIntegerImmediate(0x000000FF, ir::TYPE_S32);
> +                  ir::ImmediateIndex shift = ctx.newIntegerImmediate(24, 
> ir::TYPE_S32);
> +                  ctx.LOADI(ir::TYPE_S32, regDWMask, wMask);
> +                  ctx.AND(srcType, tmp1, src0, regDWMask);
> +                  ctx.LOADI(ir::TYPE_S32, regShift, shift);
> +                  ctx.SHL(srcType, tmp2, tmp1, regShift);
> +
> +                  wMask = ctx.newIntegerImmediate(0x0000FF00, ir::TYPE_S32);
> +                  shift = ctx.newIntegerImmediate(8, ir::TYPE_S32);
> +                  ctx.LOADI(ir::TYPE_S32, regDWMask, wMask);
> +                  ctx.AND(srcType, tmp3, src0, regDWMask);
> +                  ctx.LOADI(ir::TYPE_S32, regShift, shift);
> +                  ctx.SHL(srcType, tmp4, tmp3, regShift);
> +
> +                  wMask = ctx.newIntegerImmediate(0x00FF0000, ir::TYPE_S32);
> +                  shift = ctx.newIntegerImmediate(8, ir::TYPE_S32);
> +                  ctx.LOADI(ir::TYPE_S32, regDWMask, wMask);
> +                  ctx.AND(srcType, tmp5, src0, regDWMask);
> +                  ctx.LOADI(ir::TYPE_S32, regShift, shift);
> +                  ctx.SHR(srcType, tmp6, tmp5, regShift);
> +
> +                  wMask = ctx.newIntegerImmediate(0xFF000000, ir::TYPE_S32);
> +                  shift = ctx.newIntegerImmediate(24, ir::TYPE_S32);
> +                  ctx.LOADI(ir::TYPE_S32, regDWMask, wMask);
> +                  ctx.AND(srcType, tmp7, src0, regDWMask);
> +                  ctx.LOADI(ir::TYPE_S32, regShift, shift);
> +                  ctx.SHR(srcType, tmp8, tmp7, regShift);
> +
> +                  ir::Register tmp9 = ctx.reg(getFamily(srcType));
> +                  ir::Register tmp10 = ctx.reg(getFamily(srcType));
> +                  ctx.OR(srcType, tmp9, tmp2, tmp4);
> +                  ctx.OR(srcType, tmp10, tmp6, tmp8);
> +                  ctx.OR(srcType, dst0, tmp9, tmp10);
> +                }
> +                break;
> +              case 8:
> +                NOT_IMPLEMENTED;
> +                break;
> +              default:
> +                GBE_ASSERT(0);
> +            }
> +          }
>            break;
>            default: NOT_IMPLEMENTED;
>          }
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to