The branch is here: https://cgit.freedesktop.org/~mareko/mesa/log/?h=master and the corresponding LLVM change is here: https://reviews.llvm.org/D41663
Marek On Fri, Jan 5, 2018 at 7:19 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > > On 01/05/2018 02:45 AM, Marek Olšák wrote: >> >> On Thu, Jan 4, 2018 at 10:25 AM, Samuel Pitoiset >> <samuel.pitoi...@gmail.com> wrote: >>> >>> How about performance? >>> >>> Few weeks ago, I fixed a bug (5f81a43535e8512cef26ea3dcd1e3a489bd5a1bb) >>> which affected F1 2017 and DOW3 on RADV, and it was also a nice >>> performance >>> boost, this is why I'm asking. >> >> >> No idea. This just decreases the number of instructions in some PS >> epilogs. > > > Okay, the series no longer applies on master, do you have a branch > somewhere? I would like to run, at least, F1 and DOW3. > > >> >> Marek >> >>> >>> >>> On 01/04/2018 01:55 AM, Marek Olšák wrote: >>>> >>>> >>>> From: Marek Olšák <marek.ol...@amd.com> >>>> >>>> --- >>>> src/amd/common/ac_llvm_build.c | 164 >>>> +++++++++++++++++++++++++++++++ >>>> src/amd/common/ac_llvm_build.h | 13 +++ >>>> src/gallium/drivers/radeonsi/si_shader.c | 152 >>>> ++++++++-------------------- >>>> 3 files changed, 216 insertions(+), 113 deletions(-) >>>> >>>> diff --git a/src/amd/common/ac_llvm_build.c >>>> b/src/amd/common/ac_llvm_build.c >>>> index 7100e52..c48a186 100644 >>>> --- a/src/amd/common/ac_llvm_build.c >>>> +++ b/src/amd/common/ac_llvm_build.c >>>> @@ -61,20 +61,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, >>>> LLVMContextRef context, >>>> ctx->voidt = LLVMVoidTypeInContext(ctx->context); >>>> ctx->i1 = LLVMInt1TypeInContext(ctx->context); >>>> ctx->i8 = LLVMInt8TypeInContext(ctx->context); >>>> ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); >>>> ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); >>>> ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); >>>> ctx->intptr = HAVE_32BIT_POINTERS ? 
ctx->i32 : ctx->i64; >>>> ctx->f16 = LLVMHalfTypeInContext(ctx->context); >>>> ctx->f32 = LLVMFloatTypeInContext(ctx->context); >>>> ctx->f64 = LLVMDoubleTypeInContext(ctx->context); >>>> + ctx->v2i16 = LLVMVectorType(ctx->i16, 2); >>>> ctx->v2i32 = LLVMVectorType(ctx->i32, 2); >>>> ctx->v3i32 = LLVMVectorType(ctx->i32, 3); >>>> ctx->v4i32 = LLVMVectorType(ctx->i32, 4); >>>> ctx->v2f32 = LLVMVectorType(ctx->f32, 2); >>>> ctx->v4f32 = LLVMVectorType(ctx->f32, 4); >>>> ctx->v8i32 = LLVMVectorType(ctx->i32, 8); >>>> ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false); >>>> ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false); >>>> ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0); >>>> @@ -1214,20 +1215,34 @@ LLVMValueRef ac_build_fmin(struct >>>> ac_llvm_context >>>> *ctx, LLVMValueRef a, >>>> } >>>> LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, >>>> LLVMValueRef >>>> a, >>>> LLVMValueRef b) >>>> { >>>> LLVMValueRef args[2] = {a, b}; >>>> return ac_build_intrinsic(ctx, "llvm.maxnum.f32", ctx->f32, >>>> args, >>>> 2, >>>> AC_FUNC_ATTR_READNONE); >>>> } >>>> +LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef >>>> a, >>>> + LLVMValueRef b) >>>> +{ >>>> + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, >>>> ""); >>>> + return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); >>>> +} >>>> + >>>> +LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, >>>> + LLVMValueRef b) >>>> +{ >>>> + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, >>>> ""); >>>> + return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); >>>> +} >>>> + >>>> LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef >>>> a, >>>> LLVMValueRef b) >>>> { >>>> LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, >>>> b, >>>> ""); >>>> return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); >>>> } >>>> LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, >>>> LLVMValueRef >>>> value) >>>> { >>>> if (HAVE_LLVM >= 0x0500) { >>>> @@ -1439,20 
+1454,169 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct >>>> ac_llvm_context *ctx, >>>> v2f16, args, 2, >>>> AC_FUNC_ATTR_READNONE); >>>> return LLVMBuildBitCast(ctx->builder, res, ctx->i32, >>>> ""); >>>> } >>>> return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, >>>> args, >>>> 2, >>>> AC_FUNC_ATTR_READNONE | >>>> AC_FUNC_ATTR_LEGACY); >>>> } >>>> +/* Upper 16 bits must be zero. */ >>>> +static LLVMValueRef ac_llvm_pack_two_int16(struct ac_llvm_context *ctx, >>>> + LLVMValueRef val[2]) >>>> +{ >>>> + return LLVMBuildOr(ctx->builder, val[0], >>>> + LLVMBuildShl(ctx->builder, val[1], >>>> + LLVMConstInt(ctx->i32, 16, 0), >>>> + ""), ""); >>>> +} >>>> + >>>> +/* Upper 16 bits are ignored and will be dropped. */ >>>> +static LLVMValueRef ac_llvm_pack_two_int32_as_int16(struct >>>> ac_llvm_context *ctx, >>>> + LLVMValueRef val[2]) >>>> +{ >>>> + LLVMValueRef v[2] = { >>>> + LLVMBuildAnd(ctx->builder, val[0], >>>> + LLVMConstInt(ctx->i32, 0xffff, 0), ""), >>>> + val[1], >>>> + }; >>>> + return ac_llvm_pack_two_int16(ctx, v); >>>> +} >>>> + >>>> +LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, >>>> + LLVMValueRef args[2]) >>>> +{ >>>> + if (HAVE_LLVM >= 0x0600) { >>>> + LLVMValueRef res = >>>> + ac_build_intrinsic(ctx, >>>> "llvm.amdgcn.cvt.pknorm.i16", >>>> + ctx->v2i16, args, 2, >>>> + AC_FUNC_ATTR_READNONE); >>>> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, >>>> ""); >>>> + } >>>> + >>>> + LLVMValueRef val[2]; >>>> + >>>> + for (int chan = 0; chan < 2; chan++) { >>>> + /* Clamp between [-1, 1]. */ >>>> + val[chan] = ac_build_fmin(ctx, args[chan], ctx->f32_1); >>>> + val[chan] = ac_build_fmax(ctx, val[chan], >>>> LLVMConstReal(ctx->f32, -1)); >>>> + /* Convert to a signed integer in [-32767, 32767]. */ >>>> + val[chan] = LLVMBuildFMul(ctx->builder, val[chan], >>>> + LLVMConstReal(ctx->f32, >>>> 32767), >>>> ""); >>>> + /* If positive, add 0.5, else add -0.5. 
*/ >>>> + val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], >>>> + LLVMBuildSelect(ctx->builder, >>>> + LLVMBuildFCmp(ctx->builder, >>>> LLVMRealOGE, >>>> + val[chan], >>>> ctx->f32_0, ""), >>>> + LLVMConstReal(ctx->f32, 0.5), >>>> + LLVMConstReal(ctx->f32, -0.5), >>>> ""), ""); >>>> + val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], >>>> ctx->i32, ""); >>>> + } >>>> + return ac_llvm_pack_two_int32_as_int16(ctx, val); >>>> +} >>>> + >>>> +LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, >>>> + LLVMValueRef args[2]) >>>> +{ >>>> + if (HAVE_LLVM >= 0x0600) { >>>> + LLVMValueRef res = >>>> + ac_build_intrinsic(ctx, >>>> "llvm.amdgcn.cvt.pknorm.u16", >>>> + ctx->v2i16, args, 2, >>>> + AC_FUNC_ATTR_READNONE); >>>> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, >>>> ""); >>>> + } >>>> + >>>> + LLVMValueRef val[2]; >>>> + >>>> + for (int chan = 0; chan < 2; chan++) { >>>> + val[chan] = ac_build_clamp(ctx, args[chan]); >>>> + val[chan] = LLVMBuildFMul(ctx->builder, val[chan], >>>> + LLVMConstReal(ctx->f32, >>>> 65535), >>>> ""); >>>> + val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], >>>> + LLVMConstReal(ctx->f32, 0.5), >>>> ""); >>>> + val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan], >>>> + ctx->i32, ""); >>>> + } >>>> + return ac_llvm_pack_two_int32_as_int16(ctx, val); >>>> +} >>>> + >>>> +/* The 8-bit and 10-bit clamping is for HW workarounds. */ >>>> +LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, >>>> + LLVMValueRef args[2], unsigned bits, >>>> bool >>>> hi) >>>> +{ >>>> + assert(bits == 8 || bits == 10 || bits == 16); >>>> + >>>> + LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, >>>> + bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0); >>>> + LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, >>>> + bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0); >>>> + LLVMValueRef max_alpha = >>>> + bits != 10 ? max_rgb : ctx->i32_1; >>>> + LLVMValueRef min_alpha = >>>> + bits != 10 ? 
min_rgb : LLVMConstInt(ctx->i32, -2, 0); >>>> + bool has_intrinsic = HAVE_LLVM >= 0x0600; >>>> + >>>> + /* Clamp. */ >>>> + if (!has_intrinsic || bits != 16) { >>>> + for (int i = 0; i < 2; i++) { >>>> + bool alpha = hi && i == 1; >>>> + args[i] = ac_build_imin(ctx, args[i], >>>> + alpha ? max_alpha : >>>> max_rgb); >>>> + args[i] = ac_build_imax(ctx, args[i], >>>> + alpha ? min_alpha : >>>> min_rgb); >>>> + } >>>> + } >>>> + >>>> + if (has_intrinsic) { >>>> + LLVMValueRef res = >>>> + ac_build_intrinsic(ctx, >>>> "llvm.amdgcn.cvt.pk.i16", >>>> + ctx->v2i16, args, 2, >>>> + AC_FUNC_ATTR_READNONE); >>>> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, >>>> ""); >>>> + } >>>> + >>>> + return ac_llvm_pack_two_int32_as_int16(ctx, args); >>>> +} >>>> + >>>> +/* The 8-bit and 10-bit clamping is for HW workarounds. */ >>>> +LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, >>>> + LLVMValueRef args[2], unsigned bits, >>>> bool >>>> hi) >>>> +{ >>>> + assert(bits == 8 || bits == 10 || bits == 16); >>>> + >>>> + LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, >>>> + bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0); >>>> + LLVMValueRef max_alpha = >>>> + bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0); >>>> + bool has_intrinsic = HAVE_LLVM >= 0x0600; >>>> + >>>> + /* Clamp. */ >>>> + if (!has_intrinsic || bits != 16) { >>>> + for (int i = 0; i < 2; i++) { >>>> + bool alpha = hi && i == 1; >>>> + args[i] = ac_build_umin(ctx, args[i], >>>> + alpha ? 
max_alpha : >>>> max_rgb); >>>> + } >>>> + } >>>> + >>>> + if (has_intrinsic) { >>>> + LLVMValueRef res = >>>> + ac_build_intrinsic(ctx, >>>> "llvm.amdgcn.cvt.pk.u16", >>>> + ctx->v2i16, args, 2, >>>> + AC_FUNC_ATTR_READNONE); >>>> + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, >>>> ""); >>>> + } >>>> + >>>> + return ac_llvm_pack_two_int16(ctx, args); >>>> +} >>>> + >>>> LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, >>>> LLVMValueRef >>>> i1) >>>> { >>>> assert(HAVE_LLVM >= 0x0600); >>>> return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, >>>> &i1, 1, AC_FUNC_ATTR_READNONE); >>>> } >>>> void ac_build_kill_if_false(struct ac_llvm_context *ctx, >>>> LLVMValueRef >>>> i1) >>>> { >>>> if (HAVE_LLVM >= 0x0600) { >>>> diff --git a/src/amd/common/ac_llvm_build.h >>>> b/src/amd/common/ac_llvm_build.h >>>> index 0deb5b5..3f0e9e2 100644 >>>> --- a/src/amd/common/ac_llvm_build.h >>>> +++ b/src/amd/common/ac_llvm_build.h >>>> @@ -50,20 +50,21 @@ struct ac_llvm_context { >>>> LLVMTypeRef voidt; >>>> LLVMTypeRef i1; >>>> LLVMTypeRef i8; >>>> LLVMTypeRef i16; >>>> LLVMTypeRef i32; >>>> LLVMTypeRef i64; >>>> LLVMTypeRef intptr; >>>> LLVMTypeRef f16; >>>> LLVMTypeRef f32; >>>> LLVMTypeRef f64; >>>> + LLVMTypeRef v2i16; >>>> LLVMTypeRef v2i32; >>>> LLVMTypeRef v3i32; >>>> LLVMTypeRef v4i32; >>>> LLVMTypeRef v2f32; >>>> LLVMTypeRef v4f32; >>>> LLVMTypeRef v8i32; >>>> LLVMValueRef i32_0; >>>> LLVMValueRef i32_1; >>>> LLVMValueRef f32_0; >>>> @@ -238,20 +239,24 @@ LLVMValueRef ac_build_imsb(struct ac_llvm_context >>>> *ctx, >>>> LLVMValueRef arg, >>>> LLVMTypeRef dst_type); >>>> LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx, >>>> LLVMValueRef arg, >>>> LLVMTypeRef dst_type); >>>> LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef >>>> a, >>>> LLVMValueRef b); >>>> LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef >>>> a, >>>> LLVMValueRef b); >>>> +LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, 
LLVMValueRef a, >>>> + LLVMValueRef b); >>>> +LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, >>>> + LLVMValueRef b); >>>> LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef >>>> a, >>>> LLVMValueRef b); >>>> LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef >>>> value); >>>> struct ac_export_args { >>>> LLVMValueRef out[4]; >>>> unsigned target; >>>> unsigned enabled_channels; >>>> bool compr; >>>> bool done; >>>> bool valid_mask; >>>> @@ -282,20 +287,28 @@ struct ac_image_args { >>>> LLVMValueRef addr; >>>> unsigned dmask; >>>> bool unorm; >>>> bool da; >>>> }; >>>> LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, >>>> struct ac_image_args *a); >>>> LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, >>>> LLVMValueRef args[2]); >>>> +LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, >>>> + LLVMValueRef args[2]); >>>> +LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, >>>> + LLVMValueRef args[2]); >>>> +LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, >>>> + LLVMValueRef args[2], unsigned bits, >>>> bool >>>> hi); >>>> +LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, >>>> + LLVMValueRef args[2], unsigned bits, >>>> bool >>>> hi); >>>> LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, >>>> LLVMValueRef >>>> i1); >>>> void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef >>>> i1); >>>> LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef >>>> input, >>>> LLVMValueRef offset, LLVMValueRef width, >>>> bool is_signed); >>>> void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned >>>> simm16); >>>> void ac_get_image_intr_name(const char *base_name, >>>> LLVMTypeRef data_type, >>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c >>>> b/src/gallium/drivers/radeonsi/si_shader.c >>>> index 453822c..a695aad 100644 >>>> --- a/src/gallium/drivers/radeonsi/si_shader.c >>>> +++ 
b/src/gallium/drivers/radeonsi/si_shader.c >>>> @@ -2093,51 +2093,27 @@ static LLVMValueRef fetch_constant( >>>> >>>> ctx->num_const_buffers); >>>> index = LLVMBuildAdd(ctx->ac.builder, index, >>>> LLVMConstInt(ctx->i32, >>>> SI_NUM_SHADER_BUFFERS, 0), ""); >>>> bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index); >>>> } else >>>> bufp = load_const_buffer_desc(ctx, buf); >>>> return bitcast(bld_base, type, buffer_load_const(ctx, bufp, >>>> addr)); >>>> } >>>> -/* Upper 16 bits must be zero. */ >>>> -static LLVMValueRef si_llvm_pack_two_int16(struct si_shader_context >>>> *ctx, >>>> - LLVMValueRef val[2]) >>>> -{ >>>> - return LLVMBuildOr(ctx->ac.builder, val[0], >>>> - LLVMBuildShl(ctx->ac.builder, val[1], >>>> - LLVMConstInt(ctx->i32, 16, 0), >>>> - ""), ""); >>>> -} >>>> - >>>> -/* Upper 16 bits are ignored and will be dropped. */ >>>> -static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct >>>> si_shader_context *ctx, >>>> - LLVMValueRef val[2]) >>>> -{ >>>> - LLVMValueRef v[2] = { >>>> - LLVMBuildAnd(ctx->ac.builder, val[0], >>>> - LLVMConstInt(ctx->i32, 0xffff, 0), ""), >>>> - val[1], >>>> - }; >>>> - return si_llvm_pack_two_int16(ctx, v); >>>> -} >>>> - >>>> /* Initialize arguments for the shader export intrinsic */ >>>> static void si_llvm_init_export_args(struct si_shader_context *ctx, >>>> LLVMValueRef *values, >>>> unsigned target, >>>> struct ac_export_args *args) >>>> { >>>> LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32); >>>> - LLVMBuilderRef builder = ctx->ac.builder; >>>> - LLVMValueRef val[4]; >>>> unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR; >>>> unsigned chan; >>>> bool is_int8, is_int10; >>>> /* Default is 0xf. Adjusted below depending on the format. 
*/ >>>> args->enabled_channels = 0xf; /* writemask */ >>>> /* Specify whether the EXEC mask represents the valid mask */ >>>> args->valid_mask = 0; >>>> @@ -2157,20 +2133,24 @@ static void si_llvm_init_export_args(struct >>>> si_shader_context *ctx, >>>> is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) & >>>> 0x1; >>>> is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) >>>> & >>>> 0x1; >>>> } >>>> args->compr = false; >>>> args->out[0] = f32undef; >>>> args->out[1] = f32undef; >>>> args->out[2] = f32undef; >>>> args->out[3] = f32undef; >>>> + LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef >>>> args[2]) = NULL; >>>> + LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef >>>> args[2], >>>> + unsigned bits, bool hi) = NULL; >>>> + >>>> switch (spi_shader_col_format) { >>>> case V_028714_SPI_SHADER_ZERO: >>>> args->enabled_channels = 0; /* writemask */ >>>> args->target = V_008DFC_SQ_EXP_NULL; >>>> break; >>>> case V_028714_SPI_SHADER_32_R: >>>> args->enabled_channels = 1; /* writemask */ >>>> args->out[0] = values[0]; >>>> break; >>>> @@ -2181,127 +2161,73 @@ static void si_llvm_init_export_args(struct >>>> si_shader_context *ctx, >>>> args->out[1] = values[1]; >>>> break; >>>> case V_028714_SPI_SHADER_32_AR: >>>> args->enabled_channels = 0x9; /* writemask */ >>>> args->out[0] = values[0]; >>>> args->out[3] = values[3]; >>>> break; >>>> case V_028714_SPI_SHADER_FP16_ABGR: >>>> - args->compr = 1; /* COMPR flag */ >>>> - >>>> - for (chan = 0; chan < 2; chan++) { >>>> - LLVMValueRef pack_args[2] = { >>>> - values[2 * chan], >>>> - values[2 * chan + 1] >>>> - }; >>>> - LLVMValueRef packed; >>>> - >>>> - packed = ac_build_cvt_pkrtz_f16(&ctx->ac, >>>> pack_args); >>>> - args->out[chan] = ac_to_float(&ctx->ac, packed); >>>> - } >>>> + packf = ac_build_cvt_pkrtz_f16; >>>> break; >>>> case V_028714_SPI_SHADER_UNORM16_ABGR: >>>> - for (chan = 0; chan < 4; chan++) { >>>> - val[chan] = ac_build_clamp(&ctx->ac, >>>> values[chan]); >>>> - 
val[chan] = LLVMBuildFMul(builder, val[chan], >>>> - >>>> LLVMConstReal(ctx->f32, >>>> 65535), ""); >>>> - val[chan] = LLVMBuildFAdd(builder, val[chan], >>>> - >>>> LLVMConstReal(ctx->f32, >>>> 0.5), ""); >>>> - val[chan] = LLVMBuildFPToUI(builder, val[chan], >>>> - ctx->i32, ""); >>>> - } >>>> - >>>> - args->compr = 1; /* COMPR flag */ >>>> - args->out[0] = ac_to_float(&ctx->ac, >>>> si_llvm_pack_two_int16(ctx, val)); >>>> - args->out[1] = ac_to_float(&ctx->ac, >>>> si_llvm_pack_two_int16(ctx, val+2)); >>>> + packf = ac_build_cvt_pknorm_u16; >>>> break; >>>> case V_028714_SPI_SHADER_SNORM16_ABGR: >>>> - for (chan = 0; chan < 4; chan++) { >>>> - /* Clamp between [-1, 1]. */ >>>> - val[chan] = >>>> lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MIN, >>>> - >>>> values[chan], >>>> - >>>> LLVMConstReal(ctx->f32, 1)); >>>> - val[chan] = >>>> lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MAX, >>>> - val[chan], >>>> - >>>> LLVMConstReal(ctx->f32, -1)); >>>> - /* Convert to a signed integer in [-32767, >>>> 32767]. >>>> */ >>>> - val[chan] = LLVMBuildFMul(builder, val[chan], >>>> - >>>> LLVMConstReal(ctx->f32, >>>> 32767), ""); >>>> - /* If positive, add 0.5, else add -0.5. */ >>>> - val[chan] = LLVMBuildFAdd(builder, val[chan], >>>> - LLVMBuildSelect(builder, >>>> - LLVMBuildFCmp(builder, >>>> LLVMRealOGE, >>>> - val[chan], >>>> ctx->ac.f32_0, ""), >>>> - LLVMConstReal(ctx->f32, >>>> 0.5), >>>> - LLVMConstReal(ctx->f32, >>>> -0.5), ""), ""); >>>> - val[chan] = LLVMBuildFPToSI(builder, val[chan], >>>> ctx->i32, ""); >>>> - } >>>> - >>>> - args->compr = 1; /* COMPR flag */ >>>> - args->out[0] = ac_to_float(&ctx->ac, >>>> si_llvm_pack_two_int32_as_int16(ctx, val)); >>>> - args->out[1] = ac_to_float(&ctx->ac, >>>> si_llvm_pack_two_int32_as_int16(ctx, val+2)); >>>> + packf = ac_build_cvt_pknorm_i16; >>>> break; >>>> - case V_028714_SPI_SHADER_UINT16_ABGR: { >>>> - LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, >>>> - is_int8 ? 255 : is_int10 ? 
1023 : 65535, 0); >>>> - LLVMValueRef max_alpha = >>>> - !is_int10 ? max_rgb : LLVMConstInt(ctx->i32, 3, >>>> 0); >>>> + case V_028714_SPI_SHADER_UINT16_ABGR: >>>> + packi = ac_build_cvt_pk_u16; >>>> + break; >>>> - /* Clamp. */ >>>> - for (chan = 0; chan < 4; chan++) { >>>> - val[chan] = ac_to_integer(&ctx->ac, >>>> values[chan]); >>>> - val[chan] = >>>> lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_UMIN, >>>> - val[chan], >>>> - chan == 3 ? max_alpha : >>>> max_rgb); >>>> - } >>>> + case V_028714_SPI_SHADER_SINT16_ABGR: >>>> + packi = ac_build_cvt_pk_i16; >>>> + break; >>>> - args->compr = 1; /* COMPR flag */ >>>> - args->out[0] = ac_to_float(&ctx->ac, >>>> si_llvm_pack_two_int16(ctx, val)); >>>> - args->out[1] = ac_to_float(&ctx->ac, >>>> si_llvm_pack_two_int16(ctx, val+2)); >>>> + case V_028714_SPI_SHADER_32_ABGR: >>>> + memcpy(&args->out[0], values, sizeof(values[0]) * 4); >>>> break; >>>> } >>>> - case V_028714_SPI_SHADER_SINT16_ABGR: { >>>> - LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, >>>> - is_int8 ? 127 : is_int10 ? 511 : 32767, 0); >>>> - LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, >>>> - is_int8 ? -128 : is_int10 ? -512 : -32768, 0); >>>> - LLVMValueRef max_alpha = >>>> - !is_int10 ? max_rgb : ctx->i32_1; >>>> - LLVMValueRef min_alpha = >>>> - !is_int10 ? min_rgb : LLVMConstInt(ctx->i32, -2, >>>> 0); >>>> + /* Pack f16 or norm_i16/u16. */ >>>> + if (packf) { >>>> + for (chan = 0; chan < 2; chan++) { >>>> + LLVMValueRef pack_args[2] = { >>>> + values[2 * chan], >>>> + values[2 * chan + 1] >>>> + }; >>>> + LLVMValueRef packed; >>>> - /* Clamp. */ >>>> - for (chan = 0; chan < 4; chan++) { >>>> - val[chan] = ac_to_integer(&ctx->ac, >>>> values[chan]); >>>> - val[chan] = >>>> lp_build_emit_llvm_binary(&ctx->bld_base, >>>> - TGSI_OPCODE_IMIN, >>>> - val[chan], chan == 3 ? max_alpha >>>> : >>>> max_rgb); >>>> - val[chan] = >>>> lp_build_emit_llvm_binary(&ctx->bld_base, >>>> - TGSI_OPCODE_IMAX, >>>> - val[chan], chan == 3 ? 
min_alpha >>>> : >>>> min_rgb); >>>> + packed = packf(&ctx->ac, pack_args); >>>> + args->out[chan] = ac_to_float(&ctx->ac, packed); >>>> } >>>> - >>>> args->compr = 1; /* COMPR flag */ >>>> - args->out[0] = ac_to_float(&ctx->ac, >>>> si_llvm_pack_two_int32_as_int16(ctx, val)); >>>> - args->out[1] = ac_to_float(&ctx->ac, >>>> si_llvm_pack_two_int32_as_int16(ctx, val+2)); >>>> - break; >>>> } >>>> + /* Pack i16/u16. */ >>>> + if (packi) { >>>> + for (chan = 0; chan < 2; chan++) { >>>> + LLVMValueRef pack_args[2] = { >>>> + ac_to_integer(&ctx->ac, values[2 * >>>> chan]), >>>> + ac_to_integer(&ctx->ac, values[2 * chan >>>> + >>>> 1]) >>>> + }; >>>> + LLVMValueRef packed; >>>> - case V_028714_SPI_SHADER_32_ABGR: >>>> - memcpy(&args->out[0], values, sizeof(values[0]) * 4); >>>> - break; >>>> + packed = packi(&ctx->ac, pack_args, >>>> + is_int8 ? 8 : is_int10 ? 10 : 16, >>>> + chan == 1); >>>> + args->out[chan] = ac_to_float(&ctx->ac, packed); >>>> + } >>>> + args->compr = 1; /* COMPR flag */ >>>> } >>>> } >>>> static void si_alpha_test(struct lp_build_tgsi_context *bld_base, >>>> LLVMValueRef alpha) >>>> { >>>> struct si_shader_context *ctx = si_shader_context(bld_base); >>>> if (ctx->shader->key.part.ps.epilog.alpha_func != >>>> PIPE_FUNC_NEVER) >>>> { >>>> static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] >>>> = >>>> { >>>> >>> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev