From: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeon/radeon_llvm.h | 1 + .../drivers/radeon/radeon_setup_tgsi_llvm.c | 47 +++++++++++++++++----- 2 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index 13f3336..4c946b5 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -45,20 +45,21 @@ struct radeon_llvm_branch { unsigned has_else; }; struct radeon_llvm_loop { LLVMBasicBlockRef loop_block; LLVMBasicBlockRef endloop_block; }; struct radeon_llvm_array { struct tgsi_declaration_range range; + ubyte usagemask; LLVMValueRef alloca; }; struct radeon_llvm_context { struct lp_build_tgsi_soa_context soa; /*=== Front end configuration ===*/ /* Instructions that are not described by any of the TGSI opcodes. */ diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index d8ab5b0..73e4ce2 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -191,22 +191,30 @@ get_pointer_into_array(struct radeon_llvm_context *ctx, LLVMValueRef index; if (file != TGSI_FILE_TEMPORARY) return NULL; array = get_temp_array(&ctx->soa.bld_base, reg_index, reg_indirect); if (!array || !array->alloca) return NULL; index = emit_array_index(&ctx->soa, reg_indirect, reg_index - array->range.First); - index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, TGSI_NUM_CHANNELS), ""); - index = LLVMBuildAdd(builder, index, lp_build_const_int32(gallivm, swizzle), ""); + index = LLVMBuildMul( + builder, index, + lp_build_const_int32(gallivm, util_bitcount(array->usagemask)), + ""); + index = LLVMBuildAdd( + builder, index, + lp_build_const_int32( + gallivm, + util_bitcount(array->usagemask & ((1 << swizzle) - 1))), + ""); idxs[0] = ctx->soa.bld_base.uint_bld.zero; idxs[1] = index; return LLVMBuildGEP(builder, array->alloca, idxs, 2, ""); } LLVMValueRef radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base, enum tgsi_opcode_type type, LLVMValueRef ptr, LLVMValueRef ptr2) @@ -466,54 +474,61 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base, } } break; } case TGSI_FILE_TEMPORARY: { char name[16] = ""; LLVMValueRef array_alloca = NULL; unsigned decl_size; + unsigned usagemask = decl->Declaration.UsageMask; first = decl->Range.First; last = decl->Range.Last; decl_size = 4 * ((last - first) + 1); + if (decl->Declaration.Array) { unsigned id = decl->Array.ArrayID - 1; + unsigned array_size; + if (!ctx->arrays) { int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY]; ctx->arrays = CALLOC(size, sizeof(ctx->arrays[0])); } ctx->arrays[id].range = decl->Range; + ctx->arrays[id].usagemask = usagemask; + array_size = ((last - first) + 1) * util_bitcount(usagemask); /* If the array has more than 16 elements, store it * in memory using an alloca that spans the entire * array. * * Otherwise, store each array element individually. * We will then generate vectors (per-channel, up to - * <4 x float>) for indirect addressing. + * <16 x float> if the usagemask is a single bit) for + * indirect addressing. * * Note that 16 is the number of vector elements that * LLVM will store in a register, so theoretically an * array with up to 4 * 16 = 64 elements could be * handled this way, but whether that's a good idea * depends on VGPR register pressure elsewhere. * * FIXME: We shouldn't need to have the non-alloca * code path for arrays. LLVM should be smart enough to * promote allocas into registers when profitable. */ - if (decl_size > 16) { + if (array_size > 16) { array_alloca = LLVMBuildAlloca(builder, LLVMArrayType(bld_base->base.vec_type, - decl_size), "array"); + array_size), "array"); ctx->arrays[id].alloca = array_alloca; } } if (!ctx->temps_count) { ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1; ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef)); } if (!array_alloca) { for (i = 0; i < decl_size; ++i) { @@ -524,28 +539,40 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base, ctx->temps[first * TGSI_NUM_CHANNELS + i] = si_build_alloca_undef(bld_base->base.gallivm, bld_base->base.vec_type, name); } } else { LLVMValueRef idxs[2] = { bld_base->uint_bld.zero, NULL }; + LLVMValueRef undef = NULL; + unsigned j = 0; + + if (usagemask != TGSI_WRITEMASK_XYZW) + undef = LLVMGetUndef(LLVMPointerType(bld_base->base.vec_type, 0)); + for (i = 0; i < decl_size; ++i) { + LLVMValueRef ptr; + if (usagemask & (1 << (i % 4))) { #ifdef DEBUG - snprintf(name, sizeof(name), "TEMP%d.%c", - first + i / 4, "xyzw"[i % 4]); + snprintf(name, sizeof(name), "TEMP%d.%c", + first + i / 4, "xyzw"[i % 4]); #endif - idxs[1] = lp_build_const_int32(bld_base->base.gallivm, i); - ctx->temps[first * TGSI_NUM_CHANNELS + i] = - LLVMBuildGEP(builder, array_alloca, idxs, 2, name); + idxs[1] = lp_build_const_int32(bld_base->base.gallivm, j); + ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name); + j++; + } else { + ptr = undef; + } + ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr; } } break; } case TGSI_FILE_INPUT: { unsigned idx; for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { if (ctx->load_input) ctx->load_input(ctx, idx, decl); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev