[Mesa-dev] [PATCH 9/9] ac, radeonsi: use ac_build_gather_values more

2018-08-20 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_nir_to_llvm.c   | 14 +++
 src/gallium/drivers/radeonsi/si_shader.c  |  8 +++---
 .../drivers/radeonsi/si_shader_tgsi_mem.c | 25 +++
 .../drivers/radeonsi/si_shader_tgsi_setup.c   | 17 -
 4 files changed, 20 insertions(+), 44 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 537ac33c044..700e48e14b7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -479,35 +479,30 @@ static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
 
return LLVMBuildBitCast(ctx->builder, ac_build_cvt_pkrtz_f16(ctx, comp),
ctx->i32, "");
 }
 
 static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
  LLVMValueRef src0)
 {
LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
-   LLVMValueRef temps[2], result, val;
+   LLVMValueRef temps[2], val;
int i;
 
for (i = 0; i < 2; i++) {
val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
}
-
-   result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
-   ctx->i32_0, "");
-   result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
-   ctx->i32_1, "");
-   return result;
+   return ac_build_gather_values(ctx, temps, 2);
 }
 
 static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
  nir_op op,
  LLVMValueRef src0)
 {
unsigned mask;
int idx;
LLVMValueRef result;
 
@@ -997,24 +992,21 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
ctx->ac.v2i32,
"");
result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
 ctx->ac.i32_1, "");
break;
}
 
case nir_op_pack_64_2x32_split: {
LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32);
-   tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
-src[0], ctx->ac.i32_0, "");
-   tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
-src[1], ctx->ac.i32_1, "");
+   tmp = ac_build_gather_values(&ctx->ac, src, 2);
result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
break;
}
 
case nir_op_cube_face_coord: {
src[0] = ac_to_float(&ctx->ac, src[0]);
LLVMValueRef results[2];
LLVMValueRef in[3];
for (unsigned chan = 0; chan < 3; chan++)
in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 66fe5fad218..cfd99b61601 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2717,26 +2717,24 @@ static void emit_streamout_output(struct si_shader_context *ctx,
 
/* Pack the output. */
LLVMValueRef vdata = NULL;
 
switch (num_comps) {
case 1: /* as i32 */
vdata = out[0];
break;
case 2: /* as v2i32 */
case 3: /* as v4i32 (aligned to 4) */
+   out[3] = LLVMGetUndef(ctx->i32);
+   /* fall through */
case 4: /* as v4i32 */
-   vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps)));
-   for (int j = 0; j < num_comps; j++) {
-   vdata = LLVMBuildInsertElement(ctx->ac.builder, vdata, out[j],
-  LLVMConstInt(ctx->i32, j, 0), "");
-   }
+   vdata = ac_build_gather_values(&ctx->ac, out, util_next_power_of_two(num_comps));
break;
}
 
ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx],
vdata, num_comps,
so_write_offsets[buf_idx],
ctx->i32_0,
stream_out->dst_offset * 4, 1, 1, true, false);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
b/src/

Re: [Mesa-dev] [PATCH 9/9] ac, radeonsi: use ac_build_gather_values more

2018-08-21 Thread Samuel Pitoiset

Patches 3-9 are:

Reviewed-by: Samuel Pitoiset 

On 8/21/18 5:23 AM, Marek Olšák wrote:

From: Marek Olšák 

---
  src/amd/common/ac_nir_to_llvm.c   | 14 +++
  src/gallium/drivers/radeonsi/si_shader.c  |  8 +++---
  .../drivers/radeonsi/si_shader_tgsi_mem.c | 25 +++
  .../drivers/radeonsi/si_shader_tgsi_setup.c   | 17 -
  4 files changed, 20 insertions(+), 44 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 537ac33c044..700e48e14b7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -479,35 +479,30 @@ static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
  
  	return LLVMBuildBitCast(ctx->builder, ac_build_cvt_pkrtz_f16(ctx, comp),

ctx->i32, "");
  }
  
  static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,

  LLVMValueRef src0)
  {
LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
-   LLVMValueRef temps[2], result, val;
+   LLVMValueRef temps[2], val;
int i;
  
  	for (i = 0; i < 2; i++) {

val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
}
-
-   result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
-   ctx->i32_0, "");
-   result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
-   ctx->i32_1, "");
-   return result;
+   return ac_build_gather_values(ctx, temps, 2);
  }
  
  static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,

  nir_op op,
  LLVMValueRef src0)
  {
unsigned mask;
int idx;
LLVMValueRef result;
  
@@ -997,24 +992,21 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)

LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
ctx->ac.v2i32,
"");
result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
 ctx->ac.i32_1, "");
break;
}
  
  	case nir_op_pack_64_2x32_split: {

LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32);
-   tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
-src[0], ctx->ac.i32_0, "");
-   tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
-src[1], ctx->ac.i32_1, "");
+   tmp = ac_build_gather_values(&ctx->ac, src, 2);
result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
break;
}
  
  	case nir_op_cube_face_coord: {

src[0] = ac_to_float(&ctx->ac, src[0]);
LLVMValueRef results[2];
LLVMValueRef in[3];
for (unsigned chan = 0; chan < 3; chan++)
in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 66fe5fad218..cfd99b61601 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2717,26 +2717,24 @@ static void emit_streamout_output(struct si_shader_context *ctx,
  
  	/* Pack the output. */

LLVMValueRef vdata = NULL;
  
  	switch (num_comps) {

case 1: /* as i32 */
vdata = out[0];
break;
case 2: /* as v2i32 */
case 3: /* as v4i32 (aligned to 4) */
+   out[3] = LLVMGetUndef(ctx->i32);
+   /* fall through */
case 4: /* as v4i32 */
-   vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps)));
-   for (int j = 0; j < num_comps; j++) {
-   vdata = LLVMBuildInsertElement(ctx->ac.builder, vdata, out[j],
-  LLVMConstInt(ctx->i32, j, 0), "");
-   }
+   vdata = ac_build_gather_values(&ctx->ac, out, util_next_power_of_two(num_comps));
break;
}
  
  	ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx],

vdata, num_comps,
so_write_offsets[buf_idx],
ctx->i32_0,
stream_out->dst_offset * 4, 1, 1, true,