Re: [Mesa-dev] [PATCH 1/1] r600: Enable FMA on chips that support it

2016-06-15 Thread Glenn Kennard

On Wed, 15 Jun 2016 20:13:13 +0200, Jan Vesely  wrote:


Signed-off-by: Jan Vesely 
---
Untested (I don't have the required hw)

 src/gallium/drivers/r600/r600_pipe.c   | 5 -
 src/gallium/drivers/r600/r600_shader.c | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index a49b00f..49c3e1d 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -548,7 +548,6 @@ static int r600_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, e
return 0;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
-   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
@@ -558,6 +557,10 @@ static int r600_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, e
 *https://bugs.freedesktop.org/show_bug.cgi?id=86720
 */
return 255;
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+   // Enable on CYPRESS(EG) and CAYMAN(NI)
+   return rscreen->b.family == CHIP_CYPRESS ||
+  rscreen->b.family == CHIP_CAYMAN;
}
return 0;
 }
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 101f666..35019e3 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -8917,7 +8917,7 @@ static const struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[]
[TGSI_OPCODE_MAD]   = { ALU_OP3_MULADD, tgsi_op3},
[TGSI_OPCODE_SUB]   = { ALU_OP2_ADD, tgsi_op2},
[TGSI_OPCODE_LRP]   = { ALU_OP0_NOP, tgsi_lrp},
-   [TGSI_OPCODE_FMA]   = { ALU_OP0_NOP, tgsi_unsupported},
+   [TGSI_OPCODE_FMA]   = { ALU_OP3_FMA, tgsi_op3},
[TGSI_OPCODE_SQRT]  = { ALU_OP1_SQRT_IEEE, 
tgsi_trans_srcx_replicate},
[TGSI_OPCODE_DP2A]  = { ALU_OP0_NOP, tgsi_unsupported},
[22]= { ALU_OP0_NOP, tgsi_unsupported},


You probably meant to add the opcode to the eg_shader_tgsi_instruction and 
cm_shader_tgsi_instruction opcode tables rather than the R600/R700 one?


I'll also note in passing that FMA on CYPRESS/HEMLOCK has an issue rate of 
4/cycle vs. 5/cycle for MULADD, since FMA cannot be issued in the 't' slot. 
This may or may not affect performance, depending on whether the GLSL front end 
decides to use fma for mul+add operations. On Cayman/Aruba they are the same rate.


/Glenn
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/9] r600g/sb: Support scratch ops

2017-03-05 Thread Glenn Kennard
Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/sb/sb_bc.h   | 11 ++
 src/gallium/drivers/r600/sb/sb_bc_builder.cpp | 46 -
 src/gallium/drivers/r600/sb/sb_bc_decoder.cpp | 49 ++-
 src/gallium/drivers/r600/sb/sb_bc_dump.cpp| 15 
 src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc | 36 
 5 files changed, 155 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/sb/sb_bc.h 
b/src/gallium/drivers/r600/sb/sb_bc.h
index 2c662ac..74c8699 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -580,6 +580,15 @@ struct bc_fetch {
unsigned mega_fetch:1;
 
unsigned src2_gpr:7; /* for GDS */
+
+   /* for MEM ops */
+   unsigned elem_size:2;
+   unsigned uncached:1;
+   unsigned indexed:1;
+   unsigned burst_count:4;
+   unsigned array_base:13;
+   unsigned array_size:12;
+
void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
 };
 
@@ -747,6 +756,7 @@ private:
 
int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
int decode_fetch_gds(unsigned &i, bc_fetch &bc);
+   int decode_fetch_mem(unsigned &i, bc_fetch &bc);
 };
 
 // bytecode format definition
@@ -966,6 +976,7 @@ private:
int build_fetch_clause(cf_node *n);
int build_fetch_tex(fetch_node *n);
int build_fetch_vtx(fetch_node *n);
+   int build_fetch_mem(fetch_node* n);
 };
 
 } // namespace r600_sb
diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
index b0df3d9..678844c 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
@@ -129,7 +129,9 @@ int bc_builder::build_fetch_clause(cf_node* n) {
I != E; ++I) {
fetch_node *f = static_cast(*I);
 
-   if (f->bc.op_ptr->flags & FF_VTX)
+   if (f->bc.op_ptr->flags & FF_MEM)
+   build_fetch_mem(f);
+   else if (f->bc.op_ptr->flags & FF_VTX)
build_fetch_vtx(f);
else
build_fetch_tex(f);
@@ -657,4 +659,46 @@ int bc_builder::build_fetch_vtx(fetch_node* n) {
return 0;
 }
 
+int bc_builder::build_fetch_mem(fetch_node* n) {
+   const bc_fetch &bc = n->bc;
+   const fetch_op_info *fop = bc.op_ptr;
+
+   assert(fop->flags & FF_MEM);
+
+   bb << MEM_RD_WORD0_R7EGCM()
+   .MEM_INST(2)
+   .ELEM_SIZE(bc.elem_size)
+   .FETCH_WHOLE_QUAD(bc.fetch_whole_quad)
+   .MEM_OP(0)
+   .UNCACHED(bc.uncached)
+   .INDEXED(bc.indexed)
+   .SRC_SEL_Y(bc.src_sel[1])
+   .SRC_GPR(bc.src_gpr)
+   .SRC_REL(bc.src_rel)
+   .SRC_SEL_X(bc.src_sel[0])
+   .BURST_COUNT(bc.burst_count)
+   .LDS_REQ(bc.lds_req)
+   .COALESCED_READ(bc.coalesced_read);
+
+   bb << MEM_RD_WORD1_R7EGCM()
+   .DST_GPR(bc.dst_gpr)
+   .DST_REL(bc.dst_rel)
+   .DST_SEL_X(bc.dst_sel[0])
+   .DST_SEL_Y(bc.dst_sel[1])
+   .DST_SEL_Z(bc.dst_sel[2])
+   .DST_SEL_W(bc.dst_sel[3])
+   .DATA_FORMAT(bc.data_format)
+   .NUM_FORMAT_ALL(bc.num_format_all)
+   .FORMAT_COMP_ALL(bc.format_comp_all)
+   .SRF_MODE_ALL(bc.srf_mode_all);
+
+   bb << MEM_RD_WORD2_R7EGCM()
+   .ARRAY_BASE(bc.array_base)
+   .ENDIAN_SWAP(bc.endian_swap)
+   .ARR_SIZE(bc.array_size);
+
+   bb << 0;
+   return 0;
+}
+
 }
diff --git a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
index 8712abe..1c63c38 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
@@ -413,7 +413,9 @@ int bc_decoder::decode_fetch(unsigned & i, bc_fetch& bc) {
if (fetch_opcode == 2) { // MEM_INST_MEM
unsigned mem_op = (dw0 >> 8) & 0x7;
unsigned gds_op;
-   if (mem_op == 4) {
+   if (mem_op == 0 || mem_op == 2) {
+   fetch_opcode = mem_op == 0 ? FETCH_OP_READ_SCRATCH : 
FETCH_OP_READ_MEM;
+   } else if (mem_op == 4) {
gds_op = (dw1 >> 9) & 0x1f;
fetch_opcode = FETCH_OP_GDS_ADD + gds_op;
} else if (mem_op == 5)
@@ -422,6 +424,9 @@ int bc_decoder::decode_fetch(unsigned & i, bc_fetch& bc) {
} else
bc.set_op(r600_isa_fetch_by_opcode(ctx.isa, fetch_opcode));
 
+   if (bc.op_ptr->flags & FF_MEM)
+   return decode_fetch_mem(i, bc);
+

[Mesa-dev] r600g: Support spilling temp arrays

2017-03-05 Thread Glenn Kennard
This patch series implements support for spilling temporary arrays on
R6xx/R7xx/Evergreen/NI if hardware GPR limits are exceeded. It opts for a
simple pessimistic scheme of spilling the largest arrays until things fit.

This fixes some subset of issues where "GPR limit exceeded" or "TGSI
translation error" is printed to the console.

Exercises left to the reader:
* Test on R600/R700, I suspect R600 in particular might need some additional
  fixups for write masking in tgsi_src().
* Implement support for spilling regular TGSI temps. Most of the
  infrastructure needed is in this patch series so should be straightforward.
  This would fix the remaining GPR limit exceeded issues.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/9] r600g: Add pending output function

2017-03-05 Thread Glenn Kennard
Spills have to happen after the VLIW bundle currently being
processed, so defer emitting the spill op.

Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/r600_asm.c | 18 ++
 src/gallium/drivers/r600/r600_asm.h |  4 
 2 files changed, 22 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 7415543..69bd0d6 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -235,6 +235,15 @@ int r600_bytecode_add_output(struct r600_bytecode *bc,
return 0;
 }
 
+int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
+   const struct r600_bytecode_output *output)
+{
+   assert(bc->n_pending_outputs + 1 < ARRAY_SIZE(bc->pending_outputs));
+   bc->pending_outputs[bc->n_pending_outputs++] = *output;
+
+   return 0;
+}
+
 /* alu instructions that can ony exits once per group */
 static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu 
*alu)
 {
@@ -1304,6 +1313,15 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
if (nalu->dst.rel && bc->r6xx_nop_after_rel_dst)
insert_nop_r6xx(bc);
 
+   /* Might need to insert spill write ops after current clause */
+   if (nalu->last && bc->n_pending_outputs) {
+   while (bc->n_pending_outputs) {
+   r = r600_bytecode_add_output(bc, 
&bc->pending_outputs[--bc->n_pending_outputs]);
+   if (r)
+   return r;
+   }
+   }
+
return 0;
 }
 
diff --git a/src/gallium/drivers/r600/r600_asm.h 
b/src/gallium/drivers/r600/r600_asm.h
index 87a7c3a..df46db7 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -261,6 +261,8 @@ struct r600_bytecode {
unsignedindex_reg[2]; /* indexing register CF_INDEX_[01] */
unsigneddebug_id;
struct r600_isa* isa;
+   struct r600_bytecode_output pending_outputs[5];
+   int n_pending_outputs;
 };
 
 /* eg_asm.c */
@@ -285,6 +287,8 @@ int r600_bytecode_add_gds(struct r600_bytecode *bc,
const struct r600_bytecode_gds *gds);
 int r600_bytecode_add_output(struct r600_bytecode *bc,
const struct r600_bytecode_output *output);
+int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
+   const struct r600_bytecode_output *output);
 int r600_bytecode_build(struct r600_bytecode *bc);
 int r600_bytecode_add_cf(struct r600_bytecode *bc);
 int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/9] r600g: Add instruction encoding defines for MEM_RD

2017-03-05 Thread Glenn Kennard
Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/r700_sq.h | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/src/gallium/drivers/r600/r700_sq.h 
b/src/gallium/drivers/r600/r700_sq.h
index d881012..81e0e7a 100644
--- a/src/gallium/drivers/r600/r700_sq.h
+++ b/src/gallium/drivers/r600/r700_sq.h
@@ -543,4 +543,34 @@
 #define   G_SQ_TEX_WORD2_SRC_SEL_W(x)(((x) >> 
29) & 0x7)
 #define   C_SQ_TEX_WORD2_SRC_SEL_W   0x1FFF
 
+#define P_SQ_MEM_RD_WORD0
+#define   S_SQ_MEM_RD_WORD0_MEM_INST(x)  (((x) & 
0x1F) << 0)
+#define   S_SQ_MEM_RD_WORD0_ELEM_SIZE(x) (((x) & 
0x3) << 5)
+#define   S_SQ_MEM_RD_WORD0_FETCH_WHOLE_QUAD(x)  (((x) & 
0x1) << 7)
+#define   S_SQ_MEM_RD_WORD0_MEM_OP(x)(((x) & 
0x7) << 8)
+#define   S_SQ_MEM_RD_WORD0_UNCACHED(x)  (((x) & 
0x1) << 11)
+#define   S_SQ_MEM_RD_WORD0_INDEXED(x)   (((x) & 
0x1) << 12)
+#define   S_SQ_MEM_RD_WORD0_SRC_SEL_Y(x) (((x) & 
0x3) << 13)
+#define   S_SQ_MEM_RD_WORD0_SRC_GPR(x)   (((x) & 
0x7F) << 16)
+#define   S_SQ_MEM_RD_WORD0_SRC_REL(x)   (((x) & 
0x1) << 23)
+#define   S_SQ_MEM_RD_WORD0_SRC_SEL_X(x) (((x) & 
0x3) << 24)
+#define   S_SQ_MEM_RD_WORD0_BURST_COUNT(x)   (((x) & 
0xF) << 26)
+#define   S_SQ_MEM_RD_WORD0_LDS_REQ(x)   (((x) & 
0x1) << 30)
+#define   S_SQ_MEM_RD_WORD0_COALESCED_READ(x)(((x) & 
0x1) << 31)
+#define P_SQ_MEM_RD_WORD1
+#define   S_SQ_MEM_RD_WORD1_DST_GPR(x)   (((x) & 
0x7f) << 0)
+#define   S_SQ_MEM_RD_WORD1_DST_REL(x)   (((x) & 
0x1) << 7)
+#define   S_SQ_MEM_RD_WORD1_DST_SEL_X(x) (((x) & 
0x7) << 9)
+#define   S_SQ_MEM_RD_WORD1_DST_SEL_Y(x) (((x) & 
0x7) << 12)
+#define   S_SQ_MEM_RD_WORD1_DST_SEL_Z(x) (((x) & 
0x7) << 15)
+#define   S_SQ_MEM_RD_WORD1_DST_SEL_W(x) (((x) & 
0x7) << 18)
+#define   S_SQ_MEM_RD_WORD1_DATA_FORMAT(x)   (((x) & 
0x3F) << 22)
+#define   S_SQ_MEM_RD_WORD1_NUM_FORMAT_ALL(x)(((x) & 
0x3) << 28)
+#define   S_SQ_MEM_RD_WORD1_FORMAT_COMP_ALL(x)   (((x) & 
0x1) << 30)
+#define   S_SQ_MEM_RD_WORD1_SRF_MODE_ALL(x)  (((x) & 
0x1) << 31)
+#define P_SQ_MEM_RD_WORD2
+#define   S_SQ_MEM_RD_WORD2_ARRAY_BASE(x)(((x) & 
0x1FFF) << 0)
+#define   S_SQ_MEM_RD_WORD2_ENDIAN_SWAP(x)   (((x) & 
0x3) << 16)
+#define   S_SQ_MEM_RD_WORD2_ARRAY_SIZE(x)(((x) & 
0xFFF) << 20)
+
 #endif
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/9] r600g: Add defines for per-shader engine settings

2017-03-05 Thread Glenn Kennard
Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/r600d.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 9155076..0d04708 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -3777,6 +3777,12 @@
 #define SQ_TEX_INST_SAMPLE_C_G_LB  0x1E
 #define SQ_TEX_INST_SAMPLE_C_G_LZ  0x1F
 
+#define EG_0802C_GRBM_GFX_INDEX0x802C
+#define   S_0802C_INSTANCE_INDEX(x)  (((x) 
& 0x) << 0)
+#define   S_0802C_SE_INDEX(x)(((x) 
& 0x3fff) << 16)
+#define   S_0802C_INSTANCE_BROADCAST_WRITES(x)   (((x) & 0x1) << 30)
+#define   S_0802C_SE_BROADCAST_WRITES(x) (((x) & 0x1) 
<< 31)
+
 #define CM_R_028AA8_IA_MULTI_VGT_PARAM0x028AA8
 #define   S_028AA8_PRIMGROUP_SIZE(x)   (((unsigned)(x) & 
0x) << 0)
 #define   G_028AA8_PRIMGROUP_SIZE(x)   (((x) >> 0) & 0x)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/9] r600g: Add scratch ring register defines

2017-03-05 Thread Glenn Kennard
Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/evergreend.h | 14 ++
 src/gallium/drivers/r600/r600d.h  |  8 ++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreend.h 
b/src/gallium/drivers/r600/evergreend.h
index 40ba7c1..2fbb540 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -2021,10 +2021,24 @@
 #define R_0288EC_SQ_LDS_ALLOC_PS 0x000288EC
 #define R_028900_SQ_ESGS_RING_ITEMSIZE   0x00028900
 #define R_028904_SQ_GSVS_RING_ITEMSIZE   0x00028904
+#define R_008C50_SQ_ESTMP_RING_BASE  0x8C50
 #define R_028908_SQ_ESTMP_RING_ITEMSIZE  0x00028908
+#define R_008C54_SQ_ESTMP_RING_SIZE  0x8C54
+#define R_008C58_SQ_GSTMP_RING_BASE  0x8C58
 #define R_02890C_SQ_GSTMP_RING_ITEMSIZE  0x0002890C
+#define R_008C5C_SQ_GSTMP_RING_SIZE  0x8C5C
+#define R_008C60_SQ_VSTMP_RING_BASE  0x8C60
 #define R_028910_SQ_VSTMP_RING_ITEMSIZE  0x00028910
+#define R_008C64_SQ_VSTMP_RING_SIZE  0x8C64
+#define R_008C68_SQ_PSTMP_RING_BASE  0x8C68
 #define R_028914_SQ_PSTMP_RING_ITEMSIZE  0x00028914
+#define R_008C6C_SQ_PSTMP_RING_SIZE  0x8C6C
+#define R_008E10_SQ_LSTMP_RING_BASE  0x8E10
+#define R_028830_SQ_LSTMP_RING_ITEMSIZE  0x00028830
+#define R_008E14_SQ_LSTMP_RING_SIZE  0x8E14
+#define R_008E18_SQ_HSTMP_RING_BASE  0x8E18
+#define R_028834_SQ_HSTMP_RING_ITEMSIZE  0x00028834
+#define R_008E1C_SQ_HSTMP_RING_SIZE  0x8E1C
 #define R_02891C_SQ_GS_VERT_ITEMSIZE 0x0002891C
 #define R_028920_SQ_GS_VERT_ITEMSIZE_1   0x00028920
 #define R_028924_SQ_GS_VERT_ITEMSIZE_2   0x00028924
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 75d64c1..9155076 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -219,8 +219,12 @@
 #define R_008C4C_SQ_GSVS_RING_SIZE   0x008C4C
 #define R_008C50_SQ_ESTMP_RING_BASE  0x008C50
 #define R_008C54_SQ_ESTMP_RING_SIZE  0x008C54
-#define R_008C50_SQ_GSTMP_RING_BASE  0x008C58
-#define R_008C54_SQ_GSTMP_RING_SIZE  0x008C5C
+#define R_008C58_SQ_GSTMP_RING_BASE  0x008C58
+#define R_008C5C_SQ_GSTMP_RING_SIZE  0x008C5C
+#define R_008C68_SQ_PSTMP_RING_BASE  0x008C68
+#define R_008C6C_SQ_PSTMP_RING_SIZE  0x008C6C
+#define R_008C60_SQ_VSTMP_RING_BASE  0x008C60
+#define R_008C64_SQ_VSTMP_RING_SIZE  0x008C64
 
 #define R_0088C8_VGT_GS_PER_ES   0x0088C8
 #define R_0088CC_VGT_ES_PER_GS   0x0088CC
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/9] r600g: Support emitting scratch ops

2017-03-05 Thread Glenn Kennard
Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/eg_asm.c   |  3 ++-
 src/gallium/drivers/r600/r600_asm.c | 25 +++-
 src/gallium/drivers/r600/r600_asm.h | 15 ++
 src/gallium/drivers/r600/r700_asm.c | 39 +
 4 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c 
b/src/gallium/drivers/r600/eg_asm.c
index 46683c1..fa2e1d4 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -104,7 +104,8 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct 
r600_bytecode_cf *cf)

S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |

S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |

S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) |
-   
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size);
+   
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) |
+   
S_SQ_CF_ALLOC_EXPORT_WORD1_MARK(cf->output.mark);
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
bc->bytecode[id] |= 
S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
id++;
diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index f85993d..7415543 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1491,6 +1491,9 @@ int cm_bytecode_add_cf_end(struct r600_bytecode *bc)
 /* common to all 3 families */
 static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct 
r600_bytecode_vtx *vtx, unsigned id)
 {
+   if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
+   return r700_bytecode_fetch_mem_build(bc, vtx, id);
+
bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
@@ -2127,7 +2130,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
o += print_swizzle(7);
}
 
-   if (cf->output.type == 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND)
+   if (cf->output.type == 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND ||
+   cf->output.type == 3 
/*V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK */)
o += fprintf(stderr, " R%d", 
cf->output.index_gpr);
 
o += print_indent(o, 67);
@@ -2139,6 +2143,10 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
fprintf(stderr, "NO_BARRIER ");
if (cf->end_of_program)
fprintf(stderr, "EOP ");
+
+   if (cf->output.mark)
+   fprintf(stderr, "MARK ");
+
fprintf(stderr, "\n");
} else {
fprintf(stderr, "%04d %08X %08X  %s ", id, 
bc->bytecode[id],
@@ -2270,6 +2278,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 
o += fprintf(stderr, ", R%d.", vtx->src_gpr);
o += print_swizzle(vtx->src_sel_x);
+   if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
+   o += print_swizzle(vtx->src_sel_y);
 
if (vtx->offset)
fprintf(stderr, " +%db", vtx->offset);
@@ -2286,6 +2296,19 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
if (bc->chip_class >= EVERGREEN && 
vtx->buffer_index_mode)
fprintf(stderr, "SQ_%s ", 
index_mode[vtx->buffer_index_mode]);
 
+   if (r600_isa_fetch(vtx->op)->flags & FF_MEM) {
+   if (vtx->uncached)
+   fprintf(stderr, "UNCACHED ");
+   if (vtx->indexed)
+   fprintf(stderr, "INDEXED:%d ", 
vtx->indexed);
+
+   fprintf(stderr, "ELEM_SIZE:%d ", 
vtx->elem_size);
+   if (vtx->burst_count)
+   fprintf(stderr, "BURST_COUNT:%d ", 
vtx->burst_count);

[Mesa-dev] [PATCH 8/9] r600g/sb: Add dependency tracking for scratch ops

2017-03-05 Thread Glenn Kennard
Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/r600_shader.h |  1 +
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp |  2 +-
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp   | 12 
 src/gallium/drivers/r600/sb/sb_core.cpp|  3 ++-
 src/gallium/drivers/r600/sb/sb_ir.h|  6 +-
 src/gallium/drivers/r600/sb/sb_ra_init.cpp |  2 +-
 src/gallium/drivers/r600/sb/sb_sched.cpp   |  2 +-
 src/gallium/drivers/r600/sb/sb_valtable.cpp|  1 +
 8 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.h 
b/src/gallium/drivers/r600/r600_shader.h
index e94230f..3c35d48 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -67,6 +67,7 @@ struct r600_shader {
boolean uses_kill;
boolean fs_write_all;
boolean two_side;
+   boolean needs_scratch_space;
/* Number of color outputs in the TGSI shader,
 * sometimes it could be higher than nr_cbufs (bug?).
 * Also with writes_all property on eg+ it will be set to max CB number 
*/
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 82826a9..5d74794 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -293,7 +293,7 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g, 
node *prev_node) {
value *d = n->dst.empty() ? NULL : n->dst[0];
 
if (d && d->is_special_reg()) {
-   assert((n->bc.op_ptr->flags & AF_MOVA) || 
d->is_geometry_emit());
+   assert((n->bc.op_ptr->flags & AF_MOVA) || 
d->is_geometry_emit() || d->is_scratch());
d = NULL;
}
 
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index ae92a76..9c52342 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -667,6 +667,11 @@ int bc_parser::prepare_fetch_clause(cf_node *cf) {

n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == 
V_SQ_CF_INDEX_1));
}
}
+
+   if (n->bc.op == FETCH_OP_READ_SCRATCH) {
+   n->src.push_back(sh->get_special_value(SV_SCRATCH));
+   n->dst.push_back(sh->get_special_value(SV_SCRATCH));
+   }
}
 
return 0;
@@ -797,6 +802,10 @@ int bc_parser::prepare_ir() {
c->flags |= NF_DONT_KILL;
}
}
+   else if (c->bc.op == CF_OP_MEM_SCRATCH) {
+   
c->src.push_back(sh->get_special_value(SV_SCRATCH));
+   
c->dst.push_back(sh->get_special_value(SV_SCRATCH));
+   }
 
if (!burst_count--)
break;
@@ -831,6 +840,9 @@ int bc_parser::prepare_ir() {

c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));

c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
}
+   } else if (c->bc.op == CF_OP_WAIT_ACK) {
+   c->src.push_back(sh->get_special_value(SV_SCRATCH));
+   c->dst.push_back(sh->get_special_value(SV_SCRATCH));
}
}
 
diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp 
b/src/gallium/drivers/r600/sb/sb_core.cpp
index afea818..283c84f 100644
--- a/src/gallium/drivers/r600/sb/sb_core.cpp
+++ b/src/gallium/drivers/r600/sb/sb_core.cpp
@@ -191,7 +191,8 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
 
// if conversion breaks the dependency tracking between CF_EMIT ops 
when it removes
// the phi nodes for SV_GEOMETRY_EMIT. Just disable it for GS
-   if (sh->target != TARGET_GS)
+   // Same for for shaders spilling to scratch memory using SV_SCRATCH
+   if (sh->target != TARGET_GS || pshader->needs_scratch_space)
SB_RUN_PASS(if_conversion,  1);
 
// if_conversion breaks info about uses, but next pass (peephole)
diff --git a/src/gallium/drivers/r600/sb/sb_ir.h 
b/src/gallium/drivers/r600/sb/sb_ir.h
index 74c0549..141bf5f 100644
--- a/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/src/gallium/drivers/r600/sb/sb_ir.h
@@ -42,7 +42,8 @@ enum special_regs {
SV_EXEC_MASK,
SV_AR_INDEX,
SV_VALID_MASK,
-   SV_GEOMETRY_EMIT
+   SV_GEOMETRY_EMIT,
+   SV_SCRAT

[Mesa-dev] [PATCH 9/9] r600g: Implement spilling of temp arrays

2017-03-05 Thread Glenn Kennard
Pessimistically spills arrays if GPR limit is exceeded.

Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/r600_shader.c | 308 ++---
 1 file changed, 285 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 8cb3f8b..f716dae 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -165,7 +165,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
bool dump = r600_can_dump_shader(&rctx->screen->b,
 tgsi_get_processor_type(sel->tokens));
unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
-   unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & 
DBG_SB_DISASM);
+   unsigned sb_disasm;
unsigned export_shader;
 
shader->shader.bc.isa = rctx->isa;
@@ -203,6 +203,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
}
}
 
+   sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
if (dump && !sb_disasm) {
fprintf(stderr, 
"--\n");
r600_bytecode_disasm(&shader->shader.bc);
@@ -317,6 +318,9 @@ struct eg_interp {
 
 struct r600_shader_ctx {
struct tgsi_shader_info info;
+   struct tgsi_array_info  *array_infos;
+   /* flag for each tgsi temp array if its been spilled or not */
+   bool*spilled_arrays;
struct tgsi_parse_context   parse;
const struct tgsi_token *tokens;
unsignedtype;
@@ -350,6 +354,7 @@ struct r600_shader_ctx {
unsignedenabled_stream_buffers_mask;
unsignedtess_input_info; /* temp with 
tess input offsets */
unsignedtess_output_info; /* temp with 
tess input offsets */
+   unsignedneed_wait_ack;
 };
 
 struct r600_shader_tgsi_instruction {
@@ -850,6 +855,96 @@ static int tgsi_barrier(struct r600_shader_ctx *ctx)
return 0;
 }
 
+static void choose_spill_arrays(struct r600_shader_ctx *ctx, int *regno, 
unsigned *scratch_space_needed)
+{
+   // pick largest array and spill it, repeat until the number of temps is 
under limit or we run out of arrays
+   unsigned n = ctx->info.array_max[TGSI_FILE_TEMPORARY];
+   unsigned narrays_left = n;
+   bool *spilled = ctx->spilled_arrays; // assumed calloc:ed
+
+   *scratch_space_needed = 0;
+   while (*regno > 124 && narrays_left) {
+   unsigned i;
+   unsigned largest = 0;
+   unsigned largest_index = 0;
+
+   for (i = 0; i < n; i++) {
+   unsigned size = ctx->array_infos[i].range.Last - 
ctx->array_infos[i].range.First + 1;
+   if (!spilled[i] && size > largest) {
+   largest = size;
+   largest_index = i;
+   }
+   }
+
+   spilled[largest_index] = true;
+   *regno -= largest;
+   *scratch_space_needed += largest;
+
+   narrays_left --;
+   }
+
+   if (narrays_left == 0) {
+   ctx->info.indirect_files &= ~(1 << TGSI_FILE_TEMPORARY);
+   }
+}
+
+/* take spilled temp arrays into account when translating tgsi register
+   indexes into r600 gprs if spilled is false, or scratch array offset if
+   spilled is true */
+static int map_tgsi_reg_index_to_r600_gpr(struct r600_shader_ctx *ctx, 
unsigned tgsi_reg_index, bool *spilled) {
+   unsigned i;
+   unsigned spilled_size = 0;
+
+   for (i = 0; i < ctx->info.array_max[TGSI_FILE_TEMPORARY]; i++) {
+   if (tgsi_reg_index >= ctx->array_infos[i].range.First && 
tgsi_reg_index <= ctx->array_infos[i].range.Last) {
+   if (ctx->spilled_arrays[i]) {
+   /* vec4 index into spilled scratch memory */
+   *spilled = true;
+
+   return tgsi_reg_index - 
ctx->array_infos[i].range.First + spilled_size;
+   }
+   else {
+   /* regular GPR array */
+   *spilled = false;
+
+   return tgsi_reg_index - spilled_size + 
ctx->file_offset[TGSI_FILE_TEMPORARY];
+   }
+   }
+
+   if (ctx->spilled_arrays[i]) {
+   spilled_size += ctx->array_infos[i].range.Las

[Mesa-dev] [PATCH 6/9] r600g: Implement scratch buffer state management

2017-03-05 Thread Glenn Kennard
Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/evergreen_state.c   |  24 +++
 src/gallium/drivers/r600/r600_pipe.c |   3 +
 src/gallium/drivers/r600/r600_pipe.h |  14 
 src/gallium/drivers/r600/r600_shader.h   |   1 +
 src/gallium/drivers/r600/r600_state_common.c | 104 +++
 5 files changed, 146 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index c5dd9f7..8e984b9 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1976,6 +1976,30 @@ static void evergreen_emit_tcs_constant_buffers(struct 
r600_context *rctx, struc
0);
 }
 
+void evergreen_setup_scratch_buffers(struct r600_context *rctx) {
+   static const struct {
+   unsigned ring_base;
+   unsigned item_size;
+   unsigned ring_size;
+   } regs[EG_NUM_HW_STAGES] = {
+   [R600_HW_STAGE_PS] = { R_008C68_SQ_PSTMP_RING_BASE, 
R_028914_SQ_PSTMP_RING_ITEMSIZE, R_008C6C_SQ_PSTMP_RING_SIZE },
+   [R600_HW_STAGE_VS] = { R_008C60_SQ_VSTMP_RING_BASE, 
R_028910_SQ_VSTMP_RING_ITEMSIZE, R_008C64_SQ_VSTMP_RING_SIZE },
+   [R600_HW_STAGE_GS] = { R_008C58_SQ_GSTMP_RING_BASE, 
R_02890C_SQ_GSTMP_RING_ITEMSIZE, R_008C5C_SQ_GSTMP_RING_SIZE },
+   [R600_HW_STAGE_ES] = { R_008C50_SQ_ESTMP_RING_BASE, 
R_028908_SQ_ESTMP_RING_ITEMSIZE, R_008C54_SQ_ESTMP_RING_SIZE },
+   [EG_HW_STAGE_LS] = { R_008E10_SQ_LSTMP_RING_BASE, 
R_028830_SQ_LSTMP_RING_ITEMSIZE, R_008E14_SQ_LSTMP_RING_SIZE },
+   [EG_HW_STAGE_HS] = { R_008E18_SQ_HSTMP_RING_BASE, 
R_028834_SQ_HSTMP_RING_ITEMSIZE, R_008E1C_SQ_HSTMP_RING_SIZE }
+   };
+
+   for (unsigned i = 0; i < EG_NUM_HW_STAGES; i++) {
+   struct r600_pipe_shader *stage = 
rctx->hw_shader_stages[i].shader;
+
+   if (stage && unlikely(stage->scratch_space_needed)) {
+   r600_setup_scratch_area_for_shader(rctx, stage,
+   &rctx->scratch_buffers[i], regs[i].ring_base, 
regs[i].item_size, regs[i].ring_size);
+   }
+   }
+}
+
 static void evergreen_emit_sampler_views(struct r600_context *rctx,
 struct r600_samplerview_state *state,
 unsigned resource_id_base, unsigned 
pkt_flags)
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 1803c26..fc03990 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -71,6 +71,9 @@ static void r600_destroy_context(struct pipe_context *context)
 
r600_sb_context_destroy(rctx->sb_context);
 
+   for (sh = 0; sh < (rctx->b.chip_class < EVERGREEN ? R600_NUM_HW_STAGES 
: EG_NUM_HW_STAGES); sh++) {
+   r600_resource_reference(&rctx->scratch_buffers[sh].buffer, 
NULL);
+   }
r600_resource_reference(&rctx->dummy_cmask, NULL);
r600_resource_reference(&rctx->dummy_fmask, NULL);
 
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index cf8eba3..c8cf87f 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -413,6 +413,13 @@ struct r600_shader_state {
struct r600_pipe_shader *shader;
 };
 
+/* Used to spill shader temps */
+struct r600_scratch_buffer {
+   struct r600_resource*buffer;
+   unsignedsize;
+   unsigneditem_size;
+};
+
 struct r600_context {
struct r600_common_context  b;
struct r600_screen  *screen;
@@ -522,6 +529,8 @@ struct r600_context {
struct r600_pipe_shader_selector *last_tcs;
unsigned last_num_tcs_input_cp;
unsigned lds_alloc;
+
+   struct r600_scratch_buffer scratch_buffers[MAX2(R600_NUM_HW_STAGES, 
EG_NUM_HW_STAGES)];
 };
 
 static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
@@ -621,6 +630,7 @@ void evergreen_init_color_surface_rat(struct r600_context 
*rctx,
struct r600_surface *surf);
 void evergreen_update_db_shader_control(struct r600_context * rctx);
 bool evergreen_adjust_gprs(struct r600_context *rctx);
+void evergreen_setup_scratch_buffers(struct r600_context *rctx);
 /* r600_blit.c */
 void r600_init_blit_functions(struct r600_context *rctx);
 void r600_decompress_depth_textures(struct r600_context *rctx,
@@ -665,6 +675,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
 unsigned sample_count,
 unsigned usage);
 void r600_update_db_shader_control(struct r600_context * rctx);
+void r600_setup_scratch_buffers(struct r600_context *rctx);
 
 

Re: [Mesa-dev] [PATCH] r600/sb: remove superfluos assert

2017-09-12 Thread Glenn Kennard

On Tue, 12 Sep 2017 19:25:18 +0200, Vadim Girlin  wrote:


On 09/12/2017 12:49 PM, Gert Wollny wrote:

Am Dienstag, den 12.09.2017, 09:56 +0300 schrieb Vadim Girlin:

On 09/11/2017 07:09 PM, Emil Velikov wrote:



Anyway, if num_arrays is 0 there, I suspect it can be a result of
some other issue. At the very least it looks like a potential
performance problem, because in that case we assume all shader
registers can be  accessed with indirect addressing and it can limit
the optimizations significantly. So it might make sense to figure out
why it's zero in the first place, in theory it shouldn't happen.
Maybe something is wrong with the indirect_files bits?


The shader that's failing is this (i.e. no arrays, and indirect access
only to SV).


Is the tested feature really supported by r600g? AFAICS the indirect
index value is unused in the shader code.

Anyway, at first glance it looks like we don't need indirect addressing
for GPRs in this case, so the outer "if" around that assert probably
should handle this case too and skip the assert. I'm not 100% sure though.



FRAG
DCL SV[0], SAMPLEMASK
DCL OUT[0], COLOR
DCL CONST[0][0]
DCL TEMP[0..1], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {1., 0., 0., 0.}
IMM[1] INT32 {1, 0, 0, 0}
   0: MOV TEMP[0], IMM[0].xyyx
   1: UARL ADDR[0].x, CONST[0][0].
   2: USEQ TEMP[1].x, SV[ADDR[0].x]., IMM[1].
   3: UIF TEMP[1].
   4:   MOV TEMP[0].xy, IMM[0].yxyy
   5: ENDIF
   6: MOV OUT[0], TEMP[0]
   7: END

= SHADER #12 ==
PS/BARTS/EVERGREEN =
= 36 dw = 8 gprs = 1 stack
=
  4005 a418 ALU_PUSH_BEFORE 7 @10 KC0[CB0:0-15]
0010  00f9 00400c90 1 x: MOVR2.x,  1.0
0012  04f8 20400c90   y: MOVR2.y,  0
0014  04f8 40400c90   z: MOVR2.z,  0
0016  00f9 60400c90   w: MOVR2.w,  1.0
0018  8080 00800c90   t: MOVR4.x,  KC0[0].x
0020  801f4800 00601d10 2 x: SETE_INT   R3.x,  R0.z, 1
0022  801f00fe 00e0229c 3 MP  x: PRED_SETNE_INT R7.x,  PV.x, 0
0002  0003 8281 JUMP @6 POP:1
0004  000c a804 ALU_POP_AFTER 2 @24
0024  04f8 00400c90 4 x: MOVR2.x,  0
0026  80f9 20400c90   y: MOVR2.y,  1.0
0006  000e a00c ALU 4 @28
0028  0002 00200c90 5 x: MOVR1.x,  R2.x
0030  0402 20200c90   y: MOVR1.y,  R2.y
0032  0802 40200c90   z: MOVR1.z,  R2.z
0034  8c02 60200c90   w: MOVR1.w,  R2.w
0008  c0008000 95200688 EXPORT_DONEPIXEL 0 R1.xyzw  EOP
= SHADER_END






Hi Gert,

Vadim is correct, the fix is to extend the check in the if case above to also 
exclude TGSI_FILE_SYSTEM_VALUE, and keep the assert in place. ie:

 if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER) 
| (1 << TGSI_FILE_SYSTEM_VALUE))) {


Although gl_SampleMaskIn is declared as an array in GLSL, it's effectively a 32 
bit mask on all hardware supported by mesa so the array indexing is simply 
ignored. Thanks for looking into this!


/Glenn
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600: refactor out some compressed resource state code.

2017-06-05 Thread Glenn Kennard

On Mon, 05 Jun 2017 05:35:02 +0200, Dave Airlie  wrote:


From: Dave Airlie 

This just takes this out to a separate function as it will
get more complex with images.
---
 src/gallium/drivers/r600/r600_state_common.c | 52 +++-
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 3b24f36..8ace779 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1400,6 +1400,32 @@ static void r600_generate_fixed_func_tcs(struct 
r600_context *rctx)
ureg_create_shader_and_destroy(ureg, &rctx->b.b);
 }
+static void r600_update_compressed_resource_state(struct r600_context *rctx)
+{
+   unsigned i;
+   unsigned counter;
+
+   counter = p_atomic_read(&rctx->screen->b.compressed_colortex_counter);
+   if (counter != rctx->b.last_compressed_colortex_counter) {
+   rctx->b.last_compressed_colortex_counter = counter;
+
+   for (i = 0; i < PIPE_SHADER_TYPES; ++i) {
+   
r600_update_compressed_colortex_mask(&rctx->samplers[i].views);
+   }
+   }
+
+   /* Decompress textures if needed. */
+   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
+   struct r600_samplerview_state *views = &rctx->samplers[i].views;
+   if (views->compressed_depthtex_mask) {
+   r600_decompress_depth_textures(rctx, views);
+   }
+   if (views->compressed_colortex_mask) {
+   r600_decompress_color_textures(rctx, views);
+   }
+   }
+}
+
 #define SELECT_SHADER_OR_FAIL(x) do {  \
r600_shader_select(ctx, rctx->x##_shader, &x##_dirty);   \
if (unlikely(!rctx->x##_shader->current)) \
@@ -1440,30 +1466,8 @@ static bool r600_update_derived_state(struct 
r600_context *rctx)
bool need_buf_const;
struct r600_pipe_shader *clip_so_current = NULL;
-   if (!rctx->blitter->running) {
-   unsigned i;
-   unsigned counter;
-
-   counter = 
p_atomic_read(&rctx->screen->b.compressed_colortex_counter);
-   if (counter != rctx->b.last_compressed_colortex_counter) {
-   rctx->b.last_compressed_colortex_counter = counter;
-
-   for (i = 0; i < PIPE_SHADER_TYPES; ++i) {
-   
r600_update_compressed_colortex_mask(&rctx->samplers[i].views);
-   }
-   }
-
-   /* Decompress textures if needed. */
-   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
-   struct r600_samplerview_state *views = 
&rctx->samplers[i].views;
-   if (views->compressed_depthtex_mask) {
-   r600_decompress_depth_textures(rctx, views);
-   }
-   if (views->compressed_colortex_mask) {
-   r600_decompress_color_textures(rctx, views);
-   }
-   }
-   }
+   if (!rctx->blitter->running)
+   r600_update_compressed_resource_state(rctx);
SELECT_SHADER_OR_FAIL(ps);



Patch series is Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] r600g: Implement GL_ARB_draw_indirect for EG/CM

2015-02-06 Thread Glenn Kennard

On Fri, 06 Feb 2015 17:08:46 +0100, Marek Olšák  wrote:


Please bump the size of vgt_state for the SQ_VTX_BASE_VTX_LOC
register. It's set by r600_init_atom in r600_state.c and
evergreen_state.c

Please bump R600_MAX_DRAW_CS_DWORDS. It's an upper bound of how many
dwords draw_vbo can emit.



Thanks, will fix.


I don't understand what get_vfetch_type is good for. Could you please
explain it in the code? Also, I don't understand what constant buffer
fetches have to do with VertexID.



Will add some more blurb to get_vfetch_type, in particular i can point at  
the appropriate parts of gpu documentation.


As for the interaction of buffer fetches and VertexID, i'll attempt to  
explain:


The way R_03CFF0_SQ_VTX_BASE_VTX_LOC is delivered to the vertex shader is  
basically, it isn't. Instead what the
hardware does is poke the 64 unique values (one per wavefront thread, "64  
state" in the documentation) into the fetch units into a hidden state  
hardware register which the shader cannot read, at least not in any way  
that i've been able to find.


Setting FETCH_MODE=SQ_VTX_FETCH_VERTEX_DATA (=0) on a VFETCH instruction  
then tells the fetch unit to add the BASE_VTX and start instance offsets  
before reading the value - see  
r600_asm.c:r600_create_vertex_fetch_shader() which open codes 0 as the  
fetch mode for vertex fetches.


This creates a problem for GLSL gl_VertexID, since the shader cannot apply  
the offset. Let's look at the shader for the  
tests/spec/arb_draw_indirect/vertexid.c piglit test case:


"#version 140\n"
"\n"
"in vec4 piglit_vertex;\n"
"out vec3 c;\n"
"\n"
"const vec3 colors[] = vec3[](\n"
"  vec3(1, 0, 0),\n"
"  vec3(1, 0, 0),\n"
"  vec3(1, 0, 0),\n"
"  vec3(1, 0, 0),\n"
"\n"
...
"  vec3(1, 0, 1),\n"
"  vec3(1, 0, 1),\n"
"  vec3(1, 0, 1),\n"
"  vec3(1, 0, 1)\n"
");\n"
"void main() {\n"
"   c = colors[gl_VertexID];\n"
"  gl_Position = piglit_vertex;\n"
"}\n"

Colors here is a constant array, and base offset needs to be applied to  
look up the correct color value - the GL 4.5 spec is quite clear that it  
should be applied to gl_VertexID. Since the hardware offers no way to add  
base instance to gl_VertexID, i do the next best thing and enable offset  
on the array fetch operation instead.


The detection logic is quite hacky, since really it needs to check whether the  
array expression depends in any way on gl_VertexID, which requires looking  
at def-use chains, which aren't available in r600_asm.c - can probably  
have SB compute the bit instead, but that sort of violates its "don't  
change program meaning" principle, not to mention different behavior with  
SB disabled.


All the actual shaders that i've found using gl_VertexID in conjunction  
with indirect draws only use one constant array. I figure partial support  
at least approximately matches what the binary driver supports, which  
doesn't produce the correct value for gl_VertexID either for indirect  
draws in various cases - in particular, if the shader tries to compare  
gl_VertexID against some other expression you get an incorrect value.



The driver does something totally different for direct draws, it adds the  
base offset and start offset manually and feeds that to the hardware, with  
BASE_VTX always set to 0, which allows it to work for all cases. Not an  
option for indirect draws if you want any sort of performance out of them.



So to sum up, i don't see the hardware being fully capable of following  
the spec for gl_VertexID in conjunction with indirect drawing in all cases,  
at least not without some very slow fallbacks that read the draw  
parameters back to the cpu, which is useless. One option would be to just  
drop the attempt at supporting gl_VertexID from this patch if it's deemed  
too hacky.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g/sb: Don't fold integer value into float CND

2015-02-12 Thread Glenn Kennard
Don't try to do float comparisons on signed integer values,
some of them look like NaNs.

Fixes fs-temp-array-mat3-index-col-row-rd.shader_test regression
caused by 0d4272cd8e7c45157140dc8e283707714a8238d5.

Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/sb/sb_peephole.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/sb/sb_peephole.cpp 
b/src/gallium/drivers/r600/sb/sb_peephole.cpp
index d4b9755..4161d59 100644
--- a/src/gallium/drivers/r600/sb/sb_peephole.cpp
+++ b/src/gallium/drivers/r600/sb/sb_peephole.cpp
@@ -250,7 +250,7 @@ void peephole::optimize_CNDcc_op(alu_node* a) {
return;
 
// TODO we can handle some cases for uint comparison
-   if (dcmp_type == AF_UINT_CMP)
+   if (dcmp_type == AF_UINT_CMP || dcmp_type == AF_INT_CMP)
return;
 
if (dcc == AF_CC_NE) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g/sb: treat undefined values like constants

2015-02-17 Thread Glenn Kennard

On Wed, 18 Feb 2015 01:17:32 +0100, Dave Airlie  wrote:


From: Dave Airlie 

When we schedule an instruction with an undefined value, we
eventually will use 0, which is a constant, however sb wasn't
taking this into account and creating ops with illegal scalar
swizzles.

this replaces my fix for op3 in t slots.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/sb/sb_sched.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp  
b/src/gallium/drivers/r600/sb/sb_sched.cpp

index 4fbdc4f..63e7464 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -266,7 +266,7 @@ bool rp_gpr_tracker::try_reserve(alu_node* n) {
for (i = 0; i < nsrc; ++i) {
value *v = n->src[i];
-   if (v->is_readonly()) {
+   if (v->is_readonly() || v->is_undef()) {
const_count++;
if (trans && const_count == 3)
break;
@@ -295,7 +295,7 @@ bool rp_gpr_tracker::try_reserve(alu_node* n) {
if (need_unreserve && i--) {
do {
value *v = n->src[i];
-   if (!v->is_readonly()) {
+   if (!v->is_readonly() && !v->is_undef()) {
if (i == 1 && opt)
continue;
unreserve(bs_cycle(trans, bs, i), n->bc.src[i].sel,


Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] r600g: add doubles support for CAYMAN

2015-02-19 Thread Glenn Kennard
_64, tgsi_op2_64_single_dest},
+   {TGSI_OPCODE_DSEQ,  0, ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
+   {TGSI_OPCODE_DSNE,  0, ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
+   {TGSI_OPCODE_DRCP,  0, ALU_OP2_RECIP_64, cayman_emit_double_instr},
+   {TGSI_OPCODE_DSQRT, 0, ALU_OP2_SQRT_64, cayman_emit_double_instr},
+   {TGSI_OPCODE_DMAD,  0, ALU_OP3_FMA_64, tgsi_op3_64},
+   {TGSI_OPCODE_DFRAC, 0, ALU_OP1_FRACT_64, tgsi_op2_64},
+   {TGSI_OPCODE_DLDEXP,0, ALU_OP2_LDEXP_64, tgsi_op2_64},
+   {TGSI_OPCODE_DFRACEXP,  0, ALU_OP1_FREXP_64, tgsi_dfracexp},
+   {TGSI_OPCODE_D2I,   0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_I2D,   0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_D2U,   0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_U2D,   0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_DRSQ,  0, ALU_OP2_RECIPSQRT_64, 
cayman_emit_double_instr},
{TGSI_OPCODE_LAST,  0, ALU_OP0_NOP, tgsi_unsupported},
 };
diff --git a/src/gallium/drivers/r600/r600_shader.h  
b/src/gallium/drivers/r600/r600_shader.h

index b2559e9..a10004c 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -87,6 +87,8 @@ struct r600_shader {
unsignedvs_as_gs_a;
unsignedps_prim_id_input;
struct r600_shader_array * arrays;
+
+   boolean uses_doubles;
 };
struct r600_shader_key {


With above nits fixed,
Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/9] radeonsi: implement gl_SampleMaskIn

2015-03-02 Thread Glenn Kennard

On Mon, 02 Mar 2015 12:54:18 +0100, Marek Olšák  wrote:


From: Marek Olšák 

---
 docs/GL3.txt | 2 +-
 src/gallium/drivers/radeonsi/si_shader.c | 4 
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 43bbf85..0487cdf 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -106,7 +106,7 @@ GL 4.0, GLSL 4.00:
   - Enhanced textureGather DONE (r600,  
radeonsi)

   - Geometry shader instancing DONE (r600)
   - Geometry shader multiple streams   DONE ()
-  - Enhanced per-sample shadingDONE (r600)
+  - Enhanced per-sample shadingDONE (r600,  
radeonsi)

   - Interpolation functionsDONE (r600)
   - New overload resolution rules  DONE
   GL_ARB_gpu_shader_fp64   DONE (nvc0,  
softpipe)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c  
b/src/gallium/drivers/radeonsi/si_shader.c

index 085a350..8001ea2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -680,6 +680,10 @@ static void declare_system_value(
break;
}
+   case TGSI_SEMANTIC_SAMPLEMASK:
+   value = LLVMGetParam(radeon_bld->main_fn, 
SI_PARAM_SAMPLE_COVERAGE);
+   break;
+
default:
assert(!"unknown system value");
return;


Patches 4-9 are
Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g/sb: Enable SB for geometry shaders

2015-03-20 Thread Glenn Kennard
Add SV_GEOMETRY_EMIT special variable type to track the
implicit dependencies between CUT/EMIT_VERTEX/MEM_RING
instructions so GCM/scheduler doesn't reorder them.

Mark emit instructions as unkillable so DCE doesn't eat them.

Signed-off-by: Glenn Kennard 
---
The hangs with SB on geometry shaders were all due to the CUT/EMIT
instructions either being DCE:d or emitted out of order from the
memory ring writes, so the hardware stalled forever waiting for
completed primitives.

Tested only on a Turks so far, but should behave the same across
all R600 generations.

This patch disables the if-conversion pass when running GS shaders,
didn't seem worth the effort to fix that pass up for the marginal
returns.

 src/gallium/drivers/r600/r600_isa.h|  8 
 src/gallium/drivers/r600/r600_shader.c |  8 
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp |  2 +-
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp   | 25 +
 src/gallium/drivers/r600/sb/sb_core.cpp|  5 -
 src/gallium/drivers/r600/sb/sb_dump.cpp|  4 +++-
 src/gallium/drivers/r600/sb/sb_ir.h|  6 +-
 src/gallium/drivers/r600/sb/sb_ra_init.cpp |  2 +-
 src/gallium/drivers/r600/sb/sb_sched.cpp   |  2 +-
 src/gallium/drivers/r600/sb/sb_valtable.cpp|  1 +
 10 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_isa.h 
b/src/gallium/drivers/r600/r600_isa.h
index ec3f702..381f06d 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -641,7 +641,7 @@ static const struct cf_op_info cf_op_table[] = {
 
{"MEM_SCRATCH",   { 0x24, 0x24, 0x50, 0x50 },  
CF_MEM  },
{"MEM_REDUCT",{ 0x25, 0x25,   -1,   -1 },  
CF_MEM  },
-   {"MEM_RING",  { 0x26, 0x26, 0x52, 0x52 },  
CF_MEM  },
+   {"MEM_RING",  { 0x26, 0x26, 0x52, 0x52 },  
CF_MEM | CF_EMIT },
 
{"EXPORT",{ 0x27, 0x27, 0x53, 0x53 },  
CF_EXP  },
{"EXPORT_DONE",   { 0x28, 0x28, 0x54, 0x54 },  
CF_EXP  },
@@ -649,9 +649,9 @@ static const struct cf_op_info cf_op_table[] = {
{"MEM_EXPORT",{   -1, 0x3A, 0x55, 0x55 },  
CF_MEM  },
{"MEM_RAT",   {   -1,   -1, 0x56, 0x56 },  
CF_MEM | CF_RAT },
{"MEM_RAT_NOCACHE",   {   -1,   -1, 0x57, 0x57 },  
CF_MEM | CF_RAT },
-   {"MEM_RING1", {   -1,   -1, 0x58, 0x58 },  
CF_MEM  },
-   {"MEM_RING2", {   -1,   -1, 0x59, 0x59 },  
CF_MEM  },
-   {"MEM_RING3", {   -1,   -1, 0x5A, 0x5A },  
CF_MEM  },
+   {"MEM_RING1", {   -1,   -1, 0x58, 0x58 },  
CF_MEM | CF_EMIT },
+   {"MEM_RING2", {   -1,   -1, 0x59, 0x59 },  
CF_MEM | CF_EMIT },
+   {"MEM_RING3", {   -1,   -1, 0x5A, 0x5A },  
CF_MEM | CF_EMIT },
{"MEM_MEM_COMBINED",  {   -1,   -1, 0x5B, 0x5B },  
CF_MEM  },
{"MEM_RAT_COMBINED_NOCACHE",  {   -1,   -1, 0x5C, 0x5C },  
CF_MEM | CF_RAT },
{"MEM_RAT_COMBINED",  {   -1,   -1,   -1, 0x5D },  
CF_MEM | CF_RAT }, /* ??? not in cayman isa doc */
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 28b290a..ff2c784 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -159,8 +159,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
goto error;
}
 
-   /* disable SB for geom shaders - it can't handle the CF_EMIT 
instructions */
-   use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
/* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array 
indexing) as it doesn't handle those currently */
use_sb &= !shader->shader.uses_index_registers;
 
@@ -1141,6 +1139,8 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, 
struct tgsi_full_src_regi
for (i = 0; i < 3; i++) {
treg[i] = r600_get_temp(ctx);
}
+   r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F);
+
t2 = r600_get_temp(ctx);
for (i = 0; i < 3; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
@@ -1935,9 +1935,9 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3;
}
 
+   

Re: [Mesa-dev] [PATCH] r600g/sb: Enable SB for geometry shaders

2015-03-24 Thread Glenn Kennard
On Tue, 24 Mar 2015 17:21:35 +0100, Dieter Nützel   
wrote:



Am 20.03.2015 14:13, schrieb Glenn Kennard:

Add SV_GEOMETRY_EMIT special variable type to track the
implicit dependencies between CUT/EMIT_VERTEX/MEM_RING
instructions so GCM/scheduler doesn't reorder them.
 Mark emit instructions as unkillable so DCE doesn't eat them.
 Signed-off-by: Glenn Kennard 
---
The hangs with SB on geometry shaders were all due to the CUT/EMIT
instructions either being DCE:d or emitted out of order from the
memory ring writes, so the hardware stalled forever waiting for
completed primitives.
 Tested only on a Turks so far, but should behave the same across
all R600 generations.


Hello Glenn,

what tests are preferred?
Starting with a Turks XT here, too and could do some tests on RV730  
(AGP) then.


-Dieter


Just the usual piglit regression testing, at this point it's been tested  
on a Turks XT, and a RV770. A R6xx card and some VLIW4 gpu would complete  
the coverage needed.



/Glenn
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g/sb: Enable SB for geometry shaders

2015-03-25 Thread Glenn Kennard

On Wed, 25 Mar 2015 14:26:40 +0100, Marc Dietrich  wrote:


Am Dienstag, 24. März 2015, 20:05:46 schrieb Glenn Kennard:

On Tue, 24 Mar 2015 17:21:35 +0100, Dieter Nützel 

wrote:
> Am 20.03.2015 14:13, schrieb Glenn Kennard:
>> Add SV_GEOMETRY_EMIT special variable type to track the
>> implicit dependencies between CUT/EMIT_VERTEX/MEM_RING
>> instructions so GCM/scheduler doesn't reorder them.
>>
>>  Mark emit instructions as unkillable so DCE doesn't eat them.
>>  Signed-off-by: Glenn Kennard 
>>
>> ---
>> The hangs with SB on geometry shaders were all due to the CUT/EMIT
>> instructions either being DCE:d or emitted out of order from the
>> memory ring writes, so the hardware stalled forever waiting for
>> completed primitives.
>>
>>  Tested only on a Turks so far, but should behave the same across
>>
>> all R600 generations.
>
> Hello Glenn,
>
> what tests are preferred?
> Starting with a Turks XT here, too and could do some tests on RV730
> (AGP) then.
>
> -Dieter

Just the usual piglit regression testing, at this point it's been tested
on a Turks XT, and a RV770. A R6xx card and some VLIW4 gpu would  
complete

the coverage needed.


I would like to, but "piglit run quick" stalls/crashes the gpu (rs880)  
too

often. Maybe you could tell me some special tests to run instead of all.

Marc


-t geometry should be the smallest useful subset. It's likely that most of  
the hangs you get on rs880 (and other r6xx devices) are geometry shader  
related though so that might end up taking as long as a full quick run,  
unfortunately.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g/sb: Update last_cf for loops

2015-03-25 Thread Glenn Kennard
CF_END could end up emitted in the middle of a shader on cayman
when there was a loop at the very end.

Fixes glsl-1.50-geometry-end-primitive and
ext_transform_feedback-geometry-shaders-basic piglit tests.

Signed-off-by: Glenn Kennard 
---
Bug exposed by [PATCH] r600g/sb: Enable SB for geometry shaders

 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 8d0be06..08b7d77 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -127,6 +127,14 @@ void bc_finalizer::finalize_loop(region_node* r) {
cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
 
+   // Update last_cf, but don't overwrite it if it's outside the current 
loop nest since
+   // it may point to a cf that is later in program order.
+   // The single parent level check is sufficient since finalize_loop() is 
processed in
+   // reverse order from innermost to outermost loop nest level.
+   if (!last_cf || last_cf->get_parent_region() == r) {
+   last_cf = loop_end;
+   }
+
loop_start->jump_after(loop_end);
loop_end->jump_after(loop_start);
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] r600g: Implement sm5 UBO/sampler indexing

2014-10-15 Thread Glenn Kennard
Caveat: Shaders using UBO/sampler indexing will
not be optimized by SB, due to SB not currently
supporting the necessary CF_INDEX_[01] index
registers.

Signed-off-by: Glenn Kennard 
---
 docs/GL3.txt   |  4 +--
 src/gallium/drivers/r600/eg_asm.c  | 52 ---
 src/gallium/drivers/r600/r600_asm.c| 58 +-
 src/gallium/drivers/r600/r600_asm.h|  9 +
 src/gallium/drivers/r600/r600_shader.c | 52 +++
 src/gallium/drivers/r600/r600_shader.h |  2 ++
 src/gallium/drivers/r600/sb/sb_bc_dump.cpp |  8 -
 src/gallium/drivers/r600/sb/sb_sched.h |  2 ++
 8 files changed, 166 insertions(+), 21 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 5ccfdea..dba36e0 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -98,8 +98,8 @@ GL 4.0, GLSL 4.00:
   GL_ARB_draw_indirect DONE (i965, nvc0, 
radeonsi, llvmpipe, softpipe)
   GL_ARB_gpu_shader5   DONE (i965, nvc0)
   - 'precise' qualifierDONE
-  - Dynamically uniform sampler array indices  DONE ()
-  - Dynamically uniform UBO array indices  DONE ()
+  - Dynamically uniform sampler array indices  DONE (r600)
+  - Dynamically uniform UBO array indices  DONE (r600)
   - Implicit signed -> unsigned conversionsDONE
   - Fused multiply-add DONE ()
   - Packing/bitfield/conversion functions  DONE (r600)
diff --git a/src/gallium/drivers/r600/eg_asm.c 
b/src/gallium/drivers/r600/eg_asm.c
index acb3040..295cb4d 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -43,10 +43,10 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct 
r600_bytecode_cf *cf)
/* prepend ALU_EXTENDED if we need more than 2 kcache 
sets */
if (cf->eg_alu_extended) {
bc->bytecode[id++] =
-   
S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK_INDEX_MODE0(V_SQ_CF_INDEX_NONE) |
-   
S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK_INDEX_MODE1(V_SQ_CF_INDEX_NONE) |
-   
S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK_INDEX_MODE2(V_SQ_CF_INDEX_NONE) |
-   
S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK_INDEX_MODE3(V_SQ_CF_INDEX_NONE) |
+   
S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK_INDEX_MODE0(cf->kcache[0].index_mode) |
+   
S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK_INDEX_MODE1(cf->kcache[1].index_mode) |
+   
S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK_INDEX_MODE2(cf->kcache[2].index_mode) |
+   
S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK_INDEX_MODE3(cf->kcache[3].index_mode) |

S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK2(cf->kcache[2].bank) |

S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK3(cf->kcache[3].bank) |

S_SQ_CF_ALU_WORD0_EXT_KCACHE_MODE2(cf->kcache[2].mode);
@@ -143,3 +143,47 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
 }
 #endif
+
+int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool 
inside_alu_clause)
+{
+   struct r600_bytecode_alu alu;
+   int r;
+   unsigned type;
+
+   assert(id < 2);
+   assert(bc->chip_class >= EVERGREEN);
+
+   if (bc->index_loaded[id])
+   return 0;
+
+   memset(&alu, 0, sizeof(alu));
+   alu.op = ALU_OP1_MOVA_INT;
+   alu.src[0].sel = bc->index_reg[id];
+   alu.src[0].chan = 0;
+   alu.last = 1;
+   r = r600_bytecode_add_alu(bc, &alu);
+   if (r)
+   return r;
+
+   bc->ar_loaded = 0; /* clobbered */
+
+   memset(&alu, 0, sizeof(alu));
+   alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
+   alu.last = 1;
+   r = r600_bytecode_add_alu(bc, &alu);
+   if (r)
+   return r;
+
+   /* Must split ALU group as index only applies to following group */
+   if (inside_alu_clause) {
+   type = bc->cf_last->op;
+   if ((r = r600_bytecode_add_cf(bc))) {
+   return r;
+   }
+   bc->cf_last->op = type;
+   }
+
+   bc->index_loaded[id] = 1;
+
+   return 0;
+}
diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 8aa69b5..ce3c2d1 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r6

[Mesa-dev] [PATCH 1/2] r600g: Implement sm5 interpolation functions

2014-10-15 Thread Glenn Kennard
Requires evergreen/cayman

Signed-off-by: Glenn Kennard 
---
 docs/GL3.txt |   2 +-
 src/gallium/drivers/r600/r600_shader.c   | 237 ++-
 src/gallium/drivers/r600/r600_state_common.c |   3 +
 3 files changed, 238 insertions(+), 4 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 07d1d2c..5ccfdea 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -107,7 +107,7 @@ GL 4.0, GLSL 4.00:
   - Geometry shader instancing DONE (r600)
   - Geometry shader multiple streams   DONE ()
   - Enhanced per-sample shadingDONE (r600)
-  - Interpolation functionsDONE ()
+  - Interpolation functionsDONE (r600)
   - New overload resolution rules  DONE
   GL_ARB_gpu_shader_fp64   started (Dave)
   GL_ARB_sample_shadingDONE (i965, nv50, nvc0, 
r600, radeonsi)
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 9e9a557..08125b7 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -761,10 +761,33 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
return 0;
}
 
+   /* need to scan shader for system values and 
interpolateAtSample/Offset/Centroid */
while (!tgsi_parse_end_of_tokens(&parse)) {
tgsi_parse_token(&parse);
 
-   if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
+   if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+   const struct tgsi_full_instruction *inst = 
&parse.FullToken.FullInstruction;
+   if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_SAMPLE ||
+   inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_OFFSET ||
+   inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_CENTROID)
+   {
+   int interpolate, location, k;
+
+   if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_SAMPLE) {
+   location = TGSI_INTERPOLATE_LOC_CENTER;
+   inputs[1].enabled = true; /* needs 
SAMPLEID */
+   } else if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_OFFSET) {
+   location = TGSI_INTERPOLATE_LOC_CENTER;
+   /* Needs sample positions, currently 
those are always available */
+   } else {
+   location = 
TGSI_INTERPOLATE_LOC_CENTROID;
+   }
+
+   interpolate = 
ctx->info.input_interpolate[inst->Src[0].Register.Index];
+   k = eg_get_interpolator_index(interpolate, 
location);
+   ctx->eg_interpolators[k].enabled = true;
+   }
+   } else if (parse.FullToken.Token.Type == 
TGSI_TOKEN_TYPE_DECLARATION) {
struct tgsi_full_declaration *d = 
&parse.FullToken.FullDeclaration;
if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
for (k = 0; k < Elements(inputs); k++) {
@@ -812,6 +835,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
 {
int i;
int num_baryc;
+   struct tgsi_parse_context parse;
 
memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators));
 
@@ -831,6 +855,39 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
ctx->eg_interpolators[k].enabled = TRUE;
}
 
+   if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
+   return 0;
+   }
+
+   /* need to scan shader for system values and 
interpolateAtSample/Offset/Centroid */
+   while (!tgsi_parse_end_of_tokens(&parse)) {
+   tgsi_parse_token(&parse);
+
+   if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+   const struct tgsi_full_instruction *inst = 
&parse.FullToken.FullInstruction;
+   if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_SAMPLE ||
+   inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_OFFSET ||
+   inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_CENTROID)
+   {
+   int interpolate, location, k;
+
+   if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_SAMPLE) {
+   location = TGSI_I

[Mesa-dev] [PATCH] r600g: Implement GL_ARB_draw_indirect

2014-11-08 Thread Glenn Kennard
Requires evergreen/cayman, and updated radeon kernel module.

Signed-off-by: Glenn Kennard 
---
See also kernel side patch sent to dri-de...@lists.freedesktop.org

 docs/GL3.txt |  4 +-
 docs/relnotes/10.4.html  |  1 +
 src/gallium/drivers/r600/evergreend.h|  7 ++-
 src/gallium/drivers/r600/r600_pipe.c |  6 ++-
 src/gallium/drivers/r600/r600_state_common.c | 80 ++--
 5 files changed, 77 insertions(+), 21 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 2854431..06c52f9 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -95,7 +95,7 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, 
radeonsi, llvmpipe, soft
 GL 4.0, GLSL 4.00:
 
   GL_ARB_draw_buffers_blendDONE (i965, nv50, nvc0, 
r600, radeonsi, llvmpipe, softpipe)
-  GL_ARB_draw_indirect DONE (i965, nvc0, 
radeonsi, llvmpipe, softpipe)
+  GL_ARB_draw_indirect DONE (i965, nvc0, r600, 
radeonsi, llvmpipe, softpipe)
   GL_ARB_gpu_shader5   DONE (i965, nvc0)
   - 'precise' qualifierDONE
   - Dynamically uniform sampler array indices  DONE (r600)
@@ -159,7 +159,7 @@ GL 4.3, GLSL 4.30:
   GL_ARB_framebuffer_no_attachmentsnot started
   GL_ARB_internalformat_query2 not started
   GL_ARB_invalidate_subdataDONE (all drivers)
-  GL_ARB_multi_draw_indirect   DONE (i965, nvc0, 
radeonsi, llvmpipe, softpipe)
+  GL_ARB_multi_draw_indirect   DONE (i965, nvc0, r600, 
radeonsi, llvmpipe, softpipe)
   GL_ARB_program_interface_query   not started
   GL_ARB_robust_buffer_access_behavior not started
   GL_ARB_shader_image_size not started
diff --git a/docs/relnotes/10.4.html b/docs/relnotes/10.4.html
index d0fbd3b..9c2a491 100644
--- a/docs/relnotes/10.4.html
+++ b/docs/relnotes/10.4.html
@@ -49,6 +49,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_ARB_texture_view on nv50, nvc0
 GL_ARB_clip_control on llvmpipe, softpipe, r300, r600, radeonsi
 GL_KHR_context_flush_control on all drivers
+GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600
 
 
 
diff --git a/src/gallium/drivers/r600/evergreend.h 
b/src/gallium/drivers/r600/evergreend.h
index 4989996..b8880c8 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -64,6 +64,8 @@
 #define R600_TEXEL_PITCH_ALIGNMENT_MASK0x7
 
 #define PKT3_NOP   0x10
+#define PKT3_SET_BASE  0x11
+#define PKT3_INDEX_BUFFER_SIZE 0x13
 #define PKT3_DEALLOC_STATE 0x14
 #define PKT3_DISPATCH_DIRECT   0x15
 #define PKT3_DISPATCH_INDIRECT 0x16
@@ -72,12 +74,15 @@
 #define PKT3_REG_RMW   0x21
 #define PKT3_COND_EXEC 0x22
 #define PKT3_PRED_EXEC 0x23
-#define PKT3_START_3D_CMDBUF   0x24
+#define PKT3_DRAW_INDIRECT 0x24
+#define PKT3_DRAW_INDEX_INDIRECT   0x25
+#define PKT3_INDEX_BASE0x26
 #define PKT3_DRAW_INDEX_2  0x27
 #define PKT3_CONTEXT_CONTROL   0x28
 #define PKT3_DRAW_INDEX_IMMD_BE0x29
 #define PKT3_INDEX_TYPE0x2A
 #define PKT3_DRAW_INDEX0x2B
+#define PKT3_DRAW_INDIRECT_MULTI   0x2C
 #define PKT3_DRAW_INDEX_AUTO   0x2D
 #define PKT3_DRAW_INDEX_IMMD   0x2E
 #define PKT3_NUM_INSTANCES 0x2F
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 0b571e4..829deaf 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -313,6 +313,11 @@ static int r600_get_param(struct pipe_screen* pscreen, 
enum pipe_cap param)
return family >= CHIP_CEDAR ? 1 : 0;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return family >= CHIP_CEDAR ? 4 : 0;
+   case PIPE_CAP_DRAW_INDIRECT:
+   /* needs kernel command checking support to work */
+   if (family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= 41)
+   return 1;
+   return 0;
 
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -322,7 +327,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
-   case PIPE_CAP_DRAW_INDIRECT:
case PIP

Re: [Mesa-dev] [PATCH] r600g/cayman: fix integer multiplication output overwrite

2014-11-17 Thread Glenn Kennard

On Tue, 18 Nov 2014 00:56:38 +0100, Dave Airlie  wrote:


From: Dave Airlie 

This fixes  
tests/spec/glsl-1.10/execution/fs-op-assign-mult-ivec2-ivec2-overwrite.shader_test.


Reported-by: ghallberg on irc
Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c  
b/src/gallium/drivers/r600/r600_shader.c

index aab4215..02efc92 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2729,6 +2729,9 @@ static int cayman_mul_int_instr(struct  
r600_shader_ctx *ctx)

int i, j, k, r;
struct r600_bytecode_alu alu;
int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
+   int t1 = ctx->temp_reg;
+   int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
for (k = 0; k < last_slot; k++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
continue;
@@ -2739,7 +2742,8 @@ static int cayman_mul_int_instr(struct  
r600_shader_ctx *ctx)

for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
}
-   tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+   alu.dst.sel = t1;
+   alu.dst.chan = i;
alu.dst.write = (i == k);
if (i == 3)
alu.last = 1;
@@ -2748,6 +2752,23 @@ static int cayman_mul_int_instr(struct  
r600_shader_ctx *ctx)

return r;
}
}
+
+   for (i = 0 ; i < last_slot; i++) {
+   if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+   continue;
+   memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+   alu.op = ALU_OP1_MOV;
+   alu.src[0].sel = t1;
+   alu.src[0].chan = i;
+   tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+   alu.dst.write = 1;
+   if (i == lasti)
+   alu.last = 1;
+   r = r600_bytecode_add_alu(ctx->bc, &alu);
+   if (r)
+   return r;
+   }
+
return 0;
 }



Trivial nit: last_slot is no longer needed and can be removed.

With a bit of luck it will also fix  
https://bugs.freedesktop.org/show_bug.cgi?id=85376


Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g/cayman: fix texture gather tests

2014-11-17 Thread Glenn Kennard

On Tue, 18 Nov 2014 01:57:13 +0100, Dave Airlie  wrote:


From: Dave Airlie 

It appears on cayman the TG4 outputs were reordered.

This fixes a lot of piglit tests.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c  
b/src/gallium/drivers/r600/r600_shader.c

index 4c6ae45..709fcd7 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5763,11 +5763,18 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		int8_t texture_component_select = ctx->literals[4 *  
inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX];

tex.inst_mod = texture_component_select;
+   if (ctx->bc->chip_class == CAYMAN) {
/* GATHER4 result order is different from TGSI TG4 */
-   tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
-   tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
-   tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
-   tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+   tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 
0 : 7;
+   tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 
1 : 7;
+   tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 
2 : 7;
+   tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7;
+   } else {
+   tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 
1 : 7;
+   tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 
2 : 7;
+   tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 
0 : 7;
+   tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7;
+   }
}
else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) {
tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;


Gotta permute those tex op bit encodings between hardware generations or  
they go stale...


Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g/cayman: handle empty vertex shaders

2014-11-17 Thread Glenn Kennard

On Tue, 18 Nov 2014 02:23:51 +0100, Dave Airlie  wrote:


From: Dave Airlie 

Some of the geom shader tests produce an empty vertex shader,
on cayman we'd crash in the finaliser because last_cf was NULL.

cayman doesn't need the NOP workaround, so if the code arrives
here with no last_cf, just emit an END.

fixes crashes in a bunch of piglit geom shader tests.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp  
b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp

index 5c22f96..f0849ca 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -83,14 +83,18 @@ int bc_finalizer::run() {
last_cf = c;
}
-   if (last_cf->bc.op_ptr->flags & CF_ALU) {
+   if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
last_cf = sh.create_cf(CF_OP_NOP);
sh.root->push_back(last_cf);
}
-   if (ctx.is_cayman())
-   last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
-   else
+   if (ctx.is_cayman()) {
+   if (!last_cf) {
+   cf_node *c = sh.create_cf(CF_OP_CF_END);
+   sh.root->push_back(c);
+   } else
+   last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
+   } else
last_cf->bc.end_of_program = 1;
for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {


Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: geom shaders: always load texture src regs from inputs

2014-11-18 Thread Glenn Kennard

On Tue, 18 Nov 2014 05:09:05 +0100, Dave Airlie  wrote:


From: Dave Airlie 

Otherwise we seem to lose the split_gs_inputs and try and
pull from an uninitialised register.

fixes 9 texelFetch geom shader tests.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c  
b/src/gallium/drivers/r600/r600_shader.c

index 709fcd7..ab2a838 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4919,7 +4919,8 @@ static inline boolean  
tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,

return  (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
inst->Src[index].Register.File != TGSI_FILE_INPUT &&
inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
-   ctx->src[index].neg || ctx->src[index].abs;
+   ctx->src[index].neg || ctx->src[index].abs ||
+		(inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type ==  
TGSI_PROCESSOR_GEOMETRY);

 }
static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,


Confirmed fixes the same set of tests on a Turks.

Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: limit texture offset application to specific types (v2)

2014-11-18 Thread Glenn Kennard

On Tue, 18 Nov 2014 07:59:23 +0100, Dave Airlie  wrote:


From: Dave Airlie 

For 1D and 2D arrays we don't want the other coordinates being
offset and affecting where we sample. I wrote this patch 6 months
ago but lost it.

Fixes:
./bin/tex-miplevel-selection textureLodOffset 1DArray
./bin/tex-miplevel-selection textureLodOffset 2DArray
./bin/tex-miplevel-selection textureOffset 1DArray
./bin/tex-miplevel-selection textureOffset 1DArrayShadow
./bin/tex-miplevel-selection textureOffset 2DArray
./bin/tex-miplevel-selection textureOffset(bias) 1DArray
./bin/tex-miplevel-selection textureOffset(bias) 2DArray

v2: rewrite to handle more cases and be consistent with code
above.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 21 ++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c  
b/src/gallium/drivers/r600/r600_shader.c

index ab2a838..76daf2c 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5535,9 +5535,24 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
/* texture offsets do not apply to other 
texture targets */
}
} else {
-			offset_x = ctx->literals[4 * inst->TexOffsets[0].Index +  
inst->TexOffsets[0].SwizzleX] << 1;
-			offset_y = ctx->literals[4 * inst->TexOffsets[0].Index +  
inst->TexOffsets[0].SwizzleY] << 1;
-			offset_z = ctx->literals[4 * inst->TexOffsets[0].Index +  
inst->TexOffsets[0].SwizzleZ] << 1;

+   switch (inst->Texture.Texture) {
+   case TGSI_TEXTURE_3D:
+offset_z = ctx->literals[4 * inst->TexOffsets[0].Index +  
inst->TexOffsets[0].SwizzleZ] << 1;

+   /* fallthrough */
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_2D_ARRAY:
+   case TGSI_TEXTURE_SHADOW2D_ARRAY:
+offset_y = ctx->literals[4 * inst->TexOffsets[0].Index +  
inst->TexOffsets[0].SwizzleY] << 1;

+   /* fallthrough */
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_SHADOW1D:
+   case TGSI_TEXTURE_1D_ARRAY:
+   case TGSI_TEXTURE_SHADOW1D_ARRAY:
+offset_x = ctx->literals[4 * inst->TexOffsets[0].Index +  
inst->TexOffsets[0].SwizzleX] << 1;

+   }
        }
}



Confirmed fixes the same set of tests on a Turks.

Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600: fix texture gradients instruction emission (v2)

2014-11-23 Thread Glenn Kennard
        if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {


Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: do all CUBE ALU operations before gradient texture operations (v2)

2014-11-23 Thread Glenn Kennard
sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ?  
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE

-   if (sampler_index_mode)
-   ctx->shader->uses_index_registers = true;
if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
@@ -5454,6 +5399,69 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
src_gpr = ctx->temp_reg;
}
+   if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
+   int temp_h, temp_v;
+   int start_val = 0;
+
+   /* if we've already loaded the src (i.e. CUBE don't reload it). 
*/
+   if (src_loaded == TRUE)
+   start_val = 1;
+   else
+   src_loaded = TRUE;
+   for (i = start_val; i < 3; i++) {
+   int treg = r600_get_temp(ctx);
+
+   if (i == 0)
+   src_gpr = treg;
+   else if (i == 1)
+   temp_h = treg;
+   else
+   temp_v = treg;
+
+   for (j = 0; j < 4; j++) {
+   memset(&alu, 0, sizeof(struct 
r600_bytecode_alu));
+   alu.op = ALU_OP1_MOV;
+r600_bytecode_src(&alu.src[0],  
&ctx->src[i], j);

+alu.dst.sel = treg;
+alu.dst.chan = j;
+if (j == 3)
+   alu.last = 1;
+alu.dst.write = 1;
+r = r600_bytecode_add_alu(ctx->bc,  
&alu);

+if (r)
+return r;
+   }
+   }
+   for (i = 1; i < 3; i++) {
+   /* set gradients h/v */
+   memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+   tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H :
+   FETCH_OP_SET_GRADIENTS_V;
+   tex.sampler_id = tgsi_tex_get_src_gpr(ctx, 
sampler_src_reg);
+   tex.sampler_index_mode = sampler_index_mode;
+   tex.resource_id = tex.sampler_id + 
R600_MAX_CONST_BUFFERS;
+   tex.resource_index_mode = sampler_index_mode;
+
+   tex.src_gpr = (i == 1) ? temp_h : temp_v;
+   tex.src_sel_x = 0;
+   tex.src_sel_y = 1;
+   tex.src_sel_z = 2;
+   tex.src_sel_w = 3;
+
+			tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm  
scheduler */

+   tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = 
tex.dst_sel_w = 7;
+   if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
+   tex.coord_type_x = 1;
+   tex.coord_type_y = 1;
+   tex.coord_type_z = 1;
+   tex.coord_type_w = 1;
+   }
+   r = r600_bytecode_add_tex(ctx->bc, &tex);
+   if (r)
+   return r;
+   }
+   }
+
if (src_requires_loading && !src_loaded) {
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));



ARB_shader_texture_lod piglits go from 76/90 to 88/90, and fixes a number  
of tex-miplevel-selection tests.


Some remaining Cube/1DArrayShadow failures.

Worthwhile improvement as is, so
Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: merge the TXQ and BUFFER constant buffers

2014-11-26 Thread Glenn Kennard
 if (samplers->views.enabled_mask & (1 << i))
-			samplers->txq_constants[i] =  
samplers->views.views[i]->base.texture->array_size / 6;

-
-   cb.buffer = NULL;
-   cb.user_buffer = samplers->txq_constants;
-   cb.buffer_offset = 0;
-   cb.buffer_size = array_size;
-	rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type,  
R600_TXQ_CONST_BUFFER, &cb);

-   pipe_resource_reference(&cb.buffer, NULL);
-}
-
 /* set sample xy locations as array of fragment shader constants */
 void r600_set_sample_locations_constant_buffer(struct r600_context  
*rctx)

 {
@@ -1175,7 +1151,7 @@ static bool r600_update_derived_state(struct  
r600_context *rctx)

struct pipe_context * ctx = (struct pipe_context*)rctx;
bool ps_dirty = false, vs_dirty = false, gs_dirty = false;
bool blend_disable;
-
+   bool need_buf_const;
if (!rctx->blitter->running) {
unsigned i;
@@ -1296,29 +1272,35 @@ static bool r600_update_derived_state(struct  
r600_context *rctx)

/* on R600 we stuff masks + txq info into one constant buffer */
/* on evergreen we only need a txq info one */
-   if (rctx->b.chip_class < EVERGREEN) {
-		if (rctx->ps_shader &&  
rctx->ps_shader->current->shader.uses_tex_buffers)

-   r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
-		if (rctx->vs_shader &&  
rctx->vs_shader->current->shader.uses_tex_buffers)

-   r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
-		if (rctx->gs_shader &&  
rctx->gs_shader->current->shader.uses_tex_buffers)

-   r600_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
-   } else {
-		if (rctx->ps_shader &&  
rctx->ps_shader->current->shader.uses_tex_buffers)

-   eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
-		if (rctx->vs_shader &&  
rctx->vs_shader->current->shader.uses_tex_buffers)

-   eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
-		if (rctx->gs_shader &&  
rctx->gs_shader->current->shader.uses_tex_buffers)

-   eg_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
+   if (rctx->ps_shader) {
+		need_buf_const = rctx->ps_shader->current->shader.uses_tex_buffers ||  
rctx->ps_shader->current->shader.has_txq_cube_array_z_comp;

+   if (need_buf_const) {
+   if (rctx->b.chip_class < EVERGREEN)
+   r600_setup_buffer_constants(rctx, 
PIPE_SHADER_FRAGMENT);
+   else
+   eg_setup_buffer_constants(rctx, 
PIPE_SHADER_FRAGMENT);
+   }
}
+   if (rctx->vs_shader) {
+		need_buf_const = rctx->vs_shader->current->shader.uses_tex_buffers ||  
rctx->vs_shader->current->shader.has_txq_cube_array_z_comp;

+   if (need_buf_const) {
+   if (rctx->b.chip_class < EVERGREEN)
+   r600_setup_buffer_constants(rctx, 
PIPE_SHADER_VERTEX);
+   else
+   eg_setup_buffer_constants(rctx, 
PIPE_SHADER_VERTEX);
+   }
+   }
-	if (rctx->ps_shader &&  
rctx->ps_shader->current->shader.has_txq_cube_array_z_comp)

-   r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_FRAGMENT);
-	if (rctx->vs_shader &&  
rctx->vs_shader->current->shader.has_txq_cube_array_z_comp)

-   r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_VERTEX);
-	if (rctx->gs_shader &&  
rctx->gs_shader->current->shader.has_txq_cube_array_z_comp)

-   r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_GEOMETRY);
+   if (rctx->gs_shader) {
+		need_buf_const = rctx->gs_shader->current->shader.uses_tex_buffers ||  
rctx->gs_shader->current->shader.has_txq_cube_array_z_comp;

+   if (need_buf_const) {
+   if (rctx->b.chip_class < EVERGREEN)
+   r600_setup_buffer_constants(rctx, 
PIPE_SHADER_GEOMETRY);
+   else
+   eg_setup_buffer_constants(rctx, 
PIPE_SHADER_GEOMETRY);
+   }
+   }
	if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader &&  
rctx->vs_shader) {

if (!r600_adjust_gprs(rctx)) {



Passes piglits on a Turks with no obvious regressions, so with nits above  
fixed, consider it

Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g/sb: fix issues caused by GLSL switching to loops for switch

2014-11-30 Thread Glenn Kennard

On Fri, 28 Nov 2014 04:36:42 +0100, Dave Airlie  wrote:


From: Dave Airlie 

Since 73dd50acf6d244979c2a657906aa56d3ac60d550
glsl: implement switch flow control using a loop

The SB backend was falling over in an assert or crashing.

Tracked this down to the loops having no repeats, but requiring
a working break, initial code just called the loop handler for
all non-if statements, but this caused a regression in
tests/shaders/dead-code-break-interaction.shader_test.
So I had to add further code to detect if all the departure
nodes are empty and avoid generating an empty loop for that case.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86089
Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 51  
++

 1 file changed, 36 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp  
b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp

index f0849ca..d91ffa5 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -46,15 +46,22 @@ int bc_finalizer::run() {
 	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I  
!= E;

++I) {
region_node *r = *I;
-
+   bool is_if = false;
assert(r);
-   bool loop = r->is_loop();
+   assert(r->first);
+   if (r->first->is_container()) {
+   container_node *repdep1 = 
static_cast<container_node*>(r->first);
+   assert(repdep1->is_depart() || repdep1->is_repeat());
+   if_node *n_if = static_cast<if_node*>(repdep1->first);
+   if (n_if && n_if->is_if())
+   is_if = true;
+   }
-   if (loop)
-   finalize_loop(r);
-   else
+   if (is_if)
finalize_if(r);
+   else
+   finalize_loop(r);
r->expand();
}
@@ -112,16 +119,31 @@ void bc_finalizer::finalize_loop(region_node* r) {
cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
+   bool has_instr = false;
+
+   if (!r->is_loop()) {
+		for (depart_vec::iterator I = r->departs.begin(), E =  
r->departs.end();

+I != E; ++I) {
+   depart_node *dep = *I;
+   if (!dep->empty())
+   has_instr = true;


could break here


+   }
+   } else
+   has_instr = true;
-   loop_start->jump_after(loop_end);
-   loop_end->jump_after(loop_start);
+   if (has_instr) {
+   loop_start->jump_after(loop_end);
+   loop_end->jump_after(loop_start);
+   }
for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
I != E; ++I) {
depart_node *dep = *I;
-   cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
-   loop_break->jump(loop_end);
-   dep->push_back(loop_break);
+   if (has_instr) {
+   cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
+   loop_break->jump(loop_end);
+   dep->push_back(loop_break);
+   }
dep->expand();
}
@@ -137,8 +159,10 @@ void bc_finalizer::finalize_loop(region_node* r) {
rep->expand();
}
-   r->push_front(loop_start);
-   r->push_back(loop_end);
+   if (has_instr) {
+   r->push_front(loop_start);
+   r->push_back(loop_end);
+   }
 }
void bc_finalizer::finalize_if(region_node* r) {
@@ -168,9 +192,6 @@ void bc_finalizer::finalize_if(region_node* r) {
if (n_if) {
-
-   assert(n_if->is_if());


shouldn't need to remove this assertion


-
container_node *repdep2 = 
static_cast<container_node*>(n_if->first);
assert(repdep2->is_depart() || repdep2->is_repeat());



I think I've managed to convince myself the above logic is correct, so
Reviewed-By: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: fix regression since UCMP change

2014-12-08 Thread Glenn Kennard

On Tue, 09 Dec 2014 02:31:01 +0100, Dave Airlie  wrote:


From: Dave Airlie 
Since d8da6deceadf5e48201d848b7061dad17a5b7cac where the
state tracker started using UCMP on cayman a number of tests
regressed.
This seems to be because r600g is doing CNDGE_INT for UCMP, which tests for >= 0;
we should be doing CNDE_INT with reversed arguments.
Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/drivers/r600/r600_shader.c  
b/src/gallium/drivers/r600/r600_shader.c

index 0b988df..28137e1 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6082,7 +6082,7 @@ static int tgsi_ucmp(struct r600_shader_ctx *ctx)
continue;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-   alu.op = ALU_OP3_CNDGE_INT;
+   alu.op = ALU_OP3_CNDE_INT;
r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
r600_bytecode_src(&alu.src[2], &ctx->src[1], i);


Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g/sb: implement r600 gpr index workaround. (v3)

2014-12-09 Thread Glenn Kennard
   alu_node *pn = 
static_cast<alu_node*>(*pI);
+   if (pn->bc.dst_gpr == src.sel) {
+   add_nop = true;
+   break;
+   }
+   }
+   }
} else
src.rel = 0;
@@ -393,11 +426,23 @@ void  
bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {

assert(!"unknown value kind");
break;
}
+   if (prev && !add_nop) {
+			for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE;  
++pI) {

+   alu_node *pn = static_cast<alu_node*>(*pI);
+   if (pn->bc.dst_rel) {
+   if (pn->bc.dst_gpr == src.sel) {
+   add_nop = true;
+   break;
+   }
+   }
+   }
+   }
}
while (si < 3) {
a->bc.src[si++].sel = 0;
}
+   return add_nop;
 }
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src,  
unsigned arg_start)
diff --git a/src/gallium/drivers/r600/sb/sb_context.cpp  
b/src/gallium/drivers/r600/sb/sb_context.cpp

index 8e11428..5dba85b 100644
--- a/src/gallium/drivers/r600/sb/sb_context.cpp
+++ b/src/gallium/drivers/r600/sb/sb_context.cpp
@@ -61,6 +61,8 @@ int sb_context::init(r600_isa *isa, sb_hw_chip chip,  
sb_hw_class cclass) {

uses_mova_gpr = is_r600() && chip != HW_CHIP_RV670;
+	r6xx_gpr_index_workaround = is_r600() && chip != HW_CHIP_RV670 && chip  
!= HW_CHIP_RS780 && chip != HW_CHIP_RS880;

+
switch (chip) {
case HW_CHIP_RV610:
case HW_CHIP_RS780:
diff --git a/src/gallium/drivers/r600/sb/sb_pass.h  
b/src/gallium/drivers/r600/sb/sb_pass.h

index 812d14a..0346df1 100644
--- a/src/gallium/drivers/r600/sb/sb_pass.h
+++ b/src/gallium/drivers/r600/sb/sb_pass.h
@@ -695,8 +695,9 @@ public:
void run_on(container_node *c);
-   void finalize_alu_group(alu_group_node *g);
-   void finalize_alu_src(alu_group_node *g, alu_node *a);
+   void insert_rv6xx_load_ar_workaround(alu_group_node *b4);
+   void finalize_alu_group(alu_group_node *g, node *prev_node);
+	bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node  
*prev_node);

void emit_set_grad(fetch_node* f);
void finalize_fetch(fetch_node *f);


Reviewed-By: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: only init GS_VERT_ITEMSIZE on r600

2014-12-09 Thread Glenn Kennard

On Wed, 10 Dec 2014 04:55:21 +0100, Dave Airlie  wrote:


From: Dave Airlie 

On evergreen there are 4 regs, on r600/700 there is only one.

Don't initialise regs and trash someone else's state.

Not sure this fixes anything, but hey one less stupid.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_state.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state.c  
b/src/gallium/drivers/r600/r600_state.c

index 61f5c5a..9a4b972 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2659,11 +2659,8 @@ void r600_update_gs_state(struct pipe_context  
*ctx, struct r600_pipe_shader *sha

r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE,
   
r600_conv_prim_to_gs_out(rshader->gs_output_prim));
-   r600_store_context_reg_seq(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE, 4);
-   r600_store_value(cb, cp_shader->ring_item_size >> 2);
-   r600_store_value(cb, 0);
-   r600_store_value(cb, 0);
-   r600_store_value(cb, 0);
+   r600_store_context_reg(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE,
+  cp_shader->ring_item_size >> 2);
r600_store_context_reg(cb, R_0288A8_SQ_ESGS_RING_ITEMSIZE,
   (rshader->ring_item_size) >> 2);


Reviewed-By: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: Implement GL_ARB_texture_gather

2014-07-14 Thread Glenn Kennard
Only supported on evergreen and later. Limited to
single component textures as the hardware GATHER4
instruction ignores texture swizzles.

Piglit quick run passes on radeon 6670 with all
applicable textureGather tests, no regressions.

Signed-off-by: Glenn Kennard 
---
 docs/GL3.txt   |  2 +-
 docs/relnotes/10.3.html|  2 +-
 src/gallium/drivers/r600/r600_pipe.c   |  5 ++--
 src/gallium/drivers/r600/r600_shader.c | 47 +-
 4 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index a2f438b..20e57b0 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -118,7 +118,7 @@ GL 4.0:
   GL_ARB_tessellation_shader   started (Fabian)
   GL_ARB_texture_buffer_object_rgb32   DONE (i965, nvc0, r600, 
radeonsi, softpipe)
   GL_ARB_texture_cube_map_arrayDONE (i965, nv50, nvc0, 
r600, radeonsi, softpipe)
-  GL_ARB_texture_gatherDONE (i965, nv50, nvc0, 
radeonsi)
+  GL_ARB_texture_gatherDONE (i965, nv50, nvc0, 
radeonsi, r600)
   GL_ARB_texture_query_lod DONE (i965, nv50, nvc0, 
radeonsi)
   GL_ARB_transform_feedback2   DONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_transform_feedback3   DONE (i965, nv50, nvc0, 
r600, radeonsi)
diff --git a/docs/relnotes/10.3.html b/docs/relnotes/10.3.html
index 2e718fc..1c0fab6 100644
--- a/docs/relnotes/10.3.html
+++ b/docs/relnotes/10.3.html
@@ -49,7 +49,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_ARB_sample_shading on radeonsi
 GL_ARB_stencil_texturing on nv50, nvc0, r600, and radeonsi
 GL_ARB_texture_cube_map_array on radeonsi
-GL_ARB_texture_gather on radeonsi
+GL_ARB_texture_gather on radeonsi, r600
 GL_ARB_texture_query_levels on nv50, nvc0, llvmpipe, r600, radeonsi, 
softpipe
 GL_ARB_texture_query_lod on radeonsi
 GL_ARB_viewport_array on nvc0
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index ca6399f..d967f0f 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -303,6 +303,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+   case PIPE_CAP_TEXTURE_GATHER_SM5:
+   return family >= CHIP_CEDAR ? 1 : 0;
+   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return family >= CHIP_CEDAR ? 1 : 0;
 
/* Unsupported features. */
@@ -312,8 +315,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
-   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
-   case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 6952e3c..db928f3 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4477,7 +4477,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 
if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
-   inst->Instruction.Opcode == TGSI_OPCODE_TXL2)
+   inst->Instruction.Opcode == TGSI_OPCODE_TXL2 ||
+   inst->Instruction.Opcode == TGSI_OPCODE_TG4)
sampler_src_reg = 2;
 
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
@@ -5079,6 +5080,13 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
case FETCH_OP_SAMPLE_G:
opcode = FETCH_OP_SAMPLE_C_G;
break;
+   /* Texture gather variants */
+   case FETCH_OP_GATHER4:
+   tex.op = FETCH_OP_GATHER4_C;
+   break;
+   case FETCH_OP_GATHER4_O:
+   tex.op = FETCH_OP_GATHER4_C_O;
+   break;
}
}
 
@@ -5089,9 +5097,21 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + 
inst->Dst[0].Register.Index;
-   tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
-   tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
-   tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
+
+   if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
+   int8_t tex

Re: [Mesa-dev] [PATCH] r600g: Implement GL_ARB_texture_gather

2014-07-14 Thread Glenn Kennard
On Mon, 14 Jul 2014 20:33:08 +0200, Ilia Mirkin   
wrote:


On Mon, Jul 14, 2014 at 2:20 PM, Glenn Kennard   
wrote:
diff --git a/src/gallium/drivers/r600/r600_pipe.c  
b/src/gallium/drivers/r600/r600_pipe.c

index ca6399f..d967f0f 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -303,6 +303,9 @@ static int r600_get_param(struct pipe_screen*  
pscreen, enum pipe_cap param)

case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+   case PIPE_CAP_TEXTURE_GATHER_SM5:
+   return family >= CHIP_CEDAR ? 1 : 0;


No clue what the hardware supports, and this CAP is actually not used
by the state tracker. However I believe it is meant to imply that you
can support the ARB_gs5 texture gather, including shadow comparisons
and non-constant offsets. This is not yet turned on by core mesa, but
do the textureGather tests still pass if you add
MESA_EXTENSION_OVERRIDE=GL_ARB_gpu_shader5? (You can do a piglit-run
with -t gather.) I'm not 100% sure, but I _think_ it also implies that
you can deal with selecting any component from a 4-component
texture... the ARB_gs5 spec says:

Since this extension requires support for gathering from  
multi-component
textures, the minimum value of  
MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB

is increased to 4.

My understanding is that the hardware is meant to be able to support
GL4 one way or another, but sounds like more work needs to be done in
order to claim the SM5 compatibility cap...


Good catch, I missed that. It should return 0 for
PIPE_CAP_TEXTURE_GATHER_SM5, and 1 for the number of components, which lets
it expose the ARB_texture_gather extension for now, leaving SM5 support for
a later patch - the hardware apparently doesn't support texture sampler
swizzling natively for the GATHER4 instruction, so some logic is required to
compile shader variants whenever the swizzle changes.




Also, have you looked at Dave Airlie's impl? Not sure what's been
going on there...
http://cgit.freedesktop.org/~airlied/mesa/log/?h=r600g-texture-gather
. He seemed to enable 4 components for >= CEDAR.



No, it didn't pop up when I searched the archives for any prior work on this.
Interesting! It looks like the GL_ARB_texture_gather portion in that
branch is almost exactly equivalent to my patch.


It also has some of the additional parts needed for GL_ARB_gpu_shader5
such as non-constant offsets, but I don't see where it would trigger
shader recompiles if the texture sampler swizzle changes. Does it pass piglit?



David, any opinions on how to move forward with this feature?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: Implement GL_ARB_texture_gather

2014-07-16 Thread Glenn Kennard
Only supported on evergreen and later. Currently limited
to single component textures as the hardware GATHER4
instruction ignores texture swizzles.

Piglit quick run passes on radeon 6670 with all
applicable textureGather tests, no regressions.

Signed-off-by: Glenn Kennard 
---
Changes from v1:
 Removed PIPE_CAP_TEXTURE_GATHER_SM5 cap
 
This patch should be equivalent to the ARB_texture_gather only
portions of David Airlie's work in progress gather implementation
http://cgit.freedesktop.org/~airlied/mesa/log/?h=r600g-texture-gather

Further work is needed to enable the GL_ARB_gpu_shader5 enhancements
to texture gather, in particular keying sampler swizzle state to
shader variants with the appropriate component selects.

 docs/GL3.txt   |  2 +-
 docs/relnotes/10.3.html|  2 +-
 src/gallium/drivers/r600/r600_pipe.c   |  3 ++-
 src/gallium/drivers/r600/r600_shader.c | 47 +-
 4 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index a2f438b..20e57b0 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -118,7 +118,7 @@ GL 4.0:
   GL_ARB_tessellation_shader   started (Fabian)
   GL_ARB_texture_buffer_object_rgb32   DONE (i965, nvc0, r600, 
radeonsi, softpipe)
   GL_ARB_texture_cube_map_arrayDONE (i965, nv50, nvc0, 
r600, radeonsi, softpipe)
-  GL_ARB_texture_gatherDONE (i965, nv50, nvc0, 
radeonsi)
+  GL_ARB_texture_gatherDONE (i965, nv50, nvc0, 
radeonsi, r600)
   GL_ARB_texture_query_lod DONE (i965, nv50, nvc0, 
radeonsi)
   GL_ARB_transform_feedback2   DONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_transform_feedback3   DONE (i965, nv50, nvc0, 
r600, radeonsi)
diff --git a/docs/relnotes/10.3.html b/docs/relnotes/10.3.html
index 2e718fc..1c0fab6 100644
--- a/docs/relnotes/10.3.html
+++ b/docs/relnotes/10.3.html
@@ -49,7 +49,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_ARB_sample_shading on radeonsi
 GL_ARB_stencil_texturing on nv50, nvc0, r600, and radeonsi
 GL_ARB_texture_cube_map_array on radeonsi
-GL_ARB_texture_gather on radeonsi
+GL_ARB_texture_gather on radeonsi, r600
 GL_ARB_texture_query_levels on nv50, nvc0, llvmpipe, r600, radeonsi, 
softpipe
 GL_ARB_texture_query_lod on radeonsi
 GL_ARB_viewport_array on nvc0
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index ca6399f..a762b00 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -303,6 +303,8 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+   return 0;
+   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return family >= CHIP_CEDAR ? 1 : 0;
 
/* Unsupported features. */
@@ -312,7 +314,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
-   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 6952e3c..db928f3 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4477,7 +4477,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 
if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
-   inst->Instruction.Opcode == TGSI_OPCODE_TXL2)
+   inst->Instruction.Opcode == TGSI_OPCODE_TXL2 ||
+   inst->Instruction.Opcode == TGSI_OPCODE_TG4)
sampler_src_reg = 2;
 
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
@@ -5079,6 +5080,13 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
case FETCH_OP_SAMPLE_G:
opcode = FETCH_OP_SAMPLE_C_G;
break;
+   /* Texture gather variants */
+   case FETCH_OP_GATHER4:
+   tex.op = FETCH_OP_GATHER4_C;
+   break;
+   case FETCH_OP_GATHER4_O:
+   tex.op = FETCH_OP_GATHER4_C_O;
+   break;
}
}
 
@@ -5089,9 +5097,21 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] +

[Mesa-dev] [PATCH v3] r600g: Implement GL_ARB_texture_gather

2014-07-16 Thread Glenn Kennard
Only supported on evergreen and later. Currently limited
to single component textures as the hardware GATHER4
instruction ignores texture swizzles.

Piglit quick run passes on radeon 6670 with all
applicable textureGather tests, no regressions.

Signed-off-by: Glenn Kennard 
---
Changes from v2:
 Remove accidental disabling of unrelated caps that snuck in.
 Oddly enough not caught by comparing piglit "quick" runs.
Changes from v1:
 Removed PIPE_CAP_TEXTURE_GATHER_SM5 cap

 docs/GL3.txt   |  2 +-
 docs/relnotes/10.3.html|  2 +-
 src/gallium/drivers/r600/r600_pipe.c   |  2 +-
 src/gallium/drivers/r600/r600_shader.c | 47 +-
 4 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index a2f438b..20e57b0 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -118,7 +118,7 @@ GL 4.0:
   GL_ARB_tessellation_shader   started (Fabian)
   GL_ARB_texture_buffer_object_rgb32   DONE (i965, nvc0, r600, 
radeonsi, softpipe)
   GL_ARB_texture_cube_map_arrayDONE (i965, nv50, nvc0, 
r600, radeonsi, softpipe)
-  GL_ARB_texture_gatherDONE (i965, nv50, nvc0, 
radeonsi)
+  GL_ARB_texture_gatherDONE (i965, nv50, nvc0, 
radeonsi, r600)
   GL_ARB_texture_query_lod DONE (i965, nv50, nvc0, 
radeonsi)
   GL_ARB_transform_feedback2   DONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_transform_feedback3   DONE (i965, nv50, nvc0, 
r600, radeonsi)
diff --git a/docs/relnotes/10.3.html b/docs/relnotes/10.3.html
index 2e718fc..1c0fab6 100644
--- a/docs/relnotes/10.3.html
+++ b/docs/relnotes/10.3.html
@@ -49,7 +49,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_ARB_sample_shading on radeonsi
 GL_ARB_stencil_texturing on nv50, nvc0, r600, and radeonsi
 GL_ARB_texture_cube_map_array on radeonsi
-GL_ARB_texture_gather on radeonsi
+GL_ARB_texture_gather on radeonsi, r600
 GL_ARB_texture_query_levels on nv50, nvc0, llvmpipe, r600, radeonsi, 
softpipe
 GL_ARB_texture_query_lod on radeonsi
 GL_ARB_viewport_array on nvc0
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index ca6399f..5bf9c00 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -303,6 +303,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return family >= CHIP_CEDAR ? 1 : 0;
 
/* Unsupported features. */
@@ -312,7 +313,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
-   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 6952e3c..db928f3 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4477,7 +4477,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 
if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
-   inst->Instruction.Opcode == TGSI_OPCODE_TXL2)
+   inst->Instruction.Opcode == TGSI_OPCODE_TXL2 ||
+   inst->Instruction.Opcode == TGSI_OPCODE_TG4)
sampler_src_reg = 2;
 
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
@@ -5079,6 +5080,13 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
case FETCH_OP_SAMPLE_G:
opcode = FETCH_OP_SAMPLE_C_G;
break;
+   /* Texture gather variants */
+   case FETCH_OP_GATHER4:
+   tex.op = FETCH_OP_GATHER4_C;
+   break;
+   case FETCH_OP_GATHER4_O:
+   tex.op = FETCH_OP_GATHER4_C_O;
+   break;
}
}
 
@@ -5089,9 +5097,21 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + 
inst->Dst[0].Register.Index;
-   tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
-   tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
-   tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
+
+   if (inst->In

[Mesa-dev] [PATCH] r600g: Use hardware sqrt instruction

2014-07-18 Thread Glenn Kennard
Piglit quick tests including sqrt pass, no other regressions,
tested on radeon 6670.
---
Should be slightly more precise than the invsqrt/recip/mul combination
used previously, I reckon up to about 2 bits of mantissa, and saves
two instructions per sqrt emitted.

It would be good if someone could test this on Cayman since it uses
a slightly different codepath.

 src/gallium/drivers/r600/r600_pipe.c   | 2 +-
 src/gallium/drivers/r600/r600_shader.c | 9 +++--
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 5bf9c00..ee6a416 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -428,7 +428,7 @@ static int r600_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
-   return 0;
+   return 1;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index db928f3..907547d 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6498,8 +6498,7 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SUB,   0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP,   0, ALU_OP0_NOP, tgsi_lrp},
{TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
-   /* gap */
-   {20,0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, 
tgsi_trans_srcx_replicate},
{TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{22,0, ALU_OP0_NOP, tgsi_unsupported},
@@ -6693,8 +6692,7 @@ static struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SUB,   0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP,   0, ALU_OP0_NOP, tgsi_lrp},
{TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
-   /* gap */
-   {20,0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, 
tgsi_trans_srcx_replicate},
{TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{22,0, ALU_OP0_NOP, tgsi_unsupported},
@@ -6888,8 +6886,7 @@ static struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SUB,   0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP,   0, ALU_OP0_NOP, tgsi_lrp},
{TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
-   /* gap */
-   {20,0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
{TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{22,0, ALU_OP0_NOP, tgsi_unsupported},
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: Implement gpu_shader5 textureGather

2014-07-20 Thread Glenn Kennard
Adds 0-3 textureGather component selection and non-constant offsets

Caveat: 0 and 1 texture swizzles only work if textureGather component
select is 3 or a component that does not exist in the sampler texture
format. This is a hardware limitation, any other value returns
128/255=0.501961 for both 0 and 1.

Passes all textureGather piglit tests on radeon 6670, except for those
using 0/1 texture swizzles due to aforementioned reason.

Signed-off-by: Glenn Kennard 
---
It is possible to generate shader variants which get the existing
textureGather 0-1 piglit tests passing, but the resulting code is not
pretty, and it will still fail if anyone uses indirect sampler
referencing in the shader for textureGather. I don't think it is
worth the effort, if an app really wants a constant 0/1 value it has
other ways to accomplish that.

 docs/GL3.txt   |  4 ++--
 src/gallium/drivers/r600/r600_pipe.c   |  5 +++--
 src/gallium/drivers/r600/r600_shader.c | 36 +++---
 3 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 0f37da4..eee0988 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -106,7 +106,7 @@ GL 4.0:
   - Implicit signed -> unsigned conversionsDONE
   - Fused multiply-add DONE (i965, nvc0)
   - Packing/bitfield/conversion functions  DONE (i965, nvc0)
-  - Enhanced textureGather DONE (i965, nvc0, 
radeonsi)
+  - Enhanced textureGather DONE (i965, nvc0, r600, 
radeonsi)
   - Geometry shader instancing DONE (i965, nvc0)
   - Geometry shader multiple streams   DONE (i965, nvc0)
   - Enhanced per-sample shadingDONE (i965)
@@ -118,7 +118,7 @@ GL 4.0:
   GL_ARB_tessellation_shader   started (Fabian)
   GL_ARB_texture_buffer_object_rgb32   DONE (i965, nvc0, r600, 
radeonsi, softpipe)
   GL_ARB_texture_cube_map_arrayDONE (i965, nv50, nvc0, 
r600, radeonsi, softpipe)
-  GL_ARB_texture_gatherDONE (i965, nv50, nvc0, 
radeonsi, r600)
+  GL_ARB_texture_gatherDONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_texture_query_lod DONE (i965, nv50, nvc0, 
radeonsi)
   GL_ARB_transform_feedback2   DONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_transform_feedback3   DONE (i965, nv50, nvc0, 
r600, radeonsi)
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 5bf9c00..3f07f01 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -303,8 +303,10 @@ static int r600_get_param(struct pipe_screen* pscreen, 
enum pipe_cap param)
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
-   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+   case PIPE_CAP_TEXTURE_GATHER_SM5:
return family >= CHIP_CEDAR ? 1 : 0;
+   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+   return family >= CHIP_CEDAR ? 4 : 0;
 
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -313,7 +315,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
-   case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index db928f3..044d67c 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5060,6 +5060,35 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
}
 
opcode = ctx->inst_info->op;
+   if (opcode == FETCH_OP_GATHER4 &&
+   inst->TexOffsets[0].File != TGSI_FILE_NULL &&
+   inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) {
+   opcode = FETCH_OP_GATHER4_O;
+
+   /* GATHER4_O/GATHER4_C_O use offset values loaded by
+  SET_TEXTURE_OFFSETS instruction. The immediate offset values
+  encoded in the instruction are ignored. */
+   memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+   tex.op = FETCH_OP_SET_TEXTURE_OFFSETS;
+   tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+   tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+
+   tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + 
inst->TexOffsets[0].Index;
+

[Mesa-dev] [PATCH] r600g: Implement GL_ARB_texture_query_lod

2014-07-23 Thread Glenn Kennard
Requires Evergreen or later
---
Passes ARB_texture_query_lod piglits, no other regressions,
tested on radeon 6670.

 docs/GL3.txt   |  2 +-
 src/gallium/drivers/r600/r600_pipe.c   |  2 +-
 src/gallium/drivers/r600/r600_shader.c | 13 -
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 8128692..d481148 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -119,7 +119,7 @@ GL 4.0:
   GL_ARB_texture_buffer_object_rgb32   DONE (i965, nvc0, r600, 
radeonsi, softpipe)
   GL_ARB_texture_cube_map_arrayDONE (i965, nv50, nvc0, 
r600, radeonsi, softpipe)
   GL_ARB_texture_gatherDONE (i965, nv50, nvc0, 
radeonsi, r600)
-  GL_ARB_texture_query_lod DONE (i965, nv50, nvc0, 
radeonsi)
+  GL_ARB_texture_query_lod DONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_transform_feedback2   DONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_transform_feedback3   DONE (i965, nv50, nvc0, 
r600, radeonsi)
 
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 5bf9c00..7c50169 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -304,6 +304,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
return family >= CHIP_CEDAR ? 1 : 0;
 
/* Unsupported features. */
@@ -314,7 +315,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
-   case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_DRAW_INDIRECT:
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index db928f3..499e511 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5106,13 +5106,21 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
+   tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+   }
+   else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) {
+   tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
+   tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
+   tex.dst_sel_z = 7;
+   tex.dst_sel_w = 7;
}
else {
tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
+   tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
}
-   tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+
 
if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
tex.src_sel_x = 4;
@@ -6669,6 +6677,7 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_IMUL_HI, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_UMUL_HI, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TG4,   0, FETCH_OP_GATHER4, tgsi_unsupported},
+   {TGSI_OPCODE_LODQ,  0, FETCH_OP_GET_LOD, tgsi_unsupported},
{TGSI_OPCODE_LAST,  0, ALU_OP0_NOP, tgsi_unsupported},
 };
 
@@ -6864,6 +6873,7 @@ static struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_IMUL_HI, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_UMUL_HI, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TG4,   0, FETCH_OP_GATHER4, tgsi_tex},
+   {TGSI_OPCODE_LODQ,  0, FETCH_OP_GET_LOD, tgsi_tex},
{TGSI_OPCODE_LAST,  0, ALU_OP0_NOP, tgsi_unsupported},
 };
 
@@ -7060,5 +7070,6 @@ static struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] = {
{TGSI_OPCODE_IMUL_HI, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_UMUL_HI, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TG4,   0, FETCH_OP_GATHER4, tgsi_tex},
+   {TGSI_OPCODE_LODQ,  0, FETCH_OP_GET_LOD, tgsi_tex},
{TGSI_OPCODE_LAST,  0, ALU_OP0_NOP, tgsi_unsupported},
 };
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] r600g: Add IMUL_HI/UMUL_HI support

2014-07-23 Thread Glenn Kennard
Fixes fs-imulExtended, fs-imulExtended-only-msb, fs-umulExtended,
fs-umulExtended-only-msb piglit tests.
---
Tested on radeon 6670

 src/gallium/drivers/r600/r600_shader.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index db928f3..6ba9c0f 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6666,8 +6666,8 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_TEX2,  0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXB2,  0, FETCH_OP_SAMPLE_LB, tgsi_tex},
{TGSI_OPCODE_TXL2,  0, FETCH_OP_SAMPLE_L, tgsi_tex},
-   {TGSI_OPCODE_IMUL_HI, 0, ALU_OP0_NOP, tgsi_unsupported},
-   {TGSI_OPCODE_UMUL_HI, 0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_IMUL_HI,   0, ALU_OP2_MULHI_INT, tgsi_op2_trans},
+   {TGSI_OPCODE_UMUL_HI,   0, ALU_OP2_MULHI_UINT, tgsi_op2_trans},
{TGSI_OPCODE_TG4,   0, FETCH_OP_GATHER4, tgsi_unsupported},
{TGSI_OPCODE_LAST,  0, ALU_OP0_NOP, tgsi_unsupported},
 };
@@ -6861,8 +6861,8 @@ static struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_TEX2,  0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXB2,  0, FETCH_OP_SAMPLE_LB, tgsi_tex},
{TGSI_OPCODE_TXL2,  0, FETCH_OP_SAMPLE_L, tgsi_tex},
-   {TGSI_OPCODE_IMUL_HI, 0, ALU_OP0_NOP, tgsi_unsupported},
-   {TGSI_OPCODE_UMUL_HI, 0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_IMUL_HI,   0, ALU_OP2_MULHI_INT, tgsi_op2_trans},
+   {TGSI_OPCODE_UMUL_HI,   0, ALU_OP2_MULHI_UINT, tgsi_op2_trans},
{TGSI_OPCODE_TG4,   0, FETCH_OP_GATHER4, tgsi_tex},
{TGSI_OPCODE_LAST,  0, ALU_OP0_NOP, tgsi_unsupported},
 };
@@ -7057,8 +7057,8 @@ static struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] = {
{TGSI_OPCODE_TEX2,  0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXB2,  0, FETCH_OP_SAMPLE_LB, tgsi_tex},
{TGSI_OPCODE_TXL2,  0, FETCH_OP_SAMPLE_L, tgsi_tex},
-   {TGSI_OPCODE_IMUL_HI, 0, ALU_OP0_NOP, tgsi_unsupported},
-   {TGSI_OPCODE_UMUL_HI, 0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_IMUL_HI,   0, ALU_OP2_MULHI_INT, cayman_mul_int_instr},
+   {TGSI_OPCODE_UMUL_HI,   0, ALU_OP2_MULHI_UINT, cayman_mul_int_instr},
{TGSI_OPCODE_TG4,   0, FETCH_OP_GATHER4, tgsi_tex},
{TGSI_OPCODE_LAST,  0, ALU_OP0_NOP, tgsi_unsupported},
 };
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: Implement gpu_shader5 integer ops

2014-07-23 Thread Glenn Kennard
---
Together with separate MUL_HI/UMUL_HI patch this passes piglit
ARB_gpu_shader5 integer tests.

This patch trivially depends on r600g-Implement-GL_ARB_texture_query_lod
for the TGSI_OPCODE_LODQ table entries.

 docs/GL3.txt   |   2 +-
 src/gallium/drivers/r600/r600_shader.c | 190 +
 2 files changed, 191 insertions(+), 1 deletion(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index d481148..603413f 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -105,7 +105,7 @@ GL 4.0:
   - Dynamically uniform UBO array indices  started (Chris)
   - Implicit signed -> unsigned conversionsDONE
   - Fused multiply-add DONE (i965, nvc0)
-  - Packing/bitfield/conversion functions  DONE (i965, nvc0)
+  - Packing/bitfield/conversion functions  DONE (i965, nvc0, r600)
   - Enhanced textureGather DONE (i965, nvc0, 
radeonsi)
   - Geometry shader instancing DONE (i965, nvc0)
   - Geometry shader multiple streams   DONE (i965, nvc0)
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 499e511..9abfee1 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4192,6 +4192,172 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
return 0;
 }
 
+static int tgsi_bfi(struct r600_shader_ctx *ctx)
+{
+   struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+   struct r600_bytecode_alu alu;
+   int i, r, t1, t2;
+
+   unsigned write_mask = inst->Dst[0].Register.WriteMask;
+   int last_inst = tgsi_last_instruction(write_mask);
+
+   t1 = ctx->temp_reg;
+
+   for (i = 0; i < 4; i++) {
+   if (!(write_mask & (1src[2], i);
+
+   r = r600_bytecode_add_alu(ctx->bc, &alu);
+   if (r)
+   return r;
+   }
+
+   t2 = r600_get_temp(ctx);
+
+   for (i = 0; i < 4; i++) {
+   if (!(write_mask & (1src[2], i);
+
+   r = r600_bytecode_add_alu(ctx->bc, &alu);
+   if (r)
+   return r;
+   }
+
+   for (i = 0; i < 4; i++) {
+   if (!(write_mask & (1src[0], i);
+
+   r = r600_bytecode_add_alu(ctx->bc, &alu);
+   if (r)
+   return r;
+   }
+
+   return 0;
+}
+
+static int tgsi_msb(struct r600_shader_ctx *ctx)
+{
+   struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+   struct r600_bytecode_alu alu;
+   int i, r, t1, t2;
+
+   unsigned write_mask = inst->Dst[0].Register.WriteMask;
+   int last_inst = tgsi_last_instruction(write_mask);
+
+   assert(ctx->inst_info->op == ALU_OP1_FFBH_INT ||
+   ctx->inst_info->op == ALU_OP1_FFBH_UINT);
+
+   t1 = ctx->temp_reg;
+
+   /* bit position is indexed from lsb by TGSI, and from msb by the 
hardware */
+   for (i = 0; i < 4; i++) {
+   if (!(write_mask & (1op;
+   alu.dst.sel = t1;
+   alu.dst.chan = i;
+   alu.dst.write = 1;
+   alu.last = i == last_inst;
+
+   r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+
+   r = r600_bytecode_add_alu(ctx->bc, &alu);
+   if (r)
+   return r;
+   }
+
+   t2 = r600_get_temp(ctx);
+
+   for (i = 0; i < 4; i++) {
+   if (!(write_mask & (1Dst[0], i, &alu.dst);
+   alu.dst.chan = i;
+   alu.dst.write = 1;
+   alu.last = i == last_inst;
+
+   alu.src[0].sel = t1;
+   alu.src[0].chan = i;
+   alu.src[1].sel = t2;
+   alu.src[1].chan = i;
+   alu.src[2].sel = t1;
+   alu.src[2].chan = i;
+
+   r = r600_bytecode_add_alu(ctx->bc, &alu);
+   if (r)
+   return r;
+   }
+
+   return 0;
+}
+
 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct 
tgsi_full_instructio

[Mesa-dev] [PATCH] r600g: gpu_shader5 gl_SampleMaskIn support

2014-07-23 Thread Glenn Kennard
Map TGSI_SEMANTIC_SAMPLEMASK to register/component.
Enable face register when sample mask is needed by shader.
Requires Evergreen/Cayman
---
I think the rest of the sample related bits in gpu_shader5 are
from GL_ARB_sample_shading which isn't implemented yet in r600.

Passes samplemaskin-basic piglit, no regressions, on radeon 6670

 docs/GL3.txt   |  2 +-
 src/gallium/drivers/r600/evergreen_state.c | 10 ++--
 src/gallium/drivers/r600/r600_shader.c | 37 ++
 3 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 8128692..53e19e0 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -109,7 +109,7 @@ GL 4.0:
   - Enhanced textureGather DONE (i965, nvc0, 
radeonsi)
   - Geometry shader instancing DONE (i965, nvc0)
   - Geometry shader multiple streams   DONE (i965, nvc0)
-  - Enhanced per-sample shadingDONE (i965)
+  - Enhanced per-sample shadingDONE (i965, r600)
   - Interpolation functionsDONE (i965)
   - New overload resolution rules  DONE
   GL_ARB_gpu_shader_fp64   started (Dave)
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 8f5ba5f..839d2ae 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2843,8 +2843,14 @@ void evergreen_update_ps_state(struct pipe_context *ctx, 
struct r600_pipe_shader
   POSITION goes via GPRs from the SC so isn't counted */
if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
pos_index = i;
-   else if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
-   face_index = i;
+   else if (rshader->input[i].name == TGSI_SEMANTIC_FACE) {
+   if (face_index == -1)
+   face_index = i;
+   }
+   else if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEMASK) {
+   if (face_index == -1)
+   face_index = i; /* lives in same register, same 
enable bit */
+   }
else {
ninterp++;
if (rshader->input[i].interpolate == 
TGSI_INTERPOLATE_LINEAR)
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index db928f3..c8ab4dd 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -287,7 +287,9 @@ struct r600_shader_ctx {
boolean input_linear;
boolean input_perspective;
int num_interp_gpr;
+   /* evergreen/cayman also store sample mask in face register */
int face_gpr;
+   boolean has_samplemask;
int colors_used;
boolean clip_vertex_write;
unsignedcv_output;
@@ -498,7 +500,8 @@ static int r600_spi_sid(struct r600_shader_io * io)
if (name == TGSI_SEMANTIC_POSITION ||
name == TGSI_SEMANTIC_PSIZE ||
name == TGSI_SEMANTIC_EDGEFLAG ||
-   name == TGSI_SEMANTIC_FACE)
+   name == TGSI_SEMANTIC_FACE ||
+   name == TGSI_SEMANTIC_SAMPLEMASK)
index = 0;
else {
if (name == TGSI_SEMANTIC_GENERIC) {
@@ -585,7 +588,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
ctx->shader->input[i].spi_sid = 
r600_spi_sid(&ctx->shader->input[i]);
switch (ctx->shader->input[i].name) {
case TGSI_SEMANTIC_FACE:
-   ctx->face_gpr = ctx->shader->input[i].gpr;
+   if (ctx->face_gpr == -1)
+   ctx->face_gpr = 
ctx->shader->input[i].gpr;
break;
case TGSI_SEMANTIC_COLOR:
ctx->colors_used++;
@@ -675,7 +679,14 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
break;
 
case TGSI_FILE_SYSTEM_VALUE:
-   if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+   if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK) {
+   ctx->has_samplemask = true;
+   /* lives in Front Face GPR */
+   if (ctx->face_gpr == -1)
+   ctx->face_gpr = 
ctx->file_offset[TGSI_FILE_SYSTEM_VALUE] + d->Range.First;
+   break;
+   }
+   else if (d->Semantic.Name == TGSI_SEMAN

[Mesa-dev] [PATCH] r600g: Implement BPTC texture support

2014-07-23 Thread Glenn Kennard
Signed-off-by: Glenn Kennard 
---
This patch depends on Ilia Mirkin's "nvc0: add BPTC format support"
and Neil Roberts' core BPTC support patches.

 src/gallium/drivers/r600/r600_state_common.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 8c37d0d..2f39df3 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1967,6 +1967,29 @@ uint32_t r600_translate_texformat(struct pipe_screen 
*screen,
}
}
 
+   if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
+   if (!enable_s3tc)
+   goto out_unknown;
+
+   if (rscreen->b.chip_class < EVERGREEN)
+   goto out_unknown;
+
+   switch (format) {
+   case PIPE_FORMAT_BPTC_RGBA_UNORM:
+   case PIPE_FORMAT_BPTC_SRGBA_UNORM:
+   result = FMT_BC7;
+   is_srgb_valid = TRUE;
+   goto out_word4;
+   case PIPE_FORMAT_BPTC_RGB_FLOAT:
+   case PIPE_FORMAT_BPTC_RGB_UFLOAT:
+   result = FMT_BC6;
+   is_srgb_valid = TRUE;
+   goto out_word4;
+   default:
+   goto out_unknown;
+   }
+   }
+
if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
switch (format) {
case PIPE_FORMAT_R8G8_B8G8_UNORM:
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] r600g: Implement BPTC texture support

2014-07-26 Thread Glenn Kennard
Signed-off-by: Glenn Kennard 
---
This patch depends on Ilia Mirkin's "nvc0: add BPTC format support"
and Neil Roberts' core BPTC support patches.

Changes since patch v1:
Remove srgb flag from float formats.
Set RGB sign bits for BPTC_RGB_FLOAT.

This passes piglit, including a newly written test for the
float variants, which will be sent to the piglit mailing list
separately.

 src/gallium/drivers/r600/r600_state_common.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 8c37d0d..98a2382 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1967,6 +1967,30 @@ uint32_t r600_translate_texformat(struct pipe_screen 
*screen,
}
}
 
+   if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
+   if (!enable_s3tc)
+   goto out_unknown;
+
+   if (rscreen->b.chip_class < EVERGREEN)
+   goto out_unknown;
+
+   switch (format) {
+   case PIPE_FORMAT_BPTC_RGBA_UNORM:
+   case PIPE_FORMAT_BPTC_SRGBA_UNORM:
+   result = FMT_BC7;
+   is_srgb_valid = TRUE;
+   goto out_word4;
+   case PIPE_FORMAT_BPTC_RGB_FLOAT:
+   word4 |= sign_bit[0] | sign_bit[1] | 
sign_bit[2];
+   /* fall through */
+   case PIPE_FORMAT_BPTC_RGB_UFLOAT:
+   result = FMT_BC6;
+   goto out_word4;
+   default:
+   goto out_unknown;
+   }
+   }
+
if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
switch (format) {
case PIPE_FORMAT_R8G8_B8G8_UNORM:
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] mesa: add helper _mesa_is_format_etc2

2014-08-03 Thread Glenn Kennard

On Sun, 03 Aug 2014 14:40:36 +0200, Marek Olšák  wrote:


From: Marek Olšák 

---
 src/mesa/main/formats.c | 25 +
 src/mesa/main/formats.h |  3 +++
 2 files changed, 28 insertions(+)

diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index 1f20a9a..b830dbc 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -2068,6 +2068,31 @@ _mesa_get_format_color_encoding(mesa_format  
format)

/**
+ * Return TRUE if format is an ETC2 compressed format specified
+ * by GL_ARB_ES3_compatibility.
+ */
+GLboolean
+_mesa_is_format_etc2(mesa_format format)
+{
+   switch (format) {
+   case MESA_FORMAT_ETC2_RGB8:
+   case MESA_FORMAT_ETC2_SRGB8:
+   case MESA_FORMAT_ETC2_RGBA8_EAC:
+   case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
+   case MESA_FORMAT_ETC2_R11_EAC:
+   case MESA_FORMAT_ETC2_RG11_EAC:
+   case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
+   case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
+   case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
+   case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
+  return GL_TRUE;
+   default:
+  return GL_FALSE;
+   }
+}
+
+
+/**
  * For an sRGB format, return the corresponding linear color space  
format.

  * For non-sRGB formats, return the format as-is.
  */
diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
index dc50bc8..166cd22 100644
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -446,6 +446,9 @@ _mesa_is_format_signed(mesa_format format);
 extern GLboolean
 _mesa_is_format_integer(mesa_format format);
+extern GLboolean
+_mesa_is_format_etc2(mesa_format format);
+
 extern GLenum
 _mesa_get_format_color_encoding(mesa_format format);



Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] st/mesa: convert the ETC1 format to an uncompressed one if unsupported

2014-08-03 Thread Glenn Kennard
|| !strb->surface || !stImage->pt) {
   debug_printf("%s: null strb or stImage\n", __FUNCTION__);
diff --git a/src/mesa/state_tracker/st_context.c  
b/src/mesa/state_tracker/st_context.c

index c805a09..ccd19f3 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -189,6 +189,9 @@ st_create_context_priv( struct gl_context *ctx,  
struct pipe_context *pipe,

st->has_stencil_export =
   screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT);
st->has_shader_model3 = screen->get_param(screen, PIPE_CAP_SM3);
+   st->has_etc1 = screen->is_format_supported(screen,  
PIPE_FORMAT_ETC1_RGB8,

+  PIPE_TEXTURE_2D, 0,
+  PIPE_BIND_SAMPLER_VIEW);
st->prefer_blit_based_texture_transfer = screen->get_param(screen,
   PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER);
diff --git a/src/mesa/state_tracker/st_context.h  
b/src/mesa/state_tracker/st_context.h

index 361a24b..6d572bd 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -86,6 +86,7 @@ struct st_context
boolean has_stencil_export; /**< can do shader stencil export? */
boolean has_time_elapsed;
boolean has_shader_model3;
+   boolean has_etc1;
boolean prefer_blit_based_texture_transfer;
   boolean needs_texcoord_semantic;
diff --git a/src/mesa/state_tracker/st_extensions.c  
b/src/mesa/state_tracker/st_extensions.c

index 3974adb..60aa8cc 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -513,7 +513,9 @@ void st_init_extensions(struct st_context *st)
 GL_TRUE }, /* at least one format must be supported */
  { { o(OES_compressed_ETC1_RGB8_texture) },
-{ PIPE_FORMAT_ETC1_RGB8 } },
+{ PIPE_FORMAT_ETC1_RGB8,
+  PIPE_FORMAT_R8G8B8A8_UNORM },
+GL_TRUE }, /* at least one format must be supported */
  { { o(ARB_stencil_texturing) },
 { PIPE_FORMAT_X24S8_UINT,
diff --git a/src/mesa/state_tracker/st_format.c  
b/src/mesa/state_tracker/st_format.c

index 3d822a5..ae71dd7 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -323,8 +323,10 @@ st_mesa_format_to_pipe_format(struct st_context  
*st, mesa_format mesaFormat)

case MESA_FORMAT_LA_LATC2_SNORM:
   return PIPE_FORMAT_LATC2_SNORM;
+   /* The destination RGBA format mustn't be changed, because it's also
+* a destination format of the unpack/decompression function. */
case MESA_FORMAT_ETC1_RGB8:
-  return PIPE_FORMAT_ETC1_RGB8;
+  return st->has_etc1 ? PIPE_FORMAT_ETC1_RGB8 :  
PIPE_FORMAT_R8G8B8A8_UNORM;

   /* signed normalized formats */
case MESA_FORMAT_R_SNORM8:
@@ -801,9 +803,11 @@ test_format_conversion(struct st_context *st)
   /* test all Mesa formats */
for (i = 1; i < MESA_FORMAT_COUNT; i++) {
-  /* ETC2 formats are translated differently, skip them. */
+  /* ETC formats are translated differently, skip them. */
   if (_mesa_is_format_etc2(i))
  continue;
+  if (i == MESA_FORMAT_ETC1_RGB8 && !st->has_etc1)
+ continue;
  enum pipe_format pf = st_mesa_format_to_pipe_format(st, i);
   if (pf != PIPE_FORMAT_NONE) {
@@ -815,6 +819,11 @@ test_format_conversion(struct st_context *st)
/* Test all Gallium formats */
for (i = 1; i < PIPE_FORMAT_COUNT; i++) {
   mesa_format mf = st_pipe_format_to_mesa_format(i);
+
+  /* ETC formats are translated differently, skip them. */
+  if (i == PIPE_FORMAT_ETC1_RGB8 && !st->has_etc1)
+ continue;
+
   if (mf != MESA_FORMAT_NONE) {
  enum pipe_format pf = st_mesa_format_to_pipe_format(st, mf);
  assert(pf == i);


Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] st/mesa: add st_context parameter to st_mesa_format_to_pipe_format

2014-08-03 Thread Glenn Kennard
 baseFormat && iformat == basePackFormat) {
- pFormat = st_choose_matching_format(st->pipe->screen, bindings,
- format, type,
+ pFormat = st_choose_matching_format(st, bindings, format, type,
  ctx->Unpack.SwapBytes);
 if (pFormat != PIPE_FORMAT_NONE)
 return st_pipe_format_to_mesa_format(pFormat);
 /* try choosing format again, this time without render target  
bindings */

- pFormat = st_choose_matching_format(st->pipe->screen,
- PIPE_BIND_SAMPLER_VIEW,
+ pFormat = st_choose_matching_format(st, PIPE_BIND_SAMPLER_VIEW,
  format, type,
  ctx->Unpack.SwapBytes);
  if (pFormat != PIPE_FORMAT_NONE)
diff --git a/src/mesa/state_tracker/st_format.h  
b/src/mesa/state_tracker/st_format.h

index ce1e230..90e00e8 100644
--- a/src/mesa/state_tracker/st_format.h
+++ b/src/mesa/state_tracker/st_format.h
@@ -41,7 +41,7 @@ struct pipe_screen;
extern enum pipe_format
-st_mesa_format_to_pipe_format(mesa_format mesaFormat);
+st_mesa_format_to_pipe_format(struct st_context *st, mesa_format  
mesaFormat);

extern mesa_format
 st_pipe_format_to_mesa_format(enum pipe_format pipeFormat);
@@ -58,7 +58,7 @@ st_choose_renderbuffer_format(struct st_context *st,
   GLenum internalFormat, unsigned  
sample_count);

extern enum pipe_format
-st_choose_matching_format(struct pipe_screen *screen, unsigned bind,
+st_choose_matching_format(struct st_context *st, unsigned bind,
  GLenum format, GLenum type, GLboolean swapBytes);
extern mesa_format
diff --git a/src/mesa/state_tracker/st_texture.c  
b/src/mesa/state_tracker/st_texture.c

index 9f57cfb..af9b767 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -197,7 +197,8 @@ st_gl_texture_dims_to_pipe_dims(GLenum texture,
  * Check if a texture image can be pulled into a unified mipmap texture.
  */
 GLboolean
-st_texture_match_image(const struct pipe_resource *pt,
+st_texture_match_image(struct st_context *st,
+   const struct pipe_resource *pt,
const struct gl_texture_image *image)
 {
GLuint ptWidth, ptHeight, ptDepth, ptLayers;
@@ -209,7 +210,7 @@ st_texture_match_image(const struct pipe_resource  
*pt,
   /* Check if this image's format matches the established texture's  
format.

 */
-   if (st_mesa_format_to_pipe_format(image->TexFormat) != pt->format)
+   if (st_mesa_format_to_pipe_format(st, image->TexFormat) !=  
pt->format)

   return GL_FALSE;
   st_gl_texture_dims_to_pipe_dims(image->TexObject->Target,
diff --git a/src/mesa/state_tracker/st_texture.h  
b/src/mesa/state_tracker/st_texture.h

index 04b886e..ce1cf8b 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -199,7 +199,8 @@ st_gl_texture_dims_to_pipe_dims(GLenum texture,
 /* Check if an image fits into an existing texture object.
  */
 extern GLboolean
-st_texture_match_image(const struct pipe_resource *pt,
+st_texture_match_image(struct st_context *st,
+   const struct pipe_resource *pt,
const struct gl_texture_image *image);
/* Return a pointer to an image within a texture.  Return image stride as


Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/5] st/mesa: add support for ETC2 formats

2014-08-03 Thread Glenn Kennard

On Sun, 03 Aug 2014 14:40:37 +0200, Marek Olšák  wrote:


From: Marek Olšák 

The formats are emulated by translating them into plain uncompressed
formats, because I don't know of any hardware which supports them.

This is required for GLES 3.0 and ARB_ES3_compatibility (GL 4.3).
---
 src/mesa/state_tracker/st_cb_texture.c | 54  
--

 src/mesa/state_tracker/st_format.c | 24 +++
 src/mesa/state_tracker/st_texture.c| 11 +++
 src/mesa/state_tracker/st_texture.h| 12 +++-
 4 files changed, 93 insertions(+), 8 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c  
b/src/mesa/state_tracker/st_cb_texture.c

index aa6b05f..88c4b25 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -37,6 +37,7 @@
 #include "main/pbo.h"
 #include "main/pixeltransfer.h"
 #include "main/texcompress.h"
+#include "main/texcompress_etc.h"
 #include "main/texgetimage.h"
 #include "main/teximage.h"
 #include "main/texobj.h"
@@ -207,8 +208,31 @@ st_MapTextureImage(struct gl_context *ctx,
map = st_texture_image_map(st, stImage, pipeMode, x, y, slice, w, h,  
1,

   &transfer);
if (map) {
-  *mapOut = map;
-  *rowStrideOut = transfer->stride;
+  if (_mesa_is_format_etc2(texImage->TexFormat)) {
+ /* ETC isn't supported by gallium and it's represented


Freedreno could definitely support it natively, at least for a3xx. Though
it can cross that bridge once it gets there, I suppose.


+  * by uncompressed formats. Only write transfers with  
precompressed

+  * data are supported by ES3, which makes this really simple.
+  *
+  * Just create a temporary storage where the ETC texture will
+  * be stored. It will be decompressed in the Unmap function.
+  */


Question: Is it possible to create a permanent map of a texture in GL?


+ unsigned z = transfer->box.z;


Nitpick: transfer->box.z is a signed int.

+ struct st_texture_image_transfer *itransfer =  
&stImage->transfer[z];

+
+ itransfer->temp_data =
+malloc(_mesa_format_image_size(texImage->TexFormat, w, h,  
1));

+ itransfer->temp_stride =
+_mesa_format_row_stride(texImage->TexFormat, w);
+ itransfer->map = map;
+
+ *mapOut = itransfer->temp_data;
+ *rowStrideOut = itransfer->temp_stride;
+  }
+  else {
+ /* supported mapping */
+ *mapOut = map;
+ *rowStrideOut = transfer->stride;
+  }
}
else {
   *mapOut = NULL;
@@ -225,6 +249,26 @@ st_UnmapTextureImage(struct gl_context *ctx,
 {
struct st_context *st = st_context(ctx);
struct st_texture_image *stImage  = st_texture_image(texImage);
+
+   if (_mesa_is_format_etc2(texImage->TexFormat)) {
+  /* Decompress the ETC texture to the mapped one. */
+  unsigned z = slice + stImage->base.Face;


Same nitpick as above: this should be int rather than unsigned.

+  struct st_texture_image_transfer *itransfer =  
&stImage->transfer[z];

+  struct pipe_transfer *transfer = itransfer->transfer;
+
+  assert(z == transfer->box.z);
+
+  _mesa_unpack_etc2_format(itransfer->map, transfer->stride,
+   itransfer->temp_data,  
itransfer->temp_stride,
+   transfer->box.width,  
transfer->box.height,

+   texImage->TexFormat);


Is the ETC source data always an integer number of blocks?


+
+  free(itransfer->temp_data);
+  itransfer->temp_data = NULL;
+  itransfer->temp_stride = 0;
+  itransfer->map = 0;
+   }
+
st_texture_image_unmap(st, stImage, slice);
 }
@@ -613,6 +657,8 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
unsigned bind;
GLubyte *map;
+   assert(!_mesa_is_format_etc2(texImage->TexFormat));
+
if (!st->prefer_blit_based_texture_transfer) {
   goto fallback;
}
@@ -870,6 +916,8 @@ st_GetTexImage(struct gl_context * ctx,
ubyte *map = NULL;
boolean done = FALSE;
+   assert(!_mesa_is_format_etc2(texImage->TexFormat));
+
if (!st->prefer_blit_based_texture_transfer &&
!_mesa_is_format_compressed(texImage->TexFormat)) {
   /* Try to avoid the fallback if we're doing texture decompression  
here */
@@ -1306,6 +1354,8 @@ st_CopyTexSubImage(struct gl_context *ctx, GLuint  
dims,

unsigned bind;
GLint srcY0, srcY1;
+   assert(!_mesa_is_format_etc2(texImage->TexFormat));
+
if (!strb || !strb->surface || !stImage->pt) {
   debug_printf("%s: null strb or stImage\n", __FUNCTION__);
   return;
diff --git a/src/mesa/state_tracker/st_format.c  
b/src/mesa/state_tracker/st_format.c

index 409079b..ff3f494 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -402,6 +402,26 @@ st_mesa_format_to_pipe_format(mesa_format  
mesaFormat)

case MESA_FORMAT_B8G8R8X8_SRGB:
   return PIPE_FORMAT_B8G8R8X8_SRGB;
+   /* ETC2 formats are emulat

Re: [Mesa-dev] [PATCH] r600g: fix op3 abs issue

2015-03-31 Thread Glenn Kennard
tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1],  
&ctx->src[1]);

if (r)
return r;
alu.src[2].sel = ctx->temp_reg;
@@ -6109,8 +6119,15 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst =  
&ctx->parse.FullToken.FullInstruction;

struct r600_bytecode_alu alu;
-   int i, r;
+   int i, r, j;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+   int temp_regs[3];
+
+   for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+   temp_regs[j] = 0;
+   if (ctx->src[j].abs)
+   temp_regs[j] = r600_get_temp(ctx);
+   }
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
@@ -6118,13 +6135,13 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP3_CNDGE;
-		r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, 0, &alu.src[0],  
&ctx->src[0], i);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0],  
&ctx->src[0]);

if (r)
return r;
-		r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, 1, &alu.src[1],  
&ctx->src[2], i);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1],  
&ctx->src[2]);

if (r)
return r;
-		r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, 2, &alu.src[2],  
&ctx->src[1], i);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[2],  
&ctx->src[1]);

if (r)
return r;
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);



Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] r600g/sb: Enable SB for geometry shaders

2015-04-06 Thread Glenn Kennard
Add SV_GEOMETRY_EMIT special variable type to track the
implicit dependencies between CUT/EMIT_VERTEX/MEM_RING
instructions so GCM/scheduler doesn't reorder them.

Mark emit instructions as unkillable so DCE doesn't eat them.

Enable only for evergreen/cayman as there are a few
unexplained GS piglit regressions on R6xx/R7xx with SB
enabled otherwise.

Signed-off-by: Glenn Kennard 
---
Changes since v1:
* Enable SB only for >= EVERGREEN. Something strange going on
  with GS on R6xx/R7xx that the code emitted by SB triggers,
  haven't been able to pinpoint it yet.
* Avoid splitting live ranges for SV_GEOMETRY_EMIT values, useless
  since they are not actual values. Avoids unnecessary MOV operations
  being emitted.
* Ensure the asm dump prints out the SV_GEOMETRY_EMIT dst values
* One bytecode dumper fix spotted by Coverity

Note:
Requires 'r600g/sb: Update last_cf for loops' for cayman to
pass all GS piglits without regressions - not a GS bug but a
loop handling issue that only triggers in some GS piglit shaders.

 src/gallium/drivers/r600/r600_isa.h|  8 
 src/gallium/drivers/r600/r600_shader.c | 12 
 src/gallium/drivers/r600/sb/sb_bc_dump.cpp |  2 +-
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp |  2 +-
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp   | 25 +
 src/gallium/drivers/r600/sb/sb_core.cpp|  5 -
 src/gallium/drivers/r600/sb/sb_dump.cpp|  4 +++-
 src/gallium/drivers/r600/sb/sb_ir.h|  6 +-
 src/gallium/drivers/r600/sb/sb_ra_init.cpp |  4 ++--
 src/gallium/drivers/r600/sb/sb_sched.cpp   |  2 +-
 src/gallium/drivers/r600/sb/sb_valtable.cpp|  1 +
 11 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_isa.h 
b/src/gallium/drivers/r600/r600_isa.h
index ec3f702..381f06d 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -641,7 +641,7 @@ static const struct cf_op_info cf_op_table[] = {
 
{"MEM_SCRATCH",   { 0x24, 0x24, 0x50, 0x50 },  
CF_MEM  },
{"MEM_REDUCT",{ 0x25, 0x25,   -1,   -1 },  
CF_MEM  },
-   {"MEM_RING",  { 0x26, 0x26, 0x52, 0x52 },  
CF_MEM  },
+   {"MEM_RING",  { 0x26, 0x26, 0x52, 0x52 },  
CF_MEM | CF_EMIT },
 
{"EXPORT",{ 0x27, 0x27, 0x53, 0x53 },  
CF_EXP  },
{"EXPORT_DONE",   { 0x28, 0x28, 0x54, 0x54 },  
CF_EXP  },
@@ -649,9 +649,9 @@ static const struct cf_op_info cf_op_table[] = {
{"MEM_EXPORT",{   -1, 0x3A, 0x55, 0x55 },  
CF_MEM  },
{"MEM_RAT",   {   -1,   -1, 0x56, 0x56 },  
CF_MEM | CF_RAT },
{"MEM_RAT_NOCACHE",   {   -1,   -1, 0x57, 0x57 },  
CF_MEM | CF_RAT },
-   {"MEM_RING1", {   -1,   -1, 0x58, 0x58 },  
CF_MEM  },
-   {"MEM_RING2", {   -1,   -1, 0x59, 0x59 },  
CF_MEM  },
-   {"MEM_RING3", {   -1,   -1, 0x5A, 0x5A },  
CF_MEM  },
+   {"MEM_RING1", {   -1,   -1, 0x58, 0x58 },  
CF_MEM | CF_EMIT },
+   {"MEM_RING2", {   -1,   -1, 0x59, 0x59 },  
CF_MEM | CF_EMIT },
+   {"MEM_RING3", {   -1,   -1, 0x5A, 0x5A },  
CF_MEM | CF_EMIT },
{"MEM_MEM_COMBINED",  {   -1,   -1, 0x5B, 0x5B },  
CF_MEM  },
{"MEM_RAT_COMBINED_NOCACHE",  {   -1,   -1, 0x5C, 0x5C },  
CF_MEM | CF_RAT },
{"MEM_RAT_COMBINED",  {   -1,   -1,   -1, 0x5D },  
CF_MEM | CF_RAT }, /* ??? not in cayman isa doc */
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 28b290a..a9338cc 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -159,8 +159,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
goto error;
}
 
-   /* disable SB for geom shaders - it can't handle the CF_EMIT 
instructions */
-   use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
+/* disable SB for geom shaders on R6xx/R7xx due to some mysterious gs 
piglit regressions with it enabled. */
+if (rctx->b.chip_class <= R700) {
+   use_sb &= (shader->shader.processor_type != 
TGSI_PROCESSOR_GEOMETRY);
+}
/* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array 
indexing) as it doesn't handle those currently */
use_sb &a

[Mesa-dev] [PATCH] r600g/sb: Skip empty ALU clause while scheduling

2015-04-08 Thread Glenn Kennard
Fixes assert triggered by
ext_transform_feedback-intervening-read output use_gs
piglit test.

Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/sb/sb_sched.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp 
b/src/gallium/drivers/r600/sb/sb_sched.cpp
index 4248a3f..2e38a62 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -825,6 +825,9 @@ void post_scheduler::init_regmap() {
 
 void post_scheduler::process_alu(container_node *c) {
 
+   if (c->empty())
+   return;
+
ucm.clear();
alu.reset();
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/19] gallium: add tessellation shader properties

2015-05-03 Thread Glenn Kennard
On Sat, 02 May 2015 22:16:30 +0200, Ilia Mirkin   
wrote:



Signed-off-by: Ilia Mirkin 
---
 src/gallium/auxiliary/tgsi/tgsi_strings.c  |  7 ++-
 src/gallium/docs/source/tgsi.rst   | 33  
++

 src/gallium/include/pipe/p_defines.h   |  7 +++
 src/gallium/include/pipe/p_shader_tokens.h |  7 ++-
 4 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c  
b/src/gallium/auxiliary/tgsi/tgsi_strings.c

index dad503e..6781248 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -131,7 +131,12 @@ const char  
*tgsi_property_names[TGSI_PROPERTY_COUNT] =

"FS_DEPTH_LAYOUT",
"VS_PROHIBIT_UCPS",
"GS_INVOCATIONS",
-   "VS_WINDOW_SPACE_POSITION"
+   "VS_WINDOW_SPACE_POSITION",
+   "TCS_VERTICES_OUT",
+   "TES_PRIM_MODE",
+   "TES_SPACING",
+   "TES_VERTEX_ORDER_CW",
+   "TES_POINT_MODE",


Stray comma


 };
const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] =
diff --git a/src/gallium/docs/source/tgsi.rst  
b/src/gallium/docs/source/tgsi.rst

index 0116842..f77702a 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -3071,6 +3071,39 @@ Naturally, clipping is not performed on window  
coordinates either.
 The effect of this property is undefined if a geometry or tessellation  
shader

 are in use.
+TCS_VERTICES_OUT
+""""""""""""""""
+
+The number of vertices written by the tessellation control shader. This
+effectively defines the patch input size of the tessellation evaluation  
shader

+as well.
+
+TES_PRIM_MODE
+"""""""""""""
+
+This sets the tessellation primitive mode, one of  
``PIPE_PRIM_TRIANGLES``,

+``PIPE_PRIM_QUADS``, or ``PIPE_PRIM_LINES``. (Unlike in GL, there is no
+separate isolines settings, the regular lines is assumed to mean  
isolines.)

+
+TES_SPACING
+"""""""""""
+
+This sets the spacing mode of the tessellation generator, one of
+``PIPE_TESS_SPACING_*``.
+
+TES_VERTEX_ORDER_CW
+"""""""""""""""""""
+
+This sets the vertex order to be clockwise if the value is 1, or
+counter-clockwise if set to 0.
+
+TES_POINT_MODE
+""""""""""""""
+
+If set to a non-zero value, this turns on point mode for the  
tessellator,

+which means that points will be generated instead of primitives.
+
+
 Texture Sampling and Texture Formats
 
diff --git a/src/gallium/include/pipe/p_defines.h  
b/src/gallium/include/pipe/p_defines.h

index 59b7486..14e0db3 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -432,6 +432,13 @@ enum pipe_flush_flags
/**
+ * Tessellator spacing types
+ */
+#define PIPE_TESS_SPACING_FRACT_ODD  0
+#define PIPE_TESS_SPACING_FRACT_EVEN 1


The GL spec spells out FRACTIONAL in full, which is easier to grep the spec for.


+#define PIPE_TESS_SPACING_EQUAL  2
+
+/**
  * Query object types
  */
 #define PIPE_QUERY_OCCLUSION_COUNTER 0
diff --git a/src/gallium/include/pipe/p_shader_tokens.h  
b/src/gallium/include/pipe/p_shader_tokens.h

index c6ab899..ff1f7d6 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -262,7 +262,12 @@ union tgsi_immediate_data
 #define TGSI_PROPERTY_VS_PROHIBIT_UCPS   7
 #define TGSI_PROPERTY_GS_INVOCATIONS 8
 #define TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION 9
-#define TGSI_PROPERTY_COUNT  10
+#define TGSI_PROPERTY_TCS_VERTICES_OUT   10
+#define TGSI_PROPERTY_TES_PRIM_MODE  11
+#define TGSI_PROPERTY_TES_SPACING12
+#define TGSI_PROPERTY_TES_VERTEX_ORDER_CW13
+#define TGSI_PROPERTY_TES_POINT_MODE 14
+#define TGSI_PROPERTY_COUNT  15
struct tgsi_property {
unsigned Type : 4;  /**< TGSI_TOKEN_TYPE_PROPERTY */


With above niggles fixed
Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/19] gallium: basic tessellation support

2015-05-03 Thread Glenn Kennard
On Sat, 02 May 2015 22:16:24 +0200, Ilia Mirkin   
wrote:



This series adds tokens and updates some helper gallium functions to
know about tessellation. This provides no actual support for
tessellation in either core or drivers, however this will make it
possible to work on the core and driver pieces without crazy
interdependencies, as well as be landed separately and without
(direct) dependency.

Most of these patches have existed for about a year already, and have
been part of my and Marek's trees enabling tessellation in the nvc0
and radeonsi drivers. I've taken this opportunity to fix up and fold
some of them though.

This should be pretty safe to land, since even if I messed something
up, having this in-tree will make it easier for others to identify and
fix any issues collaboratively.

Ilia Mirkin (11):
  gallium: add tessellation shader types
  gallium: add new PATCHES primitive type
  gallium: add new semantics for tessellation
  gallium: add interfaces for controlling tess program state
  gallium: add tessellation shader properties
  gallium: add patch_vertices to draw info
  gallium: add set_tess_state to configure default tessellation
parameters
  tgsi/scan: allow scanning tessellation shaders
  tgsi/sanity: set implicit in/out array sizes based on patch sizes
  tgsi/ureg: allow ureg_dst to have dimension indices
  tgsi/dump: fix declaration printing of tessellation inputs/outputs

Marek Olšák (8):
  gallium: bump shader input and output limits
  trace: implement new tessellation functions
  gallium/util: print patch_vertices in util_dump_draw_info
  gallium/u_blitter: disable tessellation for all operations
  gallium/cso: add support for tessellation shaders
  gallium/cso: set NULL shaders at context destruction
  gallium: disable tessellation shaders for meta ops
  tgsi/ureg: use correct limit for max input count

 src/gallium/auxiliary/cso_cache/cso_context.c | 100  
++

 src/gallium/auxiliary/cso_cache/cso_context.h |  12 
 src/gallium/auxiliary/hud/hud_context.c   |   6 ++
 src/gallium/auxiliary/postprocess/pp_run.c|   6 ++
 src/gallium/auxiliary/tgsi/tgsi_dump.c|  20 +-
 src/gallium/auxiliary/tgsi/tgsi_info.c|   4 ++
 src/gallium/auxiliary/tgsi/tgsi_sanity.c  |  36 --
 src/gallium/auxiliary/tgsi/tgsi_scan.c|   6 +-
 src/gallium/auxiliary/tgsi/tgsi_strings.c |  19 -
 src/gallium/auxiliary/tgsi/tgsi_strings.h |   2 +-
 src/gallium/auxiliary/tgsi/tgsi_ureg.c|  26 ++-
 src/gallium/auxiliary/tgsi/tgsi_ureg.h|  59 +--
 src/gallium/auxiliary/util/u_blit.c   |   6 ++
 src/gallium/auxiliary/util/u_blitter.c|  27 +++
 src/gallium/auxiliary/util/u_blitter.h|  16 -
 src/gallium/auxiliary/util/u_dump_state.c |   2 +
 src/gallium/docs/source/context.rst   |   5 ++
 src/gallium/docs/source/tgsi.rst  |  70 ++
 src/gallium/drivers/trace/tr_context.c|  26 +++
 src/gallium/drivers/trace/tr_dump_state.c |   2 +
 src/gallium/include/pipe/p_context.h  |  14 
 src/gallium/include/pipe/p_defines.h  |  16 -
 src/gallium/include/pipe/p_shader_tokens.h|  18 -
 src/gallium/include/pipe/p_state.h|   6 +-
 src/mesa/state_tracker/st_cb_bitmap.c |   8 ++-
 src/mesa/state_tracker/st_cb_clear.c  |   6 ++
 src/mesa/state_tracker/st_cb_drawpixels.c |   8 ++-
 src/mesa/state_tracker/st_cb_drawtex.c|   6 ++
 28 files changed, 501 insertions(+), 31 deletions(-)



Some minor nits for patches 1, 6 and 7, see separate mails

Patches 2-5, 8-19 are
Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/19] gallium: add patch_vertices to draw info

2015-05-03 Thread Glenn Kennard
On Sat, 02 May 2015 22:16:31 +0200, Ilia Mirkin   
wrote:



Signed-off-by: Ilia Mirkin 
---
 src/gallium/include/pipe/p_state.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/include/pipe/p_state.h  
b/src/gallium/include/pipe/p_state.h

index e713a44..449c7f1 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -543,6 +543,8 @@ struct pipe_draw_info
unsigned start_instance; /**< first instance id */
unsigned instance_count; /**< number of instances */
+   unsigned patch_vertices; /**< the number of vertices per patch */
+


Please name this patch_vertex_count; the field isn't the actual patch vertex data.
Don't forget to update patch 10 with the new name.


/**
 * For indexed drawing, these fields apply after index lookup.
 */


With above fixed,
Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/19] gallium: add tessellation shader types

2015-05-03 Thread Glenn Kennard
On Sat, 02 May 2015 22:16:25 +0200, Ilia Mirkin   
wrote:



Signed-off-by: Ilia Mirkin 
---
 src/gallium/auxiliary/tgsi/tgsi_info.c | 4 
 src/gallium/auxiliary/tgsi/tgsi_strings.c  | 4 +++-
 src/gallium/auxiliary/tgsi/tgsi_strings.h  | 2 +-
 src/gallium/include/pipe/p_defines.h   | 6 --
 src/gallium/include/pipe/p_shader_tokens.h | 4 +++-
 5 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c  
b/src/gallium/auxiliary/tgsi/tgsi_info.c

index 3cab86e..eb447cb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -302,6 +302,10 @@ tgsi_get_processor_name( uint processor )
   return "fragment shader";
case TGSI_PROCESSOR_GEOMETRY:
   return "geometry shader";
+   case TGSI_PROCESSOR_TESSCTRL:
+  return "tessellation control shader";
+   case TGSI_PROCESSOR_TESSEVAL:
+  return "tessellation evaluation shader";
default:
   return "unknown shader type!";
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c  
b/src/gallium/auxiliary/tgsi/tgsi_strings.c

index 9b727cf..e712f30 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -32,11 +32,13 @@
 #include "tgsi_strings.h"
-const char *tgsi_processor_type_names[4] =
+const char *tgsi_processor_type_names[6] =


Don't forget to update the declaration in tgsi_strings.h


 {
"FRAG",
"VERT",
"GEOM",
+   "TESSC",
+   "TESSE",


A bit silly to shorten these when the dumps dedicate an entire line for  
printing the name.



"COMP"
 };
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.h  
b/src/gallium/auxiliary/tgsi/tgsi_strings.h

index 90014a2..71e7437 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.h
@@ -38,7 +38,7 @@ extern "C" {
 #endif
-extern const char *tgsi_processor_type_names[4];
+extern const char *tgsi_processor_type_names[6];
extern const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT];
diff --git a/src/gallium/include/pipe/p_defines.h  
b/src/gallium/include/pipe/p_defines.h

index 67f48e4..48c182f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -404,8 +404,10 @@ enum pipe_flush_flags
 #define PIPE_SHADER_VERTEX   0
 #define PIPE_SHADER_FRAGMENT 1
 #define PIPE_SHADER_GEOMETRY 2
-#define PIPE_SHADER_COMPUTE  3
-#define PIPE_SHADER_TYPES4
+#define PIPE_SHADER_TESSCTRL 3
+#define PIPE_SHADER_TESSEVAL 4


Most of the gallium names are spelled out without abbreviations, i.e.  
PIPE_SHADER_TESSELLATION_CONTROL/EVALUATION



+#define PIPE_SHADER_COMPUTE  5
+#define PIPE_SHADER_TYPES6
/**
diff --git a/src/gallium/include/pipe/p_shader_tokens.h  
b/src/gallium/include/pipe/p_shader_tokens.h

index c14bcbc..776b0d4 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -43,7 +43,9 @@ struct tgsi_header
 #define TGSI_PROCESSOR_FRAGMENT  0
 #define TGSI_PROCESSOR_VERTEX1
 #define TGSI_PROCESSOR_GEOMETRY  2
-#define TGSI_PROCESSOR_COMPUTE   3
+#define TGSI_PROCESSOR_TESSCTRL  3
+#define TGSI_PROCESSOR_TESSEVAL  4
+#define TGSI_PROCESSOR_COMPUTE   5
struct tgsi_processor
 {


With above niggles fixed
Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] nir: Allow feq/fne/ieq/ine to be optimized with inot.

2015-05-06 Thread Glenn Kennard

On Wed, 06 May 2015 23:12:54 +0200, Matt Turner  wrote:


instructions in affected programs: 380 -> 376 (-1.05%)
helped:2
---
Did we just completely forget these in commit 391fb32b, or is there a
reason to not include them?

 src/glsl/nir/nir_opt_algebraic.py | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/glsl/nir/nir_opt_algebraic.py  
b/src/glsl/nir/nir_opt_algebraic.py

index b0a1f24..400d60e 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -83,8 +83,12 @@ optimizations = [
# Comparison simplifications
(('inot', ('flt', a, b)), ('fge', a, b)),
(('inot', ('fge', a, b)), ('flt', a, b)),
+   (('inot', ('feq', a, b)), ('fne', a, b)),
+   (('inot', ('fne', a, b)), ('feq', a, b)),


These two will produce inverted results for NaN inputs. The GLSL 4.5 spec  
doesn't mention requiring IEEE 754-compliant comparison operators, though, so  
this is probably okay.



(('inot', ('ilt', a, b)), ('ige', a, b)),
(('inot', ('ige', a, b)), ('ilt', a, b)),
+   (('inot', ('ieq', a, b)), ('ine', a, b)),
+   (('inot', ('ine', a, b)), ('ieq', a, b)),
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
(('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
(('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),



Patches 1-5 are
Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: Implement BPTC texture support

2014-08-13 Thread Glenn Kennard
Requires Evergreen/Cayman

Signed-off-by: Glenn Kennard 
---
 docs/GL3.txt |  2 +-
 docs/relnotes/10.3.html  |  2 +-
 src/gallium/drivers/r600/r600_state_common.c | 24 
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 75ac69a..6f97f3d 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -131,7 +131,7 @@ GL 4.1, GLSL 4.10:
 
 GL 4.2, GLSL 4.20:
 
-  GL_ARB_texture_compression_bptc  DONE (i965, nvc0)
+  GL_ARB_texture_compression_bptc  DONE (i965, nvc0, r600)
   GL_ARB_compressed_texture_pixel_storage  DONE (all drivers)
   GL_ARB_shader_atomic_countersDONE (i965)
   GL_ARB_texture_storage   DONE (all drivers)
diff --git a/docs/relnotes/10.3.html b/docs/relnotes/10.3.html
index 519ea2c..6d9eafd 100644
--- a/docs/relnotes/10.3.html
+++ b/docs/relnotes/10.3.html
@@ -63,7 +63,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_OES_compressed_ETC1_RGB8_texture on nv30, nv50, nvc0, r300, r600, 
radeonsi, softpipe, llvmpipe
 A new software rasterizer driver (kms_swrast_dri.so) that works with
 DRM drivers that don't have a full-fledged GEM (such as qxl or simpledrm)
-GL_ARB_texture_compression_bptc on i965/gen7+, nvc0
+GL_ARB_texture_compression_bptc on i965/gen7+, nvc0, r600/evergreen+
 Distribute the Khronos GL/glcorearb.h header file.
 
 
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 207f07e..51a4f8f 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1967,6 +1967,30 @@ uint32_t r600_translate_texformat(struct pipe_screen 
*screen,
}
}
 
+   if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
+   if (!enable_s3tc)
+   goto out_unknown;
+
+   if (rscreen->b.chip_class < EVERGREEN)
+   goto out_unknown;
+
+   switch (format) {
+   case PIPE_FORMAT_BPTC_RGBA_UNORM:
+   case PIPE_FORMAT_BPTC_SRGBA:
+   result = FMT_BC7;
+   is_srgb_valid = TRUE;
+   goto out_word4;
+   case PIPE_FORMAT_BPTC_RGB_FLOAT:
+   word4 |= sign_bit[0] | sign_bit[1] | 
sign_bit[2];
+   /* fall through */
+   case PIPE_FORMAT_BPTC_RGB_UFLOAT:
+   result = FMT_BC6;
+   goto out_word4;
+   default:
+   goto out_unknown;
+   }
+   }
+
if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
switch (format) {
case PIPE_FORMAT_R8G8_B8G8_UNORM:
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: Implement ARB_derivative_control

2014-08-14 Thread Glenn Kennard
Requires Evergreen/Cayman

Signed-off-by: Glenn Kennard 
---
Passes ARB_derivative_control piglit tests, no regressions.
Depends on Ilia's derivative control gallium infrastructure
patches.

 docs/GL3.txt   |  2 +-
 src/gallium/drivers/r600/r600_pipe.c   |  2 +-
 src/gallium/drivers/r600/r600_shader.c | 20 
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 9eba525..202dbed 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -188,7 +188,7 @@ GL 4.5, GLSL 4.50:
   GL_ARB_clip_control  not started
   GL_ARB_conditional_render_inverted   not started
   GL_ARB_cull_distance not started
-  GL_ARB_derivative_controlnot started
+  GL_ARB_derivative_controlDONE (nv50, nvc0, r600)
   GL_ARB_direct_state_access   not started
   GL_ARB_get_texture_sub_image started (Brian Paul)
   GL_ARB_shader_texture_image_samples  not started
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 8a5ba79..bf52a19 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -305,6 +305,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TEXTURE_QUERY_LOD:
+   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
return family >= CHIP_CEDAR ? 1 : 0;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return family >= CHIP_CEDAR ? 4 : 0;
@@ -319,7 +320,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_DRAW_INDIRECT:
-   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
return 0;
 
/* Stream output. */
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index e3407d5..5a6fa05 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5318,6 +5318,11 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + 
inst->Dst[0].Register.Index;
 
+   if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE ||
+   inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) {
+   tex.inst_mod = 1; /* per pixel gradient calculation instead of 
per 2x2 quad */
+   }
+
if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
int8_t texture_component_select = ctx->literals[4 * 
inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX];
tex.inst_mod = texture_component_select;
@@ -6906,6 +6911,11 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_LSB,   0, ALU_OP1_FFBL_INT, tgsi_unsupported},
{TGSI_OPCODE_IMSB,  0, ALU_OP1_FFBH_INT, tgsi_unsupported},
{TGSI_OPCODE_UMSB,  0, ALU_OP1_FFBH_UINT, tgsi_unsupported},
+   {TGSI_OPCODE_INTERP_CENTROID,   0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, 
tgsi_unsupported},
+   {TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, 
tgsi_unsupported},
+   {TGSI_OPCODE_DDX_FINE,  0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_DDY_FINE,  0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_LAST,  0, ALU_OP0_NOP, tgsi_unsupported},
 };
 
@@ -7109,6 +7119,11 @@ static struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_LSB,   0, ALU_OP1_FFBL_INT, tgsi_op2},
{TGSI_OPCODE_IMSB,  0, ALU_OP1_FFBH_INT, tgsi_msb},
{TGSI_OPCODE_UMSB,  0, ALU_OP1_FFBH_UINT, tgsi_msb},
+   {TGSI_OPCODE_INTERP_CENTROID,   0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, 
tgsi_unsupported},
+   {TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, 
tgsi_unsupported},
+   {TGSI_OPCODE_DDX_FINE,  0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
+   {TGSI_OPCODE_DDY_FINE,  0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
{TGSI_OPCODE_LAST,  0, ALU_OP0_NOP, tgsi_unsupported},
 };
 
@@ -7313,5 +7328,10 @@ static struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] = {
{TGSI_OPCODE_LSB,   0, ALU_OP1_FFBL_INT, tgsi_op2},
{TGSI_OPCODE_IMSB,  0, ALU_OP1_FFBH_INT, tgsi_msb},
{TGSI_OPCODE_UMSB,  0, ALU_OP1_FFBH_UINT, tgsi_msb},
+   {TGSI_OPCODE_INTERP_CENTROID,   0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, 
tgsi_unsupporte

[Mesa-dev] [PATCH] r600g: Implement ARB_derivative_control

2014-08-15 Thread Glenn Kennard
Requires Evergreen/Cayman

Signed-off-by: Glenn Kennard 
---
Changes since v1:
 Move TGSI_OPCODE_DD[XY]_FINE in opcode tables to reflect
 changed enum values.

All ARB_derivative_control piglit tests pass on radeon 6670

 docs/GL3.txt   |  2 +-
 src/gallium/drivers/r600/r600_pipe.c   |  2 +-
 src/gallium/drivers/r600/r600_shader.c | 20 +++-
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 12b8f62..b38e42c 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -188,7 +188,7 @@ GL 4.5, GLSL 4.50:
   GL_ARB_clip_control  not started
   GL_ARB_conditional_render_inverted   not started
   GL_ARB_cull_distance not started
-  GL_ARB_derivative_controlDONE (i965, nv50, nvc0)
+  GL_ARB_derivative_controlDONE (i965, nv50, nvc0, 
r600)
   GL_ARB_direct_state_access   not started
   GL_ARB_get_texture_sub_image started (Brian Paul)
   GL_ARB_shader_texture_image_samples  not started
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 8a5ba79..bf52a19 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -305,6 +305,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TEXTURE_QUERY_LOD:
+   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
return family >= CHIP_CEDAR ? 1 : 0;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return family >= CHIP_CEDAR ? 4 : 0;
@@ -319,7 +320,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_DRAW_INDIRECT:
-   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
return 0;
 
/* Stream output. */
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 481e9eb..3f089b4 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5319,6 +5319,11 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + 
inst->Dst[0].Register.Index;
 
+   if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE ||
+   inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) {
+   tex.inst_mod = 1; /* per pixel gradient calculation instead of 
per 2x2 quad */
+   }
+
if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
int8_t texture_component_select = ctx->literals[4 * 
inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX];
tex.inst_mod = texture_component_select;
@@ -6789,9 +6794,8 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
{76,0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ELSE,  0, ALU_OP0_NOP, tgsi_else},
{TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
-   /* gap */
-   {79,0, ALU_OP0_NOP, tgsi_unsupported},
-   {80,0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_DDX_FINE,  0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_DDY_FINE,  0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_POPA,  0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CEIL,  0, ALU_OP1_CEIL, tgsi_op2},
@@ -6992,9 +6996,8 @@ static struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] = {
{76,0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ELSE,  0, ALU_OP0_NOP, tgsi_else},
{TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
-   /* gap */
-   {79,0, ALU_OP0_NOP, tgsi_unsupported},
-   {80,0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_DDX_FINE,  0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
+   {TGSI_OPCODE_DDY_FINE,  0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
{TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_POPA,  0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CEIL,  0, ALU_OP1_CEIL, tgsi_op2},
@@ -7195,9 +7198,8 @@ static struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] = {
{76,0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ELSE,  0, ALU_OP0_NOP, tgsi_else},
{TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
-   /* gap */
-   {79,0, ALU_OP0_NOP, tgsi_unsupported},
-   {80, 

[Mesa-dev] [PATCH] r600g: Fix missing SET_TEXTURE_OFFSETS

2014-08-17 Thread Glenn Kennard
SB needs a bit of special handling for
instructions without obvious side effects, to
keep it from deleting them.

Fixes failing non-const ARB_gpu_shader5
textureOffsets piglits with sb enabled.

Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/r600_isa.h|   9 +-
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 119 ++---
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp   |  13 ++-
 src/gallium/drivers/r600/sb/sb_pass.h  |   3 +
 4 files changed, 87 insertions(+), 57 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_isa.h 
b/src/gallium/drivers/r600/r600_isa.h
index c6bb869..ec3f702 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -123,6 +123,9 @@ enum fetch_op_flags
 
FF_VTX  = (1<<5),
FF_MEM  = (1<<6),
+
+   FF_SET_TEXTURE_OFFSETS = (1<<7),
+   FF_USE_TEXTURE_OFFSETS = (1<<8),
 };
 
 /* flags for CF instructions */
@@ -523,7 +526,7 @@ static const struct fetch_op_info fetch_op_table[] = {
{"GET_GRADIENTS_H_FINE",  {   -1,-1,  
0x000107,  0x000107 }, FF_GETGRAD },
{"GET_GRADIENTS_V_FINE",  {   -1,-1,  
0x000108,  0x000108 }, FF_GETGRAD },
{"GET_LERP",  { 0x09,  0x09,
-1,-1 }, 0 },
-   {"SET_TEXTURE_OFFSETS",   {   -1,-1,  
0x09,  0x09 }, 0 },
+   {"SET_TEXTURE_OFFSETS",   {   -1,-1,  
0x09,  0x09 }, FF_SET_TEXTURE_OFFSETS },
{"KEEP_GRADIENTS",{   -1,  0x0A,  
0x0A,  0x0A }, 0 },
{"SET_GRADIENTS_H",   { 0x0B,  0x0B,  
0x0B,  0x0B }, FF_SETGRAD },
{"SET_GRADIENTS_V",   { 0x0C,  0x0C,  
0x0C,  0x0C }, FF_SETGRAD },
@@ -550,7 +553,7 @@ static const struct fetch_op_info fetch_op_table[] = {
{"GATHER4",   {   -1,-1,  
0x15,  0x15 }, FF_TEX },
{"SAMPLE_G_LB",   { 0x16,  0x16,  
0x16,  0x16 }, FF_TEX | FF_USEGRAD},
{"SAMPLE_G_LZ",   { 0x17,  0x17,
-1,-1 }, FF_TEX | FF_USEGRAD},
-   {"GATHER4_O", {   -1,-1,  
0x17,  0x17 }, FF_TEX },
+   {"GATHER4_O", {   -1,-1,  
0x17,  0x17 }, FF_TEX | FF_USE_TEXTURE_OFFSETS},
{"SAMPLE_C",  { 0x18,  0x18,  
0x18,  0x18 }, FF_TEX },
{"SAMPLE_C_L",{ 0x19,  0x19,  
0x19,  0x19 }, FF_TEX },
{"SAMPLE_C_LB",   { 0x1A,  0x1A,  
0x1A,  0x1A }, FF_TEX },
@@ -560,7 +563,7 @@ static const struct fetch_op_info fetch_op_table[] = {
{"GATHER4_C", {   -1,-1,  
0x1D,  0x1D }, FF_TEX },
{"SAMPLE_C_G_LB", { 0x1E,  0x1E,  
0x1E,  0x1E }, FF_TEX | FF_USEGRAD},
{"SAMPLE_C_G_LZ", { 0x1F,  0x1F,
-1,-1 }, FF_TEX | FF_USEGRAD},
-   {"GATHER4_C_O",   {   -1,-1,  
0x1F,  0x1F }, FF_TEX }
+   {"GATHER4_C_O",   {   -1,-1,  
0x1F,  0x1F }, FF_TEX | FF_USE_TEXTURE_OFFSETS}
 };
 
 static const struct cf_op_info cf_op_table[] = {
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 99a20eb..5c22f96 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -394,81 +394,96 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, 
alu_node* a) {
}
 }
 
-void bc_finalizer::emit_set_grad(fetch_node* f) {
+void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned 
arg_start)
+{
+   int reg = -1;
 
-   assert(f->src.size() == 12);
-   unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H 
};
+   for (unsigned chan = 0; chan < 4; ++chan) {
 
-   unsigned arg_start = 0;
+   dst.bc.dst_sel[chan] = SEL_MASK;
 
-   for (unsigned op = 0; op < 2; ++op) {
-   fetch_node *n = sh.create_fetch();
-   n->bc.set_op(ops[op]);
+   unsigned sel = SEL_MASK;
 
-   // FIXME extract this loop into a separate method and reuse it
+   value *v = src.src[arg_start + chan];
 

[Mesa-dev] [PATCH] r600g: Fix flat/smooth shade state toggle

2014-08-20 Thread Glenn Kennard
If only the flat/smooth shade state changed between
two calls the prior code would miss updating the
hardware state.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=81967
Signed-off-by: Glenn Kennard 
---
Tested on radeon 6670, no piglit regressions

 src/gallium/drivers/r600/evergreen_state.c   | 2 --
 src/gallium/drivers/r600/r600_shader.h   | 2 +-
 src/gallium/drivers/r600/r600_state.c| 2 --
 src/gallium/drivers/r600/r600_state_common.c | 6 +++---
 4 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 841ad0c..b490145 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2927,8 +2927,6 @@ void evergreen_update_ps_state(struct pipe_context *ctx, 
struct r600_pipe_shader
shader->ps_depth_export = z_export | stencil_export;
 
shader->sprite_coord_enable = sprite_coord_enable;
-   if (rctx->rasterizer)
-   shader->flatshade = rctx->rasterizer->flatshade;
 }
 
 void evergreen_update_es_state(struct pipe_context *ctx, struct 
r600_pipe_shader *shader)
diff --git a/src/gallium/drivers/r600/r600_shader.h 
b/src/gallium/drivers/r600/r600_shader.h
index d6db8f0..8b32966 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -89,6 +89,7 @@ struct r600_shader_key {
unsigned alpha_to_one:1;
unsigned nr_cbufs:4;
unsigned vs_as_es:1;
+   unsigned flatshade:1;
 };
 
 struct r600_shader_array {
@@ -106,7 +107,6 @@ struct r600_pipe_shader {
struct r600_command_buffer command_buffer; /* register writes */
struct r600_resource*bo;
unsignedsprite_coord_enable;
-   unsignedflatshade;
unsignedpa_cl_vs_out_cntl;
unsignednr_ps_color_outputs;
struct r600_shader_key  key;
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 607b199..3f5cb2b 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2532,8 +2532,6 @@ void r600_update_ps_state(struct pipe_context *ctx, 
struct r600_pipe_shader *sha
shader->ps_depth_export = z_export | stencil_export;
 
shader->sprite_coord_enable = sprite_coord_enable;
-   if (rctx->rasterizer)
-   shader->flatshade = rctx->rasterizer->flatshade;
 }
 
 void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader 
*shader)
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 7594d0e..d8243d1 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -699,6 +699,8 @@ static INLINE struct r600_shader_key 
r600_shader_selector_key(struct pipe_contex
/* Dual-source blending only makes sense with nr_cbufs == 1. */
if (key.nr_cbufs == 1 && rctx->dual_src_blend)
key.nr_cbufs = 2;
+   if (rctx->rasterizer->flatshade)
+   key.flatshade = 1;
} else if (sel->type == PIPE_SHADER_VERTEX) {
key.vs_as_es = (rctx->gs_shader != NULL);
}
@@ -1250,9 +1252,7 @@ static bool r600_update_derived_state(struct r600_context 
*rctx)
}
 
if (unlikely(!ps_dirty && rctx->ps_shader && rctx->rasterizer &&
-   ((rctx->rasterizer->sprite_coord_enable != 
rctx->ps_shader->current->sprite_coord_enable) ||
-   (rctx->rasterizer->flatshade != 
rctx->ps_shader->current->flatshade {
-
+   ((rctx->rasterizer->sprite_coord_enable != 
rctx->ps_shader->current->sprite_coord_enable {
if (rctx->b.chip_class >= EVERGREEN)
evergreen_update_ps_state(ctx, 
rctx->ps_shader->current);
else
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: Fix flat/smooth shade state toggle

2014-08-20 Thread Glenn Kennard

On Wed, 20 Aug 2014 20:16:50 +0200, Marek Olšák  wrote:


Generally, only states which need a full shader compilation must be in
the shader key. Flatshade is not one of them, because it only causes
register updates, so this is not a proper solution. Or am I missing
something?

Marek



Evergreen/Cayman need to recompile the shader since the interpolation is  
done using either INTERP_XY instruction for smooth or INTERP_LOAD_P0 for  
flat. R600-R700 technically don't need to, but the prior code already does  
anyway since flat/smooth register setup is done from output values  
computed when compiling the shader.



/Glenn
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: Fix flat/smooth shade state toggle

2014-08-20 Thread Glenn Kennard

On Wed, 20 Aug 2014 21:04:34 +0200, Marek Olšák  wrote:


The flag is only used to set S_028644_FLAT_SHADE on all r600g chips. I
don't see it being used by the shader code generation.

Marek



Ah, I see. I will respin the patch with an alternate solution that won't  
require shader recompilation. Consider v1 dropped.



/Glenn
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] r600g: Fix flat/smooth shade state toggle

2014-08-20 Thread Glenn Kennard
If only the flat/smooth shade state changed between
two render calls the prior code would miss updating the
hardware state.

Also add a check for sprite coord enable, which could otherwise
suffer from the same type of issue.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=81967
Signed-off-by: Glenn Kennard 
---
V2:
 - No new shader variant created
 - Also check for sprite coord enable since its state is updated
   in similar fashion to flatshade.

 src/gallium/drivers/r600/r600_state_common.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 7594d0e..028d800 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1227,7 +1227,9 @@ static bool r600_update_derived_state(struct r600_context 
*rctx)
if (unlikely(!rctx->ps_shader->current))
return false;
 
-   if (unlikely(ps_dirty || rctx->pixel_shader.shader != 
rctx->ps_shader->current)) {
+   if (unlikely(ps_dirty || rctx->pixel_shader.shader != 
rctx->ps_shader->current ||
+   rctx->rasterizer->sprite_coord_enable != 
rctx->ps_shader->current->sprite_coord_enable ||
+   rctx->rasterizer->flatshade != 
rctx->ps_shader->current->flatshade)) {
 
if (rctx->cb_misc_state.nr_ps_color_outputs != 
rctx->ps_shader->current->nr_ps_color_outputs) {
rctx->cb_misc_state.nr_ps_color_outputs = 
rctx->ps_shader->current->nr_ps_color_outputs;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/37] Geometry shader support in Sandy Bridge

2014-08-20 Thread Glenn Kennard
On Thu, 21 Aug 2014 01:49:06 +0200, Mike Lothian   
wrote:




Yes that fixed the Oil Rush one - I don't need to pass that with r600g
though so I'm thinking that maybe i965 isn't using that driconf file
___


r600g doesn't have support for GL_ARB_sample_shading just yet, which  
is why it doesn't complain.


If you copy a current git src/mesa/drivers/dri/common/drirc file to  
/etc/drirc, it contains the settings needed to work around a few Unigine  
buglets. The "allow_glsl_extension_directive_midshader" directive is  
hooked up for both gallium and intel AFAIK, so it should just work.



/Glenn
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: Implement sm5 geometry shader instancing

2014-08-25 Thread Glenn Kennard
Requires Evergreen or later hardware.

Signed-off-by: Glenn Kennard 
---
 docs/GL3.txt   |  2 +-
 src/gallium/drivers/r600/evergreen_state.c |  4 ++--
 src/gallium/drivers/r600/r600_shader.c | 11 +++
 src/gallium/drivers/r600/r600_shader.h |  1 +
 4 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 76412c3..026580f 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -102,7 +102,7 @@ GL 4.0, GLSL 4.00:
   - Fused multiply-add DONE ()
   - Packing/bitfield/conversion functions  DONE (r600)
   - Enhanced textureGather DONE (r600, radeonsi)
-  - Geometry shader instancing DONE ()
+  - Geometry shader instancing DONE (r600)
   - Geometry shader multiple streams   DONE ()
   - Enhanced per-sample shadingDONE (r600)
   - Interpolation functionsDONE ()
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 841ad0c..bae88e5 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2968,8 +2968,8 @@ void evergreen_update_gs_state(struct pipe_context *ctx, 
struct r600_pipe_shader
 
if (rctx->screen->b.info.drm_minor >= 35) {
r600_store_context_reg(cb, R_028B90_VGT_GS_INSTANCE_CNT,
-   S_028B90_CNT(0) |
-   S_028B90_ENABLE(0));
+   S_028B90_CNT(MIN2(rshader->gs_num_invocations, 
127)) |
+   S_028B90_ENABLE(rshader->gs_num_invocations > 
0));
}
r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
r600_store_value(cb, cp_shader->ring_item_size >> 2);
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 3f089b4..9f10c20 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -706,6 +706,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
break;
} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
break;
+   else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID)
+   break;
default:
R600_ERR("unsupported file %d declaration\n", 
d->Declaration.File);
return -EINVAL;
@@ -811,6 +813,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
r600_src->swizzle[2] = 0;
r600_src->swizzle[3] = 0;
r600_src->sel = 0;
+   } else if 
(ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == 
TGSI_SEMANTIC_INVOCATIONID) {
+   r600_src->swizzle[0] = 3;
+   r600_src->swizzle[1] = 3;
+   r600_src->swizzle[2] = 3;
+   r600_src->swizzle[3] = 3;
+   r600_src->sel = 1;
}
} else {
if (tgsi_src->Register.Indirect)
@@ -1753,6 +1761,9 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
shader->gs_max_out_vertices = 
property->u[0].Data;
break;
+   case TGSI_PROPERTY_GS_INVOCATIONS:
+   shader->gs_num_invocations = 
property->u[0].Data;
+   break;
}
break;
default:
diff --git a/src/gallium/drivers/r600/r600_shader.h 
b/src/gallium/drivers/r600/r600_shader.h
index d6db8f0..4b27ede 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -74,6 +74,7 @@ struct r600_shader {
unsignedgs_input_prim;
unsignedgs_output_prim;
unsignedgs_max_out_vertices;
+   unsignedgs_num_invocations;
/* size in bytes of a data item in the ring (single vertex data) */
unsignedring_item_size;
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: Implement GL_ARB_sample_shading

2014-08-31 Thread Glenn Kennard
Signed-off-by: Glenn Kennard 
---
Tested on radeon 6670, all sample shading piglits pass, no
regressions, as well as unigine valley basic, tesseract with
MSAA enabled.

It would be great if one or more people could test this
on pre-evergreen hardware, and cayman, since I don't
have any such hardware to test with.

Added a comment on a pre-existing bug discovered while
implementing sample shading where driver const buffers
can alias user provided ones.

 docs/GL3.txt |   4 +-
 docs/relnotes/10.4.html  |  62 ++
 src/gallium/drivers/r600/evergreen_state.c   | 104 ++---
 src/gallium/drivers/r600/evergreend.h|   3 +
 src/gallium/drivers/r600/r600_pipe.c |   2 +-
 src/gallium/drivers/r600/r600_pipe.h |   9 +
 src/gallium/drivers/r600/r600_shader.c   | 305 +--
 src/gallium/drivers/r600/r600_shader.h   |   7 +-
 src/gallium/drivers/r600/r600_state.c|  43 +++-
 src/gallium/drivers/r600/r600d.h |   3 +
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp |  21 +-
 11 files changed, 444 insertions(+), 119 deletions(-)
 create mode 100644 docs/relnotes/10.4.html

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 56c4994..5baacc1 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -104,13 +104,13 @@ GL 4.0, GLSL 4.00:
   - Fused multiply-add DONE ()
   - Packing/bitfield/conversion functions  DONE (r600)
   - Enhanced textureGather DONE (r600, radeonsi)
-  - Geometry shader instancing DONE ()
+  - Geometry shader instancing DONE (r600)
   - Geometry shader multiple streams   DONE ()
   - Enhanced per-sample shadingDONE (r600)
   - Interpolation functionsDONE ()
   - New overload resolution rules  DONE
   GL_ARB_gpu_shader_fp64   started (Dave)
-  GL_ARB_sample_shadingDONE (i965, nv50, nvc0, 
radeonsi)
+  GL_ARB_sample_shadingDONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_shader_subroutine not started
   GL_ARB_tessellation_shader   started (Chris, Ilia)
   GL_ARB_texture_buffer_object_rgb32   DONE (i965, nvc0, r600, 
radeonsi, llvmpipe, softpipe)
diff --git a/docs/relnotes/10.4.html b/docs/relnotes/10.4.html
new file mode 100644
index 000..d56275d
--- /dev/null
+++ b/docs/relnotes/10.4.html
@@ -0,0 +1,62 @@
+http://www.w3.org/TR/html4/loose.dtd";>
+
+
+  
+  Mesa Release Notes
+  
+
+
+
+
+  The Mesa 3D Graphics Library
+
+
+
+
+
+Mesa 10.4 Release Notes / TBD
+
+
+Mesa 10.4 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 10.4.1.
+
+
+Mesa 10.4 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+MD5 checksums
+
+TBD.
+
+
+
+New features
+
+
+Note: some of the new features are only available with certain drivers.
+
+
+
+GL_ARB_sample_shading on r600
+
+
+
+Bug fixes
+
+TBD.
+
+Changes
+
+
+
+
+
+
+
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 9f0e82d..9531893 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1398,7 +1398,7 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
 
/* MSAA. */
if (rctx->b.chip_class == EVERGREEN)
-   rctx->framebuffer.atom.num_dw += 14; /* Evergreen */
+   rctx->framebuffer.atom.num_dw += 17; /* Evergreen */
else
rctx->framebuffer.atom.num_dw += 28; /* Cayman */
 
@@ -1418,8 +1418,36 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
}
 
rctx->framebuffer.atom.dirty = true;
+
+   /* set sample xy locations as array of fragment shader constants */
+   {
+   struct pipe_constant_buffer constbuf = {0};
+   float values[4*16] = {0.0f};
+   int i;
+   assert(rctx->framebuffer.nr_samples <= Elements(values)/4);
+   for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
+   ctx->get_sample_position(ctx, 
rctx->framebuffer.nr_samples, i, &values[4*i]);
+   }
+   constbuf.user_buffer = values;
+   constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4;
+   

[Mesa-dev] [PATCH] r600g: Implement sm5 interpolation functions

2014-09-01 Thread Glenn Kennard
Requires evergreen/cayman

Signed-off-by: Glenn Kennard 
---
This patch depends on r600g: Implement GL_ARB_sample_shading

Implementation note: interpolateAtSample/Offset does an affine rather
than perspective-correct reverse projection. I think that, for the small
intra-pixel offsets used, this should typically give more accurate
results because no bits are lost to the reciprocal and the additional
multiplication. As a bonus it takes slightly fewer ops, and as far as
I can tell it is within spec to skip the perspective correction.

 docs/GL3.txt   |   2 +-
 src/gallium/drivers/r600/evergreen_state.c |   3 +
 src/gallium/drivers/r600/r600_shader.c | 211 -
 3 files changed, 211 insertions(+), 5 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 5baacc1..8e69068 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -107,7 +107,7 @@ GL 4.0, GLSL 4.00:
   - Geometry shader instancing DONE (r600)
   - Geometry shader multiple streams   DONE ()
   - Enhanced per-sample shadingDONE (r600)
-  - Interpolation functionsDONE ()
+  - Interpolation functionsDONE (r600)
   - New overload resolution rules  DONE
   GL_ARB_gpu_shader_fp64   started (Dave)
   GL_ARB_sample_shadingDONE (i965, nv50, nvc0, 
r600, radeonsi)
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 9531893..b857292 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1427,6 +1427,9 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
assert(rctx->framebuffer.nr_samples <= Elements(values)/4);
for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
ctx->get_sample_position(ctx, 
rctx->framebuffer.nr_samples, i, &values[4*i]);
+   /* interpolateAtSample needs offset from center point */
+   values[4*i+2] = values[4*i+0] - 0.5f;
+   values[4*i+3] = values[4*i+1] - 0.5f;
}
constbuf.user_buffer = values;
constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4;
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index fd7d845..7e5a526 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -294,6 +294,8 @@ struct r600_shader_ctx {
int face_gpr;
/* sample id is .w component stored in fixed point position register */
int fixed_pt_position_gpr;
+   /* evergreen/cayman needs to add shader->input entries for 
SAMPLEMASK/SAMPLEID */
+   int num_system_value_inputs;
int colors_used;
boolean clip_vertex_write;
unsignedcv_output;
@@ -778,6 +780,8 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
}
}
 
+   ctx->num_system_value_inputs = num_regs;
+
return num_regs;
 }
 
@@ -814,7 +818,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
ctx->eg_interpolators[k].enabled = TRUE;
}
 
-   /* Need to scan shader for system values */
+   /* Need to scan shader for interpolateAtSample/Offset/Centroid */
if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
return 0;
}
@@ -824,7 +828,31 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
while (!tgsi_parse_end_of_tokens(&parse)) {
tgsi_parse_token(&parse);
 
-   if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
+   if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+   const struct tgsi_full_instruction *inst
+   = &parse.FullToken.FullInstruction;
+   if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_SAMPLE ||
+   inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_OFFSET ||
+   inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_CENTROID)
+   {
+   int interpolate, location, k;
+
+   if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_SAMPLE) {
+   location = TGSI_INTERPOLATE_LOC_CENTER;
+   has_sampleid = true;
+   } else if (inst->Instruction.Opcode == 
TGSI_OPCO

Re: [Mesa-dev] [PATCH] r600g: Implement GL_ARB_sample_shading

2014-09-02 Thread Glenn Kennard
On Mon, 01 Sep 2014 00:02:18 +0200, Glenn Kennard  
 wrote:



Signed-off-by: Glenn Kennard 
---
Tested on radeon 6670, all sample shading piglits pass, no
regressions, as well as unigine valley basic, tesseract with
MSAA enabled.

It would be great if one or more people could test this
on pre-evergreen hardware, and cayman, since I don't
have any such hardware to test with.



R600 is broken by this patch, so consider v1 NAK:ed for now.

Thanks to everyone who helped out with the testing!


/Glenn
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] r600g: Implement GL_ARB_sample_shading

2014-09-04 Thread Glenn Kennard
Also fixes two sided lighting which was broken at least
on pre-evergreen by commit b1eb00.

Signed-off-by: Glenn Kennard 
---
Changes since patch v1:
Factor out and set sample positions also for pre-evergreen
Misc r600 breakage fixes
Some cleanup

Passes piglit without regressions on radeon 6670.

Cayman and pre-evergreen still need testing before committing.

 docs/GL3.txt |   2 +-
 docs/relnotes/10.4.html  |  62 ++
 src/gallium/drivers/r600/evergreen_state.c   |  85 +---
 src/gallium/drivers/r600/evergreend.h|   3 +
 src/gallium/drivers/r600/r600_pipe.c |   2 +-
 src/gallium/drivers/r600/r600_pipe.h |  10 +
 src/gallium/drivers/r600/r600_shader.c   | 292 ---
 src/gallium/drivers/r600/r600_shader.h   |   6 +-
 src/gallium/drivers/r600/r600_state.c|  49 -
 src/gallium/drivers/r600/r600_state_common.c |  20 ++
 src/gallium/drivers/r600/r600d.h |   3 +
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp |  21 +-
 12 files changed, 435 insertions(+), 120 deletions(-)
 create mode 100644 docs/relnotes/10.4.html

diff --git a/docs/GL3.txt b/docs/GL3.txt
index f5d5e72..6a54873 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -110,7 +110,7 @@ GL 4.0, GLSL 4.00:
   - Interpolation functionsDONE ()
   - New overload resolution rules  DONE
   GL_ARB_gpu_shader_fp64   started (Dave)
-  GL_ARB_sample_shadingDONE (i965, nv50, nvc0, 
radeonsi)
+  GL_ARB_sample_shadingDONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_shader_subroutine not started
   GL_ARB_tessellation_shader   started (Chris, Ilia)
   GL_ARB_texture_buffer_object_rgb32   DONE (i965, nvc0, r600, 
radeonsi, llvmpipe, softpipe)
diff --git a/docs/relnotes/10.4.html b/docs/relnotes/10.4.html
new file mode 100644
index 000..d56275d
--- /dev/null
+++ b/docs/relnotes/10.4.html
@@ -0,0 +1,62 @@
+http://www.w3.org/TR/html4/loose.dtd";>
+
+
+  
+  Mesa Release Notes
+  
+
+
+
+
+  The Mesa 3D Graphics Library
+
+
+
+
+
+Mesa 10.4 Release Notes / TBD
+
+
+Mesa 10.4 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 10.4.1.
+
+
+Mesa 10.4 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+MD5 checksums
+
+TBD.
+
+
+
+New features
+
+
+Note: some of the new features are only available with certain drivers.
+
+
+
+GL_ARB_sample_shading on r600
+
+
+
+Bug fixes
+
+TBD.
+
+Changes
+
+
+
+
+
+
+
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index e7faeaf..e0adb1e 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1400,7 +1400,7 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
 
/* MSAA. */
if (rctx->b.chip_class == EVERGREEN)
-   rctx->framebuffer.atom.num_dw += 14; /* Evergreen */
+   rctx->framebuffer.atom.num_dw += 17; /* Evergreen */
else
rctx->framebuffer.atom.num_dw += 28; /* Cayman */
 
@@ -1420,8 +1420,22 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
}
 
rctx->framebuffer.atom.dirty = true;
+
+   r600_set_sample_locations_constant_buffer(rctx);
 }
 
+static void evergreen_set_min_samples(struct pipe_context *ctx, unsigned 
min_samples)
+{
+   struct r600_context *rctx = (struct r600_context *)ctx;
+
+   if (rctx->ps_iter_samples == min_samples)
+   return;
+
+   rctx->ps_iter_samples = min_samples;
+   if (rctx->framebuffer.nr_samples > 1) {
+   rctx->framebuffer.atom.dirty = true;
+   }
+}
 
 /* 8xMSAA */
 static uint32_t sample_locs_8x[] = {
@@ -1475,7 +1489,7 @@ static void evergreen_get_sample_position(struct 
pipe_context *ctx,
}
 }
 
-static void evergreen_emit_msaa_state(struct r600_context *rctx, int 
nr_samples)
+static void evergreen_emit_msaa_state(struct r600_context *rctx, int 
nr_samples, int ps_iter_samples)
 {
 
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
@@ -1508,10 +1522,12 @@ static void evergreen_emit_msaa_state(struct 
r600_context *rctx, int nr_samples)
 S_028C00_EXPAND_LINE_WIDTH(1)); /* 
R_028C00_PA_SC_LINE_CNTL */
radeon_emit(cs, 
S_02

[Mesa-dev] [PATCH v3] r600g: Implement GL_ARB_sample_shading

2014-09-10 Thread Glenn Kennard
Also fixes two sided lighting which was broken at least
on pre-evergreen by commit b1eb00.

Signed-off-by: Glenn Kennard 
---
Changes since patch v2:
Added workarounds for known hardware issues on R600 and RV770 when
sample shading is used together with hyperz, thanks Marek.

Changes since patch v1:
Factor out and set sample positions also for pre-evergreen
Misc r600 breakage fixes
Some cleanup

Passes piglit without regressions on radeon 6670 and RV770.

R600/R700 fail the ignore-centroid-qualifier and interpolate-at-sample-position
piglit test cases. I believe that is due to the tests requiring more
interpolation precision than typical DX10 feature level hardware has: 11
bits for DX11 hardware vs 8 for DX10, if I remember correctly.

 docs/GL3.txt |   2 +-
 docs/relnotes/10.4.html  |  62 ++
 src/gallium/drivers/r600/evergreen_state.c   |  85 +---
 src/gallium/drivers/r600/evergreend.h|   3 +
 src/gallium/drivers/r600/r600_pipe.c |   2 +-
 src/gallium/drivers/r600/r600_pipe.h |  10 +
 src/gallium/drivers/r600/r600_shader.c   | 292 ---
 src/gallium/drivers/r600/r600_shader.h   |   6 +-
 src/gallium/drivers/r600/r600_state.c|  60 +-
 src/gallium/drivers/r600/r600_state_common.c |  20 ++
 src/gallium/drivers/r600/r600d.h |   3 +
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp |  21 +-
 12 files changed, 446 insertions(+), 120 deletions(-)
 create mode 100644 docs/relnotes/10.4.html

diff --git a/docs/GL3.txt b/docs/GL3.txt
index f02ad67..372368f 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -110,7 +110,7 @@ GL 4.0, GLSL 4.00:
   - Interpolation functionsDONE ()
   - New overload resolution rules  DONE
   GL_ARB_gpu_shader_fp64   started (Dave)
-  GL_ARB_sample_shadingDONE (i965, nv50, nvc0, 
radeonsi)
+  GL_ARB_sample_shadingDONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_shader_subroutine not started
   GL_ARB_tessellation_shader   started (Chris, Ilia)
   GL_ARB_texture_buffer_object_rgb32   DONE (i965, nvc0, r600, 
radeonsi, llvmpipe, softpipe)
diff --git a/docs/relnotes/10.4.html b/docs/relnotes/10.4.html
new file mode 100644
index 000..d56275d
--- /dev/null
+++ b/docs/relnotes/10.4.html
@@ -0,0 +1,62 @@
+http://www.w3.org/TR/html4/loose.dtd";>
+
+
+  
+  Mesa Release Notes
+  
+
+
+
+
+  The Mesa 3D Graphics Library
+
+
+
+
+
+Mesa 10.4 Release Notes / TBD
+
+
+Mesa 10.4 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 10.4.1.
+
+
+Mesa 10.4 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+MD5 checksums
+
+TBD.
+
+
+
+New features
+
+
+Note: some of the new features are only available with certain drivers.
+
+
+
+GL_ARB_sample_shading on r600
+
+
+
+Bug fixes
+
+TBD.
+
+Changes
+
+
+
+
+
+
+
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index e7faeaf..e0adb1e 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1400,7 +1400,7 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
 
/* MSAA. */
if (rctx->b.chip_class == EVERGREEN)
-   rctx->framebuffer.atom.num_dw += 14; /* Evergreen */
+   rctx->framebuffer.atom.num_dw += 17; /* Evergreen */
else
rctx->framebuffer.atom.num_dw += 28; /* Cayman */
 
@@ -1420,8 +1420,22 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
}
 
rctx->framebuffer.atom.dirty = true;
+
+   r600_set_sample_locations_constant_buffer(rctx);
 }
 
+static void evergreen_set_min_samples(struct pipe_context *ctx, unsigned 
min_samples)
+{
+   struct r600_context *rctx = (struct r600_context *)ctx;
+
+   if (rctx->ps_iter_samples == min_samples)
+   return;
+
+   rctx->ps_iter_samples = min_samples;
+   if (rctx->framebuffer.nr_samples > 1) {
+   rctx->framebuffer.atom.dirty = true;
+   }
+}
 
 /* 8xMSAA */
 static uint32_t sample_locs_8x[] = {
@@ -1475,7 +1489,7 @@ static void evergreen_get_sample_position(struct 
pipe_context *ctx,
}
 }
 
-static void evergreen_emit_msaa_state(struct r600_context *rctx, int 
nr_samples)
+static void evergreen_emit_msaa_state(struct r600_context 

Re: [Mesa-dev] [PATCH v2] r600g: Implement GL_ARB_sample_shading

2014-09-10 Thread Glenn Kennard
On Sat, 06 Sep 2014 04:00:01 +0200, Alexandre Demers  
 wrote:



Thanks Marek, you were right.

So, on cayman, after comparing results from both runs, it seems there is  
only one regression which is gs-atan-vec2 under glsl-1.50:
piglit/bin/shader_runner  
/home/ademers/projects/display/piglit/generated_tests/spec/glsl-1.50/execution/built-in-functions/gs-atan-vec2.shader_test  
-auto


The returned value is the following:
Probe color at (5,0)
   Expected: 0.00 1.00 0.00 1.00
   Observed: 1.00 0.00 0.00 1.00



I've found that some of the piglit geometry shader tests tend to fail once
a GPU reset has occurred, and the failures persist until the next power
cycle. Did you do a cold restart after the X crash, before re-running the
tests? It might be worth re-running just the failing test after a restart
to see if the failure goes away.



/Glenn
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] r600g: Implement GL_ARB_sample_shading

2014-09-22 Thread Glenn Kennard
On Wed, 17 Sep 2014 06:10:48 +0200, Alexandre Demers  
 wrote:


Tested with v3. I get the same result as before: everything is fine  
except the gs-atan-vec2 test.


I don't know if this is of any value, but running the command manually  
in a shell gives the following:  
/home/ademers/projects/display/piglit/bin/shader_runner  
/home/ademers/projects/display/piglit/generated_tests/spec/glsl-1.50/execution/built-in-functions/gs-atan-vec4.shader_test  
-auto

PIGLIT: {"result": "pass" }

I assume the test ran correctly, but not in the context of the piglit  
run, am I right?




I can make this test pass or fail pseudo-randomly, depending on timing, on
my 6670 regardless of whether this patch is applied or not, so at least it's
not an actual regression. If I run it under valgrind it always passes. At
least it's not a regression...



/Glenn
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] r600g: Implement ARB_draw_indirect for EG/CM

2014-12-12 Thread Glenn Kennard
Requires Evergreen/Cayman and updated radeon kernel module

Signed-off-by: Glenn Kennard 
---
Changes since V1:
* Fixed 8 bit index case, only triggerable using GLES 3.1, which isn't
  supported yet
* Don't read info struct values that have no meaning for indirect case
* Don't update start_instance/instance_count for indirect cases
* Use bool expression directly in get_param

Benjamin, the #defines are essentially used, but due to a header conflict
it's not possible to include them in this file. I would have broken the
indirect cases out into evergreen_state.c, but this is a
performance-sensitive section of code and inlining is critical, so I did
the next best thing and typed out the define names as comments.

Thanks Marek/Benjamin for V1 review

 docs/GL3.txt |   4 +-
 docs/relnotes/10.5.0.html|   1 +
 src/gallium/drivers/r600/evergreend.h|   6 +-
 src/gallium/drivers/r600/r600_pipe.c |   4 +-
 src/gallium/drivers/r600/r600_state_common.c | 116 ++-
 5 files changed, 105 insertions(+), 26 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 648f5ac..435054a 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -95,7 +95,7 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, 
radeonsi, llvmpipe, soft
 GL 4.0, GLSL 4.00:
 
   GL_ARB_draw_buffers_blendDONE (i965, nv50, nvc0, 
r600, radeonsi, llvmpipe, softpipe)
-  GL_ARB_draw_indirect DONE (i965, nvc0, 
radeonsi, llvmpipe, softpipe)
+  GL_ARB_draw_indirect DONE (i965, nvc0, r600, 
radeonsi, llvmpipe, softpipe)
   GL_ARB_gpu_shader5   DONE (i965, nvc0)
   - 'precise' qualifierDONE
   - Dynamically uniform sampler array indices  DONE (r600)
@@ -159,7 +159,7 @@ GL 4.3, GLSL 4.30:
   GL_ARB_framebuffer_no_attachmentsnot started
   GL_ARB_internalformat_query2 not started
   GL_ARB_invalidate_subdataDONE (all drivers)
-  GL_ARB_multi_draw_indirect   DONE (i965, nvc0, 
radeonsi, llvmpipe, softpipe)
+  GL_ARB_multi_draw_indirect   DONE (i965, nvc0, r600, 
radeonsi, llvmpipe, softpipe)
   GL_ARB_program_interface_query   not started
   GL_ARB_robust_buffer_access_behavior not started
   GL_ARB_shader_image_size not started
diff --git a/docs/relnotes/10.5.0.html b/docs/relnotes/10.5.0.html
index 2987d53..72bb791 100644
--- a/docs/relnotes/10.5.0.html
+++ b/docs/relnotes/10.5.0.html
@@ -49,6 +49,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_EXT_packed_float on freedreno
 GL_EXT_texture_shared_exponent on freedreno
 GL_EXT_texture_snorm on freedreno
+GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600
 
 
 
diff --git a/src/gallium/drivers/r600/evergreend.h 
b/src/gallium/drivers/r600/evergreend.h
index 4989996..0725f0d 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -64,6 +64,8 @@
 #define R600_TEXEL_PITCH_ALIGNMENT_MASK0x7
 
 #define PKT3_NOP   0x10
+#define PKT3_SET_BASE  0x11
+#define PKT3_INDEX_BUFFER_SIZE 0x13
 #define PKT3_DEALLOC_STATE 0x14
 #define PKT3_DISPATCH_DIRECT   0x15
 #define PKT3_DISPATCH_INDIRECT 0x16
@@ -72,7 +74,9 @@
 #define PKT3_REG_RMW   0x21
 #define PKT3_COND_EXEC 0x22
 #define PKT3_PRED_EXEC 0x23
-#define PKT3_START_3D_CMDBUF   0x24
+#define PKT3_DRAW_INDIRECT 0x24
+#define PKT3_DRAW_INDEX_INDIRECT   0x25
+#define PKT3_INDEX_BASE0x26
 #define PKT3_DRAW_INDEX_2  0x27
 #define PKT3_CONTEXT_CONTROL   0x28
 #define PKT3_DRAW_INDEX_IMMD_BE0x29
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 0b571e4..0d8bac2 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -313,6 +313,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
return family >= CHIP_CEDAR ? 1 : 0;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return family >= CHIP_CEDAR ? 4 : 0;
+   case PIPE_CAP_DRAW_INDIRECT:
+   /* kernel command checker support is also required */
+   return family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= 41;
 
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -322,7 +325,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap p

Re: [Mesa-dev] [PATCH 004/133] nir: add the core datastructures

2014-12-19 Thread Glenn Kennard
On Tue, 16 Dec 2014 07:04:14 +0100, Jason Ekstrand   
wrote:



From: Connor Abbott 

This includes all the instructions, ifs, loops, functions, etc. This is
similar to the information in ir.h.

v2: Jason Ekstrand :
   Include ralloc and hash_table from the util directory
---
 src/glsl/Makefile.sources |2 +
 src/glsl/nir/nir.h| 1150  
+

 src/glsl/nir/nir_intrinsics.c |   49 ++
 src/glsl/nir/nir_intrinsics.h |  158 ++
 src/glsl/nir/nir_opcodes.c|   46 ++
 src/glsl/nir/nir_opcodes.h|  346 +
 6 files changed, 1751 insertions(+)
 create mode 100644 src/glsl/nir/nir.h
 create mode 100644 src/glsl/nir/nir_intrinsics.c
 create mode 100644 src/glsl/nir/nir_intrinsics.h
 create mode 100644 src/glsl/nir/nir_opcodes.c
 create mode 100644 src/glsl/nir/nir_opcodes.h

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index c3a90f7..e8eedd1 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -14,6 +14,8 @@ LIBGLCPP_GENERATED_FILES = \
$(GLSL_BUILDDIR)/glcpp/glcpp-parse.c
NIR_FILES = \
+$(GLSL_SRCDIR)/nir/nir_intrinsics.c \
+$(GLSL_SRCDIR)/nir/nir_opcodes.c \
 $(GLSL_SRCDIR)/nir/nir_types.cpp
# libglsl
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
new file mode 100644
index 000..ef486da
--- /dev/null
+++ b/src/glsl/nir/nir.h
@@ -0,0 +1,1150 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person  
obtaining a
+ * copy of this software and associated documentation files (the  
"Software"),
+ * to deal in the Software without restriction, including without  
limitation
+ * the rights to use, copy, modify, merge, publish, distribute,  
sublicense,

+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the  
next
+ * paragraph) shall be included in all copies or substantial portions  
of the

+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,  
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF  
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT  
SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR  
OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,  
ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER  
DEALINGS

+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Connor Abbott (cwabbo...@gmail.com)
+ *
+ */
+
+#pragma once
+
+#include "util/hash_table.h"
+#include "main/set.h"
+#include "../list.h"
+#include "GL/gl.h" /* GLenum */
+#include "util/ralloc.h"
+#include "nir_types.h"
+#include 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct nir_function_overload;
+struct nir_function;
+
+
+/**
+ * Description of built-in state associated with a uniform
+ *
+ * \sa nir_variable::state_slots
+ */
+typedef struct {
+   int tokens[5];
+   int swizzle;
+} nir_state_slot;
+
+typedef enum {
+   nir_var_shader_in,
+   nir_var_shader_out,
+   nir_var_global,
+   nir_var_local,
+   nir_var_uniform,
+   nir_var_system_value
+} nir_variable_mode;
+
+/**
+ * Data stored in an nir_constant
+ */
+union nir_constant_data {
+  unsigned u[16];
+  int i[16];
+  float f[16];
+  bool b[16];
+};
+
+typedef struct nir_constant {
+   /**
+* Value of the constant.
+*
+* The field used to back the values supplied by the constant is  
determined
+* by the type associated with the \c ir_instruction.  Constants may  
be

+* scalars, vectors, or matrices.
+*/
+   union nir_constant_data value;
+
+   /* Array elements / Structure Fields */
+   struct nir_constant **elements;
+} nir_constant;
+
+/**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be  
redeclared

+ * with a layout qualifier.
+ */
+typedef enum {
+nir_depth_layout_none, /**< No depth layout is specified. */
+nir_depth_layout_any,
+nir_depth_layout_greater,
+nir_depth_layout_less,
+nir_depth_layout_unchanged
+} nir_depth_layout;
+
+/**
+ * Either a uniform, global variable, shader input, or shader output.  
Based on

+ * ir_variable - it should be easy to translate between the two.
+ */
+
+typedef struct {
+   struct exec_node node;
+
+   /**
+* Declared type of the variable
+*/
+   const struct glsl_type *type;
+
+   /**
+* Declared name of the variable
+*/
+   char *name;
+
+   /**
+* For variables which satisfy the is_interface_instance()  
predicate, this

+* points to an array of integers such that if the ith member of the
+* interface block is an array, max_ifc_array_access[i] is the  
maximum
+* array element of that member that has been accessed.  If the ith  
member

+* of 

Re: [Mesa-dev] [PATCH 146/133] nir: Use static inlines instead of macros for list getters

2014-12-19 Thread Glenn Kennard
ad_sentinel(node->node.prev);
+}
-#define nir_cf_node_is_last(_node) \
-   exec_node_is_tail_sentinel((_node)->node.next)
+static inline bool
+nir_cf_node_is_last(const nir_cf_node *node)
+{
+   return exec_node_is_tail_sentinel(node->node.next);
+}
NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node)
 NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node)


Reviewed-By: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: add double opcodes and TGSI execution (v2.1)

2014-12-23 Thread Glenn Kennard

On Tue, 23 Dec 2014 22:50:30 +0100, Dave Airlie  wrote:


This patch adds support for a set of double opcodes
to TGSI. It is an update of work done originally
by Michal Krol on the gallium-double-opcodes branch.

The opcodes have a hint where they came from in the
header file.

v2: add unsigned/int <-> double
v2.1:  update docs.
This is based on code by Michal Krol 

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 743  
-

 src/gallium/auxiliary/tgsi/tgsi_info.c |  24 +-
 src/gallium/docs/source/tgsi.rst   |  76 ++-
 src/gallium/include/pipe/p_shader_tokens.h |  26 +-
 4 files changed, 850 insertions(+), 19 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c  
b/src/gallium/auxiliary/tgsi/tgsi_exec.c

index 834568b..6af4730 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -72,6 +72,16 @@
 #define TILE_BOTTOM_LEFT  2
 #define TILE_BOTTOM_RIGHT 3
+union tgsi_double_channel {
+   double d[TGSI_QUAD_SIZE];
+   unsigned u[TGSI_QUAD_SIZE][2];
+};
+
+struct tgsi_double_vector {
+   union tgsi_double_channel xy;
+   union tgsi_double_channel zw;
+};
+
 static void
 micro_abs(union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src)
@@ -147,6 +157,55 @@ micro_cos(union tgsi_exec_channel *dst,
 }
static void
+micro_d2f(union tgsi_exec_channel *dst,
+  const union tgsi_double_channel *src)
+{
+   dst->f[0] = (float)src->d[0];
+   dst->f[1] = (float)src->d[1];
+   dst->f[2] = (float)src->d[2];
+   dst->f[3] = (float)src->d[3];
+}
+
+static void
+micro_d2i(union tgsi_exec_channel *dst,
+  const union tgsi_double_channel *src)
+{
+   dst->i[0] = (int)src->d[0];
+   dst->i[1] = (int)src->d[1];
+   dst->i[2] = (int)src->d[2];
+   dst->i[3] = (int)src->d[3];
+}
+
+static void
+micro_d2u(union tgsi_exec_channel *dst,
+  const union tgsi_double_channel *src)
+{
+   dst->u[0] = (unsigned)src->d[0];
+   dst->u[1] = (unsigned)src->d[1];
+   dst->u[2] = (unsigned)src->d[2];
+   dst->u[3] = (unsigned)src->d[3];
+}
+static void
+micro_dabs(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src)
+{
+   dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
+   dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
+   dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
+   dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
+}
+
+static void
+micro_dadd(union tgsi_double_channel *dst,
+  const union tgsi_double_channel *src)
+{
+   dst->d[0] = src[0].d[0] + src[1].d[0];
+   dst->d[1] = src[0].d[1] + src[1].d[1];
+   dst->d[2] = src[0].d[2] + src[1].d[2];
+   dst->d[3] = src[0].d[3] + src[1].d[3];
+}
+
+static void
 micro_ddx(union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src)
 {
@@ -167,6 +226,159 @@ micro_ddy(union tgsi_exec_channel *dst,
 }
static void
+micro_ddiv(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src)
+{
+   dst->d[0] = src[0].d[0] / src[1].d[0];
+   dst->d[1] = src[0].d[1] / src[1].d[1];
+   dst->d[2] = src[0].d[2] / src[1].d[2];
+   dst->d[3] = src[0].d[3] / src[1].d[3];
+}
+
+static void
+micro_dmul(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src)
+{
+   dst->d[0] = src[0].d[0] * src[1].d[0];
+   dst->d[1] = src[0].d[1] * src[1].d[1];
+   dst->d[2] = src[0].d[2] * src[1].d[2];
+   dst->d[3] = src[0].d[3] * src[1].d[3];
+}
+
+static void
+micro_dmax(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src)
+{
+   dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0];
+   dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1];
+   dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2];
+   dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3];
+}
+
+static void
+micro_dmin(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src)
+{
+   dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0];
+   dst->d[1] = src[0].d[1] < src[1].d[1] ? src[0].d[1] : src[1].d[1];
+   dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2];
+   dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3];
+}
+
+static void
+micro_dneg(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src)
+{
+   dst->d[0] = -src->d[0];
+   dst->d[1] = -src->d[1];
+   dst->d[2] = -src->d[2];
+   dst->d[3] = -src->d[3];
+}
+
+static void
+micro_dslt(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
+}
+
+static void
+micro_dsne(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].d[0] != src[

Re: [Mesa-dev] Improving precision of mod(x,y)

2015-01-15 Thread Glenn Kennard
On Thu, 15 Jan 2015 15:32:59 +0100, Roland Scheidegger  
 wrote:



Am 15.01.2015 um 10:05 schrieb Iago Toral:

Hi,

We have 16 deqp tests that fail, at least on i965, because of
insufficient precision of the mod GLSL function.

Mesa lowers mod(x,y) to y * fract(x,y) so there can be some precision
lost due to fract operation. Since the result is multiplied by y the
total precision lost usually grows together with the value of y.

Did you mean fract(x/y) here?



Below are some examples to give an idea of the magnitude of this error.
The values on the right represent the precision error for each case:

mod(-1.951171875, 1.9980468750) =>  0.000447
mod(121.57, 13.29)  =>  0.023842
mod(3769.12, 321.99)=>  0.762939
mod(3769.12, 1321.99)   =>  0.0001220703
mod(-987654.125, 123456.984375) =>  0.0160663128
mod( 987654.125, 123456.984375) =>  0.031250

As you see, for large enough values, the precision error becomes
significant.

This can be fixed by lowering mod(x,y) to x - y * floor(x/y) instead,
which is the suggested implementation in the GLSL docs. I have a local
patch in my tree that does this and it does indeed fix the problem. The
downside is that this implementation adds an extra ADD instruction to
the generated code (besides replacing fract with floor, which I guess
have similar cost).

Since this is a case where there is some trade-off to the fix, I wonder
if we are interested in doing this or not. Is the precision fix worth
the additional ADD?



Well I can tell you that llvmpipe implements frc(x) as x - floor(x), so
this change looks good to me :-).
On a more serious note though, it looks to me like the cost of this
expression would be mostly dominated by the division, hence one more
add shouldn't be that bad. And if the test is legit, I don't think
there's much choice (unless you could make this optional for some old
glsl versions if they didn't require that much precision but even then
it's probably not worth bothering imho).



FWIW, I just typed out the following little piglit test and tried it on  
R600:


[require]
GLSL >= 3.30

[vertex shader passthrough]
[fragment shader]
uniform float a;
uniform float b;
out vec4 colour;

void
main(void)
{
//  colour = vec4(b * fract(a / b)); // current lowering of mod(x,y)
colour = vec4(a - b * floor(a/b)); // proposed lowering
}

[test]
clear color 0.5 0.5 0.5 0.5
clear

uniform float a 4.2
uniform float b 3.5
draw rect -1 -1 2 2
probe rgba 1 1 0.7 0.7 0.7 0.7


Resulting R600 assembly:

// y * fract(x/y)
// KC0[0].x is x and KC0[1] is y
1  t: RECIP_IEEE T0.x,  KC0[1].x
2  x: MUL        T0.x,  KC0[0].x, T0.x
3  x: FRACT      T0.x,  T0.x
4  x: MUL        R0.x,  KC0[1].x, T0.x
EXPORT_DONEPIXEL 0 R0.  EOP

// x - y * floor(x/y)
1  t: RECIP_IEEE T0.x,  KC0[1].x
2  x: MUL        T0.x,  KC0[0].x, T0.x
3  x: FLOOR      T0.x,  T0.x
4  x: MULADD     R0.x,  KC0[1].x, -T0.x, KC0[0].x
EXPORT_DONEPIXEL 0 R0.  EOP

Same number of cycles/length of dependency chain/ALU pipe usage for both  
methods.



I'd expect most architectures that can do source negate with multiply-add  
in a single operation should get similar results with no extra cost for  
the subtraction.



/Glenn
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] r600g: move selecting the pixel shader earlier.

2015-01-27 Thread Glenn Kennard

On Tue, 27 Jan 2015 04:46:32 +0100, Dave Airlie  wrote:


From: Dave Airlie 

In order to detect that a pixel shader has a prim id
input when we have no geometry shader we need to reorder
the shader selection so the pixel shader is selected
first, then the vertex shader key can take into account
the primitive id input requirement and lack of geom shader.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_state_common.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state_common.c  
b/src/gallium/drivers/r600/r600_state_common.c

index 09d8952..1030620 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1170,6 +1170,10 @@ static bool r600_update_derived_state(struct  
r600_context *rctx)

}
}
+   r600_shader_select(ctx, rctx->ps_shader, &ps_dirty);
+   if (unlikely(!rctx->ps_shader->current))
+   return false;
+
update_gs_block_state(rctx, rctx->gs_shader != NULL);
if (rctx->gs_shader) {
@@ -1232,9 +1236,6 @@ static bool r600_update_derived_state(struct  
r600_context *rctx)

}
}
-   r600_shader_select(ctx, rctx->ps_shader, &ps_dirty);
-   if (unlikely(!rctx->ps_shader->current))
-   return false;
	if (unlikely(ps_dirty || rctx->pixel_shader.shader !=  
rctx->ps_shader->current ||
 		rctx->rasterizer->sprite_coord_enable !=  
rctx->ps_shader->current->sprite_coord_enable ||



Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] r600g: add support for primitive id without geom shader

2015-01-27 Thread Glenn Kennard
en(&ctx.parse);
switch (ctx.parse.FullToken.Token.Type) {
@@ -2335,7 +2361,14 @@ static int r600_shader_from_tgsi(struct  
r600_context *rctx,

output[j].swizzle_z = 4; /* 0 */
output[j].swizzle_w = 5; /* 1 */
break;
+   case TGSI_SEMANTIC_PRIMID:
+   output[j].swizzle_x = 2;
+   output[j].swizzle_y = 4; /* 0 */
+   output[j].swizzle_z = 4; /* 0 */
+   output[j].swizzle_w = 4; /* 0 */
+   break;
}
+
break;
case TGSI_PROCESSOR_FRAGMENT:
if (shader->output[i].name == 
TGSI_SEMANTIC_COLOR) {
diff --git a/src/gallium/drivers/r600/r600_shader.h  
b/src/gallium/drivers/r600/r600_shader.h

index ab67013..b2559e9 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -84,6 +84,8 @@ struct r600_shader {
unsignedmax_arrays;
unsignednum_arrays;
unsignedvs_as_es;
+   unsignedvs_as_gs_a;
+   unsignedps_prim_id_input;
struct r600_shader_array * arrays;
 };
@@ -92,6 +94,8 @@ struct r600_shader_key {
unsigned alpha_to_one:1;
unsigned nr_cbufs:4;
unsigned vs_as_es:1;
+   unsigned vs_as_gs_a:1;
+   unsigned vs_prim_id_out:8;
 };
struct r600_shader_array {
diff --git a/src/gallium/drivers/r600/r600_state_common.c  
b/src/gallium/drivers/r600/r600_state_common.c

index 1030620..b498d00 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -707,6 +707,10 @@ static INLINE struct r600_shader_key  
r600_shader_selector_key(struct pipe_contex

key.nr_cbufs = 2;
} else if (sel->type == PIPE_SHADER_VERTEX) {
key.vs_as_es = (rctx->gs_shader != NULL);
+		if (rctx->ps_shader->current->shader.gs_prim_id_input &&  
!rctx->gs_shader) {

+   key.vs_as_gs_a = true;
+			key.vs_prim_id_out =  
rctx->ps_shader->current->shader.input[rctx->ps_shader->current->shader.ps_prim_id_input].spi_sid;

+   }
}
return key;
 }
@@ -1265,6 +1269,7 @@ static bool r600_update_derived_state(struct  
r600_context *rctx)

r600_update_ps_state(ctx, 
rctx->ps_shader->current);
    }
+       rctx->shader_stages.atom.dirty = true;
 		update_shader_atom(ctx, &rctx->pixel_shader,  
rctx->ps_shader->current);

}



With r600/r700 bits added and debug print removed:
Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] drirc: add workarounds for Unigine Tropics

2015-01-30 Thread Glenn Kennard
On Fri, 30 Jan 2015 15:19:49 +0100, Martin Peres  
 wrote:



Signed-off-by: Martin Peres 
---
 src/mesa/drivers/dri/common/drirc | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/common/drirc  
b/src/mesa/drivers/dri/common/drirc

index cecd6a9..073814e 100644
--- a/src/mesa/drivers/dri/common/drirc
+++ b/src/mesa/drivers/dri/common/drirc
@@ -10,6 +10,11 @@ Application bugs worked around in this file:
   Enabling all extensions for Unigine fixes most issues, but the GLSL  
version

   is still 1.10.
+* Unigine Tropics 1.3 makes use of the "sample" keyword which is  
reserved
+  with ARB_GL_gpu_shader5 which got enabled by  
force_glsl_extensions_warn.


There seems to be something weird going on here - as far as I can tell  
Tropics is using a GL legacy context, and for those
GL_ARB_gpu_shader5 isn't supposed to be enabled; the extension spec  
mentions GL 3.2 compatibility/core profile being required.


If I test this on r600 the extension cannot be enabled in a legacy  
context, only in a core one. Maybe there is a check missing somewhere in  
the intel driver?


+  It also makes use of bitwise manipulation (when adding anisotropic  
filtering)

+  which is illegal in GLSL 1.10. Adding "#version 130" fixes this.
+
 * Unigine Heaven 3.0 with ARB_texture_multisample uses a "ivec4 * vec4"
   expression, which is illegal in GLSL 1.10.
   Adding "#version 130" fixes this.
@@ -41,6 +46,8 @@ TODO: document the other workarounds.

 
+value="-GL_ARB_gpu_shader5" />

+
 




force_glsl_version addition LGTM.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] r600g/sb: fix a bug in constants folding optimisation pass

2015-01-30 Thread Glenn Kennard

On Sat, 31 Jan 2015 01:36:30 +0100, Xavier B.  wrote:


r600g/sb: fix a bug in constants folding optimisation
 pass:

ADD R6.y.1,R5.w.1, ~1|3f80
ADD R6.y.2,|R6.y.1|, -0.0001|b8d1b717

was wrongly being converted to

ADD R6.y.1,R5.w.1, ~1|3f80
ADD R6.y.2,R5.w.1, -1.0001|bf800347

because abs() modifier was ignored.

Signed-off-by: Xavier Bouchoux 



Reviewed-by: Glenn Kennard 

Thanks Xavier! For future patches, please use git send-email as noted in  
http://www.mesa3d.org/devinfo.html so reviewers can comment inline.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] glapi: add GL_EXT_polygon_offset_clamp

2015-02-01 Thread Glenn Kennard
On Sun, 01 Feb 2015 16:18:51 +0100, Ilia Mirkin   
wrote:



Signed-off-by: Ilia Mirkin 
---
 src/mapi/glapi/gen/gl_API.xml   | 11 +++
 src/mesa/main/polygon.c |  6 ++
 src/mesa/main/polygon.h |  5 -
 src/mesa/main/tests/dispatch_sanity.cpp |  3 +++
 4 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/mapi/glapi/gen/gl_API.xml  
b/src/mapi/glapi/gen/gl_API.xml

index e3cbab3..17bf62a 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -12858,6 +12858,17 @@
xmlns:xi="http://www.w3.org/2001/XInclude"/>

+
+
+
+
+
+
+
+
+
+
+
 

diff --git a/src/mesa/main/polygon.c b/src/mesa/main/polygon.c
index cdaa244..e3b9073 100644
--- a/src/mesa/main/polygon.c
+++ b/src/mesa/main/polygon.c
@@ -265,6 +265,12 @@ _mesa_PolygonOffsetEXT( GLfloat factor, GLfloat  
bias )

_mesa_PolygonOffset(factor, bias * ctx->DrawBuffer->_DepthMaxF );
 }
+void GLAPIENTRY
+_mesa_PolygonOffsetClampEXT( GLfloat factor, GLfloat units, GLfloat  
clamp )

+{
+
+}
+
/**/
diff --git a/src/mesa/main/polygon.h b/src/mesa/main/polygon.h
index 530adba..6cf14d3 100644
--- a/src/mesa/main/polygon.h
+++ b/src/mesa/main/polygon.h
@@ -55,12 +55,15 @@ extern void GLAPIENTRY
 _mesa_PolygonOffsetEXT( GLfloat factor, GLfloat bias );
extern void GLAPIENTRY
+_mesa_PolygonOffsetClampEXT( GLfloat factor, GLfloat units, GLfloat  
clamp );

+
+extern void GLAPIENTRY
 _mesa_PolygonStipple( const GLubyte *mask );
extern void GLAPIENTRY
 _mesa_GetPolygonStipple( GLubyte *mask );
-extern void
+extern void
 _mesa_init_polygon( struct gl_context * ctx );
#endif
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp  
b/src/mesa/main/tests/dispatch_sanity.cpp

index ee4db45..1f1a3a8 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -988,6 +988,9 @@ const struct function gl_core_functions_possible[] =  
{

{ "glTextureStorage3DMultisample", 45, -1 },
{ "glTextureBuffer", 45, -1 },
+   /* GL_EXT_polygon_offset_clamp */
+   { "glPolygonOffsetClampEXT", 11, -1 },
+
{ NULL, 0, -1 }
 };



Patches 1-5 (assuming fix for clamp in 2 noted already by Ilia) are
Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/10] r600g, radeonsi: don't append to streamout buffers that haven't been used yet

2015-02-01 Thread Glenn Kennard

On Sun, 01 Feb 2015 18:36:58 +0100, Marek Olšák  wrote:


From: Marek Olšák 

The FILLED_SIZE counter is uninitialized at the beginning, so we can't  
use it.

Instead, use offset = 0, which is what we always do when not appending.

This unexpectedly fixes spec/ARB_texture_multisample/sample-position/*.
Yes, the test does use transform feedback.

Cc: 10.3 10.4 
---
 src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
 src/gallium/drivers/radeon/r600_streamout.c   | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h  
b/src/gallium/drivers/radeon/r600_pipe_common.h

index 6224668..46a6bf3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -294,6 +294,7 @@ struct r600_so_target {
/* The buffer where BUFFER_FILLED_SIZE is stored. */
struct r600_resource*buf_filled_size;
unsignedbuf_filled_size_offset;
+   boolbuf_filled_size_valid;
unsignedstride_in_dw;
 };
diff --git a/src/gallium/drivers/radeon/r600_streamout.c  
b/src/gallium/drivers/radeon/r600_streamout.c

index c44f0f2..bc8bf97 100644
--- a/src/gallium/drivers/radeon/r600_streamout.c
+++ b/src/gallium/drivers/radeon/r600_streamout.c
@@ -237,7 +237,7 @@ static void r600_emit_streamout_begin(struct  
r600_common_context *rctx, struct r

}
}
-   if (rctx->streamout.append_bitmask & (1 << i)) {
+		if (rctx->streamout.append_bitmask & (1 << i) &&  
t[i]->buf_filled_size_valid) {

uint64_t va = t[i]->buf_filled_size->gpu_address +
  t[i]->buf_filled_size_offset;
@@ -302,6 +302,8 @@ void r600_emit_streamout_end(struct  
r600_common_context *rctx)

 * buffer bound. This ensures that the primitives-emitted query
 * won't increment. */
 		r600_write_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i,  
0);

+
+   t[i]->buf_filled_size_valid = true;
}
    rctx->streamout.begin_emitted = false;


Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/9] tgsi: add tgsi_get_processor_type helper from radeon

2015-02-01 Thread Glenn Kennard

On Sun, 01 Feb 2015 18:15:53 +0100, Marek Olšák  wrote:


From: Marek Olšák 

---
 src/gallium/auxiliary/tgsi/tgsi_parse.c   | 12 
 src/gallium/auxiliary/tgsi/tgsi_parse.h   |  2 ++
 src/gallium/drivers/radeon/r600_pipe_common.c | 11 ---
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c  
b/src/gallium/auxiliary/tgsi/tgsi_parse.c

index f2370ed..9cc8383 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -315,3 +315,15 @@ tgsi_dump_tokens(const struct tgsi_token *tokens)
   debug_printf("0x%08x,\n", dwords[i]);
debug_printf("};\n");
 }
+
+unsigned
+tgsi_get_processor_type(const struct tgsi_token *tokens)
+{
+   struct tgsi_parse_context parse;
+
+   if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
+  debug_printf("tgsi_parse_init() failed in %s:%i!\n", __func__,  
__LINE__);

+  return ~0;
+   }
+   return parse.FullHeader.Processor.Processor;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h  
b/src/gallium/auxiliary/tgsi/tgsi_parse.h

index bfcca48..cd4b2af 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -153,6 +153,8 @@ tgsi_alloc_tokens(unsigned num_tokens);
 void
 tgsi_free_tokens(const struct tgsi_token *tokens);
+unsigned
+tgsi_get_processor_type(const struct tgsi_token *tokens);
#if defined __cplusplus
 }
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c  
b/src/gallium/drivers/radeon/r600_pipe_common.c

index ddb4142..ee4cda7 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -883,17 +883,6 @@ void r600_destroy_common_screen(struct  
r600_common_screen *rscreen)

FREE(rscreen);
 }
-static unsigned tgsi_get_processor_type(const struct tgsi_token *tokens)
-{
-   struct tgsi_parse_context parse;
-
-   if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
-		debug_printf("tgsi_parse_init() failed in %s:%i!\n", __func__,  
__LINE__);

-   return ~0;
-   }
-   return parse.FullHeader.Processor.Processor;
-}
-
 bool r600_can_dump_shader(struct r600_common_screen *rscreen,
  const struct tgsi_token *tokens)
 {



null_sampler_view(TGSI_TEXTURE_2D) test case segfaults on R600 (with  
GALLIUM_TESTS=1), which I presume is intentional to expose a bug there. If  
yes, then:


Patches 1-9 are Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] glsl: Add define for ARB_shader_precision

2015-02-01 Thread Glenn Kennard
On Wed, 31 Dec 2014 21:43:51 +0100, Micah Fedke  
 wrote:



---
 src/glsl/glcpp/glcpp-parse.y| 3 +++
 src/glsl/glsl_parser_extras.cpp | 1 +
 src/glsl/glsl_parser_extras.h   | 2 ++
 src/mesa/main/extensions.c  | 1 +
 src/mesa/main/mtypes.h  | 1 +
 5 files changed, 8 insertions(+)

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 9b1a4f4..c9cc68f 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2473,6 +2473,9 @@  
_glcpp_parser_handle_version_declaration(glcpp_parser_t *parser,  
intmax_t versio

  if (extensions->ARB_derivative_control)
  add_builtin_define(parser,  
"GL_ARB_derivative_control", 1);

+
+  if (extensions->ARB_shader_precision)
+ add_builtin_define(parser, "GL_ARB_shader_precision",  
1);

   }
}
diff --git a/src/glsl/glsl_parser_extras.cpp  
b/src/glsl/glsl_parser_extras.cpp

index 27e2eaf3..8555af6 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -532,6 +532,7 @@ static const _mesa_glsl_extension  
_mesa_glsl_supported_extensions[] = {
EXT(ARB_shader_atomic_counters, true,  false,  
ARB_shader_atomic_counters),
EXT(ARB_shader_bit_encoding,true,  false,  
ARB_shader_bit_encoding),
EXT(ARB_shader_image_load_store,true,  false,  
ARB_shader_image_load_store),
+   EXT(ARB_shader_precision,   true,  false,  
ARB_shader_precision),
EXT(ARB_shader_stencil_export,  true,  false,  
ARB_shader_stencil_export),
EXT(ARB_shader_texture_lod, true,  false,  
ARB_shader_texture_lod),
EXT(ARB_shading_language_420pack,   true,  false,  
ARB_shading_language_420pack),
diff --git a/src/glsl/glsl_parser_extras.h  
b/src/glsl/glsl_parser_extras.h

index e04f7ce..0ca6053 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -424,6 +424,8 @@ struct _mesa_glsl_parse_state {
bool ARB_shader_bit_encoding_warn;
bool ARB_shader_image_load_store_enable;
bool ARB_shader_image_load_store_warn;
+   bool ARB_shader_precision_enable;
+   bool ARB_shader_precision_warn;
bool ARB_shader_stencil_export_enable;
bool ARB_shader_stencil_export_warn;
bool ARB_shader_texture_lod_enable;
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 0df04c2..95c7a37 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -147,6 +147,7 @@ static const struct extension extension_table[] = {
{ "GL_ARB_shader_bit_encoding",  
o(ARB_shader_bit_encoding), GL, 2010 },
{ "GL_ARB_shader_image_load_store",  
o(ARB_shader_image_load_store), GL, 2011 },
{ "GL_ARB_shader_objects",   
o(dummy_true),  GL, 2002 },
+   { "GL_ARB_shader_precision", 
o(ARB_shader_precision),GL, 2014 },


Isn't this extension from 2010 rather than 2014?

{ "GL_ARB_shader_stencil_export",
o(ARB_shader_stencil_export),   GL, 2009 },
{ "GL_ARB_shader_texture_lod",   
o(ARB_shader_texture_lod),  GL, 2009 },
{ "GL_ARB_shading_language_100", 
o(dummy_true),  GLL,2003 },

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index b95dfb9..4c83379 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3757,6 +3757,7 @@ struct gl_extensions
GLboolean ARB_shader_atomic_counters;
GLboolean ARB_shader_bit_encoding;
GLboolean ARB_shader_image_load_store;
+   GLboolean ARB_shader_precision;
GLboolean ARB_shader_stencil_export;
GLboolean ARB_shader_texture_lod;
GLboolean ARB_shading_language_packing;

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] r600g: Implement GL_ARB_draw_indirect for EG/CM

2015-02-05 Thread Glenn Kennard
Requires Evergreen/Cayman and radeon kernel module
2.41.0 or newer.

Signed-off-by: Glenn Kennard 
---
Changes since v2:
* Fix failing arb_draw_indirect-vertexid piglit test cases.
* Ensure start_instance, base_vertex, index_offset are reset when
  switching back to direct draws.
* Juggled some header defines to avoid use of magic numbers.

 docs/GL3.txt |   4 +-
 docs/relnotes/10.5.0.html|   1 +
 src/gallium/drivers/r600/evergreend.h|   1 -
 src/gallium/drivers/r600/r600_pipe.c |   4 +-
 src/gallium/drivers/r600/r600_pipe.h |   1 +
 src/gallium/drivers/r600/r600_shader.c   |  14 ++-
 src/gallium/drivers/r600/r600_state_common.c | 128 ++-
 src/gallium/drivers/r600/r600d.h |   8 +-
 8 files changed, 130 insertions(+), 31 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 23f5561..ef4f0ae 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -95,7 +95,7 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, 
radeonsi, llvmpipe, soft
 GL 4.0, GLSL 4.00:
 
   GL_ARB_draw_buffers_blendDONE (i965, nv50, nvc0, 
r600, radeonsi, llvmpipe, softpipe)
-  GL_ARB_draw_indirect DONE (i965, nvc0, 
radeonsi, llvmpipe, softpipe)
+  GL_ARB_draw_indirect DONE (i965, nvc0, r600, 
radeonsi, llvmpipe, softpipe)
   GL_ARB_gpu_shader5   DONE (i965, nvc0)
   - 'precise' qualifierDONE
   - Dynamically uniform sampler array indices  DONE (r600)
@@ -159,7 +159,7 @@ GL 4.3, GLSL 4.30:
   GL_ARB_framebuffer_no_attachmentsnot started
   GL_ARB_internalformat_query2 not started
   GL_ARB_invalidate_subdataDONE (all drivers)
-  GL_ARB_multi_draw_indirect   DONE (i965, nvc0, 
radeonsi, llvmpipe, softpipe)
+  GL_ARB_multi_draw_indirect   DONE (i965, nvc0, r600, 
radeonsi, llvmpipe, softpipe)
   GL_ARB_program_interface_query   not started
   GL_ARB_robust_buffer_access_behavior not started
   GL_ARB_shader_image_size not started
diff --git a/docs/relnotes/10.5.0.html b/docs/relnotes/10.5.0.html
index 4f921ea..47686c0 100644
--- a/docs/relnotes/10.5.0.html
+++ b/docs/relnotes/10.5.0.html
@@ -49,6 +49,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_EXT_packed_float on freedreno
 GL_EXT_texture_shared_exponent on freedreno
 GL_EXT_texture_snorm on freedreno
+GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600
 
 
 
diff --git a/src/gallium/drivers/r600/evergreend.h 
b/src/gallium/drivers/r600/evergreend.h
index 4989996..cd4ff46 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -72,7 +72,6 @@
 #define PKT3_REG_RMW   0x21
 #define PKT3_COND_EXEC 0x22
 #define PKT3_PRED_EXEC 0x23
-#define PKT3_START_3D_CMDBUF   0x24
 #define PKT3_DRAW_INDEX_2  0x27
 #define PKT3_CONTEXT_CONTROL   0x28
 #define PKT3_DRAW_INDEX_IMMD_BE0x29
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index b6f7859..3127e23 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -313,6 +313,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
return family >= CHIP_CEDAR ? 1 : 0;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return family >= CHIP_CEDAR ? 4 : 0;
+   case PIPE_CAP_DRAW_INDIRECT:
+   /* kernel command checker support is also required */
+   return family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= 41;
 
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -322,7 +325,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
-   case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index e110efe..1db43c4 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -145,6 +145,7 @@ struct r600_vgt_state {
uint32_t vgt_multi_prim_ib_reset_en;
uint32_t vgt_multi_prim_ib_reset_indx;
uint32_t vgt_indx_offset;
+   bool last_draw_was_indirect;
 };
 
 struct r600_blend_color {
diff --git a/src/gallium/drivers/r600/r600_sha

Re: [Mesa-dev] [PATCH 1/2] r300g: handle unsupported blend factor gracefully

2015-02-06 Thread Glenn Kennard
On Fri, 06 Feb 2015 20:53:21 +0100, Roland Scheidegger  
 wrote:



FWIW I'm wondering why you'd actually need them in a d3d9 state tracker,
as this is a feature first seen with d3d10. Unless you'd want to handle
d3d10 of course, but in this case there's probably not much hope for any
of the d3d9 capable hw drivers for lots of reasons...

Roland



Actually it got retrofitted into D3D9 Ex for Vista, see  
https://msdn.microsoft.com/en-us/library/windows/desktop/bb172513(v=vs.85).aspx

D3DPBLENDCAPS_INVSRCCOLOR2 and D3DPBLENDCAPS_SRCCOLOR2.


/Glenn's .02 cents
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: fix sampler/ubo indexing on cayman

2015-07-09 Thread Glenn Kennard

On Thu, 09 Jul 2015 07:37:59 +0200, Dave Airlie  wrote:


From: Dave Airlie 

Cayman needs a different method to upload the CF IDX0/1

This fixes 31 piglits when ARB_gpu_shader5 is forced on
with cayman.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/eg_asm.c | 17 +++--
 src/gallium/drivers/r600/eg_sq.h  | 11 +++
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c  
b/src/gallium/drivers/r600/eg_asm.c

index d04921e..c32d317 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -161,6 +161,9 @@ int egcm_load_index_reg(struct r600_bytecode *bc,  
unsigned id, bool inside_alu_c

alu.op = ALU_OP1_MOVA_INT;
alu.src[0].sel = bc->index_reg[id];
alu.src[0].chan = 0;
+   if (bc->chip_class == CAYMAN)
+		alu.dst.sel = id == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 :  
CM_V_SQ_MOVA_DST_CF_IDX1;

+
alu.last = 1;
r = r600_bytecode_add_alu(bc, &alu);
if (r)
@@ -168,12 +171,14 @@ int egcm_load_index_reg(struct r600_bytecode *bc,  
unsigned id, bool inside_alu_c

bc->ar_loaded = 0; /* clobbered */


Could split ar_loaded into 3 bits for AR/IDX0/IDX1 for cayman, however I  
think it would be better to teach SB to handle sampler/ubo indexing and  
keep things simple here.



-   memset(&alu, 0, sizeof(alu));
-   alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
-   alu.last = 1;
-   r = r600_bytecode_add_alu(bc, &alu);
-   if (r)
-   return r;
+   if (bc->chip_class == EVERGREEN) {
+   memset(&alu, 0, sizeof(alu));
+   alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
+   alu.last = 1;
+   r = r600_bytecode_add_alu(bc, &alu);
+   if (r)
+   return r;
+   }
/* Must split ALU group as index only applies to following group */
if (inside_alu_clause) {
diff --git a/src/gallium/drivers/r600/eg_sq.h  
b/src/gallium/drivers/r600/eg_sq.h

index b534872..10caa07 100644
--- a/src/gallium/drivers/r600/eg_sq.h
+++ b/src/gallium/drivers/r600/eg_sq.h
@@ -521,4 +521,15 @@
#define V_SQ_REL_ABSOLUTE 0
 #define V_SQ_REL_RELATIVE 1
+
+/* CAYMAN has special encoding for MOVA_INT destination */
+#define CM_V_SQ_MOVA_DST_AR_X 0
+#define CM_V_SQ_MOVA_DST_CF_PC 1
+#define CM_V_SQ_MOVA_DST_CF_IDX0 2
+#define CM_V_SQ_MOVA_DST_CF_IDX1 3



+#define CM_V_SQ_MOVA_DST_CF_CLAUSE_GLOBAL_7_0 4
+#define CM_V_SQ_MOVA_DST_CF_CLAUSE_GLOBAL_15_8 5
+#define CM_V_SQ_MOVA_DST_CF_CLAUSE_GLOBAL_23_16 6
+#define CM_V_SQ_MOVA_DST_CF_CLAUSE_GLOBAL_31_24 7


Can't think of any useful cases for the cayman specific ALU global  
register. Drop these four?



+
 #endif



Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: move sampler/ubo index registers before temp reg

2015-07-09 Thread Glenn Kennard

On Thu, 09 Jul 2015 08:00:48 +0200, Dave Airlie  wrote:


From: Dave Airlie 

temp_reg needs to be last, as we increment things
away from it, otherwise on cayman some tests were overwriting
the index regs.

Fixes 2 piglit with ARB_gpu_shader5 forced on cayman.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c  
b/src/gallium/drivers/r600/r600_shader.c

index af7622e..1a72bf6 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1931,15 +1931,14 @@ static int r600_shader_from_tgsi(struct  
r600_context *rctx,

ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
+   ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1;
+   ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2;
+
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
-   ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1;
-   ctx.temp_reg = ctx.bc->ar_reg + 2;
-   ctx.bc->index_reg[0] = ctx.bc->ar_reg + 3;
-   ctx.bc->index_reg[1] = ctx.bc->ar_reg + 4;
+   ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 3;
+   ctx.temp_reg = ctx.bc->ar_reg + 4;
} else {
-   ctx.temp_reg = ctx.bc->ar_reg + 1;
-   ctx.bc->index_reg[0] = ctx.bc->ar_reg + 2;
-   ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3;
+   ctx.temp_reg = ctx.bc->ar_reg + 3;
    }
    shader->max_arrays = 0;


Reviewed-by: Glenn Kennard 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: Fix handling of TGSI_OPCODE_ARR with SB

2015-08-13 Thread Glenn Kennard
FLT_TO_INT goes in the vector pipes on evergreen/NI,
not the trans unit as on earlier chips.

Signed-off-by: Glenn Kennard 
---
Fixes issue found on nine: https://github.com/iXit/Mesa-3D/issues/119

 src/gallium/drivers/r600/r600_isa.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_isa.h 
b/src/gallium/drivers/r600/r600_isa.h
index 381f06d..fdbe1c0 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -262,7 +262,7 @@ static const struct alu_op_info alu_op_table[] = {
{"PRED_SETNE_PUSH_INT",   2, { 0x4D, 0x4D },{  AF_VS, 
AF_VS, AF_VS, AF_VS},  AF_PRED_PUSH | AF_CC_NE | AF_INT_CMP },
{"PRED_SETLT_PUSH_INT",   2, { 0x4E, 0x4E },{  AF_VS, 
AF_VS, AF_VS, AF_VS},  AF_PRED_PUSH | AF_CC_LT | AF_INT_CMP },
{"PRED_SETLE_PUSH_INT",   2, { 0x4F, 0x4F },{  AF_VS, 
AF_VS, AF_VS, AF_VS},  AF_PRED_PUSH | AF_CC_LE | AF_INT_CMP },
-   {"FLT_TO_INT",1, { 0x6B, 0x50 },{   AF_S,  
AF_S, AF_VS, AF_VS},  AF_INT_DST | AF_CVT },
+   {"FLT_TO_INT",1, { 0x6B, 0x50 },{   AF_S,  
AF_S,  AF_V,  AF_V},  AF_INT_DST | AF_CVT },
{"BFREV_INT", 1, {   -1, 0x51 },{  0, 
0, AF_VS, AF_VS},  AF_INT_DST },
{"ADDC_UINT", 2, {   -1, 0x52 },{  0, 
0, AF_VS, AF_VS},  AF_UINT_DST },
{"SUBB_UINT", 2, {   -1, 0x53 },{  0, 
0, AF_VS, AF_VS},  AF_UINT_DST },
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] r600g/sb: SB support for UBO indexing

2015-10-07 Thread Glenn Kennard
Signed-off-by: Glenn Kennard 
---
This patch depends on prior patch:
  r600g/sb: Support gs5 sampler indexing

Two items that could be improved on in some future patch:
Clauses using UBO indexing still lock the cache line for a
constant used to load the index register, which causes some
instruction groups to be broken up as SB thinks they are
using too many constant read ports.

The MOVA_INT/SET_CF_IDX[01] ops can often be emitted directly into
the preceding clause rather than always creating a new one.

 src/gallium/drivers/r600/r600_shader.c |   6 --
 src/gallium/drivers/r600/r600_shader.h |   2 -
 src/gallium/drivers/r600/sb/sb_bc.h|   4 +-
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp |   6 +-
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp   |  20 -
 src/gallium/drivers/r600/sb/sb_expr.cpp|   3 +-
 src/gallium/drivers/r600/sb/sb_ir.h|   7 ++
 src/gallium/drivers/r600/sb/sb_sched.cpp   | 108 ++---
 src/gallium/drivers/r600/sb/sb_sched.h |   4 +
 src/gallium/drivers/r600/sb/sb_shader.cpp  |   4 +-
 src/gallium/drivers/r600/sb/sb_shader.h|   2 +-
 11 files changed, 139 insertions(+), 27 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 24c3d43..8efe902 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -166,8 +166,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 if (rctx->b.chip_class <= R700) {
use_sb &= (shader->shader.processor_type != 
TGSI_PROCESSOR_GEOMETRY);
 }
-   /* disable SB for shaders using ubo array indexing as it doesn't handle 
those currently */
-   use_sb &= !shader->shader.uses_ubo_indexing;
/* disable SB for shaders using doubles */
use_sb &= !shader->shader.uses_doubles;
 
@@ -1250,9 +1248,6 @@ static int tgsi_split_constant(struct r600_shader_ctx 
*ctx)
continue;
}
 
-   if (ctx->src[i].kc_rel)
-   ctx->shader->uses_ubo_indexing = true;
-
if (ctx->src[i].rel) {
int chan = inst->Src[i].Indirect.Swizzle;
int treg = r600_get_temp(ctx);
@@ -1936,7 +1931,6 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
ctx.gs_next_vertex = 0;
ctx.gs_stream_output_info = &so;
 
-   shader->uses_ubo_indexing = false;
ctx.face_gpr = -1;
ctx.fixed_pt_position_gpr = -1;
ctx.fragcoord_input = -1;
diff --git a/src/gallium/drivers/r600/r600_shader.h 
b/src/gallium/drivers/r600/r600_shader.h
index 8ba32ae..c240e71 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -75,8 +75,6 @@ struct r600_shader {
boolean has_txq_cube_array_z_comp;
boolean uses_tex_buffers;
boolean gs_prim_id_input;
-   /* Temporarily workaround SB not handling ubo indexing */
-   boolean uses_ubo_indexing;
 
/* Size in bytes of a data item in the ring(s) (single vertex data).
   Stages with only one ring items 123 will be set to 0. */
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h 
b/src/gallium/drivers/r600/sb/sb_bc.h
index 126750d..9c2a917 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -478,7 +478,9 @@ struct bc_cf {
 
bool is_alu_extended() {
assert(op_ptr->flags & CF_ALU);
-   return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE;
+   return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE 
||
+   kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode 
!= KC_INDEX_NONE ||
+   kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode 
!= KC_INDEX_NONE;
}
 
 };
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 522ff9d..17fe2a5 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -514,7 +514,7 @@ void bc_finalizer::copy_fetch_src(fetch_node &dst, 
fetch_node &src, unsigned arg
 
 void bc_finalizer::emit_set_grad(fetch_node* f) {
 
-   assert(f->src.size() == 12);
+   assert(f->src.size() == 12 || f->src.size() == 13);
unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H 
};
 
unsigned arg_start = 0;
@@ -809,8 +809,8 @@ void bc_finalizer::finalize_cf(cf_node* c) {
 }
 
 sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
-   unsigned sel = v->select.sel();
-   unsigned bank = sel >> 12;
+   unsigned sel = v->select.kcache_sel();
+   unsigned bank = v->select.kcache_bank();
un

[Mesa-dev] [PATCH 2/2] r600g: Enable GL_ARB_gpu_shader5 extension

2015-10-07 Thread Glenn Kennard
Signed-off-by: Glenn Kennard 
---
Now that SB supports the GS5 features we can safely enable the
extension.

Note that gallium state tracker clamps the GLSL language / GL version
since GL_ARB_tessellation_shader isn't implemented yet.

 docs/GL3.txt | 16 
 docs/relnotes/11.1.0.html|  1 +
 src/gallium/drivers/r600/r600_pipe.c |  2 +-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index e17e783..6503e2a 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -96,18 +96,18 @@ GL 4.0, GLSL 4.00 --- all DONE: nvc0, radeonsi
 
   GL_ARB_draw_buffers_blendDONE (i965, nv50, r600, 
llvmpipe, softpipe)
   GL_ARB_draw_indirect DONE (i965, r600, 
llvmpipe, softpipe)
-  GL_ARB_gpu_shader5   DONE (i965)
+  GL_ARB_gpu_shader5   DONE (i965, r600)
   - 'precise' qualifierDONE
-  - Dynamically uniform sampler array indices  DONE (r600, softpipe)
-  - Dynamically uniform UBO array indices  DONE (r600)
+  - Dynamically uniform sampler array indices  DONE (softpipe)
+  - Dynamically uniform UBO array indices  DONE ()
   - Implicit signed -> unsigned conversionsDONE
   - Fused multiply-add DONE ()
-  - Packing/bitfield/conversion functions  DONE (r600, softpipe)
-  - Enhanced textureGather DONE (r600, softpipe)
-  - Geometry shader instancing DONE (r600, llvmpipe, 
softpipe)
+  - Packing/bitfield/conversion functions  DONE (softpipe)
+  - Enhanced textureGather DONE (softpipe)
+  - Geometry shader instancing DONE (llvmpipe, 
softpipe)
   - Geometry shader multiple streams   DONE ()
-  - Enhanced per-sample shadingDONE (r600)
-  - Interpolation functionsDONE (r600)
+  - Enhanced per-sample shadingDONE ()
+  - Interpolation functionsDONE ()
   - New overload resolution rules  DONE
   GL_ARB_gpu_shader_fp64   DONE (r600, llvmpipe, 
softpipe)
   GL_ARB_sample_shadingDONE (i965, nv50, r600)
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index c755c98..e537d98 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -50,6 +50,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_ARB_texture_barrier / GL_NV_texture_barrier on i965
 GL_ARB_texture_query_lod on softpipe
 GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips
+GL_ARB_gpu_shader5 on r600 for Evergreen and later chips
 
 
 Bug fixes
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index efb4889..32ce76a 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -305,7 +305,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
 
case PIPE_CAP_GLSL_FEATURE_LEVEL:
if (family >= CHIP_CEDAR)
-  return 330;
+  return 410;
/* pre-evergreen geom shaders need newer kernel */
if (rscreen->b.info.drm_minor >= 37)
   return 330;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] r600g: Enable GL_ARB_gpu_shader5 extension

2015-10-07 Thread Glenn Kennard
On Wed, 07 Oct 2015 19:04:15 +0200, Benjamin Bellec   
wrote:



Hi Glenn,

The series doesn't apply on current master.

Regard.



It's not meant to apply directly on master. Quoting from the notes in  
patch 1/2:


This patch depends on prior patch:
  r600g/sb: Support gs5 sampler indexing


/Glenn
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] r600g: Enable GL_ARB_gpu_shader5 extension

2015-10-07 Thread Glenn Kennard
On Wed, 07 Oct 2015 19:59:03 +0200, Benjamin Bellec   
wrote:



Le 07/10/2015 19:13, Glenn Kennard a écrit :

On Wed, 07 Oct 2015 19:04:15 +0200, Benjamin Bellec
 wrote:


Hi Glenn,

The series doesn't apply on current master.

Regard.



It's not meant to apply directly on master. Quoting from the notes in
patch 1/2:

This patch depends on prior patch:
  r600g/sb: Support gs5 sampler indexing


/Glenn


OK sorry, I read too quickly.

Is it normal that glxinfo still reports GLSL 330? With your series applied
I still get:
OpenGL renderer string: Gallium 0.4 on AMD CYPRESS (DRM 2.42.0)
OpenGL core profile version string: 3.3 (Core Profile) Mesa 11.1.0-devel
(git-6ed8fd3)
OpenGL core profile shading language version string: 3.30




Quoting from the notes in patch 2/2:
"Note that gallium state tracker clamps the GLSL language / GL version
since GL_ARB_tessellation_shader isn't implemented yet."


/Glenn
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >