Mesa (master): glsl/tests: Update .gitignore for new unit test.

2013-09-16 Thread Kenneth Graunke
Module: Mesa
Branch: master
Commit: 120d100627820364dfec9b6ed1c031bc2580e17f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=120d100627820364dfec9b6ed1c031bc2580e17f

Author: Kenneth Graunke kenn...@whitecape.org
Date:   Mon Sep 16 08:25:44 2013 -0700

glsl/tests: Update .gitignore for new unit test.

I rarely run 'git status', so I failed to notice this was missing.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org

---

 src/glsl/tests/.gitignore |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/src/glsl/tests/.gitignore b/src/glsl/tests/.gitignore
index c06ab2b..de81adf 100644
--- a/src/glsl/tests/.gitignore
+++ b/src/glsl/tests/.gitignore
@@ -1,2 +1,3 @@
 ralloc-test
 uniform-initializer-test
+sampler-types-test

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): ilo: make ilo_bind_sampler_states return void

2013-09-16 Thread Chia-I Wu
Module: Mesa
Branch: master
Commit: 4a6939edae2cc36fc931824ce91484cc0480ec8c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4a6939edae2cc36fc931824ce91484cc0480ec8c

Author: Chia-I Wu olva...@gmail.com
Date:   Fri Sep 13 11:34:19 2013 +0800

ilo: make ilo_bind_sampler_states return void

So that it can be hooked up to pipe_context::bind_sampler_states, which is
currently living on another branch.

---

 src/gallium/drivers/ilo/ilo_state.c |   41 +-
 1 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/ilo/ilo_state.c 
b/src/gallium/drivers/ilo/ilo_state.c
index a0c80ae..1a0a08d 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -246,7 +246,7 @@ ilo_create_sampler_state(struct pipe_context *pipe,
return sampler;
 }
 
-static bool
+static void
 ilo_bind_sampler_states(struct pipe_context *pipe, unsigned shader,
 unsigned start, unsigned count, void **samplers)
 {
@@ -289,7 +289,22 @@ ilo_bind_sampler_states(struct pipe_context *pipe, 
unsigned shader,
   dst-count = count;
}
 
-   return changed;
+   if (changed) {
+  switch (shader) {
+  case PIPE_SHADER_VERTEX:
+ ilo-dirty |= ILO_DIRTY_SAMPLER_VS;
+ break;
+  case PIPE_SHADER_GEOMETRY:
+ ilo-dirty |= ILO_DIRTY_SAMPLER_GS;
+ break;
+  case PIPE_SHADER_FRAGMENT:
+ ilo-dirty |= ILO_DIRTY_SAMPLER_FS;
+ break;
+  case PIPE_SHADER_COMPUTE:
+ ilo-dirty |= ILO_DIRTY_SAMPLER_CS;
+ break;
+  }
+   }
 }
 
 static void
@@ -299,9 +314,8 @@ ilo_bind_fragment_sampler_states(struct pipe_context *pipe,
 {
struct ilo_context *ilo = ilo_context(pipe);
 
-   if (ilo_bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT,
- 0, num_samplers, samplers))
-  ilo-dirty |= ILO_DIRTY_SAMPLER_FS;
+   ilo_bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT,
+ 0, num_samplers, samplers);
 
if (ilo-sampler[PIPE_SHADER_FRAGMENT].count  num_samplers) {
   ilo_bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, num_samplers,
@@ -316,9 +330,8 @@ ilo_bind_vertex_sampler_states(struct pipe_context *pipe,
 {
struct ilo_context *ilo = ilo_context(pipe);
 
-   if (ilo_bind_sampler_states(pipe, PIPE_SHADER_VERTEX,
- 0, num_samplers, samplers))
-  ilo-dirty |= ILO_DIRTY_SAMPLER_VS;
+   ilo_bind_sampler_states(pipe, PIPE_SHADER_VERTEX,
+ 0, num_samplers, samplers);
 
if (ilo-sampler[PIPE_SHADER_VERTEX].count  num_samplers) {
   ilo_bind_sampler_states(pipe, PIPE_SHADER_VERTEX, num_samplers,
@@ -333,9 +346,8 @@ ilo_bind_geometry_sampler_states(struct pipe_context *pipe,
 {
struct ilo_context *ilo = ilo_context(pipe);
 
-   if (ilo_bind_sampler_states(pipe, PIPE_SHADER_GEOMETRY,
-0, num_samplers, samplers))
-  ilo-dirty |= ILO_DIRTY_SAMPLER_GS;
+   ilo_bind_sampler_states(pipe, PIPE_SHADER_GEOMETRY,
+0, num_samplers, samplers);
 
if (ilo-sampler[PIPE_SHADER_GEOMETRY].count  num_samplers) {
   ilo_bind_sampler_states(pipe, PIPE_SHADER_GEOMETRY, num_samplers,
@@ -349,11 +361,8 @@ ilo_bind_compute_sampler_states(struct pipe_context *pipe,
 unsigned num_samplers,
 void **samplers)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
-
-   if (ilo_bind_sampler_states(pipe, PIPE_SHADER_COMPUTE,
-start_slot, num_samplers, samplers))
-  ilo-dirty |= ILO_DIRTY_SAMPLER_CS;
+   ilo_bind_sampler_states(pipe, PIPE_SHADER_COMPUTE,
+start_slot, num_samplers, samplers);
 }
 
 static void

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965/fs: Change brw_wm_prog_data::urb_read_length to num_varying_inputs.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: 8c69eaba1a8a5e8a82112eb5c51b2f8978dd2c23
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8c69eaba1a8a5e8a82112eb5c51b2f8978dd2c23

Author: Paul Berry stereotype...@gmail.com
Date:   Mon Sep  2 17:35:32 2013 -0700

i965/fs: Change brw_wm_prog_data::urb_read_length to num_varying_inputs.

On gen4-5, the FS stage reads varying inputs from URB entries that
were output by the SF thread, where each register stores the
interpolation setup for two components of a vec4, therefore the FS
urb_read_length is twice the number of FS input varyings.  On gen6+,
varying inputs are directly deposited in the FS payload by the SF/SBE
fixed function logic, so urb_read_length is irrelevant.

However, in future patches, it will be nice to be able to consult
brw_wm_prog_data to determine how many varying inputs the FS expects
(rather than inferring it from gl_program::InputsRead).  So instead of
storing urb_read_length, we simply store num_varying_inputs in
brw_wm_prog_data.  On gen4-5, we multiply this by 2 to recover the URB
read length.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/brw_context.h  |2 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp |7 ---
 src/mesa/drivers/dri/i965/brw_wm_state.c |3 ++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 988e9fb..b9c9f50 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -314,7 +314,7 @@ struct brw_shader {
  */
 struct brw_wm_prog_data {
GLuint curb_read_length;
-   GLuint urb_read_length;
+   GLuint num_varying_inputs;
 
GLuint first_curbe_grf;
GLuint first_curbe_grf_16;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e788196..2967665 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1272,8 +1272,7 @@ fs_visitor::calculate_urb_setup()
  c-prog_data.urb_setup[VARYING_SLOT_PNTC] = urb_next++;
}
 
-   /* Each attribute is 4 setup channels, each of which is half a reg. */
-   c-prog_data.urb_read_length = urb_next * 2;
+   c-prog_data.num_varying_inputs = urb_next;
 }
 
 void
@@ -1298,7 +1297,9 @@ fs_visitor::assign_urb_setup()
   }
}
 
-   this-first_non_payload_grf = urb_start + c-prog_data.urb_read_length;
+   /* Each attribute is 4 setup channels, each of which is half a reg. */
+   this-first_non_payload_grf =
+  urb_start + c-prog_data.num_varying_inputs * 2;
 }
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_state.c
index c98ebe7..9aa32c0 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -133,7 +133,8 @@ brw_upload_wm_unit(struct brw_context *brw)
}
 
wm-thread3.dispatch_grf_start_reg = brw-wm.prog_data-first_curbe_grf;
-   wm-thread3.urb_entry_read_length = brw-wm.prog_data-urb_read_length;
+   wm-thread3.urb_entry_read_length =
+  brw-wm.prog_data-num_varying_inputs * 2;
wm-thread3.urb_entry_read_offset = 0;
wm-thread3.const_urb_entry_read_length =
   brw-wm.prog_data-curb_read_length;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965/fs: Expose urb_setup as part of brw_wm_prog_data.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: 58f01bd17d5587c21d7f543b8f3769f3405dc420
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=58f01bd17d5587c21d7f543b8f3769f3405dc420

Author: Paul Berry stereotype...@gmail.com
Date:   Mon Sep  2 17:24:19 2013 -0700

i965/fs: Expose urb_setup as part of brw_wm_prog_data.

At the moment, for Gen6+, the FS assumes that all varying inputs are
delivered to it in the order in which they appear in the
gl_program::InputsRead bitfield, and the SF/SBE setup code ensures
that they are delivered in this order.

When we add support for more than 64 varying components, this will no
longer always be possible, because the Gen6+ SF/SBE stage is only
capable of performing arbitrary reorderings of 16 varying slots.

To allow extra flexibility in the ordering of FS varyings, this patch
causes the FS to advertise exactly what ordering it expects.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/brw_context.h  |7 +++
 src/mesa/drivers/dri/i965/brw_fs.cpp |   10 +-
 src/mesa/drivers/dri/i965/brw_fs.h   |1 -
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |4 ++--
 4 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 108e98c..988e9fb 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -336,6 +336,13 @@ struct brw_wm_prog_data {
 */
uint32_t barycentric_interp_modes;
 
+   /**
+* Map from gl_varying_slot to the position within the FS setup data
+* payload where the varying's attribute vertex deltas should be delivered.
+* For varying slots that are not used by the FS, the value is -1.
+*/
+   int urb_setup[VARYING_SLOT_MAX];
+
/* Pointers to tracked values (only valid once
 * _mesa_load_state_parameters has been called at runtime).
 *
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index daa23b4..e788196 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1004,7 +1004,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
int location = ir-location;
for (unsigned int i = 0; i  array_elements; i++) {
   for (unsigned int j = 0; j  type-matrix_columns; j++) {
-if (urb_setup[location] == -1) {
+if (c-prog_data.urb_setup[location] == -1) {
/* If there's no incoming setup data for this slot, don't
 * emit interpolation for it.
 */
@@ -1231,7 +1231,7 @@ void
 fs_visitor::calculate_urb_setup()
 {
for (unsigned int i = 0; i  VARYING_SLOT_MAX; i++) {
-  urb_setup[i] = -1;
+  c-prog_data.urb_setup[i] = -1;
}
 
int urb_next = 0;
@@ -1239,7 +1239,7 @@ fs_visitor::calculate_urb_setup()
if (brw-gen = 6) {
   for (unsigned int i = 0; i  VARYING_SLOT_MAX; i++) {
 if (fp-Base.InputsRead  BITFIELD64_BIT(i)) {
-   urb_setup[i] = urb_next++;
+   c-prog_data.urb_setup[i] = urb_next++;
 }
   }
} else {
@@ -1257,7 +1257,7 @@ fs_visitor::calculate_urb_setup()
 * incremented, mapped or not.
 */
if (_mesa_varying_slot_in_fs((gl_varying_slot) i))
-  urb_setup[i] = urb_next;
+  c-prog_data.urb_setup[i] = urb_next;
 urb_next++;
 }
   }
@@ -1269,7 +1269,7 @@ fs_visitor::calculate_urb_setup()
* See compile_sf_prog() for more info.
*/
   if (fp-Base.InputsRead  BITFIELD64_BIT(VARYING_SLOT_PNTC))
- urb_setup[VARYING_SLOT_PNTC] = urb_next++;
+ c-prog_data.urb_setup[VARYING_SLOT_PNTC] = urb_next++;
}
 
/* Each attribute is 4 setup channels, each of which is half a reg. */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index cb4ac3b..b77d4de 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -454,7 +454,6 @@ public:
int first_non_payload_grf;
/** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
int max_grf;
-   int urb_setup[VARYING_SLOT_MAX];
 
fs_reg *fp_temp_regs;
fs_reg *fp_input_regs;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index d935c7b..8b505a0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2171,10 +2171,10 @@ fs_visitor::emit_dummy_fs()
 struct brw_reg
 fs_visitor::interp_reg(int location, int channel)
 {
-   int regnr = urb_setup[location] * 2 + channel / 2;
+   int regnr = c-prog_data.urb_setup[location] * 2 + channel / 2;
int stride = (channel  1) * 4;
 
-   assert(urb_setup[location] != -1);
+   assert(c-prog_data.urb_setup[location] != -1);
 
return brw_vec1_grf(regnr, stride);
 }

___
mesa-commit mailing list

Mesa (master): i965/sf: Use BRW_SF_URB_ENTRY_READ_OFFSET rather than hardcoded values.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: d5b4095356566b33a1c0a8163099d878fe83822a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d5b4095356566b33a1c0a8163099d878fe83822a

Author: Paul Berry stereotype...@gmail.com
Date:   Mon Sep  2 08:43:02 2013 -0700

i965/sf: Use BRW_SF_URB_ENTRY_READ_OFFSET rather than hardcoded values.

We always program the SF unit to start reading the vertex URB entry at
offset 1.  In upcoming patches, we'll be adding FS code that relies on
this.  So consistently use the constant BRW_SF_URB_ENTRY_READ_OFFSET
rather than hardcoding a 1.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/brw_context.h   |   10 ++
 src/mesa/drivers/dri/i965/brw_sf.h|2 --
 src/mesa/drivers/dri/i965/gen6_sf_state.c |2 +-
 src/mesa/drivers/dri/i965/gen7_sf_state.c |2 +-
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index b9c9f50..4c6bedc 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -478,6 +478,16 @@ struct brw_sf_prog_data {
GLuint urb_entry_size;
 };
 
+
+/**
+ * We always program SF to start reading at an offset of 1 (2 varying slots)
+ * from the start of the vertex URB entry.  This causes it to skip:
+ * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
+ * - VARYING_SLOT_PSIZ and VARYING_SLOT_POS on gen6+
+ */
+#define BRW_SF_URB_ENTRY_READ_OFFSET 1
+
+
 struct brw_clip_prog_data {
GLuint curb_read_length;/* user planes? */
GLuint clip_mode;
diff --git a/src/mesa/drivers/dri/i965/brw_sf.h 
b/src/mesa/drivers/dri/i965/brw_sf.h
index 09880fe..0006239 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.h
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -105,6 +105,4 @@ void brw_emit_point_setup( struct brw_sf_compile *c, bool 
allocate );
 void brw_emit_point_sprite_setup( struct brw_sf_compile *c, bool allocate );
 void brw_emit_anyprim_setup( struct brw_sf_compile *c );
 
-#define BRW_SF_URB_ENTRY_READ_OFFSET 1
-
 #endif
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c 
b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index c76debe..dfe9a31 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -138,7 +138,7 @@ upload_sf_state(struct brw_context *brw)
bool multisampled_fbo = ctx-DrawBuffer-Visual.samples  1;
 
int attr = 0, input_index = 0;
-   int urb_entry_read_offset = 1;
+   const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
float point_size;
uint16_t attr_overrides[VARYING_SLOT_MAX];
uint32_t point_sprite_origin;
diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c 
b/src/mesa/drivers/dri/i965/gen7_sf_state.c
index 0ff3388..715eb6c 100644
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -40,7 +40,7 @@ upload_sbe_state(struct brw_context *brw)
uint32_t dw1, dw10, dw11;
int i;
int attr = 0, input_index = 0;
-   int urb_entry_read_offset = 1;
+   const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
uint16_t attr_overrides[VARYING_SLOT_MAX];
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(ctx-DrawBuffer);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965/fs: Stop wasting input attribute space on gl_FragCoord and gl_FrontFacing.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: 3a83b20dcccf21ec184e35bcfa9bc577379dfd51
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3a83b20dcccf21ec184e35bcfa9bc577379dfd51

Author: Paul Berry stereotype...@gmail.com
Date:   Mon Sep  2 22:18:27 2013 -0700

i965/fs: Stop wasting input attribute space on gl_FragCoord and gl_FrontFacing.

Previously, if a fragment shader accessed gl_FragCoord or
gl_FrontFacing, we would assign them their own slots in the fragment
shader input attribute array, using up space that could be made
available to real varyings.  This was not strictly necessary (since
these values are not true varyings, and are instead computed from
other data available in the FS payload).  But we had to do it anyway
because the SF/SBE setup code assumed that every 1 bit in the
gl_program::InputsRead bitfield corresponded to a genuine varying
variable.

Now that the SF/SBE code consults brw_wm_prog_data and only sets up
the attributes that the fragment shader actually needs, we don't have
to do this anymore.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/brw_context.h   |9 +
 src/mesa/drivers/dri/i965/brw_fs.cpp  |3 ++-
 src/mesa/drivers/dri/i965/gen6_sf_state.c |8 
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 4c6bedc..040a8d3 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -439,6 +439,15 @@ void brw_compute_vue_map(struct brw_context *brw, struct 
brw_vue_map *vue_map,
  GLbitfield64 slots_valid, bool userclip_active);
 
 
+/**
+ * Bitmask indicating which fragment shader inputs represent varyings (and
+ * hence have to be delivered to the fragment shader by the SF/SBE stage).
+ */
+#define BRW_FS_VARYING_INPUT_MASK \
+   (BITFIELD64_RANGE(0, VARYING_SLOT_MAX)  \
+~VARYING_BIT_POS  ~VARYING_BIT_FACE)
+
+
 /*
  * Mapping of VUE map slots to interpolation modes.
  */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 2967665..9cb7950 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1238,7 +1238,8 @@ fs_visitor::calculate_urb_setup()
/* Figure out where each of the incoming setup attributes lands. */
if (brw-gen = 6) {
   for (unsigned int i = 0; i  VARYING_SLOT_MAX; i++) {
-if (fp-Base.InputsRead  BITFIELD64_BIT(i)) {
+if (fp-Base.InputsRead  BRW_FS_VARYING_INPUT_MASK 
+ BITFIELD64_BIT(i)) {
c-prog_data.urb_setup[i] = urb_next++;
 }
   }
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c 
b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 8bac559..6a9fa60 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -56,14 +56,6 @@ static uint32_t
 get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
   int fs_attr, bool two_side_color, uint32_t *max_source_attr)
 {
-   if (fs_attr == VARYING_SLOT_POS) {
-  /* This attribute will be overwritten by the fragment shader's
-   * interpolation code (see emit_interp() in brw_wm_fp.c), so just let it
-   * reference the first available attribute.
-   */
-  return 0;
-   }
-
/* Find the VUE slot for this attribute. */
int slot = vue_map-varying_to_slot[fs_attr];
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965/sf: Consult brw_wm_prog_data when setting up SF/SBE state.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: 0af1252ae478a28a76326a5c5d784864327793e2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0af1252ae478a28a76326a5c5d784864327793e2

Author: Paul Berry stereotype...@gmail.com
Date:   Mon Sep  2 21:59:04 2013 -0700

i965/sf: Consult brw_wm_prog_data when setting up SF/SBE state.

Previously, the SF/SBE setup code delivered varying inputs to the FS
in the order in which they appear in the gl_program::InputsRead
bitfield, since that's what the FS expects.

When we add support for more than 64 varying components, this will no
longer always be the case, because the Gen6+ SF/SBE stage is only
capable of performing arbitrary reorderings of 16 varying slots.  So,
when there are more than 16 vec4's worth of varying inputs, the FS
will have to adjust the order of its input varyings in order to partially
match the order of outputs from the geometry or vertex shader.

To allow extra flexibility in the ordering of FS varyings, this patch
causes the SF/SBE to deliver varying inputs to the FS in exactly the
order that the FS requests, by consulting brw_wm_prog_data::urb_setup
and brw_wm_prog_data::num_varying_inputs.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/gen6_sf_state.c |   45 +---
 src/mesa/drivers/dri/i965/gen7_sf_state.c |   13 +---
 2 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c 
b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 91b640d..8bac559 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -137,17 +137,23 @@ calculate_attr_overrides(const struct brw_context *brw,
 {
const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
uint32_t max_source_attr = 0;
-   int input_index = 0;
 
/* _NEW_LIGHT */
bool shade_model_flat = brw-ctx.Light.ShadeModel == GL_FLAT;
 
+   /* Initialize all the attr_overrides to 0.  In the loop below we'll modify
+* just the ones that correspond to inputs used by the fs.
+*/
+   memset(attr_overrides, 0, 16*sizeof(*attr_overrides));
+
for (int attr = 0; attr  VARYING_SLOT_MAX; attr++) {
   enum glsl_interp_qualifier interp_qualifier =
  brw-fragment_program-InterpQualifier[attr];
   bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == 
VARYING_SLOT_COL1;
+  /* CACHE_NEW_WM_PROG */
+  int input_index = brw-wm.prog_data-urb_setup[attr];
 
-  if (!(brw-fragment_program-Base.InputsRead  BITFIELD64_BIT(attr)))
+  if (input_index  0)
 continue;
 
   /* _NEW_POINT */
@@ -166,23 +172,23 @@ calculate_attr_overrides(const struct brw_context *brw,
interp_qualifier == INTERP_QUALIFIER_NONE))
  *flat_enables |= (1  input_index);
 
-  /* The hardware can only do the overrides on 16 overrides at a
-   * time, and the other up to 16 have to be lined up so that the
-   * input index = the output index.  We'll need to do some
-   * tweaking to make sure that's the case.
-   */
-  assert(input_index  16 || attr == input_index);
-
   /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
-  attr_overrides[input_index++] =
+  uint16_t attr_override =
  get_attr_override(brw-vue_map_geom_out,
   urb_entry_read_offset, attr,
brw-ctx.VertexProgram._TwoSideEnabled,
max_source_attr);
-   }
 
-   for (; input_index  VARYING_SLOT_MAX; input_index++)
-  attr_overrides[input_index] = 0;
+  /* The hardware can only do the overrides on 16 overrides at a
+   * time, and the other up to 16 have to be lined up so that the
+   * input index = the output index.  We'll need to do some
+   * tweaking to make sure that's the case.
+   */
+  if (input_index  16)
+ attr_overrides[input_index] = attr_override;
+  else
+ assert(attr_override == input_index);
+   }
 
/* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
 * 3DSTATE_SF DWord 1 bits 15:11, Vertex URB Entry Read Length:
@@ -207,8 +213,8 @@ static void
 upload_sf_state(struct brw_context *brw)
 {
struct gl_context *ctx = brw-ctx;
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   uint32_t num_outputs = 
_mesa_bitcount_64(brw-fragment_program-Base.InputsRead);
+   /* CACHE_NEW_WM_PROG */
+   uint32_t num_outputs = brw-wm.prog_data-num_varying_inputs;
uint32_t dw1, dw2, dw3, dw4, dw16, dw17;
int i;
/* _NEW_BUFFER */
@@ -217,7 +223,7 @@ upload_sf_state(struct brw_context *brw)
 
const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
float point_size;
-   uint16_t attr_overrides[VARYING_SLOT_MAX];
+   uint16_t attr_overrides[16];
uint32_t point_sprite_origin;
 
dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs  GEN6_SF_NUM_OUTPUTS_SHIFT;
@@ -353,7 +359,9 @@ upload_sf_state(struct brw_context *brw)
 (1  

Mesa (master): i965/fs: Consult brw_wm_prog_data::num_varying_inputs when setting up WM state.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: 8c2b9bd1dfd482622260acbbc122bbfc8bd3bbf9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8c2b9bd1dfd482622260acbbc122bbfc8bd3bbf9

Author: Paul Berry stereotype...@gmail.com
Date:   Tue Sep  3 11:30:19 2013 -0700

i965/fs: Consult brw_wm_prog_data::num_varying_inputs when setting up WM state.

Previously, we assumed that the number of varying inputs consumed by
the fragment shader was equal to the number of bits set in
gl_program::InputsRead.  However, we'll soon be making two changes
that will cause that not to be true:

- We'll stop wasting varying input space for gl_FragCoord and
  gl_FrontFacing, which aren't varyings.

- For fragment shaders that have more than 16 varying inputs, we'll
  adjust the layout of the inputs to account for the fact that the
  SF/SBE pipeline stage can't reorder inputs beyond the first 16; if
  there are GS outputs that the FS doesn't use (or vice versa) this
  may cause the number of FS varying inputs to change.

So, instead of trying to guess the number of FS inputs from
gl_program::InputsRead, simply read it from
brw_wm_prog_data::num_varying_inputs, which is guaranteed to be correct
since it's populated by fs_visitor::calculate_urb_setup().

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/gen6_wm_state.c |3 ++-
 src/mesa/drivers/dri/i965/gen7_wm_state.c |5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c 
b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 2b9f19c..af1c044 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -188,7 +188,8 @@ upload_wm_state(struct brw_context *brw)
   dw5 |= GEN6_WM_DISPATCH_ENABLE;
}
 
-   dw6 |= _mesa_bitcount_64(brw-fragment_program-Base.InputsRead) 
+   /* CACHE_NEW_WM_PROG */
+   dw6 |= brw-wm.prog_data-num_varying_inputs 
   GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
if (multisampled_fbo) {
   /* _NEW_MULTISAMPLE */
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index e7bb861..e9fe95f 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -168,6 +168,7 @@ upload_ps_state(struct brw_context *brw)
 * rendering, CurrentFragmentProgram is used for this check to
 * differentiate between the GLSL and non-GLSL cases.
 */
+   /* BRW_NEW_FRAGMENT_PROGRAM */
if (ctx-Shader.CurrentFragmentProgram == NULL)
   dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;
 
@@ -191,8 +192,8 @@ upload_ps_state(struct brw_context *brw)
   dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
}
 
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   if (brw-fragment_program-Base.InputsRead != 0)
+   /* CACHE_NEW_WM_PROG */
+   if (brw-wm.prog_data-num_varying_inputs != 0)
   dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
 
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965/sf: Consolidate common code for setting up gen6-7 attribute overrides.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: af84bbd2caec19c556d279c5a843566cbd99ecb3
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=af84bbd2caec19c556d279c5a843566cbd99ecb3

Author: Paul Berry stereotype...@gmail.com
Date:   Mon Sep  2 18:09:08 2013 -0700

i965/sf: Consolidate common code for setting up gen6-7 attribute overrides.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/brw_state.h |9 +-
 src/mesa/drivers/dri/i965/gen6_sf_state.c |  153 
 src/mesa/drivers/dri/i965/gen7_sf_state.c |   64 +---
 3 files changed, 97 insertions(+), 129 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 04c1a97..14f5feb 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -225,9 +225,12 @@ void gen4_init_vtable_sampler_functions(struct brw_context 
*brw);
 void gen7_init_vtable_sampler_functions(struct brw_context *brw);
 
 /* gen6_sf_state.c */
-uint32_t
-get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
-  int fs_attr, bool two_side_color, uint32_t *max_source_attr);
+void
+calculate_attr_overrides(const struct brw_context *brw,
+ uint16_t *attr_overrides,
+ uint32_t *point_sprite_enables,
+ uint32_t *flat_enables,
+ uint32_t *urb_entry_read_length);
 
 /* brw_vs_surface_state.c */
 void
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c 
b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index dfe9a31..91b640d 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -52,7 +52,7 @@
  * the VUE that are not needed by the fragment shader.  It is measured in
  * 256-bit increments.
  */
-uint32_t
+static uint32_t
 get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
   int fs_attr, bool two_side_color, uint32_t *max_source_attr)
 {
@@ -123,21 +123,98 @@ get_attr_override(const struct brw_vue_map *vue_map, int 
urb_entry_read_offset,
return source_attr;
 }
 
+
+/**
+ * Create the mapping from the FS inputs we produce to the previous pipeline
+ * stage (GS or VS) outputs they source from.
+ */
+void
+calculate_attr_overrides(const struct brw_context *brw,
+ uint16_t *attr_overrides,
+ uint32_t *point_sprite_enables,
+ uint32_t *flat_enables,
+ uint32_t *urb_entry_read_length)
+{
+   const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
+   uint32_t max_source_attr = 0;
+   int input_index = 0;
+
+   /* _NEW_LIGHT */
+   bool shade_model_flat = brw-ctx.Light.ShadeModel == GL_FLAT;
+
+   for (int attr = 0; attr  VARYING_SLOT_MAX; attr++) {
+  enum glsl_interp_qualifier interp_qualifier =
+ brw-fragment_program-InterpQualifier[attr];
+  bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == 
VARYING_SLOT_COL1;
+
+  if (!(brw-fragment_program-Base.InputsRead  BITFIELD64_BIT(attr)))
+continue;
+
+  /* _NEW_POINT */
+  if (brw-ctx.Point.PointSprite 
+ (attr = VARYING_SLOT_TEX0  attr = VARYING_SLOT_TEX7) 
+ brw-ctx.Point.CoordReplace[attr - VARYING_SLOT_TEX0]) {
+*point_sprite_enables |= (1  input_index);
+  }
+
+  if (attr == VARYING_SLOT_PNTC)
+*point_sprite_enables |= (1  input_index);
+
+  /* flat shading */
+  if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
+  (shade_model_flat  is_gl_Color 
+   interp_qualifier == INTERP_QUALIFIER_NONE))
+ *flat_enables |= (1  input_index);
+
+  /* The hardware can only do the overrides on 16 overrides at a
+   * time, and the other up to 16 have to be lined up so that the
+   * input index = the output index.  We'll need to do some
+   * tweaking to make sure that's the case.
+   */
+  assert(input_index  16 || attr == input_index);
+
+  /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
+  attr_overrides[input_index++] =
+ get_attr_override(brw-vue_map_geom_out,
+  urb_entry_read_offset, attr,
+   brw-ctx.VertexProgram._TwoSideEnabled,
+   max_source_attr);
+   }
+
+   for (; input_index  VARYING_SLOT_MAX; input_index++)
+  attr_overrides[input_index] = 0;
+
+   /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
+* 3DSTATE_SF DWord 1 bits 15:11, Vertex URB Entry Read Length:
+*
+* This field should be set to the minimum length required to read the
+*  maximum source attribute.  The maximum source attribute is indicated
+*  by the maximum value of the enabled Attribute # Source Attribute if
+*  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
+*  enable is not set.
+*  

Mesa (master): i965/fs: Simplify computation of key.input_slots_valid during precompile.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: a4546ec114853235db375b20fb47ddcd6a7f21e7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a4546ec114853235db375b20fb47ddcd6a7f21e7

Author: Paul Berry stereotype...@gmail.com
Date:   Tue Sep  3 11:55:17 2013 -0700

i965/fs: Simplify computation of key.input_slots_valid during precompile.

The for loop was rather silly.  In addition to checking brw->gen < 6
on each loop iteration, it took pains to exclude bits from
fp->Base.InputsRead that don't correspond to fragment shader inputs.
But those bits would never have been set in the first place, since the
only bits that are ever set in fp->Base.InputsRead are fragment shader
inputs.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/brw_fs.cpp |   12 +---
 1 files changed, 1 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 9cb7950..004b3b5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3150,17 +3150,7 @@ brw_fs_precompile(struct gl_context *ctx, struct 
gl_shader_program *prog)
}
 
if (brw-gen  6)
-  key.input_slots_valid |= BITFIELD64_BIT(VARYING_SLOT_POS);
-
-   for (int i = 0; i  VARYING_SLOT_MAX; i++) {
-  if (!(fp-Base.InputsRead  BITFIELD64_BIT(i)))
-continue;
-
-  if (brw-gen  6) {
- if (_mesa_varying_slot_in_fs((gl_varying_slot) i))
-key.input_slots_valid |= BITFIELD64_BIT(i);
-  }
-   }
+  key.input_slots_valid = fp-Base.InputsRead | VARYING_BIT_POS;
 
key.clamp_fragment_color = ctx-API == API_OPENGL_COMPAT;
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965/gs: Stop storing an input VUE map in the GS program key.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: 8a36f4382be0b9ad357f589dc326e14593bdbe8e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8a36f4382be0b9ad357f589dc326e14593bdbe8e

Author: Paul Berry stereotype...@gmail.com
Date:   Mon Sep  2 14:02:22 2013 -0700

i965/gs: Stop storing an input VUE map in the GS program key.

Now that the vertex shader output VUE map is determined solely by a
64-bit bitfield, we don't have to store it in its entirety in the
geometry shader program key; instead, we can just store the bitfield,
and let the geometry shader infer the VUE map at compile time.

This dramatically reduces the size of the geometry shader program key,
which we want to keep small since it gets recomputed whenever the
active program changes.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/brw_vec4_gs.c   |6 --
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |4 ++--
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |3 ++-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index 2d1d163..c526107 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -200,10 +200,12 @@ do_gs_prog(struct brw_context *brw,
 
c.prog_data.output_topology = prim_to_hw_prim[gp-program.OutputType];
 
+   brw_compute_vue_map(brw, c.input_vue_map, c.key.input_varyings);
+
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
 * need to program a URB read length of ceiling(num_slots / 2).
 */
-   c.prog_data.base.urb_read_length = (c.key.input_vue_map.num_slots + 1) / 2;
+   c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
 
void *mem_ctx = ralloc_context(NULL);
unsigned program_size;
@@ -272,7 +274,7 @@ brw_upload_gs_prog(struct brw_context *brw)
   key.base.tex);
 
/* BRW_NEW_VUE_MAP_VS */
-   key.input_vue_map = brw-vue_map_vs;
+   key.input_varyings = brw-vue_map_vs.slots_valid;
 
if (!brw_search_cache(brw-cache, BRW_GS_PROG,
  key, sizeof(key),
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 960f970..cb7de8d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -70,8 +70,8 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int 
*attribute_map)
assert(num_input_vertices = MAX_GS_INPUT_VERTICES);
unsigned input_array_stride = c-prog_data.base.urb_read_length * 2;
 
-   for (int slot = 0; slot  c-key.input_vue_map.num_slots; slot++) {
-  int varying = c-key.input_vue_map.slot_to_varying[slot];
+   for (int slot = 0; slot  c-input_vue_map.num_slots; slot++) {
+  int varying = c-input_vue_map.slot_to_varying[slot];
   for (unsigned vertex = 0; vertex  num_input_vertices; vertex++) {
  attribute_map[BRW_VARYING_SLOT_COUNT * vertex + varying] =
 payload_reg + input_array_stride * vertex + slot;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h 
b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
index 90dd1de..e8da2e3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
@@ -37,7 +37,7 @@ struct brw_gs_prog_key
 {
struct brw_vec4_prog_key base;
 
-   struct brw_vue_map input_vue_map;
+   GLbitfield64 input_varyings;
 };
 
 
@@ -49,6 +49,7 @@ struct brw_gs_compile
struct brw_vec4_compile base;
struct brw_gs_prog_key key;
struct brw_gs_prog_data prog_data;
+   struct brw_vue_map input_vue_map;
 
struct brw_geometry_program *gp;
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965/vec4: Generate URB writes using a loop.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: 784044c206efd774ce1f7a481311480f85446887
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=784044c206efd774ce1f7a481311480f85446887

Author: Paul Berry stereotype...@gmail.com
Date:   Tue Sep  3 12:30:06 2013 -0700

i965/vec4: Generate URB writes using a loop.

Previously we only ever did 1 or 2 URB writes, since the maximum
number of varyings we support is small enough to fit in 2 URB writes.
But GL 3.2 requires the geometry shader to support 128 output varying
components, and this could require up to 3 URB writes.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   52 ++--
 1 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 304636a..874e6e3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2694,47 +2694,37 @@ vec4_visitor::emit_vertex()
   emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
}
 
-   /* Set up the VUE data for the first URB write */
-   int slot;
-   for (slot = 0; slot  prog_data-vue_map.num_slots; ++slot) {
-  emit_urb_slot(mrf++, prog_data-vue_map.slot_to_varying[slot]);
-
-  /* If this was max_usable_mrf, we can't fit anything more into this URB
-   * WRITE.
+   /* We may need to split this up into several URB writes, so do them in a
+* loop.
+*/
+   int slot = 0;
+   bool complete = false;
+   do {
+  /* URB offset is in URB row increments, and each of our MRFs is half of
+   * one of those, since we're doing interleaved writes.
*/
-  if (mrf  max_usable_mrf) {
-slot++;
-break;
-  }
-   }
-
-   bool complete = slot = prog_data-vue_map.num_slots;
-   current_annotation = URB write;
-   vec4_instruction *inst = emit_urb_write_opcode(complete);
-   inst-base_mrf = base_mrf;
-   inst-mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+  int offset = slot / 2;
 
-   /* Optional second URB write */
-   if (!complete) {
   mrf = base_mrf + 1;
-
   for (; slot  prog_data-vue_map.num_slots; ++slot) {
-assert(mrf  max_usable_mrf);
-
  emit_urb_slot(mrf++, prog_data-vue_map.slot_to_varying[slot]);
+
+ /* If this was max_usable_mrf, we can't fit anything more into this
+  * URB WRITE.
+  */
+ if (mrf  max_usable_mrf) {
+slot++;
+break;
+ }
   }
 
+  complete = slot = prog_data-vue_map.num_slots;
   current_annotation = URB write;
-  inst = emit_urb_write_opcode(true /* complete */);
+  vec4_instruction *inst = emit_urb_write_opcode(complete);
   inst-base_mrf = base_mrf;
   inst-mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
-  /* URB destination offset.  In the previous write, we got MRFs
-   * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
-   * URB row increments, and each of our MRFs is half of one of
-   * those, since we're doing interleaved writes.
-   */
-  inst-offset += (max_usable_mrf - base_mrf) / 2;
-   }
+  inst-offset += offset;
+   } while(!complete);
 }
 
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965/gen6+: Remove VUE map dependency on userclip_active.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: d1ad447f0187f5e6044fec65ace6ce1e10b156c2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d1ad447f0187f5e6044fec65ace6ce1e10b156c2

Author: Paul Berry stereotype...@gmail.com
Date:   Mon Sep  2 13:46:25 2013 -0700

i965/gen6+: Remove VUE map dependency on userclip_active.

Previously, on Gen6+, we laid out the vertex (or geometry) shader VUE
map differently depending whether user clipping was active.  If it was
active, we put the clip distances in slots 2 and 3 (where the clipper
expects them); if it was inactive, we assigned them in the order of
the gl_varying_slot enum.

This made for unnecessary recompiles, since turning clipping on/off
for a shader that used gl_ClipDistance might rearrange the varyings.
It also required extra bookkeeping, since it required the user
clipping flag to be provided to brw_compute_vue_map() as a parameter.

With this patch, we always put clip distances in slots 2 and 3 if
they are written to.  do_vs_prog() and do_gs_prog() are responsible
for ensuring that clip distances are written to when user clipping is
enabled (as do_vs_prog() previously did for gen4-5).

This makes the only input to brw_compute_vue_map() a bitfield of which
varyings the shader writes to, a fact that we'll take advantage of in
forthcoming patches.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/brw_context.h |2 +-
 src/mesa/drivers/dri/i965/brw_vec4_gs.c |   15 ---
 src/mesa/drivers/dri/i965/brw_vs.c  |   26 +-
 3 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 040a8d3..9d41529 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -436,7 +436,7 @@ static inline GLuint brw_varying_to_offset(struct 
brw_vue_map *vue_map,
 }
 
 void brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map,
- GLbitfield64 slots_valid, bool userclip_active);
+ GLbitfield64 slots_valid);
 
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index f67ae2b..2d1d163 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -94,9 +94,18 @@ do_gs_prog(struct brw_context *brw,
c.prog_data.control_data_header_size_hwords =
   ALIGN(c.control_data_header_size_bits, 256) / 256;
 
-   brw_compute_vue_map(brw, c.prog_data.base.vue_map,
-   gp-program.Base.OutputsWritten,
-   c.key.base.userclip_active);
+   GLbitfield64 outputs_written = gp-program.Base.OutputsWritten;
+
+   /* In order for legacy clipping to work, we need to populate the clip
+* distance varying slots whenever clipping is enabled, even if the vertex
+* shader doesn't write to gl_ClipDistance.
+*/
+   if (c.key.base.userclip_active) {
+  outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
+  outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
+   }
+
+   brw_compute_vue_map(brw, c.prog_data.base.vue_map, outputs_written);
 
/* Compute the output vertex size.
 *
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
b/src/mesa/drivers/dri/i965/brw_vs.c
index 7c7493f..d5909a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -52,14 +52,10 @@ static inline void assign_vue_slot(struct brw_vue_map 
*vue_map,
 
 /**
  * Compute the VUE map for vertex shader program.
- *
- * Note that consumers of this map using cache keys must include
- * prog_data-userclip and prog_data-outputs_written in their key
- * (generated by CACHE_NEW_VS_PROG).
  */
 void
 brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map,
-GLbitfield64 slots_valid, bool userclip_active)
+GLbitfield64 slots_valid)
 {
vue_map-slots_valid = slots_valid;
int i;
@@ -112,10 +108,11 @@ brw_compute_vue_map(struct brw_context *brw, struct 
brw_vue_map *vue_map,
*/
   assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
   assign_vue_slot(vue_map, VARYING_SLOT_POS);
-  if (userclip_active) {
+  if (slots_valid  BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
  assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
+  if (slots_valid  BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
  assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
-  }
+
   /* front and back colors need to be consecutive so that we can use
* ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
* two-sided color.
@@ -272,15 +269,18 @@ do_vs_prog(struct brw_context *brw,
  outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
   if (outputs_written  BITFIELD64_BIT(VARYING_SLOT_BFC1))
  outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
+   }
 
-  if 

Mesa (master): i965/fs: When >64 input components, order them to match prev pipeline stage.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: 875972029eddfd53cb90a8e34e9f27b2afed119f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=875972029eddfd53cb90a8e34e9f27b2afed119f

Author: Paul Berry stereotype...@gmail.com
Date:   Tue Sep  3 12:15:53 2013 -0700

i965/fs: When >64 input components, order them to match prev pipeline stage.

Since the SF/SBE stage is only capable of performing arbitrary
reorderings of 16 varying slots, we can't arrange the fragment shader
inputs in an arbitrary order if there are more than 16 input varying
slots in use.  We need to make sure that slots 16-31 match the
corresponding outputs of the previous pipeline stage.

The easiest way to accomplish this is to just make all varying slots
match up with the previous pipeline stage.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/brw_fs.cpp |   49 +
 src/mesa/drivers/dri/i965/brw_wm.c   |3 +-
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 004b3b5..2ebadc8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1237,11 +1237,47 @@ fs_visitor::calculate_urb_setup()
int urb_next = 0;
/* Figure out where each of the incoming setup attributes lands. */
if (brw-gen = 6) {
-  for (unsigned int i = 0; i  VARYING_SLOT_MAX; i++) {
-if (fp-Base.InputsRead  BRW_FS_VARYING_INPUT_MASK 
- BITFIELD64_BIT(i)) {
-   c-prog_data.urb_setup[i] = urb_next++;
-}
+  if (_mesa_bitcount_64(fp-Base.InputsRead 
+BRW_FS_VARYING_INPUT_MASK) = 16) {
+ /* The SF/SBE pipeline stage can do arbitrary rearrangement of the
+  * first 16 varying inputs, so we can put them wherever we want.
+  * Just put them in order.
+  *
+  * This is useful because it means that (a) inputs not used by the
+  * fragment shader won't take up valuable register space, and (b) we
+  * won't have to recompile the fragment shader if it gets paired with
+  * a different vertex (or geometry) shader.
+  */
+ for (unsigned int i = 0; i  VARYING_SLOT_MAX; i++) {
+if (fp-Base.InputsRead  BRW_FS_VARYING_INPUT_MASK 
+BITFIELD64_BIT(i)) {
+   c-prog_data.urb_setup[i] = urb_next++;
+}
+ }
+  } else {
+ /* We have enough input varyings that the SF/SBE pipeline stage can't
+  * arbitrarily rearrange them to suit our whim; we have to put them
+  * in an order that matches the output of the previous pipeline stage
+  * (geometry or vertex shader).
+  */
+ struct brw_vue_map prev_stage_vue_map;
+ brw_compute_vue_map(brw, prev_stage_vue_map,
+ c-key.input_slots_valid);
+ int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
+ assert(prev_stage_vue_map.num_slots = first_slot + 32);
+ for (int slot = first_slot; slot  prev_stage_vue_map.num_slots;
+  slot++) {
+int varying = prev_stage_vue_map.slot_to_varying[slot];
+/* Note that varying == BRW_VARYING_SLOT_COUNT when a slot is
+ * unused.
+ */
+if (varying != BRW_VARYING_SLOT_COUNT 
+(fp-Base.InputsRead  BRW_FS_VARYING_INPUT_MASK 
+ BITFIELD64_BIT(varying))) {
+   c-prog_data.urb_setup[varying] = slot - first_slot;
+}
+ }
+ urb_next = prev_stage_vue_map.num_slots - first_slot;
   }
} else {
   /* FINISHME: The sf doesn't map VS-FS inputs for us very well. */
@@ -3149,7 +3185,8 @@ brw_fs_precompile(struct gl_context *ctx, struct 
gl_shader_program *prog)
   key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
}
 
-   if (brw-gen  6)
+   if (brw-gen  6 || _mesa_bitcount_64(fp-Base.InputsRead 
+ BRW_FS_VARYING_INPUT_MASK)  16)
   key.input_slots_valid = fp-Base.InputsRead | VARYING_BIT_POS;
 
key.clamp_fragment_color = ctx-API == API_OPENGL_COMPAT;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index 1b23a4f..3d7ca2a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -466,7 +466,8 @@ static void brw_wm_populate_key( struct brw_context *brw,
   (ctx-Multisample.SampleAlphaToCoverage || ctx-Color.AlphaEnabled);
 
/* BRW_NEW_VUE_MAP_GEOM_OUT */
-   if (brw-gen  6)
+   if (brw-gen  6 || _mesa_bitcount_64(fp-program.Base.InputsRead 
+ BRW_FS_VARYING_INPUT_MASK)  16)
   key-input_slots_valid = brw-vue_map_geom_out.slots_valid;
 
/* The unique fragment program ID */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org

Mesa (master): i965/ff_gs: Generate URB writes using a loop.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: f5d38c58ee2a63849295cb2006c78d415af0b6ae
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f5d38c58ee2a63849295cb2006c78d415af0b6ae

Author: Paul Berry stereotype...@gmail.com
Date:   Tue Sep  3 14:38:19 2013 -0700

i965/ff_gs: Generate URB writes using a loop.

Previously we only ever did 1 URB write, since the maximum number of
varyings we support is small enough to fit in 1 URB write (when using
BRW_URB_SWIZZLE_NONE, which is what the pre-Gen7 GS always uses).  But
we're about to increase the number of varying components we support
from 64 to 128.

With 128 varyings, the most URB writes we'll have to do is 2, but it's
just as easy to write a general-purpose loop.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/brw_gs_emit.c |   65 +++
 1 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c 
b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index 2c94eb0..9050b95 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -169,31 +169,46 @@ static void brw_ff_gs_emit_vue(struct brw_ff_gs_compile 
*c,
bool last)
 {
struct brw_compile *p = c-func;
-   bool allocate = !last;
+   int write_offset = 0;
+   bool complete = false;
 
-   /* Copy the vertex from vertn into m1..mN+1:
-*/
-   brw_copy8(p, brw_message_reg(1), vert, c-nr_regs);
+   do {
+  /* We can't write more than 14 registers at a time to the URB */
+  int write_len = MIN2(c-nr_regs - write_offset, 14);
+  if (write_len == c-nr_regs - write_offset)
+ complete = true;
 
-   /* Send each vertex as a seperate write to the urb.  This is
-* different to the concept in brw_sf_emit.c, where subsequent
-* writes are used to build up a single urb entry.  Each of these
-* writes instantiates a seperate urb entry, and a new one must be
-* allocated each time.
-*/
-   brw_urb_WRITE(p, 
-allocate ? c-reg.temp
-  : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
-0,
-c-reg.header,
-allocate ? BRW_URB_WRITE_ALLOCATE_COMPLETE
-  : BRW_URB_WRITE_EOT_COMPLETE,
-c-nr_regs + 1, /* msg length */
-allocate ? 1 : 0, /* response length */
-0, /* urb offset */
-BRW_URB_SWIZZLE_NONE);
-
-   if (allocate) {
+  /* Copy the vertex from vertn into m1..mN+1:
+   */
+  brw_copy8(p, brw_message_reg(1), offset(vert, write_offset), write_len);
+
+  /* Send the vertex data to the URB.  If this is the last write for this
+   * vertex, then we mark it as complete, and either end the thread or
+   * allocate another vertex URB entry (depending whether this is the last
+   * vertex).
+   */
+  enum brw_urb_write_flags flags;
+  if (!complete)
+ flags = BRW_URB_WRITE_NO_FLAGS;
+  else if (last)
+ flags = BRW_URB_WRITE_EOT_COMPLETE;
+  else
+ flags = BRW_URB_WRITE_ALLOCATE_COMPLETE;
+  brw_urb_WRITE(p,
+(flags  BRW_URB_WRITE_ALLOCATE) ? c-reg.temp
+: retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+0,
+c-reg.header,
+flags,
+write_len + 1, /* msg length */
+(flags  BRW_URB_WRITE_ALLOCATE) ? 1
+: 0, /* response length */
+write_offset,  /* urb offset */
+BRW_URB_SWIZZLE_NONE);
+  write_offset += write_len;
+   } while (!complete);
+
+   if (!last) {
   brw_MOV(p, get_element_ud(c-reg.header, 0),
   get_element_ud(c-reg.temp, 0));
}

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965/gen6+: Support 128 varying components.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: 24765c58bd7b041f92ff51ec354f5d6b0ce35daa
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=24765c58bd7b041f92ff51ec354f5d6b0ce35daa

Author: Paul Berry stereotype...@gmail.com
Date:   Tue Sep  3 12:37:47 2013 -0700

i965/gen6+: Support 128 varying components.

GL 3.2 requires us to support 128 varying components for geometry
shader outputs and fragment shader inputs, and 64 varying components
otherwise.  But there's no hardware limitation that restricts us to 64
varying components, and core Mesa doesn't currently allow different
stages to have different maximum values, so just go ahead and enable
128 varying components for all stages.  This gets us better test
coverage anyway.

Even though we are only working on GL 3.2 support for gen7 right now,
gen6 also supports 128 varying components, so go ahead and switch it
on there too.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/brw_context.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 4fcc9fb..7b38ea3 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -247,6 +247,9 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx-Const.DisableGLSLLineContinuations =
   driQueryOptionb(brw-optionCache, disable_glsl_line_continuations);
 
+   if (brw-gen = 6)
+  ctx-Const.MaxVarying = 32;
+
/* We want the GLSL compiler to emit code that uses condition codes */
for (int i = 0; i  MESA_SHADER_TYPES; i++) {
   ctx-ShaderCompilerOptions[i].MaxIfDepth = brw-gen  6 ? 16 : UINT_MAX;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965/gen6: Fix assertions on VS/GS URB size.

2013-09-16 Thread Paul Berry
Module: Mesa
Branch: master
Commit: 57b8cff33cab9ce93e3ee1252e12d66e48bb67e0
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=57b8cff33cab9ce93e3ee1252e12d66e48bb67e0

Author: Paul Berry stereotype...@gmail.com
Date:   Tue Sep  3 14:19:18 2013 -0700

i965/gen6: Fix assertions on VS/GS URB size.

The {VS,GS} URB Entry Allocation Size fields of 3DSTATE_URB allow
values in the range 0-4, but they are U8-1 fields, so the range of
possible allocation sizes is 1-5.  We were erroneously prohibiting a
size of 5.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

---

 src/mesa/drivers/dri/i965/gen6_urb.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c 
b/src/mesa/drivers/dri/i965/gen6_urb.c
index bb4cfa8..b694f5d 100644
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -86,8 +86,8 @@ gen6_upload_urb( struct brw_context *brw )
assert(brw-urb.nr_vs_entries = brw-urb.min_vs_entries);
assert(brw-urb.nr_vs_entries % 4 == 0);
assert(brw-urb.nr_gs_entries % 4 == 0);
-   assert(vs_size  5);
-   assert(gs_size  5);
+   assert(vs_size = 5);
+   assert(gs_size = 5);
 
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_URB  16 | (3 - 2));

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (9.2): radeon/winsys: pad IBs to a multiple of 8 DWs

2013-09-16 Thread Dave Airlie
Module: Mesa
Branch: 9.2
Commit: 2cda3f0e905ca6dd5a42a6bcf5d5197fcc3c6bfa
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2cda3f0e905ca6dd5a42a6bcf5d5197fcc3c6bfa

Author: Alex Deucher alexander.deuc...@amd.com
Date:   Fri Sep  6 16:43:34 2013 -0400

radeon/winsys: pad IBs to a multiple of 8 DWs

This aligns the gfx, compute, and dma IBs to 8 DW boundaries.
This aligns the IB to the fetch size of the CP for optimal
performance. Additionally, r6xx hardware requires at least 4
DW alignment to avoid a hw bug.  This also aligns the DMA
IBs to 8 DW which is required for the DMA engine.  This
alignment is already handled in the gallium driver, but that
patch can be removed now that it's done in the winsys.

Reviewed-by: Marek Olšák marek.ol...@amd.com
Signed-off-by: Alex Deucher alexander.deuc...@amd.com
CC: 9.2 mesa-sta...@lists.freedesktop.org
CC: 9.1 mesa-sta...@lists.freedesktop.org
(cherry picked from commit a81beee37e0dd7b75422448420e8e8b0b4b76c1e)

---

 src/gallium/winsys/radeon/drm/radeon_drm_cs.c |   30 +
 1 files changed, 30 insertions(+), 0 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index ea0c99d..38a9209 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -466,6 +466,36 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs 
*rcs, unsigned flags, ui
 struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
 struct radeon_cs_context *tmp;
 
+switch (cs-base.ring_type) {
+case RING_DMA:
+   /* pad DMA ring to 8 DWs */
+   if (cs-ws-info.chip_class = SI) {
+   while (rcs-cdw  7)
+   OUT_CS(cs-base, 0xf000); /* NOP packet */
+   } else {
+   while (rcs-cdw  7)
+   OUT_CS(cs-base, 0x); /* NOP packet */
+   }
+   break;
+case RING_GFX:
+   /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements
+* r6xx, requires at least 4 dw alignment to avoid a hw bug.
+*/
+if (flags  RADEON_FLUSH_COMPUTE) {
+   if (cs-ws-info.chip_class = SI) {
+   while (rcs-cdw  7)
+   OUT_CS(cs-base, 0x8000); /* type2 nop 
packet */
+   } else {
+   while (rcs-cdw  7)
+   OUT_CS(cs-base, 0x1000); /* type3 nop 
packet */
+   }
+   } else {
+   while (rcs-cdw  7)
+   OUT_CS(cs-base, 0x8000); /* type2 nop packet 
*/
+   }
+   break;
+}
+
 if (rcs-cdw  RADEON_MAX_CMDBUF_DWORDS) {
fprintf(stderr, radeon: command stream overflowed\n);
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (9.2): nv30: find first unused texcoord rather than bailing if first is used

2013-09-16 Thread Dave Airlie
Module: Mesa
Branch: 9.2
Commit: 3b852f9d52732066bffd498313b3ccae4b615481
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3b852f9d52732066bffd498313b3ccae4b615481

Author: Ilia Mirkin imir...@alum.mit.edu
Date:   Sun Sep  1 12:38:52 2013 -0400

nv30: find first unused texcoord rather than bailing if first is used

This fixes shaders produced by supertuxkart.

Cc: 9.2 mesa-sta...@lists.freedesktop.org
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
(cherry picked from commit 3282697621241e646247b85327c50747416a766b)

---

 src/gallium/drivers/nv30/nvfx_fragprog.c |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nv30/nvfx_fragprog.c 
b/src/gallium/drivers/nv30/nvfx_fragprog.c
index 1231862..a734330 100644
--- a/src/gallium/drivers/nv30/nvfx_fragprog.c
+++ b/src/gallium/drivers/nv30/nvfx_fragprog.c
@@ -976,9 +976,8 @@ nvfx_fragprog_assign_generic(struct nv30_context *nvfx, 
struct nvfx_fpc *fpc,
 fpc-r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
 return TRUE;
  }
- return FALSE;
   }
-  return TRUE;
+  return FALSE;
default:
   return TRUE;
}

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965: Introduce a prog_data temporary in gen6_upload_wm_push_constants.

2013-09-16 Thread Kenneth Graunke
Module: Mesa
Branch: master
Commit: d385edf4c32bf92daa415d32fc1e70934cfca268
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d385edf4c32bf92daa415d32fc1e70934cfca268

Author: Kenneth Graunke kenn...@whitecape.org
Date:   Fri Sep 13 14:34:48 2013 -0700

i965: Introduce a prog_data temporary in gen6_upload_wm_push_constants.

This saves a bit of typing and shortens a few lines.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
Reviewed-by: Paul Berry stereotype...@gmail.com

---

 src/mesa/drivers/dri/i965/gen6_wm_state.c |   16 
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c 
b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index af1c044..1fa5d95 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -41,6 +41,8 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
   brw_fragment_program_const(brw-fragment_program);
+   /* CACHE_NEW_WM_PROG */
+   const struct brw_wm_prog_data *prog_data = brw-wm.prog_data;
 
/* Updates the ParameterValues[i] pointers for all parameters of the
 * basic type of PROGRAM_STATE_VAR.
@@ -48,25 +50,23 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
/* XXX: Should this happen somewhere before to get our state flag set? */
_mesa_load_state_parameters(ctx, fp-program.Base.Parameters);
 
-   /* CACHE_NEW_WM_PROG */
-   if (brw-wm.prog_data-nr_params != 0) {
+   if (prog_data-nr_params != 0) {
   float *constants;
   unsigned int i;
 
   constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS,
- brw-wm.prog_data-nr_params *
- sizeof(float),
+ prog_data-nr_params * sizeof(float),
  32, brw-wm.base.push_const_offset);
 
-  for (i = 0; i  brw-wm.prog_data-nr_params; i++) {
-constants[i] = *brw-wm.prog_data-param[i];
+  for (i = 0; i  prog_data-nr_params; i++) {
+constants[i] = *prog_data-param[i];
   }
 
   if (0) {
 printf(WM constants:\n);
-for (i = 0; i  brw-wm.prog_data-nr_params; i++) {
+for (i = 0; i  prog_data-nr_params; i++) {
if ((i  7) == 0)
-  printf(g%d: , brw-wm.prog_data-first_curbe_grf + i / 8);
+  printf(g%d: , prog_data-first_curbe_grf + i / 8);
printf(%8f , constants[i]);
if ((i  7) == 7)
   printf(\n);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965: Set brw_stage_state::push_const_size for PS constants.

2013-09-16 Thread Kenneth Graunke
Module: Mesa
Branch: master
Commit: e776c18afb356aeb4a1d1d26ac1e440f37174e37
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e776c18afb356aeb4a1d1d26ac1e440f37174e37

Author: Kenneth Graunke kenn...@whitecape.org
Date:   Fri Sep 13 14:37:09 2013 -0700

i965: Set brw_stage_state::push_const_size for PS constants.

This paves the way for using gen7_upload_constant_state for PS data.

The formula is copied from gen7_wm_state.c.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
Reviewed-by: Paul Berry stereotype...@gmail.com

---

 src/mesa/drivers/dri/i965/gen6_wm_state.c |7 ++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c 
b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 1fa5d95..c96a107 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -50,7 +50,9 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
/* XXX: Should this happen somewhere before to get our state flag set? */
_mesa_load_state_parameters(ctx, fp-program.Base.Parameters);
 
-   if (prog_data-nr_params != 0) {
+   if (prog_data-nr_params == 0) {
+  brw-wm.base.push_const_size = 0;
+   } else {
   float *constants;
   unsigned int i;
 
@@ -75,6 +77,9 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
printf("\n");
 printf("\n");
   }
+
+  brw->wm.base.push_const_size =
+ ALIGN(prog_data->nr_params, prog_data->dispatch_width) / 8;
}
 }
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965: Use gen7_upload_constant_state for 3DSTATE_CONSTANT_PS as well.

2013-09-16 Thread Kenneth Graunke
Module: Mesa
Branch: master
Commit: 9ad6dda21e149de021889f224cc98852654d175b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9ad6dda21e149de021889f224cc98852654d175b

Author: Kenneth Graunke kenn...@whitecape.org
Date:   Fri Sep 13 14:41:04 2013 -0700

i965: Use gen7_upload_constant_state for 3DSTATE_CONSTANT_PS as well.

Now we use gen7_upload_constant_state() for all three shader stages.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
Reviewed-by: Paul Berry stereotype...@gmail.com

---

 src/mesa/drivers/dri/i965/gen7_wm_state.c |   28 +---
 1 files changed, 1 insertions(+), 27 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index e9fe95f..80073cd 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -129,33 +129,7 @@ upload_ps_state(struct brw_context *brw)
ADVANCE_BATCH();
 
/* CACHE_NEW_WM_PROG */
-   if (brw->wm.prog_data->nr_params == 0) {
-  /* Disable the push constant buffers. */
-  BEGIN_BATCH(7);
-  OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  ADVANCE_BATCH();
-   } else {
-  BEGIN_BATCH(7);
-  OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
-
-  OUT_BATCH(ALIGN(brw->wm.prog_data->nr_params,
- brw->wm.prog_data->dispatch_width) / 8);
-  OUT_BATCH(0);
-  /* Pointer to the WM constant buffer.  Covered by the set of
-   * state flags from gen6_upload_wm_push_constants.
-   */
-  OUT_BATCH(brw->wm.base.push_const_offset | GEN7_MOCS_L3);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  ADVANCE_BATCH();
-   }
+   gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);
 
dw2 = dw4 = dw5 = 0;
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit