[Mesa-dev] [PATCH 11/15] i965/vec4: add support for packing tcs outputs

2016-07-18 Thread Timothy Arceri
Reviewed-by: Edward O'Callaghan 
---
 src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index 8bd150a..4bc3be7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -406,6 +406,13 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
  }
   }
 
+  unsigned first_component = nir_intrinsic_component(instr);
+  if (first_component) {
+ assert(swiz == BRW_SWIZZLE_XYZW);
+ swiz = BRW_SWZ_COMP_OUTPUT(first_component);
+ mask = mask << first_component;
+  }
+
   emit_urb_write(swizzle(value, swiz), mask,
  imm_offset, indirect_offset);
   break;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/15] i965/vec4: add support for packing vs/gs/tes outputs

2016-07-18 Thread Timothy Arceri
Here we create a new output_generic_reg array with the ability to
store the dst_reg for each component of user-defined varyings.
This is needed as the previous code only stored the dst_reg based
on the varying location, which meant packed varyings would overwrite
each other.
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |  3 +++
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp |  9 ++-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 37 +++---
 3 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 3043147..4236b51 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -114,6 +114,8 @@ public:
 * for the ir->location's used.
 */
dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
+   dst_reg output_generic_reg[MAX_VARYINGS_INCL_PATCH][4];
+   unsigned output_generic_num_components[MAX_VARYINGS_INCL_PATCH][4];
const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
int uniforms;
 
@@ -270,6 +272,7 @@ public:
void emit_ndc_computation();
void emit_psiz_and_flags(dst_reg reg);
vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
+   void emit_generic_urb_slot(dst_reg reg, int varying, int component);
virtual void emit_urb_slot(dst_reg reg, int varying);
 
void emit_shader_time_begin();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 33ad852..e5a091d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -416,7 +416,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
   src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
 instr->num_components);
 
-  output_reg[varying] = dst_reg(src);
+  if (varying >= VARYING_SLOT_VAR0) {
+ unsigned c = nir_intrinsic_component(instr);
+ unsigned v = varying - VARYING_SLOT_VAR0;
+ output_generic_reg[v][c] = dst_reg(src);
+ output_generic_num_components[v][c] = instr->num_components;
+  } else {
+ output_reg[varying] = dst_reg(src);
+  }
   break;
}
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 652b453..e6eea69 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1279,13 +1279,35 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int 
varying)
assert(varying < VARYING_SLOT_MAX);
assert(output_reg[varying].type == reg.type);
current_annotation = output_reg_annotation[varying];
-   if (output_reg[varying].file != BAD_FILE)
+   if (output_reg[varying].file != BAD_FILE) {
   return emit(MOV(reg, src_reg(output_reg[varying])));
-   else
+   } else
   return NULL;
 }
 
 void
+vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component)
+{
+   assert(varying < VARYING_SLOT_MAX);
+   assert(varying >= VARYING_SLOT_VAR0);
+   varying = varying - VARYING_SLOT_VAR0;
+
+   unsigned num_comps = output_generic_num_components[varying][component];
+   if (num_comps == 0)
+  return;
+
+   assert(output_generic_reg[varying][component].type == reg.type);
+   current_annotation = output_reg_annotation[varying];
+   if (output_generic_reg[varying][component].file != BAD_FILE) {
+  src_reg src = src_reg(output_generic_reg[varying][component]);
+  src.swizzle = BRW_SWZ_COMP_OUTPUT(component);
+  reg.writemask =
+ brw_writemask_for_component_packing(num_comps, component);
+  emit(MOV(reg, src));
+   }
+}
+
+void
 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
 {
reg.type = BRW_REGISTER_TYPE_F;
@@ -1324,7 +1346,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
   /* No need to write to this slot */
   break;
default:
-  emit_generic_urb_slot(reg, varying);
+  if (varying >= VARYING_SLOT_VAR0) {
+ for (int i = 0; i < 4; i++) {
+emit_generic_urb_slot(reg, varying, i);
+ }
+  } else {
+ emit_generic_urb_slot(reg, varying);
+  }
   break;
}
 }
@@ -1772,6 +1800,9 @@ vec4_visitor::vec4_visitor(const struct brw_compiler 
*compiler,
this->current_annotation = NULL;
memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
 
+   memset(this->output_generic_num_components, 0,
+  sizeof(this->output_generic_num_components));
+
this->virtual_grf_start = NULL;
this->virtual_grf_end = NULL;
this->live_intervals = NULL;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/15] i965: add helpers for creating component layout swizzle

2016-07-18 Thread Timothy Arceri
This will be used to swizzle components to the beginning or end
of the vector based on the component layout qualifier and whether
we are doing a load or store.

Reviewed-by: Edward O'Callaghan 
---
 src/mesa/drivers/dri/i965/brw_reg.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_reg.h 
b/src/mesa/drivers/dri/i965/brw_reg.h
index 38cf8e3..7eab7b5 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -88,6 +88,9 @@ struct brw_device_info;
 #define BRW_SWIZZLE_ZWZW  BRW_SWIZZLE4(2,3,2,3)
 #define BRW_SWIZZLE_WZYX  BRW_SWIZZLE4(3,2,1,0)
 
+#define BRW_SWZ_COMP_INPUT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2))
+#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2))
+
 static inline bool
 brw_is_single_value_swizzle(unsigned swiz)
 {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/15] nir: add doubles component packing support

2016-07-18 Thread Timothy Arceri
This makes sure we give the correct driver location
for doubles when using component packing.
---
 src/compiler/nir/nir_lower_io.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index e480264..7a72e69 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -75,6 +75,22 @@ nir_assign_var_locations(struct exec_list *var_list, 
unsigned *size,
  if (locations[idx][var->data.index] == -1) {
 var->data.driver_location = location;
 locations[idx][var->data.index] = location;
+
+/* A dvec3 can be packed with a double, so we need special handling
+ * for this as we are packing across two locations.
+ */
+if (glsl_get_base_type(var->type) == GLSL_TYPE_DOUBLE &&
+glsl_get_vector_elements(var->type) == 3) {
+   /* Hack around type_size functions that expect vectors to be
+* padded out to vec4.
+*/
+   unsigned dsize = type_size(glsl_double_type());
+   unsigned offset =
+  dsize == type_size(glsl_float_type()) ? dsize : dsize * 2;
+
+   locations[idx + 1][var->data.index] = location + offset;
+}
+
 location += type_size(var->type);
  } else {
 var->data.driver_location = locations[idx][var->data.index];
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/15] i965: enable ARB_enhanced_layouts for gen6+

2016-07-18 Thread Timothy Arceri
Reviewed-by: Edward O'Callaghan 
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index c557137..ec89094 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -294,6 +294,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_conditional_render_inverted = true;
   ctx->Extensions.ARB_cull_distance = true;
   ctx->Extensions.ARB_draw_buffers_blend = true;
+  ctx->Extensions.ARB_enhanced_layouts = true;
   ctx->Extensions.ARB_ES3_compatibility = true;
   ctx->Extensions.ARB_fragment_layer_viewport = true;
   ctx->Extensions.ARB_sample_shading = true;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/15] i965: add component packing support for load_output intrinsics

2016-07-18 Thread Timothy Arceri
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 38 +++-
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 395594f..e75e7f7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2481,6 +2481,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
case nir_intrinsic_load_per_vertex_output: {
   fs_reg indirect_offset = get_indirect_offset(instr);
   unsigned imm_offset = instr->const_index[0];
+  unsigned first_component = nir_intrinsic_component(instr);
 
   fs_inst *inst;
   if (indirect_offset.file == BAD_FILE) {
@@ -2561,10 +2562,24 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder 
&bld,
 }
 bld.LOAD_PAYLOAD(dst, srcs, num_components, 0);
  } else {
-inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, patch_handle);
+if (first_component != 0) {
+   unsigned read_components =
+  instr->num_components + first_component;
+   fs_reg tmp = bld.vgrf(dst.type, read_components);
+   inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
+   patch_handle);
+   inst->regs_written = read_components;
+   for (unsigned i = 0; i < instr->num_components; i++) {
+  bld.MOV(offset(dst, bld, i),
+  offset(tmp, bld, i + first_component));
+   }
+} else {
+   inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst,
+   patch_handle);
+   inst->regs_written = instr->num_components;
+}
 inst->offset = imm_offset;
 inst->mlen = 1;
-inst->regs_written = instr->num_components;
  }
   } else {
  /* Indirect indexing - use per-slot offsets as well. */
@@ -2574,11 +2589,24 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder 
&bld,
  };
  fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
  bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
-
- inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload);
+ if (first_component != 0) {
+unsigned read_components =
+   instr->num_components + first_component;
+fs_reg tmp = bld.vgrf(dst.type, read_components);
+inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp,
+payload);
+inst->regs_written = read_components;
+for (unsigned i = 0; i < instr->num_components; i++) {
+   bld.MOV(offset(dst, bld, i),
+   offset(tmp, bld, i + first_component));
+}
+ } else {
+inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst,
+payload);
+inst->regs_written = instr->num_components;
+ }
  inst->offset = imm_offset;
  inst->mlen = 2;
- inst->regs_written = instr->num_components;
   }
   break;
}
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/15] docs: mark ARB_enhanced_layouts as DONE for i965

2016-07-18 Thread Timothy Arceri
Reviewed-by: Edward O'Callaghan 
---
 docs/GL3.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 1335397..ebaf4bf 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -193,11 +193,11 @@ GL 4.4, GLSL 4.40:
   GL_MAX_VERTEX_ATTRIB_STRIDE   DONE (all drivers)
   GL_ARB_buffer_storage DONE (i965, nv50, 
nvc0, r600, radeonsi)
   GL_ARB_clear_texture  DONE (i965, nv50, nvc0)
-  GL_ARB_enhanced_layouts   in progress (Timothy)
+  GL_ARB_enhanced_layouts   DONE (i965)
   - compile-time constant expressions   DONE
   - explicit byte offsets for blocksDONE
   - forced alignment within blocks  DONE
-  - specified vec4-slot component numbers   in progress
+  - specified vec4-slot component numbers   DONE (i965)
   - specified transform/feedback layout DONE
   - input/output block locationsDONE
   GL_ARB_multi_bind DONE (all drivers)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/15] i965/vec4: add component packing for gs

2016-07-18 Thread Timothy Arceri
Reviewed-by: Edward O'Callaghan 
---
 src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
index 9ebfb27..16d2410 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
@@ -72,6 +72,8 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
   src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
   instr->const_index[0] + offset->u32[0],
 type);
+  src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
+
   /* gl_PointSize is passed in the .w component of the VUE header */
   if (instr->const_index[0] == VARYING_SLOT_PSIZ)
  src.swizzle = BRW_SWIZZLE_WWWW;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/15] i965/vec4: add support for packing tes inputs

2016-07-18 Thread Timothy Arceri
---
 src/mesa/drivers/dri/i965/brw_vec4_tes.cpp | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
index 6639c86..8266a9d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
@@ -177,7 +177,9 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
case nir_intrinsic_load_input:
case nir_intrinsic_load_per_vertex_input: {
   src_reg indirect_offset = get_indirect_offset(instr);
+  dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
   unsigned imm_offset = instr->const_index[0];
+  unsigned fist_component = nir_intrinsic_component(instr);
   src_reg header = input_read_header;
 
   if (indirect_offset.file != BAD_FILE) {
@@ -190,8 +192,10 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
   */
  const unsigned max_push_slots = 24;
  if (imm_offset < max_push_slots) {
-emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D),
- src_reg(ATTR, imm_offset, glsl_type::ivec4_type)));
+src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type);
+src.swizzle = BRW_SWZ_COMP_INPUT(fist_component);
+
+emit(MOV(dst, src));
 prog_data->urb_read_length =
MAX2(prog_data->urb_read_length,
 DIV_ROUND_UP(imm_offset + 1, 2));
@@ -205,12 +209,14 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
   read->offset = imm_offset;
   read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
 
+  src_reg src = src_reg(temp);
+  src.swizzle = BRW_SWZ_COMP_INPUT(fist_component);
+
   /* Copy to target.  We might end up with some funky writemasks landing
* in here, but we really don't want them in the above pseudo-ops.
*/
-  dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
   dst.writemask = brw_writemask_for_size(instr->num_components);
-  emit(MOV(dst, src_reg(temp)));
+  emit(MOV(dst, src));
   break;
}
default:
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/15] i965/vec4: support packing tcs inputs

2016-07-18 Thread Timothy Arceri
Reviewed-by: Edward O'Callaghan 
---
 src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 8 ++--
 src/mesa/drivers/dri/i965/brw_vec4_tcs.h   | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index f61c612..8bd150a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -166,6 +166,7 @@ void
 vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
   const src_reg &vertex_index,
   unsigned base_offset,
+  unsigned first_component,
   const src_reg &indirect_offset)
 {
vec4_instruction *inst;
@@ -191,7 +192,9 @@ vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
   emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW)));
} else {
-  emit(MOV(dst, src_reg(temp)));
+  src_reg src = src_reg(temp);
+  src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+  emit(MOV(dst, src));
}
 }
 
@@ -267,7 +270,8 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
   dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
   dst.writemask = brw_writemask_for_size(instr->num_components);
 
-  emit_input_urb_read(dst, vertex_index, imm_offset, indirect_offset);
+  emit_input_urb_read(dst, vertex_index, imm_offset,
+  nir_intrinsic_component(instr), indirect_offset);
   break;
}
case nir_intrinsic_load_input:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h 
b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
index 329cd7d..d408e56 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
@@ -60,6 +60,7 @@ protected:
void emit_input_urb_read(const dst_reg &dst,
 const src_reg &vertex_index,
 unsigned base_offset,
+unsigned first_component,
 const src_reg &indirect_offset);
void emit_output_urb_read(const dst_reg &dst,
  unsigned base_offset,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/15] i965/vec4: add packing support for tcs load outputs

2016-07-18 Thread Timothy Arceri
Reviewed-by: Edward O'Callaghan 
---
 src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 17 +
 src/mesa/drivers/dri/i965/brw_vec4_tcs.h   |  1 +
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index 4bc3be7..30c81c5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -201,6 +201,7 @@ vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
 void
 vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
unsigned base_offset,
+   unsigned first_component,
const src_reg &indirect_offset)
 {
vec4_instruction *inst;
@@ -216,6 +217,12 @@ vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
read->offset = base_offset;
read->mlen = 1;
read->base_mrf = -1;
+
+   if (first_component) {
+  src_reg src = src_reg(dst);
+  src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+  emit(MOV(dst, src));
+   }
 }
 
 void
@@ -295,14 +302,15 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
  case GL_QUADS: {
 /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */
 dst_reg tmp(this, glsl_type::vec4_type);
-emit_output_urb_read(tmp, 0, src_reg());
+emit_output_urb_read(tmp, 0, 0, src_reg());
 emit(MOV(writemask(dst, WRITEMASK_XY),
  swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
 break;
  }
  case GL_TRIANGLES:
 /* DWord 4; use offset 1 but normal swizzle/writemask. */
-emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, src_reg());
+emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0,
+ src_reg());
 break;
  case GL_ISOLINES:
 /* All channels are undefined. */
@@ -334,10 +342,11 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr)
  }
 
  dst_reg tmp(this, glsl_type::vec4_type);
- emit_output_urb_read(tmp, 1, src_reg());
+ emit_output_urb_read(tmp, 1, 0, src_reg());
  emit(MOV(dst, swizzle(src_reg(tmp), swiz)));
   } else {
- emit_output_urb_read(dst, imm_offset, indirect_offset);
+ emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr),
+  indirect_offset);
   }
   break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h 
b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
index d408e56..030eb5e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
@@ -64,6 +64,7 @@ protected:
 const src_reg &indirect_offset);
void emit_output_urb_read(const dst_reg &dst,
  unsigned base_offset,
+ unsigned first_component,
  const src_reg &indirect_offset);
 
void emit_urb_write(const src_reg &value, unsigned writemask,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/15] i965/vec4: add support for packing inputs

2016-07-18 Thread Timothy Arceri
Reviewed-by: Edward O'Callaghan 
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index f3b4528..33ad852 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -397,6 +397,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
   src = src_reg(ATTR, instr->const_index[0] + const_offset->u32[0],
 glsl_type::uvec4_type);
+  /* Swizzle source based on component layout qualifier */
+  src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
 
   dest = get_nir_dest(instr->dest, src.type);
   dest.writemask = brw_writemask_for_size(instr->num_components);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/15] i965: bring back type_size_vec4_times_4()

2016-07-18 Thread Timothy Arceri
We will use this for output varyings. To make component
packing simpler we will just treat all varyings as vec4s.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 13 +
 src/mesa/drivers/dri/i965/brw_shader.h |  1 +
 2 files changed, 14 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 120d6dd..547a0c2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -506,6 +506,19 @@ type_size_scalar(const struct glsl_type *type)
return 0;
 }
 
+/**
+ * Returns the number of scalar components needed to store type, assuming
+ * that vectors are padded out to vec4.
+ *
+ * This has the packing rules of type_size_vec4(), but counts components
+ * similar to type_size_scalar().
+ */
+extern "C" int
+type_size_vec4_times_4(const struct glsl_type *type)
+{
+ return 4 * type_size_vec4(type);
+}
+
 /* Attribute arrays are loaded as one vec4 per element (or matrix column),
  * except for double-precision types, which are loaded as one dvec4.
  */
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
b/src/mesa/drivers/dri/i965/brw_shader.h
index dd9eb2d..e61c080 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -294,6 +294,7 @@ struct gl_linked_shader *brw_new_shader(gl_shader_stage 
stage);
 int type_size_scalar(const struct glsl_type *type);
 int type_size_vec4(const struct glsl_type *type);
 int type_size_dvec4(const struct glsl_type *type);
+int type_size_vec4_times_4(const struct glsl_type *type);
 int type_size_vs_input(const struct glsl_type *type);
 
 unsigned tesslevel_outer_components(GLenum tes_primitive_mode);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/15] i965: enable component packing for vs and fs

2016-07-18 Thread Timothy Arceri
Rather than trying to work out the total number of components
used at a location we simply treat all outputs as vec4s.
---
 src/mesa/drivers/dri/i965/brw_fs.h   |  1 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 22 ++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 ++
 src/mesa/drivers/dri/i965/brw_nir.c  |  8 
 4 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 574475f..fc1e1c4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -317,7 +317,6 @@ public:
fs_reg frag_stencil;
fs_reg sample_mask;
fs_reg outputs[VARYING_SLOT_MAX];
-   unsigned output_components[VARYING_SLOT_MAX];
fs_reg dual_src_output;
bool do_dual_src;
int first_non_payload_grf;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 610c151..395594f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -67,13 +67,12 @@ fs_visitor::nir_setup_single_output_varying(fs_reg *reg,
   }
} else {
   assert(type->is_scalar() || type->is_vector());
-  unsigned num_elements = type->vector_elements;
+  unsigned num_iter = 1;
   if (type->is_double())
- num_elements *= 2;
-  for (unsigned count = 0; count < num_elements; count += 4) {
+ num_iter = 2;
+  for (unsigned count = 0; count < num_iter; count++) {
  this->outputs[*location] = *reg;
- this->output_components[*location] = MIN2(4, num_elements - count);
- *reg = offset(*reg, bld, this->output_components[*location]);
+ *reg = offset(*reg, bld, 4);
  (*location)++;
   }
}
@@ -114,7 +113,6 @@ fs_visitor::nir_setup_outputs()
 /* Writing gl_FragColor outputs to all color regions. */
 for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
this->outputs[i] = reg;
-   this->output_components[i] = 4;
 }
  } else if (var->data.location == FRAG_RESULT_DEPTH) {
 this->frag_depth = reg;
@@ -123,8 +121,6 @@ fs_visitor::nir_setup_outputs()
  } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
 this->sample_mask = reg;
  } else {
-int vector_elements = var->type->without_array()->vector_elements;
-
 /* gl_FragData or a user-defined FS output */
 assert(var->data.location >= FRAG_RESULT_DATA0 &&
var->data.location < 
FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS);
@@ -132,8 +128,7 @@ fs_visitor::nir_setup_outputs()
 /* General color output. */
 for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
int output = var->data.location - FRAG_RESULT_DATA0 + i;
-   this->outputs[output] = offset(reg, bld, vector_elements * i);
-   this->output_components[output] = vector_elements;
+   this->outputs[output] = offset(reg, bld, 4 * i);
 }
  }
  break;
@@ -3892,6 +3887,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
 
case nir_intrinsic_load_input: {
   fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type);
+  unsigned first_component = nir_intrinsic_component(instr);
   unsigned num_components = instr->num_components;
   enum brw_reg_type type = dest.type;
 
@@ -3900,7 +3896,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
   src = offset(src, bld, const_offset->u32[0]);
 
   for (unsigned j = 0; j < num_components; j++) {
- bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+ bld.MOV(offset(dest, bld, j), offset(src, bld, j + first_component));
   }
 
   if (type == BRW_REGISTER_TYPE_DF) {
@@ -4026,6 +4022,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
   new_dest = offset(new_dest, bld, const_offset->u32[0]);
 
   unsigned num_components = instr->num_components;
+  unsigned first_component = nir_intrinsic_component(instr);
   unsigned bit_size = instr->src[0].is_ssa ?
  instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size;
   if (bit_size == 64) {
@@ -4039,7 +4036,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
   }
 
   for (unsigned j = 0; j < num_components; j++) {
- bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
+ bld.MOV(offset(new_dest, bld, j + first_component),
+ offset(src, bld, j));
   }
   break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 156a630..6d84374 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i96

[Mesa-dev] V5 ARB_enhanced_layouts packing support for i965 Gen6+

2016-07-18 Thread Timothy Arceri
V5:
 - rebase on Ken's interpolation clean-ups [1]

V4:
 - add vec4 backend support and enable for Gen6+
 
 V3:
- Rewrite patch 9 (add support for packing arrays) to not add
 hacks to the type_size() functions.
 - Add packing support for the load_output intrinsics (patch 12)
 - Add glsl_dvec_type() helper (patch 8)
 
 V2:
 - validation fixes patches 1-2
 - added support for packing doubles now that explicit location
  fixes have landed.
 - fix various issues with intel debug output with new COMPONENT const
 index.
 
 This adds component packing support for Gen6+.
 
 Series can be found in my component_packing_gen6+_v2 branch:
 
 https://github.com/tarceri/Mesa_arrays_of_arrays.git 

[1] https://patchwork.freedesktop.org/series/1/

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/15] i965: add helper for creating packing writemask

2016-07-18 Thread Timothy Arceri
For example, where n=3 and first_component=1, this will give us
0xE (WRITEMASK_YZW).

Reviewed-by: Edward O'Callaghan 
---
 src/mesa/drivers/dri/i965/brw_reg.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_reg.h 
b/src/mesa/drivers/dri/i965/brw_reg.h
index 7eab7b5..abd63e4 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -972,6 +972,12 @@ brw_writemask_for_size(unsigned n)
return (1 << n) - 1;
 }
 
+static inline unsigned
+brw_writemask_for_component_packing(unsigned n, unsigned first_component)
+{
+   return (((1 << n) - 1) << first_component);
+}
+
 static inline struct brw_reg
 negate(struct brw_reg reg)
 {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Use tex_mocs instead of rb_mocs for GL images.

2016-07-18 Thread Kenneth Graunke
On Monday, July 18, 2016 10:58:31 PM PDT Ben Widawsky wrote:
> On Mon, Jul 18, 2016 at 07:08:46PM -0700, Kenneth Graunke wrote:
> > Fixes a 10-20% performance regression in OglCSDof caused by commit
> > 5a8c89038abab0184ea72664ab390ec6ca58b4d6, which made images (in the
> > image load/store sense) use BDW_MOCS_PTE instead of BDW_MOCS_WB.
> > 
> > This seems sketchy, as the default PTE value is supposed to be
> > WB LLC eLLC, which is the same as our MOCS WB setting.  It's only
> > supposed to change when using a surface for display, which won't
> > ever happen for images.  Something may be wrong in the kernel...
> 
> Which platform was the regression on? Broadwell has some weirdness if you
> disable PPGTT which might cause PTE MOCS to fall back to UC. I think at least
> aliasing PPGTT has been enabled since pretty early for Broadwell. If there
> are no ppgtt overrides here, then I'm not sure what would be going on - it
> sounds sketchy to me too.

My Broadwell GT2 laptop running drm-intel-nightly from
2016y-05m-27d-12h-32m-45s (4.6.0 based), and Mark's Broadwell GT3e
boxes in Jenkins.

I tried to verify the kernel PTE settings, but I got totally lost
by the _PAGE_PAT stuff.  Earlier platforms I can follow easily...

This patch fixes the observed problem, but I still would like to
understand why the PTE isn't good enough...for renderbuffers, we
use the "Use PTE" setting...and could be missing out on some
performance there...


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Use tex_mocs instead of rb_mocs for GL images.

2016-07-18 Thread Ben Widawsky
On Mon, Jul 18, 2016 at 07:08:46PM -0700, Kenneth Graunke wrote:
> Fixes a 10-20% performance regression in OglCSDof caused by commit
> 5a8c89038abab0184ea72664ab390ec6ca58b4d6, which made images (in the
> image load/store sense) use BDW_MOCS_PTE instead of BDW_MOCS_WB.
> 
> This seems sketchy, as the default PTE value is supposed to be
> WB LLC eLLC, which is the same as our MOCS WB setting.  It's only
> supposed to change when using a surface for display, which won't
> ever happen for images.  Something may be wrong in the kernel...

Which platform was the regression on? Broadwell has some weirdness if you
disable PPGTT which might cause PTE MOCS to fall back to UC. I think at least
aliasing PPGTT has been enabled since pretty early for Broadwell. If there is no
ppgtt overrides here, then I'm not sure what would be going on - it sounds
sketchy to me too.

> 
> Signed-off-by: Kenneth Graunke 
> Reviewed-by: Jason Ekstrand 

I haven't really followed any of the isl stuff, but it lgtm.


> ---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index d896789..87f8601 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -1516,7 +1516,7 @@ update_image_surface(struct brw_context *brw,
>  const int surf_index = surf_offset - 
> &brw->wm.base.surf_offset[0];
>  
>  brw_emit_surface_state(brw, mt, &view,
> -   surface_state_infos[brw->gen].rb_mocs, 
> false,
> +   surface_state_infos[brw->gen].tex_mocs, 
> false,
> surf_offset, surf_index,
> I915_GEM_DOMAIN_SAMPLER,
> access == GL_READ_ONLY ? 0 :
> -- 
> 2.9.0
> 

-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3a/3] i965: Correctly set RENDER_SURFACE_STATE::Depth for cube map textures

2016-07-18 Thread Pohjolainen, Topi
On Mon, Jul 18, 2016 at 10:16:53PM -0700, Jason Ekstrand wrote:
> From the Sky Lake PRM:
> 
>"For SURFTYPE_CUBE: For Sampling Engine Surfaces and Typed Data Port
>Surfaces, the range of this field is [0,340], indicating the number of
>cube array elements (equal to the number of underlying 2D array elements
>divided by 6). For other surfaces, this field must be zero."
> 
> In other words, the depth field for cube maps is in number of cubes not
> number of 2-D slices so we need to divide by 6.  It appears as if we've
> been doing this wrong ever since we first added cube map arrays for Sandy
> Bridge.  Also, we now need to remoe the shader hacks we've always done

remove

> since they were only needed because we were setting the depth field six
> times too large.
> 
> Signed-off-by: Jason Ekstrand 
> Cc: "12.0 11.2 11.1" 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp  | 21 +
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c  |  6 +-
>  src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |  3 ++-
>  src/mesa/drivers/dri/i965/gen8_surface_state.c|  3 ++-
>  4 files changed, 14 insertions(+), 19 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 129984a..eeec0e2 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -4423,26 +4423,15 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, 
> nir_tex_instr *instr)
> for (unsigned i = 0; i < dest_size; i++)
>nir_dest[i] = offset(dst, bld, i);
>  
> -   bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
> -instr->is_array;
> -
> if (instr->op == nir_texop_query_levels) {
>/* # levels is in .w */
>nir_dest[0] = offset(dst, bld, 3);
> -   } else if (instr->op == nir_texop_txs && dest_size >= 3 &&
> -  (devinfo->gen < 7 || is_cube_array)) {
> +   } else if (instr->op == nir_texop_txs &&
> +  dest_size >= 3 && devinfo->gen < 7) {
> +  /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
>fs_reg depth = offset(dst, bld, 2);
> -  fs_reg fixed_depth = vgrf(glsl_type::int_type);
> -
> -  if (is_cube_array) {
> - /* fixup #layers for cube map arrays */
> - bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, 
> brw_imm_d(6));
> -  } else if (devinfo->gen < 7) {
> - /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
> - bld.emit_minmax(fixed_depth, depth, brw_imm_d(1), 
> BRW_CONDITIONAL_GE);
> -  }
> -
> -  nir_dest[2] = fixed_depth;
> +  nir_dest[2] = vgrf(glsl_type::int_type);
> +  bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE);
> }
>  
> bld.LOAD_PAYLOAD(get_nir_dest(instr->dest), nir_dest, dest_size, 0);
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index c101e05..a96eae5 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -33,6 +33,7 @@
>  #include "main/context.h"
>  #include "main/blend.h"
>  #include "main/mtypes.h"
> +#include "main/teximage.h"
>  #include "main/samplerobj.h"
>  #include "main/shaderimage.h"
>  #include "program/prog_parameter.h"
> @@ -360,8 +361,11 @@ brw_update_texture_surface(struct gl_context *ctx,
> (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
> (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
>  
> +   const unsigned depth = mt->logical_depth0 /
> +  (_mesa_is_cube_map_texture(tObj->Target) ? 6 : 1);
> +
> surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
> -   (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
> +   (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
> (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
>  
> const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - 
> mt->first_level;
> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> index 932e62e..f4a88f3 100644
> --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> @@ -276,7 +276,8 @@ gen7_emit_texture_surface_state(struct brw_context *brw,
>  int surf_index /* unused */,
>  bool rw, bool for_gather)
>  {
> -   const unsigned depth = max_layer - min_layer;
> +   const unsigned depth = (max_layer - min_layer) /
> +  (_mesa_is_cube_map_texture(target) ? 6 : 1);
> uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
>  8 * 4, 32, surf_offset);
>  
> diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c 
> b/src/mesa/drivers/dri/i965/gen8_surface_state.c
> index bd9e2a1..89e

Re: [Mesa-dev] [PATCH 7/7] i965: Delete the FS_OPCODE_INTERPOLATE_AT_CENTROID virtual opcode.

2016-07-18 Thread Jason Ekstrand
On Jul 18, 2016 10:11 PM, "Chris Forbes"  wrote:
>
> I remember arguing about this when it got added -- tradeoff was payload
size/register pressure vs needing to call out to this unit, if centroid
barycentric coords weren't required for anything else? It does seem fairly
pointless, though.
>
> For the series:-
>
> Reviewed-by: Chris Forbes 

I'd like to chip in before you get too excited and push. I'll take a proper
look tomorrow.

> On Tue, Jul 19, 2016 at 8:26 AM, Kenneth Graunke 
wrote:
>>
>> We no longer use this message.  As far as I can tell, it's fairly
>> useless - the equivalent information is provided in the payload.
>>
>> Signed-off-by: Kenneth Graunke 
>> ---
>>  src/mesa/drivers/dri/i965/brw_defines.h| 1 -
>>  src/mesa/drivers/dri/i965/brw_fs.cpp   | 2 --
>>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 -
>>  src/mesa/drivers/dri/i965/brw_shader.cpp   | 2 --
>>  4 files changed, 10 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h
b/src/mesa/drivers/dri/i965/brw_defines.h
>> index b5a259e..2814fa7 100644
>> --- a/src/mesa/drivers/dri/i965/brw_defines.h
>> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
>> @@ -1120,7 +1120,6 @@ enum opcode {
>> FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
>> FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
>> FS_OPCODE_PLACEHOLDER_HALT,
>> -   FS_OPCODE_INTERPOLATE_AT_CENTROID,
>> FS_OPCODE_INTERPOLATE_AT_SAMPLE,
>> FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
>> FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index 06007fe..120d6dd 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -250,7 +250,6 @@ fs_inst::is_send_from_grf() const
>> switch (opcode) {
>> case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
>> case SHADER_OPCODE_SHADER_TIME_ADD:
>> -   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
>> case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
>> case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
>> case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
>> @@ -4785,7 +4784,6 @@ get_lowered_simd_width(const struct
brw_device_info *devinfo,
>> case FS_OPCODE_PACK_HALF_2x16_SPLIT:
>> case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
>> case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
>> -   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
>> case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
>> case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
>> case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>> index 1e9c7da..a390184 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>> @@ -2054,11 +2054,6 @@ fs_generator::generate_code(const cfg_t *cfg, int
dispatch_width)
>>   }
>>   break;
>>
>> -  case FS_OPCODE_INTERPOLATE_AT_CENTROID:
>> - generate_pixel_interpolator_query(inst, dst, src[0], src[1],
>> -
 GEN7_PIXEL_INTERPOLATOR_LOC_CENTROID);
>> - break;
>> -
>>case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
>>   generate_pixel_interpolator_query(inst, dst, src[0], src[1],
>>
GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE);
>> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp
b/src/mesa/drivers/dri/i965/brw_shader.cpp
>> index f3b5487..559e44c 100644
>> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
>> @@ -367,8 +367,6 @@ brw_instruction_name(const struct brw_device_info
*devinfo, enum opcode op)
>> case FS_OPCODE_PLACEHOLDER_HALT:
>>return "placeholder_halt";
>>
>> -   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
>> -  return "interp_centroid";
>> case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
>>return "interp_sample";
>> case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
>> --
>> 2.9.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/3] i965: Correctly set RENDER_SURFACE_STATE::Depth for

2016-07-18 Thread Chris Forbes
Oh dear, hacks on both sides. Sorry for this nonsense.

Series is:-

Reviewed-by: Chris Forbes 

On Tue, Jul 19, 2016 at 5:16 PM, Jason Ekstrand 
wrote:

> From the Sky Lake PRM:
>
>"For SURFTYPE_CUBE: For Sampling Engine Surfaces and Typed Data Port
>Surfaces, the range of this field is [0,340], indicating the number of
>cube array elements (equal to the number of underlying 2D array elements
>divided by 6). For other surfaces, this field must be zero."
>
> In other words, the depth field for cube maps is in number of cubes not
> number of 2-D slices so we need to divide by 6.  It appears as if we've
> been doing this wrong ever since we first added cube map arrays for Sandy
> Bridge.  We've also had a shader hack to divide the size Z dimension of
> cube maps by 6 in the textureSize call.  This is completely bogus and the
> only reason for it is that we've been setting the depth six times too
> large.
>
> This little series fixes this.  In order to keep things back-portable,
> patch 3 comes in two versions.  Version (a) is based on pre-ISL and should
> be backportable to 12.0 or maybe even 11.2 or 11.1.  Version (b) on the
> other hand is based on top of the ISL work and can be applied on master.
>
> Cc: Emil Velikov 
>
> Jason Ekstrand (3):
>   i965: Use intel_get_image_dims in alloc_texture_storage
>   i965/miptree: Set logical_depth0 == 6 for cube maps
>   i965: Correctly set RENDER_SURFACE_STATE::Depth for cube map textures
>
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp  | 21
> +
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c  |  6 +-
>  src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |  3 ++-
>  src/mesa/drivers/dri/i965/gen8_surface_state.c|  3 ++-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 ++
>  src/mesa/drivers/dri/i965/intel_tex.c |  2 ++
>  6 files changed, 26 insertions(+), 23 deletions(-)
>
> --
> 2.5.0.400.gff86faf
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] i965/miptree: Set logical_depth0 == 6 for cube maps

2016-07-18 Thread Jason Ekstrand
This matches what we do for cube map arrays where logical_depth0 is in number of
face-layers rather than number of cubes.  This does mean that we will
temporarily be setting the surface bounds too loose for cube map textures
but we are already setting them too loose for cube arrays and we will be
fixing that in the next commit anyway.

Signed-off-by: Jason Ekstrand 
Cc: "12.0 11.2 11.1" 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b6265dc..fd20f3f 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -518,10 +518,8 @@ intel_miptree_create_layout(struct brw_context *brw,
   }
}
 
-   if (target == GL_TEXTURE_CUBE_MAP) {
-  assert(depth0 == 1);
-  depth0 = 6;
-   }
+   if (target == GL_TEXTURE_CUBE_MAP)
+  assert(depth0 == 6);
 
mt->physical_width0 = width0;
mt->physical_height0 = height0;
@@ -1054,6 +1052,14 @@ intel_get_image_dims(struct gl_texture_image *image,
   *height = 1;
   *depth = image->Height;
   break;
+   case GL_TEXTURE_CUBE_MAP:
+  /* For Cube maps, the mesa/main api layer gives us a depth of 1 even
+   * though we really have 6 slices.
+   */
+  *width = image->Width;
+  *height = image->Height;
+  *depth = 6;
+  break;
default:
   *width = image->Width;
   *height = image->Height;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3a/3] i965: Correctly set RENDER_SURFACE_STATE::Depth for cube map textures

2016-07-18 Thread Jason Ekstrand
From the Sky Lake PRM:

   "For SURFTYPE_CUBE: For Sampling Engine Surfaces and Typed Data Port
   Surfaces, the range of this field is [0,340], indicating the number of
   cube array elements (equal to the number of underlying 2D array elements
   divided by 6). For other surfaces, this field must be zero."

In other words, the depth field for cube maps is in number of cubes not
number of 2-D slices so we need to divide by 6.  It appears as if we've
been doing this wrong ever since we first added cube map arrays for Sandy
Bridge.  Also, we now need to remove the shader hacks we've always done
since they were only needed because we were setting the depth field six
times too large.

Signed-off-by: Jason Ekstrand 
Cc: "12.0 11.2 11.1" 
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp  | 21 +
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c  |  6 +-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |  3 ++-
 src/mesa/drivers/dri/i965/gen8_surface_state.c|  3 ++-
 4 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 129984a..eeec0e2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -4423,26 +4423,15 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, 
nir_tex_instr *instr)
for (unsigned i = 0; i < dest_size; i++)
   nir_dest[i] = offset(dst, bld, i);
 
-   bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
-instr->is_array;
-
if (instr->op == nir_texop_query_levels) {
   /* # levels is in .w */
   nir_dest[0] = offset(dst, bld, 3);
-   } else if (instr->op == nir_texop_txs && dest_size >= 3 &&
-  (devinfo->gen < 7 || is_cube_array)) {
+   } else if (instr->op == nir_texop_txs &&
+  dest_size >= 3 && devinfo->gen < 7) {
+  /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
   fs_reg depth = offset(dst, bld, 2);
-  fs_reg fixed_depth = vgrf(glsl_type::int_type);
-
-  if (is_cube_array) {
- /* fixup #layers for cube map arrays */
- bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, 
brw_imm_d(6));
-  } else if (devinfo->gen < 7) {
- /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
- bld.emit_minmax(fixed_depth, depth, brw_imm_d(1), BRW_CONDITIONAL_GE);
-  }
-
-  nir_dest[2] = fixed_depth;
+  nir_dest[2] = vgrf(glsl_type::int_type);
+  bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE);
}
 
bld.LOAD_PAYLOAD(get_nir_dest(instr->dest), nir_dest, dest_size, 0);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index c101e05..a96eae5 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -33,6 +33,7 @@
 #include "main/context.h"
 #include "main/blend.h"
 #include "main/mtypes.h"
+#include "main/teximage.h"
 #include "main/samplerobj.h"
 #include "main/shaderimage.h"
 #include "program/prog_parameter.h"
@@ -360,8 +361,11 @@ brw_update_texture_surface(struct gl_context *ctx,
  (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
  (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 
+   const unsigned depth = mt->logical_depth0 /
+  (_mesa_is_cube_map_texture(tObj->Target) ? 6 : 1);
+
surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
- (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
+ (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
  (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 
const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - mt->first_level;
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 932e62e..f4a88f3 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -276,7 +276,8 @@ gen7_emit_texture_surface_state(struct brw_context *brw,
 int surf_index /* unused */,
 bool rw, bool for_gather)
 {
-   const unsigned depth = max_layer - min_layer;
+   const unsigned depth = (max_layer - min_layer) /
+  (_mesa_is_cube_map_texture(target) ? 6 : 1);
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 8 * 4, 32, surf_offset);
 
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c 
b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index bd9e2a1..89ea8cc 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -235,7 +235,8 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
 uint32_t *surf_offset, int surf_index,
 bool rw, bool

[Mesa-dev] [PATCH 3b/3] i965: Stop munging cube array lengths by 6

2016-07-18 Thread Jason Ekstrand
From the Sky Lake PRM:

   "For SURFTYPE_CUBE: For Sampling Engine Surfaces and Typed Data Port
   Surfaces, the range of this field is [0,340], indicating the number of
   cube array elements (equal to the number of underlying 2D array elements
   divided by 6). For other surfaces, this field must be zero."

In other words, the depth field for cube maps is in number of cubes not
number of 2-D slices so we need to divide by 6.  ISL will do this correctly
for us assuming that we provide it with the correct array bounds which it
expects to be in 2-D slices.  It appears as if we've been doing this wrong
ever since we first added cube map arrays for Sandy Bridge and the change
to ISL made things slightly worse.  While we're at it, we now need to remove
the shader hacks we've always done since they were only needed because we
were setting the depth field six times too large.

Signed-off-by: Jason Ekstrand 
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 21 +
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  4 +---
 2 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 6265dc6..14a8adb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -4457,26 +4457,15 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, 
nir_tex_instr *instr)
for (unsigned i = 0; i < dest_size; i++)
   nir_dest[i] = offset(dst, bld, i);
 
-   bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
-instr->is_array;
-
if (instr->op == nir_texop_query_levels) {
   /* # levels is in .w */
   nir_dest[0] = offset(dst, bld, 3);
-   } else if (instr->op == nir_texop_txs && dest_size >= 3 &&
-  (devinfo->gen < 7 || is_cube_array)) {
+   } else if (instr->op == nir_texop_txs &&
+  dest_size >= 3 && devinfo->gen < 7) {
+  /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
   fs_reg depth = offset(dst, bld, 2);
-  fs_reg fixed_depth = vgrf(glsl_type::int_type);
-
-  if (is_cube_array) {
- /* fixup #layers for cube map arrays */
- bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, 
brw_imm_d(6));
-  } else if (devinfo->gen < 7) {
- /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
- bld.emit_minmax(fixed_depth, depth, brw_imm_d(1), BRW_CONDITIONAL_GE);
-  }
-
-  nir_dest[2] = fixed_depth;
+  nir_dest[2] = vgrf(glsl_type::int_type);
+  bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE);
}
 
bld.LOAD_PAYLOAD(get_nir_dest(instr->dest), nir_dest, dest_size, 0);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index d896789..e5a3acf 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -396,11 +396,9 @@ brw_update_texture_surface(struct gl_context *ctx,
   /* If this is a view with restricted NumLayers, then our effective depth
* is not just the miptree depth.
*/
-  const unsigned mt_num_layers =
- mt->logical_depth0 * (_mesa_is_cube_map_texture(mt->target) ? 6 : 1);
   const unsigned view_num_layers =
  (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
-mt_num_layers;
+mt->logical_depth0;
 
   /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
* texturing functions that return a float, as our code generation always
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] i965: Use intel_get_image_dims in alloc_texture_storage

2016-07-18 Thread Jason Ekstrand
The intel_get_image_dims helper function handles some image dimension
sanitization for us for things such as 1-D array textures.  We should
probably be using it here.

Signed-off-by: Jason Ekstrand 
Cc: "12.0 11.2 11.1" 
---
 src/mesa/drivers/dri/i965/intel_tex.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_tex.c 
b/src/mesa/drivers/dri/i965/intel_tex.c
index 8c32fe3..d3e24f4 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.c
+++ b/src/mesa/drivers/dri/i965/intel_tex.c
@@ -141,6 +141,8 @@ intel_alloc_texture_storage(struct gl_context *ctx,
!intel_miptree_match_image(intel_texobj->mt, first_image) ||
intel_texobj->mt->last_level != levels - 1) {
   intel_miptree_release(&intel_texobj->mt);
+
+  intel_get_image_dims(first_image, &width, &height, &depth);
   intel_texobj->mt = intel_miptree_create(brw, texobj->Target,
   first_image->TexFormat,
   0, levels - 1,
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/3] i965: Correctly set RENDER_SURFACE_STATE::Depth for

2016-07-18 Thread Jason Ekstrand
From the Sky Lake PRM:

   "For SURFTYPE_CUBE: For Sampling Engine Surfaces and Typed Data Port
   Surfaces, the range of this field is [0,340], indicating the number of
   cube array elements (equal to the number of underlying 2D array elements
   divided by 6). For other surfaces, this field must be zero."

In other words, the depth field for cube maps is in number of cubes not
number of 2-D slices so we need to divide by 6.  It appears as if we've
been doing this wrong ever since we first added cube map arrays for Sandy
Bridge.  We've also had a shader hack to divide the size Z dimension of
cube maps by 6 in the textureSize call.  This is completely bogus and the
only reason for it is that we've been setting the depth six times too
large.

This little series fixes this.  In order to keep things back-portable,
patch 3 comes in two versions.  Version (a) is based on pre-ISL and should
be backportable to 12.0 or maybe even 11.2 or 11.1.  Version (b) on the
other hand is based on top of the ISL work and can be applied on master.

Cc: Emil Velikov 

Jason Ekstrand (3):
  i965: Use intel_get_image_dims in alloc_texture_storage
  i965/miptree: Set logical_depth0 == 6 for cube maps
  i965: Correctly set RENDER_SURFACE_STATE::Depth for cube map textures

 src/mesa/drivers/dri/i965/brw_fs_nir.cpp  | 21 +
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c  |  6 +-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |  3 ++-
 src/mesa/drivers/dri/i965/gen8_surface_state.c|  3 ++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 ++
 src/mesa/drivers/dri/i965/intel_tex.c |  2 ++
 6 files changed, 26 insertions(+), 23 deletions(-)

-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96950] Another regression from bc4e0c486: vbo: Use a bitmask to track the active arrays in vbo_exec*.

2016-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96950

--- Comment #5 from Mathias Fröhlich  ---
Or can you provide an apitrace?
I have no such system at hand to reproduce this on.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] i965: Delete the FS_OPCODE_INTERPOLATE_AT_CENTROID virtual opcode.

2016-07-18 Thread Chris Forbes
I remember arguing about this when it got added -- tradeoff was payload
size/register pressure vs needing to call out to this unit, if centroid
barycentric coords weren't required for anything else? It does seem fairly
pointless, though.

For the series:-

Reviewed-by: Chris Forbes 

On Tue, Jul 19, 2016 at 8:26 AM, Kenneth Graunke 
wrote:

> We no longer use this message.  As far as I can tell, it's fairly
> useless - the equivalent information is provided in the payload.
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h| 1 -
>  src/mesa/drivers/dri/i965/brw_fs.cpp   | 2 --
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 -
>  src/mesa/drivers/dri/i965/brw_shader.cpp   | 2 --
>  4 files changed, 10 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index b5a259e..2814fa7 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1120,7 +1120,6 @@ enum opcode {
> FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
> FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
> FS_OPCODE_PLACEHOLDER_HALT,
> -   FS_OPCODE_INTERPOLATE_AT_CENTROID,
> FS_OPCODE_INTERPOLATE_AT_SAMPLE,
> FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
> FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 06007fe..120d6dd 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -250,7 +250,6 @@ fs_inst::is_send_from_grf() const
> switch (opcode) {
> case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
> case SHADER_OPCODE_SHADER_TIME_ADD:
> -   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
> case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
> case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
> case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
> @@ -4785,7 +4784,6 @@ get_lowered_simd_width(const struct brw_device_info
> *devinfo,
> case FS_OPCODE_PACK_HALF_2x16_SPLIT:
> case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
> case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
> -   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
> case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
> case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
> case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index 1e9c7da..a390184 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -2054,11 +2054,6 @@ fs_generator::generate_code(const cfg_t *cfg, int
> dispatch_width)
>   }
>   break;
>
> -  case FS_OPCODE_INTERPOLATE_AT_CENTROID:
> - generate_pixel_interpolator_query(inst, dst, src[0], src[1],
> -
>  GEN7_PIXEL_INTERPOLATOR_LOC_CENTROID);
> - break;
> -
>case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
>   generate_pixel_interpolator_query(inst, dst, src[0], src[1],
>
> GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE);
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp
> b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index f3b5487..559e44c 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -367,8 +367,6 @@ brw_instruction_name(const struct brw_device_info
> *devinfo, enum opcode op)
> case FS_OPCODE_PLACEHOLDER_HALT:
>return "placeholder_halt";
>
> -   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
> -  return "interp_centroid";
> case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
>return "interp_sample";
> case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
> --
> 2.9.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] nir: Add a nir_lower_io flag for using load_interpolated_input intrins.

2016-07-18 Thread Chris Forbes
Seems a little unfortunate to add a random bool to this interface which is
otherwise fairly descriptive, but OK.

On Tue, Jul 19, 2016 at 8:26 AM, Kenneth Graunke 
wrote:

> While my intention is that the new intrinsics should be usable by all
> drivers, we need to make them optional until all drivers switch.
>
> This doesn't do anything yet, but I added it as a separate patch to
> keep the interface churn separate for easier review.
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/compiler/nir/nir.h  |  3 ++-
>  src/compiler/nir/nir_lower_io.c | 15 +++
>  src/gallium/drivers/freedreno/ir3/ir3_cmdline.c |  2 +-
>  src/mesa/drivers/dri/i965/brw_blorp.c   |  2 +-
>  src/mesa/drivers/dri/i965/brw_nir.c | 18 +-
>  src/mesa/drivers/dri/i965/brw_program.c |  4 ++--
>  src/mesa/state_tracker/st_glsl_to_nir.cpp   |  2 +-
>  7 files changed, 27 insertions(+), 19 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index ac11998..e996e0e 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2324,7 +2324,8 @@ void nir_assign_var_locations(struct exec_list
> *var_list, unsigned *size,
>
>  void nir_lower_io(nir_shader *shader,
>nir_variable_mode modes,
> -  int (*type_size)(const struct glsl_type *));
> +  int (*type_size)(const struct glsl_type *),
> +  bool use_load_interpolated_input_intrinsics);
>  nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
>  nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
>
> diff --git a/src/compiler/nir/nir_lower_io.c
> b/src/compiler/nir/nir_lower_io.c
> index b05a73f..aa8a517 100644
> --- a/src/compiler/nir/nir_lower_io.c
> +++ b/src/compiler/nir/nir_lower_io.c
> @@ -39,6 +39,7 @@ struct lower_io_state {
> void *mem_ctx;
> int (*type_size)(const struct glsl_type *type);
> nir_variable_mode modes;
> +   bool use_interpolated_input;
>  };
>
>  void
> @@ -394,7 +395,8 @@ nir_lower_io_block(nir_block *block,
>  static void
>  nir_lower_io_impl(nir_function_impl *impl,
>nir_variable_mode modes,
> -  int (*type_size)(const struct glsl_type *))
> +  int (*type_size)(const struct glsl_type *),
> +  bool use_interpolated_input)
>  {
> struct lower_io_state state;
>
> @@ -402,6 +404,7 @@ nir_lower_io_impl(nir_function_impl *impl,
> state.mem_ctx = ralloc_parent(impl);
> state.modes = modes;
> state.type_size = type_size;
> +   state.use_interpolated_input = use_interpolated_input;
>
> nir_foreach_block(block, impl) {
>nir_lower_io_block(block, &state);
> @@ -413,11 +416,15 @@ nir_lower_io_impl(nir_function_impl *impl,
>
>  void
>  nir_lower_io(nir_shader *shader, nir_variable_mode modes,
> - int (*type_size)(const struct glsl_type *))
> + int (*type_size)(const struct glsl_type *),
> + bool use_interpolated_input)
>  {
> nir_foreach_function(function, shader) {
> -  if (function->impl)
> - nir_lower_io_impl(function->impl, modes, type_size);
> +  if (function->impl) {
> + nir_lower_io_impl(function->impl, modes, type_size,
> +   use_interpolated_input &&
> +   shader->stage == MESA_SHADER_FRAGMENT);
> +  }
> }
>  }
>
> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
> b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
> index 41532fc..a8a8c1b 100644
> --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
> +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
> @@ -93,7 +93,7 @@ load_glsl(unsigned num_files, char* const* files,
> gl_shader_stage stage)
> // TODO nir_assign_var_locations??
>
> NIR_PASS_V(nir, nir_lower_system_values);
> -   NIR_PASS_V(nir, nir_lower_io, nir_var_all, st_glsl_type_size);
> +   NIR_PASS_V(nir, nir_lower_io, nir_var_all, st_glsl_type_size,
> false);
> NIR_PASS_V(nir, nir_lower_samplers, prog);
>
> return nir;
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c
> b/src/mesa/drivers/dri/i965/brw_blorp.c
> index 282a5b2..0473cfe 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -209,7 +209,7 @@ brw_blorp_compile_nir_shader(struct brw_context *brw,
> struct nir_shader *nir,
>unsigned end = var->data.location +
> nir_uniform_type_size(var->type);
>nir->num_uniforms = MAX2(nir->num_uniforms, end);
> }
> -   nir_lower_io(nir, nir_var_uniform, nir_uniform_type_size);
> +   nir_lower_io(nir, nir_var_uniform, nir_uniform_type_size, false);
>
> const unsigned *program =
>brw_compile_fs(compiler, brw, mem_ctx, wm_key, &wm_prog_data, nir,
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
> b/src/mesa/drivers/dri/i965/brw_nir.c
> index 6c3e1d1..caf9fe0 100644
> --- a/src/mesa/dri

Re: [Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.

2016-07-18 Thread Chris Forbes
On Tue, Jul 19, 2016 at 8:26 AM, Kenneth Graunke 
wrote:

> +   default:
> +  assert(!"invalid intrinsic");
>

unreachable() ?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/10] egl/android: Improve the Android EGL backend

2016-07-18 Thread Tomasz Figa
On Tue, Jul 19, 2016 at 12:35 PM, Rob Herring  wrote:
> On Fri, Jul 15, 2016 at 2:53 AM, Tomasz Figa  wrote:
>> Hi,
>>
>> This series is a collection of various fixes and extensions we came up
>> with during our attempt to use Mesa for Android.
>>
>> Fixes included in this series:
>>  - added mandatory EGL_MAX_PBUFFER_WIDTH and _HEIGHT attributes to EGL
>>configs,
>>  - fixed multiple issues with handling pbuffers in the backend,
>>  - found and fixed a DRI image leak,
>>  - made the implementation of DRI image loader .getBuffers callback
>>conform better to the extension semantics.
>>
>> New features added by this series:
>>  - possibility to build the Android EGL platform without drm_gralloc
>>headers,
>>  - support for creating EGL images from Android native buffers with
>>YV12 pixel format (prime-only),
>>  - fallback to kms_swrast driver when no hardware driver can be loaded
>>but there is still some usable DRI node present in the system.
>>  - more logging in case of errors to help diagnosing problems.
>>
>> Testing was done using classic i965 (gen 8) and gallium softpipe drivers
>> on an internal build of Android, based on gralloc backed by a DRM render
>> node and sharing buffers by PRIME FDs.
>
> I've tested out patches 1-6 with virgl and I don't get anything
> displayed. I get this message:
>
> EGL-DRI2: Front buffer is not supported for window surfaces
>
> That's as far as I investigated. I'll look into it some more tomorrow.

Thanks a lot for testing!

It looks like somehow your driver (or gallium) is triggering a call to
DRI image loader getBuffers() callback with front buffer bit set in
the image mask, but window surfaces on Android provide only back
buffers.

My understanding of the semantics was that the callback should deny
such requests, so that's how I implemented it. However it isn't really
well documented, so potentially it should only provide buffers that
are available and ignore the rest without bailing out. Could someone
more familiar with this extension comment on this?

>
> Patches 7-10 wouldn't apply. Do you have a git tree with the series?

Hmm, I rebased them on Mesa master just before sending. Let me try to
create a sandbox branch in our chromium tree.

Best regards,
Tomasz
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/10] egl/android: Improve the Android EGL backend

2016-07-18 Thread Rob Herring
On Fri, Jul 15, 2016 at 2:53 AM, Tomasz Figa  wrote:
> Hi,
>
> This series is a collection of various fixes and extensions we came up
> with during our attempt to use Mesa for Android.
>
> Fixes included in this series:
>  - added mandatory EGL_MAX_PBUFFER_WIDTH and _HEIGHT attributes to EGL
>configs,
>  - fixed multiple issues with handling pbuffers in the backend,
>  - found and fixed a DRI image leak,
>  - made the implementation of DRI image loader .getBuffers callback
>conform better to the extension semantics.
>
> New features added by this series:
>  - possibility to build the Android EGL platform without drm_gralloc
>headers,
>  - support for creating EGL images from Android native buffers with
>YV12 pixel format (prime-only),
>  - fallback to kms_swrast driver when no hardware driver can be loaded
>but there is still some usable DRI node present in the system.
>  - more logging in case of errors to help diagnosing problems.
>
> Testing was done using classic i965 (gen 8) and gallium softpipe drivers
> on an internal build of Android, based on gralloc backed by a DRM render
> node and sharing buffers by PRIME FDs.

I've tested out patches 1-6 with virgl and I don't get anything
displayed. I get this message:

EGL-DRI2: Front buffer is not supported for window surfaces

That's as far as I investigated. I'll look into it some more tomorrow.

Patches 7-10 wouldn't apply. Do you have a git tree with the series?

Rob
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/10] egl/android: Make drm_gralloc headers optional

2016-07-18 Thread Tomasz Figa
On Tue, Jul 19, 2016 at 2:35 AM, Emil Velikov  wrote:
> On 18 July 2016 at 16:38, Tomasz Figa  wrote:
>> On Mon, Jul 18, 2016 at 11:58 PM, Emil Velikov  
>> wrote:
>>> On 18 July 2016 at 13:02, Tomasz Figa  wrote:
 On Mon, Jul 18, 2016 at 7:28 PM, Emil Velikov  
 wrote:
> Hi Tomasz,
>
> On 15 July 2016 at 08:53, Tomasz Figa  wrote:
>
>> +#define DRM_RENDER_DEV_NAME  "%s/renderD%d"
>> +
>> +static int
>> +droid_open_device(_EGLDisplay *dpy)
>> +{
>> +   struct dri2_egl_display *dri2_dpy = dpy->DriverData;
>> +   const int limit = 64;
>> +   const int base = 128;
>> +   int fd;
>> +   int i;
>> +
>> +   for (i = 0; i < limit; ++i) {
>> +  char *card_path;
>> +  if (asprintf(&card_path, DRM_RENDER_DEV_NAME, DRM_DIR_NAME, base 
>> + i) < 0)
> Why do we need any of this ? What gralloc implementation are you guys 
> using ?

 We are using our heavily rewritten fork of some old drm_gralloc
 release. It supports only render nodes and PRIME FDs and doesn't
 export the DRI device FD outside of its internals (which isn't
 actually even fully correct, at least for PRIME and render nodes, see
 my reply to Rob's comments).

>>> That explain it, since https://chromium.googlesource.com/ does not
>>> have gralloc, and
>>> https://android.googlesource.com/platform/external/drm_gralloc/ has
>>> both the DRM_FD define and the gem/flink function(s)?
>>>
>>> Can I suggest porting the fd drm_gralloc/gbm_gralloc patches to your
>>> private copy/repo. This way we'll have some consistency throughout
>>> gralloc implementations
>>
>> I'd prefer if any code using flink names was not added back. On top of
>> that, our drm_gralloc doesn't really have much in common with that
>> from android-x86 anymore (as I said, it was heavily rewritten) and
>> there is not even a chance that with its current design flink names
>> could even work.
>>
>> Also I'm wondering why we want to consider current brokenness of
>> drm_gralloc as something to be consistent with. It's supposed to be a
>> HAL library providing a uniform abstraction, but it exports private
>> APIs on the side instead. Moreover, as I mentioned before, flink names
>> are considered insecure and it would be really much better if we could
>> just forget about them.
>>
>>> and you can use gbm_gralloc directly in the
>>> (hopefully) not too distant future.
>>
>> I agree with this part, though. gbm_gralloc is definitely something
>> that we might want to migrate to in the future. Although it's a bit
>> lacking at the moment, so it might need a bit more time to develop the
>> missing bits. [I'm CCing Gurchetan, who was investigating GBM-backed
>> gralloc usable for our purposes.]
>>
>> In any case, the missing flink API is quite easy to handle and can be
>> just stubbed out in a local header as you suggested. I don't think it
>> would hurt anyone and would definitely help us and anyone not willing
>> to export any private APIs from their gralloc and rely only on the
>> public HAL API.
>>
> Looks like I wasn't clear enough here, really sorry about that. No
> objection on nuking _any_ of the gem/flink paths, but hoping to have
> the behaviour consistent with the one described in
> get_native_buffer_fd.

Did you mean having the PRIME FD in native_handle_t::data[0]?

If so, it's more or less guaranteed by the API, because all file
descriptors in the handle have to be stored in the first N (equal to
native_handle_t::numFds) ints of native_handle_t::data[] for
respective general code to properly transfer the FDs through binder
when sharing between processes.

Our gralloc currently supports only one PRIME FD per buffer (no
separate memory planes for planar YUV) and stores it exactly in
native_handle_t::data[0].

>
>>>
>
> Afaict the latter must provide reasonable result for
> hw_get_module(GRALLOC_HARDWARE_MODULE_ID...) and as it's missing the
> perform hook existing code should work just fine. Right ?

 Existing code would fail with -1 as file descriptor, wouldn't it? Or
 I'm failing to see something?

>>> Nope you're spot on - I had a dull moment. May I suggest reverting the
>>> patch which removed the GRALLOC_MODULE_PERFORM_GET_DRM_FD handling in
>>> your gralloc ? Reason being is that the proposed code is very 'flaky'
>>> and can open the wrong render node on systems which have more than
>>> one.
>>
>> I think the answer is a bit of yes and no at the same time.
>>
>> Starting with no, it's incorrect for gralloc to share the DRI device
>> FD with Mesa for multiple reasons:
>>  - there are cases when the allocator used is different than the render node,
> Can you please provide an example how the current open-source
> gralloc/EGL stack might hit this ? Only a mix of closed and
> open-source components comes to mind :-\

Well, yes. I don't think I can hide the fact that we have to use
closed source components on some platforms. To put it simply,

Re: [Mesa-dev] Required Mako version? (WAS: mesa from git fails to compile)

2016-07-18 Thread Kenneth Graunke
On Monday, July 18, 2016 10:58:25 PM PDT Pali Rohár wrote:
> Any conclusion or fix for this issue?

Dylan suggested you use pip --user to install a newer version of Mako.
Is that a workable solution to your problem?

I'm having a hard time getting excited about making upstream support
building against 2011-era software, when doing so makes transitioning
to Python 3 harder (which is useful for forward-looking distros).
Especially when Debian stable and the last two Ubuntu LTS releases
(2016 and 2014) ship a recent enough version...it's just the LTS
three back (2012) that doesn't work...

--Ken


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: Fix debug_printf under Haiku

2016-07-18 Thread Alexander von Gluck IV
July 18 2016 1:10 PM, "Emil Velikov"  wrote:
> On 18 July 2016 at 16:28, Alexander von Gluck IV  
> wrote:
> 
>> July 18 2016 9:20 AM, "Emil Velikov"  wrote:
>>> On 18 July 2016 at 14:39, Alexander von Gluck IV  
>>> wrote:
>>> 
 July 18 2016 3:29 AM, "Nicolai Hähnle"  wrote:
> A comment further up in the same file says
> 
> /* Haiku provides debug_printf in libroot with OS.h */
> 
> Is that no longer true?
> 
> Nicolai
> 
> On 16.07.2016 16:27, Alexander von Gluck IV wrote:
> 
>> ---
>> src/gallium/auxiliary/util/u_debug.h | 5 -
>> 1 file changed, 4 insertions(+), 1 deletion(-)
>> 
>> diff --git a/src/gallium/auxiliary/util/u_debug.h 
>> b/src/gallium/auxiliary/util/u_debug.h
>> index 7da7f53..7dc4ce8 100644
>> --- a/src/gallium/auxiliary/util/u_debug.h
>> +++ b/src/gallium/auxiliary/util/u_debug.h
>> @@ -83,7 +83,10 @@ _debug_printf(const char *format, ...)
>> * - avoid outputing large strings (512 bytes is the current maximum 
>> length
>> * that is guaranteed to be printed in all platforms)
>> */
>> -#if !defined(PIPE_OS_HAIKU)
>> +#if defined(PIPE_OS_HAIKU)
>> +void
>> +debug_printf(const char *format, ...) _util_printf_format(1,2);
>> +#else
>> static inline void
>> debug_printf(const char *format, ...) _util_printf_format(1,2);
>>> 
>>> Hmm I moved the include further up with commit
>>> 373f118c6c750d717fd0727fc3fc191828714c6f although that should not have
>>> made any difference, barring fragile include file order. Can you check
>>> if reverting the u_debug.h gets you up and running ? If so can you
>>> please:
>>> - Please add the stable tag Cc: 
>>> - Attempt to straighten the includes (it might be mesa, llvm and/or
>>> Haiku that is getting confused)
>>> 
 It's still true, however without the _util_printf_format I get odd llvm
 symbol errors.
>>> 
>>> I would suspect that the above is in play, but without details
>>> (build/error log) little to no one will be able to tell you if this is
>>> the correct fix, I'm afraid.
>> 
>> gcc 5.4.0 / llvm 3.8.0
>> Sorry, I wasn't near the machine, here is the error without any changes:
>> 
>> src/gallium/auxiliary/gallivm/lp_bld_assert.c: In function 'lp_assert':
>> src/gallium/auxiliary/gallivm/lp_bld_assert.c:43:7: warning: implicit 
>> declaration of function
>> 'debug_printf' [-Wimplicit-function-declaration]
>> debug_printf("LLVM assertion '%s' failed!\n", msg);
>> ^
> 
> Ok, this happens as PIPE_OS_HAIKU isn't defined that early in
> u_debug.h, thus the header is not included
> 
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_const.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_conv.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_debug.cpp ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_flow.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_aos.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_cached.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_float.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_format.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_soa.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_gather.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_init.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_intr.c ...
>> src/gallium/auxiliary/gallivm/lp_bld_intr.c: In function 
>> 'lp_build_intrinsic_binary_anylength':
>> src/gallium/auxiliary/gallivm/lp_bld_intr.c:252:10: warning: implicit 
>> declaration of function
>> 'debug_printf' [-Wimplicit-function-declaration]
>> debug_printf("%s: should handle arbitrary vector size\n",
>> ^
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_logic.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_misc.cpp ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_pack.c ...
>> Compiling src/gallium/auxiliary/gallivm/lp_bld_printf.c ...
>> src/gallium/auxiliary/gallivm/lp_bld_printf.c: In function 
>> 'lp_build_print_args':
>> src/gallium/auxiliary/gallivm/lp_bld_printf.c:68:84: error: 'debug_printf' 
>> undeclared (first use in
>> this function)
>> func_printf = lp_build_const_int_pointer(gallivm, 
>> func_to_pointer((func_pointer)debug_printf));
>> ^
>> src/gallium/auxiliary/gallivm/lp_bld_printf.c:68:84: note: each undeclared 
>> identifier is reported
>> only once for each function it appears in
>> scons: *** 
>> [build/haiku-x86_64-debug/gallium/auxiliary/gallivm/lp_bld_printf.os] Error 1
>> 
>> debug_printf is definitely declared however (and it should be all c code, no 
>> C++ thus no mangling)
>> 
 The linux code just below defines debug_printf twice as well:
 
 https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/auxiliary/util/

[Mesa-dev] [PATCH] i965: Use tex_mocs instead of rb_mocs for GL images.

2016-07-18 Thread Kenneth Graunke
Fixes a 10-20% performance regression in OglCSDof caused by commit
5a8c89038abab0184ea72664ab390ec6ca58b4d6, which made images (in the
image load/store sense) use BDW_MOCS_PTE instead of BDW_MOCS_WB.

This seems sketchy, as the default PTE value is supposed to be
WB LLC eLLC, which is the same as our MOCS WB setting.  It's only
supposed to change when using a surface for display, which won't
ever happen for images.  Something may be wrong in the kernel...

Signed-off-by: Kenneth Graunke 
Reviewed-by: Jason Ekstrand 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index d896789..87f8601 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1516,7 +1516,7 @@ update_image_surface(struct brw_context *brw,
 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
 
 brw_emit_surface_state(brw, mt, &view,
-   surface_state_infos[brw->gen].rb_mocs, 
false,
+   surface_state_infos[brw->gen].tex_mocs, 
false,
surf_offset, surf_index,
I915_GEM_DOMAIN_SAMPLER,
access == GL_READ_ONLY ? 0 :
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 8/8] anv: Properly call gen75_emit_state_base_address on Haswell.

2016-07-18 Thread Jason Ekstrand
Also, feel free to cc the whole lot to stable since it really is a gen7
fix.  I guess you could leave 6-7 off stable if you want but Meh.

On Jul 18, 2016 6:33 PM, "Jason Ekstrand"  wrote:

> Series is
>
> Reviewed-by: Jason Ekstrand 
>
> Thanks for cleaning this up. The more we can share code the better I say.
>
> On Jul 18, 2016 6:06 PM, "Kenneth Graunke"  wrote:
>
>> This should fix MOCS values.  Caught by Coverity.
>>
>> CID: 1364155
>>
>> Signed-off-by: Kenneth Graunke 
>> ---
>>  src/intel/vulkan/anv_cmd_buffer.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/src/intel/vulkan/anv_cmd_buffer.c
>> b/src/intel/vulkan/anv_cmd_buffer.c
>> index 6256df8..380260a 100644
>> --- a/src/intel/vulkan/anv_cmd_buffer.c
>> +++ b/src/intel/vulkan/anv_cmd_buffer.c
>> @@ -359,7 +359,7 @@ anv_cmd_buffer_emit_state_base_address(struct
>> anv_cmd_buffer *cmd_buffer)
>> switch (cmd_buffer->device->info.gen) {
>> case 7:
>>if (cmd_buffer->device->info.is_haswell)
>> - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
>> + return gen75_cmd_buffer_emit_state_base_address(cmd_buffer);
>>else
>>   return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
>> case 8:
>> --
>> 2.9.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 8/8] anv: Properly call gen75_emit_state_base_address on Haswell.

2016-07-18 Thread Jason Ekstrand
Series is

Reviewed-by: Jason Ekstrand 

Thanks for cleaning this up. The more we can share code the better I say.

On Jul 18, 2016 6:06 PM, "Kenneth Graunke"  wrote:

> This should fix MOCS values.  Caught by Coverity.
>
> CID: 1364155
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/intel/vulkan/anv_cmd_buffer.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/intel/vulkan/anv_cmd_buffer.c
> b/src/intel/vulkan/anv_cmd_buffer.c
> index 6256df8..380260a 100644
> --- a/src/intel/vulkan/anv_cmd_buffer.c
> +++ b/src/intel/vulkan/anv_cmd_buffer.c
> @@ -359,7 +359,7 @@ anv_cmd_buffer_emit_state_base_address(struct
> anv_cmd_buffer *cmd_buffer)
> switch (cmd_buffer->device->info.gen) {
> case 7:
>if (cmd_buffer->device->info.is_haswell)
> - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
> + return gen75_cmd_buffer_emit_state_base_address(cmd_buffer);
>else
>   return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
> case 8:
> --
> 2.9.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Required Mako version? (WAS: mesa from git fails to compile)

2016-07-18 Thread Pali Rohár
Any conclusion or fix for this issue?

On Saturday 16 July 2016 02:52:50 Jason Ekstrand wrote:
> Adding Dylan
> 
> On Jul 14, 2016 10:24 PM, "Samuel Iglesias Gonsálvez"
> 
> 
> wrote:
> > On 14/07/16 18:34, Eric Engestrom wrote:
> > > On Thu, Jul 14, 2016 at 04:01:13PM +0100, Eric Engestrom wrote:
> > >> Oh right, there's already check for the Mako version, but the
> > >> minimum is currently set to 0.3.4 (configure.ac:92).
> > >> 
> > >> Emil, you were the one to mention 0.8.0; is that the actual
> > >> minimum, or just a known working version?
> > > 
> > > OK, so I did a bit of digging, and the version check was
> > > introduced by Samuel Iglesias Gonsalvez a couple years ago
> > > (2b37bea0) at 0.7.3, and he later lowered it to 0.3.4
> > > (6d43a4c3), but I can't find any discussion regarding this
> > > change: it seems there was none on the mailing list [0].
> > > 
> > > Adding Samuel so he can enlighten us :)
> > > 
> > > [0]
> > 
> > https://lists.freedesktop.org/archives/mesa-dev/2015-January/074366
> > .html
> > 
> > 
> > There was a discussion in the mailing list. Just after I pushed
> > this patch to master [0] setting it to 0.7.3 (because that was the
> > version I had back then), Dave Airlie mentioned that RHEL6 only
> > ships mako 0.3.4 [1] and asked if we really need a later version
> > or not. We did some tests [2][3] and finally this patch [4] was
> > pushed upstream.
> > 
> > I don't know if we need some feature from mako 0.8.0 to generate
> > isl_format_layout because this file was added later than my change,
> > probably Emil knows it.
> > 
> > Sam
> > 
> > [0]
> > https://lists.freedesktop.org/archives/mesa-dev/2015-January/074000
> > .html [1]
> > https://lists.freedesktop.org/archives/mesa-dev/2015-January/074283
> > .html [2]
> > https://lists.freedesktop.org/archives/mesa-dev/2015-January/074287
> > .html [3]
> > https://lists.freedesktop.org/archives/mesa-dev/2015-January/074332
> > .html [4]
> > https://lists.freedesktop.org/archives/mesa-dev/2015-January/074366
> > .html
> > 
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev

-- 
Pali Rohár
pali.ro...@gmail.com


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Switch OpenMAX state tracker in Mesa/Gallium to use Tizonia

2016-07-18 Thread Sunny Bhadani
I am interested in this project idea. I want some help regarding it.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 8/8] anv: Properly call gen75_emit_state_base_address on Haswell.

2016-07-18 Thread Kenneth Graunke
This should fix MOCS values.  Caught by Coverity.

CID: 1364155

Signed-off-by: Kenneth Graunke 
---
 src/intel/vulkan/anv_cmd_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c
index 6256df8..380260a 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -359,7 +359,7 @@ anv_cmd_buffer_emit_state_base_address(struct 
anv_cmd_buffer *cmd_buffer)
switch (cmd_buffer->device->info.gen) {
case 7:
   if (cmd_buffer->device->info.is_haswell)
- return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
+ return gen75_cmd_buffer_emit_state_base_address(cmd_buffer);
   else
  return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
case 8:
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/8] anv: Unify 3DSTATE_CLIP code across generations.

2016-07-18 Thread Kenneth Graunke
The bulk of this is the same.  There are just a couple fields that only
exist on one generation or another, and we can easily handle those with
an #ifdef.

Signed-off-by: Kenneth Graunke 
---
 src/intel/vulkan/gen7_pipeline.c  | 23 ++
 src/intel/vulkan/gen8_pipeline.c  | 25 +++-
 src/intel/vulkan/genX_pipeline_util.h | 36 +++
 3 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c
index a9f5e0b..8ce50be 100644
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -117,27 +117,8 @@ genX(graphics_pipeline_create)(
 
emit_urb_setup(pipeline);
 
-   const VkPipelineRasterizationStateCreateInfo *rs_info =
-  pCreateInfo->pRasterizationState;
-
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
-  clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace],
-  clip.EarlyCullEnable  = true,
-  clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode],
-  clip.ClipEnable   = !(extra && extra->use_rectlist),
-  clip.APIMode  = APIMODE_D3D,
-  clip.ViewportXYClipTestEnable = true,
-  clip.ViewportZClipTestEnable  = !pipeline->depth_clamp_enable,
-  clip.ClipMode = CLIPMODE_NORMAL,
-
-  clip.TriangleStripListProvokingVertexSelect   = 0,
-  clip.LineStripListProvokingVertexSelect   = 0,
-  clip.TriangleFanProvokingVertexSelect = 1,
-
-  clip.MinimumPointWidth= 0.125,
-  clip.MaximumPointWidth= 255.875,
-  clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1;
-   }
+   emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
+ pCreateInfo->pRasterizationState, extra);
 
if (pCreateInfo->pMultisampleState &&
pCreateInfo->pMultisampleState->rasterizationSamples > 1)
diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c
index 52792a9..cc10d3a 100644
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -186,29 +186,10 @@ genX(graphics_pipeline_create)(
 
emit_urb_setup(pipeline);
 
-   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
-  clip.ClipEnable   = !(extra && extra->use_rectlist);
-  clip.EarlyCullEnable  = true;
-  clip.APIMode  = APIMODE_D3D;
-  clip.ViewportXYClipTestEnable = true;
-
-  clip.ClipMode =
- pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
- CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL;
-
-  clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
- (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0;
-
-  clip.TriangleStripListProvokingVertexSelect  = 0;
-  clip.LineStripListProvokingVertexSelect  = 0;
-  clip.TriangleFanProvokingVertexSelect= 1;
-
-  clip.MinimumPointWidth  = 0.125;
-  clip.MaximumPointWidth  = 255.875;
-  clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1;
-   }
+   emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
+ pCreateInfo->pRasterizationState, extra);
 
+   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
   wm.StatisticsEnable= true;
   wm.LineEndCapAntialiasingRegionWidth   = _05pixels;
diff --git a/src/intel/vulkan/genX_pipeline_util.h 
b/src/intel/vulkan/genX_pipeline_util.h
index 4385112..52263df 100644
--- a/src/intel/vulkan/genX_pipeline_util.h
+++ b/src/intel/vulkan/genX_pipeline_util.h
@@ -646,3 +646,39 @@ emit_cb_state(struct anv_pipeline *pipeline,
 #endif
}
 }
+
+static void
+emit_3dstate_clip(struct anv_pipeline *pipeline,
+  const VkPipelineViewportStateCreateInfo *vp_info,
+  const VkPipelineRasterizationStateCreateInfo *rs_info,
+  const struct anv_graphics_pipeline_create_info *extra)
+{
+   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
+   (void) wm_prog_data;
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
+  clip.ClipEnable   = !(extra && extra->use_rectlist);
+  clip.EarlyCullEnable  = true;
+  clip.APIMode  = APIMODE_D3D,
+  clip.ViewportXYClipTestEnable = true;
+
+  clip.ClipMode = rs_info->rasterizerDiscardEnable ?
+ CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL;
+
+  clip.TriangleStripListProvokingVertexSelect = 0;
+  clip.LineStripListProvokingVertexSelect = 0;
+  clip.TriangleFanProvokingVertexSelect   = 1;
+
+  clip.MinimumPointWidth = 0.125;
+  clip.MaximumPointWidth = 255.875;
+  clip.MaximumVPIndex= vp_info->viewportCou

[Mesa-dev] [PATCH 2/8] genxml: Add APIMODE_D3D missing enum values and improve consistency.

2016-07-18 Thread Kenneth Graunke
Signed-off-by: Kenneth Graunke 
---
 src/intel/genxml/gen6.xml| 1 +
 src/intel/genxml/gen7.xml| 1 +
 src/intel/genxml/gen75.xml   | 1 +
 src/intel/genxml/gen8.xml| 3 ++-
 src/intel/genxml/gen9.xml| 3 ++-
 src/intel/vulkan/gen8_pipeline.c | 2 +-
 6 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/intel/genxml/gen6.xml b/src/intel/genxml/gen6.xml
index c465288..cf94efc 100644
--- a/src/intel/genxml/gen6.xml
+++ b/src/intel/genxml/gen6.xml
@@ -787,6 +787,7 @@
 
 
   
+  
 
 
 
diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml
index b1c324f..1084093 100644
--- a/src/intel/genxml/gen7.xml
+++ b/src/intel/genxml/gen7.xml
@@ -959,6 +959,7 @@
 
 
   
+  
 
 
 
diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml
index 1239164..b7bf13a 100644
--- a/src/intel/genxml/gen75.xml
+++ b/src/intel/genxml/gen75.xml
@@ -1068,6 +1068,7 @@
 
 
   
+  
 
 
 
diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml
index 386e8fc..dfeda94 100644
--- a/src/intel/genxml/gen8.xml
+++ b/src/intel/genxml/gen8.xml
@@ -1115,7 +1115,8 @@
 
 
 
-  
+  
+  
 
 
 
diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
index 896143b..06a3cd4 100644
--- a/src/intel/genxml/gen9.xml
+++ b/src/intel/genxml/gen9.xml
@@ -1167,7 +1167,8 @@
 
 
 
-  
+  
+  
 
 
 
diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c
index 4e04aad..52792a9 100644
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -190,7 +190,7 @@ genX(graphics_pipeline_create)(
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
   clip.ClipEnable   = !(extra && extra->use_rectlist);
   clip.EarlyCullEnable  = true;
-  clip.APIMode  = 1; /* D3D */
+  clip.APIMode  = APIMODE_D3D;
   clip.ViewportXYClipTestEnable = true;
 
   clip.ClipMode =
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/8] anv: Perform rasterizer discard in the SOL stage instead of the clipper.

2016-07-18 Thread Kenneth Graunke
See commit b0629e6894513a2c49a018bc3342a4e55435a236, where we discovered
that the SOL stage's "Rendering Disable" feature is a lot faster at
throwing away all geometry than the clipper's "reject all" mode.

Signed-off-by: Kenneth Graunke 
---
 src/intel/vulkan/gen7_pipeline.c  |  1 +
 src/intel/vulkan/gen8_pipeline.c  |  1 +
 src/intel/vulkan/genX_pipeline_util.h | 12 ++--
 src/intel/vulkan/genX_state.c |  1 -
 4 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c
index 8ce50be..6acdd85 100644
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -119,6 +119,7 @@ genX(graphics_pipeline_create)(
 
emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
  pCreateInfo->pRasterizationState, extra);
+   emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState);
 
if (pCreateInfo->pMultisampleState &&
pCreateInfo->pMultisampleState->rasterizationSamples > 1)
diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c
index cc10d3a..0010955 100644
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -188,6 +188,7 @@ genX(graphics_pipeline_create)(
 
emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
  pCreateInfo->pRasterizationState, extra);
+   emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState);
 
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
diff --git a/src/intel/vulkan/genX_pipeline_util.h 
b/src/intel/vulkan/genX_pipeline_util.h
index 52263df..3a545a0 100644
--- a/src/intel/vulkan/genX_pipeline_util.h
+++ b/src/intel/vulkan/genX_pipeline_util.h
@@ -661,8 +661,7 @@ emit_3dstate_clip(struct anv_pipeline *pipeline,
   clip.APIMode  = APIMODE_D3D,
   clip.ViewportXYClipTestEnable = true;
 
-  clip.ClipMode = rs_info->rasterizerDiscardEnable ?
- CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL;
+  clip.ClipMode = CLIPMODE_NORMAL;
 
   clip.TriangleStripListProvokingVertexSelect = 0;
   clip.LineStripListProvokingVertexSelect = 0;
@@ -682,3 +681,12 @@ emit_3dstate_clip(struct anv_pipeline *pipeline,
 #endif
}
 }
+
+static void
+emit_3dstate_streamout(struct anv_pipeline *pipeline,
+   const VkPipelineRasterizationStateCreateInfo *rs_info)
+{
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), so) {
+  so.RenderingDisable = rs_info->rasterizerDiscardEnable;
+   }
+}
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index f67b0a7..8253a8b 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -58,7 +58,6 @@ genX(init_device_state)(struct anv_device *device)
anv_batch_emit(&batch, GENX(3DSTATE_TE), ts);
anv_batch_emit(&batch, GENX(3DSTATE_DS), ds);
 
-   anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), so);
anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS), aa);
 
anv_batch_emit(&batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/8] genxml: Add CLIPMODE_* prefix to 3DSTATE_CLIP's "Clip Mode" enum values.

2016-07-18 Thread Kenneth Graunke
Gen6-7.5 use CLIPMODE_REJECT_ALL, while Gen8+ just used REJECT_ALL.
Being consistent will let me unify code, and I prefer having the prefix.

Signed-off-by: Kenneth Graunke 
---
 src/intel/genxml/gen8.xml| 6 +++---
 src/intel/genxml/gen9.xml| 6 +++---
 src/intel/vulkan/gen8_pipeline.c | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml
index 97af191..386e8fc 100644
--- a/src/intel/genxml/gen8.xml
+++ b/src/intel/genxml/gen8.xml
@@ -1121,9 +1121,9 @@
 
 
 
-  
-  
-  
+  
+  
+  
 
 
 
diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
index 5e3e2e1..896143b 100644
--- a/src/intel/genxml/gen9.xml
+++ b/src/intel/genxml/gen9.xml
@@ -1173,9 +1173,9 @@
 
 
 
-  
-  
-  
+  
+  
+  
 
 
 
diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c
index 4908bbd..4e04aad 100644
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -195,7 +195,7 @@ genX(graphics_pipeline_create)(
 
   clip.ClipMode =
  pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
- REJECT_ALL : NORMAL;
+ CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL;
 
   clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
  (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0;
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/8] anv: Enable early culling on Gen7.

2016-07-18 Thread Kenneth Graunke
We set the cull mode, but forgot the enable bit.  Gen8 uses this.

Signed-off-by: Kenneth Graunke 
---
 src/intel/vulkan/gen7_pipeline.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c
index a50d9c7..a9f5e0b 100644
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -122,6 +122,7 @@ genX(graphics_pipeline_create)(
 
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
   clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace],
+  clip.EarlyCullEnable  = true,
   clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode],
   clip.ClipEnable   = !(extra && extra->use_rectlist),
   clip.APIMode  = APIMODE_D3D,
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/8] anv: Fix near plane clipping on Gen7/7.5.

2016-07-18 Thread Kenneth Graunke
The Gen7/7.5 clip code used APIMODE_OGL, while the Gen8+ clip code used
APIMODE_D3D.  The meaning hasn't changed, so one of these must be wrong.

It appears that the hardware documentation is completely wrong.  It
claims that the "API Mode" bit means:

   0h    APIMODE_OGL    NEAR_VP boundary == 0.0 (NDC)
   1h    APIMODE_D3D    NEAR_VP boundary == -1.0 (NDC)

However, DirectX typically uses 0.0 for the near plane, while unextended
OpenGL uses -1.0.  i965's gen6_clip_state.c uses APIMODE_D3D for the
GL_ZERO_TO_ONE case, so I believe the meanings are backwards from what
the documentation says.

Section 23.2 ("Primitive Clipping") of the Vulkan 1.0.21 specification
contains the following equations:

   -w_c <= x_c <= w_c
   -w_c <= y_c <= w_c
      0 <= z_c <= w_c

This means that Vulkan follows D3D semantics.

Signed-off-by: Kenneth Graunke 
---
 src/intel/vulkan/gen7_pipeline.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c
index 01c04f3..a50d9c7 100644
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -124,7 +124,7 @@ genX(graphics_pipeline_create)(
   clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace],
   clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode],
   clip.ClipEnable   = !(extra && extra->use_rectlist),
-  clip.APIMode  = APIMODE_OGL,
+  clip.APIMode  = APIMODE_D3D,
   clip.ViewportXYClipTestEnable = true,
   clip.ViewportZClipTestEnable  = !pipeline->depth_clamp_enable,
   clip.ClipMode = CLIPMODE_NORMAL,
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/8] genxml: Rename "API Rendering Disable" to "Rendering Disable".

2016-07-18 Thread Kenneth Graunke
Gen7/7.5 call it "Rendering Disable" while Gen8/9 prefix it with "API".

Pick one for consistency, and so we can share code between generations.

Signed-off-by: Kenneth Graunke 
---
 src/intel/genxml/gen8.xml | 2 +-
 src/intel/genxml/gen9.xml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml
index dfeda94..8145ddd 100644
--- a/src/intel/genxml/gen8.xml
+++ b/src/intel/genxml/gen8.xml
@@ -2036,7 +2036,7 @@
 
 
 
-
+
 
 
   
diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
index 06a3cd4..1838d2c 100644
--- a/src/intel/genxml/gen9.xml
+++ b/src/intel/genxml/gen9.xml
@@ -2239,7 +2239,7 @@
 
 
 
-
+
 
 
   
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Fix shared atomic intrinsics to pay attention to base.

2016-07-18 Thread Timothy Arceri
On Mon, 2016-07-18 at 15:49 -0700, Kenneth Graunke wrote:

So this fixes a bug with indirects right? Is there a piglit test for
this?

With the typo Ilia pointed out fixed, both are:

Reviewed-by: Timothy Arceri 


> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 15 +--
>  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 6265dc6..a39c37e 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -4177,13 +4177,24 @@ fs_visitor::nir_emit_shared_atomic(const
> fs_builder &bld,
>    dest = get_nir_dest(instr->dest);
>  
> fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
> -   fs_reg offset = get_nir_src(instr->src[0]);
> +   fs_reg offset;
> fs_reg data1 = get_nir_src(instr->src[1]);
> fs_reg data2;
> if (op == BRW_AOP_CMPWR)
>    data2 = get_nir_src(instr->src[2]);
>  
> -   /* Emit the actual atomic operation operation */
> +   /* Get the offset */
> +   nir_const_value *const_offset = nir_src_as_const_value(instr-
> >src[0]);
> +   if (const_offset) {
> +  offset = brw_imm_ud(instr->const_index[0] + const_offset-
> >u32[0]);
> +   } else {
> +  offset = vgrf(glsl_type::uint_type);
> +  bld.ADD(offset,
> +   retype(get_nir_src(instr->src[0]),
> BRW_REGISTER_TYPE_UD),
> +   brw_imm_ud(instr->const_index[0]));
> +   }
> +
> +   /* Emit the actua atomic operation operation */
>  
> fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
>    data1, data2,
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Fix shared atomic intrinsics to pay attention to base.

2016-07-18 Thread Timothy Arceri
On Mon, 2016-07-18 at 15:49 -0700, Kenneth Graunke wrote:

So this fixes a bug with indirects right? Is there a piglit test for
this?

With the typo Ilia pointed out fixed.

Reviewed-by: Timothy Arceri 


> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 15 +--
>  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 6265dc6..a39c37e 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -4177,13 +4177,24 @@ fs_visitor::nir_emit_shared_atomic(const
> fs_builder &bld,
>    dest = get_nir_dest(instr->dest);
>  
> fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
> -   fs_reg offset = get_nir_src(instr->src[0]);
> +   fs_reg offset;
> fs_reg data1 = get_nir_src(instr->src[1]);
> fs_reg data2;
> if (op == BRW_AOP_CMPWR)
>    data2 = get_nir_src(instr->src[2]);
>  
> -   /* Emit the actual atomic operation operation */
> +   /* Get the offset */
> +   nir_const_value *const_offset = nir_src_as_const_value(instr-
> >src[0]);
> +   if (const_offset) {
> +  offset = brw_imm_ud(instr->const_index[0] + const_offset-
> >u32[0]);
> +   } else {
> +  offset = vgrf(glsl_type::uint_type);
> +  bld.ADD(offset,
> +   retype(get_nir_src(instr->src[0]),
> BRW_REGISTER_TYPE_UD),
> +   brw_imm_ud(instr->const_index[0]));
> +   }
> +
> +   /* Emit the actua atomic operation operation */
>  
> fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
>    data1, data2,
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/11] vl/util: add copy func for yv12image to nv12surface

2016-07-18 Thread Andy Furniss

Zhang, Boyuan wrote:

Hi Andy,

I just submitted another patch set, most of the issues you reported are solved, 
please see the information below:

- Giving different frame rate should result different output size. The final 
result from my side is very close to the CBR I set. Please give a try with 
different frame rate and bit rate.

- Picture corruption (half height pic) is caused by interlaced setting. 
Interlace encoding is not supported. However, for transcoding case, VAAPI 
decode will use interlace mode, which will cause this issue. The temp solution 
is to use an Environmental Variable to disable interlace when doing 
transcoding. Please try the following command with the new patch:
DISABLE_INTERLACE=true gst-launch-1.0 filesrc 
location=~/big_buck_bunny_720p_1mb.mp4 ! qtdemux ! h264parse ! vaapidecode ! 
vaapih264enc ! filesink location=out.264

- I420 yuv -> nv12 case seems working fine on my side, can you please provide 
the testing raw file and command you were using? I want to reproduce the issue 
from my side and try to fix it if possible. Thanks a lot!


Will try new patches tomorrow.

Here's a few frames of I420 in mkv at 1 fps, should play directly OK 
with mplayer/mpv - building is yellow.


https://drive.google.com/file/d/0BxP5-S1t9VEEc3RhNzBQclhlNWc/view?usp=sharing

Of course google will make a preview but you should be able to download 
the raw file by moving mouse towards the top of the screen.


Do

gst-launch-1.0 -f filesrc location=I420-5f.mkv ! matroskademux ! 
vaapih264enc ! h264parse ! mp4mux ! filesink location=out-I420-1.mp4


and the result is a blue building; forcing gstreamer to convert to nv12 
gives an OK result.


gst-launch-1.0 -f filesrc location=~/I420-5f.mkv ! matroskademux ! 
videoconvert ! video/x-raw,format=NV12 ! vaapih264enc ! h264parse ! 
mp4mux ! filesink location=out-I420-2.mp4







___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC] gallium/u_queue: add barrier function

2016-07-18 Thread Rob Clark
possibly.. although sprinkling queue_barrier() calls (which is at
least useful for debugging, although I think I won't use it in the end
after debugging) hasn't found the issue yet.  I did at least find an
issue w/ fence handling (I was grabbing the fence # potentially before
the batch was flushed), but that also doesn't seem to be the issue I
am seeing.

The idea of having a ring of N fences (where N is given by
max_jobs-1), rather than embedding the fence in the refcnt'd batch, is
interesting, and sounds like it might solve some problems.  I may end
up doing that..

BR,
-R

On Mon, Jul 18, 2016 at 7:34 PM, Marek Olšák  wrote:
> I think your issue is that you have self-releasing jobs with the
> cleanup callback and you automatically lose fences that way, so there
> is no way to wait for completion.
>
> Since you have only 1 thread with N jobs at most, I suggest you keep
> N+1 fences around (a ring of fences) that you reuse for new jobs and
> keep a pointer to the most-recently-used fence. That way you know
> which fence you need to wait on to make the whole queue idle.
>
> Marek
>
> On Mon, Jul 18, 2016 at 10:25 PM, Rob Clark  wrote:
>> Helper to block until all previous jobs are complete.
>> ---
>> So I think this might end up being useful to me in some cases.. but
>> the implementation only works for a single threaded queue (which is
>> all I need).  I could also just put a helper in my driver code.
>>
>> Opinions?
>>
>>  src/gallium/auxiliary/util/u_queue.c | 12 
>>  src/gallium/auxiliary/util/u_queue.h |  2 ++
>>  2 files changed, 14 insertions(+)
>>
>> diff --git a/src/gallium/auxiliary/util/u_queue.c 
>> b/src/gallium/auxiliary/util/u_queue.c
>> index 838464f..861faca 100644
>> --- a/src/gallium/auxiliary/util/u_queue.c
>> +++ b/src/gallium/auxiliary/util/u_queue.c
>> @@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue,
>> pipe_condvar_signal(queue->has_queued_cond);
>> pipe_mutex_unlock(queue->lock);
>>  }
>> +
>> +static void dummy_execute(void *job, int thread_index) {}
>> +
>> +/* blocks until all previously queued jobs complete: */
>> +void util_queue_barrier(struct util_queue *queue)
>> +{
>> +   struct util_queue_fence fence;
>> +   util_queue_fence_init(&fence);
>> +   util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute, NULL);
>> +   util_queue_job_wait(&fence);
>> +   util_queue_fence_destroy(&fence);
>> +}
>> diff --git a/src/gallium/auxiliary/util/u_queue.h 
>> b/src/gallium/auxiliary/util/u_queue.h
>> index 59646cc..8a22ee0 100644
>> --- a/src/gallium/auxiliary/util/u_queue.h
>> +++ b/src/gallium/auxiliary/util/u_queue.h
>> @@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue,
>>
>>  void util_queue_job_wait(struct util_queue_fence *fence);
>>
>> +void util_queue_barrier(struct util_queue *queue);
>> +
>>  /* util_queue needs to be cleared to zeroes for this to work */
>>  static inline bool
>>  util_queue_is_initialized(struct util_queue *queue)
>> --
>> 2.7.4
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Skip update_texture_surface when the plane doesn't exist

2016-07-18 Thread Jason Ekstrand
On Mon, Jul 18, 2016 at 9:50 AM, Jordan Justen 
wrote:

> On 2016-07-18 09:14:48, Jason Ekstrand wrote:
> > Thanks to rebase fail, recent surface state changes effectively reverted
>
> This happened in 09b5a71517fadd6c20b72e7ad9ea1f7539c93a42, right?
> Should we mention that commit?
>

Done


> Reviewed-by: Jordan Justen 
>

Thanks!


>
> > 727a9b24933 and 367cf3a2e3e which was unintentional.  This should bring
> it
> > back.
> >
> > Signed-off-by: Jason Ekstrand 
> > Cc: Jordan Justen 
> > ---
> >  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 18 ++
> >  1 file changed, 10 insertions(+), 8 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> > index c1a7579..d896789 100644
> > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> > @@ -385,6 +385,13 @@ brw_update_texture_surface(struct gl_context *ctx,
> > } else {
> >struct intel_texture_object *intel_obj =
> intel_texture_object(obj);
> >struct intel_mipmap_tree *mt = intel_obj->mt;
> > +
> > +  if (plane > 0) {
> > + if (mt->plane[plane - 1] == NULL)
> > +return;
> > + mt = mt->plane[plane - 1];
> > +  }
> > +
> >struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx,
> unit);
> >/* If this is a view with restricted NumLayers, then our
> effective depth
> > * is not just the miptree depth.
> > @@ -406,8 +413,9 @@ brw_update_texture_surface(struct gl_context *ctx,
> >const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
> >  brw_get_texture_swizzle(&brw->ctx,
> obj));
> >
> > -  unsigned format = translate_tex_format(
> > - brw, intel_obj->_Format, sampler->sRGBDecode);
> > +  mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format :
> mt->format;
> > +  unsigned format = translate_tex_format(brw, mesa_fmt,
> > + sampler->sRGBDecode);
> >
> >/* Implement gen6 and gen7 gather work-around */
> >bool need_green_to_blue = false;
> > @@ -449,12 +457,6 @@ brw_update_texture_surface(struct gl_context *ctx,
> >   assert(brw->gen >= 8);
> >   mt = mt->stencil_mt;
> >   format = BRW_SURFACEFORMAT_R8_UINT;
> > -  } else if (obj->Target == GL_TEXTURE_EXTERNAL_OES) {
> > - if (plane > 0)
> > -mt = mt->plane[plane - 1];
> > - if (mt == NULL)
> > -return;
> > - format = translate_tex_format(brw, mt->format,
> sampler->sRGBDecode);
> >}
> >
> >const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
> > --
> > 2.5.0.400.gff86faf
> >
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC] gallium/u_queue: add barrier function

2016-07-18 Thread Marek Olšák
I think your issue is that you have self-releasing jobs with the
cleanup callback and you automatically lose fences that way, so there
is no way to wait for completion.

Since you have only 1 thread with N jobs at most, I suggest you keep
N+1 fences around (a ring of fences) that you reuse for new jobs and
keep a pointer to the most-recently-used fence. That way you know
which fence you need to wait on to make the whole queue idle.

Marek

On Mon, Jul 18, 2016 at 10:25 PM, Rob Clark  wrote:
> Helper to block until all previous jobs are complete.
> ---
> So I think this might end up being useful to me in some cases.. but
> the implementation only works for a single threaded queue (which is
> all I need).  I could also just put a helper in my driver code.
>
> Opinions?
>
>  src/gallium/auxiliary/util/u_queue.c | 12 
>  src/gallium/auxiliary/util/u_queue.h |  2 ++
>  2 files changed, 14 insertions(+)
>
> diff --git a/src/gallium/auxiliary/util/u_queue.c 
> b/src/gallium/auxiliary/util/u_queue.c
> index 838464f..861faca 100644
> --- a/src/gallium/auxiliary/util/u_queue.c
> +++ b/src/gallium/auxiliary/util/u_queue.c
> @@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue,
> pipe_condvar_signal(queue->has_queued_cond);
> pipe_mutex_unlock(queue->lock);
>  }
> +
> +static void dummy_execute(void *job, int thread_index) {}
> +
> +/* blocks until all previously queued jobs complete: */
> +void util_queue_barrier(struct util_queue *queue)
> +{
> +   struct util_queue_fence fence;
> +   util_queue_fence_init(&fence);
> +   util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute, NULL);
> +   util_queue_job_wait(&fence);
> +   util_queue_fence_destroy(&fence);
> +}
> diff --git a/src/gallium/auxiliary/util/u_queue.h 
> b/src/gallium/auxiliary/util/u_queue.h
> index 59646cc..8a22ee0 100644
> --- a/src/gallium/auxiliary/util/u_queue.h
> +++ b/src/gallium/auxiliary/util/u_queue.h
> @@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue,
>
>  void util_queue_job_wait(struct util_queue_fence *fence);
>
> +void util_queue_barrier(struct util_queue *queue);
> +
>  /* util_queue needs to be cleared to zeroes for this to work */
>  static inline bool
>  util_queue_is_initialized(struct util_queue *queue)
> --
> 2.7.4
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96979] Mesa 10.5.7 implementation error: Trying to disable permanently enabled extensions

2016-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96979

--- Comment #3 from Ilia Mirkin  ---
(In reply to Kenneth Graunke from comment #2)
> This isn't something we want to support.
> 
> We should probably use fprintf rather than _mesa_problem so it doesn't print
> "Mesa:  implementation error" as it isn't an implementation issue -
> it's a warning to the user that the debug options they requested won't take
> effect because it's unsupported.

Actually the issue is that glxinfo hits a segfault later. I guess
glGetStringi() or GL_NUM_EXTENSIONS gets confused...

As I recall, at some point Brian explicitly added support for disabling
always-on exts. Not sure if that work was already in 10.5 or not... def worth
checking a more recent version of mesa -- 10.5 is long out of support.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/11] vl/util: add copy func for yv12image to nv12surface

2016-07-18 Thread Zhang, Boyuan
Hi Andy,

I just submitted another patch set, most of the issues you reported are solved, 
please see the information below:

- Giving a different frame rate should result in a different output size. The 
final result on my side is very close to the CBR I set. Please give it a try 
with different frame rates and bit rates.

- Picture corruption (half-height picture) is caused by the interlaced setting. 
Interlaced encoding is not supported. However, in the transcoding case, VAAPI 
decode will use interlaced mode, which causes this issue. The temporary solution 
is to use an environment variable to disable interlacing when 
transcoding. Please try the following command with the new patch:
DISABLE_INTERLACE=true gst-launch-1.0 filesrc 
location=~/big_buck_bunny_720p_1mb.mp4 ! qtdemux ! h264parse ! vaapidecode ! 
vaapih264enc ! filesink location=out.264

- The I420 yuv -> nv12 case seems to be working fine on my side. Can you please 
provide the raw test file and the command you were using? I want to reproduce 
the issue on my side and try to fix it if possible. Thanks a lot!


Hi Christian,

Besides fixing those issue listed above, I also modified the code based on your 
suggestions, e.g. adding mutex lock/unlock. Please take a look at the new patch 
set, and feel free to give any suggestions/comments. Thanks!

Regards,
Boyuan

-Original Message-
From: Christian König [mailto:deathsim...@vodafone.de]
Sent: July-18-16 10:15 AM
To: Zhang, Boyuan; mesa-dev@lists.freedesktop.org
Cc: adf.li...@gmail.com
Subject: Re: [PATCH 06/11] vl/util: add copy func for yv12image to nv12surface

Am 16.07.2016 um 00:41 schrieb Boyuan Zhang:
> Add function to copy from yv12 image to nv12 surface for VAAPI putimage call. 
> We need this function in VaPutImage call where copying from yv12 image to 
> nv12 surface for encoding. Existing function can't be used because it only 
> work for copying from yv12 surface to nv12 image in Vaapi.

I think we can keep the patches mostly as they are now, but I would like to get 
a bit more positive feedback from Andy and maybe others.

E.g. at least we should be able to encode something without crashing on Tonga 
and other hardware generations as well before we push it upstream.

Regards,
Christian.

>
> Signed-off-by: Boyuan Zhang 
> mailto:boyuan.zh...@amd.com>>
> ---
>   src/gallium/auxiliary/util/u_video.h | 23 +++
>   1 file changed, 23 insertions(+)
>
> diff --git a/src/gallium/auxiliary/util/u_video.h
> b/src/gallium/auxiliary/util/u_video.h
> index 9196afc..d147295 100644
> --- a/src/gallium/auxiliary/util/u_video.h
> +++ b/src/gallium/auxiliary/util/u_video.h
> @@ -130,6 +130,29 @@ u_copy_yv12_to_nv12(void *const *destination_data,
>   }
>
>   static inline void
> +u_copy_yv12_img_to_nv12_surf(uint8_t *const *src,
> + uint8_t *dest,
> + int *offset,
> + int field) {
> +   if (field == 0) {
> +  for (int i = 0; i < offset[1] ; i++)
> + dest[i] = src[field][i];
> +   } else if (field == 1) {
> +  bool odd = false;
> +  for (int i = 0; i < (offset[1]/2) ; i++){
> + if (odd == false) {
> +dest[i] = src[field][i/2];
> +odd = true;
> + } else {
> +dest[i] = src[field+1][i/2];
> +odd = false;
> + }
> +  }
> +   }
> +}
> +
> +static inline void
>   u_copy_swap422_packed(void *const *destination_data,
>  uint32_t const *destination_pitches,
>  int src_plane, int src_field,


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Fix shared atomic intrinsics to pay attention to base.

2016-07-18 Thread Ilia Mirkin
On Mon, Jul 18, 2016 at 6:49 PM, Kenneth Graunke  wrote:
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 15 +--
>  1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 6265dc6..a39c37e 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -4177,13 +4177,24 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder 
> &bld,
>dest = get_nir_dest(instr->dest);
>
> fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
> -   fs_reg offset = get_nir_src(instr->src[0]);
> +   fs_reg offset;
> fs_reg data1 = get_nir_src(instr->src[1]);
> fs_reg data2;
> if (op == BRW_AOP_CMPWR)
>data2 = get_nir_src(instr->src[2]);
>
> -   /* Emit the actual atomic operation operation */
> +   /* Get the offset */
> +   nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
> +   if (const_offset) {
> +  offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
> +   } else {
> +  offset = vgrf(glsl_type::uint_type);
> +  bld.ADD(offset,
> + retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
> + brw_imm_ud(instr->const_index[0]));
> +   }
> +
> +   /* Emit the actua atomic operation operation */

An l got lost...

>
> fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
>data1, data2,
> --
> 2.9.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nir: Add a base const_index to shared atomic intrinsics.

2016-07-18 Thread Kenneth Graunke
Commit 52e75dcb8c04c0dde989970c4c587cbe8313f7cf made nir_lower_io
start using nir_intrinsic_set_base instead of writing const_index[0]
directly.  However, those intrinsics apparently don't /have/ a base,
so this caused assert failures.

However, the old code was happily setting non-existent const_index
fields, so it was pretty bogus too.

Jason pointed out that load_shared and store_shared have a base,
and that the i965 driver uses that field.  So presumably atomics
should have one as well, so that loads/stores/atomics all refer
to variables with consistent addressing.

Signed-off-by: Kenneth Graunke 
---
 src/compiler/nir/nir_intrinsics.h | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 2f74555..b2ed67c 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -266,16 +266,16 @@ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), 
true, 1, 0, 0, xx, xx, xx,
  *in shared_atomic_add, etc).
  * 2: For CompSwap only: the second data parameter.
  */
-INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 
0)
+INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 1, BASE, xx, 
xx, 0)
 
 #define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: Fix shared atomic intrinsics to pay attention to base.

2016-07-18 Thread Kenneth Graunke
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 6265dc6..a39c37e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -4177,13 +4177,24 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder 
&bld,
   dest = get_nir_dest(instr->dest);
 
fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
-   fs_reg offset = get_nir_src(instr->src[0]);
+   fs_reg offset;
fs_reg data1 = get_nir_src(instr->src[1]);
fs_reg data2;
if (op == BRW_AOP_CMPWR)
   data2 = get_nir_src(instr->src[2]);
 
-   /* Emit the actual atomic operation operation */
+   /* Get the offset */
+   nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+   if (const_offset) {
+  offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
+   } else {
+  offset = vgrf(glsl_type::uint_type);
+  bld.ADD(offset,
+ retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(instr->const_index[0]));
+   }
+
+   /* Emit the actua atomic operation operation */
 
fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
   data1, data2,
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/12] st/va: add preset values for VAAPI encode

2016-07-18 Thread Boyuan Zhang
Add some hardcoded values that the hardware needs, mainly for rate control
purposes. With the values previously hardcoded for OMX, the rate control result
is not correct. This change fixes the rate control result by setting the
correct values for VAAPI.

Signed-off-by: Boyuan Zhang 
---
 src/gallium/state_trackers/va/picture.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/src/gallium/state_trackers/va/picture.c 
b/src/gallium/state_trackers/va/picture.c
index 4793194..518831f 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -95,6 +95,32 @@ vlVaGetReferenceFrame(vlVaDriver *drv, VASurfaceID 
surface_id,
   *ref_frame = NULL;
 }
 
+static void
+getEncParamPreset(vlVaContext *context)
+{
+   //motion estimation preset
+   context->desc.h264enc.motion_est.motion_est_quarter_pixel = 0x0001;
+   context->desc.h264enc.motion_est.lsmvert = 0x0002;
+   context->desc.h264enc.motion_est.enc_disable_sub_mode = 0x0078;
+   context->desc.h264enc.motion_est.enc_en_ime_overw_dis_subm = 0x0001;
+   context->desc.h264enc.motion_est.enc_ime_overw_dis_subm_no = 0x0001;
+   context->desc.h264enc.motion_est.enc_ime2_search_range_x = 0x0004;
+   context->desc.h264enc.motion_est.enc_ime2_search_range_y = 0x0004;
+
+   //pic control preset
+   context->desc.h264enc.pic_ctrl.enc_cabac_enable = 0x0001;
+   context->desc.h264enc.pic_ctrl.enc_constraint_set_flags = 0x0040;
+
+   //rate control
+   context->desc.h264enc.rate_ctrl.vbv_buffer_size = 2000;
+   context->desc.h264enc.rate_ctrl.vbv_buf_lv = 48;
+   context->desc.h264enc.rate_ctrl.fill_data_enable = 1;
+   context->desc.h264enc.rate_ctrl.enforce_hrd = 1;
+   context->desc.h264enc.enable_vui = false;
+
+   context->desc.h264enc.ref_pic_mode = 0x0201;
+}
+
 static VAStatus
 handlePictureParameterBuffer(vlVaDriver *drv, vlVaContext *context, vlVaBuffer 
*buf)
 {
@@ -521,6 +547,7 @@ vlVaEndPicture(VADriverContextP ctx, VAContextID context_id)
 
if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
   coded_buf = context->coded_buf;
+  getEncParamPreset(context);
   context->decoder->begin_frame(context->decoder, context->target, 
&context->desc.base);
   context->decoder->encode_bitstream(context->decoder, context->target,
  coded_buf->derived_surface.resource, 
&feedback);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/12] st/va: enable h264 VAAPI encode

2016-07-18 Thread Boyuan Zhang
Enable H.264 VAAPI encoding through config. Currently only H.264 baseline is 
supported.

Signed-off-by: Boyuan Zhang 
---
 src/gallium/state_trackers/va/config.c | 32 ++--
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/gallium/state_trackers/va/config.c 
b/src/gallium/state_trackers/va/config.c
index 6a36fb3..668c89d 100644
--- a/src/gallium/state_trackers/va/config.c
+++ b/src/gallium/state_trackers/va/config.c
@@ -74,6 +74,7 @@ vlVaQueryConfigEntrypoints(VADriverContextP ctx, VAProfile 
profile,
 {
struct pipe_screen *pscreen;
enum pipe_video_profile p;
+   int va_status = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
 
if (!ctx)
   return VA_STATUS_ERROR_INVALID_CONTEXT;
@@ -90,12 +91,18 @@ vlVaQueryConfigEntrypoints(VADriverContextP ctx, VAProfile 
profile,
   return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
 
pscreen = VL_VA_PSCREEN(ctx);
-   if (!pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM, 
PIPE_VIDEO_CAP_SUPPORTED))
-  return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
-
-   entrypoint_list[(*num_entrypoints)++] = VAEntrypointVLD;
+   if (pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM, 
PIPE_VIDEO_CAP_SUPPORTED)) {
+  entrypoint_list[(*num_entrypoints)++] = VAEntrypointVLD;
+  va_status = VA_STATUS_SUCCESS;
+   }
+   if (pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_ENCODE, 
PIPE_VIDEO_CAP_SUPPORTED) &&
+   p == PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE) {
+  entrypoint_list[(*num_entrypoints)++] = VAEntrypointEncSlice;
+  entrypoint_list[(*num_entrypoints)++] = VAEntrypointEncPicture;
+  va_status = VA_STATUS_SUCCESS;
+   }
 
-   return VA_STATUS_SUCCESS;
+   return va_status;
 }
 
 VAStatus
@@ -114,7 +121,7 @@ vlVaGetConfigAttributes(VADriverContextP ctx, VAProfile 
profile, VAEntrypoint en
  value = VA_RT_FORMAT_YUV420;
  break;
   case VAConfigAttribRateControl:
- value = VA_RC_NONE;
+ value = VA_RC_CQP | VA_RC_CBR;
  break;
   default:
  value = VA_ATTRIB_NOT_SUPPORTED;
@@ -161,10 +168,15 @@ vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, 
VAEntrypoint entrypoin
   return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
 
pscreen = VL_VA_PSCREEN(ctx);
-   if (!pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM, 
PIPE_VIDEO_CAP_SUPPORTED))
-  return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
-
-   if (entrypoint != VAEntrypointVLD)
+   if (entrypoint == VAEntrypointVLD) {
+  if (!pscreen->get_video_param(pscreen, p, 
PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_SUPPORTED))
+ return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+   }
+   else if (entrypoint == VAEntrypointEncSlice) {
+  if (!pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_ENCODE, 
PIPE_VIDEO_CAP_SUPPORTED))
+ return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+   }
+   else
   return VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
 
if (entrypoint == VAEntrypointEncSlice || entrypoint == 
VAEntrypointEncPicture)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/12] st/va: add environmental variable to disable interlace

2016-07-18 Thread Boyuan Zhang
Add an environment variable (DISABLE_INTERLACE) to disable interlace mode. At
the VAAPI decoding stage, the driver cannot distinguish between the pure
decoding case and the transcoding case. Since interlaced encoding is not
supported, we have to disable interlace for the transcoding case. The temporary
solution is to use an environment variable to disable interlace mode.

Signed-off-by: Boyuan Zhang 
---
 src/gallium/state_trackers/va/surface.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/state_trackers/va/surface.c 
b/src/gallium/state_trackers/va/surface.c
index 8ce4143..d1296b0 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -43,6 +43,8 @@
 
 #include "va_private.h"
 
+DEBUG_GET_ONCE_BOOL_OPTION(nointerlace, "DISABLE_INTERLACE", FALSE);
+
 #include <va/va_drmcommon.h>
 
 static const enum pipe_format vpp_surface_formats[] = {
@@ -620,6 +622,8 @@ vlVaCreateSurfaces2(VADriverContextP ctx, unsigned int 
format,
 
templat.width = width;
templat.height = height;
+   if (debug_get_option_nointerlace())
+  templat.interlaced = false;
 
memset(surfaces, VA_INVALID_ID, num_surfaces * sizeof(VASurfaceID));
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/12] st/va: get rate control method from configattrib

2016-07-18 Thread Boyuan Zhang
Rate control method is passed from app to driver through config attrib list. 
That is why we need to store this rate control method to config. And later on, 
we will pass this value to context->desc.h264enc.rate_ctrl.rate_ctrl_method.

Signed-off-by: Boyuan Zhang 
---
 src/gallium/state_trackers/va/config.c | 11 +++
 src/gallium/state_trackers/va/context.c|  2 ++
 src/gallium/state_trackers/va/va_private.h |  1 +
 3 files changed, 14 insertions(+)

diff --git a/src/gallium/state_trackers/va/config.c 
b/src/gallium/state_trackers/va/config.c
index 7ea7e24..6a36fb3 100644
--- a/src/gallium/state_trackers/va/config.c
+++ b/src/gallium/state_trackers/va/config.c
@@ -174,6 +174,17 @@ vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, 
VAEntrypoint entrypoin
 
config->profile = p;
 
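+   for (int i = 0; i < num_attribs; i++) {
+      if (attrib_list[i].type == VAConfigAttribRateControl) {
+         if (attrib_list[i].value == VA_RC_CBR)
+            config->rc = PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT;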
+ else if (attrib_list[i].value == VA_RC_VBR)
+config->rc = PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE;
+ else
+config->rc = PIPE_H264_ENC_RATE_CONTROL_METHOD_DISABLE;
+  }
+   }
+
pipe_mutex_lock(drv->mutex);
*config_id = handle_table_add(drv->htab, config);
pipe_mutex_unlock(drv->mutex);
diff --git a/src/gallium/state_trackers/va/context.c 
b/src/gallium/state_trackers/va/context.c
index 8882cba..65ba7db 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -276,6 +276,8 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID 
config_id, int picture_width,
 
context->desc.base.profile = config->profile;
context->desc.base.entry_point = config->entrypoint;
+   if (config->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)
+  context->desc.h264enc.rate_ctrl.rate_ctrl_method = config->rc;
 
pipe_mutex_lock(drv->mutex);
*context_id = handle_table_add(drv->htab, context);
diff --git a/src/gallium/state_trackers/va/va_private.h 
b/src/gallium/state_trackers/va/va_private.h
index 723983d..ad9010a 100644
--- a/src/gallium/state_trackers/va/va_private.h
+++ b/src/gallium/state_trackers/va/va_private.h
@@ -246,6 +246,7 @@ typedef struct {
 typedef struct {
VAEntrypoint entrypoint;
enum pipe_video_profile profile;
+   enum pipe_h264_enc_rate_control_method rc;
 } vlVaConfig;
 
 typedef struct {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/12] vl/util: add copy func for yv12image to nv12surface

2016-07-18 Thread Boyuan Zhang
Add a function to copy from a yv12 image to an nv12 surface for the VAAPI
putimage call. We need this function in the VaPutImage call, which copies from
a yv12 image to an nv12 surface for encoding. The existing function can't be
used because it only works for copying from a yv12 surface to an nv12 image in
VAAPI.

Signed-off-by: Boyuan Zhang 
---
 src/gallium/auxiliary/util/u_video.h | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_video.h 
b/src/gallium/auxiliary/util/u_video.h
index 9196afc..d147295 100644
--- a/src/gallium/auxiliary/util/u_video.h
+++ b/src/gallium/auxiliary/util/u_video.h
@@ -130,6 +130,29 @@ u_copy_yv12_to_nv12(void *const *destination_data,
 }
 
 static inline void
+u_copy_yv12_img_to_nv12_surf(uint8_t *const *src,
+ uint8_t *dest,
+ int *offset,
+ int field)
+{
+   if (field == 0) {
+  for (int i = 0; i < offset[1] ; i++)
+ dest[i] = src[field][i];
+   } else if (field == 1) {
+  bool odd = false;
+  for (int i = 0; i < (offset[1]/2) ; i++){
+ if (odd == false) {
+dest[i] = src[field][i/2];
+odd = true;
+ } else {
+dest[i] = src[field+1][i/2];
+odd = false;
+ }
+  }
+   }
+}
+
+static inline void
 u_copy_swap422_packed(void *const *destination_data,
uint32_t const *destination_pitches,
int src_plane, int src_field,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/12] st/va: add functions for VAAPI encode

2016-07-18 Thread Boyuan Zhang
Add necessary functions/changes for VAAPI encoding to buffer and picture. These 
changes will allow driver to handle all Vaapi encode related operations. This 
patch doesn't change the Vaapi decode behaviour.

Signed-off-by: Boyuan Zhang 
---
 src/gallium/state_trackers/va/buffer.c |   6 +
 src/gallium/state_trackers/va/picture.c| 169 -
 src/gallium/state_trackers/va/va_private.h |   3 +
 3 files changed, 176 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/va/buffer.c 
b/src/gallium/state_trackers/va/buffer.c
index 7d3167b..dfcebbe 100644
--- a/src/gallium/state_trackers/va/buffer.c
+++ b/src/gallium/state_trackers/va/buffer.c
@@ -133,6 +133,12 @@ vlVaMapBuffer(VADriverContextP ctx, VABufferID buf_id, 
void **pbuff)
   if (!buf->derived_surface.transfer || !*pbuff)
  return VA_STATUS_ERROR_INVALID_BUFFER;
 
+  if (buf->type == VAEncCodedBufferType) {
+ ((VACodedBufferSegment*)buf->data)->buf = *pbuff;
+ ((VACodedBufferSegment*)buf->data)->size = buf->coded_size;
+ ((VACodedBufferSegment*)buf->data)->next = NULL;
+ *pbuff = buf->data;
+  }
} else {
   pipe_mutex_unlock(drv->mutex);
   *pbuff = buf->data;
diff --git a/src/gallium/state_trackers/va/picture.c 
b/src/gallium/state_trackers/va/picture.c
index 89ac024..4793194 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -78,7 +78,8 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID 
context_id, VASurfaceID rende
   return VA_STATUS_SUCCESS;
}
 
-   context->decoder->begin_frame(context->decoder, context->target, 
&context->desc.base);
+   if (context->decoder->entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE)
+  context->decoder->begin_frame(context->decoder, context->target, 
&context->desc.base);
 
return VA_STATUS_SUCCESS;
 }
@@ -278,6 +279,139 @@ handleVASliceDataBufferType(vlVaContext *context, 
vlVaBuffer *buf)
   num_buffers, (const void * const*)buffers, sizes);
 }
 
+static VAStatus
+handleVAEncMiscParameterTypeRateControl(vlVaContext *context, 
VAEncMiscParameterBuffer *misc)
+{
+   VAEncMiscParameterRateControl *rc = (VAEncMiscParameterRateControl 
*)misc->data;
+   if (context->desc.h264enc.rate_ctrl.rate_ctrl_method ==
+   PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT)
+  context->desc.h264enc.rate_ctrl.target_bitrate = rc->bits_per_second;
+   else
+  context->desc.h264enc.rate_ctrl.target_bitrate = rc->bits_per_second * 
rc->target_percentage;
+   context->desc.h264enc.rate_ctrl.peak_bitrate = rc->bits_per_second;
+   if (context->desc.h264enc.rate_ctrl.target_bitrate < 200)
+  context->desc.h264enc.rate_ctrl.vbv_buffer_size = 
MIN2((context->desc.h264enc.rate_ctrl.target_bitrate * 2.75), 200);
+   else
+  context->desc.h264enc.rate_ctrl.vbv_buffer_size = 
context->desc.h264enc.rate_ctrl.target_bitrate;
+   context->desc.h264enc.rate_ctrl.target_bits_picture =
+  context->desc.h264enc.rate_ctrl.target_bitrate / 
context->desc.h264enc.rate_ctrl.frame_rate_num;
+   context->desc.h264enc.rate_ctrl.peak_bits_picture_integer =
+  context->desc.h264enc.rate_ctrl.peak_bitrate / 
context->desc.h264enc.rate_ctrl.frame_rate_num;
+   context->desc.h264enc.rate_ctrl.peak_bits_picture_fraction = 0;
+
+   return VA_STATUS_SUCCESS;
+}
+
+static VAStatus
+handleVAEncSequenceParameterBufferType(vlVaDriver *drv, vlVaContext *context, 
vlVaBuffer *buf)
+{
+   VAEncSequenceParameterBufferH264 *h264 = (VAEncSequenceParameterBufferH264 
*)buf->data;
+   if (!context->decoder) {
+  context->templat.max_references = h264->max_num_ref_frames;
+  context->templat.level = h264->level_idc;
+  context->decoder = drv->pipe->create_video_codec(drv->pipe, 
&context->templat);
+  if (!context->decoder)
+ return VA_STATUS_ERROR_ALLOCATION_FAILED;
+   }
+   context->desc.h264enc.gop_size = h264->intra_idr_period;
+   context->desc.h264enc.rate_ctrl.frame_rate_num = h264->time_scale / 2;
+   context->desc.h264enc.rate_ctrl.frame_rate_den = 1;
+   return VA_STATUS_SUCCESS;
+}
+
+static VAStatus
+handleVAEncMiscParameterBufferType(vlVaContext *context, vlVaBuffer *buf)
+{
+   VAStatus vaStatus = VA_STATUS_SUCCESS;
+   VAEncMiscParameterBuffer *misc;
+   misc = buf->data;
+
+   switch (misc->type) {
+   case VAEncMiscParameterTypeRateControl:
+  vaStatus = handleVAEncMiscParameterTypeRateControl(context, misc);
+  break;
+
+   default:
+  break;
+   }
+
+   return vaStatus;
+}
+
+static VAStatus
+handleVAEncPictureParameterBufferType(vlVaDriver *drv, vlVaContext *context, 
vlVaBuffer *buf)
+{
+   VAEncPictureParameterBufferH264 *h264;
+   vlVaBuffer *coded_buf;
+
+   h264 = buf->data;
+   context->desc.h264enc.frame_num = h264->frame_num;
+   context->desc.h264enc.not_referenced = false;
+   context->desc.h264enc.is_idr = (h264->pic_fields.bits.idr_pic_flag == 1);
+   context->desc.h264enc.pic_order_cnt = h264->CurrP

[Mesa-dev] [PATCH 07/12] st/va: add conversion for yv12 to nv12 in putimage

2016-07-18 Thread Boyuan Zhang
For putimage call, if image format is yv12 (or IYUV with U V field swap) and 
surface format is nv12, then we need to convert yv12 to nv12 and then copy the 
converted data from image to surface. We can't use the existing logic where 
surface is destroyed and re-created with yv12 format.

Signed-off-by: Boyuan Zhang 
---
 src/gallium/state_trackers/va/image.c | 33 ++---
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/va/image.c 
b/src/gallium/state_trackers/va/image.c
index 1b956e3..47895ee 100644
--- a/src/gallium/state_trackers/va/image.c
+++ b/src/gallium/state_trackers/va/image.c
@@ -471,7 +471,9 @@ vlVaPutImage(VADriverContextP ctx, VASurfaceID surface, 
VAImageID image,
   return VA_STATUS_ERROR_OPERATION_FAILED;
}
 
-   if (format != surf->buffer->buffer_format) {
+   if ((format != surf->buffer->buffer_format) &&
+ ((format != PIPE_FORMAT_YV12) || (surf->buffer->buffer_format != 
PIPE_FORMAT_NV12)) &&
+ ((format != PIPE_FORMAT_IYUV) || (surf->buffer->buffer_format != 
PIPE_FORMAT_NV12))) {
   struct pipe_video_buffer *tmp_buf;
   struct pipe_video_buffer templat = surf->templat;
 
@@ -513,12 +515,29 @@ vlVaPutImage(VADriverContextP ctx, VASurfaceID surface, 
VAImageID image,
   unsigned width, height;
   if (!views[i]) continue;
   vlVaVideoSurfaceSize(surf, i, &width, &height);
-  for (j = 0; j < views[i]->texture->array_size; ++j) {
- struct pipe_box dst_box = {0, 0, j, width, height, 1};
- drv->pipe->transfer_inline_write(drv->pipe, views[i]->texture, 0,
-PIPE_TRANSFER_WRITE, &dst_box,
-data[i] + pitches[i] * j,
-pitches[i] * views[i]->texture->array_size, 0);
+  if ((format == PIPE_FORMAT_YV12) || (format == PIPE_FORMAT_IYUV) &&
+(surf->buffer->buffer_format == PIPE_FORMAT_NV12)) {
+ struct pipe_transfer *transfer = NULL;
+ uint8_t *map = NULL;
+ struct pipe_box dst_box_1 = {0, 0, 0, width, height, 1};
+ map = drv->pipe->transfer_map(drv->pipe,
+   views[i]->texture,
+   0,
+   PIPE_TRANSFER_DISCARD_RANGE,
+   &dst_box_1, &transfer);
+ if (map == NULL)
+return VA_STATUS_ERROR_OPERATION_FAILED;
+
+ u_copy_yv12_img_to_nv12_surf (data, map, vaimage->offsets, i);
+ pipe_transfer_unmap(drv->pipe, transfer);
+  } else {
+ for (j = 0; j < views[i]->texture->array_size; ++j) {
+struct pipe_box dst_box = {0, 0, j, width, height, 1};
+drv->pipe->transfer_inline_write(drv->pipe, views[i]->texture, 0,
+ PIPE_TRANSFER_WRITE, &dst_box,
+ data[i] + pitches[i] * j,
+ pitches[i] * 
views[i]->texture->array_size, 0);
+ }
   }
}
pipe_mutex_unlock(drv->mutex);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/12] st/va: add encode entrypoint

2016-07-18 Thread Boyuan Zhang
VAAPI passes PIPE_VIDEO_ENTRYPOINT_ENCODE as the entry point for the encoding
case. We will save this encode entry point in config. config_id was previously
used as the profile. Now, config has both a profile and an entrypoint field,
and config_id is used to get the config object. Later on, we pass this
entrypoint to context->templat.entrypoint instead of always hardcoding it to
PIPE_VIDEO_ENTRYPOINT_BITSTREAM, as was done previously for the decoding case.

Signed-off-by: Boyuan Zhang 
---
 src/gallium/state_trackers/va/config.c | 69 +++---
 src/gallium/state_trackers/va/context.c| 59 ++---
 src/gallium/state_trackers/va/surface.c| 14 --
 src/gallium/state_trackers/va/va_private.h |  5 +++
 4 files changed, 115 insertions(+), 32 deletions(-)

diff --git a/src/gallium/state_trackers/va/config.c 
b/src/gallium/state_trackers/va/config.c
index 9ca0aa8..7ea7e24 100644
--- a/src/gallium/state_trackers/va/config.c
+++ b/src/gallium/state_trackers/va/config.c
@@ -34,6 +34,8 @@
 
 #include "va_private.h"
 
+#include "util/u_handle_table.h"
+
 DEBUG_GET_ONCE_BOOL_OPTION(mpeg4, "VAAPI_MPEG4_ENABLED", false)
 
 VAStatus
@@ -128,14 +130,29 @@ VAStatus
 vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, VAEntrypoint 
entrypoint,
  VAConfigAttrib *attrib_list, int num_attribs, VAConfigID 
*config_id)
 {
+   vlVaDriver *drv;
+   vlVaConfig *config;
struct pipe_screen *pscreen;
enum pipe_video_profile p;
 
if (!ctx)
   return VA_STATUS_ERROR_INVALID_CONTEXT;
 
+   drv = VL_VA_DRIVER(ctx);
+
+   if (!drv)
+  return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+   config = CALLOC(1, sizeof(vlVaConfig));
+   if (!config)
+  return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
if (profile == VAProfileNone && entrypoint == VAEntrypointVideoProc) {
-  *config_id = PIPE_VIDEO_PROFILE_UNKNOWN;
+  config->entrypoint = VAEntrypointVideoProc;
+  config->profile = PIPE_VIDEO_PROFILE_UNKNOWN;
+  pipe_mutex_lock(drv->mutex);
+  *config_id = handle_table_add(drv->htab, config);
+  pipe_mutex_unlock(drv->mutex);
   return VA_STATUS_SUCCESS;
}
 
@@ -150,7 +167,16 @@ vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, 
VAEntrypoint entrypoin
if (entrypoint != VAEntrypointVLD)
   return VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
 
-   *config_id = p;
+   if (entrypoint == VAEntrypointEncSlice || entrypoint == 
VAEntrypointEncPicture)
+  config->entrypoint = PIPE_VIDEO_ENTRYPOINT_ENCODE;
+   else
+  config->entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM;
+
+   config->profile = p;
+
+   pipe_mutex_lock(drv->mutex);
+   *config_id = handle_table_add(drv->htab, config);
+   pipe_mutex_unlock(drv->mutex);
 
return VA_STATUS_SUCCESS;
 }
@@ -158,9 +184,27 @@ vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, 
VAEntrypoint entrypoin
 VAStatus
 vlVaDestroyConfig(VADriverContextP ctx, VAConfigID config_id)
 {
+   vlVaDriver *drv;
+   vlVaConfig *config;
+
if (!ctx)
   return VA_STATUS_ERROR_INVALID_CONTEXT;
 
+   drv = VL_VA_DRIVER(ctx);
+
+   if (!drv)
+  return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+   pipe_mutex_lock(drv->mutex);
+   config = handle_table_get(drv->htab, config_id);
+
+   if (!config)
+  return VA_STATUS_ERROR_INVALID_CONFIG;
+
+   FREE(config);
+   handle_table_remove(drv->htab, config_id);
+   pipe_mutex_unlock(drv->mutex);
+
return VA_STATUS_SUCCESS;
 }
 
@@ -168,18 +212,33 @@ VAStatus
 vlVaQueryConfigAttributes(VADriverContextP ctx, VAConfigID config_id, 
VAProfile *profile,
   VAEntrypoint *entrypoint, VAConfigAttrib 
*attrib_list, int *num_attribs)
 {
+   vlVaDriver *drv;
+   vlVaConfig *config;
+
if (!ctx)
   return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-   *profile = PipeToProfile(config_id);
+   drv = VL_VA_DRIVER(ctx);
+
+   if (!drv)
+  return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+   pipe_mutex_lock(drv->mutex);
+   config = handle_table_get(drv->htab, config_id);
+   pipe_mutex_unlock(drv->mutex);
+
+   if (!config)
+  return VA_STATUS_ERROR_INVALID_CONFIG;
+
+   *profile = PipeToProfile(config->profile);
 
-   if (config_id == PIPE_VIDEO_PROFILE_UNKNOWN) {
+   if (config->profile == PIPE_VIDEO_PROFILE_UNKNOWN) {
   *entrypoint = VAEntrypointVideoProc;
   *num_attribs = 0;
   return VA_STATUS_SUCCESS;
}
 
-   *entrypoint = VAEntrypointVLD;
+   *entrypoint = config->entrypoint;
 
*num_attribs = 1;
attrib_list[0].type = VAConfigAttribRTFormat;
diff --git a/src/gallium/state_trackers/va/context.c 
b/src/gallium/state_trackers/va/context.c
index 402fbb2..8882cba 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -195,18 +195,23 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID 
config_id, int picture_width,
 {
vlVaDriver *drv;
vlVaContext *context;
+   vlVaConfig *config;
int is_vpp;
 
if (!ctx)
   return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-   is_vpp = confi

[Mesa-dev] [PATCH 02/12] vl: add entry point

2016-07-18 Thread Boyuan Zhang
Add entrypoint to distinguish H.264 decode and encode. For example, in patch
5/11, when "VaCreateContext" is called, "pps" and "sps" shouldn't be allocated
for H.264 encoding. So we need to use the entry_point to determine whether this
is H.264 decode or H.264 encode. We can use config to determine the entrypoint
since config_id is passed to us for the VaCreateContext call. However, for the
VaDestroyContext call, only context_id is passed to us. So we need to know the
entrypoint in order to not free the pps/sps for the encoding case.

Signed-off-by: Boyuan Zhang 
---
 src/gallium/include/pipe/p_video_state.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/include/pipe/p_video_state.h 
b/src/gallium/include/pipe/p_video_state.h
index 754d013..39b3905 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -131,6 +131,7 @@ enum pipe_h264_enc_rate_control_method
 struct pipe_picture_desc
 {
enum pipe_video_profile profile;
+   enum pipe_video_entrypoint entry_point;
 };
 
 struct pipe_quant_matrix
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96979] Mesa 10.5.7 implementation error: Trying to disable permanently enabled extensions

2016-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96979

--- Comment #2 from Kenneth Graunke  ---
This isn't something we want to support.

We should probably use fprintf rather than _mesa_problem so it doesn't print
"Mesa:  implementation error" as it isn't an implementation issue -
it's a warning to the user that the debug options they requested won't take
effect because it's unsupported.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH RFC 2/8] nvc0: bind images for 3d/cp shaders on GM107+

2016-07-18 Thread Samuel Pitoiset



On 07/18/2016 11:13 PM, Ilia Mirkin wrote:

On Mon, Jul 18, 2016 at 4:55 PM, Samuel Pitoiset
 wrote:

On Maxwell, image binding is slightly different from (and much better than)
Fermi and Kepler, because a texture view needs to be uploaded for each image,
and this is going to simplify things a lot.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_context.c |   5 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h |   4 +
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c   |  10 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 110 ++--
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c |  91 ++--
 5 files changed, 202 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 1137e6c..4bd240b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -161,8 +161,11 @@ nvc0_context_unreference_resources(struct nvc0_context 
*nvc0)
   for (i = 0; i < NVC0_MAX_BUFFERS; ++i)
  pipe_resource_reference(&nvc0->buffers[s][i].buffer, NULL);

-  for (i = 0; i < NVC0_MAX_IMAGES; ++i)
+  for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
  pipe_resource_reference(&nvc0->images[s][i].resource, NULL);
+ if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
+pipe_sampler_view_reference(&nvc0->images_tic[s][i], NULL);
+  }
}

for (s = 0; s < 2; ++s) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 4b73ec3..1d9fca1 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -246,6 +246,7 @@ struct nvc0_context {
uint32_t buffers_valid[6];

struct pipe_image_view images[6][NVC0_MAX_IMAGES];
+   struct pipe_sampler_view *images_tic[6][NVC0_MAX_IMAGES]; /* GM107+ */
uint16_t images_dirty[6];
uint16_t images_valid[6];

@@ -349,6 +350,9 @@ struct pipe_sampler_view *
 nvc0_create_sampler_view(struct pipe_context *,
  struct pipe_resource *,
  const struct pipe_sampler_view *);
+struct pipe_sampler_view *
+gm107_create_texture_view_from_image(struct pipe_context *,
+ struct pipe_image_view *);

 /* nvc0_transfer.c */
 void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 441cfc9..98becf4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -1303,8 +1303,16 @@ nvc0_bind_images_range(struct nvc0_context *nvc0, const 
unsigned s,
   mask = ((1 << nr) - 1) << start;
   if (!(nvc0->images_valid[s] & mask))
  return false;
-  for (i = start; i < end; ++i)
+  for (i = start; i < end; ++i) {
  pipe_resource_reference(&nvc0->images[s][i].resource, NULL);
+ if (nvc0->screen->base.class_3d >= GM107_3D_CLASS) {
+struct nv50_tic_entry *old = 
nv50_tic_entry(nvc0->images_tic[s][i]);
+if (old) {
+   nvc0_screen_tic_unlock(nvc0->screen, old);
+   pipe_sampler_view_reference(&nvc0->images_tic[s][i], NULL);
+}
+ }
+  }
   nvc0->images_valid[s] &= ~mask;
}
nvc0->images_dirty[s] |= mask;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 5f7bba8..efbaacf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -236,6 +236,38 @@ gm107_create_texture_view(struct pipe_context *pipe,
return &view->pipe;
 }

+struct pipe_sampler_view *
+gm107_create_texture_view_from_image(struct pipe_context *pipe,
+ struct pipe_image_view *view)
+{
+   struct nv04_resource *res = nv04_resource(view->resource);
+   enum pipe_texture_target target = res->base.target;
+   struct pipe_sampler_view templ = {};
+   uint32_t flags = 0;
+
+   if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)
+  target = PIPE_TEXTURE_2D_ARRAY;
+
+   templ.format = view->format;
+   templ.swizzle_r = PIPE_SWIZZLE_X;
+   templ.swizzle_g = PIPE_SWIZZLE_Y;
+   templ.swizzle_b = PIPE_SWIZZLE_Z;
+   templ.swizzle_a = PIPE_SWIZZLE_W;
+
+   if (target == PIPE_BUFFER) {
+  templ.u.buf.first_element = view->u.buf.first_element;
+  templ.u.buf.last_element = view->u.buf.last_element;
+   } else {
+  templ.u.tex.first_layer = view->u.tex.first_layer;
+  templ.u.tex.last_layer = view->u.tex.last_layer;
+  templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level;
+   }
+
+   flags = res->base.last_level ? 0 : NV50_TEXVIEW_SCALED_COORDS;


I think you just want 0 here, always.


I thought so too, but this will hit the assert at nvc0_tex.c:134 for image 
buffers. And this is loose

Re: [Mesa-dev] [PATCH] virgl: add exported dmabuf to BO hash table

2016-07-18 Thread Rob Herring
On Fri, Jun 17, 2016 at 5:25 PM, Rob Herring  wrote:
> Exported dmabufs can get imported by the same process, but the handle was
> not getting added to the hash table on export. Add the handle to the hash
> table on export.

Ping. Dave, can you please apply this?

Rob

>
> Cc: Dave Airlie 
> Signed-off-by: Rob Herring 
> ---
>  src/gallium/winsys/virgl/drm/virgl_drm_winsys.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c 
> b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> index cbd416c..8336a33 100644
> --- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> @@ -486,6 +486,9 @@ static boolean 
> virgl_drm_winsys_resource_get_handle(struct virgl_winsys *qws,
> } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
>if (drmPrimeHandleToFD(qdws->fd, res->bo_handle, DRM_CLOEXEC, 
> (int*)&whandle->handle))
>  return FALSE;
> +  pipe_mutex_lock(qdws->bo_handles_mutex);
> +  util_hash_table_set(qdws->bo_handles, (void 
> *)(uintptr_t)res->bo_handle, res);
> +  pipe_mutex_unlock(qdws->bo_handles_mutex);
> }
> whandle->stride = stride;
> return TRUE;
> --
> 2.7.4
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0: add support for BGRA8 images

2016-07-18 Thread Samuel Pitoiset



On 07/18/2016 11:16 PM, Ilia Mirkin wrote:

Karol tested it on Kepler1, so we should be good.


Reviewed-by: Samuel Pitoiset 



On Mon, Jul 18, 2016 at 5:15 PM, Samuel Pitoiset
 wrote:

Works fine on Fermi, but still need to test on Kepler1.


On 07/16/2016 09:09 PM, Ilia Mirkin wrote:


This is useful for pbo downloads, which are now accelerated with images.
BGRA8 is a moderately common format to do that in.

Signed-off-by: Ilia Mirkin 
---

This needs testing on SM20 and SM30. I've tested it on SM35 and

bin/pbo-readpixels-small -auto

worked fine. (Didn't until I properly fixed the various items.)

 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp   | 2 ++
 src/gallium/drivers/nouveau/codegen/nv50_ir.h | 3 +++
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 ++
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 4 
 src/gallium/drivers/nouveau/nv50/g80_defs.xml.h   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_formats.c   | 3 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c   | 2 ++
 7 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index 2caebe8..179ad0b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1012,6 +1012,8 @@ const struct TexInstruction::ImgFormatDesc
TexInstruction::formatTable[] =
{ "RG8_SNORM",2, {  8,  8,  0,  0 }, SNORM },
{ "R16_SNORM",1, { 16,  0,  0,  0 }, SNORM },
{ "R8_SNORM", 1, {  8,  0,  0,  0 }, SNORM },
+
+   { "BGRA8",4, {  8,  8,  8,  8 }, UNORM, true },
 };

 void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 41804b6..6d2ee8b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -412,6 +412,8 @@ enum ImgFormat
FMT_R16_SNORM,
FMT_R8_SNORM,

+   FMT_BGRA8,
+
IMG_FORMAT_COUNT,
 };

@@ -967,6 +969,7 @@ public:
   uint8_t components;
   uint8_t bits[4];
   ImgType type;
+  bool bgra;
};

static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT];
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 61eb7f5..7dff08a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -508,6 +508,8 @@ static nv50_ir::ImgFormat translateImgFormat(uint
format)
FMT_CASE(R8G8_SNORM, RG8_SNORM);
FMT_CASE(R16_SNORM, R16_SNORM);
FMT_CASE(R8_SNORM, R8_SNORM);
+
+   FMT_CASE(B8G8R8A8_UNORM, BGRA8);
}

assert(!"Unexpected format");
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 18955eb..92bc0bb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1940,6 +1940,10 @@
NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
  bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], TYPE_F16, typedDst[i]);
   }
}
+
+   if (format->bgra) {
+  std::swap(typedDst[0], typedDst[2]);
+   }
 }

 void
diff --git a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
index 5d40624..49bf860 100644
--- a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
+++ b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
@@ -177,6 +177,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
 #define GK104_IMAGE_FORMAT_RG32_FLOAT  0x000d
 #define GK104_IMAGE_FORMAT_RG32_SINT   0x000e
 #define GK104_IMAGE_FORMAT_RG32_UINT   0x000f
+#define GK104_IMAGE_FORMAT_BGRA8_UNORM 0x0011
 #define GK104_IMAGE_FORMAT_RGB10_A2_UNORM  0x0013
 #define GK104_IMAGE_FORMAT_RGB10_A2_UINT   0x0015
 #define GK104_IMAGE_FORMAT_RGBA8_UNORM 0x0018
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
index 34d32d1..07c4419 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -50,6 +50,7 @@
 #define U_IB  PIPE_BIND_BLENDABLE | U_IR
 #define U_TD  PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB
 #define U_TZ  PIPE_BIND_DEPTH_STENCIL | U_T
+#define U_ID  U_TD | U_I
 #if NOUVEAU_DRIVER == 0xc0
 # define U_TC  U_TB
 # define U_IC  U_IB
@@ -122,7 +123,7 @@ const struct nvc0_format
nvc0_format_table[PIPE_FORMAT_COUNT] =
 const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
 #endif
 {
-   C4(A, B8G8R8A8_UNORM, BGRA8_UNORM, B, G, R, A, UNORM

Re: [Mesa-dev] [PATCH] nvc0: add support for BGRA8 images

2016-07-18 Thread Ilia Mirkin
Karol tested it on Kepler1, so we should be good.

On Mon, Jul 18, 2016 at 5:15 PM, Samuel Pitoiset
 wrote:
> Works fine on Fermi, but still need to test on Kepler1.
>
>
> On 07/16/2016 09:09 PM, Ilia Mirkin wrote:
>>
>> This is useful for pbo downloads, which are now accelerated with images.
>> BGRA8 is a moderately common format to do that in.
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>
>> This needs testing on SM20 and SM30. I've tested it on SM35 and
>>
>> bin/pbo-readpixels-small -auto
>>
>> worked fine. (Didn't until I properly fixed the various items.)
>>
>>  src/gallium/drivers/nouveau/codegen/nv50_ir.cpp   | 2 ++
>>  src/gallium/drivers/nouveau/codegen/nv50_ir.h | 3 +++
>>  src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 ++
>>  src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 4 
>>  src/gallium/drivers/nouveau/nv50/g80_defs.xml.h   | 1 +
>>  src/gallium/drivers/nouveau/nv50/nv50_formats.c   | 3 ++-
>>  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c   | 2 ++
>>  7 files changed, 16 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
>> index 2caebe8..179ad0b 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
>> @@ -1012,6 +1012,8 @@ const struct TexInstruction::ImgFormatDesc
>> TexInstruction::formatTable[] =
>> { "RG8_SNORM",2, {  8,  8,  0,  0 }, SNORM },
>> { "R16_SNORM",1, { 16,  0,  0,  0 }, SNORM },
>> { "R8_SNORM", 1, {  8,  0,  0,  0 }, SNORM },
>> +
>> +   { "BGRA8",4, {  8,  8,  8,  8 }, UNORM, true },
>>  };
>>
>>  void
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>> index 41804b6..6d2ee8b 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>> @@ -412,6 +412,8 @@ enum ImgFormat
>> FMT_R16_SNORM,
>> FMT_R8_SNORM,
>>
>> +   FMT_BGRA8,
>> +
>> IMG_FORMAT_COUNT,
>>  };
>>
>> @@ -967,6 +969,7 @@ public:
>>uint8_t components;
>>uint8_t bits[4];
>>ImgType type;
>> +  bool bgra;
>> };
>>
>> static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT];
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> index 61eb7f5..7dff08a 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> @@ -508,6 +508,8 @@ static nv50_ir::ImgFormat translateImgFormat(uint
>> format)
>> FMT_CASE(R8G8_SNORM, RG8_SNORM);
>> FMT_CASE(R16_SNORM, R16_SNORM);
>> FMT_CASE(R8_SNORM, R8_SNORM);
>> +
>> +   FMT_CASE(B8G8R8A8_UNORM, BGRA8);
>> }
>>
>> assert(!"Unexpected format");
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> index 18955eb..92bc0bb 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> @@ -1940,6 +1940,10 @@
>> NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
>>   bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], TYPE_F16, typedDst[i]);
>>}
>> }
>> +
>> +   if (format->bgra) {
>> +  std::swap(typedDst[0], typedDst[2]);
>> +   }
>>  }
>>
>>  void
>> diff --git a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
>> b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
>> index 5d40624..49bf860 100644
>> --- a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
>> +++ b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
>> @@ -177,6 +177,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
>> SOFTWARE.
>>  #define GK104_IMAGE_FORMAT_RG32_FLOAT  0x000d
>>  #define GK104_IMAGE_FORMAT_RG32_SINT   0x000e
>>  #define GK104_IMAGE_FORMAT_RG32_UINT   0x000f
>> +#define GK104_IMAGE_FORMAT_BGRA8_UNORM 0x0011
>>  #define GK104_IMAGE_FORMAT_RGB10_A2_UNORM  0x0013
>>  #define GK104_IMAGE_FORMAT_RGB10_A2_UINT   0x0015
>>  #define GK104_IMAGE_FORMAT_RGBA8_UNORM 0x0018
>> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
>> b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
>> index 34d32d1..07c4419 100644
>> --- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
>> +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
>> @@ -50,6 +50,7 @@
>>  #define U_IB  PIPE_BIND_BLENDABLE | U_IR
>>  #define U_TD  PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB
>>  #define U_TZ  PIPE_BIND_DEPTH_STENCIL | U_T
>> +#define U_ID  U_TD | U_I
>>  #if NOUVEAU_DRIVER == 0xc0

Re: [Mesa-dev] [PATCH] nvc0: add support for BGRA8 images

2016-07-18 Thread Samuel Pitoiset

Works fine on Fermi, but it still needs to be tested on Kepler1.

On 07/16/2016 09:09 PM, Ilia Mirkin wrote:

This is useful for pbo downloads, which are now accelerated with images.
BGRA8 is a moderately common format to do that in.

Signed-off-by: Ilia Mirkin 
---

This needs testing on SM20 and SM30. I've tested it on SM35 and

bin/pbo-readpixels-small -auto

worked fine. (Didn't until I properly fixed the various items.)

 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp   | 2 ++
 src/gallium/drivers/nouveau/codegen/nv50_ir.h | 3 +++
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 ++
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 4 
 src/gallium/drivers/nouveau/nv50/g80_defs.xml.h   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_formats.c   | 3 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c   | 2 ++
 7 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index 2caebe8..179ad0b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1012,6 +1012,8 @@ const struct TexInstruction::ImgFormatDesc 
TexInstruction::formatTable[] =
{ "RG8_SNORM",2, {  8,  8,  0,  0 }, SNORM },
{ "R16_SNORM",1, { 16,  0,  0,  0 }, SNORM },
{ "R8_SNORM", 1, {  8,  0,  0,  0 }, SNORM },
+
+   { "BGRA8",4, {  8,  8,  8,  8 }, UNORM, true },
 };

 void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 41804b6..6d2ee8b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -412,6 +412,8 @@ enum ImgFormat
FMT_R16_SNORM,
FMT_R8_SNORM,

+   FMT_BGRA8,
+
IMG_FORMAT_COUNT,
 };

@@ -967,6 +969,7 @@ public:
   uint8_t components;
   uint8_t bits[4];
   ImgType type;
+  bool bgra;
};

static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT];
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 61eb7f5..7dff08a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -508,6 +508,8 @@ static nv50_ir::ImgFormat translateImgFormat(uint format)
FMT_CASE(R8G8_SNORM, RG8_SNORM);
FMT_CASE(R16_SNORM, R16_SNORM);
FMT_CASE(R8_SNORM, R8_SNORM);
+
+   FMT_CASE(B8G8R8A8_UNORM, BGRA8);
}

assert(!"Unexpected format");
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 18955eb..92bc0bb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1940,6 +1940,10 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction 
*su)
  bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], TYPE_F16, typedDst[i]);
   }
}
+
+   if (format->bgra) {
+  std::swap(typedDst[0], typedDst[2]);
+   }
 }

 void
diff --git a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h 
b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
index 5d40624..49bf860 100644
--- a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
+++ b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
@@ -177,6 +177,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
SOFTWARE.
 #define GK104_IMAGE_FORMAT_RG32_FLOAT  0x000d
 #define GK104_IMAGE_FORMAT_RG32_SINT   0x000e
 #define GK104_IMAGE_FORMAT_RG32_UINT   0x000f
+#define GK104_IMAGE_FORMAT_BGRA8_UNORM 0x0011
 #define GK104_IMAGE_FORMAT_RGB10_A2_UNORM  0x0013
 #define GK104_IMAGE_FORMAT_RGB10_A2_UINT   0x0015
 #define GK104_IMAGE_FORMAT_RGBA8_UNORM 0x0018
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c 
b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
index 34d32d1..07c4419 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -50,6 +50,7 @@
 #define U_IB  PIPE_BIND_BLENDABLE | U_IR
 #define U_TD  PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB
 #define U_TZ  PIPE_BIND_DEPTH_STENCIL | U_T
+#define U_ID  U_TD | U_I
 #if NOUVEAU_DRIVER == 0xc0
 # define U_TC  U_TB
 # define U_IC  U_IB
@@ -122,7 +123,7 @@ const struct nvc0_format 
nvc0_format_table[PIPE_FORMAT_COUNT] =
 const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
 #endif
 {
-   C4(A, B8G8R8A8_UNORM, BGRA8_UNORM, B, G, R, A, UNORM, A8B8G8R8, TD),
+   C4(A, B8G8R8A8_UNORM, BGRA8_UNORM, B, G, R, A, UNORM, A8B8G8R8, ID),
F3(A, B8G8R8X8_UNORM, BGRX8_UNORM, B, G, R, xx, UNORM, A8B8G8R8, TD),
C4(A, B8G8R8A

Re: [Mesa-dev] [PATCH RFC 2/8] nvc0: bind images for 3d/cp shaders on GM107+

2016-07-18 Thread Ilia Mirkin
On Mon, Jul 18, 2016 at 4:55 PM, Samuel Pitoiset
 wrote:
> On Maxwell, images binding is slightly different (and much better)
> regarding Fermi and Kepler because a texture view needs to be uploaded
> for each image and this is going to simplify the thing a lot.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.c |   5 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h |   4 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c   |  10 ++-
>  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 110 
> ++--
>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c |  91 ++--
>  5 files changed, 202 insertions(+), 18 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
> index 1137e6c..4bd240b 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
> @@ -161,8 +161,11 @@ nvc0_context_unreference_resources(struct nvc0_context 
> *nvc0)
>for (i = 0; i < NVC0_MAX_BUFFERS; ++i)
>   pipe_resource_reference(&nvc0->buffers[s][i].buffer, NULL);
>
> -  for (i = 0; i < NVC0_MAX_IMAGES; ++i)
> +  for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
>   pipe_resource_reference(&nvc0->images[s][i].resource, NULL);
> + if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
> +pipe_sampler_view_reference(&nvc0->images_tic[s][i], NULL);
> +  }
> }
>
> for (s = 0; s < 2; ++s) {
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 4b73ec3..1d9fca1 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -246,6 +246,7 @@ struct nvc0_context {
> uint32_t buffers_valid[6];
>
> struct pipe_image_view images[6][NVC0_MAX_IMAGES];
> +   struct pipe_sampler_view *images_tic[6][NVC0_MAX_IMAGES]; /* GM107+ */
> uint16_t images_dirty[6];
> uint16_t images_valid[6];
>
> @@ -349,6 +350,9 @@ struct pipe_sampler_view *
>  nvc0_create_sampler_view(struct pipe_context *,
>   struct pipe_resource *,
>   const struct pipe_sampler_view *);
> +struct pipe_sampler_view *
> +gm107_create_texture_view_from_image(struct pipe_context *,
> + struct pipe_image_view *);
>
>  /* nvc0_transfer.c */
>  void
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> index 441cfc9..98becf4 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> @@ -1303,8 +1303,16 @@ nvc0_bind_images_range(struct nvc0_context *nvc0, 
> const unsigned s,
>mask = ((1 << nr) - 1) << start;
>if (!(nvc0->images_valid[s] & mask))
>   return false;
> -  for (i = start; i < end; ++i)
> +  for (i = start; i < end; ++i) {
>   pipe_resource_reference(&nvc0->images[s][i].resource, NULL);
> + if (nvc0->screen->base.class_3d >= GM107_3D_CLASS) {
> +struct nv50_tic_entry *old = 
> nv50_tic_entry(nvc0->images_tic[s][i]);
> +if (old) {
> +   nvc0_screen_tic_unlock(nvc0->screen, old);
> +   pipe_sampler_view_reference(&nvc0->images_tic[s][i], NULL);
> +}
> + }
> +  }
>nvc0->images_valid[s] &= ~mask;
> }
> nvc0->images_dirty[s] |= mask;
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> index 5f7bba8..efbaacf 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> @@ -236,6 +236,38 @@ gm107_create_texture_view(struct pipe_context *pipe,
> return &view->pipe;
>  }
>
> +struct pipe_sampler_view *
> +gm107_create_texture_view_from_image(struct pipe_context *pipe,
> + struct pipe_image_view *view)
> +{
> +   struct nv04_resource *res = nv04_resource(view->resource);
> +   enum pipe_texture_target target = res->base.target;
> +   struct pipe_sampler_view templ = {};
> +   uint32_t flags = 0;
> +
> +   if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)
> +  target = PIPE_TEXTURE_2D_ARRAY;
> +
> +   templ.format = view->format;
> +   templ.swizzle_r = PIPE_SWIZZLE_X;
> +   templ.swizzle_g = PIPE_SWIZZLE_Y;
> +   templ.swizzle_b = PIPE_SWIZZLE_Z;
> +   templ.swizzle_a = PIPE_SWIZZLE_W;
> +
> +   if (target == PIPE_BUFFER) {
> +  templ.u.buf.first_element = view->u.buf.first_element;
> +  templ.u.buf.last_element = view->u.buf.last_element;
> +   } else {
> +  templ.u.tex.first_layer = view->u.tex.first_layer;
> +  templ.u.tex.last_layer = view->u.tex.last_layer;
> +  templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level;
> +   }
> +
> +   flags = res->base.last_

Re: [Mesa-dev] [PATCH v3] glsl: reuse main extension table to appropriately restrict extensions

2016-07-18 Thread Ilia Mirkin
Well, I have a basic review on this from Eric Engestrom, who is not a
mesa expert (yet?) but has been giving out a lot of good review
comments lately, and nobody else has piped up saying they hate this,
so I'm going to push this in the next few days unless I hear any
objections. IMHO this is a nice simplification of the glsl parser
boilerplate, and removes the oft-forgotten glcpp annoyance.

On Tue, Jul 12, 2016 at 11:07 AM, Ilia Mirkin  wrote:
> ping^2
>
> On Tue, Jul 5, 2016 at 6:41 PM, Ilia Mirkin  wrote:
>> ping
>>
>> On Fri, Jun 24, 2016 at 1:42 AM, Ilia Mirkin  wrote:
>>> Previously we were only restricting based on ES/non-ES-ness and whether
>>> the overall enable bit had been flipped on. However we have been adding
>>> more fine-grained restrictions, such as based on compat profiles, as
>>> well as specific ES versions. Most of the time this doesn't matter, but
>>> it can create awkward situations and duplication of logic.
>>>
>>> Here we separate the main extension table into a separate object file,
>>> linked to the glsl compiler, which makes use of it with a custom
>>> function which takes the ES-ness of the shader into account (thus
>>> allowing desktop shaders to properly use ES extensions that would
>>> otherwise have been disallowed.)
>>>
>>> The effect of this change should be nil in most cases. However in some
>>> situations, extensions like GL_ARB_gpu_shader5 which were formerly
>>> available in compat contexts on the GLSL side of things will now become
>>> inaccessible.
>>>
>>> Signed-off-by: Ilia Mirkin 
>>> Reviewed-by: Eric Engestrom  (v2)
>>> v2 -> v3: integrate glcpp defines into the same mechanism
>>> ---
>>>
>>> FWIW I hate the method I had to invent to get this information to
>>> glcpp. A callback that takes a callback. Ugh. Sorry. If someone can
>>> come up with something cleaner, I'm all ears.
>>>
>>> This does appear to pass some basic testing.
>>>
>>>  src/Makefile.am  |   1 +
>>>  src/compiler/SConscript.glsl |   2 +
>>>  src/compiler/glsl/glcpp/glcpp-parse.y| 204 +-
>>>  src/compiler/glsl/glcpp/glcpp.c  |   2 +-
>>>  src/compiler/glsl/glcpp/glcpp.h  |  19 ++-
>>>  src/compiler/glsl/glcpp/pp.c |   6 +-
>>>  src/compiler/glsl/glsl_parser_extras.cpp | 283 
>>> +--
>>>  src/compiler/glsl/glsl_parser_extras.h   |  17 +-
>>>  src/compiler/glsl/test_optpass.cpp   |   2 +-
>>>  src/mesa/Android.libmesa_glsl_utils.mk   |   2 +
>>>  src/mesa/Makefile.sources|   1 +
>>>  src/mesa/main/extensions.c   |  33 +---
>>>  src/mesa/main/extensions.h   |   1 +
>>>  src/mesa/main/extensions_table.c |  51 ++
>>>  14 files changed, 269 insertions(+), 355 deletions(-)
>>>  create mode 100644 src/mesa/main/extensions_table.c
>>>
>>> diff --git a/src/Makefile.am b/src/Makefile.am
>>> index 32372da..d38f7c4 100644
>>> --- a/src/Makefile.am
>>> +++ b/src/Makefile.am
>>> @@ -114,6 +114,7 @@ AM_CPPFLAGS = \
>>>  noinst_LTLIBRARIES = libglsl_util.la
>>>
>>>  libglsl_util_la_SOURCES = \
>>> +   mesa/main/extensions_table.c \
>>> mesa/main/imports.c \
>>> mesa/program/prog_hash_table.c \
>>> mesa/program/symbol_table.c \
>>> diff --git a/src/compiler/SConscript.glsl b/src/compiler/SConscript.glsl
>>> index 4252ce1..31d8f6d 100644
>>> --- a/src/compiler/SConscript.glsl
>>> +++ b/src/compiler/SConscript.glsl
>>> @@ -70,6 +70,7 @@ if env['msvc']:
>>>  # Copy these files to avoid generation object files into src/mesa/program
>>>  env.Prepend(CPPPATH = ['#src/mesa/main'])
>>>  env.Command('glsl/imports.c', '#src/mesa/main/imports.c', Copy('$TARGET', 
>>> '$SOURCE'))
>>> +env.Command('glsl/extensions_table.c', 
>>> '#src/mesa/main/extensions_table.c', Copy('$TARGET', '$SOURCE'))
>>>  # Copy these files to avoid generation object files into src/mesa/program
>>>  env.Prepend(CPPPATH = ['#src/mesa/program'])
>>>  env.Command('glsl/prog_hash_table.c', 
>>> '#src/mesa/program/prog_hash_table.c', Copy('$TARGET', '$SOURCE'))
>>> @@ -79,6 +80,7 @@ env.Command('glsl/dummy_errors.c', 
>>> '#src/mesa/program/dummy_errors.c', Copy('$TA
>>>  compiler_objs = env.StaticObject(source_lists['GLSL_COMPILER_CXX_FILES'])
>>>
>>>  mesa_objs = env.StaticObject([
>>> +'glsl/extensions_table.c',
>>>  'glsl/imports.c',
>>>  'glsl/prog_hash_table.c',
>>>  'glsl/symbol_table.c',
>>> diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
>>> b/src/compiler/glsl/glcpp/glcpp-parse.y
>>> index b9d690d..ca376d9 100644
>>> --- a/src/compiler/glsl/glcpp/glcpp-parse.y
>>> +++ b/src/compiler/glsl/glcpp/glcpp-parse.y
>>> @@ -1311,7 +1311,7 @@ add_builtin_define(glcpp_parser_t *parser, const char 
>>> *name, int value)
>>>  }
>>>
>>>  glcpp_parser_t *
>>> -glcpp_parser_create(const struct gl_extensions *extensions, gl_api api)
>>> +glcpp_parser_create(glcpp_extension_iterator extensions, void *state, 
>>> gl_api api)
>>>  {
>>> glcpp_pa

Re: [Mesa-dev] [PATCH] anv: Drop unnecessary is_haswell check in state base address code.

2016-07-18 Thread Matt Turner
On Mon, Jul 18, 2016 at 1:41 PM, Kenneth Graunke  wrote:
> Both branches are identical.  Fixes a Coverity warning.
>
> CID: 1364155
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/intel/vulkan/anv_cmd_buffer.c | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
> b/src/intel/vulkan/anv_cmd_buffer.c
> index 6256df8..ce4266f 100644
> --- a/src/intel/vulkan/anv_cmd_buffer.c
> +++ b/src/intel/vulkan/anv_cmd_buffer.c
> @@ -358,10 +358,7 @@ anv_cmd_buffer_emit_state_base_address(struct 
> anv_cmd_buffer *cmd_buffer)
>  {
> switch (cmd_buffer->device->info.gen) {
> case 7:
> -  if (cmd_buffer->device->info.is_haswell)
> - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
> -  else
> - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
> +  return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
> case 8:
>return gen8_cmd_buffer_emit_state_base_address(cmd_buffer);
> case 9:
> --
> 2.9.0

This was added in commit 6f613abc2b, which looks really sketchy.

The patch adds a prototype for
gen75_cmd_buffer_emit_state_base_address(), which I assume was
intended to be used in this if-statement, and the commit summary says
"Incidentally, this should fix MOCS settings for dynamic and surface
state on Haswell." However, other than adding the useless is_haswell check
that your patch removes, it didn't touch anything related to Haswell.

Jason should really take a look. Clearly /something/ is wrong.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH RFC 6/8] gm107/ir: add emission for SUREDx

2016-07-18 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 50 ++
 1 file changed, 50 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 871ffd2..9818c4d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -205,6 +205,7 @@ private:
void emitSUHandle(const int s);
void emitSUSTx();
void emitSULDx();
+   void emitSUREDx();
 };
 
 
/***
@@ -2913,6 +2914,51 @@ CodeEmitterGM107::emitSULDx()
 
emitSUHandle(1);
 }
+
+void
+CodeEmitterGM107::emitSUREDx()
+{
+   const TexInstruction *insn = this->insn->asTex();
+   uint8_t type = 0, subOp;
+
+   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
+  emitInsn(0xeac0);
+   else
+  emitInsn(0xea60);
+
+   if (insn->op == OP_SUREDB)
+  emitField(0x34, 1, 1);
+   emitSUTarget();
+
+   // destination type
+   switch (insn->dType) {
+   case TYPE_S32: type = 1; break;
+   case TYPE_U64: type = 2; break;
+   case TYPE_F32: type = 3; break;
+   case TYPE_S64: type = 5; break;
+   default:
+  assert(insn->dType == TYPE_U32);
+  break;
+   }
+
+   // atomic operation
+   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+  subOp = 0;
+   } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
+  subOp = 8;
+   } else {
+  subOp = insn->subOp;
+   }
+
+   emitField(0x24, 3, type);
+   emitField(0x1d, 4, subOp);
+   emitGPR  (0x14, insn->src(1));
+   emitGPR  (0x08, insn->src(0));
+   emitGPR  (0x00, insn->def(0));
+
+   emitSUHandle(2);
+}
+
 
/***
  * assembler front-end
  
**/
@@ -3235,6 +3281,10 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_SULDP:
   emitSULDx();
   break;
+   case OP_SUREDB:
+   case OP_SUREDP:
+  emitSUREDx();
+  break;
default:
   assert(!"invalid opcode");
   emitNOP();
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH RFC 7/8] nv50/ir: print OP_SUREDB subops in debug mode

2016-07-18 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index ae0dd78..22f2f5d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -570,6 +570,7 @@ void Instruction::print() const
  PRINT("%s ", interpStr[ipa]);
   switch (op) {
   case OP_SUREDP:
+  case OP_SUREDB:
   case OP_ATOM:
  if (subOp < ARRAY_SIZE(atomSubOpStr))
 PRINT("%s ", atomSubOpStr[subOp]);
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH RFC 8/8] nvc0: disable MS images on GM107+

2016-07-18 Thread Samuel Pitoiset
MS images have to be handled explicitly and I don't plan to implement
them for now.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index f681631..a0d3495 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -90,6 +90,13 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
  PIPE_BIND_LINEAR |
  PIPE_BIND_SHARED);
 
+   if (bindings & PIPE_BIND_SHADER_IMAGE &&
+   nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS) {
+  /* MS images are currently unsupported on Maxwell because they have to
+   * be handled explicitly. */
+  return false;
+   }
+
return (( nvc0_format_table[format].usage |
 nvc0_vertex_format[format].usage) & bindings) == bindings;
 }
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH RFC 2/8] nvc0: bind images for 3d/cp shaders on GM107+

2016-07-18 Thread Samuel Pitoiset
On Maxwell, image binding is slightly different from (and much better
than) Fermi and Kepler because a texture view needs to be uploaded
for each image, and this is going to simplify things a lot.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_context.c |   5 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h |   4 +
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c   |  10 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 110 ++--
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c |  91 ++--
 5 files changed, 202 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 1137e6c..4bd240b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -161,8 +161,11 @@ nvc0_context_unreference_resources(struct nvc0_context 
*nvc0)
   for (i = 0; i < NVC0_MAX_BUFFERS; ++i)
  pipe_resource_reference(&nvc0->buffers[s][i].buffer, NULL);
 
-  for (i = 0; i < NVC0_MAX_IMAGES; ++i)
+  for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
  pipe_resource_reference(&nvc0->images[s][i].resource, NULL);
+ if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
+pipe_sampler_view_reference(&nvc0->images_tic[s][i], NULL);
+  }
}
 
for (s = 0; s < 2; ++s) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 4b73ec3..1d9fca1 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -246,6 +246,7 @@ struct nvc0_context {
uint32_t buffers_valid[6];
 
struct pipe_image_view images[6][NVC0_MAX_IMAGES];
+   struct pipe_sampler_view *images_tic[6][NVC0_MAX_IMAGES]; /* GM107+ */
uint16_t images_dirty[6];
uint16_t images_valid[6];
 
@@ -349,6 +350,9 @@ struct pipe_sampler_view *
 nvc0_create_sampler_view(struct pipe_context *,
  struct pipe_resource *,
  const struct pipe_sampler_view *);
+struct pipe_sampler_view *
+gm107_create_texture_view_from_image(struct pipe_context *,
+ struct pipe_image_view *);
 
 /* nvc0_transfer.c */
 void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 441cfc9..98becf4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -1303,8 +1303,16 @@ nvc0_bind_images_range(struct nvc0_context *nvc0, const 
unsigned s,
   mask = ((1 << nr) - 1) << start;
   if (!(nvc0->images_valid[s] & mask))
  return false;
-  for (i = start; i < end; ++i)
+  for (i = start; i < end; ++i) {
  pipe_resource_reference(&nvc0->images[s][i].resource, NULL);
+ if (nvc0->screen->base.class_3d >= GM107_3D_CLASS) {
+struct nv50_tic_entry *old = 
nv50_tic_entry(nvc0->images_tic[s][i]);
+if (old) {
+   nvc0_screen_tic_unlock(nvc0->screen, old);
+   pipe_sampler_view_reference(&nvc0->images_tic[s][i], NULL);
+}
+ }
+  }
   nvc0->images_valid[s] &= ~mask;
}
nvc0->images_dirty[s] |= mask;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 5f7bba8..efbaacf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -236,6 +236,38 @@ gm107_create_texture_view(struct pipe_context *pipe,
return &view->pipe;
 }
 
+struct pipe_sampler_view *
+gm107_create_texture_view_from_image(struct pipe_context *pipe,
+ struct pipe_image_view *view)
+{
+   struct nv04_resource *res = nv04_resource(view->resource);
+   enum pipe_texture_target target = res->base.target;
+   struct pipe_sampler_view templ = {};
+   uint32_t flags = 0;
+
+   if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)
+  target = PIPE_TEXTURE_2D_ARRAY;
+
+   templ.format = view->format;
+   templ.swizzle_r = PIPE_SWIZZLE_X;
+   templ.swizzle_g = PIPE_SWIZZLE_Y;
+   templ.swizzle_b = PIPE_SWIZZLE_Z;
+   templ.swizzle_a = PIPE_SWIZZLE_W;
+
+   if (target == PIPE_BUFFER) {
+  templ.u.buf.first_element = view->u.buf.first_element;
+  templ.u.buf.last_element = view->u.buf.last_element;
+   } else {
+  templ.u.tex.first_layer = view->u.tex.first_layer;
+  templ.u.tex.last_layer = view->u.tex.last_layer;
+  templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level;
+   }
+
+   flags = res->base.last_level ? 0 : NV50_TEXVIEW_SCALED_COORDS;
+
+   return gm107_create_texture_view(pipe, &res->base, &templ, flags, target);
+}
+
 static struct pipe_sampler_view *
 gf100_create_texture_view(struct pipe_context *pipe,
   struct pipe_resource *texture,
@@ -109

[Mesa-dev] [PATCH RFC 3/8] gm107/ir: lower surface operations

2016-07-18 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp  | 76 +-
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h|  2 +
 2 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 18955eb..b7dc624 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2108,6 +2108,78 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
}
 }
 
+void
+NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
+{
+   const int slot = su->tex.r;
+   const int dim = su->tex.target.getDim();
+   const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
+   Value *ind = su->getIndirectR();
+   int pos = 0;
+
+   bld.setPosition(su, false);
+
+   // add texture handle
+   switch (su->op) {
+   case OP_SUSTP:
+  pos = 4;
+  break;
+   case OP_SUREDP:
+  pos = (su->subOp == NV50_IR_SUBOP_ATOM_CAS) ? 2 : 1;
+  break;
+   default:
+  assert(pos == 0);
+  break;
+   }
+   su->setSrc(arg + pos, loadTexHandle(ind, slot + 32));
+
+   // prevent read fault when the image is not actually bound
+   CmpInstruction *pred =
+  bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+TYPE_U32, bld.mkImm(0),
+loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR));
+   if (su->op != OP_SUSTP && su->tex.format) {
+  const TexInstruction::ImgFormatDesc *format = su->tex.format;
+  int blockwidth = format->bits[0] + format->bits[1] +
+   format->bits[2] + format->bits[3];
+
+  assert(format->components != 0);
+  // make sure that the format doesn't mismatch when it's not FMT_NONE
+  bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
+TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
+loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE),
+pred->getDef(0));
+   }
+   su->setPredicate(CC_NOT_P, pred->getDef(0));
+}
+
+void
+NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su)
+{
+   processSurfaceCoordsGM107(su);
+
+   if (su->op == OP_SULDP)
+  convertSurfaceFormat(su);
+
+   if (su->op == OP_SUREDP) {
+  Value *def = su->getDef(0);
+
+  su->op = OP_SUREDB;
+  su->setDef(0, bld.getSSA());
+
+  bld.setPosition(su, true);
+
+  // make sure to initialize dst value when the atomic operation is not
+  // performed
+  Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
+
+  assert(su->cc == CC_NOT_P);
+  mov->setPredicate(CC_P, su->getPredicate());
+
+  bld.mkOp2(OP_UNION, TYPE_U32, def, su->getDef(0), mov->getDef(0));
+   }
+}
+
 bool
 NVC0LoweringPass::handleWRSV(Instruction *i)
 {
@@ -2600,7 +2672,9 @@ NVC0LoweringPass::visit(Instruction *i)
case OP_SUSTP:
case OP_SUREDB:
case OP_SUREDP:
-  if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+  if (targ->getChipset() >= NVISA_GM107_CHIPSET)
+ handleSurfaceOpGM107(i->asTex());
+  else if (targ->getChipset() >= NVISA_GK104_CHIPSET)
  handleSurfaceOpNVE4(i->asTex());
   else
  handleSurfaceOpNVC0(i->asTex());
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 4d7d8cc..104bc03 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -106,6 +106,7 @@ protected:
bool handleSUQ(TexInstruction *);
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);
+   void handleSurfaceOpGM107(TexInstruction *);
void handleSurfaceOpNVE4(TexInstruction *);
void handleSurfaceOpNVC0(TexInstruction *);
void handleSharedATOM(Instruction *);
@@ -135,6 +136,7 @@ private:
Value *loadTexHandle(Value *ptr, unsigned int slot);
 
void adjustCoordinatesMS(TexInstruction *);
+   void processSurfaceCoordsGM107(TexInstruction *);
void processSurfaceCoordsNVE4(TexInstruction *);
void processSurfaceCoordsNVC0(TexInstruction *);
void convertSurfaceFormat(TexInstruction *);
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH RFC 4/8] gm107/ra: fix constraints for surface operations

2016-07-18 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 25 --
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 63fe9c0..2d3486b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -2093,8 +2093,29 @@ 
RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
   textureMask(tex);
condenseDefs(tex);
 
-   if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) {
-  condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1);
+   if (isSurfaceOp(tex->op)) {
+  int s = tex->tex.target.getDim() +
+ (tex->tex.target.isArray() || tex->tex.target.isCube());
+  int n = 0;
+
+  switch (tex->op) {
+  case OP_SUSTB:
+  case OP_SUSTP:
+ n = 4;
+ break;
+  case OP_SUREDB:
+  case OP_SUREDP:
+ if (tex->subOp == NV50_IR_SUBOP_ATOM_CAS)
+n = 2;
+ break;
+  default:
+ break;
+  }
+
+  if (s > 1)
+ condenseSrcs(tex, 0, s - 1);
+  if (n > 1)
+ condenseSrcs(tex, 1, n); // do not condense the tex handle
} else
if (isTextureOp(tex->op)) {
   if (tex->op != OP_TXQ) {
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH RFC 5/8] gm107/ir: add emission for SUSTx and SULDx

2016-07-18 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 105 +
 1 file changed, 105 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index f1ba27a..871ffd2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -200,6 +200,11 @@ private:
void emitMEMBAR();
 
void emitVOTE();
+
+   void emitSUTarget();
+   void emitSUHandle(const int s);
+   void emitSUSTx();
+   void emitSULDx();
 };
 
 
/***
@@ -2816,6 +2821,98 @@ CodeEmitterGM107::emitVOTE()
emitPRED (0x27, insn->src(0));
 }
 
+void
+CodeEmitterGM107::emitSUTarget()
+{
+   const TexInstruction *insn = this->insn->asTex();
+   int target = 0;
+
+   assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
+
+   if (insn->tex.target == TEX_TARGET_BUFFER) {
+  target = 2;
+   } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
+  target = 4;
+   } else if (insn->tex.target == TEX_TARGET_2D ||
+  insn->tex.target == TEX_TARGET_RECT) {
+  target = 6;
+   } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
+  insn->tex.target == TEX_TARGET_CUBE ||
+  insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
+  target = 8;
+   } else if (insn->tex.target == TEX_TARGET_3D ||
+  insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
+  target = 10;
+   } else {
+  assert(insn->tex.target == TEX_TARGET_1D);
+   }
+   emitField(0x20, 4, target);
+}
+
+void
+CodeEmitterGM107::emitSUHandle(const int s)
+{
+   const TexInstruction *insn = this->insn->asTex();
+
+   assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
+
+   if (insn->src(s).getFile() == FILE_GPR) {
+  emitGPR(0x27, insn->src(s));
+   } else {
+  ImmediateValue *imm = insn->getSrc(s)->asImm();
+  assert(imm);
+  emitField(0x33, 1, 1);
+  emitField(0x24, 13, imm->reg.data.u32);
+   }
+}
+
+void
+CodeEmitterGM107::emitSUSTx()
+{
+   const TexInstruction *insn = this->insn->asTex();
+
+   emitInsn(0xeb20);
+   if (insn->op == OP_SUSTB)
+  emitField(0x34, 1, 1);
+   emitSUTarget();
+
+   emitLDSTc(0x18);
+   emitField(0x14, 4, 0xf); // rgba
+   emitGPR  (0x08, insn->src(0));
+   emitGPR  (0x00, insn->src(1));
+
+   emitSUHandle(2);
+}
+
+void
+CodeEmitterGM107::emitSULDx()
+{
+   const TexInstruction *insn = this->insn->asTex();
+   int type = 0;
+
+   emitInsn(0xeb00);
+   if (insn->op == OP_SULDB)
+  emitField(0x34, 1, 1);
+   emitSUTarget();
+
+   switch (insn->dType) {
+   case TYPE_S8:   type = 1; break;
+   case TYPE_U16:  type = 2; break;
+   case TYPE_S16:  type = 3; break;
+   case TYPE_U32:  type = 4; break;
+   case TYPE_U64:  type = 5; break;
+   case TYPE_B128: type = 6; break;
+   default:
+  assert(insn->dType == TYPE_U8);
+  break;
+   }
+   emitLDSTc(0x18);
+   emitField(0x14, 3, type);
+   emitGPR  (0x00, insn->def(0));
+   emitGPR  (0x08, insn->src(0));
+
+   emitSUHandle(1);
+}
 
/***
  * assembler front-end
  
**/
@@ -3130,6 +3227,14 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_VOTE:
   emitVOTE();
   break;
+   case OP_SUSTB:
+   case OP_SUSTP:
+  emitSUSTx();
+  break;
+   case OP_SULDB:
+   case OP_SULDP:
+  emitSULDx();
+  break;
default:
   assert(!"invalid opcode");
   emitNOP();
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH RFC 1/8] nvc0: increase the tex handles area size in the driver cb

2016-07-18 Thread Samuel Pitoiset
Currently, we can store 32 tex handles of one 32-bit integer each, and
that fits perfectly with the underlying hardware, except on GM107+,
which requires uploading a texture view for each image.

This patch increases the number of storable texture handles in the
driver constant buffer from 32 to 40 because we expose 8 images.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 7acd477..4b73ec3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -108,34 +108,34 @@
 /* XXX: Figure out what this UNK data is. */
 #define NVC0_CB_AUX_UNK_INFO0x000
 #define NVC0_CB_AUX_UNK_SIZE(8 * 4)
-/* 32 textures handles, at 1 32-bits integer each */
+/* 40 textures handles (8 for GM107+ images only), at 1 32-bits integer each */
 #define NVC0_CB_AUX_TEX_INFO(i) 0x020 + (i) * 4
-#define NVC0_CB_AUX_TEX_SIZE(32 * 4)
+#define NVC0_CB_AUX_TEX_SIZE(40 * 4)
 /* 8 sets of 32-bits coordinate offsets */
-#define NVC0_CB_AUX_MS_INFO 0x0a0
+#define NVC0_CB_AUX_MS_INFO 0x0c0
 #define NVC0_CB_AUX_MS_SIZE (8 * 2 * 4)
 /* block/grid size, at 3 32-bits integers each, gridid and work_dim */
-#define NVC0_CB_AUX_GRID_INFO(i)0x0e0 + (i) * 4 /* CP */
+#define NVC0_CB_AUX_GRID_INFO(i)0x100 + (i) * 4 /* CP */
 #define NVC0_CB_AUX_GRID_SIZE   (8 * 4)
 /* 8 user clip planes, at 4 32-bits floats each */
-#define NVC0_CB_AUX_UCP_INFO0x100
+#define NVC0_CB_AUX_UCP_INFO0x120
 #define NVC0_CB_AUX_UCP_SIZE(PIPE_MAX_CLIP_PLANES * 4 * 4)
 /* 13 ubos, at 4 32-bits integer each */
-#define NVC0_CB_AUX_UBO_INFO(i) 0x100 + (i) * 4 * 4 /* CP */
+#define NVC0_CB_AUX_UBO_INFO(i) 0x120 + (i) * 4 * 4 /* CP */
 #define NVC0_CB_AUX_UBO_SIZE((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4)
 /* 8 sets of 32-bits integer pairs sample offsets */
-#define NVC0_CB_AUX_SAMPLE_INFO 0x180 /* FP */
+#define NVC0_CB_AUX_SAMPLE_INFO 0x1a0 /* FP */
 #define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2)
 /* draw parameters (index bais, base instance, drawid) */
-#define NVC0_CB_AUX_DRAW_INFO   0x180 /* VP */
+#define NVC0_CB_AUX_DRAW_INFO   0x1a0 /* VP */
 /* 32 user buffers, at 4 32-bits integers each */
-#define NVC0_CB_AUX_BUF_INFO(i) 0x200 + (i) * 4 * 4
+#define NVC0_CB_AUX_BUF_INFO(i) 0x220 + (i) * 4 * 4
 #define NVC0_CB_AUX_BUF_SIZE(NVC0_MAX_BUFFERS * 4 * 4)
 /* 8 surfaces, at 16 32-bits integers each */
-#define NVC0_CB_AUX_SU_INFO(i)  0x400 + (i) * 16 * 4
+#define NVC0_CB_AUX_SU_INFO(i)  0x420 + (i) * 16 * 4
 #define NVC0_CB_AUX_SU_SIZE (NVC0_MAX_IMAGES * 16 * 4)
 /* 1 64-bits address and 1 32-bits sequence */
-#define NVC0_CB_AUX_MP_INFO 0x600
+#define NVC0_CB_AUX_MP_INFO 0x620
 #define NVC0_CB_AUX_MP_SIZE 3 * 4
 /* 4 32-bits floats for the vertex runout, put at the end */
 #define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH RFC 0/8] nvc0: ARB_shader_image_load_store for Maxwell

2016-07-18 Thread Samuel Pitoiset
Hi folks,

This series adds support for ARB_shader_image_load_store (GL 4.2) and
ARB_shader_image_size (GL 4.3) on Maxwell GPUs.

The Maxwell family is slightly different from Fermi and Kepler because it
requires a texture view for each image. But this is actually quite
fine because the underlying hardware will handle a bunch of things that we
needed to do in software for previous generations.

However, this series *WILL NOT ENABLE* ARB_shader_image_load_store by default
for the following reasons:

a) Some subtests in arb_shader_image_load_store-atomicity totally hang the
GPU because there is a race condition. The code is a loop which uses the
result of the surface atomic operation as the condition. The only way to
fix this is to implement a sched calculator pass for Maxwell.
This would allow defining read/write barriers like the blob does for
this specific case, and it might also improve performance.

b) Maxwell currently only exposes GL 3.3 because tessellation has still not
been figured out (I'm working on this but it's quite hard). So enabling the
extension is not going to help until this part is done.

To sum up, this series adds "basic" support for images but it should work
in most cases.

Please review,
Thanks!

Samuel Pitoiset (8):
  nvc0: increase the tex handles area size in the driver cb
  nvc0: bind images for 3d/cp shaders on GM107+
  gm107/ir: lower surface operations
  gm107/ra: fix constraints for surface operations
  gm107/ir: add emission for SUSTx and SULDx
  gm107/ir: add emission for SUREDx
  nv50/ir: print OP_SUREDB subops in debug mode
  nvc0: disable MS images on GM107+

 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 155 +
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp  |  76 +-
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h|   2 +
 .../drivers/nouveau/codegen/nv50_ir_print.cpp  |   1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp |  25 +++-
 src/gallium/drivers/nouveau/nvc0/nvc0_context.c|   5 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h|  26 ++--
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |   7 +
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c  |  10 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c| 110 ++-
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c|  91 ++--
 11 files changed, 476 insertions(+), 32 deletions(-)

-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mapi: Massage code to allow clang to compile.

2016-07-18 Thread Matt Turner
On Mon, Jul 11, 2016 at 10:49 AM, Matt Turner  wrote:
> According to https://llvm.org/bugs/show_bug.cgi?id=19778#c3 this code
> was violating the spec, resulting in it failing to compile.
>
> Cc: mesa-sta...@lists.freedesktop.org
> Co-authored-by: Tomasz Paweł Gajc 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89599
> ---
> I've tried for months to reproduce this, and I've still never been
> able to on 64-bit builds. I can reproduce it on 32-bit however.
>
> On MSVC, this patch will have the effect of changing the variables
> from static to extern. I do not know if this will adversely affect
> anything, so this patch would benefit from MSVC testing.

Ping...
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 89599] symbol 'x86_64_entry_start' is already defined when building with LLVM/clang

2016-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=89599

--- Comment #11 from Matt Turner  ---
I sent a modified version of Tomasz's patch last week to mesa-dev.

Would anyone like to test it?

[PATCH] mapi: Massage code to allow clang to compile.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC] gallium/u_queue: add barrier function

2016-07-18 Thread Rob Clark
On Mon, Jul 18, 2016 at 4:34 PM, Nicolai Hähnle  wrote:
> On 18.07.2016 22:25, Rob Clark wrote:
>>
>> Helper to block until all previous jobs are complete.
>> ---
>> So I think this might end up being useful to me in some cases.. but
>> the implementation only works for a single threaded queue (which is
>> all I need).  I could also just put a helper in my driver code.
>>
>> Opinions?
>
>
> What do you need it for? ISTR Marek had a half-finished patch for
> dependencies, maybe that does what you need and is more expressive?

no, I don't think dependencies would really help me..

This issue I'm chasing down is a race condition which, I think,
amounts to: we initially flush a batch when we don't think we need to
wait for it to complete, and later get a flush_resource() or a flush()
and realize we did actually need it to complete, but no longer have a
ref to the batch (or its contained fence)..

BR,
-R

> Cheers,
> Nicolai
>
>
>>
>>   src/gallium/auxiliary/util/u_queue.c | 12 
>>   src/gallium/auxiliary/util/u_queue.h |  2 ++
>>   2 files changed, 14 insertions(+)
>>
>> diff --git a/src/gallium/auxiliary/util/u_queue.c
>> b/src/gallium/auxiliary/util/u_queue.c
>> index 838464f..861faca 100644
>> --- a/src/gallium/auxiliary/util/u_queue.c
>> +++ b/src/gallium/auxiliary/util/u_queue.c
>> @@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue,
>>  pipe_condvar_signal(queue->has_queued_cond);
>>  pipe_mutex_unlock(queue->lock);
>>   }
>> +
>> +static void dummy_execute(void *job, int thread_index) {}
>> +
>> +/* blocks until all previously queued jobs complete: */
>> +void util_queue_barrier(struct util_queue *queue)
>> +{
>> +   struct util_queue_fence fence;
>> +   util_queue_fence_init(&fence);
>> +   util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute,
>> NULL);
>> +   util_queue_job_wait(&fence);
>> +   util_queue_fence_destroy(&fence);
>> +}
>> diff --git a/src/gallium/auxiliary/util/u_queue.h
>> b/src/gallium/auxiliary/util/u_queue.h
>> index 59646cc..8a22ee0 100644
>> --- a/src/gallium/auxiliary/util/u_queue.h
>> +++ b/src/gallium/auxiliary/util/u_queue.h
>> @@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue,
>>
>>   void util_queue_job_wait(struct util_queue_fence *fence);
>>
>> +void util_queue_barrier(struct util_queue *queue);
>> +
>>   /* util_queue needs to be cleared to zeroes for this to work */
>>   static inline bool
>>   util_queue_is_initialized(struct util_queue *queue)
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv: Drop unnecessary is_haswell check in state base address code.

2016-07-18 Thread Kenneth Graunke
Both branches are identical.  Fixes a Coverity warning.

CID: 1364155

Signed-off-by: Kenneth Graunke 
---
 src/intel/vulkan/anv_cmd_buffer.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c
index 6256df8..ce4266f 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -358,10 +358,7 @@ anv_cmd_buffer_emit_state_base_address(struct 
anv_cmd_buffer *cmd_buffer)
 {
switch (cmd_buffer->device->info.gen) {
case 7:
-  if (cmd_buffer->device->info.is_haswell)
- return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
-  else
- return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
+  return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
case 8:
   return gen8_cmd_buffer_emit_state_base_address(cmd_buffer);
case 9:
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC] gallium/u_queue: add barrier function

2016-07-18 Thread Nicolai Hähnle

On 18.07.2016 22:25, Rob Clark wrote:

Helper to block until all previous jobs are complete.
---
So I think this might end up being useful to me in some cases.. but
the implementation only works for a single threaded queue (which is
all I need).  I could also just put a helper in my driver code.

Opinions?


What do you need it for? ISTR Marek had a half-finished patch for 
dependencies, maybe that does what you need and is more expressive?


Cheers,
Nicolai



  src/gallium/auxiliary/util/u_queue.c | 12 
  src/gallium/auxiliary/util/u_queue.h |  2 ++
  2 files changed, 14 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_queue.c 
b/src/gallium/auxiliary/util/u_queue.c
index 838464f..861faca 100644
--- a/src/gallium/auxiliary/util/u_queue.c
+++ b/src/gallium/auxiliary/util/u_queue.c
@@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue,
 pipe_condvar_signal(queue->has_queued_cond);
 pipe_mutex_unlock(queue->lock);
  }
+
+static void dummy_execute(void *job, int thread_index) {}
+
+/* blocks until all previously queued jobs complete: */
+void util_queue_barrier(struct util_queue *queue)
+{
+   struct util_queue_fence fence;
+   util_queue_fence_init(&fence);
+   util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute, NULL);
+   util_queue_job_wait(&fence);
+   util_queue_fence_destroy(&fence);
+}
diff --git a/src/gallium/auxiliary/util/u_queue.h 
b/src/gallium/auxiliary/util/u_queue.h
index 59646cc..8a22ee0 100644
--- a/src/gallium/auxiliary/util/u_queue.h
+++ b/src/gallium/auxiliary/util/u_queue.h
@@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue,

  void util_queue_job_wait(struct util_queue_fence *fence);

+void util_queue_barrier(struct util_queue *queue);
+
  /* util_queue needs to be cleared to zeroes for this to work */
  static inline bool
  util_queue_is_initialized(struct util_queue *queue)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/7] i965: Delete the FS_OPCODE_INTERPOLATE_AT_CENTROID virtual opcode.

2016-07-18 Thread Kenneth Graunke
We no longer use this message.  As far as I can tell, it's fairly
useless - the equivalent information is provided in the payload.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_defines.h| 1 -
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 2 --
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 -
 src/mesa/drivers/dri/i965/brw_shader.cpp   | 2 --
 4 files changed, 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index b5a259e..2814fa7 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1120,7 +1120,6 @@ enum opcode {
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
FS_OPCODE_PLACEHOLDER_HALT,
-   FS_OPCODE_INTERPOLATE_AT_CENTROID,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 06007fe..120d6dd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -250,7 +250,6 @@ fs_inst::is_send_from_grf() const
switch (opcode) {
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
case SHADER_OPCODE_SHADER_TIME_ADD:
-   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
@@ -4785,7 +4784,6 @@ get_lowered_simd_width(const struct brw_device_info 
*devinfo,
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
-   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 1e9c7da..a390184 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -2054,11 +2054,6 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
  }
  break;
 
-  case FS_OPCODE_INTERPOLATE_AT_CENTROID:
- generate_pixel_interpolator_query(inst, dst, src[0], src[1],
-   
GEN7_PIXEL_INTERPOLATOR_LOC_CENTROID);
- break;
-
   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
  generate_pixel_interpolator_query(inst, dst, src[0], src[1],
GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index f3b5487..559e44c 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -367,8 +367,6 @@ brw_instruction_name(const struct brw_device_info *devinfo, 
enum opcode op)
case FS_OPCODE_PLACEHOLDER_HALT:
   return "placeholder_halt";
 
-   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
-  return "interp_centroid";
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
   return "interp_sample";
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/7] i965: Move load_interpolated_input/barycentric_* intrinsics to the top.

2016-07-18 Thread Kenneth Graunke
Currently, i965 interpolates all FS inputs at the top of the program.
This has advantages and disadvantages, but I'd like to keep that policy
while reworking this code.  We can consider changing it independently.

The next patch will make the compiler generate PLN instructions "on the
fly", when it encounters an input load intrinsic, rather than doing it
for all inputs at the start of the program.

To emulate this behavior, we introduce an ugly pass to move all NIR
load_interpolated_input and payload-based (not interpolator message)
load_barycentric_* intrinsics to the shader's start block.

This helps avoid regressions in shader-db for cases such as:

   if (...) {
  ...load some input...
   } else {
  ...load that same input...
   }

which CSE can't handle, because there's no dominance relationship
between the two loads.  Because the start block dominates all others,
we can CSE all inputs and emit PLNs exactly once, as we did before.

Ideally, global value numbering would eliminate these redundant loads,
while not forcing them all the way to the start block.  When that lands,
we should consider dropping this hacky pass.

Again, this pass currently does nothing, as i965 doesn't generate these
intrinsics yet.  But it will shortly, and I figured I'd separate this
code as it's relatively self-contained.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 78 
 1 file changed, 78 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ea6616b..94127bc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6400,6 +6400,83 @@ computed_depth_mode(const nir_shader *shader)
 }
 
 /**
+ * Move load_interpolated_input with simple (payload-based) barycentric modes
+ * to the top of the program so we don't emit multiple PLNs for the same input.
+ *
+ * This works around CSE not being able to handle non-dominating cases
+ * such as:
+ *
+ *if (...) {
+ *   interpolate input
+ *} else {
+ *   interpolate the same exact input
+ *}
+ *
+ * This should be replaced by global value numbering someday.
+ */
+void
+move_interpolation_to_top(nir_shader *nir)
+{
+   nir_foreach_function(f, nir) {
+  if (!f->impl)
+ continue;
+
+  nir_builder b;
+  nir_builder_init(&b, f->impl);
+  b.cursor = nir_before_block(nir_start_block(f->impl));
+
+  nir_foreach_block(block, f->impl) {
+ nir_foreach_instr_safe(instr, block) {
+if (instr->type != nir_instr_type_intrinsic)
+   continue;
+
+nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+if (load->intrinsic != nir_intrinsic_load_interpolated_input)
+   continue;
+
+nir_intrinsic_instr *bary =
+   nir_instr_as_intrinsic(load->src[0].ssa->parent_instr);
+
+/* Leave interpolateAtSample/Offset() where it is. */
+if (bary->intrinsic == nir_intrinsic_load_barycentric_at_sample ||
+bary->intrinsic == nir_intrinsic_load_barycentric_at_offset)
+   continue;
+
+/* Make a new load_barycentric_* intrinsic at the top */
+nir_ssa_def *top_bary =
+   nir_load_barycentric(&b, bary->intrinsic,
+nir_intrinsic_interp_mode(bary));
+
+/* Make a new load_interpolated_input at the top */
+nir_intrinsic_instr *top_load = nir_intrinsic_instr_create(nir,
+   nir_intrinsic_load_interpolated_input);
+top_load->num_components = load->num_components;
+top_load->src[0] = nir_src_for_ssa(top_bary);
+/* We don't support indirects today - otherwise we might not
+ * be able to move this to the top. add_const_offset_to_base
+ * guarantees the offset will be 0.
+ */
+assert(nir_src_as_const_value(load->src[1]) &&
+   nir_src_as_const_value(load->src[1])->u32[0] == 0);
+top_load->src[1] = nir_src_for_ssa(nir_imm_int(&b, 0));
+top_load->const_index[0] = load->const_index[0];
+top_load->const_index[1] = load->const_index[1];
+nir_ssa_dest_init(&top_load->instr, &top_load->dest,
+  load->dest.ssa.num_components,
+  load->dest.ssa.bit_size, NULL);
+
+nir_ssa_def_rewrite_uses(&load->dest.ssa,
+ nir_src_for_ssa(&top_load->dest.ssa));
+nir_builder_instr_insert(&b, &top_load->instr);
+ }
+  }
+  nir_metadata_preserve(f->impl, (nir_metadata)
+((unsigned) nir_metadata_block_index |
+ (unsigned) nir_metadata_dominance));
+   }
+}
+
+/**
  * Apply default interpolation settings to FS inputs which don't specify any.
  */
 static void
@@ -6506,6 +658

[Mesa-dev] [PATCH 4/7] i965: Add a pass to demote sample interpolation intrinsics.

2016-07-18 Thread Kenneth Graunke
When working with a non-multisampled render target, asking for "sample"
interpolation locations doesn't make sense.  We demote them to centroid.

In a couple of patches, brw_compute_barycentric_modes will begin looking
at these intrinsics to determine the barycentric modes.  fs_visitor also
will use them to code-generate pixel interpolator messages or payload
references.  Handling the "but what if it's not MSAA?" logic ahead of
time in a NIR pass simplifies things and prevents duplicated logic.

This patch doesn't actually do anything useful yet as we don't generate
these intrinsics.  I decided to keep it separate as it's self-contained,
in the hopes of shrinking the "convert everything" patch for reviewers.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 44 
 1 file changed, 44 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 7316247..ea6616b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -39,6 +39,7 @@
 #include "brw_program.h"
 #include "brw_dead_control_flow.h"
 #include "compiler/glsl_types.h"
+#include "compiler/nir/nir_builder.h"
 #include "program/prog_parameter.h"
 
 using namespace brw;
@@ -6442,6 +6443,47 @@ brw_nir_set_default_interpolation(const struct 
brw_device_info *devinfo,
}
 }
 
+/**
+ * Demote per-sample barycentric intrinsics to centroid.
+ *
+ * Useful when rendering to a non-multisampled buffer.
+ */
+static void
+demote_sample_qualifiers(nir_shader *nir)
+{
+   nir_foreach_function(f, nir) {
+  if (!f->impl)
+ continue;
+
+  nir_builder b;
+  nir_builder_init(&b, f->impl);
+
+  nir_foreach_block(block, f->impl) {
+ nir_foreach_instr_safe(instr, block) {
+if (instr->type != nir_instr_type_intrinsic)
+   continue;
+
+nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+if (intrin->intrinsic != nir_intrinsic_load_barycentric_sample &&
+intrin->intrinsic != nir_intrinsic_load_barycentric_at_sample)
+   continue;
+
+b.cursor = nir_before_instr(instr);
+nir_ssa_def *centroid =
+   nir_load_barycentric(&b, 
nir_intrinsic_load_barycentric_centroid,
+nir_intrinsic_interp_mode(intrin));
+nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(centroid));
+nir_instr_remove(instr);
+ }
+  }
+
+  nir_metadata_preserve(f->impl, (nir_metadata)
+((unsigned) nir_metadata_block_index |
+ (unsigned) nir_metadata_dominance));
+   }
+}
+
 const unsigned *
 brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
@@ -6462,6 +6504,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
  key->flat_shade, key->persample_interp);
brw_nir_lower_fs_inputs(shader);
brw_nir_lower_fs_outputs(shader);
+   if (!key->multisample_fbo)
+  NIR_PASS_V(shader, demote_sample_qualifiers);
shader = brw_postprocess_nir(shader, compiler->devinfo, true);
 
/* key->alpha_test_func means simulating alpha testing via discards,
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/7] nir: Add nir_load_interpolated_input lowering code.

2016-07-18 Thread Kenneth Graunke
Now nir_lower_io can optionally produce load_interpolated_input
and load_barycentric_* intrinsics for fragment shader inputs.

flat inputs continue using regular load_input.

Signed-off-by: Kenneth Graunke 
---
 src/compiler/nir/nir_lower_io.c | 94 ++---
 1 file changed, 89 insertions(+), 5 deletions(-)

diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index aa8a517..e480264 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -174,12 +174,30 @@ lower_load(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
 {
nir_variable *var = intrin->variables[0]->var;
nir_variable_mode mode = var->data.mode;
+   nir_ssa_def *barycentric = NULL;
 
nir_intrinsic_op op;
switch (mode) {
case nir_var_shader_in:
-  op = vertex_index ? nir_intrinsic_load_per_vertex_input :
-  nir_intrinsic_load_input;
+  if (state->use_interpolated_input &&
+  var->data.interpolation != INTERP_MODE_FLAT) {
+ assert(vertex_index == NULL);
+
+ nir_intrinsic_op bary_op;
+ if (var->data.sample)
+bary_op = nir_intrinsic_load_barycentric_sample;
+ else if (var->data.centroid)
+bary_op = nir_intrinsic_load_barycentric_centroid;
+ else
+bary_op = nir_intrinsic_load_barycentric_pixel;
+
+ barycentric = nir_load_barycentric(&state->builder, bary_op,
+var->data.interpolation);
+ op = nir_intrinsic_load_interpolated_input;
+  } else {
+ op = vertex_index ? nir_intrinsic_load_per_vertex_input :
+ nir_intrinsic_load_input;
+  }
   break;
case nir_var_shader_out:
   op = vertex_index ? nir_intrinsic_load_per_vertex_output :
@@ -205,10 +223,15 @@ lower_load(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
if (load->intrinsic == nir_intrinsic_load_uniform)
   nir_intrinsic_set_range(load, state->type_size(var->type));
 
-   if (vertex_index)
+   if (vertex_index) {
   load->src[0] = nir_src_for_ssa(vertex_index);
-
-   load->src[vertex_index ? 1 : 0] = nir_src_for_ssa(offset);
+  load->src[1] = nir_src_for_ssa(offset);
+   } else if (barycentric) {
+  load->src[0] = nir_src_for_ssa(barycentric);
+  load->src[1] = nir_src_for_ssa(offset);
+   } else {
+  load->src[0] = nir_src_for_ssa(offset);
+   }
 
return load;
 }
@@ -288,6 +311,54 @@ lower_atomic(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
return atomic;
 }
 
+static nir_intrinsic_instr *
+lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
+ nir_ssa_def *offset)
+{
+   nir_variable *var = intrin->variables[0]->var;
+
+   assert(var->data.mode == nir_var_shader_in);
+
+   nir_intrinsic_op bary_op;
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_interp_var_at_centroid:
+  bary_op = nir_intrinsic_load_barycentric_centroid;
+  break;
+   case nir_intrinsic_interp_var_at_sample:
+  bary_op = nir_intrinsic_load_barycentric_at_sample;
+  break;
+   case nir_intrinsic_interp_var_at_offset:
+  bary_op = nir_intrinsic_load_barycentric_at_offset;
+  break;
+   default:
+  unreachable("Bogus interpolateAt() intrinsic.");
+   }
+
+   nir_intrinsic_instr *bary_setup =
+  nir_intrinsic_instr_create(state->mem_ctx, bary_op);
+
+   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
+   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
+
+   if (intrin->intrinsic != nir_intrinsic_interp_var_at_centroid)
+  nir_src_copy(&bary_setup->src[0], &intrin->src[0], bary_setup);
+
+   nir_builder_instr_insert(&state->builder, &bary_setup->instr);
+
+   nir_intrinsic_instr *load =
+  nir_intrinsic_instr_create(state->mem_ctx,
+ nir_intrinsic_load_interpolated_input);
+   load->num_components = intrin->num_components;
+
+   nir_intrinsic_set_base(load, var->data.driver_location);
+   nir_intrinsic_set_component(load, var->data.location_frac);
+
+   load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
+   load->src[1] = nir_src_for_ssa(offset);
+
+   return load;
+}
+
 static bool
 nir_lower_io_block(nir_block *block,
struct lower_io_state *state)
@@ -315,6 +386,12 @@ nir_lower_io_block(nir_block *block,
   case nir_intrinsic_var_atomic_comp_swap:
  /* We can lower the io for this nir instrinsic */
  break;
+  case nir_intrinsic_interp_var_at_centroid:
+  case nir_intrinsic_interp_var_at_sample:
+  case nir_intrinsic_interp_var_at_offset:
+ /* We can optionally lower these to load_interpolated_input */
+ if (state->use_interpolated_input)
+break;
   default:
  /* We can't lower the io for this nir instrinsic, so skip it */
  continue;
@@ -369,6 +446,13 @@ ni

[Mesa-dev] [PATCH 6/7] i965: Rewrite FS input handling to use the new NIR intrinsics.

2016-07-18 Thread Kenneth Graunke
This eliminates the need to walk the list of input variables, recurse
into their types (via logic largely redundant with nir_lower_io), and
interpolate all possible inputs up front.  The backend no longer has
to care about variables at all, which eliminates complications from
trying to pack multiple variables into the same location.  Instead,
each intrinsic specifies exactly what's needed.

This should unblock Timothy's work on GL_ARB_enhanced_layouts.

Each load_interpolated_input intrinsic corresponds to PLN instructions,
while load_barycentric_at_* intrinsics correspond to pixel interpolator
messages.  The pixel/centroid/sample barycentric intrinsics simply refer
to payload fields (delta_xy[]), and don't actually generate any code.

Because we use a single intrinsic for both centroid-qualified variables
and interpolateAtCentroid(), they become indistinguishable.  We stop
sending pixel interpolator messages for those, and instead use the
payload provided data, which should be considerably faster.

On Broadwell:

total instructions in shared programs: 9067751 -> 9067570 (-0.00%)
instructions in affected programs: 145902 -> 145721 (-0.12%)
helped: 422
HURT: 209

total spills in shared programs: 2849 -> 2899 (1.76%)
spills in affected programs: 760 -> 810 (6.58%)
helped: 0
HURT: 10

total fills in shared programs: 3910 -> 3950 (1.02%)
fills in affected programs: 617 -> 657 (6.48%)
helped: 0
HURT: 10

LOST:   3
GAINED: 3

The differences mostly appear to be slight changes in MOVs.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 175 -
 src/mesa/drivers/dri/i965/brw_fs.h   |   9 +-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 410 ---
 src/mesa/drivers/dri/i965/brw_nir.c  |  16 +-
 4 files changed, 269 insertions(+), 341 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 94127bc..06007fe 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1067,21 +1067,27 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg wpos)
bld.MOV(wpos, this->wpos_w);
 }
 
-static enum brw_barycentric_mode
-barycentric_mode(enum glsl_interp_mode mode,
- bool is_centroid, bool is_sample)
+enum brw_barycentric_mode
+brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op)
 {
-   unsigned bary;
-
/* Barycentric modes don't make sense for flat inputs. */
assert(mode != INTERP_MODE_FLAT);
 
-   if (is_sample) {
-  bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
-   } else if (is_centroid) {
-  bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
-   } else {
+   unsigned bary;
+   switch (op) {
+   case nir_intrinsic_load_barycentric_pixel:
+   case nir_intrinsic_load_barycentric_at_offset:
   bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL;
+  break;
+   case nir_intrinsic_load_barycentric_centroid:
+  bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
+  break;
+   case nir_intrinsic_load_barycentric_sample:
+   case nir_intrinsic_load_barycentric_at_sample:
+  bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
+  break;
+   default:
+  assert(!"invalid intrinsic");
}
 
if (mode == INTERP_MODE_NOPERSPECTIVE)
@@ -1101,107 +1107,6 @@ centroid_to_pixel(enum brw_barycentric_mode bary)
return (enum brw_barycentric_mode) ((unsigned) bary - 1);
 }
 
-void
-fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name,
-   const glsl_type *type,
-   glsl_interp_mode interpolation_mode,
-   int *location, bool mod_centroid,
-   bool mod_sample)
-{
-   assert(stage == MESA_SHADER_FRAGMENT);
-   brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
-
-   if (type->is_array() || type->is_matrix()) {
-  const glsl_type *elem_type = glsl_get_array_element(type);
-  const unsigned length = glsl_get_length(type);
-
-  for (unsigned i = 0; i < length; i++) {
- emit_general_interpolation(attr, name, elem_type, interpolation_mode,
-location, mod_centroid, mod_sample);
-  }
-   } else if (type->is_record()) {
-  for (unsigned i = 0; i < type->length; i++) {
- const glsl_type *field_type = type->fields.structure[i].type;
- emit_general_interpolation(attr, name, field_type, interpolation_mode,
-location, mod_centroid, mod_sample);
-  }
-   } else {
-  assert(type->is_scalar() || type->is_vector());
-
-  if (prog_data->urb_setup[*location] == -1) {
- /* If there's no incoming setup data for this slot, don't
-  * emit interpolation for it.
-  */
- *attr = offset(*attr, bld, type->vector_elements);
- (*location)++;
- return;
-  }
-
-  attr->type = brw_type_for_base_type(type->get_scalar_typ

[Mesa-dev] [PATCH 2/7] nir: Add a nir_lower_io flag for using load_interpolated_input intrins.

2016-07-18 Thread Kenneth Graunke
While my intention is that the new intrinsics should be usable by all
drivers, we need to make them optional until all drivers switch.

This doesn't do anything yet, but I added it as a separate patch to
keep the interface churn separate for easier review.

Signed-off-by: Kenneth Graunke 
---
 src/compiler/nir/nir.h  |  3 ++-
 src/compiler/nir/nir_lower_io.c | 15 +++
 src/gallium/drivers/freedreno/ir3/ir3_cmdline.c |  2 +-
 src/mesa/drivers/dri/i965/brw_blorp.c   |  2 +-
 src/mesa/drivers/dri/i965/brw_nir.c | 18 +-
 src/mesa/drivers/dri/i965/brw_program.c |  4 ++--
 src/mesa/state_tracker/st_glsl_to_nir.cpp   |  2 +-
 7 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ac11998..e996e0e 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2324,7 +2324,8 @@ void nir_assign_var_locations(struct exec_list *var_list, 
unsigned *size,
 
 void nir_lower_io(nir_shader *shader,
   nir_variable_mode modes,
-  int (*type_size)(const struct glsl_type *));
+  int (*type_size)(const struct glsl_type *),
+  bool use_load_interpolated_input_intrinsics);
 nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
 nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
 
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index b05a73f..aa8a517 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -39,6 +39,7 @@ struct lower_io_state {
void *mem_ctx;
int (*type_size)(const struct glsl_type *type);
nir_variable_mode modes;
+   bool use_interpolated_input;
 };
 
 void
@@ -394,7 +395,8 @@ nir_lower_io_block(nir_block *block,
 static void
 nir_lower_io_impl(nir_function_impl *impl,
   nir_variable_mode modes,
-  int (*type_size)(const struct glsl_type *))
+  int (*type_size)(const struct glsl_type *),
+  bool use_interpolated_input)
 {
struct lower_io_state state;
 
@@ -402,6 +404,7 @@ nir_lower_io_impl(nir_function_impl *impl,
state.mem_ctx = ralloc_parent(impl);
state.modes = modes;
state.type_size = type_size;
+   state.use_interpolated_input = use_interpolated_input;
 
nir_foreach_block(block, impl) {
   nir_lower_io_block(block, &state);
@@ -413,11 +416,15 @@ nir_lower_io_impl(nir_function_impl *impl,
 
 void
 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
- int (*type_size)(const struct glsl_type *))
+ int (*type_size)(const struct glsl_type *),
+ bool use_interpolated_input)
 {
nir_foreach_function(function, shader) {
-  if (function->impl)
- nir_lower_io_impl(function->impl, modes, type_size);
+  if (function->impl) {
+ nir_lower_io_impl(function->impl, modes, type_size,
+   use_interpolated_input &&
+   shader->stage == MESA_SHADER_FRAGMENT);
+  }
}
 }
 
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 
b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index 41532fc..a8a8c1b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -93,7 +93,7 @@ load_glsl(unsigned num_files, char* const* files, 
gl_shader_stage stage)
// TODO nir_assign_var_locations??
 
NIR_PASS_V(nir, nir_lower_system_values);
-   NIR_PASS_V(nir, nir_lower_io, nir_var_all, st_glsl_type_size);
+   NIR_PASS_V(nir, nir_lower_io, nir_var_all, st_glsl_type_size, false);
NIR_PASS_V(nir, nir_lower_samplers, prog);
 
return nir;
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 282a5b2..0473cfe 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -209,7 +209,7 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, 
struct nir_shader *nir,
   unsigned end = var->data.location + nir_uniform_type_size(var->type);
   nir->num_uniforms = MAX2(nir->num_uniforms, end);
}
-   nir_lower_io(nir, nir_var_uniform, nir_uniform_type_size);
+   nir_lower_io(nir, nir_var_uniform, nir_uniform_type_size, false);
 
const unsigned *program =
   brw_compile_fs(compiler, brw, mem_ctx, wm_key, &wm_prog_data, nir,
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
b/src/mesa/drivers/dri/i965/brw_nir.c
index 6c3e1d1..caf9fe0 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -204,7 +204,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
 * loaded as one vec4 or dvec4 per element (or matrix column), depending on
 * whether it is a double-precision type or not.
 */
-   nir_lower_io(nir, nir_var_shader_in, type_size_vs_input);
+   nir_lower_io(nir, nir_var_shader_in, type_siz

[Mesa-dev] [PATCH 1/7] nir: Add new intrinsics for fragment shader input interpolation.

2016-07-18 Thread Kenneth Graunke
Backends can normally handle shader inputs solely by looking at
load_input intrinsics, and ignore the nir_variables in nir->inputs.

One exception is fragment shader inputs.  load_input doesn't capture
the necessary interpolation information - flat, smooth, noperspective
mode, and centroid, sample, or pixel for the location.  This means
that backends have to interpolate based on the nir_variables, then
associate those with the load_input intrinsics (say, by storing a
map of which variables are at which locations).

With GL_ARB_enhanced_layouts, we're going to have multiple varyings
packed into a single vec4 location.  The intrinsics make this easy:
simply load N components from location <loc, component>.  However,
working with variables and correlating the two is very awkward; we'd
much rather have intrinsics capture all the necessary information.

Fragment shader input interpolation typically works by producing a
set of barycentric coordinates, then using those to do a linear
interpolation between the values at the triangle's corners.

We represent this by introducing five new load_barycentric_* intrinsics:

- load_barycentric_pixel      (ordinary variable)
- load_barycentric_centroid   (centroid qualified variable)
- load_barycentric_sample     (sample qualified variable)
- load_barycentric_at_sample  (ARB_gpu_shader5's interpolateAtSample())
- load_barycentric_at_offset  (ARB_gpu_shader5's interpolateAtOffset())

Each of these takes the interpolation mode (smooth or noperspective only)
as a const_index, and produce a vec2.  The last two also take a sample
or offset source.

We then introduce a new load_interpolated_input intrinsic, which
is like a normal load_input intrinsic, but with an additional
barycentric coordinate source.

The intention is that flat inputs will still use regular load_input
intrinsics.  This makes them distinguishable from normal inputs that
need fancy interpolation, while also providing all the necessary data.

This nicely unifies regular inputs and interpolateAt functions.
Qualifiers and variables become irrelevant; there are just
load_barycentric intrinsics that determine the interpolation.

Signed-off-by: Kenneth Graunke 
---
 src/compiler/nir/nir.h|  6 ++
 src/compiler/nir/nir_builder.h| 11 +++
 src/compiler/nir/nir_intrinsics.h | 24 
 src/compiler/nir/nir_lower_io.c   |  1 +
 src/compiler/nir/nir_print.c  |  1 +
 5 files changed, 43 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index c5d3b6b..ac11998 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -992,6 +992,11 @@ typedef enum {
 */
NIR_INTRINSIC_COMPONENT = 8,
 
+   /**
+* Interpolation mode (only meaningful for FS inputs).
+*/
+   NIR_INTRINSIC_INTERP_MODE = 9,
+
NIR_INTRINSIC_NUM_INDEX_FLAGS,
 
 } nir_intrinsic_index_flag;
@@ -1059,6 +1064,7 @@ INTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned)
 INTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned)
 INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned)
 INTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned)
+INTRINSIC_IDX_ACCESSORS(interp_mode, INTERP_MODE, unsigned)
 
 /**
  * \group texture information
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 09cdf72..435582a 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -458,6 +458,17 @@ nir_load_system_value(nir_builder *build, nir_intrinsic_op 
op, int index)
return &load->dest.ssa;
 }
 
+static inline nir_ssa_def *
+nir_load_barycentric(nir_builder *build, nir_intrinsic_op op,
+ unsigned interp_mode)
+{
+   nir_intrinsic_instr *bary = nir_intrinsic_instr_create(build->shader, op);
+   nir_ssa_dest_init(&bary->instr, &bary->dest, 2, 32, NULL);
+   nir_intrinsic_set_interp_mode(bary, interp_mode);
+   nir_builder_instr_insert(build, &bary->instr);
+   return &bary->dest.ssa;
+}
+
 static inline void
 nir_jump(nir_builder *build, nir_jump_type jump_type)
 {
diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 2f74555..29917e3 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -306,6 +306,27 @@ SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
 SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
 
+/**
+ * Barycentric coordinate intrinsics.
+ *
+ * These set up the barycentric coordinates for a particular interpolation.
+ * The first three are for the simple cases: pixel, centroid, or per-sample
+ * (at gl_SampleID).  The next two handle interpolating at a specified
+ * sample location, or interpolating with a vec2 offset.
+ *
+ * The vec2 value produced by these intrinsics is intended for use as the
+ * barycoord source of a load_interpolated_input intrinsic.
+ */
+SYSTEM_VALUE(barycentric_pixel, 2, 1, INTERP_MODE, xx, xx)
+SYSTEM_VALUE(barycentric_centroid, 2, 1, INTERP_MODE, xx, xx)
+SYSTEM_VALUE(barycentric_sample, 2

[Mesa-dev] [RFC] gallium/u_queue: add barrier function

2016-07-18 Thread Rob Clark
Helper to block until all previous jobs are complete.
---
So I think this might end up being useful to me in some cases.. but
the implementation only works for a single threaded queue (which is
all I need).  I could also just put a helper in my driver code.

Opinions?

 src/gallium/auxiliary/util/u_queue.c | 12 
 src/gallium/auxiliary/util/u_queue.h |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_queue.c 
b/src/gallium/auxiliary/util/u_queue.c
index 838464f..861faca 100644
--- a/src/gallium/auxiliary/util/u_queue.c
+++ b/src/gallium/auxiliary/util/u_queue.c
@@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue,
pipe_condvar_signal(queue->has_queued_cond);
pipe_mutex_unlock(queue->lock);
 }
+
+static void dummy_execute(void *job, int thread_index) {}
+
+/* blocks until all previously queued jobs complete: */
+void util_queue_barrier(struct util_queue *queue)
+{
+   struct util_queue_fence fence;
+   util_queue_fence_init(&fence);
+   util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute, NULL);
+   util_queue_job_wait(&fence);
+   util_queue_fence_destroy(&fence);
+}
diff --git a/src/gallium/auxiliary/util/u_queue.h 
b/src/gallium/auxiliary/util/u_queue.h
index 59646cc..8a22ee0 100644
--- a/src/gallium/auxiliary/util/u_queue.h
+++ b/src/gallium/auxiliary/util/u_queue.h
@@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue,
 
 void util_queue_job_wait(struct util_queue_fence *fence);
 
+void util_queue_barrier(struct util_queue *queue);
+
 /* util_queue needs to be cleared to zeroes for this to work */
 static inline bool
 util_queue_is_initialized(struct util_queue *queue)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/2] st/vdpau: use lanczos filter for scaling v2

2016-07-18 Thread Nayan Deshmukh
HIGH_QUALITY_SCALING_L2 to HIGH_QUALITY_SCALING_L9
use a lanczos filter, with the number representing the size
of the sinc window.

Signed-off-by: Nayan Deshmukh 
---
 src/gallium/state_trackers/vdpau/mixer.c | 116 ---
 src/gallium/state_trackers/vdpau/query.c |   8 ++
 src/gallium/state_trackers/vdpau/vdpau_private.h |   7 ++
 3 files changed, 97 insertions(+), 34 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/mixer.c 
b/src/gallium/state_trackers/vdpau/mixer.c
index cb0ef03..87822c8 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -82,14 +82,6 @@ vlVdpVideoMixerCreate(VdpDevice device,
   switch (features[i]) {
   /* they are valid, but we doesn't support them */
   case VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL_SPATIAL:
-  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L2:
-  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L3:
-  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L4:
-  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L5:
-  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L6:
-  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L7:
-  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
-  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
   case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE:
  break;
 
@@ -112,6 +104,17 @@ vlVdpVideoMixerCreate(VdpDevice device,
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1:
  vmixer->bicubic.supported = true;
  break;
+
+  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L2:
+  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L3:
+  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L4:
+  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L5:
+  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L6:
+  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L7:
+  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
+  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
+ vmixer->lanczos.supported = true;
+ break;
   default: goto no_params;
   }
}
@@ -209,6 +212,10 @@ vlVdpVideoMixerDestroy(VdpVideoMixer mixer)
   vl_bicubic_filter_cleanup(vmixer->bicubic.filter);
   FREE(vmixer->bicubic.filter);
}
+   if (vmixer->lanczos.filter) {
+  vl_lanczos_filter_cleanup(vmixer->lanczos.filter);
+  FREE(vmixer->lanczos.filter);
+   }
pipe_mutex_unlock(vmixer->device->mutex);
DeviceReference(&vmixer->device, NULL);
 
@@ -335,7 +342,7 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
}
vl_compositor_set_buffer_layer(&vmixer->cstate, compositor, layer, 
video_buffer, prect, NULL, deinterlace);
 
-   if(vmixer->bicubic.filter) {
+   if(vmixer->bicubic.filter || vmixer->lanczos.filter) {
   struct pipe_context *pipe;
   struct pipe_resource res_tmpl, *res;
   struct pipe_sampler_view sv_templ;
@@ -389,7 +396,7 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
   ++layers;
}
 
-   if (!vmixer->noise_reduction.filter && !vmixer->sharpness.filter && 
!vmixer->bicubic.filter)
+   if (!vmixer->noise_reduction.filter && !vmixer->sharpness.filter && 
!vmixer->bicubic.filter && !vmixer->lanczos.filter)
   vlVdpSave4DelayedRendering(vmixer->device, destination_surface, 
&vmixer->cstate);
else {
   vl_compositor_render(&vmixer->cstate, compositor, surface, &dirty_area, 
true);
@@ -408,6 +415,12 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
  RectToPipe(destination_video_rect, &rect),
  RectToPipe(destination_rect, &clip));
 
+  if (vmixer->lanczos.filter)
+ vl_lanczos_filter_render(vmixer->lanczos.filter,
+  sampler_view, dst->surface,
+  RectToPipe(destination_video_rect, &rect),
+  RectToPipe(destination_rect, &clip));
+
   if(surface != dst->surface) {
  pipe_sampler_view_reference(&sampler_view, NULL);
  pipe_surface_reference(&surface, NULL);
@@ -536,6 +549,29 @@ vlVdpVideoMixerUpdateBicubicFilter(vlVdpVideoMixer *vmixer)
 }
 
 /**
+ * Update the lanczos filter
+ */
+static void
+vlVdpVideoMixerUpdateLanczosFilter(vlVdpVideoMixer *vmixer)
+{
+   assert(vmixer);
+
+   /* if present remove the old filter first */
+   if (vmixer->lanczos.filter) {
+  vl_lanczos_filter_cleanup(vmixer->lanczos.filter);
+  FREE(vmixer->lanczos.filter);
+  vmixer->lanczos.filter = NULL;
+   }
+   /* and create a new filter as needed */
+   if (vmixer->lanczos.enabled) {
+  vmixer->lanczos.filter = MALLOC(sizeof(struct vl_lanczos_filter));
+  vl_lanczos_filter_init(vmixer->lanczos.filter, vmixer->device->context,
+ vmixer->lanczos.size, vmixer->video_width,
+ vmixer->video_height);
+   }
+}
+
+/**

  1   2   >