date:20140122

[Mesa-dev] [PATCH V3 4/8] glsl: only call mark_max_array if we are assigning an array

2014-01-22 Thread Timothy Arceri

This change does not help fix or prevent any bugs;
it just seems reasonable to do.

Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au
---
 src/glsl/ast_to_hir.cpp | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index e25cba3..be12f97 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -830,8 +830,10 @@ do_assignment(exec_list *instructions, struct 
_mesa_glsl_parse_state *state,
   rhs-type-array_size());
 d-type = var-type;
   }
-  mark_whole_array_access(rhs);
-  mark_whole_array_access(lhs);
+  if (lhs-type-is_array()) {
+ mark_whole_array_access(rhs);
+ mark_whole_array_access(lhs);
+  }
}
 
/* Most callers of do_assignment (assign, add_assign, pre_inc/dec,
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH V3 0/8] glsl: Start implementing ARB_arrays_of_arrays

2014-01-22 Thread Timothy Arceri

This is a first pass at implementing the ARB_arrays_of_arrays extension in Mesa.
This series allows the glsl front-end to parse and compile shaders that don't
require linking of arrays of arrays, e.g. multi-dimensional uniforms. I'm sending
this extension in parts because my time working on this is about to decrease and
I'm not sure how long it will be before I finish the second half.
If this is not a good idea please let me know.

Patches 2-3 need to be squashed when committed, but I've split them here to
make reviewing easier.

Patch 4 is optional

What's missing is the linking and backend support that goes with it. This
includes the linking checks the spec talks about for geometry shaders.

No piglit regressions with these patches applied.

Compile tests have been submitted to the piglit mailing list; the extension
just needs to be enabled in the driver code in order to test.

V3
- rebased on Paul Berry's awesome "simplify aggregate type inference" patch
- removed now redundant is_array
- made a bunch of other changes based on Paul Berry's advice

V2
- Removed patches 2 and 6 from the first version as it turns out they are not
actually needed (leftovers from my first attempt at the extension).
- Removed all unrelated whitespace changes, and other small whitespace cleanups
- Fixed order of array dimensions in glsl_type name.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH V3 2/8] glsl: Add arrays_of_arrays to yacc definition

2014-01-22 Thread Timothy Arceri

Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au
---
 src/glsl/glsl_parser.yy | 128 +---
 1 file changed, 56 insertions(+), 72 deletions(-)

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 5451b76..2786e92 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -97,6 +97,7 @@ static bool match_layout_qualifier(const char *s1, const char 
*s2,
 
ast_node *node;
ast_type_specifier *type_specifier;
+   ast_array_specifier *array_specifier;
ast_fully_specified_type *fully_specified_type;
ast_function *function;
ast_parameter_declarator *parameter_declarator;
@@ -202,6 +203,7 @@ static bool match_layout_qualifier(const char *s1, const 
char *s2,
 %type type_qualifier interface_qualifier
 %type type_specifier type_specifier
 %type type_specifier type_specifier_nonarray
+%type array_specifier array_specifier
 %type identifier basic_type_specifier_nonarray
 %type fully_specified_type fully_specified_type
 %type function function_prototype
@@ -880,7 +882,7 @@ parameter_declarator:
   $$-type-specifier = $1;
   $$-identifier = $2;
}
-   | type_specifier any_identifier '[' constant_expression ']'
+   | type_specifier any_identifier array_specifier
{
   void *ctx = state;
   $$ = new(ctx) ast_parameter_declarator();
@@ -889,8 +891,7 @@ parameter_declarator:
   $$-type-set_location(yylloc);
   $$-type-specifier = $1;
   $$-identifier = $2;
-  $$-is_array = true;
-  $$-array_size = $4;
+  $$-array_specifier = $3;
}
;
 
@@ -983,40 +984,20 @@ init_declarator_list:
   $$-declarations.push_tail(decl-link);
   state-symbols-add_variable(new(state) ir_variable(NULL, $3, 
ir_var_auto));
}
-   | init_declarator_list ',' any_identifier '[' ']'
+   | init_declarator_list ',' any_identifier array_specifier
{
   void *ctx = state;
-  ast_declaration *decl = new(ctx) ast_declaration($3, true, NULL, NULL);
+  ast_declaration *decl = new(ctx) ast_declaration($3, true, $4, NULL);
   decl-set_location(yylloc);
 
   $$ = $1;
   $$-declarations.push_tail(decl-link);
   state-symbols-add_variable(new(state) ir_variable(NULL, $3, 
ir_var_auto));
}
-   | init_declarator_list ',' any_identifier '[' constant_expression ']'
+   | init_declarator_list ',' any_identifier array_specifier '=' initializer
{
   void *ctx = state;
-  ast_declaration *decl = new(ctx) ast_declaration($3, true, $5, NULL);
-  decl-set_location(yylloc);
-
-  $$ = $1;
-  $$-declarations.push_tail(decl-link);
-  state-symbols-add_variable(new(state) ir_variable(NULL, $3, 
ir_var_auto));
-   }
-   | init_declarator_list ',' any_identifier '[' ']' '=' initializer
-   {
-  void *ctx = state;
-  ast_declaration *decl = new(ctx) ast_declaration($3, true, NULL, $7);
-  decl-set_location(yylloc);
-
-  $$ = $1;
-  $$-declarations.push_tail(decl-link);
-  state-symbols-add_variable(new(state) ir_variable(NULL, $3, 
ir_var_auto));
-   }
-   | init_declarator_list ',' any_identifier '[' constant_expression ']' '=' 
initializer
-   {
-  void *ctx = state;
-  ast_declaration *decl = new(ctx) ast_declaration($3, true, $5, $8);
+  ast_declaration *decl = new(ctx) ast_declaration($3, true, $4, $6);
   decl-set_location(yylloc);
 
   $$ = $1;
@@ -1053,37 +1034,19 @@ single_declaration:
   $$-set_location(yylloc);
   $$-declarations.push_tail(decl-link);
}
-   | fully_specified_type any_identifier '[' ']'
+   | fully_specified_type any_identifier array_specifier
{
   void *ctx = state;
-  ast_declaration *decl = new(ctx) ast_declaration($2, true, NULL, NULL);
+  ast_declaration *decl = new(ctx) ast_declaration($2, true, $3, NULL);
 
   $$ = new(ctx) ast_declarator_list($1);
   $$-set_location(yylloc);
   $$-declarations.push_tail(decl-link);
}
-   | fully_specified_type any_identifier '[' constant_expression ']'
+   | fully_specified_type any_identifier array_specifier '=' initializer
{
   void *ctx = state;
-  ast_declaration *decl = new(ctx) ast_declaration($2, true, $4, NULL);
-
-  $$ = new(ctx) ast_declarator_list($1);
-  $$-set_location(yylloc);
-  $$-declarations.push_tail(decl-link);
-   }
-   | fully_specified_type any_identifier '[' ']' '=' initializer
-   {
-  void *ctx = state;
-  ast_declaration *decl = new(ctx) ast_declaration($2, true, NULL, $6);
-
-  $$ = new(ctx) ast_declarator_list($1);
-  $$-set_location(yylloc);
-  $$-declarations.push_tail(decl-link);
-   }
-   | fully_specified_type any_identifier '[' constant_expression ']' '=' 
initializer
-   {
-  void *ctx = state;
-  ast_declaration *decl = new(ctx) ast_declaration($2, true, $4, $7);
+  ast_declaration *decl = new(ctx) ast_declaration($2, true, $3, $5);
 
   $$ = new(ctx) ast_declarator_list($1);
   $$-set_location(yylloc);
@@ -1584,19

[Mesa-dev] [PATCH V3 1/8] mesa: Add ARB_arrays_of_arrays

2014-01-22 Thread Timothy Arceri

Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au
Reviewed-by: Paul Berry stereotype441 at gmail.com
---
 src/glsl/glcpp/glcpp-parse.y | 3 +++
 src/mesa/main/extensions.c   | 1 +
 src/mesa/main/mtypes.h   | 1 +
 3 files changed, 5 insertions(+)

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 55c4981..4df69a8 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -1222,6 +1222,9 @@ glcpp_parser_create (const struct gl_extensions 
*extensions, int api)
 add_builtin_define(parser, GL_EXT_texture_array, 1);
  }
 
+  if (extensions-ARB_arrays_of_arrays)
+ add_builtin_define(parser, GL_ARB_arrays_of_arrays, 1);
+
  if (extensions-ARB_fragment_coord_conventions)
 add_builtin_define(parser, GL_ARB_fragment_coord_conventions,
1);
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 2e0ccc3..95eb7ca 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -80,6 +80,7 @@ static const struct extension extension_table[] = {
/* ARB Extensions */
{ GL_ARB_ES2_compatibility,   o(ARB_ES2_compatibility),   
GL, 2009 },
{ GL_ARB_ES3_compatibility,   o(ARB_ES3_compatibility),   
GL, 2012 },
+   { GL_ARB_arrays_of_arrays,o(ARB_arrays_of_arrays),
GL, 2012 },
{ GL_ARB_base_instance,   o(ARB_base_instance),   
GL, 2011 },
{ GL_ARB_blend_func_extended, o(ARB_blend_func_extended), 
GL, 2009 },
{ GL_ARB_clear_buffer_object, o(dummy_true),  
GL, 2012 },
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 33df682..eef67a8 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3366,6 +3366,7 @@ struct gl_extensions
GLboolean ANGLE_texture_compression_dxt;
GLboolean ARB_ES2_compatibility;
GLboolean ARB_ES3_compatibility;
+   GLboolean ARB_arrays_of_arrays;
GLboolean ARB_base_instance;
GLboolean ARB_blend_func_extended;
GLboolean ARB_color_buffer_float;
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH V3 8/8] docs: Mark ARB_arrays_of_arrays as started

2014-01-22 Thread Timothy Arceri

Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au
Reviewed-by: Paul Berry stereotype441 at gmail.com
---
 docs/GL3.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 0672ec7..9f756e9 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -142,7 +142,7 @@ GL 4.2:
 GL 4.3:
 
   GLSL 4.3 not started
-  GL_ARB_arrays_of_arrays  not started
+  GL_ARB_arrays_of_arrays  started
   GL_ARB_ES3_compatibility DONE (i965)
   GL_ARB_clear_buffer_object   not started
   GL_ARB_compute_shadernot started
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH V3 5/8] glsl: Allow arrays of arrays as input to vertex shader

2014-01-22 Thread Timothy Arceri

Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au
Reviewed-by: Paul Berry stereotype441 at gmail.com
---
 src/glsl/ast_to_hir.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index be12f97..f13ac51 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -3143,8 +3143,9 @@ ast_declarator_list::hir(exec_list *instructions,
  *vectors. Vertex shader inputs cannot be arrays or
  *structures.
 */
-   const glsl_type *check_type = var-type-is_array()
-  ? var-type-fields.array : var-type;
+const glsl_type *check_type = var-type;
+while (check_type-is_array())
+   check_type = check_type-element_type();
 
switch (check_type-base_type) {
case GLSL_TYPE_FLOAT:
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH V3 7/8] glsl: remove remaining is_array variables

2014-01-22 Thread Timothy Arceri

Previously we needed is_array because we used array_size == NULL to
represent both non-arrays and unsized arrays.  Now that we use a non-NULL
array_specifier to represent an unsized array, is_array is redundant.

Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au
---
 src/glsl/ast.h  | 32 +++-
 src/glsl/ast_to_hir.cpp | 10 +-
 src/glsl/ast_type.cpp   |  6 ++
 src/glsl/glsl_parser.yy | 30 +++---
 src/glsl/glsl_parser_extras.cpp | 15 ++-
 5 files changed, 35 insertions(+), 58 deletions(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index c15a119..4660f78 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -371,14 +371,13 @@ public:
 
 class ast_declaration : public ast_node {
 public:
-   ast_declaration(const char *identifier, bool is_array,
+   ast_declaration(const char *identifier,
ast_array_specifier *array_specifier,
ast_expression *initializer);
virtual void print(void) const;
 
const char *identifier;
-   
-   bool is_array;
+
ast_array_specifier *array_specifier;
 
ast_expression *initializer;
@@ -588,10 +587,10 @@ public:
 * Use only if the objects are allocated from the same context and will not
 * be modified. Zeros the inherited ast_node's fields.
 */
-   ast_type_specifier(const ast_type_specifier *that, bool is_array,
+   ast_type_specifier(const ast_type_specifier *that,
   ast_array_specifier *array_specifier)
   : ast_node(), type_name(that-type_name), structure(that-structure),
-is_array(is_array), array_specifier(array_specifier),
+array_specifier(array_specifier),
 default_precision(that-default_precision)
{
   /* empty */
@@ -599,8 +598,7 @@ public:
 
/** Construct a type specifier from a type name */
ast_type_specifier(const char *name) 
-  : type_name(name), structure(NULL),
-   is_array(false), array_specifier(NULL),
+  : type_name(name), structure(NULL), array_specifier(NULL),
default_precision(ast_precision_none)
{
   /* empty */
@@ -608,8 +606,7 @@ public:
 
/** Construct a type specifier from a structure definition */
ast_type_specifier(ast_struct_specifier *s)
-  : type_name(s-name), structure(s),
-   is_array(false), array_specifier(NULL),
+  : type_name(s-name), structure(s), array_specifier(NULL),
default_precision(ast_precision_none)
{
   /* empty */
@@ -626,7 +623,6 @@ public:
const char *type_name;
ast_struct_specifier *structure;
 
-   bool is_array;
ast_array_specifier *array_specifier;
 
/** For precision statements, this is the given precision; otherwise none. 
*/
@@ -680,7 +676,6 @@ public:
ast_parameter_declarator() :
   type(NULL),
   identifier(NULL),
-  is_array(false),
   array_specifier(NULL),
   formal_parameter(false),
   is_void(false)
@@ -695,7 +690,6 @@ public:
 
ast_fully_specified_type *type;
const char *identifier;
-   bool is_array;
ast_array_specifier *array_specifier;
 
static void parameters_to_hir(exec_list *ast_parameters,
@@ -943,13 +937,10 @@ class ast_interface_block : public ast_node {
 public:
ast_interface_block(ast_type_qualifier layout,
const char *instance_name,
-   bool is_array,
ast_array_specifier *array_specifier)
: layout(layout), block_name(NULL), instance_name(instance_name),
- is_array(is_array), array_specifier(array_specifier)
+ array_specifier(array_specifier)
{
-  if (!is_array)
- assert(array_specifier == NULL);
}
 
virtual ir_rvalue *hir(exec_list *instructions,
@@ -970,15 +961,6 @@ public:
exec_list declarations;
 
/**
-* True if the block is declared as an array
-*
-* \note
-* A block can only be an array if it also has an instance name.  If this
-* field is true, ::instance_name must also not be \c NULL.
-*/
-   bool is_array;
-
-   /**
 * Declared array size of the block instance
 *
 * If the block is not declared as an array or if the block instance array
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index f13ac51..c9f3b92 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -4507,7 +4507,7 @@ ast_type_specifier::hir(exec_list *instructions,
  return NULL;
   }
 
-  if (this-is_array) {
+  if (this-array_specifier != NULL) {
  _mesa_glsl_error(loc, state,
   default precision statements do not apply to 
   arrays);
@@ -4924,7 +4924,7 @@ ast_interface_block::hir(exec_list *instructions,
  _mesa_shader_stage_to_string(state-stage));
  }
  if (this-instance_name == NULL ||
- strcmp(this-instance_name, gl_in) != 0 ||

[Mesa-dev] [PATCH V3 6/8] glsl: create type name for arrays of arrays

2014-01-22 Thread Timothy Arceri

We need to insert outermost dimensions in the correct spot, otherwise
the dimension order will be backwards.

Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au
Reviewed-by: Paul Berry stereotype441 at gmail.com
---
 src/glsl/glsl_types.cpp | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index 12d4ac0..62a409a 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -300,8 +300,20 @@ glsl_type::glsl_type(const glsl_type *array, unsigned 
length) :
 
if (length == 0)
   snprintf(n, name_length, %s[], array-name);
-   else
-  snprintf(n, name_length, %s[%u], array-name, length);
+   else {
+  /* insert outermost dimensions in the correct spot
+   * otherwise the dimension order will be backwards
+   */
+  const char *pos = strchr(array-name, '[');
+  if (pos) {
+ int idx = pos - array-name;
+ snprintf(n, idx+1, %s, array-name);
+ snprintf(n + idx, name_length - idx, [%u]%s,
+  length, array-name + idx);
+  } else {
+ snprintf(n, name_length, %s[%u], array-name, length);
+  }
+   }
 
this-name = n;
 }
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH V3 3/8] glsl: Add array specifier to ast code

2014-01-22 Thread Timothy Arceri

Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au
---
 src/glsl/ast.h  |  66 +++
 src/glsl/ast_array_index.cpp|  13 +++
 src/glsl/ast_to_hir.cpp | 172 
 src/glsl/ast_type.cpp   |   8 +-
 src/glsl/glsl_parser_extras.cpp |  19 ++---
 src/glsl/glsl_parser_extras.h   |   2 +
 6 files changed, 179 insertions(+), 101 deletions(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index b24052b..c15a119 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -276,6 +276,43 @@ private:
bool cons;
 };
 
+class ast_array_specifier : public ast_node {
+public:
+   /** Unsized array specifier ([]) */
+   explicit ast_array_specifier(const struct YYLTYPE locp)
+ : dimension_count(1), is_unsized_array(true)
+   {
+  set_location(locp);
+   }
+
+   /** Sized array specifier ([dim]) */
+   ast_array_specifier(const struct YYLTYPE locp, ast_expression *dim)
+ : dimension_count(1), is_unsized_array(false)
+   {
+  set_location(locp);
+  array_dimensions.push_tail(dim-link);
+   }
+
+   void add_dimension(ast_expression *dim)
+   {
+  array_dimensions.push_tail(dim-link);
+  dimension_count++;
+   }
+
+   virtual void print(void) const;
+
+   /* Count including sized and unsized dimensions */
+   unsigned dimension_count;
+
+   /* If true, this means that the array has an unsized outermost dimension. */
+   bool is_unsized_array;
+
+   /* This list contains objects of type ast_node containing the
+* sized dimensions only, in outermost-to-innermost order.
+*/
+   exec_list array_dimensions;
+};
+
 /**
  * C-style aggregate initialization class
  *
@@ -334,14 +371,15 @@ public:
 
 class ast_declaration : public ast_node {
 public:
-   ast_declaration(const char *identifier, bool is_array, ast_expression 
*array_size,
-  ast_expression *initializer);
+   ast_declaration(const char *identifier, bool is_array,
+   ast_array_specifier *array_specifier,
+   ast_expression *initializer);
virtual void print(void) const;
 
const char *identifier;

bool is_array;
-   ast_expression *array_size;
+   ast_array_specifier *array_specifier;
 
ast_expression *initializer;
 };
@@ -551,9 +589,9 @@ public:
 * be modified. Zeros the inherited ast_node's fields.
 */
ast_type_specifier(const ast_type_specifier *that, bool is_array,
-  ast_expression *array_size)
+  ast_array_specifier *array_specifier)
   : ast_node(), type_name(that-type_name), structure(that-structure),
-is_array(is_array), array_size(array_size),
+is_array(is_array), array_specifier(array_specifier),
 default_precision(that-default_precision)
{
   /* empty */
@@ -562,7 +600,7 @@ public:
/** Construct a type specifier from a type name */
ast_type_specifier(const char *name) 
   : type_name(name), structure(NULL),
-   is_array(false), array_size(NULL),
+   is_array(false), array_specifier(NULL),
default_precision(ast_precision_none)
{
   /* empty */
@@ -571,7 +609,7 @@ public:
/** Construct a type specifier from a structure definition */
ast_type_specifier(ast_struct_specifier *s)
   : type_name(s-name), structure(s),
-   is_array(false), array_size(NULL),
+   is_array(false), array_specifier(NULL),
default_precision(ast_precision_none)
{
   /* empty */
@@ -589,7 +627,7 @@ public:
ast_struct_specifier *structure;
 
bool is_array;
-   ast_expression *array_size;
+   ast_array_specifier *array_specifier;
 
/** For precision statements, this is the given precision; otherwise none. 
*/
unsigned default_precision:2;
@@ -643,7 +681,7 @@ public:
   type(NULL),
   identifier(NULL),
   is_array(false),
-  array_size(NULL),
+  array_specifier(NULL),
   formal_parameter(false),
   is_void(false)
{
@@ -658,7 +696,7 @@ public:
ast_fully_specified_type *type;
const char *identifier;
bool is_array;
-   ast_expression *array_size;
+   ast_array_specifier *array_specifier;
 
static void parameters_to_hir(exec_list *ast_parameters,
 bool formal, exec_list *ir_parameters,
@@ -906,12 +944,12 @@ public:
ast_interface_block(ast_type_qualifier layout,
const char *instance_name,
bool is_array,
-   ast_expression *array_size)
+   ast_array_specifier *array_specifier)
: layout(layout), block_name(NULL), instance_name(instance_name),
- is_array(is_array), array_size(array_size)
+ is_array(is_array), array_specifier(array_specifier)
{
   if (!is_array)
- assert(array_size == NULL);
+ assert(array_specifier == NULL);
}
 
virtual ir_rvalue *hir(exec_list *instructions,
@@ -946,7 +984,7 @@ public:
 * If the block is not declared as an

Re: [Mesa-dev] [PATCH] glsl: Simplify aggregate type inference to prepare for ARB_arrays_of_arrays.

2014-01-22 Thread Timothy Arceri

On Tue, 2014-01-21 at 16:14 -0800, Paul Berry wrote:
 Timothy: I was inspired to write this patch by the complexities you
 encountered during [PATCH V2 5/8] glsl: Aggregate initializer support
 for arrays of array.  

Glad I could help spark some inspiration.

 Can you try rebasing your series on top of this
 patch to see if it simplifies things?  I believe that with these
 changes, you should be able to drop patch 5/8 entirely.

Yes with this patch I can drop patch 5 and still pass my piglit tests
thanks. And thanks again for the code review.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallium/util: util_format_srgb should not return FORMAT_NONE for sRGB formats

2014-01-22 Thread Marek Olšák

There is only one caller, which is fixed by this patch.

Marek

On Wed, Jan 22, 2014 at 12:52 AM, Brian Paul bri...@vmware.com wrote:
 On 01/21/2014 02:21 PM, Marek Olšák wrote:

 From: Marek Olšák marek.ol...@amd.com

 This fixes a serious regression introduced
 in 4e549ddb500cf677b6fa16d9ebdfa67cc23da097.

 Cc: 9.2 10.0 mesa-sta...@lists.freedesktop.org
 ---
   src/gallium/auxiliary/util/u_format.h | 3 +++
   1 file changed, 3 insertions(+)

 diff --git a/src/gallium/auxiliary/util/u_format.h
 b/src/gallium/auxiliary/util/u_format.h
 index 0fbaf4c..5f86e2d 100644
 --- a/src/gallium/auxiliary/util/u_format.h
 +++ b/src/gallium/auxiliary/util/u_format.h
 @@ -872,6 +872,9 @@ util_format_get_component_bits(enum pipe_format
 format,
   static INLINE enum pipe_format
   util_format_srgb(enum pipe_format format)
   {
 +   if (util_format_is_srgb(format))
 +  return format;
 +
  switch (format) {
  case PIPE_FORMAT_L8_UNORM:
 return PIPE_FORMAT_L8_SRGB;


 Reviewed-by: Brian Paul bri...@vmware.com

 This looks fine for fixing things but longer-term, maybe we should change
 util_format_srgb() to return the format unchanged in the default case, as we
 do for util_format_linear().

 I don't know if any callers depend on the default-PIPE_FORMAT_NONE behavior
 now.

 -Brian

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 1/1] clover: Don't crash on NULL global buffer objects.

2014-01-22 Thread Francisco Jerez

Jan Vesely jan.ves...@rutgers.edu writes:

 On Sat, 2014-01-18 at 13:34 +0100, Francisco Jerez wrote:
 Jan Vesely jan.ves...@rutgers.edu writes:
 
  Specs say "If the argument is a buffer object, the arg_value
  pointer can be NULL or point to a NULL value in which case a NULL
  value will be used as the value for the argument declared as a
  pointer to __global or __constant memory in the kernel."
 
  So don't crash when somebody does that.
 
  v2: Insert NULL into input buffer instead of buffer handle pair
  Fix constant_argument too
  Drop r600 driver changes
 
  v3: Fix inserting NULL pointer
 
  Signed-off-by: Jan Vesely jan.ves...@rutgers.edu
 
 Looks OK to me,
 
 Reviewed-by: Francisco Jerez curroje...@riseup.net

 Thank you, can you push it as well? I don't have commit access.

Done.  Thank you.


 
 Thanks.
 
  ---
   src/gallium/state_trackers/clover/core/kernel.cpp | 34 
  +++
   1 file changed, 23 insertions(+), 11 deletions(-)
 
  diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp 
  b/src/gallium/state_trackers/clover/core/kernel.cpp
  index 58780d6..fb826c1 100644
  --- a/src/gallium/state_trackers/clover/core/kernel.cpp
  +++ b/src/gallium/state_trackers/clover/core/kernel.cpp
  @@ -327,7 +327,7 @@ kernel::global_argument::set(size_t size, const void 
  *value) {
  if (size != sizeof(cl_mem))
 throw error(CL_INVALID_ARG_SIZE);
   
  -   buf = objbuffer(*(cl_mem *)value);
  +   buf = pobjbuffer(value ? *(cl_mem *)value : NULL);
  _set = true;
   }
   
  @@ -335,8 +335,14 @@ void
   kernel::global_argument::bind(exec_context ctx,
 const module::argument marg) {
  align(ctx.input, marg.target_align);
  -   ctx.g_handles.push_back(allocate(ctx.input, marg.target_size));
  -   ctx.g_buffers.push_back(buf-resource(*ctx.q).pipe);
  +
  +   if (buf) {
  +  ctx.g_handles.push_back(allocate(ctx.input, marg.target_size));
  +  ctx.g_buffers.push_back(buf-resource(*ctx.q).pipe);
  +   } else {
  +  //NULL pointer
  +  allocate(ctx.input, marg.target_size);
  +   }
   }
   
   void
  @@ -379,22 +385,28 @@ kernel::constant_argument::set(size_t size, const 
  void *value) {
  if (size != sizeof(cl_mem))
 throw error(CL_INVALID_ARG_SIZE);
   
  -   buf = objbuffer(*(cl_mem *)value);
  +   buf = pobjbuffer(value ? *(cl_mem *)value : NULL);
  _set = true;
   }
   
   void
   kernel::constant_argument::bind(exec_context ctx,
   const module::argument marg) {
  -   auto v = bytes(ctx.resources.size()  24);
  -
  -   extend(v, module::argument::zero_ext, marg.target_size);
  -   byteswap(v, ctx.q-dev.endianness());
  align(ctx.input, marg.target_align);
  -   insert(ctx.input, v);
   
  -   st = buf-resource(*ctx.q).bind_surface(*ctx.q, false);
  -   ctx.resources.push_back(st);
  +   if (buf) {
  +  auto v = bytes(ctx.resources.size()  24);
  +
  +  extend(v, module::argument::zero_ext, marg.target_size);
  +  byteswap(v, ctx.q-dev.endianness());
  +  insert(ctx.input, v);
  +
  +  st = buf-resource(*ctx.q).bind_surface(*ctx.q, false);
  +  ctx.resources.push_back(st);
  +   } else {
  +  //NULL pointer
  +  allocate(ctx.input, marg.target_size);
  +   }
   }
   
   void
  -- 
  1.8.4.2

 -- 
 Jan Vesely jan.ves...@rutgers.edu

 -- 
 Jan Vesely jan.ves...@rutgers.edu


pgp8TA8dLconv.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] glsl: Add image type to the GLSL IR.

2014-01-22 Thread Francisco Jerez

Paul Berry stereotype...@gmail.com writes:

 On 15 January 2014 11:42, Francisco Jerez curroje...@riseup.net wrote:

 v2: Reuse the glsl_sampler_dim enum for images.  Reuse the
 glsl_type::sampler_* fields instead of creating new ones specific
 to image types.  Reuse the same constructor as for samplers adding
 a new 'base_type' argument.
 ---
 Is this what you had in mind Paul?


 Yes, that seems reasonable.  Thanks.

Does your previous reviewed-by still apply for v2 of this patch?

Thanks.


pgpZ0ZjwnfuXL.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 73672] Half Life 2 in Wine is broken since 4e549ddb

2014-01-22 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=73672

--- Comment #21 from Benjamin Bellec b.bel...@gmail.com ---
I confirm that the patch works.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 73854] GoldSrc and Source Games Segfault on Launch

2014-01-22 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=73854

--- Comment #20 from Benjamin Bellec b.bel...@gmail.com ---
(In reply to comment #18)
 Created attachment 92554 [details] [review]
 glx: link loader when building with dri3
 
 While Keith's patch does work on the overall issue with libudev, we should
 not link the loader util for non dri3 builds.
 
 Here is a trivial fix that will resolve the problem. Thanks to Benjamin
 Bellec for the bisection.

I confirm that the patch works.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] glsl: Add image type to the GLSL IR.

2014-01-22 Thread Paul Berry

On 22 January 2014 04:50, Francisco Jerez curroje...@riseup.net wrote:

 Paul Berry stereotype...@gmail.com writes:

  On 15 January 2014 11:42, Francisco Jerez curroje...@riseup.net wrote:
 
  v2: Reuse the glsl_sampler_dim enum for images.  Reuse the
  glsl_type::sampler_* fields instead of creating new ones specific
  to image types.  Reuse the same constructor as for samplers adding
  a new 'base_type' argument.
  ---
  Is this what you had in mind Paul?
 
 
  Yes, that seems reasonable.  Thanks.

 Does your previous reviewed-by still apply for v2 of this patch?

 Thanks.


Yes.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Atomics not really atomic?

2014-01-22 Thread Lauri Kasanen

Hi,

The two atomic helpers in u_atomic.h, p_atomic_set and p_atomic_read,
are just passthrough macros, without the atomic guarantees. Why?

I believe I saw some corruption because of this, where a 64-bit 
write on a 32-bit platform may not be naturally atomic.

- Lauri
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] mesa: Ensure that transform feedback refers to the correct program.

2014-01-22 Thread Paul Berry

Prior to this patch, the _mesa_{Begin,Resume}TransformFeedback
functions were using ctx->Shader.CurrentProgram[MESA_SHADER_VERTEX] to
find the program that would be the source of transform feedback data.
This isn't correct--if there's a geometry shader present it should be
ctx->Shader.CurrentProgram[MESA_SHADER_GEOMETRY].  (These might be
different if separate shader objects are in use.)

This patch creates a function get_xfb_source(), which figures out the
correct program to use based on GL state, and updates
_mesa_{Begin,Resume}TransformFeedback to call it.  get_xfb_source() is
written in terms of the gl_shader_stage enum, so it should not need
modification when we add tessellation shaders in the future.  It also
creates a new driver flag, NewTransformFeedbackProg, which is flagged
whenever this program changes.

To reduce future confusion, this patch also rewords some comments and
error message text to avoid referring to vertex shaders.
---
 src/mesa/main/mtypes.h|  8 --
 src/mesa/main/transformfeedback.c | 52 +--
 2 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 3dd9678..7fd3298 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1815,8 +1815,9 @@ struct gl_transform_feedback_object
 
/**
 * The shader program active when BeginTransformFeedback() was called.
-* When active and unpaused, this equals
-* ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX].
+* When active and unpaused, this equals ctx-Shader.CurrentProgram[stage],
+* where stage is the pipeline stage that is the source of data for
+* transform feedback.
 */
struct gl_shader_program *shader_program;
 
@@ -3779,6 +3780,9 @@ struct gl_driver_flags
/** gl_context::TransformFeedback::CurrentObject */
GLbitfield NewTransformFeedback;
 
+   /** gl_context::TransformFeedback::CurrentObject::shader_program */
+   GLbitfield NewTransformFeedbackProg;
+
/** gl_context::RasterDiscard */
GLbitfield NewRasterizerDiscard;
 
diff --git a/src/mesa/main/transformfeedback.c 
b/src/mesa/main/transformfeedback.c
index 74897ba..9376a9e 100644
--- a/src/mesa/main/transformfeedback.c
+++ b/src/mesa/main/transformfeedback.c
@@ -24,7 +24,7 @@
 
 
 /*
- * Vertex transform feedback support.
+ * Transform feedback support.
  *
  * Authors:
  *   Brian Paul
@@ -376,25 +376,48 @@ _mesa_compute_max_transform_feedback_vertices(
  **/
 
 
+/**
+ * Figure out which stage of the pipeline is the source of transform feedback
+ * data given the current context state, and return its gl_shader_program.
+ *
+ * If no active program can generate transform feedback data (i.e. no vertex
+ * shader is active), returns NULL.
+ */
+static struct gl_shader_program *
+get_xfb_source(struct gl_context *ctx)
+{
+   int i;
+   for (i = MESA_SHADER_FRAGMENT - 1; i = MESA_SHADER_VERTEX; i--) {
+  if (ctx-Shader.CurrentProgram[i] != NULL)
+ return ctx-Shader.CurrentProgram[i];
+   }
+   return NULL;
+}
+
+
 void GLAPIENTRY
 _mesa_BeginTransformFeedback(GLenum mode)
 {
struct gl_transform_feedback_object *obj;
-   struct gl_transform_feedback_info *info;
+   struct gl_transform_feedback_info *info = NULL;
+   struct gl_shader_program *source;
GLuint i;
unsigned vertices_per_prim;
GET_CURRENT_CONTEXT(ctx);
 
obj = ctx-TransformFeedback.CurrentObject;
 
-   if (ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX] == NULL) {
+   /* Figure out what pipeline stage is the source of data for transform
+* feedback.
+*/
+   source = get_xfb_source(ctx);
+   if (source == NULL) {
   _mesa_error(ctx, GL_INVALID_OPERATION,
   glBeginTransformFeedback(no program active));
   return;
}
 
-   info =
-  ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX]-LinkedTransformFeedback;
+   info = source-LinkedTransformFeedback;
 
if (info-NumOutputs == 0) {
   _mesa_error(ctx, GL_INVALID_OPERATION,
@@ -452,7 +475,10 @@ _mesa_BeginTransformFeedback(GLenum mode)
   obj-GlesRemainingPrims = max_vertices / vertices_per_prim;
}
 
-   obj-shader_program = ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX];
+   if (obj-shader_program != source) {
+  ctx-NewDriverState |= ctx-DriverFlags.NewTransformFeedbackProg;
+  obj-shader_program = source;
+   }
 
assert(ctx-Driver.BeginTransformFeedback);
ctx-Driver.BeginTransformFeedback(ctx, mode, obj);
@@ -519,7 +545,7 @@ bind_buffer_range(struct gl_context *ctx, GLuint index,
 
 
 /**
- * Specify a buffer object to receive vertex shader results.  Plus,
+ * Specify a buffer object to receive transform feedback results.  Plus,
  * specify the starting offset to place the results, and max size.
  * Called from the glBindBufferRange() function.
  */
@@ -563,7 +589,7 @@ _mesa_bind_buffer_range_transform_feedback(struct 
gl_context *ctx,
 
 
 /**
- * Specify a buffer object to receive vertex shader results.
+ * Specify a

[Mesa-dev] [PATCH 2/2] i965/gen7: Use the correct program when uploading transform feedback state.

2014-01-22 Thread Paul Berry

Transform feedback may come from either the geometry shader or the
vertex shader, so we can't use
ctx->Shader.CurrentProgram[MESA_SHADER_VERTEX] to find the current
post-link transform feedback information.  Fortunately we can use
ctx->TransformFeedback.CurrentObject->shader_program.
---
 src/mesa/drivers/dri/i965/brw_state_upload.c |  1 +
 src/mesa/drivers/dri/i965/gen7_sol_state.c   | 15 +--
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index af554eb..ca256c2 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -305,6 +305,7 @@ void brw_init_state( struct brw_context *brw )
STATIC_ASSERT(BRW_NUM_STATE_BITS = 8 * sizeof(brw-state.dirty.brw));
 
ctx-DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
+   ctx-DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
ctx-DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
ctx-DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx-DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c 
b/src/mesa/drivers/dri/i965/gen7_sol_state.c
index 1ca2b74..5d2f243 100644
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
@@ -39,14 +39,11 @@ static void
 upload_3dstate_so_buffers(struct brw_context *brw)
 {
struct gl_context *ctx = brw-ctx;
-   /* BRW_NEW_VERTEX_PROGRAM */
-   const struct gl_shader_program *vs_prog =
-  ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX];
-   const struct gl_transform_feedback_info *linked_xfb_info =
-  vs_prog-LinkedTransformFeedback;
/* BRW_NEW_TRANSFORM_FEEDBACK */
struct gl_transform_feedback_object *xfb_obj =
   ctx-TransformFeedback.CurrentObject;
+   const struct gl_transform_feedback_info *linked_xfb_info =
+  xfb_obj-shader_program-LinkedTransformFeedback;
int i;
 
/* Set up the up to 4 output buffers.  These are the ranges defined in the
@@ -102,12 +99,11 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw,
  const struct brw_vue_map *vue_map)
 {
struct gl_context *ctx = brw-ctx;
-   /* BRW_NEW_VERTEX_PROGRAM */
-   const struct gl_shader_program *vs_prog =
-  ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX];
/* BRW_NEW_TRANSFORM_FEEDBACK */
+   struct gl_transform_feedback_object *xfb_obj =
+  ctx-TransformFeedback.CurrentObject;
const struct gl_transform_feedback_info *linked_xfb_info =
-  vs_prog-LinkedTransformFeedback;
+  xfb_obj-shader_program-LinkedTransformFeedback;
uint16_t so_decl[128];
int buffer_mask = 0;
int next_offset[4] = {0, 0, 0, 0};
@@ -260,7 +256,6 @@ const struct brw_tracked_state gen7_sol_state = {
.dirty = {
   .mesa  = (_NEW_LIGHT),
   .brw   = (BRW_NEW_BATCH |
-   BRW_NEW_VERTEX_PROGRAM |
 BRW_NEW_VUE_MAP_GEOM_OUT |
 BRW_NEW_TRANSFORM_FEEDBACK)
},
-- 
1.8.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Atomics not really atomic?

2014-01-22 Thread Maarten Lankhorst


op 22-01-14 14:44, Lauri Kasanen schreef:

Hi,

The two atomic helpers in u_atomic.h, p_atomic_set and p_atomic_read,
are just passthrough macros, without the atomic guarantees. Why?

I believe I saw some corruption because of this, where a 64-bit
write on a 32-bit platform may not be naturally atomic.

From what I can tell, p_atomic only supports 32-bit atomics. Atomics don't
imply barriers, so the code is probably correct for 32-bit values. There is
no 64-bit support in that code, so I don't see how that would have protected
a 64-bit write.

~Maarten
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 73934] New: Function roundf undeclared in textparam.c when building with MSVC11

2014-01-22 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=73934

  Priority: medium
Bug ID: 73934
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: Function roundf undeclared in textparam.c when
building with MSVC11
  Severity: normal
Classification: Unclassified
OS: Windows (All)
  Reporter: ztgr...@live.com
  Hardware: x86 (IA32)
Status: NEW
   Version: 10.0
 Component: Mesa core
   Product: Mesa

I am trying to build Mesa for x86 using Scons on Windows 8.1 x64 with MSVC11,
and I am getting a message stating that 'roundf' is undeclared in the file
textparam.c (in the function _mesa_GetTexParameteriv). I tried adding math.h to
the included headers for that file, but that didn't work.

I don't do much native development (I am building Mesa for use in a Game
Development Studio I am working on using SharpGL) so I have no other idea on
what could be causing this error.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 73934] Function roundf undeclared in textparam.c when building with MSVC11

2014-01-22 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=73934

--- Comment #1 from Grigori Goronzy g...@chown.ath.cx ---
MSVC has bad C99 support. roundf was introduced by C99. You can easily
implement roundf yourself, though. Calling floorf or ceilf according to the
sign of the number should do it, e.g. floorf(x + 0.5f) for non-negative x and
ceilf(x - 0.5f) for negative x.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 05/18] glapi: Add infrastructure for ARB_multi_bind

2014-01-22 Thread Brian Paul


On 01/21/2014 03:35 PM, Fredrik Höglund wrote:

---
  src/mapi/glapi/gen/ARB_multi_bind.xml   |   53 +++
  src/mapi/glapi/gen/Makefile.am  |1 +
  src/mapi/glapi/gen/gl_API.xml   |4 +++
  src/mesa/main/bufferobj.c   |   13 
  src/mesa/main/bufferobj.h   |7 
  src/mesa/main/samplerobj.c  |6 
  src/mesa/main/samplerobj.h  |2 ++
  src/mesa/main/shaderimage.c |5 +++
  src/mesa/main/shaderimage.h |3 ++
  src/mesa/main/tests/dispatch_sanity.cpp |8 +
  src/mesa/main/texobj.c  |6 
  src/mesa/main/texobj.h  |4 +++
  src/mesa/main/varray.c  |7 
  src/mesa/main/varray.h  |4 +++
  14 files changed, 123 insertions(+)
  create mode 100644 src/mapi/glapi/gen/ARB_multi_bind.xml

diff --git a/src/mapi/glapi/gen/ARB_multi_bind.xml 
b/src/mapi/glapi/gen/ARB_multi_bind.xml
new file mode 100644
index 000..4a59d8c
--- /dev/null
+++ b/src/mapi/glapi/gen/ARB_multi_bind.xml
@@ -0,0 +1,53 @@
+?xml version=1.0?
+!DOCTYPE OpenGLAPI SYSTEM gl_API.dtd
+
+!-- Note: no GLX protocol info yet. --
+
+OpenGLAPI
+
+category name=GL_ARB_multi_bind number=147
+
+function name=BindBuffersBase offset=assign
+param name=target type=GLenum/
+param name=first type=GLuint/
+param name=count type=GLsizei/
+param name=buffers type=const GLuint*/


I don't recall if the glapi python code parses these types, but it might
be safer to add a space before the '*' as we do elsewhere.  So:


<param name="buffers" type="const GLuint *"/>

Same thing below.



+/function
+
+function name=BindBuffersRange offset=assign
+param name=target type=GLenum/
+param name=first type=GLuint/
+param name=count type=GLsizei/
+param name=buffers type=const GLuint */
+param name=offsets type=const GLintptr */
+param name=sizes type=const GLsizeiptr */
+/function
+
+function name=BindTextures offset=assign
+param name=first type=GLuint/
+param name=count type=GLsizei/
+param name=textures type=const GLuint*/
+/function
+
+function name=BindSamplers offset=assign
+param name=first type=GLuint/
+param name=count type=GLsizei/
+param name=samplers type=const GLuint*/
+/function
+
+function name=BindImageTextures offset=assign
+param name=first type=GLuint/
+param name=count type=GLsizei/
+param name=textures type=const GLuint*/
+/function
+
+function name=BindVertexBuffers offset=assign
+param name=first type=GLuint/
+param name=count type=GLsizei/
+param name=buffers type=const GLuint*/
+param name=offsets type=const GLintptr*/
+param name=strides type=const GLsizei*/
+/function
+

[...]


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/18] mesa: Add functions for doing unlocked hash table lookups

2014-01-22 Thread Brian Paul


On 01/21/2014 03:35 PM, Fredrik Höglund wrote:

This patch adds functions for locking/unlocking the mutex, along with
_mesa_HashLookupWithoutLocking() and _mesa_HashInsertWithoutLocking()
that do lookups and insertions without locking the mutex.

These functions will be used by the ARB_multi_bind entry points to
avoid locking/unlocking the mutex for each binding point.
---
  src/mesa/main/hash.c |   90 --
  src/mesa/main/hash.h |9 +
  2 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c
index b31fd48..263397e 100644
--- a/src/mesa/main/hash.c
+++ b/src/mesa/main/hash.c
@@ -150,7 +150,6 @@ _mesa_DeleteHashTable(struct _mesa_HashTable *table)
  }


-
  /**
   * Lookup an entry in the hash table, without locking.
   * \sa _mesa_HashLookup
@@ -195,15 +194,55 @@ _mesa_HashLookup(struct _mesa_HashTable *table, GLuint 
key)


  /**
- * Insert a key/pointer pair into the hash table.
- * If an entry with this key already exists we'll replace the existing entry.
- *
+ * Lookup an entry in the hash table without locking the mutex.
+ *
+ * The hash table mutex must be locked manually by calling
+ * _mesa_HashLockMutex() before calling this function.
+ *
+ * \param table the hash table.
+ * \param key the key.
+ *
+ * \return pointer to user's data or NULL if key not in table
+ */
+void *
+_mesa_HashLookupWithoutLocking(struct _mesa_HashTable *table, GLuint key)
+{
+   return _mesa_HashLookup_unlocked(table, key);
+}
+
+
+/**
+ * Lock the hash table mutex.
+ *
+ * This function should be used when multiple objects need
+ * to be looked up in the hash table, to avoid having to lock
+ * and unlock the mutex each time.
+ *
   * \param table the hash table.
- * \param key the key (not zero).
- * \param data pointer to user data.
   */
  void
-_mesa_HashInsert(struct _mesa_HashTable *table, GLuint key, void *data)
+_mesa_HashLockMutex(struct _mesa_HashTable *table)
+{
+   assert(table);
+   _glthread_LOCK_MUTEX(table-Mutex);
+}
+
+
+/**
+ * Unlock the hash table mutex.
+ *
+ * \param table the hash table.
+ */
+void
+_mesa_HashUnlockMutex(struct _mesa_HashTable *table)
+{
+   assert(table);
+   _glthread_UNLOCK_MUTEX(table-Mutex);
+}
+
+
+static inline void
+_mesa_HashInsert_unlocked(struct _mesa_HashTable *table, GLuint key, void 
*data)
  {
 uint32_t hash = uint_hash(key);
 struct hash_entry *entry;
@@ -211,8 +250,6 @@ _mesa_HashInsert(struct _mesa_HashTable *table, GLuint key, 
void *data)
 assert(table);
 assert(key);

-   _glthread_LOCK_MUTEX(table-Mutex);
-
 if (key  table-MaxKey)
table-MaxKey = key;

@@ -226,11 +263,44 @@ _mesa_HashInsert(struct _mesa_HashTable *table, GLuint 
key, void *data)
   _mesa_hash_table_insert(table-ht, hash, uint_key(key), data);
}
 }
+}

-   _glthread_UNLOCK_MUTEX(table-Mutex);
+
+/**
+ * Insert a key/pointer pair into the hash table without locking the mutex.
+ * If an entry with this key already exists we'll replace the existing entry.
+ *
+ * The hash table mutex must be locked manually by calling
+ * _mesa_HashLockMutex() before calling this function.
+ *
+ * \param table the hash table.
+ * \param key the key (not zero).
+ * \param data pointer to user data.
+ */
+void
+_mesa_HashInsertWithoutLocking(struct _mesa_HashTable *table, GLuint key, void 
*data)
+{
+   _mesa_HashInsert_unlocked(table, key, data);
  }


+/**
+ * Insert a key/pointer pair into the hash table.
+ * If an entry with this key already exists we'll replace the existing entry.
+ *
+ * \param table the hash table.
+ * \param key the key (not zero).
+ * \param data pointer to user data.
+ */
+void
+_mesa_HashInsert(struct _mesa_HashTable *table, GLuint key, void *data)
+{
+   assert(table);
+   _glthread_LOCK_MUTEX(table-Mutex);
+   _mesa_HashInsert_unlocked(table, key, data);
+   _glthread_UNLOCK_MUTEX(table-Mutex);
+}
+

  /**
   * Remove an entry from the hash table.
diff --git a/src/mesa/main/hash.h b/src/mesa/main/hash.h
index b34f328..7ef8186 100644
--- a/src/mesa/main/hash.h
+++ b/src/mesa/main/hash.h
@@ -45,6 +45,15 @@ extern void _mesa_HashInsert(struct _mesa_HashTable *table, 
GLuint key, void *da

  extern void _mesa_HashRemove(struct _mesa_HashTable *table, GLuint key);

+extern void _mesa_HashLockMutex(struct _mesa_HashTable *table);
+
+extern void _mesa_HashUnlockMutex(struct _mesa_HashTable *table);
+
+extern void *_mesa_HashLookupWithoutLocking(struct _mesa_HashTable *table, 
GLuint key);
+
+extern void _mesa_HashInsertWithoutLocking(struct _mesa_HashTable *table,
+   GLuint key, void *data);
+
  extern void
  _mesa_HashDeleteAll(struct _mesa_HashTable *table,
  void (*callback)(GLuint key, void *data, void *userData),



In other parts of Mesa/gallium I believe we use function names such as 
FooLocked() to indicate that Foo should be called when the caller 
already holds the lock.

Re: [Mesa-dev] [PATCH 08/18] mesa: Implement glBindTextures()

2014-01-22 Thread Brian Paul


On 01/21/2014 03:35 PM, Fredrik Höglund wrote:

---
  src/mesa/main/texobj.c |  121 
  1 file changed, 121 insertions(+)

diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index c9fe11d..2ff3f9d 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -1140,6 +1140,28 @@ unbind_texobj_from_imgunits(struct gl_context *ctx,
 }
  }

+/**
+ * Unbinds all textures bound to the given texture image unit.
+ */
+static void
+unbind_textures_from_unit(struct gl_context *ctx, GLuint unit)
+{
+   struct gl_texture_unit *texUnit = ctx-Texture.Unit[unit];
+
+   while (texUnit-_BoundTextures) {
+  const GLuint index = ffs(texUnit-_BoundTextures) - 1;
+  struct gl_texture_object *texObj = ctx-Shared-DefaultTex[index];
+
+  _mesa_reference_texobj(texUnit-CurrentTex[index], texObj);
+
+  /* Pass BindTexture call to device driver */
+  if (ctx-Driver.BindTexture)
+ ctx-Driver.BindTexture(ctx, 0, texObj);
+
+  texUnit-_BoundTextures ^= (1  index);


  texUnit->_BoundTextures &= ~(1 << index);



+  ctx-NewState |= _NEW_TEXTURE;
+   }
+}

  /**
   * Delete named textures.
@@ -1384,6 +1406,105 @@ _mesa_BindTexture( GLenum target, GLuint texName )
  void GLAPIENTRY
  _mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures)
  {
+   GET_CURRENT_CONTEXT(ctx);
+   GLuint i;


GLint/GLsizei i;



+
+   /* The ARB_multi_bind spec says:
+*
+* An INVALID_OPERATION error is generated if first + count
+*  is greater than the number of texture image units supported
+*  by the implementation.
+*/
+   if (first + count  ctx-Const.MaxCombinedTextureImageUnits) {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  glBindTextures(first=%u + count=%u  the value of 
+  GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS=%u),
+  first, count, ctx-Const.MaxCombinedTextureImageUnits);
+  return;
+   }
+
+   /* Flush before changing bindings */
+   FLUSH_VERTICES(ctx, 0);
+
+   if (textures) {
+  /* Note that the error semantics for multi-bind commands differ from
+   * those of other GL commands.
+   *
+   * The issues section in the ARB_multi_bind spec says:
+   *
+   *(11) Typically, OpenGL specifies that if an error is generated by
+   *  a command, that command has no effect.  This is somewhat
+   *  unfortunate for multi-bind commands, because it would require
+   *  a first pass to scan the entire list of bound objects for
+   *  errors and then a second pass to actually perform the
+   *  bindings.  Should we have different error semantics?
+   *
+   *   RESOLVED:  Yes.  In this specification, when the parameters for
+   *   one of the count binding points are invalid, that binding
+   *   point is not updated and an error will be generated.  However,
+   *   other binding points in the same command will be updated if
+   *   their parameters are valid and no other error occurs.
+   */
+
+  _mesa_begin_texture_lookups(ctx);
+
+  for (i = 0; i  count; i++) {
+ if (textures[i] != 0) {
+struct gl_texture_unit *texUnit = ctx-Texture.Unit[first + i];
+struct gl_texture_object *current = texUnit-_Current;
+struct gl_texture_object *texObj;
+GLenum target;
+GLuint targetIndex;


gl_texture_index targetIndex;



+
+if (current  current-Name == textures[i])
+   texObj = current;
+else
+   texObj = _mesa_lookup_texture_without_locking(ctx, textures[i]);
+
+/* The ARB_multi_bind spec says:
+ *
+ * An INVALID_OPERATION error is generated if any value
+ *  in textures is not zero or the name of an existing
+ *  texture object (per binding).
+ */
+if (!texObj || texObj-Target == 0) {
+   _mesa_error(ctx, GL_INVALID_OPERATION,
+   glBindTextures(textures[%u]=%u is not zero 
+   or the name of an existing texture object),
+   i, textures[i]);
+   continue;
+}
+
+target = texObj-Target;
+targetIndex = texObj-TargetIndex;
+
+if (texUnit-CurrentTex[targetIndex] == texObj)
+   continue;
+
+/* Do the actual binding.  The refcount on the previously bound
+ * texture object will be decremented.  It will be deleted if the
+ * count hits zero.
+ */
+_mesa_reference_texobj(texUnit-CurrentTex[targetIndex], texObj);
+ASSERT(texUnit-CurrentTex[targetIndex]);
+
+texUnit-_BoundTextures |= (1  targetIndex);
+ctx-NewState |= _NEW_TEXTURE;
+
+/* Pass BindTexture call to device

Re: [Mesa-dev] [PATCH] rtasm: deal with size overflows by casting to ptrdiff_t

2014-01-22 Thread Brian Paul


On 01/21/2014 06:37 PM, Ilia Mirkin wrote:

This was discovered as a result of the draw-elements-base-vertex-neg
piglit test, which passes very negative offsets in, followed up by large
indices. The nouveau code correctly adjusts the pointer, but the
transfer code needs to do the proper inverse correction. Similarly fix
up the SSE code to do a 64-bit multiply to compute the proper offset.

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---

With this change, nouveau passes for the draw-elements-base-vertex-neg piglit
test with user_varrays, on a 64-bit setup both with and without
GALLIUM_NOSSE=1. I'm pretty sure that the change should be minimal to a
non-x86 setup since the rexw will be a no-op. I guess there will be an extra
register use for the mov, but it shouldn't be too expensive, esp on anything
remotely current.

  src/gallium/auxiliary/translate/translate_generic.c | 2 +-
  src/gallium/auxiliary/translate/translate_sse.c | 8 ++--
  2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/translate/translate_generic.c 
b/src/gallium/auxiliary/translate/translate_generic.c
index 5bf97db..5ffce32 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -638,7 +638,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( 
struct translate_generic *
   }

   src = tg-attrib[attr].input_ptr +
-   tg-attrib[attr].input_stride * index;
+   (ptrdiff_t)tg-attrib[attr].input_stride * index;

   copy_size = tg-attrib[attr].copy_size;
   if(likely(copy_size = 0))
diff --git a/src/gallium/auxiliary/translate/translate_sse.c 
b/src/gallium/auxiliary/translate/translate_sse.c
index a78ea91..a72454a 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -1121,7 +1121,9 @@ static boolean init_inputs( struct translate_sse *p,
  x86_cmovcc(p-func, tmp_EAX, buf_max_index, cc_AE);
   }

- x86_imul(p-func, tmp_EAX, buf_stride);
+ x86_mov(p-func, p-tmp2_EDX, buf_stride);
+ x64_rexw(p-func);
+ x86_imul(p-func, tmp_EAX, p-tmp2_EDX);
   x64_rexw(p-func);
   x86_add(p-func, tmp_EAX, buf_base_ptr);

@@ -1207,7 +1209,9 @@ static struct x86_reg get_buffer_ptr( struct 
translate_sse *p,
x86_cmp(p-func, ptr, buf_max_index);
x86_cmovcc(p-func, ptr, buf_max_index, cc_AE);

-  x86_imul(p-func, ptr, buf_stride);
+  x86_mov(p-func, p-tmp2_EDX, buf_stride);
+  x64_rexw(p-func);
+  x86_imul(p-func, ptr, p-tmp2_EDX);
x64_rexw(p-func);
x86_add(p-func, ptr, buf_base_ptr);
return ptr;



I'm no x86 expert, but this looks OK to me, and if it works for you...

Reviewed-by: Brian Paul bri...@vmware.com

Should this go into the 10.0.x branch too?

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/18] Implement GL_ARB_multi_bind

2014-01-22 Thread Brian Paul


On 01/21/2014 03:35 PM, Fredrik Höglund wrote:

So here is my take on GL_ARB_multi_bind.

I tried to come up with names for the new hash table functions that
don't suggest that they should be used to do unlocked insertions/lookups.
I'm not entirely happy with the ones I came up with though, so I'm
hoping someone will have better suggestions.

When binding 32 textures glBindTextures() seems to be about three times
faster than calling glActiveTexture() + glBindTexture() in a loop.
When binding 4 textures it's about twice as fast.

I hope to land this series this week if there are no major issues.



Looks good, Fredrik!  I just have a bunch of little nit picks.

I only have time to review 1-11 for today.  Hopefully someone else can 
review the later ones.  Esp. areas like xform-fb, glBindImageTextures, 
UBOs, glBindBuffersRange(), etc.


-Brian


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 03/18] mesa: Optimize unbind_texobj_from_texunits()

2014-01-22 Thread Brian Paul


On 01/21/2014 03:35 PM, Fredrik Höglund wrote:

The texture can only be bound to the index that corresponds to its
target, so there is no need to loop over all possible indices
for every unit and check whether the texture is bound to each one.
---
  src/mesa/main/texobj.c |   20 ++--
  1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index fbbc577..5a3cfb2 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -1084,19 +1084,19 @@ static void
  unbind_texobj_from_texunits(struct gl_context *ctx,
  struct gl_texture_object *texObj)
  {
-   GLuint u, tex;
+   const GLuint index = texObj-TargetIndex;
+   GLuint u;

 for (u = 0; u  Elements(ctx-Texture.Unit); u++) {
struct gl_texture_unit *unit = ctx-Texture.Unit[u];
-  for (tex = 0; tex  NUM_TEXTURE_TARGETS; tex++) {
- if (texObj == unit-CurrentTex[tex]) {
-_mesa_reference_texobj(unit-CurrentTex[tex],
-   ctx-Shared-DefaultTex[tex]);
-ASSERT(unit-CurrentTex[tex]);
-unit-_BoundTextures ^= (1  tex);
-break;
- }
-  }
+
+  if (texObj != unit-CurrentTex[index])
+ continue;
+
+  _mesa_reference_texobj(unit-CurrentTex[index],
+ ctx-Shared-DefaultTex[index]);
+  ASSERT(unit-CurrentTex[index]);
+  unit-_BoundTextures ^= (1  index);
 }
  }


I guess I'd like to see the last part written as:

  if (texObj == unit->CurrentTex[index]) {
     /* Bind the default texture for this unit/target */
     _mesa_reference_texobj(&unit->CurrentTex[index],
                            ctx->Shared->DefaultTex[index]);
     unit->_BoundTextures &= ~(1 << index);
  }

I try to only use continue when really needed.  I don't think the 
assertion is needed since we wouldn't have created the context if we 
failed to create any DefaultTex.


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 01/18] mesa: Store the target index in gl_texture_object

2014-01-22 Thread Brian Paul


On 01/21/2014 03:35 PM, Fredrik Höglund wrote:

This will be used by glBindTextures() so we don't have to look it up
for each texture.
---
  src/mesa/main/mtypes.h |1 +
  src/mesa/main/texobj.c |2 ++
  2 files changed, 3 insertions(+)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index c396609..2fe47c4 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1194,6 +1194,7 @@ struct gl_texture_object
 GLuint Name;/** the user-visible texture object ID */
 GLchar *Label;   /** GL_KHR_debug */
 GLenum Target;  /** GL_TEXTURE_1D, GL_TEXTURE_2D, etc. */
+   GLuint TargetIndex; /** The gl_texture_unit::CurrentTex index */


Could this be gl_texture_index TargetIndex;?




 struct gl_sampler_object Sampler;

diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index 3c64c437..4d97275 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -259,6 +259,7 @@ _mesa_copy_texture_object( struct gl_texture_object *dest,
 const struct gl_texture_object *src )
  {
 dest-Target = src-Target;
+   dest-TargetIndex = src-TargetIndex;
 dest-Name = src-Name;
 dest-Priority = src-Priority;
 dest-Sampler.BorderColor.f[0] = src-Sampler.BorderColor.f[0];
@@ -1318,6 +1319,7 @@ _mesa_BindTexture( GLenum target, GLuint texName )
   _glthread_UNLOCK_MUTEX(ctx-Shared-Mutex);
}
newTexObj-Target = target;
+  newTexObj-TargetIndex = targetIndex;
 }

 assert(valid_texture_object(newTexObj));




So the memset() in _mesa_initialize_texture_object() would implicitly 
set TargetIndex = TEXTURE_2D_MULTISAMPLE_INDEX (zero).  Is that going to 
cause any trouble?


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 02/18] mesa: Add a _BoundTextures field in gl_texture_unit

2014-01-22 Thread Brian Paul


On 01/21/2014 03:35 PM, Fredrik Höglund wrote:

This will be used by glBindTextures() when unbinding textures,
to avoid having to loop over all the targets.
---
  src/mesa/main/mtypes.h   |3 +++
  src/mesa/main/texobj.c   |6 ++
  src/mesa/main/texstate.c |3 +++
  3 files changed, 12 insertions(+)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 2fe47c4..c1a17b8 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1368,6 +1368,9 @@ struct gl_texture_unit

 /** Points to highest priority, complete and enabled texture object */
 struct gl_texture_object *_Current;
+
+   /** Texture targets that have a non-default texture bound */
+   GLbitfield _BoundTextures;
  };


diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index 4d97275..fbbc577 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -1093,6 +1093,7 @@ unbind_texobj_from_texunits(struct gl_context *ctx,
  _mesa_reference_texobj(unit-CurrentTex[tex],
 ctx-Shared-DefaultTex[tex]);
  ASSERT(unit-CurrentTex[tex]);
+unit-_BoundTextures ^= (1  tex);


I'd prefer:
unit->_BoundTextures &= ~(1 << tex);

since it tells the reader that we want to clear the bit, not just toggle it.



  break;
   }
}
@@ -1348,6 +1349,11 @@ _mesa_BindTexture( GLenum target, GLuint texName )
 _mesa_reference_texobj(texUnit-CurrentTex[targetIndex], newTexObj);
 ASSERT(texUnit-CurrentTex[targetIndex]);

+   if (texName != 0)
+  texUnit-_BoundTextures |= (1  targetIndex);
+   else
+  texUnit-_BoundTextures = ~(1  targetIndex);
+
 /* Pass BindTexture call to device driver */
 if (ctx-Driver.BindTexture)
ctx-Driver.BindTexture(ctx, target, newTexObj);
diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index 7720965..c86324f 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -109,6 +109,7 @@ _mesa_copy_texture_state( const struct gl_context *src, 
struct gl_context *dst )
  _mesa_reference_texobj(dst-Texture.Unit[u].CurrentTex[tex],
 src-Texture.Unit[u].CurrentTex[tex]);
   }
+ dst-Texture.Unit[u]._BoundTextures = 
src-Texture.Unit[u]._BoundTextures;
   _mesa_unlock_context_textures(dst);
}
 }
@@ -798,6 +799,8 @@ init_texture_unit( struct gl_context *ctx, GLuint unit )
_mesa_reference_texobj(texUnit-CurrentTex[tex],
   ctx-Shared-DefaultTex[tex]);
 }
+
+   texUnit-_BoundTextures = 0;
  }





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/18] mesa: Refactor set_ubo_binding()

2014-01-22 Thread Brian Paul


On 01/21/2014 03:35 PM, Fredrik Höglund wrote:

Make set_ubo_binding() just update the binding, and move the code
that does validation, flushes the vertices etc. into a new
bind_uniform_buffer() function.
---
  src/mesa/main/bufferobj.c |   50 -
  1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 81344ac..9ef9dd0 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -2456,15 +2456,30 @@ _mesa_GetObjectParameterivAPPLE(GLenum objectType, 
GLuint name, GLenum pname,

  static void
  set_ubo_binding(struct gl_context *ctx,
-   int index,
-   struct gl_buffer_object *bufObj,
-   GLintptr offset,
-   GLsizeiptr size,
-   GLboolean autoSize)
+struct gl_uniform_buffer_binding *binding,
+struct gl_buffer_object *bufObj,
+GLintptr offset,
+GLsizeiptr size,
+GLboolean autoSize)
  {
-   struct gl_uniform_buffer_binding *binding;
+   _mesa_reference_buffer_object(ctx, binding-BufferObject, bufObj);
+
+   binding-Offset = offset;
+   binding-Size = size;
+   binding-AutomaticSize = autoSize;
+}
+
+static void
+bind_uniform_buffer(struct gl_context *ctx,
+GLuint index,
+struct gl_buffer_object *bufObj,
+GLintptr offset,
+GLsizeiptr size,
+GLboolean autoSize)


I think we need comments on set_ubo_binding() and bind_uniform_buffer() 
to explain how/why they're different.




+{
+   struct gl_uniform_buffer_binding *binding =
+  &ctx->UniformBufferBindings[index];

-   binding = &ctx->UniformBufferBindings[index];
 if (binding->BufferObject == bufObj &&
 binding->Offset == offset &&
 binding->Size == size &&
@@ -2475,10 +2490,7 @@ set_ubo_binding(struct gl_context *ctx,
 FLUSH_VERTICES(ctx, 0);
 ctx-NewDriverState |= ctx-DriverFlags.NewUniformBuffer;

-   _mesa_reference_buffer_object(ctx, binding-BufferObject, bufObj);
-   binding-Offset = offset;
-   binding-Size = size;
-   binding-AutomaticSize = autoSize;
+   set_ubo_binding(ctx, binding, bufObj, offset, size, autoSize);
  }

  /**
@@ -2507,13 +2519,12 @@ bind_buffer_range_uniform_buffer(struct gl_context *ctx,
return;
 }

-   if (bufObj == ctx-Shared-NullBufferObj) {
-  offset = -1;
-  size = -1;
-   }
-
 _mesa_reference_buffer_object(ctx, ctx-UniformBuffer, bufObj);
-   set_ubo_binding(ctx, index, bufObj, offset, size, GL_FALSE);
+
+   if (bufObj == ctx-Shared-NullBufferObj)
+  bind_uniform_buffer(ctx, index, bufObj, -1, -1, GL_TRUE);
+   else
+  bind_uniform_buffer(ctx, index, bufObj, offset, size, GL_FALSE);
  }


@@ -2532,10 +2543,11 @@ bind_buffer_base_uniform_buffer(struct gl_context *ctx,
 }

 _mesa_reference_buffer_object(ctx, ctx-UniformBuffer, bufObj);
+
 if (bufObj == ctx-Shared-NullBufferObj)
-  set_ubo_binding(ctx, index, bufObj, -1, -1, GL_TRUE);
+  bind_uniform_buffer(ctx, index, bufObj, -1, -1, GL_TRUE);
 else
-  set_ubo_binding(ctx, index, bufObj, 0, 0, GL_TRUE);
+  bind_uniform_buffer(ctx, index, bufObj, 0, 0, GL_TRUE);
  }

  static void



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] mesa: Ensure that transform feedback refers to the correct program.

2014-01-22 Thread Kenneth Graunke

On 01/22/2014 06:07 AM, Paul Berry wrote:
 Previous to this patch, the _mesa_{Begin,Resume}TransformFeedback
 functions were using ctx->Shader.CurrentProgram[MESA_SHADER_VERTEX] to
 find the program that would be the source of transform feedback data.
 This isn't correct--if there's a geometry shader present it should be
 ctx->Shader.CurrentProgram[MESA_SHADER_GEOMETRY].  (These might be
 different if separate shader objects are in use).
 
 This patch creates a function get_xfb_source(), which figures out the
 correct program to use based on GL state, and updates
 _mesa_{Begin,Resume}TransformFeedback to call it.  get_xfb_source() is
 written in terms of the gl_shader_stage enum, so it should not need
 modification when we add tessellation shaders in the future.  It also
 creates a new driver flag, NewTransformFeedbackProg, which is flagged
 whenever this program changes.
 
 To reduce future confusion, this patch also rewords some comments and
 error message text to avoid referring to vertex shaders.
 ---
  src/mesa/main/mtypes.h|  8 --
  src/mesa/main/transformfeedback.c | 52 
 +--
  2 files changed, 45 insertions(+), 15 deletions(-)
 
 diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
 index 3dd9678..7fd3298 100644
 --- a/src/mesa/main/mtypes.h
 +++ b/src/mesa/main/mtypes.h
 @@ -1815,8 +1815,9 @@ struct gl_transform_feedback_object
  
 /**
  * The shader program active when BeginTransformFeedback() was called.
 -* When active and unpaused, this equals
 -* ctx-Shader.CurrentProgram[MESA_SHADER_VERTEX].
 +* When active and unpaused, this equals 
 ctx-Shader.CurrentProgram[stage],
 +* where stage is the pipeline stage that is the source of data for
 +* transform feedback.
  */
 struct gl_shader_program *shader_program;
  
 @@ -3779,6 +3780,9 @@ struct gl_driver_flags
 /** gl_context::TransformFeedback::CurrentObject */
 GLbitfield NewTransformFeedback;
  
 +   /** gl_context::TransformFeedback::CurrentObject::shader_program */
 +   GLbitfield NewTransformFeedbackProg;
 +
 /** gl_context::RasterDiscard */
 GLbitfield NewRasterizerDiscard;
  
 diff --git a/src/mesa/main/transformfeedback.c 
 b/src/mesa/main/transformfeedback.c
 index 74897ba..9376a9e 100644
 --- a/src/mesa/main/transformfeedback.c
 +++ b/src/mesa/main/transformfeedback.c
 @@ -24,7 +24,7 @@
  
  
  /*
 - * Vertex transform feedback support.
 + * Transform feedback support.
   *
   * Authors:
   *   Brian Paul
 @@ -376,25 +376,48 @@ _mesa_compute_max_transform_feedback_vertices(
   **/
  
  
 +/**
 + * Figure out which stage of the pipeline is the source of transform feedback
 + * data given the current context state, and return its gl_shader_program.
 + *
 + * If no active program can generate transform feedback data (i.e. no vertex
 + * shader is active), returns NULL.
 + */
 +static struct gl_shader_program *
 +get_xfb_source(struct gl_context *ctx)
 +{
 +   int i;
 +   for (i = MESA_SHADER_FRAGMENT - 1; i >= MESA_SHADER_VERTEX; i--) {

I think this would be clearer as:

for (i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) {
   ...
}

Note that the pipeline ordering is:
Vertex -> Tess. Control -> Tess. Eval -> Geometry -> Transform Feedback
(http://www.opengl.org/wiki/Rendering_Pipeline_Overview)

So either implementation would work even with tessellation shaders.

Either way, this series is:
Reviewed-by: Kenneth Graunke kenn...@whitecape.org
Cc: 10.0 mesa-sta...@lists.freedesktop.org



signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] mesa: Set gl_constants::MinMapBufferAlignment

2014-01-22 Thread Ian Romanick

From: Ian Romanick ian.d.roman...@intel.com

Leaving it set to zero isn't really correct since every allocation has
at least an alignment of 1 byte.  It also caused a problem in the i965
driver after I removed the MAX(64, ...) from the alignment calculation.
That's what I get for changing a patch without retesting it. :(

Signed-off-by: Ian Romanick ian.d.roman...@intel.com
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73907
Cc: Kenneth Graunke kenn...@whitecape.org
Cc: Lu Ha huax...@intel.com
---
 src/mesa/main/context.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 7c3b2d7..b7cd568 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -587,6 +587,7 @@ _mesa_init_constants(struct gl_context *ctx)
ctx-Const.MaxSpotExponent = 128.0;
ctx-Const.MaxViewportWidth = MAX_VIEWPORT_WIDTH;
ctx-Const.MaxViewportHeight = MAX_VIEWPORT_HEIGHT;
+   ctx-Const.MinMapBufferAlignment = 1;
 
/* Driver must override these values if ARB_viewport_array is supported. */
ctx-Const.MaxViewports = 1;
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: Set gl_constants::MinMapBufferAlignment

2014-01-22 Thread Kenneth Graunke

On 01/22/2014 08:31 AM, Ian Romanick wrote:
 From: Ian Romanick ian.d.roman...@intel.com
 
 Leaving it set to zero isn't really correct since every allocation has
 at least an alignment of 1 byte.  It also caused a problem in the i965
 driver after I removed the MAX(64, ...) from the alignment calculation.
 That's what I get for changing a patch without retesting it. :(
 
 Signed-off-by: Ian Romanick ian.d.roman...@intel.com
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73907
 Cc: Kenneth Graunke kenn...@whitecape.org
 Cc: Lu Ha huax...@intel.com
 ---
  src/mesa/main/context.c | 1 +
  1 file changed, 1 insertion(+)
 
 diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
 index 7c3b2d7..b7cd568 100644
 --- a/src/mesa/main/context.c
 +++ b/src/mesa/main/context.c
 @@ -587,6 +587,7 @@ _mesa_init_constants(struct gl_context *ctx)
 ctx-Const.MaxSpotExponent = 128.0;
 ctx-Const.MaxViewportWidth = MAX_VIEWPORT_WIDTH;
 ctx-Const.MaxViewportHeight = MAX_VIEWPORT_HEIGHT;
 +   ctx-Const.MinMapBufferAlignment = 1;
  
 /* Driver must override these values if ARB_viewport_array is supported. 
 */
 ctx-Const.MaxViewports = 1;
 

Ah, thanks for fixing this!
Reviewed-by: Kenneth Graunke kenn...@whitecape.org



signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] rtasm: deal with size overflows by casting to ptrdiff_t

2014-01-22 Thread Ilia Mirkin

On Wed, Jan 22, 2014 at 12:03 PM, Brian Paul bri...@vmware.com wrote:
 On 01/21/2014 06:37 PM, Ilia Mirkin wrote:

 This was discovered as a result of the draw-elements-base-vertex-neg
 piglit test, which passes very negative offsets in, followed up by large
 indices. The nouveau code correctly adjusts the pointer, but the
 transfer code needs to do the proper inverse correction. Similarly fix
 up the SSE code to do a 64-bit multiply to compute the proper offset.

 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 ---

 With this change, nouveau passes for the draw-elements-base-vertex-neg
 piglit
 test with user_varrays, on a 64-bit setup both with and without
 GALLIUM_NOSSE=1. I'm pretty sure that the change should be minimal to a
 non-x86 setup since the rexw will be a no-op. I guess there will be an
 extra
 register use for the mov, but it shouldn't be too expensive, esp on
 anything
 remotely current.

   src/gallium/auxiliary/translate/translate_generic.c | 2 +-
   src/gallium/auxiliary/translate/translate_sse.c | 8 ++--
   2 files changed, 7 insertions(+), 3 deletions(-)

 diff --git a/src/gallium/auxiliary/translate/translate_generic.c
 b/src/gallium/auxiliary/translate/translate_generic.c
 index 5bf97db..5ffce32 100644
 --- a/src/gallium/auxiliary/translate/translate_generic.c
 +++ b/src/gallium/auxiliary/translate/translate_generic.c
 @@ -638,7 +638,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one(
 struct translate_generic *
}

src = tg-attrib[attr].input_ptr +
 -   tg-attrib[attr].input_stride * index;
 +   (ptrdiff_t)tg-attrib[attr].input_stride * index;

copy_size = tg-attrib[attr].copy_size;
if(likely(copy_size >= 0))
 diff --git a/src/gallium/auxiliary/translate/translate_sse.c
 b/src/gallium/auxiliary/translate/translate_sse.c
 index a78ea91..a72454a 100644
 --- a/src/gallium/auxiliary/translate/translate_sse.c
 +++ b/src/gallium/auxiliary/translate/translate_sse.c
 @@ -1121,7 +1121,9 @@ static boolean init_inputs( struct translate_sse *p,
   x86_cmovcc(p-func, tmp_EAX, buf_max_index, cc_AE);
}

 - x86_imul(p-func, tmp_EAX, buf_stride);
 + x86_mov(p-func, p-tmp2_EDX, buf_stride);
 + x64_rexw(p-func);
 + x86_imul(p-func, tmp_EAX, p-tmp2_EDX);
x64_rexw(p-func);
x86_add(p-func, tmp_EAX, buf_base_ptr);

 @@ -1207,7 +1209,9 @@ static struct x86_reg get_buffer_ptr( struct
 translate_sse *p,
 x86_cmp(p-func, ptr, buf_max_index);
 x86_cmovcc(p-func, ptr, buf_max_index, cc_AE);

 -  x86_imul(p-func, ptr, buf_stride);
 +  x86_mov(p-func, p-tmp2_EDX, buf_stride);
 +  x64_rexw(p-func);
 +  x86_imul(p-func, ptr, p-tmp2_EDX);
 x64_rexw(p-func);
 x86_add(p-func, ptr, buf_base_ptr);
 return ptr;


 I'm no x86 expert, but this looks OK to me, and if it works for you...

I'm no expert either, but perhaps this will put your mind at ease (as
it did mine):

void *func(void *a, int b, int c) {
  return a + (ptrdiff_t)b * c;
}

gets compiled into, by gcc -O2 -S,

movslq  %edx, %rdx
movslq  %esi, %rsi
imulq   %rdx, %rsi
leaq    (%rdi,%rsi), %rax

Which, to be honest, is pretty clever, using leaq that way. But it's
basically an add (but if it were, e.g., int *, it would be leaq
(%rdi,%rsi,2), %rax saving on a shl). (Note that this is att/gas
syntax, so all backwards.) The new code generated with the
modifications I made is:

0020 8b977004 mov edx, [rdi+0x470]
0026 480fafca imul rcx, rdx
002a 48038f6804   add rcx, [rdi+0x468]

So the difference is that I'm first moving the value into edx, rather
than using the [rdi+bla] directly in the imul, and then making the
imul use 64-bit registers (by using the REX.W prefix). By moving the
32-bit value into edx, the upper rdx bits are cleared. The only
potential problem is that I'm using EDX where it wasn't used before,
but looking at the call-site of get_buffer_ptr, that seems like it
should be OK.


 Reviewed-by: Brian Paul bri...@vmware.com

Thanks! Would you be able to commit this as well? I noticed that I
messed up the patch subject and a word in the description...
s/rtasm/translate, s/transfer/translate. You can see these fixed at
https://github.com/imirkin/mesa/commit/3bc3081b2106ab0544adc483768ab2a1321565ff


 Should this go into the 10.0.x branch too?

Ehh... I don't know if it qualifies. The only legit way that this can
happen (at least for nouveau) is if an application allocates a 4GB
VBO. Seems unlikely. (vs the non-0 chance that I messed something up
in there.) Up to you though.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 73934] Function roundf undeclared in textparam.c when building with MSVC11

2014-01-22 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=73934

--- Comment #2 from Roland Scheidegger srol...@vmware.com ---
The stable branches and hence the tarballs are missing commit
bba8f10598866776ae198b363b3752c2e3bbb126 from master at the moment which fixes
this. You can just manually replace
*params = (GLint) roundf(obj->Sampler.LodBias);
with
*params = IROUND(obj->Sampler.LodBias);
to fix this.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [v2 03/23] i965: rename tex_ms to tex_cms

2014-01-22 Thread Topi Pohjolainen

Prepares for the introduction of non-compressed multi-sampled
lookup used in the blorp programs.

v2: now also taking into account gen8

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com (v1)
---
 src/mesa/drivers/dri/i965/brw_defines.h   | 2 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp  | 2 +-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp| 4 ++--
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  | 4 ++--
 src/mesa/drivers/dri/i965/brw_shader.cpp  | 6 +++---
 src/mesa/drivers/dri/i965/brw_vec4.cpp| 2 +-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp  | 4 ++--
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp| 2 +-
 src/mesa/drivers/dri/i965/gen8_fs_generator.cpp   | 4 ++--
 src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp | 4 ++--
 10 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 0faad66..12f7e40 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -770,7 +770,7 @@ enum opcode {
SHADER_OPCODE_TXL,
SHADER_OPCODE_TXS,
FS_OPCODE_TXB,
-   SHADER_OPCODE_TXF_MS,
+   SHADER_OPCODE_TXF_CMS,
SHADER_OPCODE_TXF_MCS,
SHADER_OPCODE_LOD,
SHADER_OPCODE_TG4,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a0e4830..1e6c3e0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -766,7 +766,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
-   case SHADER_OPCODE_TXF_MS:
+   case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index e701fc5..d257748 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -425,7 +425,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
dst, struct brw_reg src
   case SHADER_OPCODE_TXF:
 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
 break;
-  case SHADER_OPCODE_TXF_MS:
+  case SHADER_OPCODE_TXF_CMS:
  if (brw-gen = 7)
 msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
  else
@@ -1657,7 +1657,7 @@ fs_generator::generate_code(exec_list *instructions)
   case FS_OPCODE_TXB:
   case SHADER_OPCODE_TXD:
   case SHADER_OPCODE_TXF:
-  case SHADER_OPCODE_TXF_MS:
+  case SHADER_OPCODE_TXF_CMS:
   case SHADER_OPCODE_TXF_MCS:
   case SHADER_OPCODE_TXL:
   case SHADER_OPCODE_TXS:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 69ca940..4180377 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1214,7 +1214,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, 
fs_reg coordinate,
   /* sample index */
   emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), 
sample_index));
   mlen += reg_width;
-  inst = emit(SHADER_OPCODE_TXF_MS, dst);
+  inst = emit(SHADER_OPCODE_TXF_CMS, dst);
   break;
case ir_lod:
   inst = emit(SHADER_OPCODE_LOD, dst);
@@ -1405,7 +1405,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, 
fs_reg coordinate,
case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break;
case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break;
case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break;
-   case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst, payload); break;
+   case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_CMS, dst, payload); break;
case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 446c917..b74d6e8 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -440,8 +440,8 @@ brw_instruction_name(enum opcode op)
   return txs;
case FS_OPCODE_TXB:
   return txb;
-   case SHADER_OPCODE_TXF_MS:
-  return txf_ms;
+   case SHADER_OPCODE_TXF_CMS:
+  return txf_cms;
case SHADER_OPCODE_TXF_MCS:
   return txf_mcs;
case SHADER_OPCODE_TG4:
@@ -538,7 +538,7 @@ backend_instruction::is_tex()
opcode == FS_OPCODE_TXB ||
opcode == SHADER_OPCODE_TXD ||
opcode == SHADER_OPCODE_TXF ||
-   opcode == SHADER_OPCODE_TXF_MS ||
+   opcode == SHADER_OPCODE_TXF_CMS ||
opcode == SHADER_OPCODE_TXF_MCS ||
opcode ==

[Mesa-dev] [v2 14/23] i965/blorp: wrap SHL (/brw_SHL(func, /emit_shl(/)

2014-01-22 Thread Topi Pohjolainen

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 24 
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h |  7 +++
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 715c716..432c11c 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1166,12 +1166,12 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
   emit_and(t1, X, brw_imm_uw(0xfff4)); /* X  ~0b1011 */
   emit_shr(t1, t1, brw_imm_uw(1)); /* (X  ~0b1011)  1 */
   emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
-  brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y  0b1)  2 */
+  emit_shl(t2, t2, brw_imm_uw(2)); /* (Y  0b1)  2 */
   brw_OR(func, t1, t1, t2); /* (X  ~0b1011)  1 | (Y  0b1)  2 */
   emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
   brw_OR(func, Xp, t1, t2);
   emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
-  brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
+  emit_shl(t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
   emit_and(t2, X, brw_imm_uw(8)); /* X  0b1000 */
   emit_shr(t2, t2, brw_imm_uw(2)); /* (X  0b1000)  2 */
   brw_OR(func, t1, t1, t2); /* (Y  ~0b1)  1 | (X  0b1000)  2 */
@@ -1187,12 +1187,12 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
* Y' = (Y  ~0b11)  1 | (X  0b100)  2
*/
   emit_and(t1, X, brw_imm_uw(0xfffa)); /* X  ~0b101 */
-  brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (X  ~0b101)  1 */
+  emit_shl(t1, t1, brw_imm_uw(1)); /* (X  ~0b101)  1 */
   emit_and(t2, Y, brw_imm_uw(2)); /* Y  0b10 */
-  brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y  0b10)  2 */
+  emit_shl(t2, t2, brw_imm_uw(2)); /* (Y  0b10)  2 */
   brw_OR(func, t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2 */
   emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
-  brw_SHL(func, t2, t2, brw_imm_uw(1)); /* (Y  0b1)  1 */
+  emit_shl(t2, t2, brw_imm_uw(1)); /* (Y  0b1)  1 */
   brw_OR(func, t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2
 | (Y  0b1)  1 */
   emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
@@ -1246,12 +1246,12 @@ brw_blorp_blit_program::encode_msaa(unsigned 
num_samples,
 emit_and(t2, S, brw_imm_uw(1)); /* S  0b1 */
 brw_OR(func, t1, t1, t2); /* (X  ~0b1) | (S  0b1) */
  }
- brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (X  ~0b1)  1
+ emit_shl(t1, t1, brw_imm_uw(1)); /* (X  ~0b1)  1
| (S  0b1)  1 */
  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
  brw_OR(func, Xp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
- brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
+ emit_shl(t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
  if (!s_is_zero) {
 emit_and(t2, S, brw_imm_uw(2)); /* S  0b10 */
 brw_OR(func, t1, t1, t2); /* (Y  ~0b1)  1 | (S  0b10) */
@@ -1266,19 +1266,19 @@ brw_blorp_blit_program::encode_msaa(unsigned 
num_samples,
   * Y' = (Y  ~0b1)  1 | (S  0b10) | (Y  0b1)
   */
  emit_and(t1, X, brw_imm_uw(0xfffe)); /* X  ~0b1 */
- brw_SHL(func, t1, t1, brw_imm_uw(2)); /* (X  ~0b1)  2 */
+ emit_shl(t1, t1, brw_imm_uw(2)); /* (X  ~0b1)  2 */
  if (!s_is_zero) {
 emit_and(t2, S, brw_imm_uw(4)); /* S  0b100 */
 brw_OR(func, t1, t1, t2); /* (X  ~0b1)  2 | (S  0b100) */
 emit_and(t2, S, brw_imm_uw(1)); /* S  0b1 */
-brw_SHL(func, t2, t2, brw_imm_uw(1)); /* (S  0b1)  1 */
+emit_shl(t2, t2, brw_imm_uw(1)); /* (S  0b1)  1 */
 brw_OR(func, t1, t1, t2); /* (X  ~0b1)  2 | (S  0b100)
   | (S  0b1)  1 */
  }
  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
  brw_OR(func, Xp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
- brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
+ emit_shl(t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
  if (!s_is_zero) {
 emit_and(t2, S, brw_imm_uw(2)); /* S  0b10 */
 brw_OR(func, t1, t1, t2); /* (Y  ~0b1)  1 | (S  0b10) */
@@ -1459,8 +1459,8 @@ brw_blorp_blit_program::single_to_blend()
 * that maxe up a pixel).  So we need to multiply our X and Y coordinates
 * each by 2 and then add 1.
 */
-   brw_SHL(func, t1, X, brw_imm_w(1));
-   brw_SHL(func, t2, Y, brw_imm_w(1));
+   emit_shl(t1, X, brw_imm_w(1));
+   emit_shl(t2, Y, brw_imm_w(1));
emit_add(Xp, t1, brw_imm_w(1));
emit_add(Yp, t2, brw_imm_w(1));
SWAP_XY_AND_XPYP();
diff --git

[Mesa-dev] [v2 06/23] i965/blorp: move emission of rt-write into eu-emitter

2014-01-22 Thread Topi Pohjolainen

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 15 +--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 18 ++
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h   |  5 +
 3 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 034a82b..4bbdf3d 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1954,16 +1954,11 @@ brw_blorp_blit_program::render_target_write()
}
 
/* Now write to the render target and terminate the thread */
-   brw_fb_WRITE(func,
-16 /* dispatch_width */,
-base_mrf /* msg_reg_nr */,
-mrf_rt_write /* src0 */,
-BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
-BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
-mrf_offset /* msg_length.  TODO: Should be smaller for 
non-RGBA formats. */,
-0 /* response_length */,
-true /* eot */,
-use_header);
+   emit_render_target_write(
+  mrf_rt_write,
+  base_mrf, 
+  mrf_offset /* msg_length.  TODO: Should be smaller for non-RGBA formats. 
*/,
+  use_header);
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index ff45b6a..df8d63d 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -134,3 +134,21 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct 
brw_reg dst,
   BRW_SAMPLER_SIMD_MODE_SIMD16,
   BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
 }
+
+void
+brw_blorp_eu_emitter::emit_render_target_write(const struct brw_reg src0,
+   unsigned msg_reg_nr,
+   unsigned msg_length,
+   bool use_header)
+{
+   brw_fb_WRITE(func,
+16 /* dispatch_width */,
+msg_reg_nr,
+src0,
+BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
+BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
+msg_length,
+0 /* response_length */,
+true /* eot */,
+use_header);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 9e7c43f..5f0c8cf 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -47,6 +47,11 @@ protected:
 unsigned base_mrf,
 unsigned msg_length);
 
+   void emit_render_target_write(const struct brw_reg src0,
+ unsigned msg_reg_nr,
+ unsigned msg_length,
+ bool use_header);
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [v2 10/23] i965/blorp: wrap MOV (/brw_MOV(func, /emit_mov(/)

2014-01-22 Thread Topi Pohjolainen

In addition, the two special cases requiring explicit execution
size control are wrapped manually.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 68 +--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 12 +
 2 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index da10cf0..ff32e25 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1046,8 +1046,8 @@ brw_blorp_blit_program::compute_frag_coords()
stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));
 
/* Move the coordinates to UD registers. */
-   brw_MOV(func, vec16(Xp), retype(X, BRW_REGISTER_TYPE_UW));
-   brw_MOV(func, vec16(Yp), retype(Y, BRW_REGISTER_TYPE_UW));
+   emit_mov(vec16(Xp), retype(X, BRW_REGISTER_TYPE_UW));
+   emit_mov(vec16(Yp), retype(Y, BRW_REGISTER_TYPE_UW));
SWAP_XY_AND_XPYP();
 
if (key-persample_msaa_dispatch) {
@@ -1063,12 +1063,10 @@ brw_blorp_blit_program::compute_frag_coords()
   * then copy from it using vstride=1, width=4, hstride=0.
   */
  struct brw_reg t1_uw1 = retype(t1, BRW_REGISTER_TYPE_UW);
- brw_MOV(func, vec16(t1_uw1), brw_imm_v(0x3210));
+ emit_mov(vec16(t1_uw1), brw_imm_v(0x3210));
  /* Move to UD sample_index register. */
- brw_set_compression_control(func, BRW_COMPRESSION_NONE);
- brw_MOV(func, S, stride(t1_uw1, 1, 4, 0));
- brw_MOV(func, offset(S, 1), suboffset(stride(t1_uw1, 1, 4, 0), 2));
- brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
+ emit_mov_8(S, stride(t1_uw1, 1, 4, 0));
+ emit_mov_8(offset(S, 1), suboffset(stride(t1_uw1, 1, 4, 0), 2));
  break;
   }
   case 8: {
@@ -1090,7 +1088,7 @@ brw_blorp_blit_program::compute_frag_coords()
  struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD));
  brw_AND(func, t1_ud1, r0_ud1, brw_imm_ud(0xc0));
  brw_SHR(func, t1_ud1, t1_ud1, brw_imm_ud(5));
- brw_MOV(func, vec16(t2_uw1), brw_imm_v(0x3210));
+ emit_mov(vec16(t2_uw1), brw_imm_v(0x3210));
  brw_ADD(func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
  stride(t2_uw1, 1, 4, 0));
  brw_set_compression_control(func, BRW_COMPRESSION_NONE);
@@ -1388,8 +1386,8 @@ brw_blorp_blit_program::translate_dst_to_src()
struct brw_reg Yp_f = retype(Yp, BRW_REGISTER_TYPE_F);
 
/* Move the UD coordinates to float registers. */
-   brw_MOV(func, Xp_f, X);
-   brw_MOV(func, Yp_f, Y);
+   emit_mov(Xp_f, X);
+   emit_mov(Yp_f, Y);
/* Scale and offset */
brw_MUL(func, X_f, Xp_f, x_transform.multiplier);
brw_MUL(func, Y_f, Yp_f, y_transform.multiplier);
@@ -1430,8 +1428,8 @@ brw_blorp_blit_program::translate_dst_to_src()
   /* Round the float coordinates down to nearest integer by moving to
* UD registers.
*/
-  brw_MOV(func, Xp, X_f);
-  brw_MOV(func, Yp, Y_f);
+  emit_mov(Xp, X_f);
+  emit_mov(Yp, Y_f);
   SWAP_XY_AND_XPYP();
}
 }
@@ -1533,7 +1531,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
  s_is_zero = true;
   } else {
  s_is_zero = false;
- brw_MOV(func, vec16(S), brw_imm_ud(i));
+ emit_mov(vec16(S), brw_imm_ud(i));
   }
   texel_fetch(texture_data[stack_depth++]);
 
@@ -1633,8 +1631,8 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
   brw_imm_f((float)(i  0x1) * (1.0 / key-x_scale)));
   brw_ADD(func, vec16(y_sample_coords), Yp_f,
   brw_imm_f((float)((i  1)  0x1) * (1.0 / key-y_scale)));
-  brw_MOV(func, vec16(X), x_sample_coords);
-  brw_MOV(func, vec16(Y), y_sample_coords);
+  emit_mov(vec16(X), x_sample_coords);
+  emit_mov(vec16(Y), y_sample_coords);
 
   /* The MCS value we fetch has to match up with the pixel that we're
* sampling from. Since we sample from different pixels in each
@@ -1673,7 +1671,7 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
   brw_MUL(func, vec16(t1_f), t1_f, brw_imm_f(key-x_scale));
   brw_MUL(func, vec16(t2_f), t2_f, brw_imm_f(key-x_scale * 
key-y_scale));
   brw_ADD(func, vec16(t1_f), t1_f, t2_f);
-  brw_MOV(func, vec16(S), t1_f);
+  emit_mov(vec16(S), t1_f);
 
   if (num_samples == 8) {
  /* Map the sample index to a sample number */
@@ -1681,20 +1679,20 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
  S, brw_imm_d(4));
  brw_IF(func, BRW_EXECUTE_16);
  {
-brw_MOV(func, vec16(t2), brw_imm_d(5));
+emit_mov(vec16(t2), brw_imm_d(5));
 emit_if_eq_mov(S, 1, vec16(t2), 2);
 emit_if_eq_mov(S,

[Mesa-dev] [v2 12/23] i965/blorp: wrap ADD (/brw_ADD(func, /emit_add(/)

2014-01-22 Thread Topi Pohjolainen

In addition, the special case requiring explicit execution size
control is wrapped manually.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 34 +--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 16 +
 2 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 5833d83..392e7a5 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1031,7 +1031,7 @@ brw_blorp_blit_program::compute_frag_coords()
 * Then, we need to add the repeating sequence (0, 1, 0, 1, ...) to the
 * result, since pixels n+1 and n+3 are in the right half of the subspan.
 */
-   brw_ADD(func, vec16(retype(X, BRW_REGISTER_TYPE_UW)),
+   emit_add(vec16(retype(X, BRW_REGISTER_TYPE_UW)),
stride(suboffset(R1, 4), 2, 4, 0), brw_imm_v(0x10101010));
 
/* Similarly, Y coordinates for subspans come from R1.2[31:16] through
@@ -1042,7 +1042,7 @@ brw_blorp_blit_program::compute_frag_coords()
 * And we need to add the repeating sequence (0, 0, 1, 1, ...), since
 * pixels n+2 and n+3 are in the bottom half of the subspan.
 */
-   brw_ADD(func, vec16(retype(Y, BRW_REGISTER_TYPE_UW)),
+   emit_add(vec16(retype(Y, BRW_REGISTER_TYPE_UW)),
stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));
 
/* Move the coordinates to UD registers. */
@@ -1089,13 +1089,11 @@ brw_blorp_blit_program::compute_frag_coords()
  emit_and(t1_ud1, r0_ud1, brw_imm_ud(0xc0));
  brw_SHR(func, t1_ud1, t1_ud1, brw_imm_ud(5));
  emit_mov(vec16(t2_uw1), brw_imm_v(0x3210));
- brw_ADD(func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
- stride(t2_uw1, 1, 4, 0));
- brw_set_compression_control(func, BRW_COMPRESSION_NONE);
- brw_ADD(func, offset(S, 1),
- retype(t1_ud1, BRW_REGISTER_TYPE_UW),
- suboffset(stride(t2_uw1, 1, 4, 0), 2));
- brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
+ emit_add(vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
+  stride(t2_uw1, 1, 4, 0));
+ emit_add_8(offset(S, 1),
+retype(t1_ud1, BRW_REGISTER_TYPE_UW),
+suboffset(stride(t2_uw1, 1, 4, 0), 2));
  break;
   }
   default:
@@ -1391,8 +1389,8 @@ brw_blorp_blit_program::translate_dst_to_src()
/* Scale and offset */
brw_MUL(func, X_f, Xp_f, x_transform.multiplier);
brw_MUL(func, Y_f, Yp_f, y_transform.multiplier);
-   brw_ADD(func, X_f, X_f, x_transform.offset);
-   brw_ADD(func, Y_f, Y_f, y_transform.offset);
+   emit_add(X_f, X_f, x_transform.offset);
+   emit_add(Y_f, Y_f, y_transform.offset);
if (key-blit_scaled  key-blend) {
   /* Translate coordinates to lay out the samples in a rectangular  grid
* roughly corresponding to sample locations.
@@ -1402,8 +1400,8 @@ brw_blorp_blit_program::translate_dst_to_src()
  /* Adjust coordinates so that integers represent pixel centers rather
   * than pixel edges.
   */
-  brw_ADD(func, X_f, X_f, brw_imm_f(-0.5));
-  brw_ADD(func, Y_f, Y_f, brw_imm_f(-0.5));
+  emit_add(X_f, X_f, brw_imm_f(-0.5));
+  emit_add(Y_f, Y_f, brw_imm_f(-0.5));
 
   /* Clamp the X, Y texture coordinates to properly handle the sampling of
*  texels on texture edges.
@@ -1463,8 +1461,8 @@ brw_blorp_blit_program::single_to_blend()
 */
brw_SHL(func, t1, X, brw_imm_w(1));
brw_SHL(func, t2, Y, brw_imm_w(1));
-   brw_ADD(func, Xp, t1, brw_imm_w(1));
-   brw_ADD(func, Yp, t2, brw_imm_w(1));
+   emit_add(Xp, t1, brw_imm_w(1));
+   emit_add(Yp, t2, brw_imm_w(1));
SWAP_XY_AND_XPYP();
 }
 
@@ -1627,9 +1625,9 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
   s_is_zero = false;
 
   /* Compute pixel coordinates */
-  brw_ADD(func, vec16(x_sample_coords), Xp_f,
+  emit_add(vec16(x_sample_coords), Xp_f,
   brw_imm_f((float)(i  0x1) * (1.0 / key-x_scale)));
-  brw_ADD(func, vec16(y_sample_coords), Yp_f,
+  emit_add(vec16(y_sample_coords), Yp_f,
   brw_imm_f((float)((i  1)  0x1) * (1.0 / key-y_scale)));
   emit_mov(vec16(X), x_sample_coords);
   emit_mov(vec16(Y), y_sample_coords);
@@ -1670,7 +1668,7 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
   brw_FRC(func, vec16(t2_f), y_sample_coords);
   brw_MUL(func, vec16(t1_f), t1_f, brw_imm_f(key-x_scale));
   brw_MUL(func, vec16(t2_f), t2_f, brw_imm_f(key-x_scale * 
key-y_scale));
-  brw_ADD(func, vec16(t1_f), t1_f, t2_f);
+  emit_add(vec16(t1_f), t1_f, t2_f);
   emit_mov(vec16(S), t1_f);
 
   if (num_samples == 8) {
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h

[Mesa-dev] [v2 16/23] i965/blorp: wrap MUL (/brw_MUL(func, /emit_mul(/)

2014-01-22 Thread Topi Pohjolainen

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 18 +-
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h |  7 +++
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index f9d1079..2b9224b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1387,16 +1387,16 @@ brw_blorp_blit_program::translate_dst_to_src()
emit_mov(Xp_f, X);
emit_mov(Yp_f, Y);
/* Scale and offset */
-   brw_MUL(func, X_f, Xp_f, x_transform.multiplier);
-   brw_MUL(func, Y_f, Yp_f, y_transform.multiplier);
+   emit_mul(X_f, Xp_f, x_transform.multiplier);
+   emit_mul(Y_f, Yp_f, y_transform.multiplier);
emit_add(X_f, X_f, x_transform.offset);
emit_add(Y_f, Y_f, y_transform.offset);
if (key-blit_scaled  key-blend) {
   /* Translate coordinates to lay out the samples in a rectangular  grid
* roughly corresponding to sample locations.
*/
-  brw_MUL(func, X_f, X_f, brw_imm_f(key-x_scale));
-  brw_MUL(func, Y_f, Y_f, brw_imm_f(key-y_scale));
+  emit_mul(X_f, X_f, brw_imm_f(key-x_scale));
+  emit_mul(Y_f, Y_f, brw_imm_f(key-y_scale));
  /* Adjust coordinates so that integers represent pixel centers rather
   * than pixel edges.
   */
@@ -1419,8 +1419,8 @@ brw_blorp_blit_program::translate_dst_to_src()
   /* Round the float coordinates down to nearest integer */
   brw_RNDD(func, Xp_f, X_f);
   brw_RNDD(func, Yp_f, Y_f);
-  brw_MUL(func, X_f, Xp_f, brw_imm_f(1 / key-x_scale));
-  brw_MUL(func, Y_f, Yp_f, brw_imm_f(1 / key-y_scale));
+  emit_mul(X_f, Xp_f, brw_imm_f(1 / key-x_scale));
+  emit_mul(Y_f, Yp_f, brw_imm_f(1 / key-y_scale));
   SWAP_XY_AND_XPYP();
} else if (!key-bilinear_filter) {
   /* Round the float coordinates down to nearest integer by moving to
@@ -1576,7 +1576,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
   /* Scale the result down by a factor of num_samples */
   /* TODO: should use a smaller loop bound for non-RGBA formats */
   for (int j = 0; j  4; ++j) {
- brw_MUL(func, offset(texture_data[0], 2*j),
+ emit_mul(offset(texture_data[0], 2*j),
  offset(vec8(texture_data[0]), 2*j),
  brw_imm_f(1.0/num_samples));
   }
@@ -1666,8 +1666,8 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
   */
   brw_FRC(func, vec16(t1_f), x_sample_coords);
   brw_FRC(func, vec16(t2_f), y_sample_coords);
-  brw_MUL(func, vec16(t1_f), t1_f, brw_imm_f(key-x_scale));
-  brw_MUL(func, vec16(t2_f), t2_f, brw_imm_f(key-x_scale * 
key-y_scale));
+  emit_mul(vec16(t1_f), t1_f, brw_imm_f(key-x_scale));
+  emit_mul(vec16(t2_f), t2_f, brw_imm_f(key-x_scale * key-y_scale));
   emit_add(vec16(t1_f), t1_f, t2_f);
   emit_mov(vec16(S), t1_f);
 
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 1100789..c083ad8 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -109,6 +109,13 @@ protected:
   brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
}
 
+   inline void emit_mul(const struct brw_reg dst,
+const struct brw_reg src1,
+const struct brw_reg src2)
+   {
+  brw_MUL(func, dst, src1, src2);
+   }
+
inline void emit_shr(const struct brw_reg dst,
 const struct brw_reg src1,
 const struct brw_reg src2)
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [v2 17/23] i965/blorp: wrap FRC (/brw_FRC(func, /emit_frc(/)

2014-01-22 Thread Topi Pohjolainen

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 8 
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 6 ++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 2b9224b..4d0b882 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1413,8 +1413,8 @@ brw_blorp_blit_program::translate_dst_to_src()
   /* Store the fractional parts to be used as bilinear interpolation
*  coefficients.
   */
-  brw_FRC(func, x_frac, X_f);
-  brw_FRC(func, y_frac, Y_f);
+  emit_frc(x_frac, X_f);
+  emit_frc(y_frac, Y_f);
 
   /* Round the float coordinates down to nearest integer */
   brw_RNDD(func, Xp_f, X_f);
@@ -1664,8 +1664,8 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
   *| 6 | 7 || 7 | 1 |
   *--
   */
-  brw_FRC(func, vec16(t1_f), x_sample_coords);
-  brw_FRC(func, vec16(t2_f), y_sample_coords);
+  emit_frc(vec16(t1_f), x_sample_coords);
+  emit_frc(vec16(t2_f), y_sample_coords);
   emit_mul(vec16(t1_f), t1_f, brw_imm_f(key-x_scale));
   emit_mul(vec16(t2_f), t2_f, brw_imm_f(key-x_scale * key-y_scale));
   emit_add(vec16(t1_f), t1_f, t2_f);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index c083ad8..f22207d 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -137,6 +137,12 @@ protected:
   brw_OR(func, dst, src1, src2);
}
 
+   inline void emit_frc(const struct brw_reg dst,
+const struct brw_reg src)
+   {
+  brw_FRC(func, dst, src);
+   }
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [v2 05/23] i965/blorp: move emission of texture lookup into eu-emitter

2014-01-22 Thread Topi Pohjolainen

Resolving of the hardware message type is moved into the
emitter also in preparation for switching to use fs_generator.
The generator wants to translate the high level op-code into
the message type and hence the emitter needs to know the
original op-code.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 34 +++
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 43 +
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h   |  5 +++
 3 files changed, 60 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 03fabd6..034a82b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -650,7 +650,7 @@ private:
void sample(struct brw_reg dst);
void texel_fetch(struct brw_reg dst);
void mcs_fetch();
-   void texture_lookup(struct brw_reg dst, GLuint msg_type,
+   void texture_lookup(struct brw_reg dst, enum opcode op,
const sampler_message_arg *args, int num_args);
void render_target_write();
 
@@ -1765,8 +1765,7 @@ brw_blorp_blit_program::sample(struct brw_reg dst)
   SAMPLER_MESSAGE_ARG_V_FLOAT
};
 
-   texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE, args,
-  ARRAY_SIZE(args));
+   texture_lookup(dst, SHADER_OPCODE_TEX, args, ARRAY_SIZE(args));
 }
 
 /**
@@ -1802,8 +1801,7 @@ brw_blorp_blit_program::texel_fetch(struct brw_reg dst)
 
switch (brw-gen) {
case 6:
-  texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, gen6_args,
- s_is_zero ? 2 : 5);
+  texture_lookup(dst, SHADER_OPCODE_TXF, gen6_args, s_is_zero ? 2 : 5);
   break;
case 7:
   switch (key-tex_layout) {
@@ -1819,16 +1817,16 @@ brw_blorp_blit_program::texel_fetch(struct brw_reg dst)
   * INTEL_MSAA_LAYOUT_CMS.
   */
   case INTEL_MSAA_LAYOUT_CMS:
- texture_lookup(dst, GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS,
+ texture_lookup(dst, SHADER_OPCODE_TXF_CMS,
 gen7_ld2dms_args, ARRAY_SIZE(gen7_ld2dms_args));
  break;
   case INTEL_MSAA_LAYOUT_UMS:
- texture_lookup(dst, GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS,
+ texture_lookup(dst, SHADER_OPCODE_TXF_UMS,
 gen7_ld2dss_args, ARRAY_SIZE(gen7_ld2dss_args));
  break;
   case INTEL_MSAA_LAYOUT_NONE:
  assert(s_is_zero);
- texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, gen7_ld_args,
+ texture_lookup(dst, SHADER_OPCODE_TXF, gen7_ld_args,
 ARRAY_SIZE(gen7_ld_args));
  break;
   }
@@ -1846,13 +1844,13 @@ brw_blorp_blit_program::mcs_fetch()
   SAMPLER_MESSAGE_ARG_U_INT,
   SAMPLER_MESSAGE_ARG_V_INT
};
-   texture_lookup(vec16(mcs_data), GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS,
+   texture_lookup(vec16(mcs_data), SHADER_OPCODE_TXF_MCS,
   gen7_ld_mcs_args, ARRAY_SIZE(gen7_ld_mcs_args));
 }
 
 void
 brw_blorp_blit_program::texture_lookup(struct brw_reg dst,
-   GLuint msg_type,
+   enum opcode op,
const sampler_message_arg *args,
int num_args)
 {
@@ -1916,18 +1914,10 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg 
dst,
   mrf.nr += 2;
}
 
-   brw_SAMPLE(func,
-  retype(dst, BRW_REGISTER_TYPE_UW) /* dest */,
-  base_mrf /* msg_reg_nr */,
-  brw_message_reg(base_mrf) /* src0 */,
-  BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX,
-  0 /* sampler */,
-  msg_type,
-  8 /* response_length.  TODO: should be smaller for non-RGBA 
formats? */,
-  mrf.nr - base_mrf /* msg_length */,
-  0 /* header_present */,
-  BRW_SAMPLER_SIMD_MODE_SIMD16,
-  BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
+   emit_texture_lookup(retype(dst, BRW_REGISTER_TYPE_UW) /* dest */,
+   op,
+   base_mrf,
+   mrf.nr - base_mrf /* msg_length */);
 }
 
 #undef X
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index 161c679..ff45b6a 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -23,6 +23,7 @@
 
 #include "glsl/ralloc.h"
 #include "brw_blorp_blit_eu.h"
+#include "brw_blorp.h"
 
 brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw)
: mem_ctx(ralloc_context(NULL))
@@ -91,3 +92,45 @@ brw_blorp_eu_emitter::emit_kill_if_outside_rect(const struct 
brw_reg x,
struct brw_instruction *inst = brw_AND(func, g1, f0, g1);
inst-header.mask_control = BRW_MASK_DISABLE;
 }
+
+void

[Mesa-dev] [v2] Blorp blit compiler to use FS LIR

2014-01-22 Thread Topi Pohjolainen

Here are the remaining patches rebased on top of the two small
fixes submitted earlier. Even though I included the entire
remaining series, I have revised only patches 1, 3, 4, 7, 19,
20, 21 and 23. These consist of manual changes due to the
aforementioned fixes, similar patching of gen8 as gen6/7
(earlier the gen8 generators were not upstreamed yet) and
finally the fixes and improvements suggested by Paul.

Topi Pohjolainen (23):
  i965/blorp: introduce separate eu-emitter for blit compiler
  i965/blorp: move emission of pixel kill into eu-emitter
  i965: rename tex_ms to tex_cms
  i965/fs: introduce non-compressed equivalent of tex_cms
  i965/blorp: move emission of texture lookup into eu-emitter
  i965/blorp: move emission of rt-write into eu-emitter
  i965/blorp: move emission of sample combining into eu-emitter
  i965/blorp: wrap emission of conditional assignment
  i965/blorp: wrap emission of if-equal-assignment
  i965/blorp: wrap MOV (/brw_MOV(func, /emit_mov(/)
  i965/blorp: wrap AND (/brw_AND(func, /emit_and(/)
  i965/blorp: wrap ADD (/brw_ADD(func, /emit_add(/)
  i965/blorp: wrap SHR (/brw_SHR(func, /emit_shr(/)
  i965/blorp: wrap SHL (/brw_SHL(func, /emit_shl(/)
  i965/blorp: wrap OR (/brw_OR(func, /emit_or(/)
  i965/blorp: wrap MUL (/brw_MUL(func, /emit_mul(/)
  i965/blorp: wrap FRC (/brw_FRC(func, /emit_frc(/)
  i965/blorp: wrap RNDD (/brw_RNDD(func, /emit_rndd(/)
  i965/blorp: wrap brw_IF/ELSE/ENDIF() into eu-emitter
  i965/fs: allow unit tests to dump the final patched assembly
  i965/fs: introduce blorp specific rt-write for fs_generator
  i965/fs: add support for BRW_OPCODE_AVG in fs_generator
  i965/blorp: switch eu-emitter to use FS IR and fs_generator

 src/mesa/drivers/dri/i965/Makefile.sources|   1 +
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 523 --
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp   | 136 ++
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 190 
 src/mesa/drivers/dri/i965/brw_defines.h   |   4 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp  |   2 +-
 src/mesa/drivers/dri/i965/brw_fs.h|   6 +-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp|  44 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  |   4 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp  |  11 +-
 src/mesa/drivers/dri/i965/brw_vec4.cpp|   2 +-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp  |   4 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp|   2 +-
 src/mesa/drivers/dri/i965/gen8_fs_generator.cpp   |   8 +-
 src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp |   4 +-
 15 files changed, 589 insertions(+), 352 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h

-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [v2 13/23] i965/blorp: wrap SHR (/brw_SHR(func, /emit_shr(/)

2014-01-22 Thread Topi Pohjolainen

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 24 
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h |  7 +++
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 392e7a5..715c716 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1087,7 +1087,7 @@ brw_blorp_blit_program::compute_frag_coords()
  struct brw_reg t2_uw1 = retype(t2, BRW_REGISTER_TYPE_UW);
  struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD));
  emit_and(t1_ud1, r0_ud1, brw_imm_ud(0xc0));
- brw_SHR(func, t1_ud1, t1_ud1, brw_imm_ud(5));
+ emit_shr(t1_ud1, t1_ud1, brw_imm_ud(5));
  emit_mov(vec16(t2_uw1), brw_imm_v(0x3210));
  emit_add(vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
   stride(t2_uw1, 1, 4, 0));
@@ -1164,7 +1164,7 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
*   Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1
*/
   emit_and(t1, X, brw_imm_uw(0xfff4)); /* X  ~0b1011 */
-  brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (X  ~0b1011)  1 */
+  emit_shr(t1, t1, brw_imm_uw(1)); /* (X  ~0b1011)  1 */
   emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
   brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y  0b1)  2 */
   brw_OR(func, t1, t1, t2); /* (X  ~0b1011)  1 | (Y  0b1)  2 */
@@ -1173,10 +1173,10 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
   emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
   brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
   emit_and(t2, X, brw_imm_uw(8)); /* X  0b1000 */
-  brw_SHR(func, t2, t2, brw_imm_uw(2)); /* (X  0b1000)  2 */
+  emit_shr(t2, t2, brw_imm_uw(2)); /* (X  0b1000)  2 */
   brw_OR(func, t1, t1, t2); /* (Y  ~0b1)  1 | (X  0b1000)  2 */
   emit_and(t2, X, brw_imm_uw(2)); /* X  0b10 */
-  brw_SHR(func, t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
+  emit_shr(t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
   brw_OR(func, Yp, t1, t2);
   SWAP_XY_AND_XPYP();
} else {
@@ -1198,9 +1198,9 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
   emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
   brw_OR(func, Xp, t1, t2);
   emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y  ~0b11 */
-  brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
+  emit_shr(t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
   emit_and(t2, X, brw_imm_uw(4)); /* X  0b100 */
-  brw_SHR(func, t2, t2, brw_imm_uw(2)); /* (X  0b100)  2 */
+  emit_shr(t2, t2, brw_imm_uw(2)); /* (X  0b100)  2 */
   brw_OR(func, Yp, t1, t2);
   SWAP_XY_AND_XPYP();
}
@@ -1331,16 +1331,16 @@ brw_blorp_blit_program::decode_msaa(unsigned 
num_samples,
   * S = (Y & 0b10) | (X & 0b10) >> 1
   */
  emit_and(t1, X, brw_imm_uw(0xfffc)); /* X  ~0b11 */
- brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (X  ~0b11)  1 */
+ emit_shr(t1, t1, brw_imm_uw(1)); /* (X  ~0b11)  1 */
  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
  brw_OR(func, Xp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y  ~0b11 */
- brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
+ emit_shr(t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
  emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
  brw_OR(func, Yp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(2)); /* Y  0b10 */
  emit_and(t2, X, brw_imm_uw(2)); /* X  0b10 */
- brw_SHR(func, t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
+ emit_shr(t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
  brw_OR(func, S, t1, t2);
  break;
   case 8:
@@ -1350,18 +1350,18 @@ brw_blorp_blit_program::decode_msaa(unsigned 
num_samples,
   * S = (X & 0b100) | (Y & 0b10) | (X & 0b10) >> 1
   */
  emit_and(t1, X, brw_imm_uw(0xfff8)); /* X  ~0b111 */
- brw_SHR(func, t1, t1, brw_imm_uw(2)); /* (X  ~0b111)  2 */
+ emit_shr(t1, t1, brw_imm_uw(2)); /* (X  ~0b111)  2 */
  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
  brw_OR(func, Xp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y  ~0b11 */
- brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
+ emit_shr(t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
  emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
  brw_OR(func, Yp, t1, t2);
  emit_and(t1, X, brw_imm_uw(4)); /* X  0b100 */
  emit_and(t2, Y, brw_imm_uw(2)); /* Y  0b10 */
  brw_OR(func, t1, t1, t2); /* (X  0b100) | (Y  0b10) */
  emit_and(t2, X, brw_imm_uw(2)); /* X  0b10 */
- brw_SHR(func, t2, t2, brw_imm_uw(1));

[Mesa-dev] [v2 22/23] i965/fs: add support for BRW_OPCODE_AVG in fs_generator

2014-01-22 Thread Topi Pohjolainen

Needed for compiling blorp blit programs.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 29050c9..9d647fb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1421,6 +1421,9 @@ fs_generator::generate_code(exec_list *instructions, FILE 
*dump_file)
   case BRW_OPCODE_MUL:
 brw_MUL(p, dst, src[0], src[1]);
 break;
+  case BRW_OPCODE_AVG:
+brw_AVG(p, dst, src[0], src[1]);
+break;
   case BRW_OPCODE_MACH:
 brw_set_acc_write_control(p, 1);
 brw_MACH(p, dst, src[0], src[1]);
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [v2 11/23] i965/blorp: wrap AND (/brw_AND(func, /emit_and(/)

2014-01-22 Thread Topi Pohjolainen

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 78 +--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h |  7 +++
 2 files changed, 46 insertions(+), 39 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index ff32e25..5833d83 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1086,7 +1086,7 @@ brw_blorp_blit_program::compute_frag_coords()
  struct brw_reg t1_ud1 = vec1(retype(t1, BRW_REGISTER_TYPE_UD));
  struct brw_reg t2_uw1 = retype(t2, BRW_REGISTER_TYPE_UW);
  struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD));
- brw_AND(func, t1_ud1, r0_ud1, brw_imm_ud(0xc0));
+ emit_and(t1_ud1, r0_ud1, brw_imm_ud(0xc0));
  brw_SHR(func, t1_ud1, t1_ud1, brw_imm_ud(5));
  emit_mov(vec16(t2_uw1), brw_imm_v(0x3210));
  brw_ADD(func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
@@ -1165,19 +1165,19 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
*   X' = (X & ~0b1011) >> 1 | (Y & 0b1) << 2 | X & 0b1 (4)
*   Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1
*/
-  brw_AND(func, t1, X, brw_imm_uw(0xfff4)); /* X  ~0b1011 */
+  emit_and(t1, X, brw_imm_uw(0xfff4)); /* X  ~0b1011 */
   brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (X  ~0b1011)  1 */
-  brw_AND(func, t2, Y, brw_imm_uw(1)); /* Y  0b1 */
+  emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
   brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y  0b1)  2 */
   brw_OR(func, t1, t1, t2); /* (X  ~0b1011)  1 | (Y  0b1)  2 */
-  brw_AND(func, t2, X, brw_imm_uw(1)); /* X  0b1 */
+  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
   brw_OR(func, Xp, t1, t2);
-  brw_AND(func, t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
+  emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
   brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
-  brw_AND(func, t2, X, brw_imm_uw(8)); /* X  0b1000 */
+  emit_and(t2, X, brw_imm_uw(8)); /* X  0b1000 */
   brw_SHR(func, t2, t2, brw_imm_uw(2)); /* (X  0b1000)  2 */
   brw_OR(func, t1, t1, t2); /* (Y  ~0b1)  1 | (X  0b1000)  2 */
-  brw_AND(func, t2, X, brw_imm_uw(2)); /* X  0b10 */
+  emit_and(t2, X, brw_imm_uw(2)); /* X  0b10 */
   brw_SHR(func, t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
   brw_OR(func, Yp, t1, t2);
   SWAP_XY_AND_XPYP();
@@ -1188,20 +1188,20 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
* X' = (X & ~0b101) << 1 | (Y & 0b10) << 2 | (Y & 0b1) << 1 | X & 0b1
* Y' = (Y & ~0b11) >> 1 | (X & 0b100) >> 2
*/
-  brw_AND(func, t1, X, brw_imm_uw(0xfffa)); /* X  ~0b101 */
+  emit_and(t1, X, brw_imm_uw(0xfffa)); /* X  ~0b101 */
   brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (X  ~0b101)  1 */
-  brw_AND(func, t2, Y, brw_imm_uw(2)); /* Y  0b10 */
+  emit_and(t2, Y, brw_imm_uw(2)); /* Y  0b10 */
   brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y  0b10)  2 */
   brw_OR(func, t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2 */
-  brw_AND(func, t2, Y, brw_imm_uw(1)); /* Y  0b1 */
+  emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
   brw_SHL(func, t2, t2, brw_imm_uw(1)); /* (Y  0b1)  1 */
   brw_OR(func, t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2
 | (Y  0b1)  1 */
-  brw_AND(func, t2, X, brw_imm_uw(1)); /* X  0b1 */
+  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
   brw_OR(func, Xp, t1, t2);
-  brw_AND(func, t1, Y, brw_imm_uw(0xfffc)); /* Y  ~0b11 */
+  emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y  ~0b11 */
   brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
-  brw_AND(func, t2, X, brw_imm_uw(4)); /* X  0b100 */
+  emit_and(t2, X, brw_imm_uw(4)); /* X  0b100 */
   brw_SHR(func, t2, t2, brw_imm_uw(2)); /* (X  0b100)  2 */
   brw_OR(func, Yp, t1, t2);
   SWAP_XY_AND_XPYP();
@@ -1243,22 +1243,22 @@ brw_blorp_blit_program::encode_msaa(unsigned 
num_samples,
   *   where X' = (X  ~0b1)  1 | (S  0b1)  1 | (X  0b1)
   * Y' = (Y  ~0b1)  1 | (S  0b10) | (Y  0b1)
   */
- brw_AND(func, t1, X, brw_imm_uw(0xfffe)); /* X  ~0b1 */
+ emit_and(t1, X, brw_imm_uw(0xfffe)); /* X  ~0b1 */
  if (!s_is_zero) {
-brw_AND(func, t2, S, brw_imm_uw(1)); /* S  0b1 */
+emit_and(t2, S, brw_imm_uw(1)); /* S  0b1 */
 brw_OR(func, t1, t1, t2); /* (X  ~0b1) | (S  0b1) */
  }
  brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (X  ~0b1)  1
| (S  0b1)  1 */
- brw_AND(func, t2, X, brw_imm_uw(1)); /* X  0b1 */
+ emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
  brw_OR(func, Xp, t1, t2);
-

[Mesa-dev] [PATCH RFC 04/11] glsl: add dead branch analysis

2014-01-22 Thread Connor Abbott

Dead branch analysis determines when the then or else branches of an
if statement will always terminate in a loop jump or return statement,
and hence once we enter that branch we will never get to the statements
after the if. This is useful for determining the dominance tree, which
is needed for the conversion to SSA, as well as various other SSA-based
optimizations.
---
 src/glsl/Makefile.sources |   1 +
 src/glsl/ir_dead_branches.cpp | 226 ++
 src/glsl/ir_dead_branches.h   |  78 +++
 3 files changed, 305 insertions(+)
 create mode 100644 src/glsl/ir_dead_branches.cpp
 create mode 100644 src/glsl/ir_dead_branches.h

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index e69c1ac..a43bfa7 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -33,6 +33,7 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/ir_clone.cpp \
$(GLSL_SRCDIR)/ir_constant_expression.cpp \
$(GLSL_SRCDIR)/ir.cpp \
+   $(GLSL_SRCDIR)/ir_dead_branches.cpp \
$(GLSL_SRCDIR)/ir_equals.cpp \
$(GLSL_SRCDIR)/ir_expression_flattening.cpp \
$(GLSL_SRCDIR)/ir_function_can_inline.cpp \
diff --git a/src/glsl/ir_dead_branches.cpp b/src/glsl/ir_dead_branches.cpp
new file mode 100644
index 000..f86f009
--- /dev/null
+++ b/src/glsl/ir_dead_branches.cpp
@@ -0,0 +1,226 @@
+/*
+ * Copyright © 2013 Connor Abbott (con...@abbott.cx)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_dead_branches.h"
+#include "main/hash_table.h"
+
+/**
+ * \file ir_dead_branches.h
+ *
+ * Provides a visitor which determines, for each if instruction, whether
+ * control will never flow from the then-block or else-block
+ * to the next instruction due to jump statements (break, continue, return,
+ * discard).
+ */
+
+/*
+ * Note that we keep track of whether a given branch is dead due to a return-
+ * like statement (return or discard) or due to a loop jump. For example,
+ * imagine you have a control flow like the following:
+ *
+ * if (...) {
+ *while (...) {
+ *  if (...) {
+ * ...
+ * continue;
+ *  } else {
+ * ...
+ * return;
+ *  }
+ *}
+ * }
+ *
+ * After processing the inner if statement, we see that both branches are dead;
+ * normally, this would result in declaring the then-branch of the outer if
+ * statement dead, but in this case, there is a loop in between the inner and
+ * outer if statement, so the branch can in fact be taken. However, if the
+ * continue statement were a discard or return instead, then control would
+ * always leave the function as soon as the while loop was reached, so in this
+ * case the dead branch must skip across the loop. So we keep track of whether
+ * the immediately enclosing control statement is a loop (in_loop), and if we
+ * are, then after processing an if statement, we only propagate the dead branch
+ * through the loop if both branches of the inner if statement are dead due to
+ * a return or discard statement (then_dead_return and else_dead_return).
+ */
+
+ir_dead_branches_visitor::ir_dead_branches_visitor()
+{
+   this-ht = _mesa_hash_table_create(NULL, _mesa_key_pointer_equal);
+   this-in_loop = false;
+   this-outer_if = NULL;
+   this-in_then = false;
+}
+
+static void
+free_entry(struct hash_entry *entry)
+{
+   ir_dead_branches *dead_branches = (ir_dead_branches *) entry-data;
+   delete dead_branches;
+}
+
+ir_dead_branches_visitor::~ir_dead_branches_visitor()
+{
+   _mesa_hash_table_destroy(this-ht, free_entry);
+}
+
+ir_dead_branches::ir_dead_branches(ir_if *ir)
+{
+   this-ir = ir;
+   this-then_dead = false;
+   this-else_dead = false;
+   this-then_dead_return = false;
+   this-else_dead_return = false;
+}
+
+ir_dead_branches *
+ir_dead_branches_visitor::get_dead_branches(ir_if *ir)
+{
+   assert(ir);
+
+   struct

[Mesa-dev] [v2 19/23] i965/blorp: wrap brw_IF/ELSE/ENDIF() into eu-emitter

2014-01-22 Thread Topi Pohjolainen

v2 (Paul): renamed emit_if() to emit_cmp_if()

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 14 +-
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 18 ++
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index aae0704..6454d2a 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1548,9 +1548,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
   * Since we have already sampled from sample 0, all we need to do is
   * skip the remaining fetches and averaging if MCS is zero.
   */
- brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_NZ,
- mcs_data, brw_imm_ud(0));
- brw_IF(func, BRW_EXECUTE_16);
+ emit_cmp_if(BRW_CONDITIONAL_NZ, mcs_data, brw_imm_ud(0));
   }
 
   /* Do count_trailing_one_bits(i) times */
@@ -1583,7 +1581,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
}
 
if (key-tex_layout == INTEL_MSAA_LAYOUT_CMS)
-  brw_ENDIF(func);
+  emit_endif();
 }
 
 void
@@ -1673,23 +1671,21 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
 
   if (num_samples == 8) {
  /* Map the sample index to a sample number */
- brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_L,
- S, brw_imm_d(4));
- brw_IF(func, BRW_EXECUTE_16);
+ emit_cmp_if(BRW_CONDITIONAL_L, S, brw_imm_d(4));
  {
 emit_mov(vec16(t2), brw_imm_d(5));
 emit_if_eq_mov(S, 1, vec16(t2), 2);
 emit_if_eq_mov(S, 2, vec16(t2), 4);
 emit_if_eq_mov(S, 3, vec16(t2), 6);
  }
- brw_ELSE(func);
+ emit_else();
  {
 emit_mov(vec16(t2), brw_imm_d(0));
 emit_if_eq_mov(S, 5, vec16(t2), 3);
 emit_if_eq_mov(S, 6, vec16(t2), 7);
 emit_if_eq_mov(S, 7, vec16(t2), 1);
  }
- brw_ENDIF(func);
+ emit_endif();
  emit_mov(vec16(S), t2);
   }
   texel_fetch(texture_data[i]);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 07c96b0..736f5b0 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -149,6 +149,24 @@ protected:
   brw_RNDD(func, dst, src);
}
 
+   inline void emit_cmp_if(int op,
+   const struct brw_reg x,
+   const struct brw_reg y)
+   {
+  brw_CMP(func, vec16(brw_null_reg()), op, x, y);
+  brw_IF(func, BRW_EXECUTE_16);
+   }
+
+   inline void emit_else(void)
+   {
+  brw_ELSE(func);
+   }
+
+   inline void emit_endif(void)
+   {
+  brw_ENDIF(func);
+   }
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH RFC 05/11] glsl: add loop jump visitor

2014-01-22 Thread Connor Abbott

This visitor will allow us to determine all the loop jumps that
correspond to each loop. In SSA form, each input to a phi node is
associated with a predecessor basic block. In the case of phi nodes
at the beginning and end of loops, these predecessor blocks will
include all blocks that end with a loop_jump (break or continue), and
so in order to insert phi nodes we must know all the loop_jump
instructions that correspond to each loop.
---
 src/glsl/Makefile.sources  |   1 +
 src/glsl/ir_loop_jumps.cpp | 129 +
 src/glsl/ir_loop_jumps.h   |  71 +
 3 files changed, 201 insertions(+)
 create mode 100644 src/glsl/ir_loop_jumps.cpp
 create mode 100644 src/glsl/ir_loop_jumps.h

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index a43bfa7..869158a 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -42,6 +42,7 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/ir_hierarchical_visitor.cpp \
$(GLSL_SRCDIR)/ir_hv_accept.cpp \
$(GLSL_SRCDIR)/ir_import_prototypes.cpp \
+   $(GLSL_SRCDIR)/ir_loop_jumps.cpp \
$(GLSL_SRCDIR)/ir_print_visitor.cpp \
$(GLSL_SRCDIR)/ir_reader.cpp \
$(GLSL_SRCDIR)/ir_rvalue_visitor.cpp \
diff --git a/src/glsl/ir_loop_jumps.cpp b/src/glsl/ir_loop_jumps.cpp
new file mode 100644
index 000..1386340
--- /dev/null
+++ b/src/glsl/ir_loop_jumps.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2013 Connor Abbott (con...@abbott.cx)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include ir.h
+#include ir_visitor.h
+#include ir_loop_jumps.h
+#include main/hash_table.h
+
+/**
+ * \file ir_loop_jumps.cpp
+ *
+ * Provides a visitor that collects all the continue and break statements for
+ * each loop.
+ */
+
+ir_loop_jumps::ir_loop_jumps(ir_loop *loop) : loop(loop)
+{
+   this-mem_ctx = ralloc_context(NULL);
+}
+
+ir_loop_jumps::~ir_loop_jumps()
+{
+   ralloc_free(this-mem_ctx);
+}
+
+void
+ir_loop_jumps::add_continue(ir_loop_jump *ir)
+{
+   ir_loop_jump_entry *entry = new(this-mem_ctx) ir_loop_jump_entry();
+   entry-ir = ir;
+   this-continues.push_tail(entry);
+}
+
+void
+ir_loop_jumps::add_break(ir_loop_jump *ir)
+{
+   ir_loop_jump_entry *entry = new(this-mem_ctx) ir_loop_jump_entry();
+   entry-ir = ir;
+   this-breaks.push_tail(entry);
+}
+
+ir_loop_jumps_visitor::ir_loop_jumps_visitor()
+{
+   this-ht = _mesa_hash_table_create(NULL, _mesa_key_pointer_equal);
+   this-outer_loop = NULL;
+}
+
+static void
+free_entry(struct hash_entry *entry)
+{
+   ir_loop_jumps *loop_jumps = (ir_loop_jumps *) entry-data;
+   delete loop_jumps;
+}
+
+ir_loop_jumps_visitor::~ir_loop_jumps_visitor()
+{
+   _mesa_hash_table_destroy(this-ht, free_entry);
+}
+
+ir_visitor_status
+ir_loop_jumps_visitor::visit_enter(ir_loop *ir)
+{
+   ir_loop_jumps *loop_jumps = new ir_loop_jumps(ir);
+   _mesa_hash_table_insert(this-ht, _mesa_hash_pointer(ir), ir, loop_jumps);
+
+   ir_loop *old_outer_loop = this-outer_loop;
+   this-outer_loop = ir;
+
+   visit_list_elements(this, ir-body_instructions);
+
+   this-outer_loop = old_outer_loop;
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+ir_loop_jumps_visitor::visit(ir_loop_jump *ir)
+{
+   ir_loop_jumps *loop_jumps = this-get_loop_jumps(this-outer_loop);
+   switch (ir-mode) {
+  case ir_loop_jump::jump_break:
+loop_jumps-add_break(ir);
+break;
+
+  case ir_loop_jump::jump_continue:
+loop_jumps-add_continue(ir);
+break;
+
+  default:
+assert(!unknown loop jump mode);
+break;
+   }
+
+   return visit_continue;
+}
+
+ir_loop_jumps *
+ir_loop_jumps_visitor::get_loop_jumps(ir_loop *ir)
+{
+   assert(ir);
+
+   struct hash_entry *e = _mesa_hash_table_search(this-ht,
+ _mesa_hash_pointer(ir),
+ ir);
+   if (e)
+

[Mesa-dev] [PATCH RFC 09/11] glsl: add pass to convert GLSL IR to SSA form

2014-01-22 Thread Connor Abbott

opt_to_ssa will convert temporaries and local variables to SSA form,
although for now it can't handle array and record dereferences.
---
 src/glsl/Makefile.sources  |1 +
 src/glsl/ir_optimization.h |2 +
 src/glsl/opt_to_ssa.cpp| 1155 
 3 files changed, 1158 insertions(+)
 create mode 100644 src/glsl/opt_to_ssa.cpp

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 869158a..961784b 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -100,6 +100,7 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/opt_redundant_jumps.cpp \
$(GLSL_SRCDIR)/opt_structure_splitting.cpp \
$(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \
+   $(GLSL_SRCDIR)/opt_to_ssa.cpp \
$(GLSL_SRCDIR)/opt_tree_grafting.cpp \
$(GLSL_SRCDIR)/opt_vectorize.cpp \
$(GLSL_SRCDIR)/s_expression.cpp \
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 055d655..92c8b57 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -65,6 +65,8 @@ enum lower_packing_builtins_op {
LOWER_UNPACK_UNORM_4x8   = 0x0800
 };
 
+void convert_to_ssa(exec_list *instructions);
+
 bool do_common_optimization(exec_list *ir, bool linked,
bool uniform_locations_assigned,
unsigned max_unroll_iterations,
diff --git a/src/glsl/opt_to_ssa.cpp b/src/glsl/opt_to_ssa.cpp
new file mode 100644
index 000..c1044f6
--- /dev/null
+++ b/src/glsl/opt_to_ssa.cpp
@@ -0,0 +1,1155 @@
+/*
+ * Copyright © 2013 Connor Abbott (con...@abbott.cx)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include ir.h
+#include ir_optimization.h
+#include ir_hierarchical_visitor.h
+#include ir_dead_branches.h
+#include ir_loop_jumps.h
+#include ir_builder.h
+#include ralloc.h
+#include glsl_types.h
+#include main/hash_table.h
+
+/**
+ * \file opt_to_ssa.cpp
+ *
+ * This pass will convert all temporaries and local variables to SSA
+ * temporaries, except for variables which are dereferenced as an array or
+ * structure (which we cannot support in SSA form). The algorithm is loosely
+ * based on "Efficiently Computing Static Single Assignment Form and the
+ * Control Dependence Graph" by Cytron et al., although there are a number of
+ * differences caused by the fact that we are operating on a hierarchical tree
+ * of if's and loops instead of the graph of basic blocks that Cytron et al.
+ * assume. In particular, instead of explicitly constructing the dominance
+ * tree, we use an approximation simple enough that all the information we
+ * need can be found on the fly. The approximation we use is this:
+ *
+ * - The instruction before an if statement dominates the then and else
+ * branches as well as the instructions after the branch, unless one of the
+ * branches is dead. If, for example, the then branch is dead, then the
+ * instruction before the if statement dominates the then branch and the else
+ * branch, and the else branch dominates the instruction after the if
+ * statement because if we get past the branch then we know we must have gone
+ * through the else branch.
+ *
+ * - The instruction before the loop dominates the instructions inside the loop
+ * as well as the instructions after the loop. Here is where the approximation
+ * lies: really, since the loop is guaranteed to execute at least once, the
+ * instructions after the loop can potentially be dominated by an instruction
+ * inside the loop. Computing that instruction, though, would be complicated,
+ * and in the end it doesn't hurt much if we ignore that detail. In the end, we
+ * may have some phi nodes where all the sources are the same, but these can
+ * easily be optimized away.
+ *
+ * The iterated dominance frontier of an instruction can then be calculated by
+ * walking up the stack of control flow elements (if's and loops) that

[Mesa-dev] [v2 09/23] i965/blorp: wrap emission of if-equal-assignment

2014-01-22 Thread Topi Pohjolainen

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 30 ++-
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h |  6 ++
 2 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 356bb92..da10cf0 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1682,34 +1682,16 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
  brw_IF(func, BRW_EXECUTE_16);
  {
 brw_MOV(func, vec16(t2), brw_imm_d(5));
-brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
-S, brw_imm_d(1));
-brw_MOV(func, vec16(t2), brw_imm_d(2));
-brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
-S, brw_imm_d(2));
-brw_MOV(func, vec16(t2), brw_imm_d(4));
-brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
-S, brw_imm_d(3));
-brw_MOV(func, vec16(t2), brw_imm_d(6));
-brw_set_predicate_control(func, BRW_PREDICATE_NONE);
+emit_if_eq_mov(S, 1, vec16(t2), 2);
+emit_if_eq_mov(S, 2, vec16(t2), 4);
+emit_if_eq_mov(S, 3, vec16(t2), 6);
  }
  brw_ELSE(func);
  {
 brw_MOV(func, vec16(t2), brw_imm_d(0));
-brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
-S, brw_imm_d(5));
-brw_MOV(func, vec16(t2), brw_imm_d(3));
-brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
-S, brw_imm_d(6));
-brw_MOV(func, vec16(t2), brw_imm_d(7));
-brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
-S, brw_imm_d(7));
-brw_MOV(func, vec16(t2), brw_imm_d(1));
-brw_set_predicate_control(func, BRW_PREDICATE_NONE);
+emit_if_eq_mov(S, 5, vec16(t2), 3);
+emit_if_eq_mov(S, 6, vec16(t2), 7);
+emit_if_eq_mov(S, 7, vec16(t2), 1);
  }
  brw_ENDIF(func);
  brw_MOV(func, vec16(S), t2);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 555b6d3..34e8da9 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -68,6 +68,12 @@ protected:
   brw_set_predicate_control(func, BRW_PREDICATE_NONE);
}
 
+   inline void emit_if_eq_mov(const struct brw_reg x, unsigned y,
+  const struct brw_reg dst, unsigned src)
+   {
+  emit_cond_mov(x, brw_imm_d(y), BRW_CONDITIONAL_EQ, dst, brw_imm_d(src));
+   }
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [v2 04/23] i965/fs: introduce non-compressed equivalent of tex_cms

2014-01-22 Thread Topi Pohjolainen

v2: introduces 'SHADER_OPCODE_TXF_UMS' also for gen8

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com (v1)
---
 src/mesa/drivers/dri/i965/brw_defines.h | 1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp  | 5 +
 src/mesa/drivers/dri/i965/brw_shader.cpp| 3 +++
 src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 4 
 4 files changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 12f7e40..7beda72 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -771,6 +771,7 @@ enum opcode {
SHADER_OPCODE_TXS,
FS_OPCODE_TXB,
SHADER_OPCODE_TXF_CMS,
+   SHADER_OPCODE_TXF_UMS,
SHADER_OPCODE_TXF_MCS,
SHADER_OPCODE_LOD,
SHADER_OPCODE_TG4,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index d257748..a92b8ba 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -431,6 +431,10 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
dst, struct brw_reg src
  else
 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
  break;
+  case SHADER_OPCODE_TXF_UMS:
+ assert(brw-gen = 7);
+ msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
+ break;
   case SHADER_OPCODE_TXF_MCS:
  assert(brw-gen = 7);
  msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
@@ -1658,6 +1662,7 @@ fs_generator::generate_code(exec_list *instructions)
   case SHADER_OPCODE_TXD:
   case SHADER_OPCODE_TXF:
   case SHADER_OPCODE_TXF_CMS:
+  case SHADER_OPCODE_TXF_UMS:
   case SHADER_OPCODE_TXF_MCS:
   case SHADER_OPCODE_TXL:
   case SHADER_OPCODE_TXS:
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index b74d6e8..b38032e 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -442,6 +442,8 @@ brw_instruction_name(enum opcode op)
   return txb;
case SHADER_OPCODE_TXF_CMS:
   return txf_cms;
+   case SHADER_OPCODE_TXF_UMS:
+  return txf_ums;
case SHADER_OPCODE_TXF_MCS:
   return txf_mcs;
case SHADER_OPCODE_TG4:
@@ -539,6 +541,7 @@ backend_instruction::is_tex()
opcode == SHADER_OPCODE_TXD ||
opcode == SHADER_OPCODE_TXF ||
opcode == SHADER_OPCODE_TXF_CMS ||
+   opcode == SHADER_OPCODE_TXF_UMS ||
opcode == SHADER_OPCODE_TXF_MCS ||
opcode == SHADER_OPCODE_TXL ||
opcode == SHADER_OPCODE_TXS ||
diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
index 4e70534..6c710bc 100644
--- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
@@ -197,6 +197,9 @@ gen8_fs_generator::generate_tex(fs_inst *ir,
case SHADER_OPCODE_TXF_CMS:
   msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
   break;
+   case SHADER_OPCODE_TXF_UMS:
+  msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
+  break;
case SHADER_OPCODE_TXF_MCS:
   msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
   break;
@@ -864,6 +867,7 @@ gen8_fs_generator::generate_code(exec_list *instructions)
   case SHADER_OPCODE_TXD:
   case SHADER_OPCODE_TXF:
   case SHADER_OPCODE_TXF_CMS:
+  case SHADER_OPCODE_TXF_UMS:
   case SHADER_OPCODE_TXF_MCS:
   case SHADER_OPCODE_TXL:
   case SHADER_OPCODE_TXS:
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [v2 02/23] i965/blorp: move emission of pixel kill into eu-emitter

2014-01-22 Thread Topi Pohjolainen

The combination of four separate comparison operations and
the masked AND requires special treatment when moving
to FS LIR.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 28 +++--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 28 +
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h   |  7 +++
 3 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index f9c355b..03fabd6 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -640,7 +640,6 @@ private:
void translate_tiling(bool old_tiled_w, bool new_tiled_w);
void encode_msaa(unsigned num_samples, intel_msaa_layout layout);
void decode_msaa(unsigned num_samples, intel_msaa_layout layout);
-   void kill_if_outside_dst_rect();
void translate_dst_to_src();
void clamp_tex_coords(struct brw_reg regX, struct brw_reg regY,
  struct brw_reg clampX0, struct brw_reg clampY0,
@@ -833,7 +832,9 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
 */
 
if (key-use_kill)
-  kill_if_outside_dst_rect();
+  emit_kill_if_outside_rect(x_coords[xy_coord_index],
+y_coords[xy_coord_index],
+dst_x0, dst_x1, dst_y0, dst_y1);
 
/* Next, apply a translation to obtain coordinates in the source image. */
translate_dst_to_src();
@@ -1375,29 +1376,6 @@ brw_blorp_blit_program::decode_msaa(unsigned num_samples,
 }
 
 /**
- * Emit code that kills pixels whose X and Y coordinates are outside the
- * boundary of the rectangle defined by the push constants (dst_x0, dst_y0,
- * dst_x1, dst_y1).
- */
-void
-brw_blorp_blit_program::kill_if_outside_dst_rect()
-{
-   struct brw_reg f0 = brw_flag_reg(0, 0);
-   struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
-   struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
-
-   brw_CMP(func, null32, BRW_CONDITIONAL_GE, X, dst_x0);
-   brw_CMP(func, null32, BRW_CONDITIONAL_GE, Y, dst_y0);
-   brw_CMP(func, null32, BRW_CONDITIONAL_L, X, dst_x1);
-   brw_CMP(func, null32, BRW_CONDITIONAL_L, Y, dst_y1);
-
-   brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-
-   struct brw_instruction *inst = brw_AND(func, g1, f0, g1);
-   inst-header.mask_control = BRW_MASK_DISABLE;
-}
-
-/**
  * Emit code to translate from destination (X, Y) coordinates to source (X, Y)
  * coordinates.
  */
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index 8d723d6..161c679 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -63,3 +63,31 @@ brw_blorp_eu_emitter::get_program(unsigned *program_size, 
FILE *dump_file)
 
return brw_get_program(func, program_size);
 }
+
+/**
+ * Emit code that kills pixels whose X and Y coordinates are outside the
+ * boundary of the rectangle defined by the push constants (dst_x0, dst_y0,
+ * dst_x1, dst_y1).
+ */
+void
+brw_blorp_eu_emitter::emit_kill_if_outside_rect(const struct brw_reg x,
+const struct brw_reg y,
+const struct brw_reg dst_x0,
+const struct brw_reg dst_x1,
+const struct brw_reg dst_y0,
+const struct brw_reg dst_y1)
+{
+   struct brw_reg f0 = brw_flag_reg(0, 0);
+   struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+   struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+
+   brw_CMP(func, null32, BRW_CONDITIONAL_GE, x, dst_x0);
+   brw_CMP(func, null32, BRW_CONDITIONAL_GE, y, dst_y0);
+   brw_CMP(func, null32, BRW_CONDITIONAL_L, x, dst_x1);
+   brw_CMP(func, null32, BRW_CONDITIONAL_L, y, dst_y1);
+
+   brw_set_predicate_control(func, BRW_PREDICATE_NONE);
+
+   struct brw_instruction *inst = brw_AND(func, g1, f0, g1);
+   inst-header.mask_control = BRW_MASK_DISABLE;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 1bcb0d9..3f74e0e 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -35,6 +35,13 @@ protected:
 
const unsigned *get_program(unsigned *program_size, FILE *dump_file);
 
+   void emit_kill_if_outside_rect(const struct brw_reg x,
+  const struct brw_reg y,
+  const struct brw_reg dst_x0,
+  const struct brw_reg dst_x1,
+  const struct brw_reg dst_y0,
+

[Mesa-dev] [v2 07/23] i965/blorp: move emission of sample combining into eu-emitter

2014-01-22 Thread Topi Pohjolainen

v2 (Paul): pass the combining opcode as an argument to emit_combine().
   This keeps manual_blend_average() self-contained
   documentation-wise.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com (v1)
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 14 +-
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 14 ++
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h   |  5 +
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 4bbdf3d..b5f1907 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1534,12 +1534,6 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
 * For integer formats, we replace the add operations with average
 * operations and skip the final division.
 */
-   typedef struct brw_instruction *(*brw_op2_ptr)(struct brw_compile *,
-  struct brw_reg,
-  struct brw_reg,
-  struct brw_reg);
-   brw_op2_ptr combine_op =
-  key-texture_data_type == BRW_REGISTER_TYPE_F ? brw_ADD : brw_AVG;
unsigned stack_depth = 0;
for (unsigned i = 0; i  num_samples; ++i) {
   assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */
@@ -1581,9 +1575,11 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
 
  /* TODO: should use a smaller loop bound for non_RGBA formats */
  for (int k = 0; k  4; ++k) {
-combine_op(func, offset(texture_data[stack_depth - 1], 2*k),
-   offset(vec8(texture_data[stack_depth - 1]), 2*k),
-   offset(vec8(texture_data[stack_depth]), 2*k));
+emit_combine(key-texture_data_type == BRW_REGISTER_TYPE_F ?
+BRW_OPCODE_ADD : BRW_OPCODE_AVG,
+ offset(texture_data[stack_depth - 1], 2*k),
+ offset(vec8(texture_data[stack_depth - 1]), 2*k),
+ offset(vec8(texture_data[stack_depth]), 2*k));
  }
   }
}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index df8d63d..9b63458 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -152,3 +152,17 @@ brw_blorp_eu_emitter::emit_render_target_write(const 
struct brw_reg src0,
 true /* eot */,
 use_header);
 }
+
+void
+brw_blorp_eu_emitter::emit_combine(enum opcode combine_opcode,
+   const struct brw_reg dst,
+   const struct brw_reg src_1,
+   const struct brw_reg src_2)
+{
+   assert(combine_opcode == BRW_OPCODE_ADD || combine_opcode == 
BRW_OPCODE_AVG);
+
+   if (combine_opcode == BRW_OPCODE_ADD)
+  brw_ADD(func, dst, src_1, src_2);
+   else
+  brw_AVG(func, dst, src_1, src_2);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 5f0c8cf..55e05f7 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -52,6 +52,11 @@ protected:
  unsigned msg_length,
  bool use_header);
 
+   void emit_combine(enum opcode combine_opcode,
+ const struct brw_reg dst,
+ const struct brw_reg src_1,
+ const struct brw_reg src_2);
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [v2 18/23] i965/blorp: wrap RNDD (/brw_RNDD(func, /emit_rndd(/)

2014-01-22 Thread Topi Pohjolainen

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 4 ++--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 6 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 4d0b882..aae0704 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1417,8 +1417,8 @@ brw_blorp_blit_program::translate_dst_to_src()
   emit_frc(y_frac, Y_f);
 
   /* Round the float coordinates down to nearest integer */
-  brw_RNDD(func, Xp_f, X_f);
-  brw_RNDD(func, Yp_f, Y_f);
+  emit_rndd(Xp_f, X_f);
+  emit_rndd(Yp_f, Y_f);
   emit_mul(X_f, Xp_f, brw_imm_f(1 / key-x_scale));
   emit_mul(Y_f, Yp_f, brw_imm_f(1 / key-y_scale));
   SWAP_XY_AND_XPYP();
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index f22207d..07c96b0 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -143,6 +143,12 @@ protected:
   brw_FRC(func, dst, src);
}
 
+   inline void emit_rndd(const struct brw_reg dst,
+ const struct brw_reg src)
+   {
+  brw_RNDD(func, dst, src);
+   }
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH RFC 01/11] glsl: fix handling of quadop_vector constant expression

2014-01-22 Thread Connor Abbott

We forgot to handle the case where the base type was a boolean.
---
 src/glsl/ir_constant_expression.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/glsl/ir_constant_expression.cpp 
b/src/glsl/ir_constant_expression.cpp
index f811fd1..9edc378 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -1501,6 +1501,9 @@ ir_expression::constant_expression_value(struct 
hash_table *variable_context)
 case GLSL_TYPE_FLOAT:
data.f[c] = op[c]-value.f[0];
break;
+case GLSL_TYPE_BOOL:
+   data.b[c] = op[c]-value.b[0];
+   break;
 default:
assert(0);
 }
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [v2 15/23] i965/blorp: wrap OR (/brw_OR(func, /emit_or(/)

2014-01-22 Thread Topi Pohjolainen

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 48 +--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h |  7 
 2 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 432c11c..f9d1079 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1167,17 +1167,17 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
   emit_shr(t1, t1, brw_imm_uw(1)); /* (X  ~0b1011)  1 */
   emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
   emit_shl(t2, t2, brw_imm_uw(2)); /* (Y  0b1)  2 */
-  brw_OR(func, t1, t1, t2); /* (X  ~0b1011)  1 | (Y  0b1)  2 */
+  emit_or(t1, t1, t2); /* (X  ~0b1011)  1 | (Y  0b1)  2 */
   emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
-  brw_OR(func, Xp, t1, t2);
+  emit_or(Xp, t1, t2);
   emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
   emit_shl(t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
   emit_and(t2, X, brw_imm_uw(8)); /* X  0b1000 */
   emit_shr(t2, t2, brw_imm_uw(2)); /* (X  0b1000)  2 */
-  brw_OR(func, t1, t1, t2); /* (Y  ~0b1)  1 | (X  0b1000)  2 */
+  emit_or(t1, t1, t2); /* (Y  ~0b1)  1 | (X  0b1000)  2 */
   emit_and(t2, X, brw_imm_uw(2)); /* X  0b10 */
   emit_shr(t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
-  brw_OR(func, Yp, t1, t2);
+  emit_or(Yp, t1, t2);
   SWAP_XY_AND_XPYP();
} else {
   /* Applying the same logic as above, but in reverse, we obtain the
@@ -1190,18 +1190,18 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
   emit_shl(t1, t1, brw_imm_uw(1)); /* (X  ~0b101)  1 */
   emit_and(t2, Y, brw_imm_uw(2)); /* Y  0b10 */
   emit_shl(t2, t2, brw_imm_uw(2)); /* (Y  0b10)  2 */
-  brw_OR(func, t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2 */
+  emit_or(t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2 */
   emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
   emit_shl(t2, t2, brw_imm_uw(1)); /* (Y  0b1)  1 */
-  brw_OR(func, t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2
+  emit_or(t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2
 | (Y  0b1)  1 */
   emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
-  brw_OR(func, Xp, t1, t2);
+  emit_or(Xp, t1, t2);
   emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y  ~0b11 */
   emit_shr(t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
   emit_and(t2, X, brw_imm_uw(4)); /* X  0b100 */
   emit_shr(t2, t2, brw_imm_uw(2)); /* (X  0b100)  2 */
-  brw_OR(func, Yp, t1, t2);
+  emit_or(Yp, t1, t2);
   SWAP_XY_AND_XPYP();
}
 }
@@ -1244,20 +1244,20 @@ brw_blorp_blit_program::encode_msaa(unsigned 
num_samples,
  emit_and(t1, X, brw_imm_uw(0xfffe)); /* X  ~0b1 */
  if (!s_is_zero) {
 emit_and(t2, S, brw_imm_uw(1)); /* S  0b1 */
-brw_OR(func, t1, t1, t2); /* (X  ~0b1) | (S  0b1) */
+emit_or(t1, t1, t2); /* (X  ~0b1) | (S  0b1) */
  }
  emit_shl(t1, t1, brw_imm_uw(1)); /* (X  ~0b1)  1
| (S  0b1)  1 */
  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
- brw_OR(func, Xp, t1, t2);
+ emit_or(Xp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
  emit_shl(t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
  if (!s_is_zero) {
 emit_and(t2, S, brw_imm_uw(2)); /* S  0b10 */
-brw_OR(func, t1, t1, t2); /* (Y  ~0b1)  1 | (S  0b10) */
+emit_or(t1, t1, t2); /* (Y  ~0b1)  1 | (S  0b10) */
  }
  emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
- brw_OR(func, Yp, t1, t2);
+ emit_or(Yp, t1, t2);
  break;
   case 8:
  /* encode_msaa(8, IMS, X, Y, S) = (X', Y', 0)
@@ -1269,22 +1269,22 @@ brw_blorp_blit_program::encode_msaa(unsigned 
num_samples,
  emit_shl(t1, t1, brw_imm_uw(2)); /* (X  ~0b1)  2 */
  if (!s_is_zero) {
 emit_and(t2, S, brw_imm_uw(4)); /* S  0b100 */
-brw_OR(func, t1, t1, t2); /* (X  ~0b1)  2 | (S  0b100) */
+emit_or(t1, t1, t2); /* (X  ~0b1)  2 | (S  0b100) */
 emit_and(t2, S, brw_imm_uw(1)); /* S  0b1 */
 emit_shl(t2, t2, brw_imm_uw(1)); /* (S  0b1)  1 */
-brw_OR(func, t1, t1, t2); /* (X  ~0b1)  2 | (S  0b100)
+emit_or(t1, t1, t2); /* (X  ~0b1)  2 | (S  0b100)
   | (S  0b1)  1 */
  }
  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
- brw_OR(func, Xp, t1, t2);
+ emit_or(Xp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
  emit_shl(t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
  if

[Mesa-dev] [PATCH RFC 10/11] glsl: add a pass to convert out of SSA form

2014-01-22 Thread Connor Abbott

Right now we are being basically as naive as possible, and inserting
more copies than necessary. It is possible to implement a more
sophisticated algorithm later, although extending the current copy
propagation pass to support loops better and/or relying on backends to
do copy propagation may make this unnecessary.
---
 src/glsl/Makefile.sources  |   1 +
 src/glsl/ir_optimization.h |   1 +
 src/glsl/opt_from_ssa.cpp  | 198 +
 3 files changed, 200 insertions(+)
 create mode 100644 src/glsl/opt_from_ssa.cpp

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 961784b..55859ed 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -94,6 +94,7 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/opt_dead_functions.cpp \
$(GLSL_SRCDIR)/opt_flatten_nested_if_blocks.cpp \
$(GLSL_SRCDIR)/opt_flip_matrices.cpp \
+   $(GLSL_SRCDIR)/opt_from_ssa.cpp \
$(GLSL_SRCDIR)/opt_function_inlining.cpp \
$(GLSL_SRCDIR)/opt_if_simplification.cpp \
$(GLSL_SRCDIR)/opt_noop_swizzle.cpp \
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 92c8b57..9c0ff31 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -66,6 +66,7 @@ enum lower_packing_builtins_op {
 };
 
 void convert_to_ssa(exec_list *instructions);
+void convert_from_ssa(exec_list *instructions);
 
 bool do_common_optimization(exec_list *ir, bool linked,
bool uniform_locations_assigned,
diff --git a/src/glsl/opt_from_ssa.cpp b/src/glsl/opt_from_ssa.cpp
new file mode 100644
index 000..6071c45
--- /dev/null
+++ b/src/glsl/opt_from_ssa.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright © 2013 Connor Abbott (con...@abbott.cx)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include ir.h
+#include ir_builder.h
+
+/**
+ * \file opt_from_ssa.cpp
+ *
+ * This file removes all the SSA temporaries and phi nodes from a program. It
+ * implements Method I of the paper "Translating out of Single Static
+ * Assignment Form" by Sreedhar et al., a naive method that inserts many more
+ * copies than necessary; it is assumed that later copy propagation passes will
+ * clean up the result of this pass.
+ */
+
+using namespace ir_builder;
+
+static ir_variable *
+insert_decl(exec_list *instrs, const glsl_type *type, void *mem_ctx)
+{
+   ir_variable *var = new(mem_ctx) ir_variable(type, phi_temp,
+  ir_var_temporary);
+   instrs-push_head(var);
+   return var;
+}
+
+static void
+eliminate_phi_if(ir_phi_if *phi, ir_if *ir, exec_list *instrs)
+{
+   ir_variable *var = insert_decl(instrs, phi-dest-type, ralloc_parent(ir));
+
+   /*
+* This converts the destination of the phi node into a non-SSA variable,
+* which ir_from_ssa_visitor::visit(ir_dereference_variable *) would 
normally
+* do. We need to do this here because otherwise, the assignment we're
+* inserting here will get skipped by the list visitor macro and it won't
+* get converted.
+*/
+
+   ir-insert_after(phi-dest);
+   phi-dest-insert_after(assign(phi-dest, var));
+   phi-dest-data.mode = ir_var_temporary;
+
+   if (phi-if_src != NULL)
+  ir-then_instructions.push_tail(assign(var, phi-if_src));
+
+   if (phi-else_src != NULL)
+  ir-else_instructions.push_tail(assign(var, phi-else_src));
+
+   phi-remove();
+}
+
+static void
+eliminate_phi_loop_begin(ir_phi_loop_begin *phi, ir_loop *ir, exec_list 
*instrs)
+{
+   ir_variable *var = insert_decl(instrs, phi-dest-type, ralloc_parent(ir));
+   ir-body_instructions.push_head(phi-dest);
+   phi-dest-insert_after(assign(phi-dest, var));
+   phi-dest-data.mode = ir_var_temporary;
+
+   if (phi-enter_src != NULL)
+  ir-insert_before(assign(var, phi-enter_src));
+
+   if (phi-repeat_src != NULL)
+  ir-body_instructions.push_tail(assign(var, phi-repeat_src));
+
+

[Mesa-dev] [PATCH RFC 07/11] glsl: add SSA infrastructure

2014-01-22 Thread Connor Abbott

This patch introduces all the changes to the IR that are necessary for
representing programs in the SSA form. This consists of a new variable
mode, the SSA temporary, which is guaranteed to be written to exactly
once, and classes to represent phi nodes in the IR.

In the current code, variables are first declared using an ir_variable
instruction inserted into the instruction stream, and then every
dereference will point to the ir_variable declared earlier. SSA
temporaries, however, do not work this way. Instead, the variable
is declared when it is assigned. That is, the variable is owned by
the one and only instruction where it is defined.

In SSA, phi nodes may exist at the beginning of any join nodes, or
basic blocks with more than one predecessor. In our IR, this can happen
in one of three places:

- After an if statement, where the then branch and the else branch
converge.
- At the beginning of a loop, which can be reached from either before
the loop (on the first iteration), the end of the loop (when we get to
the end of the loop and jump back to the beginning), or any continue
statement.
- At the end of a loop, which can be reached from any break statement
within the loop.

Accordingly, there are three different types of phi nodes: if phi nodes,
phi nodes at the beginning of a loop, and phi nodes at the end of a
loop, all of which derive from the ir_phi base class.
---
 src/glsl/ir.cpp|  56 +++
 src/glsl/ir.h  | 196 -
 src/glsl/ir_clone.cpp  | 147 ---
 src/glsl/ir_hierarchical_visitor.cpp   |  36 +
 src/glsl/ir_hierarchical_visitor.h |  11 ++
 src/glsl/ir_hv_accept.cpp  |  55 ++-
 src/glsl/ir_print_visitor.cpp  | 196 -
 src/glsl/ir_print_visitor.h|  15 ++
 src/glsl/ir_validate.cpp   | 158 +++-
 src/glsl/ir_visitor.h  |   8 +
 src/mesa/drivers/dri/i965/brw_fs.h |   4 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   |  28 
 src/mesa/drivers/dri/i965/brw_vec4.h   |   4 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  24 +++
 src/mesa/program/ir_to_mesa.cpp|  28 
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  29 
 16 files changed, 956 insertions(+), 39 deletions(-)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 1a36bd6..f1ded80 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1229,6 +1229,37 @@ ir_loop::ir_loop()
 }
 
 
+ir_phi::ir_phi()
+{
+   this-dest = NULL;
+}
+
+
+ir_phi_if::ir_phi_if(ir_variable *dest, ir_variable *if_src,
+ir_variable *else_src)
+   : if_src(if_src), else_src(else_src)
+{
+   this-ir_type = ir_type_phi_if;
+   this-dest = dest;
+}
+
+
+ir_phi_loop_begin::ir_phi_loop_begin(ir_variable* dest, ir_variable* enter_src,
+ir_variable* repeat_src)
+   : enter_src(enter_src), repeat_src(repeat_src)
+{
+   this-ir_type = ir_type_phi_loop_begin;
+   this-dest = dest;
+}
+
+
+ir_phi_loop_end::ir_phi_loop_end(ir_variable *dest)
+{
+   this-ir_type = ir_type_phi_loop_end;
+   this-dest = dest;
+}
+
+
 ir_dereference_variable::ir_dereference_variable(ir_variable *var)
 {
assert(var != NULL);
@@ -1554,6 +1585,9 @@ ir_variable::ir_variable(const struct glsl_type *type, 
const char *name,
this-data.max_array_access = 0;
this-data.atomic.buffer_index = 0;
this-data.atomic.offset = 0;
+   this-ssa_assignment = NULL;
+   this-ssa_phi = NULL;
+   this-ssa_call = NULL;
 
if (type != NULL) {
   if (type-base_type == GLSL_TYPE_SAMPLER)
@@ -1722,12 +1756,19 @@ steal_memory(ir_instruction *ir, void *new_ctx)
 {
ir_variable *var = ir-as_variable();
ir_constant *constant = ir-as_constant();
+   ir_dereference_variable *deref = ir-as_dereference_variable();
+   ir_phi *phi = ir-as_phi();
+   ir_phi_loop_begin *phi_loop_begin = ir-as_phi_loop_begin();
+   ir_phi_loop_end *phi_loop_end = ir-as_phi_loop_end();
if (var != NULL  var-constant_value != NULL)
   steal_memory(var-constant_value, ir);
 
if (var != NULL  var-constant_initializer != NULL)
   steal_memory(var-constant_initializer, ir);
 
+   if (deref != NULL  deref-var-data.mode == ir_var_temporary_ssa)
+  steal_memory(deref-var, ir);
+
/* The components of aggregate constants are not visited by the normal
 * visitor, so steal their values by hand.
 */
@@ -1744,6 +1785,21 @@ steal_memory(ir_instruction *ir, void *new_ctx)
   }
}
 
+   if (phi != NULL)
+  steal_memory(phi-dest, new_ctx);
+
+   if (phi_loop_begin != NULL) {
+  foreach_list(n, phi_loop_begin-continue_srcs) {
+ralloc_steal(new_ctx, n);
+  }
+   }
+
+   if (phi_loop_end != NULL) {
+  foreach_list(n, phi_loop_end-break_srcs) {
+ralloc_steal(new_ctx, n);
+  }
+   }
+

[Mesa-dev] [v2 20/23] i965/fs: allow unit tests to dump the final patched assembly

2014-01-22 Thread Topi Pohjolainen

Unit tests comparing generated blorp programs to known good need
to have the dump in designated file instead of in default
standard output. The comparison also expects the jump counters
of if-else-instructions to be correctly set and hence the dump
needs to be taken _after_ 'patch_IF_ELSE()' is run (the default
dump of the fs_generator does this before).

v2 (Paul): dropped the redundant 'dump_enabled' argument

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_fs.h |  5 +++--
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 13 +++--
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index a903908..9984802 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -517,10 +517,11 @@ public:
 
const unsigned *generate_assembly(exec_list *simd8_instructions,
  exec_list *simd16_instructions,
- unsigned *assembly_size);
+ unsigned *assembly_size,
+ FILE *dump_file = NULL);
 
 private:
-   void generate_code(exec_list *instructions);
+   void generate_code(exec_list *instructions, FILE *dump_file);
void generate_fb_write(fs_inst *inst);
void generate_pixel_xy(struct brw_reg dst, bool is_x);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index a92b8ba..bcb47e6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1298,7 +1298,7 @@ fs_generator::generate_untyped_surface_read(fs_inst 
*inst, struct brw_reg dst,
 }
 
 void
-fs_generator::generate_code(exec_list *instructions)
+fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
 {
int last_native_insn_offset = p-next_insn_offset;
const char *last_annotation_string = NULL;
@@ -1807,21 +1807,22 @@ fs_generator::generate_code(exec_list *instructions)
 * which is often something we want to debug.  So this is here in
 * case you're doing that.
 */
-   if (0) {
-  brw_dump_compile(p, stdout, 0, p-next_insn_offset);
+   if (dump_file) {
+  brw_dump_compile(p, dump_file, 0, p-next_insn_offset);
}
 }
 
 const unsigned *
 fs_generator::generate_assembly(exec_list *simd8_instructions,
 exec_list *simd16_instructions,
-unsigned *assembly_size)
+unsigned *assembly_size,
+FILE *dump_file)
 {
assert(simd8_instructions || simd16_instructions);
 
if (simd8_instructions) {
   dispatch_width = 8;
-  generate_code(simd8_instructions);
+  generate_code(simd8_instructions, dump_file);
}
 
if (simd16_instructions) {
@@ -1842,7 +1843,7 @@ fs_generator::generate_assembly(exec_list 
*simd8_instructions,
   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
   dispatch_width = 16;
-  generate_code(simd16_instructions);
+  generate_code(simd16_instructions, dump_file);
}
 
return brw_get_program(p, assembly_size);
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [v2 01/23] i965/blorp: introduce separate eu-emitter for blit compiler

2014-01-22 Thread Topi Pohjolainen

Prepares for presenting blorp blit programs using FS IR that
allows EU-assembly generation using i965 glsl-compiler
backend (fs_generator).

v2: rebased on top of endif-jump counter fix (moving the
added brw_set_uip_jip() into the emitter)

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com (v1)
---
 src/mesa/drivers/dri/i965/Makefile.sources  |  1 +
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 43 ++--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 65 +
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h   | 42 
 4 files changed, 113 insertions(+), 38 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index d0c85cf..a3fb417 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -30,6 +30,7 @@ i965_FILES = \
brw_binding_tables.c \
brw_blorp.cpp \
brw_blorp_blit.cpp \
+   brw_blorp_blit_eu.cpp \
brw_blorp_clear.cpp \
brw_cc.c \
brw_cfg.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 3b92c56..f9c355b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -25,13 +25,11 @@
 #include main/fbobject.h
 #include main/renderbuffer.h
 
-#include glsl/ralloc.h
-
 #include intel_fbo.h
 
 #include brw_blorp.h
 #include brw_context.h
-#include brw_eu.h
+#include brw_blorp_blit_eu.h
 #include brw_state.h
 
 #define FILE_DEBUG_FLAG DEBUG_BLORP
@@ -624,12 +622,11 @@ enum sampler_message_arg
  * (In these formulas, pitch is the number of bytes occupied by a single row
  * of samples).
  */
-class brw_blorp_blit_program
+class brw_blorp_blit_program : public brw_blorp_eu_emitter
 {
 public:
brw_blorp_blit_program(struct brw_context *brw,
   const brw_blorp_blit_prog_key *key);
-   ~brw_blorp_blit_program();
 
const GLuint *compile(struct brw_context *brw, GLuint *program_size,
  FILE *dump_file = stdout);
@@ -668,10 +665,8 @@ private:
 */
static const unsigned LOG2_MAX_BLEND_SAMPLES = 3;
 
-   void *mem_ctx;
struct brw_context *brw;
const brw_blorp_blit_prog_key *key;
-   struct brw_compile func;
 
/* Thread dispatch header */
struct brw_reg R0;
@@ -745,16 +740,10 @@ private:
 brw_blorp_blit_program::brw_blorp_blit_program(
   struct brw_context *brw,
   const brw_blorp_blit_prog_key *key)
-   : mem_ctx(ralloc_context(NULL)),
+   : brw_blorp_eu_emitter(brw),
  brw(brw),
  key(key)
 {
-   brw_init_compile(brw, func, mem_ctx);
-}
-
-brw_blorp_blit_program::~brw_blorp_blit_program()
-{
-   ralloc_free(mem_ctx);
 }
 
 const GLuint *
@@ -806,21 +795,6 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
memset(prog_data, 0, sizeof(prog_data));
prog_data.persample_msaa_dispatch = key-persample_msaa_dispatch;
 
-   /*
-* By default everything is emitted as 16-wide with only a few exceptions
-* handled explicitly either here in the compiler or by one of the specific
-* code emission calls.
-* It should be also noted that here in this file any alterations of the
-* compression control settings are only used to affect the execution size
-* of the instructions. The instruction template used to initialise all the
-* instructions is effectively not altered -- the value stays at zero
-* representing either GEN6_COMPRESSION_1Q or GEN6_COMPRESSION_1H depending
-* on the context.
-* If any other settings are used in the instruction headers, they are set
-* elsewhere by the individual code emission calls.
-*/
-   brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
-
alloc_regs();
compute_frag_coords();
 
@@ -928,14 +902,7 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
 */
render_target_write();
 
-   brw_set_uip_jip(func);
-
-   if (unlikely(INTEL_DEBUG  DEBUG_BLORP)) {
-  printf(Native code for BLORP blit:\n);
-  brw_dump_compile(func, dump_file, 0, func.next_insn_offset);
-  printf(\n);
-   }
-   return brw_get_program(func, program_size);
+   return get_program(program_size, dump_file);
 }
 
 void
@@ -2385,7 +2352,7 @@ brw_blorp_blit_params::get_wm_prog(struct brw_context 
*brw,
  prog_offset, prog_data)) {
   brw_blorp_blit_program prog(brw, this-wm_prog_key);
   GLuint program_size;
-  const GLuint *program = prog.compile(brw, program_size);
+  const GLuint *program = prog.compile(brw, program_size, stdout);
   brw_upload_cache(brw-cache, BRW_BLORP_BLIT_PROG,
this-wm_prog_key, sizeof(this-wm_prog_key),

[Mesa-dev] [v2 08/23] i965/blorp: wrap emission of conditional assignment

2014-01-22 Thread Topi Pohjolainen

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
Reviewed-by: Paul Berry stereotype...@gmail.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 19 ---
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 11 +++
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index b5f1907..356bb92 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1444,21 +1444,10 @@ brw_blorp_blit_program::clamp_tex_coords(struct brw_reg 
regX,
  struct brw_reg clampX1,
  struct brw_reg clampY1)
 {
-   brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_L, regX, clampX0);
-   brw_MOV(func, regX, clampX0);
-   brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-
-   brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_G, regX, clampX1);
-   brw_MOV(func, regX, clampX1);
-   brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-
-   brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_L, regY, clampY0);
-   brw_MOV(func, regY, clampY0);
-   brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-
-   brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_G, regY, clampY1);
-   brw_MOV(func, regY, clampY1);
-   brw_set_predicate_control(func, BRW_PREDICATE_NONE);
+   emit_cond_mov(regX, clampX0, BRW_CONDITIONAL_L, regX, clampX0);
+   emit_cond_mov(regX, clampX1, BRW_CONDITIONAL_G, regX, clampX1);
+   emit_cond_mov(regY, clampY0, BRW_CONDITIONAL_L, regY, clampY0);
+   emit_cond_mov(regY, clampY1, BRW_CONDITIONAL_G, regY, clampY1);
 }
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 55e05f7..555b6d3 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -57,6 +57,17 @@ protected:
  const struct brw_reg src_1,
  const struct brw_reg src_2);
 
+   inline void emit_cond_mov(const struct brw_reg x,
+ const struct brw_reg y,
+ int op,
+ const struct brw_reg dst,
+ const struct brw_reg src)
+   {
+  brw_CMP(func, vec16(brw_null_reg()), op, x, y);
+  brw_MOV(func, dst, src);
+  brw_set_predicate_control(func, BRW_PREDICATE_NONE);
+   }
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [v2 23/23] i965/blorp: switch eu-emitter to use FS IR and fs_generator

2014-01-22 Thread Topi Pohjolainen

No regressions on IVB (piglit quick + unit tests).

v2 (Paul):
  - no need to patch the unit tests anymore. Original logic
was altered and unit tests updated to match the
fs-generator
  - lrp emission moves from the blorp compiler core into the
emitter here (previously there was a separate refactoring
patch which is not really needed anymore as the lrp logic
got refactored when the original lrp logic got fixed).
  - pass 'BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX' to the
generator in fs_inst::target instead of hardcoding it

CC: Paul Berry stereotype...@gmail.com
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp|  20 
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 120 +---
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h   |  64 -
 3 files changed, 84 insertions(+), 120 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 6454d2a..c4d1108 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -654,11 +654,6 @@ private:
const sampler_message_arg *args, int num_args);
void render_target_write();
 
-   void emit_lrp(const struct brw_reg dst,
- const struct brw_reg src1,
- const struct brw_reg src2,
- const struct brw_reg src3);
-
/**
 * Base-2 logarithm of the maximum number of samples that can be blended.
 */
@@ -1585,21 +1580,6 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
 }
 
 void
-brw_blorp_blit_program::emit_lrp(const struct brw_reg dst,
- const struct brw_reg src1,
- const struct brw_reg src2,
- const struct brw_reg src3)
-{
-   brw_set_access_mode(func, BRW_ALIGN_16);
-   brw_set_compression_control(func, BRW_COMPRESSION_NONE);
-   brw_LRP(func, dst, src1, src2, src3);
-   brw_set_compression_control(func, BRW_COMPRESSION_2NDHALF);
-   brw_LRP(func, sechalf(dst), sechalf(src1), sechalf(src2), sechalf(src3));
-   brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
-   brw_set_access_mode(func, BRW_ALIGN_1);
-}
-
-void
 brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
 {
/* We do this computation by performing the following operations:
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index 9b63458..5b652ad 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -26,24 +26,9 @@
 #include brw_blorp.h
 
 brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw)
-   : mem_ctx(ralloc_context(NULL))
+   : mem_ctx(ralloc_context(NULL)), c(rzalloc(mem_ctx, struct brw_wm_compile)),
+ generator(brw, c, NULL, NULL, false)
 {
-   brw_init_compile(brw, func, mem_ctx);
-
-   /*
-* By default everything is emitted as 16-wide with only a few expections
-* handled explicitly either here in the compiler or by one of the specific
-* code emission calls.
-* It should be also noted that here in this file any alterations of the
-* compression control settings are only used to affect the execution size
-* of the instructions. The instruction template used to initialise all the
-* instructions is effectively not altered -- the value stays at zero
-* representing either GEN6_COMPRESSION_1Q or GEN6_COMPRESSION_1H depending
-* on the context.
-* If any other settings are used in the instruction headers, they are set
-* elsewhere by the individual code emission calls.
-*/
-   brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
 }
 
 brw_blorp_eu_emitter::~brw_blorp_eu_emitter()
@@ -54,15 +39,17 @@ brw_blorp_eu_emitter::~brw_blorp_eu_emitter()
 const unsigned *
 brw_blorp_eu_emitter::get_program(unsigned *program_size, FILE *dump_file)
 {
-   brw_set_uip_jip(func);
+   const unsigned *res;
 
if (unlikely(INTEL_DEBUG  DEBUG_BLORP)) {
   printf(Native code for BLORP blit:\n);
-  brw_dump_compile(func, dump_file, 0, func.next_insn_offset);
+  res = generator.generate_assembly(NULL, insts, program_size, dump_file);
   printf(\n);
+   } else {
+  res = generator.generate_assembly(NULL, insts, program_size);
}
 
-   return brw_get_program(func, program_size);
+   return res;
 }
 
 /**
@@ -80,17 +67,15 @@ brw_blorp_eu_emitter::emit_kill_if_outside_rect(const 
struct brw_reg x,
 {
struct brw_reg f0 = brw_flag_reg(0, 0);
struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
-   struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
 
-   brw_CMP(func, null32, BRW_CONDITIONAL_GE, x, dst_x0);
-   brw_CMP(func, null32, BRW_CONDITIONAL_GE, y, dst_y0);
-   brw_CMP(func, null32,

[Mesa-dev] [v2 21/23] i965/fs: introduce blorp specific rt-write for fs_generator

2014-01-22 Thread Topi Pohjolainen

The compiler for blorp programs likes to emit instructions for
the message construction itself meaning that the generator needs
to skip any such when blorp programs are translated for the hw.
In addition, the binding table control is special for blorp
programs and the generator does not need to update the binding
tables associated with the compiler bookkeeping (this in fact
gets thrown away as the blorp compiler sets the program data
in its own way).

v2 (Paul): do not hardcode the binding table index but use
   fs_inst::target instead.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_defines.h|  1 +
 src/mesa/drivers/dri/i965/brw_fs.h |  1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 19 +++
 src/mesa/drivers/dri/i965/brw_shader.cpp   |  2 ++
 4 files changed, 23 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 7beda72..7f4cd10 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -753,6 +753,7 @@ enum opcode {
 * instructions.
 */
FS_OPCODE_FB_WRITE = 128,
+   FS_OPCODE_BLORP_FB_WRITE,
SHADER_OPCODE_RCP,
SHADER_OPCODE_RSQ,
SHADER_OPCODE_SQRT,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 9984802..ea5de22 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -523,6 +523,7 @@ public:
 private:
void generate_code(exec_list *instructions, FILE *dump_file);
void generate_fb_write(fs_inst *inst);
+   void generate_blorp_fb_write(fs_inst *inst);
void generate_pixel_xy(struct brw_reg dst, bool is_x);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
 struct brw_reg *src);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index bcb47e6..29050c9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -190,6 +190,21 @@ fs_generator::generate_fb_write(fs_inst *inst)
mark_surface_used(surf_index);
 }
 
+void
+fs_generator::generate_blorp_fb_write(fs_inst *inst)
+{
+   brw_fb_WRITE(p,
+16 /* dispatch_width */,
+inst-base_mrf,
+brw_reg_from_fs_reg(inst-src[0]),
+BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
+inst-target,
+inst-mlen,
+0,
+true,
+inst-header_present);
+}
+
 /* Computes the integer pixel x,y values from the origin.
  *
  * This is the basis of gl_FragCoord computation, but is also used
@@ -1714,6 +1729,10 @@ fs_generator::generate_code(exec_list *instructions, 
FILE *dump_file)
 generate_fb_write(inst);
 break;
 
+  case FS_OPCODE_BLORP_FB_WRITE:
+generate_blorp_fb_write(inst);
+break;
+
   case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
  generate_mov_dispatch_to_flags(inst);
  break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index b38032e..45bf730 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -406,6 +406,8 @@ brw_instruction_name(enum opcode op)
switch (op) {
case FS_OPCODE_FB_WRITE:
   return fb_write;
+   case FS_OPCODE_BLORP_FB_WRITE:
+  return blorp_fb_write;
 
case SHADER_OPCODE_RCP:
   return rcp;
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH RFC 00/11] glsl: add Single Static Assignment (SSA)

2014-01-22 Thread Connor Abbott

This series enables GLSL IR support for SSA, including passes to convert
to and from SSA form. SSA is a form of the intermediate representation
of a compiler in which each variable is assigned exactly once. SSA form
makes many optimizations faster and easier to write, and enables other
more powerful optimizations. SSA is used in GCC [1] and LLVM [2] as well
as various compiler backends within Mesa itself, such as r600g-sb and
Nouveau. Adding support for SSA will allow the various optimizations
these backends perform to be implemented in one place, instead of
making each driver reinvent the wheel (as several have already done).
Additionally, all new backends would receive these optimizations,
reducing the burden of writing a compiler backend for a new driver.

Even though no optimization passes are now implemented, I am putting out
this series to solicit feedback on the design, to make sure I don't have
to rewrite things before I go ahead and write these new passes.

There are no piglit regressions on Softpipe, except for the
spec/OpenGL 2.0/max-samplers test, which only passed before because the
compiler happened to unroll the loop; the extra copies caused by the
conversion to and from SSA stop the compiler from unrolling, meaning
that the resulting GLSL IR code contains an indirect sampler index which
glsl-to-tgsi can't handle.

Patch 01 is a fix for a bug that came up while Piglit testing this
series.
Patches 02-06 are changes to GLSL IR that are not explicitly related to
enabling SSA, but which are needed by the later patches.
Patch 07 modifies the core GLSL IR support to allow it to represent
shaders in SSA form, and modifies the printer to print phi nodes and SSA
temporaries correctly.
Patch 08 adds a function that will come in handy in patch 09, as well as
later SSA-based optimizations.
Patch 09 adds the code to convert programs to SSA form.
Patch 10 adds the code to eliminate phi nodes and SSA temporaries,
undoing what the code in Patch 09 does.
Patch 11 allows us to Piglit test the series, and will get replaced once
some actual optimization passes are in place.

Some design choices that may need to be discussed:

- ir_variables in SSA form are now owned by the instruction where they
are defined, i.e. there are no separate ir_variable declarations. This
is different from what the compiler currently assumes and requires a lot
of rework in different areas, but I thought it was justified for a
couple of different reasons:

1. In SSA form, usually variable dereferences point to the instruction
in which the variable is written to. Although doing this would be too
much of a rewrite, making variables owned by the instruction where they
are defined provides some of the benefit of this, making some
optimizations such as Global Code Motion [3] easier to write.

2. The original reason for having each ir_variable be declared before it
is read/written to was to preserve the tree structure of the IR by
making sure each ir_variable appeared as a child only once (i.e. in its
declaration). With SSA form, where variables are now written to once, it
makes sense for each variable to be a child of the one time it is
written to.

- The conversion from SSA is currently very naive and inserts many more
copies than necessary. It appears that the current copy propagation pass
is not able to remove many of those copies, especially in loops. It
seems there are a couple different options:

1. Implement Sreedhar's full algorithm; this requires that we implement
liveness analysis in GLSL IR.

2. Improve the current copy propagation pass to eliminate the copies it
can't handle.

3. Leave it alone, and require that backends remove the copies. i965
vec4 and fs backends, for example, already have a more sophisticated
register coalescing pass that does what we need to do, so i965 should be
fine with the extra copies.

Things that are left to do:

- Fixup ir_reader, fix the existing GLSL IR tests, and add more tests
for the conversion to/from SSA.

- Add more optimizations and convert over the existing optimizations.
Some optimizations need to be converted to use SSA, while others will be
replaced by a more powerful version. For example, Global Code Motion and
Global Value Numbering (GVN-GCM) [4] will replace constant propagation,
local value numbering, and some of the loop analysis framework while
being more powerful than all of those passes.

- As mentioned in the introduction, there are various drivers which
already use SSA. These drivers are all Gallium drivers, so it would make
sense to add support for SSA to TGSI so that the code isn't converted to
SSA twice (first in GLSL IR, then in the driver). Also, this would help
new drivers like freedreno that want to use SSA optimizations in their
backend. This may be more controversial, though, and it's outside of the
current scope of this work.

This series is also available at
https://github.com/cwabbott0/mesa/tree/glsl-ir-ssa-rfc

[1] http://gcc.gnu.org/onlinedocs/gccint/SSA.html
[2]

[Mesa-dev] [PATCH RFC 02/11] glsl: add as_loop_jump() method to ir_instruction

2014-01-22 Thread Connor Abbott

This will let us dynamically downcast to ir_loop_jump, which will be
needed later.
---
 src/glsl/ir.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 19e8383..d1e790d 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -138,6 +138,7 @@ public:
virtual class ir_constant *  as_constant() { return NULL; }
virtual class ir_discard *   as_discard()  { return NULL; }
virtual class ir_jump *  as_jump() { return NULL; }
+   virtual class ir_loop_jump * as_loop_jump(){ return NULL; }
/*@}*/
 
/**
@@ -1617,6 +1618,11 @@ public:
 
virtual ir_loop_jump *clone(void *mem_ctx, struct hash_table *) const;
 
+   virtual ir_loop_jump *as_loop_jump()
+   {
+  return this;
+   }
+
virtual void accept(ir_visitor *v)
{
   v-visit(this);
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH RFC 08/11] glsl: add ssa_assign() to ir_builder

2014-01-22 Thread Connor Abbott

ssa_assign() creates an SSA variable and assignment at the same
time. With this, simple sequences of SSA statements can be easily
created.
---
 src/glsl/ir_builder.cpp | 14 ++
 src/glsl/ir_builder.h   |  1 +
 2 files changed, 15 insertions(+)

diff --git a/src/glsl/ir_builder.cpp b/src/glsl/ir_builder.cpp
index 2229cd2..03cb38f 100644
--- a/src/glsl/ir_builder.cpp
+++ b/src/glsl/ir_builder.cpp
@@ -76,6 +76,20 @@ assign(deref lhs, operand rhs, operand condition)
return assign(lhs, rhs, condition, (1  lhs.val-type-vector_elements) - 
1);
 }
 
+ir_assignment *
+ssa_assign(const char *name, operand rhs)
+{
+   void *mem_ctx = ralloc_parent(rhs.val);
+
+   ir_variable *var = new(mem_ctx) ir_variable(rhs.val-type, name,
+  ir_var_temporary_ssa);
+
+   ir_assignment *ret = assign(var, rhs);
+   var-ssa_assignment = ret;
+   return ret;
+}
+
+
 ir_return *
 ret(operand retval)
 {
diff --git a/src/glsl/ir_builder.h b/src/glsl/ir_builder.h
index ae10995..415112b 100644
--- a/src/glsl/ir_builder.h
+++ b/src/glsl/ir_builder.h
@@ -124,6 +124,7 @@ ir_assignment *assign(deref lhs, operand rhs);
 ir_assignment *assign(deref lhs, operand rhs, int writemask);
 ir_assignment *assign(deref lhs, operand rhs, operand condition);
 ir_assignment *assign(deref lhs, operand rhs, operand condition, int 
writemask);
+ir_assignment *ssa_assign(const char *name, operand rhs);
 
 ir_return *ret(operand retval);
 
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH RFC 11/11] glsl: convert to and from SSA form in the compiler

2014-01-22 Thread Connor Abbott

This patch is mainly for allowing me to test these changes with piglit.
In the future, a do_ssa_optimizations() function will need to be
created and used by this code, as well as all other users of
do_common_optimizations().
---
 src/glsl/glsl_parser_extras.cpp | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 9a1e0a2..fc88a4b 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1488,6 +1488,10 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct 
gl_shader *shader,
   struct gl_shader_compiler_options *options =
  ctx-ShaderCompilerOptions[shader-Stage];
 
+  convert_to_ssa(shader-ir);
+
+  convert_from_ssa(shader-ir);
+
   /* Do some optimization at compile time to reduce shader IR size
* and reduce later work if the same shader is linked multiple times
*/
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] mesa: Ensure that transform feedback refers to the correct program.

2014-01-22 Thread Paul Berry

On 22 January 2014 08:20, Kenneth Graunke kenn...@whitecape.org wrote:

 On 01/22/2014 06:07 AM, Paul Berry wrote:
  @@ -376,25 +376,48 @@ _mesa_compute_max_transform_feedback_vertices(
**/
 
 
  +/**
  + * Figure out which stage of the pipeline is the source of transform
 feedback
  + * data given the current context state, and return its
 gl_shader_program.
  + *
  + * If no active program can generate transform feedback data (i.e. no
 vertex
  + * shader is active), returns NULL.
  + */
  +static struct gl_shader_program *
  +get_xfb_source(struct gl_context *ctx)
  +{
  +   int i;
  +   for (i = MESA_SHADER_FRAGMENT - 1; i >= MESA_SHADER_VERTEX; i--) {

 I think this would be clearer as:

 for (i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) {
...
 }

 Note that the pipeline ordering is:
 Vertex -> Tess. Control -> Tess. Eval -> Geometry -> Transform Feedback
 (http://www.opengl.org/wiki/Rendering_Pipeline_Overview)

 So either implementation would work even with tessellation shaders.

 Either way, this series is:
 Reviewed-by: Kenneth Graunke kenn...@whitecape.org
 Cc: 10.0 mesa-sta...@lists.freedesktop.org


That's a good point--I like your suggestion.  Thanks for the review!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [v2 23/23] i965/blorp: switch eu-emitter to use FS IR and fs_generator

2014-01-22 Thread Paul Berry

On 22 January 2014 09:17, Topi Pohjolainen topi.pohjolai...@intel.com wrote:

 No regressions on IVB (piglit quick + unit tests).

 v2 (Paul):
   - no need to patch the unit tests anymore. Original logic
 was altered and unit tests updated to match the
 fs-generator
   - lrp emission moves from the blorp compiler core into the
 emitter here (previously there was a separate refactoring
 patch which is not really needed anymore as the lrp logic
 got refactored when the original lrp logic got fixed).
   - pass 'BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX' to the
 generator in fs_inst::target instead of hardcoding it

 CC: Paul Berry stereotype...@gmail.com
 Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com


Thanks, Topi.  The whole series is now:

Reviewed-by: Paul Berry stereotype...@gmail.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/4] radeon / r200: Fix incompatible pointer type warning

2014-01-22 Thread Alex Deucher

On Mon, Jan 20, 2014 at 2:26 PM, Ian Romanick i...@freedesktop.org wrote:
 From: Ian Romanick ian.d.roman...@intel.com

 When parameters were removed from dd_function_table::Viewport (commit
 065bd6ff), radeon_viewport (in both radeon and r200) started generating
 a warning.

 radeon_common.c: In function 'r200_radeon_viewport':
 radeon_common.c:415:15: warning: assignment from incompatible pointer type 
 [enabled by default]
 radeon_common.c:419:23: warning: assignment from incompatible pointer type 
 [enabled by default]

 I didn't notice this initially, and it's harmless because the function is
 never called through the incorrectly typed pointer.

 Signed-off-by: Ian Romanick ian.d.roman...@intel.com
 Cc: Alex Deucher alexander.deuc...@amd.com
 Cc: Marek Olšák marek.ol...@amd.com

For the series:

Reviewed-by: Alex Deucher alexander.deuc...@amd.com


 ---
  src/mesa/drivers/dri/radeon/radeon_common.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

 diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c 
 b/src/mesa/drivers/dri/radeon/radeon_common.c
 index 2df4388..5c2b823 100644
 --- a/src/mesa/drivers/dri/radeon/radeon_common.c
 +++ b/src/mesa/drivers/dri/radeon/radeon_common.c
 @@ -400,8 +400,7 @@ void radeon_viewport(struct gl_context *ctx)
  {
 radeonContextPtr radeon = RADEON_CONTEXT(ctx);
 __DRIcontext *driContext = radeon-dri.context;
 -   void (*old_viewport)(struct gl_context *ctx, GLint x, GLint y,
 -GLsizei w, GLsizei h);
 +   void (*old_viewport)(struct gl_context *ctx);

 if (_mesa_is_winsys_fbo(ctx-DrawBuffer)) {
 if (radeon-is_front_buffer_rendering) {
 --
 1.8.1.4

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH V3 7/8] glsl: remove remaining is_array variables

2014-01-22 Thread Paul Berry

On 22 January 2014 03:33, Timothy Arceri t_arc...@yahoo.com.au wrote:

 Previously the reason we needed is_array was because we used array_size ==
 NULL to
 represent both non-arrays and unsized arrays.  Now that we use a non-NULL
 array_specifier to represent an unsized array, is_array is redundant.

 Signed-off-by: Timothy Arceri t_arc...@yahoo.com.au
 ---
  src/glsl/ast.h  | 32 +++-
  src/glsl/ast_to_hir.cpp | 10 +-
  src/glsl/ast_type.cpp   |  6 ++
  src/glsl/glsl_parser.yy | 30 +++---
  src/glsl/glsl_parser_extras.cpp | 15 ++-
  5 files changed, 35 insertions(+), 58 deletions(-)


When I try to build this patch I get the compile error:

  CXX  ast_to_hir.lo
../../src/glsl/ast_to_hir.cpp: In member function 'virtual ir_rvalue*
ast_declarator_list::hir(exec_list*, _mesa_glsl_parse_state*)':
../../src/glsl/ast_to_hir.cpp:2877:12: error: 'class ast_declaration' has
no member named 'is_array'
   assert(!decl-is_array);
^

I'm guessing you missed this because you are building in release mode, so
asserts didn't get compiled.  If that's the case, I'd like to encourage you
to do a debug build and double-check that piglit tests still pass.  You can
do that by adding the --enable-debug option when you run ./autogen.sh.


With that fixed, the series is:

Reviewed-by: Paul Berry stereotype...@gmail.com

I'll try to push my patch "glsl: Simplify aggregate type inference to
prepare for ARB_arrays_of_arrays." within the next hour.  Do you have
commit access to Mesa or would you like me to push the series for you?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Can we commit the Spanish and Catalan translations?

2014-01-22 Thread Alex Henrie

Hi,

It's been a week and no new concerns have been raised with the
proposed Spanish and Catalan translations:

http://lists.freedesktop.org/archives/mesa-dev/2014-January/051610.html
http://lists.freedesktop.org/archives/mesa-dev/2014-January/051611.html
http://lists.freedesktop.org/archives/mesa-dev/2014-January/051612.html

Can they be committed now?

-Alex
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] i965: Replace *_generator::shader with is_glsl boolean.

2014-01-22 Thread Paul Berry

The shader field in fs_generator, vec4_generator, and gen8_generator
was only used for one purpose: to figure out if we were compiling an
assembly shader or a GLSL shader.  And it wasn't being used properly:
in vec4 shaders we were always initializing it based on
prog->_LinkedShaders[MESA_SHADER_FRAGMENT], regardless of whether we
were compiling a geometry shader or a vertex shader.

This was a fairly benign problem, since it's unlikely that a
real-world program will try to mix and match GLSL and assembly shaders
using separate shader objects.  But it seems worth fixing.

This patch replaces the shader field with a new is_glsl boolean, and
initializes it based on information from the caller, so that it always
refers to the correct shader stage.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp  |  6 --
 src/mesa/drivers/dri/i965/brw_fs.h|  8 +---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp| 12 ++--
 src/mesa/drivers/dri/i965/brw_vec4.cpp|  4 ++--
 src/mesa/drivers/dri/i965/brw_vec4.h  |  8 +---
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp  | 11 +--
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  7 +--
 src/mesa/drivers/dri/i965/gen8_fs_generator.cpp   | 13 ++---
 src/mesa/drivers/dri/i965/gen8_generator.cpp  |  6 --
 src/mesa/drivers/dri/i965/gen8_generator.h|  5 +++--
 src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp | 10 +-
 11 files changed, 50 insertions(+), 40 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a0e4830..c0d65d5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3512,11 +3512,13 @@ brw_wm_fs_emit(struct brw_context *brw, struct 
brw_wm_compile *c,
 
const unsigned *assembly = NULL;
if (brw-gen = 8) {
-  gen8_fs_generator g(brw, c, prog, fp, v.dual_src_output.file != 
BAD_FILE);
+  gen8_fs_generator g(brw, c, prog, fp, v.dual_src_output.file != BAD_FILE,
+  shader != NULL);
   assembly = g.generate_assembly(v.instructions, simd16_instructions,
  final_assembly_size);
} else {
-  fs_generator g(brw, c, prog, fp, v.dual_src_output.file != BAD_FILE);
+  fs_generator g(brw, c, prog, fp, v.dual_src_output.file != BAD_FILE,
+ shader != NULL);
   assembly = g.generate_assembly(v.instructions, simd16_instructions,
  final_assembly_size);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index a903908..ad0aa99 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -512,7 +512,8 @@ public:
 struct brw_wm_compile *c,
 struct gl_shader_program *prog,
 struct gl_fragment_program *fp,
-bool dual_source_output);
+bool dual_source_output,
+bool is_glsl);
~fs_generator();
 
const unsigned *generate_assembly(exec_list *simd8_instructions,
@@ -615,7 +616,6 @@ private:
struct brw_wm_compile *c;
 
struct gl_shader_program *prog;
-   struct gl_shader *shader;
const struct gl_fragment_program *fp;
 
unsigned dispatch_width; /** 8 or 16 */
@@ -623,6 +623,7 @@ private:
exec_list discard_halt_patches;
bool dual_source_output;
void *mem_ctx;
+   const bool is_glsl;
 };
 
 /**
@@ -637,7 +638,8 @@ public:
  struct brw_wm_compile *c,
  struct gl_shader_program *prog,
  struct gl_fragment_program *fp,
- bool dual_source_output);
+ bool dual_source_output,
+ bool is_glsl);
~gen8_fs_generator();
 
const unsigned *generate_assembly(exec_list *simd8_instructions,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index e701fc5..a8e81b8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -40,14 +40,14 @@ fs_generator::fs_generator(struct brw_context *brw,
struct brw_wm_compile *c,
struct gl_shader_program *prog,
struct gl_fragment_program *fp,
-   bool dual_source_output)
+   bool dual_source_output,
+   bool is_glsl)
 
-   : brw(brw), c(c), prog(prog), fp(fp), dual_source_output(dual_source_output)
+   : brw(brw), c(c), prog(prog), fp(fp),
+ dual_source_output(dual_source_output), is_glsl(is_glsl)
 {
ctx = brw-ctx;
 
-   shader = prog ? prog-_LinkedShaders[MESA_SHADER_FRAGMENT] : NULL;
-
mem_ctx = c;
 
p = rzalloc(mem_ctx, struct brw_compile);
@@ -1301,7 +1301,7 @@ fs_generator::generate_code(exec_list *instructions)

[Mesa-dev] [PATCH 02/22] r600g: only emit NOP relocations for queries if VM is disabled

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/radeon/r600_cs.h| 14 ++
 src/gallium/drivers/radeon/r600_query.c | 14 +-
 src/gallium/drivers/radeon/r600_streamout.c | 14 --
 3 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_cs.h 
b/src/gallium/drivers/radeon/r600_cs.h
index fa749da..c3af3de 100644
--- a/src/gallium/drivers/radeon/r600_cs.h
+++ b/src/gallium/drivers/radeon/r600_cs.h
@@ -66,6 +66,20 @@ static INLINE unsigned r600_context_bo_reloc(struct 
r600_common_context *rctx,
return rctx-ws-cs_add_reloc(ring-cs, rbo-cs_buf, usage, 
rbo-domains) * 4;
 }
 
+static INLINE void r600_emit_reloc(struct r600_common_context *rctx,
+  struct r600_ring *ring, struct r600_resource 
*rbo,
+  enum radeon_bo_usage usage)
+{
+   struct radeon_winsys_cs *cs = ring-cs;
+   bool has_vm = ((struct 
r600_common_screen*)rctx-b.screen)-info.r600_virtual_address;
+   unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage);
+
+   if (!has_vm) {
+   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+   radeon_emit(cs, reloc);
+   }
+}
+
 static INLINE void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, 
unsigned reg, unsigned num)
 {
assert(reg  R600_CONTEXT_REG_OFFSET);
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index ea9ad11..14e8427 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -206,8 +206,7 @@ static void r600_emit_query_begin(struct 
r600_common_context *ctx, struct r600_q
default:
assert(0);
}
-   cs-buf[cs-cdw++] = PKT3(PKT3_NOP, 0, 0);
-   cs-buf[cs-cdw++] = r600_context_bo_reloc(ctx, ctx-rings.gfx, 
query-buffer.buf, RADEON_USAGE_WRITE);
+   r600_emit_reloc(ctx, ctx-rings.gfx, query-buffer.buf, 
RADEON_USAGE_WRITE);
 
if (!r600_is_timer_query(query-type)) {
ctx-num_cs_dw_nontimer_queries_suspend += query-num_cs_dw;
@@ -272,8 +271,7 @@ static void r600_emit_query_end(struct r600_common_context 
*ctx, struct r600_que
default:
assert(0);
}
-   cs-buf[cs-cdw++] = PKT3(PKT3_NOP, 0, 0);
-   cs-buf[cs-cdw++] = r600_context_bo_reloc(ctx, ctx-rings.gfx, 
query-buffer.buf, RADEON_USAGE_WRITE);
+   r600_emit_reloc(ctx, ctx-rings.gfx, query-buffer.buf, 
RADEON_USAGE_WRITE);
 
query-buffer.results_end += query-result_size;
 
@@ -322,8 +320,7 @@ static void r600_emit_query_predication(struct 
r600_common_context *ctx, struct
cs-buf[cs-cdw++] = PKT3(PKT3_SET_PREDICATION, 
1, 0);
cs-buf[cs-cdw++] = (va + results_base)  
0xUL;
cs-buf[cs-cdw++] = op | (((va + results_base) 
 32UL)  0xFF);
-   cs-buf[cs-cdw++] = PKT3(PKT3_NOP, 0, 0);
-   cs-buf[cs-cdw++] = r600_context_bo_reloc(ctx, 
ctx-rings.gfx, qbuf-buf, RADEON_USAGE_READ);
+   r600_emit_reloc(ctx, ctx-rings.gfx, 
qbuf-buf, RADEON_USAGE_READ);
results_base += query-result_size;
 
/* set CONTINUE bit for all packets except the 
first */
@@ -818,10 +815,9 @@ void r600_query_init_backend_mask(struct 
r600_common_context *ctx)
cs-buf[cs-cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
cs-buf[cs-cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | 
EVENT_INDEX(1);
cs-buf[cs-cdw++] = va;
-   cs-buf[cs-cdw++] = (va  32UL)  0xFF;
+   cs-buf[cs-cdw++] = va  32;
 
-   cs-buf[cs-cdw++] = PKT3(PKT3_NOP, 0, 0);
-   cs-buf[cs-cdw++] = r600_context_bo_reloc(ctx, 
ctx-rings.gfx, buffer, RADEON_USAGE_WRITE);
+   r600_emit_reloc(ctx, ctx-rings.gfx, buffer, 
RADEON_USAGE_WRITE);
 
/* analyze results */
results = r600_buffer_map_sync_with_rings(ctx, buffer, 
PIPE_TRANSFER_READ);
diff --git a/src/gallium/drivers/radeon/r600_streamout.c 
b/src/gallium/drivers/radeon/r600_streamout.c
index 0f65de4..adc11e0 100644
--- a/src/gallium/drivers/radeon/r600_streamout.c
+++ b/src/gallium/drivers/radeon/r600_streamout.c
@@ -199,20 +199,6 @@ static void evergreen_set_streamout_enable(struct 
r600_common_context *rctx, uns
}
 }
 
-static void r600_emit_reloc(struct r600_common_context *rctx,
-   struct r600_ring *ring, struct r600_resource *rbo,
-   enum radeon_bo_usage usage)
-{
-   struct radeon_winsys_cs *cs = ring-cs;
-   bool has_vm = ((struct 
r600_common_screen*)rctx-b.screen)-info.r600_virtual_address;
-   unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage);
-
-   if (!has_vm) {
-

[Mesa-dev] [PATCH 10/22] r600g, radeonsi: consolidate get_timestamp, get_driver_query_info

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

This enables more queries for the Gallium HUD with radeonsi.
---
 src/gallium/drivers/r600/r600_pipe.c  | 32 --
 src/gallium/drivers/radeon/r600_pipe_common.c | 33 +++
 src/gallium/drivers/radeonsi/si_pipe.c|  9 
 src/gallium/drivers/radeonsi/si_state_draw.c  |  1 +
 4 files changed, 34 insertions(+), 41 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 30cf8c4..182a122 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -750,36 +750,6 @@ static void r600_destroy_screen(struct pipe_screen* 
pscreen)
FREE(rscreen);
 }
 
-static uint64_t r600_get_timestamp(struct pipe_screen *screen)
-{
-   struct r600_screen *rscreen = (struct r600_screen*)screen;
-
-   return 100 * rscreen-b.ws-query_value(rscreen-b.ws, 
RADEON_TIMESTAMP) /
-   rscreen-b.info.r600_clock_crystal_freq;
-}
-
-static int r600_get_driver_query_info(struct pipe_screen *screen,
- unsigned index,
- struct pipe_driver_query_info *info)
-{
-   struct r600_screen *rscreen = (struct r600_screen*)screen;
-   struct pipe_driver_query_info list[] = {
-   {draw-calls, R600_QUERY_DRAW_CALLS, 0},
-   {requested-VRAM, R600_QUERY_REQUESTED_VRAM, 
rscreen-b.info.vram_size, TRUE},
-   {requested-GTT, R600_QUERY_REQUESTED_GTT, 
rscreen-b.info.gart_size, TRUE},
-   {buffer-wait-time, R600_QUERY_BUFFER_WAIT_TIME, 0, FALSE}
-   };
-
-   if (!info)
-   return Elements(list);
-
-   if (index = Elements(list))
-   return 0;
-
-   *info = list[index];
-   return 1;
-}
-
 static struct pipe_resource *r600_resource_create(struct pipe_screen *screen,
  const struct pipe_resource 
*templ)
 {
@@ -807,13 +777,11 @@ struct pipe_screen *r600_screen_create(struct 
radeon_winsys *ws)
rscreen-b.b.get_shader_param = r600_get_shader_param;
rscreen-b.b.get_paramf = r600_get_paramf;
rscreen-b.b.get_compute_param = r600_get_compute_param;
-   rscreen-b.b.get_timestamp = r600_get_timestamp;
if (rscreen-b.info.chip_class = EVERGREEN) {
rscreen-b.b.is_format_supported = 
evergreen_is_format_supported;
} else {
rscreen-b.b.is_format_supported = r600_is_format_supported;
}
-   rscreen-b.b.get_driver_query_info = r600_get_driver_query_info;
if (rscreen-b.info.has_uvd) {
rscreen-b.b.get_video_param = ruvd_get_video_param;
rscreen-b.b.is_video_format_supported = 
ruvd_is_format_supported;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index cabc6ef..ccd27fd 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -27,6 +27,7 @@
 #include r600_pipe_common.h
 #include r600_cs.h
 #include tgsi/tgsi_parse.h
+#include util/u_memory.h
 #include util/u_format_s3tc.h
 #include util/u_upload_mgr.h
 #include inttypes.h
@@ -188,6 +189,36 @@ static const char* r600_get_name(struct pipe_screen* 
pscreen)
}
 }
 
+static uint64_t r600_get_timestamp(struct pipe_screen *screen)
+{
+   struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+
+   return 100 * rscreen-ws-query_value(rscreen-ws, 
RADEON_TIMESTAMP) /
+   rscreen-info.r600_clock_crystal_freq;
+}
+
+static int r600_get_driver_query_info(struct pipe_screen *screen,
+ unsigned index,
+ struct pipe_driver_query_info *info)
+{
+   struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+   struct pipe_driver_query_info list[] = {
+   {draw-calls, R600_QUERY_DRAW_CALLS, 0},
+   {requested-VRAM, R600_QUERY_REQUESTED_VRAM, 
rscreen-info.vram_size, TRUE},
+   {requested-GTT, R600_QUERY_REQUESTED_GTT, 
rscreen-info.gart_size, TRUE},
+   {buffer-wait-time, R600_QUERY_BUFFER_WAIT_TIME, 0, FALSE}
+   };
+
+   if (!info)
+   return Elements(list);
+
+   if (index = Elements(list))
+   return 0;
+
+   *info = list[index];
+   return 1;
+}
+
 static void r600_fence_reference(struct pipe_screen *screen,
 struct pipe_fence_handle **ptr,
 struct pipe_fence_handle *fence)
@@ -343,6 +374,8 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
 
rscreen-b.get_name = r600_get_name;
rscreen-b.get_vendor = r600_get_vendor;
+   rscreen-b.get_driver_query_info = r600_get_driver_query_info;
+   rscreen-b.get_timestamp =

[Mesa-dev] [PATCH 12/22] r600g, radeonsi: consolidate get_paramf and get_video_param

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

radeonsi now reports PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE = true if UVD support
isn't available. It's what all the other drivers do.

Also, some #include directives were missing in radeon_uvd.h.
---
 src/gallium/drivers/r600/r600_pipe.c  | 64 -
 src/gallium/drivers/radeon/r600_pipe_common.c | 67 +++
 src/gallium/drivers/radeon/radeon_uvd.h   |  3 ++
 src/gallium/drivers/radeonsi/si_pipe.c| 54 -
 4 files changed, 70 insertions(+), 118 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index e2d97e5..aa6ebc0 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -445,34 +445,6 @@ static int r600_get_param(struct pipe_screen* pscreen, 
enum pipe_cap param)
return 0;
 }
 
-static float r600_get_paramf(struct pipe_screen* pscreen,
-enum pipe_capf param)
-{
-   struct r600_screen *rscreen = (struct r600_screen *)pscreen;
-   enum radeon_family family = rscreen-b.family;
-
-   switch (param) {
-   case PIPE_CAPF_MAX_LINE_WIDTH:
-   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
-   case PIPE_CAPF_MAX_POINT_WIDTH:
-   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
-   if (family = CHIP_CEDAR)
-   return 16384.0f;
-   else
-   return 8192.0f;
-   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
-   return 16.0f;
-   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
-   return 16.0f;
-   case PIPE_CAPF_GUARD_BAND_LEFT:
-   case PIPE_CAPF_GUARD_BAND_TOP:
-   case PIPE_CAPF_GUARD_BAND_RIGHT:
-   case PIPE_CAPF_GUARD_BAND_BOTTOM:
-   return 0.0f;
-   }
-   return 0.0f;
-}
-
 static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, 
enum pipe_shader_cap param)
 {
switch(shader)
@@ -536,34 +508,6 @@ static int r600_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, e
return 0;
 }
 
-static int r600_get_video_param(struct pipe_screen *screen,
-   enum pipe_video_profile profile,
-   enum pipe_video_entrypoint entrypoint,
-   enum pipe_video_cap param)
-{
-   switch (param) {
-   case PIPE_VIDEO_CAP_SUPPORTED:
-   return vl_profile_supported(screen, profile, entrypoint);
-   case PIPE_VIDEO_CAP_NPOT_TEXTURES:
-   return 1;
-   case PIPE_VIDEO_CAP_MAX_WIDTH:
-   case PIPE_VIDEO_CAP_MAX_HEIGHT:
-   return vl_video_buffer_max_size(screen);
-   case PIPE_VIDEO_CAP_PREFERED_FORMAT:
-   return PIPE_FORMAT_NV12;
-   case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
-   return false;
-   case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
-   return false;
-   case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
-   return true;
-   case PIPE_VIDEO_CAP_MAX_LEVEL:
-   return vl_level_supported(screen, profile);
-   default:
-   return 0;
-   }
-}
-
 const char * r600_llvm_gpu_string(enum radeon_family family)
 {
const char * gpu_family;
@@ -775,20 +719,12 @@ struct pipe_screen *r600_screen_create(struct 
radeon_winsys *ws)
rscreen-b.b.destroy = r600_destroy_screen;
rscreen-b.b.get_param = r600_get_param;
rscreen-b.b.get_shader_param = r600_get_shader_param;
-   rscreen-b.b.get_paramf = r600_get_paramf;
rscreen-b.b.get_compute_param = r600_get_compute_param;
if (rscreen-b.info.chip_class = EVERGREEN) {
rscreen-b.b.is_format_supported = 
evergreen_is_format_supported;
} else {
rscreen-b.b.is_format_supported = r600_is_format_supported;
}
-   if (rscreen-b.info.has_uvd) {
-   rscreen-b.b.get_video_param = ruvd_get_video_param;
-   rscreen-b.b.is_video_format_supported = 
ruvd_is_format_supported;
-   } else {
-   rscreen-b.b.get_video_param = r600_get_video_param;
-   rscreen-b.b.is_video_format_supported = 
vl_video_buffer_is_format_supported;
-   }
rscreen-b.b.resource_create = r600_resource_create;
 
if (!r600_common_screen_init(rscreen-b, ws)) {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index ccd27fd..7447eea 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -24,12 +24,15 @@
  *
  */
 
+#include radeon/radeon_uvd.h
 #include r600_pipe_common.h
 #include r600_cs.h
 #include tgsi/tgsi_parse.h
 #include util/u_memory.h
 #include util/u_format_s3tc.h
 #include util/u_upload_mgr.h
+#include vl/vl_decoder.h
+#include vl/vl_video_buffer.h
 #include inttypes.h
 
 /*
@@ -189,6 +192,61 @@ static const char*

[Mesa-dev] [PATCH 05/22] radeonsi: use queries from r600g

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/radeonsi/Makefile.sources |   1 -
 src/gallium/drivers/radeonsi/si.h |  45 ---
 src/gallium/drivers/radeonsi/si_blit.c|  22 +-
 src/gallium/drivers/radeonsi/si_hw_context.c  | 525 +-
 src/gallium/drivers/radeonsi/si_pipe.c|  13 +-
 src/gallium/drivers/radeonsi/si_pipe.h|  19 -
 src/gallium/drivers/radeonsi/si_query.c   | 147 
 src/gallium/drivers/radeonsi/si_state.c   |  15 +
 src/gallium/drivers/radeonsi/si_state_draw.c  |  10 +-
 9 files changed, 40 insertions(+), 757 deletions(-)
 delete mode 100644 src/gallium/drivers/radeonsi/si_query.c

diff --git a/src/gallium/drivers/radeonsi/Makefile.sources 
b/src/gallium/drivers/radeonsi/Makefile.sources
index 33f1492..5629572 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -7,7 +7,6 @@ C_SOURCES := \
si_hw_context.c \
si_pipe.c \
si_pm4.c \
-   si_query.c \
si_resource.c \
si_shader.c \
si_state.c \
diff --git a/src/gallium/drivers/radeonsi/si.h 
b/src/gallium/drivers/radeonsi/si.h
index 46184ec..5c69b07 100644
--- a/src/gallium/drivers/radeonsi/si.h
+++ b/src/gallium/drivers/radeonsi/si.h
@@ -33,56 +33,11 @@
 #include si_resource.h
 
 struct winsys_handle;
-
-/* R600/R700 STATES */
-struct si_query {
-   union {
-   uint64_tu64;
-   boolean b;
-   struct pipe_query_data_so_statistics so;
-   } result;
-   /* The kind of query */
-   unsignedtype;
-   /* Offset of the first result for current query */
-   unsignedresults_start;
-   /* Offset of the next free result after current query data */
-   unsignedresults_end;
-   /* Size of the result in memory for both begin_query and end_query,
-* this can be one or two numbers, or it could even be a size of a 
structure. */
-   unsignedresult_size;
-   /* The buffer where query results are stored. It's used as a ring,
-* data blocks for current query are stored sequentially from
-* results_start to results_end, with wrapping on the buffer end */
-   struct r600_resource*buffer;
-   /* The number of dwords for begin_query or end_query. */
-   unsignednum_cs_dw;
-   /* linked list of queries */
-   struct list_headlist;
-};
-
 struct si_context;
 struct si_screen;
 
-void si_get_backend_mask(struct si_context *ctx);
 void si_context_flush(struct si_context *ctx, unsigned flags);
 void si_begin_new_cs(struct si_context *ctx);
-
-struct si_query *si_context_query_create(struct si_context *ctx, unsigned 
query_type);
-void si_context_query_destroy(struct si_context *ctx, struct si_query *query);
-boolean si_context_query_result(struct si_context *ctx,
-   struct si_query *query,
-   boolean wait, void *vresult);
-void si_query_begin(struct si_context *ctx, struct si_query *query);
-void si_query_end(struct si_context *ctx, struct si_query *query);
-void si_context_queries_suspend(struct si_context *ctx);
-void si_context_queries_resume(struct si_context *ctx);
-void si_query_predication(struct si_context *ctx, struct si_query *query, int 
operation,
-   int flag_wait);
-
-bool si_is_timer_query(unsigned type);
-bool si_query_needs_begin(unsigned type);
 void si_need_cs_space(struct si_context *ctx, unsigned num_dw, boolean 
count_draw_in);
 
-int si_context_init(struct si_context *ctx);
-
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index ba2ebe7..250caab 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -51,7 +51,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum 
si_blitter_op op)
 {
struct si_context *sctx = (struct si_context *)ctx;
 
-   si_context_queries_suspend(sctx);
+   r600_suspend_nontimer_queries(sctx-b);
 
util_blitter_save_blend(sctx-blitter, sctx-queued.named.blend);
util_blitter_save_depth_stencil_alpha(sctx-blitter, 
sctx-queued.named.dsa);
@@ -81,26 +81,18 @@ static void si_blitter_begin(struct pipe_context *ctx, enum 
si_blitter_op op)
sctx-samplers[PIPE_SHADER_FRAGMENT].views.views);
}
 
-   if ((op  SI_DISABLE_RENDER_COND)  sctx-current_render_cond) {
-   sctx-saved_render_cond = sctx-current_render_cond;
-   sctx-saved_render_cond_cond = sctx-current_render_cond_cond;
-   sctx-saved_render_cond_mode = sctx-current_render_cond_mode;
-

[Mesa-dev] [PATCH 13/22] r600g, radeonsi: consolidate get_compute_param

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/r600/r600_pipe.c  | 162 --
 src/gallium/drivers/r600/r600_pipe.h  |   3 -
 src/gallium/drivers/radeon/r600_pipe_common.c | 157 +
 src/gallium/drivers/radeon/r600_pipe_common.h |   1 +
 src/gallium/drivers/radeonsi/si_pipe.c| 100 
 src/gallium/drivers/radeonsi/si_pipe.h|   1 -
 src/gallium/drivers/radeonsi/si_shader.c  |   2 +-
 7 files changed, 159 insertions(+), 267 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index aa6ebc0..d95e717 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -508,167 +508,6 @@ static int r600_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, e
return 0;
 }
 
-const char * r600_llvm_gpu_string(enum radeon_family family)
-{
-   const char * gpu_family;
-
-   switch (family) {
-   case CHIP_R600:
-   case CHIP_RV630:
-   case CHIP_RV635:
-   case CHIP_RV670:
-   gpu_family = r600;
-   break;
-   case CHIP_RV610:
-   case CHIP_RV620:
-   case CHIP_RS780:
-   case CHIP_RS880:
-   gpu_family = rs880;
-   break;
-   case CHIP_RV710:
-   gpu_family = rv710;
-   break;
-   case CHIP_RV730:
-   gpu_family = rv730;
-   break;
-   case CHIP_RV740:
-   case CHIP_RV770:
-   gpu_family = rv770;
-   break;
-   case CHIP_PALM:
-   case CHIP_CEDAR:
-   gpu_family = cedar;
-   break;
-   case CHIP_SUMO:
-   case CHIP_SUMO2:
-   gpu_family = sumo;
-   break;
-   case CHIP_REDWOOD:
-   gpu_family = redwood;
-   break;
-   case CHIP_JUNIPER:
-   gpu_family = juniper;
-   break;
-   case CHIP_HEMLOCK:
-   case CHIP_CYPRESS:
-   gpu_family = cypress;
-   break;
-   case CHIP_BARTS:
-   gpu_family = barts;
-   break;
-   case CHIP_TURKS:
-   gpu_family = turks;
-   break;
-   case CHIP_CAICOS:
-   gpu_family = caicos;
-   break;
-   case CHIP_CAYMAN:
-case CHIP_ARUBA:
-   gpu_family = cayman;
-   break;
-   default:
-   gpu_family = ;
-   fprintf(stderr, Chip not supported by r600 llvm 
-   backend, please file a bug at  PACKAGE_BUGREPORT 
\n);
-   break;
-   }
-   return gpu_family;
-}
-
-
-static int r600_get_compute_param(struct pipe_screen *screen,
-enum pipe_compute_cap param,
-void *ret)
-{
-   struct r600_screen *rscreen = (struct r600_screen *)screen;
-   //TODO: select these params by asic
-   switch (param) {
-   case PIPE_COMPUTE_CAP_IR_TARGET: {
-   const char *gpu = r600_llvm_gpu_string(rscreen-b.family);
-   if (ret) {
-   sprintf(ret, %s-r600--, gpu);
-   }
-   return (8 + strlen(gpu)) * sizeof(char);
-   }
-   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
-   if (ret) {
-   uint64_t * grid_dimension = ret;
-   grid_dimension[0] = 3;
-   }
-   return 1 * sizeof(uint64_t);
-
-   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
-   if (ret) {
-   uint64_t * grid_size = ret;
-   grid_size[0] = 65535;
-   grid_size[1] = 65535;
-   grid_size[2] = 1;
-   }
-   return 3 * sizeof(uint64_t) ;
-
-   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
-   if (ret) {
-   uint64_t * block_size = ret;
-   block_size[0] = 256;
-   block_size[1] = 256;
-   block_size[2] = 256;
-   }
-   return 3 * sizeof(uint64_t);
-
-   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
-   if (ret) {
-   uint64_t * max_threads_per_block = ret;
-   *max_threads_per_block = 256;
-   }
-   return sizeof(uint64_t);
-
-   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
-   if (ret) {
-   uint64_t * max_global_size = ret;
-   /* XXX: This is what the proprietary driver reports, we
-* may want to use a different value. */
-   *max_global_size = 201326592;
-   }
-   return sizeof(uint64_t);
-
-   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
-   if (ret) {
-   uint64_t * max_input_size = ret;
-   *max_input_size = 1024;
-

[Mesa-dev] [PATCH 18/22] radeonsi: remove si.h

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/radeonsi/si.h  | 43 --
 src/gallium/drivers/radeonsi/si_pipe.c |  1 -
 src/gallium/drivers/radeonsi/si_pipe.h |  6 -
 3 files changed, 5 insertions(+), 45 deletions(-)
 delete mode 100644 src/gallium/drivers/radeonsi/si.h

diff --git a/src/gallium/drivers/radeonsi/si.h 
b/src/gallium/drivers/radeonsi/si.h
deleted file mode 100644
index 5c69b07..000
--- a/src/gallium/drivers/radeonsi/si.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse gli...@freedesktop.org
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the Software),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *  Jerome Glisse
- */
-#ifndef SI_H
-#define SI_H
-
-#include ../../winsys/radeon/drm/radeon_winsys.h
-#include util/u_double_list.h
-#include util/u_transfer.h
-
-#include si_resource.h
-
-struct winsys_handle;
-struct si_context;
-struct si_screen;
-
-void si_context_flush(struct si_context *ctx, unsigned flags);
-void si_begin_new_cs(struct si_context *ctx);
-void si_need_cs_space(struct si_context *ctx, unsigned num_dw, boolean 
count_draw_in);
-
-#endif
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index f4df4b1..3d5b998 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -44,7 +44,6 @@
 #include pipebuffer/pb_buffer.h
 #include si_pipe.h
 #include radeon/radeon_uvd.h
-#include si.h
 #include sid.h
 #include si_resource.h
 #include si_pipe.h
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index fc75854..2828734 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -33,7 +33,6 @@
 #include util/u_format.h
 #include util/u_math.h
 #include util/u_slab.h
-#include si.h
 #include sid.h
 #include si_public.h
 #include si_pm4.h
@@ -161,6 +160,11 @@ void si_flush_depth_textures(struct si_context *sctx,
 void si_decompress_color_textures(struct si_context *sctx,
  struct si_textures_info *textures);
 
+/* si_hw_context.c */
+void si_context_flush(struct si_context *ctx, unsigned flags);
+void si_begin_new_cs(struct si_context *ctx);
+void si_need_cs_space(struct si_context *ctx, unsigned num_dw, boolean 
count_draw_in);
+
 /* si_pipe.c */
 void si_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
   unsigned flags);
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 21/22] radeonsi: remove open-coded PS_PARTIAL_FLUSH event

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/radeonsi/si_hw_context.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c 
b/src/gallium/drivers/radeonsi/si_hw_context.c
index bb3d080..2e1e9f2 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -107,13 +107,11 @@ void si_context_flush(struct si_context *ctx, unsigned 
flags)
R600_CONTEXT_FLUSH_AND_INV_CB_META |
R600_CONTEXT_FLUSH_AND_INV_DB |
R600_CONTEXT_FLUSH_AND_INV_DB_META |
-   R600_CONTEXT_INV_TEX_CACHE;
+   R600_CONTEXT_INV_TEX_CACHE |
+   /* this is probably not needed anymore */
+   R600_CONTEXT_PS_PARTIAL_FLUSH;
si_emit_cache_flush(ctx-b, NULL);
 
-   /* this is probably not needed anymore */
-   cs-buf[cs-cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
-   cs-buf[cs-cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | 
EVENT_INDEX(4);
-
/* force to keep tiling flags */
flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
 
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 04/22] r600g: remove a no-op while loop

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

for (;;) {

} while ();

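I was surprised to see such a statement: the trailing "while ();" is not part
of the for loop but a separate while statement with an empty body.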
---
 src/gallium/drivers/radeon/r600_query.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index bf73a14..915d37a 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -326,7 +326,7 @@ static void r600_emit_query_predication(struct 
r600_common_context *ctx, struct
/* set CONTINUE bit for all packets except the 
first */
op |= PREDICATION_CONTINUE;
}
-   } while (qbuf);
+   }
}
 }
 
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 06/22] radeonsi: advertise the pipeline statistics query

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

Implemented by the common code. You can now visualize the statistics
with the HUD, see GALLIUM_HUD=help for all available queries. For example:

GALLIUM_HUD=clipper-primitives-generated
---
 src/gallium/drivers/radeonsi/si_pipe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index d2bcd5a..6680ee8 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -310,6 +310,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_COMPUTE:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
 case PIPE_CAP_TGSI_VS_LAYER:
+   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
return 1;
 
case PIPE_CAP_TEXTURE_MULTISAMPLE:
@@ -342,7 +343,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
case PIPE_CAP_USER_VERTEX_BUFFERS:
-   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
case PIPE_CAP_CUBE_MAP_ARRAY:
return 0;
 
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 17/22] radeonsi: move si_upload_const_buffer to a better place

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

This gets rid of another file (si_buffer.c).
---
 src/gallium/drivers/radeonsi/Makefile.sources |  1 -
 src/gallium/drivers/radeonsi/si_buffer.c  | 63 ---
 src/gallium/drivers/radeonsi/si_descriptors.c | 27 
 src/gallium/drivers/radeonsi/si_resource.h|  6 ---
 src/gallium/drivers/radeonsi/si_state.h   |  2 +
 5 files changed, 29 insertions(+), 70 deletions(-)
 delete mode 100644 src/gallium/drivers/radeonsi/si_buffer.c

diff --git a/src/gallium/drivers/radeonsi/Makefile.sources 
b/src/gallium/drivers/radeonsi/Makefile.sources
index 4e1f971..11b3319 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -1,6 +1,5 @@
 C_SOURCES := \
si_blit.c \
-   si_buffer.c \
si_commands.c \
si_compute.c \
si_descriptors.c \
diff --git a/src/gallium/drivers/radeonsi/si_buffer.c 
b/src/gallium/drivers/radeonsi/si_buffer.c
deleted file mode 100644
index 7994405..000
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse gli...@freedesktop.org
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the Software),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *  Jerome Glisse
- *  Corbin Simpson mostawesomed...@gmail.com
- */
-
-#include pipe/p_screen.h
-#include util/u_format.h
-#include util/u_math.h
-#include util/u_inlines.h
-#include util/u_memory.h
-#include util/u_upload_mgr.h
-
-#include si.h
-#include si_pipe.h
-
-void si_upload_const_buffer(struct si_context *sctx, struct r600_resource 
**rbuffer,
-   const uint8_t *ptr, unsigned size,
-   uint32_t *const_offset)
-{
-   if (SI_BIG_ENDIAN) {
-   uint32_t *tmpPtr;
-   unsigned i;
-
-   if (!(tmpPtr = malloc(size))) {
-   R600_ERR(Failed to allocate BE swap buffer.\n);
-   return;
-   }
-
-   for (i = 0; i  size / 4; ++i) {
-   tmpPtr[i] = util_bswap32(((uint32_t *)ptr)[i]);
-   }
-
-   u_upload_data(sctx-b.uploader, 0, size, tmpPtr, const_offset,
-   (struct pipe_resource**)rbuffer);
-
-   free(tmpPtr);
-   } else {
-   u_upload_data(sctx-b.uploader, 0, size, ptr, const_offset,
-   (struct pipe_resource**)rbuffer);
-   }
-}
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index e64799d..f29d8bb 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -29,6 +29,7 @@
 #include si_shader.h
 
 #include util/u_memory.h
+#include util/u_upload_mgr.h
 
 #define SI_NUM_CONTEXTS 16
 
@@ -400,6 +401,32 @@ static void si_buffer_resources_begin_new_cs(struct 
si_context *sctx,
 
 /* CONSTANT BUFFERS */
 
+void si_upload_const_buffer(struct si_context *sctx, struct r600_resource 
**rbuffer,
+   const uint8_t *ptr, unsigned size, uint32_t 
*const_offset)
+{
+   if (SI_BIG_ENDIAN) {
+   uint32_t *tmpPtr;
+   unsigned i;
+
+   if (!(tmpPtr = malloc(size))) {
+   R600_ERR(Failed to allocate BE swap buffer.\n);
+   return;
+   }
+
+   for (i = 0; i  size / 4; ++i) {
+   tmpPtr[i] = util_bswap32(((uint32_t *)ptr)[i]);
+   }
+
+   u_upload_data(sctx-b.uploader, 0, size, tmpPtr, const_offset,
+   (struct pipe_resource**)rbuffer);
+
+   free(tmpPtr);
+   } else {
+   u_upload_data(sctx-b.uploader, 0, size, ptr, const_offset,
+   (struct pipe_resource**)rbuffer);
+   }
+}
+
 static

[Mesa-dev] [PATCH 20/22] radeonsi: move some inline functions from si_pipe.h to si_state.c

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

si_tex_aniso_filter is unused, so it is removed rather than moved.
---
 src/gallium/drivers/radeonsi/si_pipe.h  | 39 -
 src/gallium/drivers/radeonsi/si_state.c | 30 +
 2 files changed, 30 insertions(+), 39 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 69f49d1..38eb128 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -189,45 +189,6 @@ struct pipe_video_buffer *si_video_buffer_create(struct 
pipe_context *pipe,
 /*
  * common helpers
  */
-static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
-{
-   return value * (1  frac_bits);
-}
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
-
-static INLINE unsigned si_map_swizzle(unsigned swizzle)
-{
-   switch (swizzle) {
-   case UTIL_FORMAT_SWIZZLE_Y:
-   return V_008F0C_SQ_SEL_Y;
-   case UTIL_FORMAT_SWIZZLE_Z:
-   return V_008F0C_SQ_SEL_Z;
-   case UTIL_FORMAT_SWIZZLE_W:
-   return V_008F0C_SQ_SEL_W;
-   case UTIL_FORMAT_SWIZZLE_0:
-   return V_008F0C_SQ_SEL_0;
-   case UTIL_FORMAT_SWIZZLE_1:
-   return V_008F0C_SQ_SEL_1;
-   default: /* UTIL_FORMAT_SWIZZLE_X */
-   return V_008F0C_SQ_SEL_X;
-   }
-}
-
-static inline unsigned si_tex_aniso_filter(unsigned filter)
-{
-   if (filter = 1)   return 0;
-   if (filter = 2)   return 1;
-   if (filter = 4)   return 2;
-   if (filter = 8)   return 3;
-/* else */return 4;
-}
-
-/* 12.4 fixed-point */
-static INLINE unsigned si_pack_float_12p4(float x)
-{
-   return x = 0? 0 :
-  x = 4096 ? 0x : x * 16;
-}
 
 static INLINE struct r600_resource *
 si_resource_create_custom(struct pipe_screen *screen,
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 3eab4d9..fd4e26a 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -166,6 +166,36 @@ static unsigned cik_db_pipe_config(struct si_screen 
*sscreen, unsigned tile_mode
}
 }
 
+static INLINE unsigned si_map_swizzle(unsigned swizzle)
+{
+   switch (swizzle) {
+   case UTIL_FORMAT_SWIZZLE_Y:
+   return V_008F0C_SQ_SEL_Y;
+   case UTIL_FORMAT_SWIZZLE_Z:
+   return V_008F0C_SQ_SEL_Z;
+   case UTIL_FORMAT_SWIZZLE_W:
+   return V_008F0C_SQ_SEL_W;
+   case UTIL_FORMAT_SWIZZLE_0:
+   return V_008F0C_SQ_SEL_0;
+   case UTIL_FORMAT_SWIZZLE_1:
+   return V_008F0C_SQ_SEL_1;
+   default: /* UTIL_FORMAT_SWIZZLE_X */
+   return V_008F0C_SQ_SEL_X;
+   }
+}
+
+static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
+{
+   return value * (1  frac_bits);
+}
+
+/* 12.4 fixed-point */
+static INLINE unsigned si_pack_float_12p4(float x)
+{
+   return x = 0? 0 :
+  x = 4096 ? 0x : x * 16;
+}
+
 /*
  * inferred framebuffer and blender state
  */
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 15/22] radeonsi: inline si_upload_index_buffer

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/radeonsi/si_buffer.c | 7 ---
 src/gallium/drivers/radeonsi/si_pipe.h   | 5 -
 src/gallium/drivers/radeonsi/si_state_draw.c | 3 ++-
 3 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_buffer.c 
b/src/gallium/drivers/radeonsi/si_buffer.c
index 6b05c9f..7994405 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -35,13 +35,6 @@
 #include si.h
 #include si_pipe.h
 
-void si_upload_index_buffer(struct si_context *sctx,
-   struct pipe_index_buffer *ib, unsigned count)
-{
-   u_upload_data(sctx-b.uploader, 0, count * ib-index_size,
- ib-user_buffer, ib-offset, ib-buffer);
-}
-
 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource 
**rbuffer,
const uint8_t *ptr, unsigned size,
uint32_t *const_offset)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index d7d701b..bb4d82a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -161,11 +161,6 @@ void si_flush_depth_textures(struct si_context *sctx,
 void si_decompress_color_textures(struct si_context *sctx,
  struct si_textures_info *textures);
 
-/* si_buffer.c */
-void si_upload_index_buffer(struct si_context *sctx,
-   struct pipe_index_buffer *ib, unsigned count);
-
-
 /* si_pipe.c */
 void si_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
   unsigned flags);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 45dd4ba..f325a64 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -721,7 +721,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct 
pipe_draw_info *info)
si_translate_index_buffer(sctx, ib, info-count);
 
if (ib.user_buffer  !ib.buffer) {
-   si_upload_index_buffer(sctx, ib, info-count);
+   u_upload_data(sctx-b.uploader, 0, info-count * 
ib.index_size,
+ ib.user_buffer, ib.offset, ib.buffer);
}
}
 
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 14/22] r600g, radeonsi: consolidate remaining obviously duplicated pipe_screen code

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/r600/r600_pipe.c  | 35 ++-
 src/gallium/drivers/radeon/r600_pipe_common.c | 24 +++---
 src/gallium/drivers/radeon/r600_pipe_common.h |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.c| 26 +---
 4 files changed, 30 insertions(+), 57 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index d95e717..49521e0 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -518,19 +518,11 @@ static void r600_destroy_screen(struct pipe_screen* 
pscreen)
if (!radeon_winsys_unref(rscreen-b.ws))
return;
 
-   r600_common_screen_cleanup(rscreen-b);
-
if (rscreen-global_pool) {
compute_memory_pool_delete(rscreen-global_pool);
}
 
-   if (rscreen-b.trace_bo) {
-   rscreen-b.ws-buffer_unmap(rscreen-b.trace_bo-cs_buf);
-   pipe_resource_reference((struct 
pipe_resource**)rscreen-b.trace_bo, NULL);
-   }
-
-   rscreen-b.ws-destroy(rscreen-b.ws);
-   FREE(rscreen);
+   r600_destroy_common_screen(rscreen-b);
 }
 
 static struct pipe_resource *r600_resource_create(struct pipe_screen *screen,
@@ -551,18 +543,11 @@ struct pipe_screen *r600_screen_create(struct 
radeon_winsys *ws)
return NULL;
}
 
-   ws-query_info(ws, rscreen-b.info);
-
/* Set functions first. */
rscreen-b.b.context_create = r600_create_context;
rscreen-b.b.destroy = r600_destroy_screen;
rscreen-b.b.get_param = r600_get_param;
rscreen-b.b.get_shader_param = r600_get_shader_param;
-   if (rscreen-b.info.chip_class = EVERGREEN) {
-   rscreen-b.b.is_format_supported = 
evergreen_is_format_supported;
-   } else {
-   rscreen-b.b.is_format_supported = r600_is_format_supported;
-   }
rscreen-b.b.resource_create = r600_resource_create;
 
if (!r600_common_screen_init(rscreen-b, ws)) {
@@ -570,6 +555,12 @@ struct pipe_screen *r600_screen_create(struct 
radeon_winsys *ws)
return NULL;
}
 
+   if (rscreen-b.info.chip_class = EVERGREEN) {
+   rscreen-b.b.is_format_supported = 
evergreen_is_format_supported;
+   } else {
+   rscreen-b.b.is_format_supported = r600_is_format_supported;
+   }
+
rscreen-b.debug_flags |= debug_get_flags_option(R600_DEBUG, 
r600_debug_options, 0);
if (debug_get_bool_option(R600_DEBUG_COMPUTE, FALSE))
rscreen-b.debug_flags |= DBG_COMPUTE;
@@ -632,18 +623,6 @@ struct pipe_screen *r600_screen_create(struct 
radeon_winsys *ws)
 
rscreen-global_pool = compute_memory_pool_new(rscreen);
 
-   rscreen-b.cs_count = 0;
-   if (rscreen-b.info.drm_minor = 28  (rscreen-b.debug_flags  
DBG_TRACE_CS)) {
-   rscreen-b.trace_bo = (struct 
r600_resource*)pipe_buffer_create(rscreen-b.b,
-   
PIPE_BIND_CUSTOM,
-   
PIPE_USAGE_STAGING,
-   
4096);
-   if (rscreen-b.trace_bo) {
-   rscreen-b.trace_ptr = 
rscreen-b.ws-buffer_map(rscreen-b.trace_bo-cs_buf, NULL,
-   
PIPE_TRANSFER_UNSYNCHRONIZED);
-   }
-   }
-
/* Create the auxiliary context. This must be done last. */
rscreen-b.aux_context = rscreen-b.b.context_create(rscreen-b.b, 
NULL);
 
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 931c91c..396ff86 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -616,17 +616,35 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
if (!r600_init_tiling(rscreen)) {
return false;
}
-
util_format_s3tc_init();
-
pipe_mutex_init(rscreen-aux_context_lock);
+
+   if (rscreen-info.drm_minor = 28  (rscreen-debug_flags  
DBG_TRACE_CS)) {
+   rscreen-trace_bo = (struct 
r600_resource*)pipe_buffer_create(rscreen-b,
+   
PIPE_BIND_CUSTOM,
+   
PIPE_USAGE_STAGING,
+   
4096);
+   if (rscreen-trace_bo) {
+   rscreen-trace_ptr = 
rscreen-ws-buffer_map(rscreen-trace_bo-cs_buf, NULL,
+   
PIPE_TRANSFER_UNSYNCHRONIZED);
+   }
+   }
+
return

[Mesa-dev] [PATCH 07/22] r600g, radeonsi: consolidate the contents of r600_resource.c

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/r600/Makefile.sources |  1 -
 src/gallium/drivers/r600/r600_pipe.c  | 13 -
 src/gallium/drivers/r600/r600_pipe.h  |  3 --
 src/gallium/drivers/r600/r600_resource.c  | 76 ---
 src/gallium/drivers/r600/r600_resource.h  |  3 --
 src/gallium/drivers/radeon/r600_pipe_common.c | 19 +++
 src/gallium/drivers/radeon/r600_pipe_common.h |  6 +--
 src/gallium/drivers/radeon/r600_texture.c | 18 ---
 src/gallium/drivers/radeonsi/Makefile.sources |  1 -
 src/gallium/drivers/radeonsi/si_pipe.c|  2 -
 src/gallium/drivers/radeonsi/si_pipe.h|  3 --
 src/gallium/drivers/radeonsi/si_resource.c| 61 -
 src/gallium/drivers/radeonsi/si_resource.h|  2 -
 13 files changed, 45 insertions(+), 163 deletions(-)
 delete mode 100644 src/gallium/drivers/r600/r600_resource.c
 delete mode 100644 src/gallium/drivers/radeonsi/si_resource.c

diff --git a/src/gallium/drivers/r600/Makefile.sources 
b/src/gallium/drivers/r600/Makefile.sources
index f04e156..82560fc 100644
--- a/src/gallium/drivers/r600/Makefile.sources
+++ b/src/gallium/drivers/r600/Makefile.sources
@@ -4,7 +4,6 @@ C_SOURCES = \
r600_hw_context.c \
r600_isa.c \
r600_pipe.c \
-   r600_resource.c \
r600_shader.c \
r600_state.c \
r700_asm.c \
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index f9f7b0e..9c0cb0d 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -211,7 +211,6 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen, void
rctx-keep_tiling_flags = rscreen-b.info.drm_minor = 12;
 
r600_init_blit_functions(rctx);
-   r600_init_context_resource_functions(rctx);
 
if (rscreen-b.info.has_uvd) {
rctx-b.b.create_video_codec = r600_uvd_create_decoder;
@@ -824,6 +823,16 @@ static int r600_get_driver_query_info(struct pipe_screen 
*screen,
return 1;
 }
 
+static struct pipe_resource *r600_resource_create(struct pipe_screen *screen,
+ const struct pipe_resource 
*templ)
+{
+   if (templ-target == PIPE_BUFFER 
+   (templ-bind  PIPE_BIND_GLOBAL))
+   return r600_compute_global_buffer_create(screen, templ);
+
+   return r600_resource_create_common(screen, templ);
+}
+
 struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
 {
struct r600_screen *rscreen = CALLOC_STRUCT(r600_screen);
@@ -857,7 +866,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys 
*ws)
rscreen-b.b.get_video_param = r600_get_video_param;
rscreen-b.b.is_video_format_supported = 
vl_video_buffer_is_format_supported;
}
-   r600_init_screen_resource_functions(rscreen-b.b);
+   rscreen-b.b.resource_create = r600_resource_create;
 
if (!r600_common_screen_init(rscreen-b, ws)) {
FREE(rscreen);
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index a3e4c24..7f4f482 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -545,9 +545,6 @@ void r600_decompress_color_textures(struct r600_context 
*rctx,
 /* r600_pipe.c */
 const char * r600_llvm_gpu_string(enum radeon_family family);
 
-/* r600_resource.c */
-void r600_init_context_resource_functions(struct r600_context *r600);
-
 /* r600_shader.c */
 int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_pipe_shader *shader,
diff --git a/src/gallium/drivers/r600/r600_resource.c 
b/src/gallium/drivers/r600/r600_resource.c
deleted file mode 100644
index a8fa357..000
--- a/src/gallium/drivers/r600/r600_resource.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright 2010 Marek Olšák mar...@gmail.com
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the Software),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- *

[Mesa-dev] [PATCH 11/22] r600g, radeonsi: consolidate variables for CS tracing

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/r600/r600_hw_context.c|  6 +++---
 src/gallium/drivers/r600/r600_pipe.c  | 18 +-
 src/gallium/drivers/r600/r600_pipe.h  |  5 +
 src/gallium/drivers/r600/r600_state_common.c  |  8 
 src/gallium/drivers/radeon/r600_pipe_common.h |  4 
 src/gallium/drivers/radeonsi/si_hw_context.c  | 22 +++---
 src/gallium/drivers/radeonsi/si_pipe.c| 14 +++---
 src/gallium/drivers/radeonsi/si_pipe.h|  5 -
 src/gallium/drivers/radeonsi/si_pm4.c |  4 ++--
 src/gallium/drivers/radeonsi/si_state_draw.c  |  2 +-
 10 files changed, 42 insertions(+), 46 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index 790363f..23c9c1c 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -54,7 +54,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned 
num_dw,
for (i = 0; i  R600_NUM_ATOMS; i++) {
if (ctx-atoms[i]  ctx-atoms[i]-dirty) {
num_dw += ctx-atoms[i]-num_dw;
-   if (ctx-screen-trace_bo) {
+   if (ctx-screen-b.trace_bo) {
num_dw += R600_TRACE_CS_DWORDS;
}
}
@@ -62,7 +62,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned 
num_dw,
 
/* The upper-bound of how much space a draw command would take. 
*/
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
-   if (ctx-screen-trace_bo) {
+   if (ctx-screen-b.trace_bo) {
num_dw += R600_TRACE_CS_DWORDS;
}
}
@@ -270,7 +270,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned 
flags)
}
 
/* Flush the CS. */
-   ctx-b.ws-cs_flush(ctx-b.rings.gfx.cs, flags, 
ctx-screen-cs_count++);
+   ctx-b.ws-cs_flush(ctx-b.rings.gfx.cs, flags, 
ctx-screen-b.cs_count++);
 
ctx-skip_surface_sync_on_next_cs_flush = false;
 }
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 182a122..e2d97e5 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -259,8 +259,8 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen, void
goto fail;
}
 
-   if (rscreen-trace_bo) {
-   rctx-b.rings.gfx.cs = rctx-b.ws-cs_create(rctx-b.ws, 
RING_GFX, rscreen-trace_bo-cs_buf);
+   if (rscreen-b.trace_bo) {
+   rctx-b.rings.gfx.cs = rctx-b.ws-cs_create(rctx-b.ws, 
RING_GFX, rscreen-b.trace_bo-cs_buf);
} else {
rctx-b.rings.gfx.cs = rctx-b.ws-cs_create(rctx-b.ws, 
RING_GFX, NULL);
}
@@ -741,9 +741,9 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
compute_memory_pool_delete(rscreen-global_pool);
}
 
-   if (rscreen-trace_bo) {
-   rscreen-b.ws-buffer_unmap(rscreen-trace_bo-cs_buf);
-   pipe_resource_reference((struct 
pipe_resource**)rscreen-trace_bo, NULL);
+   if (rscreen-b.trace_bo) {
+   rscreen-b.ws-buffer_unmap(rscreen-b.trace_bo-cs_buf);
+   pipe_resource_reference((struct 
pipe_resource**)rscreen-b.trace_bo, NULL);
}
 
rscreen-b.ws-destroy(rscreen-b.ws);
@@ -858,14 +858,14 @@ struct pipe_screen *r600_screen_create(struct 
radeon_winsys *ws)
 
rscreen-global_pool = compute_memory_pool_new(rscreen);
 
-   rscreen-cs_count = 0;
+   rscreen-b.cs_count = 0;
if (rscreen-b.info.drm_minor = 28  (rscreen-b.debug_flags  
DBG_TRACE_CS)) {
-   rscreen-trace_bo = (struct 
r600_resource*)pipe_buffer_create(rscreen-b.b,
+   rscreen-b.trace_bo = (struct 
r600_resource*)pipe_buffer_create(rscreen-b.b,

PIPE_BIND_CUSTOM,

PIPE_USAGE_STAGING,

4096);
-   if (rscreen-trace_bo) {
-   rscreen-trace_ptr = 
rscreen-b.ws-buffer_map(rscreen-trace_bo-cs_buf, NULL,
+   if (rscreen-b.trace_bo) {
+   rscreen-b.trace_ptr = 
rscreen-b.ws-buffer_map(rscreen-b.trace_bo-cs_buf, NULL,

PIPE_TRANSFER_UNSYNCHRONIZED);
}
}
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 7f4f482..bdaeb32 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -204,9 +204,6 @@ struct

[Mesa-dev] [PATCH 08/22] radeon: place context-related functions first in r600_pipe_common.c

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

To follow the unwritten convention of r600g and radeonsi.
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 166 ++
 1 file changed, 87 insertions(+), 79 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 031f858..7462d43 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -31,6 +31,93 @@
 #include util/u_upload_mgr.h
 #include inttypes.h
 
+/*
+ * pipe_context
+ */
+
+bool r600_common_context_init(struct r600_common_context *rctx,
+ struct r600_common_screen *rscreen)
+{
+   util_slab_create(rctx-pool_transfers,
+sizeof(struct r600_transfer), 64,
+UTIL_SLAB_SINGLETHREADED);
+
+   rctx-screen = rscreen;
+   rctx-ws = rscreen-ws;
+   rctx-family = rscreen-family;
+   rctx-chip_class = rscreen-chip_class;
+   rctx-max_db = rscreen-chip_class = EVERGREEN ? 8 : 4;
+
+   rctx-b.transfer_map = u_transfer_map_vtbl;
+   rctx-b.transfer_flush_region = u_default_transfer_flush_region;
+   rctx-b.transfer_unmap = u_transfer_unmap_vtbl;
+   rctx-b.transfer_inline_write = u_default_transfer_inline_write;
+
+   r600_streamout_init(rctx);
+   r600_query_init(rctx);
+
+   rctx-allocator_so_filled_size = u_suballocator_create(rctx-b, 4096, 
4,
+  0, 
PIPE_USAGE_STATIC, TRUE);
+   if (!rctx-allocator_so_filled_size)
+   return false;
+
+   rctx-uploader = u_upload_create(rctx-b, 1024 * 1024, 256,
+   PIPE_BIND_INDEX_BUFFER |
+   PIPE_BIND_CONSTANT_BUFFER);
+   if (!rctx-uploader)
+   return false;
+
+   return true;
+}
+
+void r600_common_context_cleanup(struct r600_common_context *rctx)
+{
+   if (rctx-rings.gfx.cs) {
+   rctx-ws-cs_destroy(rctx-rings.gfx.cs);
+   }
+   if (rctx-rings.dma.cs) {
+   rctx-ws-cs_destroy(rctx-rings.dma.cs);
+   }
+
+   if (rctx-uploader) {
+   u_upload_destroy(rctx-uploader);
+   }
+
+   util_slab_destroy(rctx-pool_transfers);
+
+   if (rctx-allocator_so_filled_size) {
+   u_suballocator_destroy(rctx-allocator_so_filled_size);
+   }
+}
+
+void r600_context_add_resource_size(struct pipe_context *ctx, struct 
pipe_resource *r)
+{
+   struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+   struct r600_resource *rr = (struct r600_resource *)r;
+
+   if (r == NULL) {
+   return;
+   }
+
+   /*
+* The idea is to compute a gross estimate of memory requirement of
+* each draw call. After each draw call, memory will be precisely
+* accounted. So the uncertainty is only on the current draw call.
+* In practice this gave very good estimate (+/- 10% of the target
+* memory limit).
+*/
+   if (rr-domains  RADEON_DOMAIN_GTT) {
+   rctx-gtt += rr-buf-size;
+   }
+   if (rr-domains  RADEON_DOMAIN_VRAM) {
+   rctx-vram += rr-buf-size;
+   }
+}
+
+/*
+ * pipe_screen
+ */
+
 static const struct debug_named_value common_debug_options[] = {
/* logging */
{ tex, DBG_TEX, Print texture info },
@@ -235,85 +322,6 @@ void r600_common_screen_cleanup(struct r600_common_screen 
*rscreen)
rscreen-aux_context-destroy(rscreen-aux_context);
 }
 
-bool r600_common_context_init(struct r600_common_context *rctx,
- struct r600_common_screen *rscreen)
-{
-   util_slab_create(rctx-pool_transfers,
-sizeof(struct r600_transfer), 64,
-UTIL_SLAB_SINGLETHREADED);
-
-   rctx-screen = rscreen;
-   rctx-ws = rscreen-ws;
-   rctx-family = rscreen-family;
-   rctx-chip_class = rscreen-chip_class;
-   rctx-max_db = rscreen-chip_class = EVERGREEN ? 8 : 4;
-
-   rctx-b.transfer_map = u_transfer_map_vtbl;
-   rctx-b.transfer_flush_region = u_default_transfer_flush_region;
-   rctx-b.transfer_unmap = u_transfer_unmap_vtbl;
-   rctx-b.transfer_inline_write = u_default_transfer_inline_write;
-
-   r600_streamout_init(rctx);
-   r600_query_init(rctx);
-
-   rctx-allocator_so_filled_size = u_suballocator_create(rctx-b, 4096, 
4,
-  0, 
PIPE_USAGE_STATIC, TRUE);
-   if (!rctx-allocator_so_filled_size)
-   return false;
-
-   rctx-uploader = u_upload_create(rctx-b, 1024 * 1024, 256,
-   PIPE_BIND_INDEX_BUFFER |
-   PIPE_BIND_CONSTANT_BUFFER);
-   if (!rctx-uploader)
-   return false;
-
-   return true;
-}
-
-void

[Mesa-dev] [PATCH 22/22] radeonsi: cleanup includes, add missing license

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/radeonsi/si_blit.c|  5 ++---
 src/gallium/drivers/radeonsi/si_commands.c|  1 -
 src/gallium/drivers/radeonsi/si_compute.c | 25 +++
 src/gallium/drivers/radeonsi/si_descriptors.c |  1 +
 src/gallium/drivers/radeonsi/si_hw_context.c  |  7 +--
 src/gallium/drivers/radeonsi/si_pipe.c| 29 +--
 src/gallium/drivers/radeonsi/si_pipe.h| 10 -
 src/gallium/drivers/radeonsi/si_pm4.c |  1 -
 src/gallium/drivers/radeonsi/si_shader.c  |  8 
 src/gallium/drivers/radeonsi/si_state.c   | 21 +--
 src/gallium/drivers/radeonsi/si_state_draw.c  | 13 ++--
 src/gallium/drivers/radeonsi/si_uvd.c | 14 -
 12 files changed, 49 insertions(+), 86 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 250caab..aa3177a 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -20,11 +20,10 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
-#include util/u_surface.h
+
+#include si_pipe.h
 #include util/u_blitter.h
 #include util/u_format.h
-#include si_pipe.h
-#include si_state.h
 
 enum si_blitter_op /* bitmask */
 {
diff --git a/src/gallium/drivers/radeonsi/si_commands.c 
b/src/gallium/drivers/radeonsi/si_commands.c
index a020ac3..5ddc40e 100644
--- a/src/gallium/drivers/radeonsi/si_commands.c
+++ b/src/gallium/drivers/radeonsi/si_commands.c
@@ -26,7 +26,6 @@
 
 #include sid.h
 #include si_pipe.h
-#include si_pm4.h
 
 void si_cmd_context_control(struct si_pm4_state *pm4)
 {
diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 3aea799..a7f49e7 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -1,8 +1,33 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
 #include util/u_memory.h
 
 #include ../radeon/r600_cs.h
 #include si_pipe.h
 #include si_shader.h
+#include sid.h
 
 #include radeon_llvm_util.h
 
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index d48dbc0..5a7fac1 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -26,6 +26,7 @@
 #include ../radeon/r600_cs.h
 #include si_pipe.h
 #include si_shader.h
+#include sid.h
 
 #include util/u_memory.h
 #include util/u_upload_mgr.h
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c 
b/src/gallium/drivers/radeonsi/si_hw_context.c
index 2e1e9f2..7e40255 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -23,13 +23,8 @@
  * Authors:
  *  Jerome Glisse
  */
-#include ../radeon/r600_cs.h
-#include sid.h
-#include si_pm4.h
-#include si_pipe.h
-#include util/u_memory.h
-#include errno.h
 
+#include si_pipe.h
 
 /* initialize */
 void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 85a9177..a08f872 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -20,34 +20,15 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
-#include stdio.h
-#include errno.h
-#include pipe/p_defines.h
-#include pipe/p_state.h
-#include pipe/p_context.h
-#include tgsi/tgsi_scan.h
-#include tgsi/tgsi_parse.h
-#include tgsi/tgsi_util.h
+
+#include si_pipe.h
+#include si_public.h
+
+#include radeon/radeon_uvd.h
 #include util/u_blitter.h
-#include util/u_double_list.h
-#include

[Mesa-dev] [PATCH 16/22] radeonsi: inline si_translate_index_buffer

2014-01-22 Thread Marek Olšák

From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/radeonsi/Makefile.sources |  1 -
 src/gallium/drivers/radeonsi/si_pipe.h|  5 ---
 src/gallium/drivers/radeonsi/si_state_draw.c  | 20 +-
 src/gallium/drivers/radeonsi/si_translate.c   | 53 ---
 4 files changed, 19 insertions(+), 60 deletions(-)
 delete mode 100644 src/gallium/drivers/radeonsi/si_translate.c

diff --git a/src/gallium/drivers/radeonsi/Makefile.sources 
b/src/gallium/drivers/radeonsi/Makefile.sources
index c24eb75..4e1f971 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -10,5 +10,4 @@ C_SOURCES := \
si_shader.c \
si_state.c \
si_state_draw.c \
-   si_translate.c \
si_uvd.c
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index bb4d82a..fc75854 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -165,11 +165,6 @@ void si_decompress_color_textures(struct si_context *sctx,
 void si_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
   unsigned flags);
 
-/* si_translate.c */
-void si_translate_index_buffer(struct si_context *sctx,
-  struct pipe_index_buffer *ib,
-  unsigned count);
-
 #if SI_TRACE_CS
 void si_trace_emit(struct si_context *sctx);
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index f325a64..8dbf373 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -27,6 +27,8 @@
 #include util/u_memory.h
 #include util/u_framebuffer.h
 #include util/u_blitter.h
+#include util/u_index_modify.h
+#include util/u_upload_mgr.h
 #include tgsi/tgsi_parse.h
 #include si_pipe.h
 #include si_shader.h
@@ -718,7 +720,23 @@ void si_draw_vbo(struct pipe_context *ctx, const struct 
pipe_draw_info *info)
ib.offset = sctx-index_buffer.offset + info-start * 
ib.index_size;
 
/* Translate or upload, if needed. */
-   si_translate_index_buffer(sctx, ib, info-count);
+   if (ib.index_size == 1) {
+   struct pipe_resource *out_buffer = NULL;
+   unsigned out_offset;
+   void *ptr;
+
+   u_upload_alloc(sctx-b.uploader, 0, info-count * 2,
+  out_offset, out_buffer, ptr);
+
+   util_shorten_ubyte_elts_to_userptr(
+   sctx-b.b, ib, 0, ib.offset, 
info-count, ptr);
+
+   pipe_resource_reference(ib.buffer, NULL);
+   ib.user_buffer = NULL;
+   ib.buffer = out_buffer;
+   ib.offset = out_offset;
+   ib.index_size = 2;
+   }
 
if (ib.user_buffer  !ib.buffer) {
u_upload_data(sctx-b.uploader, 0, info-count * 
ib.index_size,
diff --git a/src/gallium/drivers/radeonsi/si_translate.c 
b/src/gallium/drivers/radeonsi/si_translate.c
deleted file mode 100644
index be9d621..000
--- a/src/gallium/drivers/radeonsi/si_translate.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright 2010 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the Software),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Dave Airlie airl...@redhat.com
- */
-
-#include util/u_index_modify.h
-#include util/u_upload_mgr.h
-#include si_pipe.h
-
-
-void si_translate_index_buffer(struct si_context *sctx,
-  struct pipe_index_buffer *ib,
-  unsigned count)
-{
-   struct pipe_resource *out_buffer = NULL;
-   unsigned out_offset;
-   void *ptr;
-
-   switch (ib-index_size) {
-   case

Re: [Mesa-dev] [PATCH] rtasm: deal with size overflows by casting to ptrdiff_t

2014-01-22 Thread Marek Olšák

Does Nouveau still work if you report PIPE_CAP_USER_VERTEX_BUFFERS = 0?

Marek

On Wed, Jan 22, 2014 at 3:37 AM, Ilia Mirkin imir...@alum.mit.edu wrote:
 This was discovered as a result of the draw-elements-base-vertex-neg
 piglit test, which passes very negative offsets in, followed up by large
 indices. The nouveau code correctly adjusts the pointer, but the
 transfer code needs to do the proper inverse correction. Similarly fix
 up the SSE code to do a 64-bit multiply to compute the proper offset.

 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 ---

 With this change, nouveau passes for the draw-elements-base-vertex-neg piglit
 test with user_varrays, on a 64-bit setup both with and without
 GALLIUM_NOSSE=1. I'm pretty sure that the change should be minimal to a
 non-x86 setup since the rexw will be a no-op. I guess there will be an extra
 register use for the mov, but it shouldn't be too expensive, esp on anything
 remotely current.

  src/gallium/auxiliary/translate/translate_generic.c | 2 +-
  src/gallium/auxiliary/translate/translate_sse.c | 8 ++--
  2 files changed, 7 insertions(+), 3 deletions(-)

 diff --git a/src/gallium/auxiliary/translate/translate_generic.c 
 b/src/gallium/auxiliary/translate/translate_generic.c
 index 5bf97db..5ffce32 100644
 --- a/src/gallium/auxiliary/translate/translate_generic.c
 +++ b/src/gallium/auxiliary/translate/translate_generic.c
 @@ -638,7 +638,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( 
 struct translate_generic *
   }

   src = tg-attrib[attr].input_ptr +
 -   tg-attrib[attr].input_stride * index;
 +   (ptrdiff_t)tg-attrib[attr].input_stride * index;

   copy_size = tg-attrib[attr].copy_size;
   if(likely(copy_size = 0))
 diff --git a/src/gallium/auxiliary/translate/translate_sse.c 
 b/src/gallium/auxiliary/translate/translate_sse.c
 index a78ea91..a72454a 100644
 --- a/src/gallium/auxiliary/translate/translate_sse.c
 +++ b/src/gallium/auxiliary/translate/translate_sse.c
 @@ -1121,7 +1121,9 @@ static boolean init_inputs( struct translate_sse *p,
  x86_cmovcc(p-func, tmp_EAX, buf_max_index, cc_AE);
   }

 - x86_imul(p-func, tmp_EAX, buf_stride);
 + x86_mov(p-func, p-tmp2_EDX, buf_stride);
 + x64_rexw(p-func);
 + x86_imul(p-func, tmp_EAX, p-tmp2_EDX);
   x64_rexw(p-func);
   x86_add(p-func, tmp_EAX, buf_base_ptr);

 @@ -1207,7 +1209,9 @@ static struct x86_reg get_buffer_ptr( struct 
 translate_sse *p,
x86_cmp(p-func, ptr, buf_max_index);
x86_cmovcc(p-func, ptr, buf_max_index, cc_AE);

 -  x86_imul(p-func, ptr, buf_stride);
 +  x86_mov(p-func, p-tmp2_EDX, buf_stride);
 +  x64_rexw(p-func);
 +  x86_imul(p-func, ptr, p-tmp2_EDX);
x64_rexw(p-func);
x86_add(p-func, ptr, buf_base_ptr);
return ptr;
 --
 1.8.3.2

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] rtasm: deal with size overflows by casting to ptrdiff_t

2014-01-22 Thread Ilia Mirkin

On Wed, Jan 22, 2014 at 3:27 PM, Marek Olšák mar...@gmail.com wrote:
 Does Nouveau still work if you report PIPE_CAP_USER_VERTEX_BUFFERS = 0?

I'm not in front of a machine with nouveau, so I can't tell you right
now, but I'll test it out later tonight. Out of curiosity though, why
do you ask? Is it related to this patch, or just idle curiosity on
your end?


 Marek

 On Wed, Jan 22, 2014 at 3:37 AM, Ilia Mirkin imir...@alum.mit.edu wrote:
 This was discovered as a result of the draw-elements-base-vertex-neg
 piglit test, which passes very negative offsets in, followed up by large
 indices. The nouveau code correctly adjusts the pointer, but the
 transfer code needs to do the proper inverse correction. Similarly fix
 up the SSE code to do a 64-bit multiply to compute the proper offset.

 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 ---

 With this change, nouveau passes for the draw-elements-base-vertex-neg piglit
 test with user_varrays, on a 64-bit setup both with and without
 GALLIUM_NOSSE=1. I'm pretty sure that the change should be minimal to a
 non-x86 setup since the rexw will be a no-op. I guess there will be an extra
 register use for the mov, but it shouldn't be too expensive, esp on anything
 remotely current.

  src/gallium/auxiliary/translate/translate_generic.c | 2 +-
  src/gallium/auxiliary/translate/translate_sse.c | 8 ++--
  2 files changed, 7 insertions(+), 3 deletions(-)

 diff --git a/src/gallium/auxiliary/translate/translate_generic.c 
 b/src/gallium/auxiliary/translate/translate_generic.c
 index 5bf97db..5ffce32 100644
 --- a/src/gallium/auxiliary/translate/translate_generic.c
 +++ b/src/gallium/auxiliary/translate/translate_generic.c
 @@ -638,7 +638,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( 
 struct translate_generic *
   }

   src = tg-attrib[attr].input_ptr +
 -   tg-attrib[attr].input_stride * index;
 +   (ptrdiff_t)tg-attrib[attr].input_stride * index;

   copy_size = tg-attrib[attr].copy_size;
   if(likely(copy_size = 0))
 diff --git a/src/gallium/auxiliary/translate/translate_sse.c 
 b/src/gallium/auxiliary/translate/translate_sse.c
 index a78ea91..a72454a 100644
 --- a/src/gallium/auxiliary/translate/translate_sse.c
 +++ b/src/gallium/auxiliary/translate/translate_sse.c
 @@ -1121,7 +1121,9 @@ static boolean init_inputs( struct translate_sse *p,
  x86_cmovcc(p-func, tmp_EAX, buf_max_index, cc_AE);
   }

 - x86_imul(p-func, tmp_EAX, buf_stride);
 + x86_mov(p-func, p-tmp2_EDX, buf_stride);
 + x64_rexw(p-func);
 + x86_imul(p-func, tmp_EAX, p-tmp2_EDX);
   x64_rexw(p-func);
   x86_add(p-func, tmp_EAX, buf_base_ptr);

 @@ -1207,7 +1209,9 @@ static struct x86_reg get_buffer_ptr( struct 
 translate_sse *p,
x86_cmp(p-func, ptr, buf_max_index);
x86_cmovcc(p-func, ptr, buf_max_index, cc_AE);

 -  x86_imul(p-func, ptr, buf_stride);
 +  x86_mov(p-func, p-tmp2_EDX, buf_stride);
 +  x64_rexw(p-func);
 +  x86_imul(p-func, ptr, p-tmp2_EDX);
x64_rexw(p-func);
x86_add(p-func, ptr, buf_base_ptr);
return ptr;
 --
 1.8.3.2

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [V3 PATCH 1/8] mesa: 's/\bgl_format\b/mesa_format/g'. Use better name for Mesa Formats enum

2014-01-22 Thread Mark Mueller

On Fri, Jan 17, 2014 at 8:58 AM, Brian Paul bri...@vmware.com wrote:

 On 01/16/2014 10:13 PM, Mark Mueller wrote:

 This series encompasses the much discussed specification and renaming of
 MESA_FORMATs,
 which now is packed into 8 patches

 Signed-off-by: Mark Mueller markkmuel...@gmail.com
 ---


 Well, our other enum typedefs (and structs) all use the gl_ prefix.  But
 the other enum values don't use MESA_ prefixes so gl_formats are weird that
 way.  I'm kind on the fence about this change.

 -Brian


Obviously it's not critical, but the gl_ prefix is confusing because of the
weirdness, and Ken recommended a name change thus I took a stab at it. I've
left this change in V4 of the series. Would it be more convincing with a
different name, like mgl_formats, or mesa_gl_formats?

Mark
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] gallium: remove PIPE_CAP_MAX_COMBINED_SAMPLERS

2014-01-22 Thread Marek Olšák

On Mon, Jan 20, 2014 at 3:35 PM, Brian Paul bri...@vmware.com wrote:
 I'm not sure about this.  The concept of MAX_COMBINED_TEXTURE_IMAGE_UNITS
 goes back to the first shader hardware where texture image units might be
 shared between the VS and FS.

 The question is whether any of the hardware we care about has that
 restriction.

I'm not aware of any hardware which has the restriction. I think
the reason MAX_COMBINED_TEXTURE_IMAGE_UNITS exists is that the OpenGL
texture units are shared by all shader stages.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] rtasm: deal with size overflows by casting to ptrdiff_t

2014-01-22 Thread Marek Olšák

The draw-elements-base-vertex-neg test passes on Radeon, which uses
the common util/u_vbuf for uploading vertices. I know Nouveau is
probably the only driver which doesn't use it, not counting the swrast
drivers. I'm afraid that your change from fail to pass for Nouveau
will break the test for everybody else. You can switch to using
util/u_vbuf by reporting PIPE_CAP_USER_VERTEX_BUFFERS = 0. Then you
will hit the same code path as Radeon.

Marek

On Wed, Jan 22, 2014 at 9:32 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 On Wed, Jan 22, 2014 at 3:27 PM, Marek Olšák mar...@gmail.com wrote:
 Does Nouveau still work if you report PIPE_CAP_USER_VERTEX_BUFFERS = 0?

 I'm not in front of a machine with nouveau, so I can't tell you right
 now, but I'll test it out later tonight. Out of curiosity though, why
 do you ask? Is it related to this patch, or just idle curiosity on
 your end?


 Marek

 On Wed, Jan 22, 2014 at 3:37 AM, Ilia Mirkin imir...@alum.mit.edu wrote:
 This was discovered as a result of the draw-elements-base-vertex-neg
 piglit test, which passes very negative offsets in, followed up by large
 indices. The nouveau code correctly adjusts the pointer, but the
 transfer code needs to do the proper inverse correction. Similarly fix
 up the SSE code to do a 64-bit multiply to compute the proper offset.

 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 ---

 With this change, nouveau passes for the draw-elements-base-vertex-neg 
 piglit
 test with user_varrays, on a 64-bit setup both with and without
 GALLIUM_NOSSE=1. I'm pretty sure that the change should be minimal to a
 non-x86 setup since the rexw will be a no-op. I guess there will be an extra
 register use for the mov, but it shouldn't be too expensive, esp on anything
 remotely current.

  src/gallium/auxiliary/translate/translate_generic.c | 2 +-
  src/gallium/auxiliary/translate/translate_sse.c | 8 ++--
  2 files changed, 7 insertions(+), 3 deletions(-)

 diff --git a/src/gallium/auxiliary/translate/translate_generic.c 
 b/src/gallium/auxiliary/translate/translate_generic.c
 index 5bf97db..5ffce32 100644
 --- a/src/gallium/auxiliary/translate/translate_generic.c
 +++ b/src/gallium/auxiliary/translate/translate_generic.c
 @@ -638,7 +638,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( 
 struct translate_generic *
   }

   src = tg-attrib[attr].input_ptr +
 -   tg-attrib[attr].input_stride * index;
 +   (ptrdiff_t)tg-attrib[attr].input_stride * index;

   copy_size = tg-attrib[attr].copy_size;
   if(likely(copy_size = 0))
 diff --git a/src/gallium/auxiliary/translate/translate_sse.c 
 b/src/gallium/auxiliary/translate/translate_sse.c
 index a78ea91..a72454a 100644
 --- a/src/gallium/auxiliary/translate/translate_sse.c
 +++ b/src/gallium/auxiliary/translate/translate_sse.c
 @@ -1121,7 +1121,9 @@ static boolean init_inputs( struct translate_sse *p,
  x86_cmovcc(p-func, tmp_EAX, buf_max_index, cc_AE);
   }

 - x86_imul(p-func, tmp_EAX, buf_stride);
 + x86_mov(p-func, p-tmp2_EDX, buf_stride);
 + x64_rexw(p-func);
 + x86_imul(p-func, tmp_EAX, p-tmp2_EDX);
   x64_rexw(p-func);
   x86_add(p-func, tmp_EAX, buf_base_ptr);

 @@ -1207,7 +1209,9 @@ static struct x86_reg get_buffer_ptr( struct 
 translate_sse *p,
x86_cmp(p-func, ptr, buf_max_index);
x86_cmovcc(p-func, ptr, buf_max_index, cc_AE);

 -  x86_imul(p-func, ptr, buf_stride);
 +  x86_mov(p-func, p-tmp2_EDX, buf_stride);
 +  x64_rexw(p-func);
 +  x86_imul(p-func, ptr, p-tmp2_EDX);
x64_rexw(p-func);
x86_add(p-func, ptr, buf_base_ptr);
return ptr;
 --
 1.8.3.2

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 10/10] i965: Support 32 texture image units on Haswell+.

2014-01-22 Thread Matt Turner

On Sat, Jan 18, 2014 at 8:20 PM, Kenneth Graunke kenn...@whitecape.org wrote:
 The Intel closed source OpenGL driver recently began supporting 32
 texture image units on Haswell.  This makes the open source driver
 support 32 as well.

 Earlier generations don't have the message header field required to
 support more than 16 sampler states, so we continue to advertise 16
 there.

 On Haswell, this causes us to advertise:
 - GL_MAX_TEXTURE_IMAGE_UNITS = 32
 - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS = 32
 - GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS = 96
 instead of the old values of 16, 16, and 48.

 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 ---
  src/mesa/drivers/dri/i965/brw_context.c | 9 ++---
  src/mesa/drivers/dri/i965/brw_context.h | 2 +-
  2 files changed, 7 insertions(+), 4 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
 b/src/mesa/drivers/dri/i965/brw_context.c
 index 1b42751..806f3db 100644
 --- a/src/mesa/drivers/dri/i965/brw_context.c
 +++ b/src/mesa/drivers/dri/i965/brw_context.c
 @@ -282,20 +282,23 @@ brw_initialize_context_constants(struct brw_context 
 *brw)
  {
 struct gl_context *ctx = brw-ctx;

 +   unsigned max_samplers =
 +  brw-gen = 8 || brw-is_haswell ? BRW_MAX_TEX_UNIT : 16;
 +
 ctx-Const.QueryCounterBits.Timestamp = 36;

 ctx-Const.StripTextureBorder = true;

 ctx-Const.MaxDualSourceDrawBuffers = 1;
 ctx-Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
 -   ctx-Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 
 BRW_MAX_TEX_UNIT;
 +   ctx-Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 
 max_samplers;
 ctx-Const.MaxTextureCoordUnits = 8; /* Mesa limit */
 ctx-Const.MaxTextureUnits =
MIN2(ctx-Const.MaxTextureCoordUnits,
 ctx-Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
 -   ctx-Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 
 BRW_MAX_TEX_UNIT;
 +   ctx-Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 
 max_samplers;
 if (brw-gen = 7)
 -  ctx-Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 
 BRW_MAX_TEX_UNIT;
 +  ctx-Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 
 max_samplers;
 else
ctx-Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
 ctx-Const.MaxCombinedTextureImageUnits =
 diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
 b/src/mesa/drivers/dri/i965/brw_context.h
 index 63dd4a0..5908659 100644
 --- a/src/mesa/drivers/dri/i965/brw_context.h
 +++ b/src/mesa/drivers/dri/i965/brw_context.h
 @@ -650,7 +650,7 @@ struct brw_gs_prog_data
  };

  /** Number of texture sampler units */
 -#define BRW_MAX_TEX_UNIT 16
 +#define BRW_MAX_TEX_UNIT 32

  /** Max number of render targets in a shader */
  #define BRW_MAX_DRAW_BUFFERS 8
 --
 1.8.5.2

Unfortunate that the PRMs that contain the relevant information for
this series are missing...

Anyway, the series is

Reviewed-by: Matt Turner matts...@gmail.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

1 2 >

1 - 100 of 161 matches

Mail list logo