[Mesa-dev] [PATCH] nir: add support for 4 constant offsets in tg4

2018-03-29 Thread Karol Herbst
Nvidia hardware can do that natively, so there is no need to lower that to four
TG4 instructions.

Signed-off-by: Karol Herbst 
---
 src/compiler/glsl/glsl_to_nir.cpp | 25 ++---
 src/compiler/nir/nir.h|  9 -
 src/compiler/nir/nir_print.c  |  9 +
 3 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index c4a6d52a5b2..4ea5f1616a7 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -2042,7 +2042,9 @@ nir_visitor::visit(ir_texture *ir)
   num_srcs++;
if (ir->shadow_comparator != NULL)
   num_srcs++;
-   if (ir->offset != NULL)
+   if (ir->offset != NULL && ir->offset->type->is_array())
+  num_srcs += ir->offset->type->array_size();
+   else if (ir->offset != NULL)
   num_srcs++;
 
nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
@@ -2097,12 +2099,21 @@ nir_visitor::visit(ir_texture *ir)
 
if (ir->offset != NULL) {
   /* we don't support multiple offsets yet */
-  assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());
-
-  instr->src[src_number].src =
- nir_src_for_ssa(evaluate_rvalue(ir->offset));
-  instr->src[src_number].src_type = nir_tex_src_offset;
-  src_number++;
+  if (ir->offset->type->is_vector() || ir->offset->type->is_scalar()) {
+ instr->src[src_number].src =
+nir_src_for_ssa(evaluate_rvalue(ir->offset));
+ instr->src[src_number].src_type = nir_tex_src_offset;
+ src_number++;
+  } else if (ir->offset->type->is_array()) {
+ for (int i = 0; i < ir->offset->type->array_size(); i++) {
+instr->src[src_number].src =
+   
nir_src_for_ssa(evaluate_rvalue(ir->offset->as_constant()->get_array_element(i)->as_rvalue()));
+instr->src[src_number].src_type = 
(nir_tex_src_type)(nir_tex_src_offset + i);
+src_number++;
+ }
+  } else {
+ assert(false);
+  }
}
 
switch (ir->op) {
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9fff1f4647d..7b02c4af05f 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1175,6 +1175,9 @@ typedef enum {
nir_tex_src_projector,
nir_tex_src_comparator, /* shadow comparator */
nir_tex_src_offset,
+   nir_tex_src_offset1,
+   nir_tex_src_offset2,
+   nir_tex_src_offset3,
nir_tex_src_bias,
nir_tex_src_lod,
nir_tex_src_ms_index, /* MSAA sample index */
@@ -1377,6 +1380,9 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, 
unsigned src)
   return nir_type_float;
 
case nir_tex_src_offset:
+   case nir_tex_src_offset1:
+   case nir_tex_src_offset2:
+   case nir_tex_src_offset3:
case nir_tex_src_ms_index:
case nir_tex_src_texture_offset:
case nir_tex_src_sampler_offset:
@@ -1408,7 +1414,8 @@ nir_tex_instr_src_size(const nir_tex_instr *instr, 
unsigned src)
/* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for
 * the offset, since a cube maps to a single face.
 */
-   if (instr->src[src].src_type == nir_tex_src_offset) {
+   if (instr->src[src].src_type >= nir_tex_src_offset &&
+   instr->src[src].src_type <= nir_tex_src_offset3) {
   if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
  return 2;
   else if (instr->is_array)
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 21f13097651..e13a4f9aa6d 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -751,6 +751,15 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
   case nir_tex_src_offset:
  fprintf(fp, "(offset)");
  break;
+  case nir_tex_src_offset1:
+ fprintf(fp, "(offset1)");
+ break;
+  case nir_tex_src_offset2:
+ fprintf(fp, "(offset2)");
+ break;
+  case nir_tex_src_offset3:
+ fprintf(fp, "(offset3)");
+ break;
   case nir_tex_src_bias:
  fprintf(fp, "(bias)");
  break;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: add support for 4 constant offsets in tg4

2018-03-30 Thread Karol Herbst
On Fri, Mar 30, 2018 at 9:35 PM, Eric Anholt  wrote:
> Karol Herbst  writes:
>
>> Nvidia hardware can do that natively so there is no need to lower that to 
>> four
>> TG4s instructions.
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/compiler/glsl/glsl_to_nir.cpp | 25 ++---
>>  src/compiler/nir/nir.h|  9 -
>>  src/compiler/nir/nir_print.c  |  9 +
>>  3 files changed, 35 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
>> b/src/compiler/glsl/glsl_to_nir.cpp
>> index c4a6d52a5b2..4ea5f1616a7 100644
>> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> @@ -2042,7 +2042,9 @@ nir_visitor::visit(ir_texture *ir)
>>num_srcs++;
>> if (ir->shadow_comparator != NULL)
>>num_srcs++;
>> -   if (ir->offset != NULL)
>> +   if (ir->offset != NULL && ir->offset->type->is_array())
>> +  num_srcs += ir->offset->type->array_size();
>> +   else if (ir->offset != NULL)
>>num_srcs++;
>>
>> nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
>> @@ -2097,12 +2099,21 @@ nir_visitor::visit(ir_texture *ir)
>>
>> if (ir->offset != NULL) {
>>/* we don't support multiple offsets yet */
>> -  assert(ir->offset->type->is_vector() || 
>> ir->offset->type->is_scalar());
>> -
>> -  instr->src[src_number].src =
>> - nir_src_for_ssa(evaluate_rvalue(ir->offset));
>> -  instr->src[src_number].src_type = nir_tex_src_offset;
>> -  src_number++;
>> +  if (ir->offset->type->is_vector() || ir->offset->type->is_scalar()) {
>> + instr->src[src_number].src =
>> +nir_src_for_ssa(evaluate_rvalue(ir->offset));
>> + instr->src[src_number].src_type = nir_tex_src_offset;
>> + src_number++;
>> +  } else if (ir->offset->type->is_array()) {
>> + for (int i = 0; i < ir->offset->type->array_size(); i++) {
>> +instr->src[src_number].src =
>> +   
>> nir_src_for_ssa(evaluate_rvalue(ir->offset->as_constant()->get_array_element(i)->as_rvalue()));
>> +instr->src[src_number].src_type = 
>> (nir_tex_src_type)(nir_tex_src_offset + i);
>> +src_number++;
>> + }
>> +  } else {
>> + assert(false);
>
> Maybe just do assert(ir->offset->type->is_array()) in the previous block
> instead of the extra else.  And optionally pull
> ir->offset->as_constant() out to a temporary for nicer column wrapping.
> Other than that, this seems good.
>

Well, the thing is, it only works with constants within the array. If
you have non-constant values, the code wouldn't assert on that. But I
will try to think of something nice there.

> Reviewed-by: Eric Anholt 
>
> If I'm reading my specs right, I'll be able to use this on vc6, too.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/4] nir: add support for bindless_texture

2018-04-03 Thread Karol Herbst
I think most of the changes are straightforward. The changes needed for images
should be discussed, because in its current form it would require changing all
drivers using nir and supporting images.

Karol Herbst (4):
  nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars
  nir: add support for bindless_texture samplers
  glsl/nir: fix variable type for image intrinsics and ubos
  RFC nir: add support for bindless_texture images

 src/compiler/glsl/glsl_to_nir.cpp   | 38 -
 src/compiler/nir/nir.h  |  3 ++-
 src/compiler/nir/nir_intrinsics.py  | 24 ++---
 src/compiler/nir/nir_print.c|  3 +++
 src/compiler/nir/nir_split_var_copies.c |  4 
 5 files changed, 54 insertions(+), 18 deletions(-)

-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] RFC nir: add support for bindless_texture images

2018-04-03 Thread Karol Herbst
I added another source for all image_var_* intrinsics. Drivers have to be
adjusted with this change.

There was some discussion to add new intrinsics to handle operations on
bindless images. Maybe we can continue with this here?

Signed-off-by: Karol Herbst 
---
 src/compiler/glsl/glsl_to_nir.cpp  | 19 +--
 src/compiler/nir/nir.h |  2 +-
 src/compiler/nir/nir_intrinsics.py | 24 
 3 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 1fc0cac4736..4e053c140c2 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -894,10 +894,14 @@ nir_visitor::visit(ir_call *ir)
  ir_dereference *image = (ir_dereference *)param;
  const glsl_type *type =
 image->type->without_array();
+ bool bindless = image->variable_referenced()->contains_bindless();
 
  instr->variables[0] = evaluate_deref(&instr->instr, image);
  param = param->get_next();
 
+ if (bindless)
+instr->variables[0]->var->data.bindless = true;
+
  /* Set the intrinsic destination. */
  if (ir->return_deref) {
 unsigned num_components = ir->return_deref->type->vector_elements;
@@ -909,6 +913,11 @@ nir_visitor::visit(ir_call *ir)
 
  if (op == nir_intrinsic_image_var_size ||
  op == nir_intrinsic_image_var_samples) {
+if (bindless) {
+   instr->src[0] = nir_src_for_ssa(evaluate_rvalue(image));
+} else {
+   instr->src[0] = nir_src_for_ssa(&instr_undef->def);
+}
 nir_builder_instr_insert(&b, &instr->instr);
 break;
  }
@@ -941,15 +950,21 @@ nir_visitor::visit(ir_call *ir)
 instr->src[1] = nir_src_for_ssa(&instr_undef->def);
  }
 
+ if (bindless) {
+instr->src[2] = nir_src_for_ssa(evaluate_rvalue(image));
+ } else {
+instr->src[2] = nir_src_for_ssa(&instr_undef->def);
+ }
+
  /* Set the intrinsic parameters. */
  if (!param->is_tail_sentinel()) {
-instr->src[2] =
+instr->src[3] =
nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
 param = param->get_next();
  }
 
  if (!param->is_tail_sentinel()) {
-instr->src[3] =
+instr->src[4] =
nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
 param = param->get_next();
  }
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index e4d626d263e..c6081cbb61f 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1108,7 +1108,7 @@ typedef enum {
 
 } nir_intrinsic_index_flag;
 
-#define NIR_INTRINSIC_MAX_INPUTS 4
+#define NIR_INTRINSIC_MAX_INPUTS 5
 
 typedef struct {
const char *name;
diff --git a/src/compiler/nir/nir_intrinsics.py 
b/src/compiler/nir/nir_intrinsics.py
index 1bc99552cd7..d6da63ab769 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -291,19 +291,19 @@ atomic3("atomic_counter_comp_swap")
 # argument with the value to be written, and image atomic operations take
 # either one or two additional scalar arguments with the same meaning as in
 # the ARB_shader_image_load_store specification.
-intrinsic("image_var_load", src_comp=[4, 1], dest_comp=4, num_vars=1,
+intrinsic("image_var_load", src_comp=[4, 1, 1], dest_comp=4, num_vars=1,
   flags=[CAN_ELIMINATE])
-intrinsic("image_var_store", src_comp=[4, 1, 4], num_vars=1)
-intrinsic("image_var_atomic_add",  src_comp=[4, 1, 1], dest_comp=1, num_vars=1)
-intrinsic("image_var_atomic_min",  src_comp=[4, 1, 1], dest_comp=1, num_vars=1)
-intrinsic("image_var_atomic_max",  src_comp=[4, 1, 1], dest_comp=1, num_vars=1)
-intrinsic("image_var_atomic_and",  src_comp=[4, 1, 1], dest_comp=1, num_vars=1)
-intrinsic("image_var_atomic_or",   src_comp=[4, 1, 1], dest_comp=1, num_vars=1)
-intrinsic("image_var_atomic_xor",  src_comp=[4, 1, 1], dest_comp=1, num_vars=1)
-intrinsic("image_var_atomic_exchange",  src_comp=[4, 1, 1], dest_comp=1, 
num_vars=1)
-intrinsic("image_var_atomic_comp_swap", src_comp=[4, 1, 1, 1], dest_comp=1, 
num_vars=1)
-intrinsic("image_var_size",dest_comp=0, num_vars=1, flags=[CAN_ELIMINATE, 
CAN_REORDER])
-intrinsic("image_var_samples", dest_comp=1, num_vars=1, flags=[CAN_ELIMINATE, 
CAN_REORDER])
+intrinsic("image_var_store", src_comp=[4, 1, 1, 4], num_vars=1)
+intrinsic("image_var_atomic_add",  src_comp=[4, 1, 1, 1], dest_comp=1, 
num_vars=1)
+intrinsic("image_var_atomic_min",  src_comp=[4, 1,

[Mesa-dev] [PATCH 3/4] glsl/nir: fix variable type for image intrinsics and ubos

2018-04-03 Thread Karol Herbst
If the bindless image is passed through a struct we ended up getting the
glsl_type of the struct, not the image.

variable_referenced points to the declaration of the struct, so it won't work
for bindless images. So just drop it.

Signed-off-by: Karol Herbst 
---
 src/compiler/glsl/glsl_to_nir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 8e2d96a2361..1fc0cac4736 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -893,7 +893,7 @@ nir_visitor::visit(ir_call *ir)
  exec_node *param = ir->actual_parameters.get_head();
  ir_dereference *image = (ir_dereference *)param;
  const glsl_type *type =
-image->variable_referenced()->type->without_array();
+image->type->without_array();
 
  instr->variables[0] = evaluate_deref(&instr->instr, image);
  param = param->get_next();
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] nir: add support for bindless_texture samplers

2018-04-03 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
 src/compiler/nir/nir.h|  1 +
 src/compiler/nir/nir_print.c  |  3 +++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index dbb58d82e8f..8e2d96a2361 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
 {
unsigned num_srcs;
nir_texop op;
+   bool bindless = ir->sampler->variable_referenced()->contains_bindless();
+
switch (ir->op) {
case ir_tex:
   op = nir_texop_tex;
@@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
   num_srcs++;
if (ir->offset != NULL)
   num_srcs++;
+   if (bindless)
+  num_srcs=+;
 
nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
 
@@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
   unreachable("not reached");
}
 
-   instr->texture = evaluate_deref(&instr->instr, ir->sampler);
-
unsigned src_number = 0;
 
+   /* for bindless we use the handle src */
+   if (bindless) {
+  instr->texture = NULL;
+  instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->sampler));
+  instr->src[src_number].src_type = nir_tex_src_handle;
+  src_number++;
+   } else {
+  instr->texture = evaluate_deref(&instr->instr, ir->sampler);
+   }
+
if (ir->coordinate != NULL) {
   instr->coord_components = ir->coordinate->type->vector_elements;
   instr->src[src_number].src =
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f33049d7134..e4d626d263e 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1218,6 +1218,7 @@ typedef enum {
nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
nir_tex_src_plane,  /* < selects plane for planar textures */
+   nir_tex_src_handle, /* < handle for bindless samples */
nir_num_tex_src_types
 } nir_tex_src_type;
 
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 21f13097651..c9431555f2f 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -778,6 +778,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
   case nir_tex_src_plane:
  fprintf(fp, "(plane)");
  break;
+  case nir_tex_src_handle:
+ fprintf(fp, "(handle)");
+ break;
 
   default:
  unreachable("Invalid texture source type");
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-03 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/compiler/nir/nir_split_var_copies.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/compiler/nir/nir_split_var_copies.c 
b/src/compiler/nir/nir_split_var_copies.c
index bc3ceedbdb8..231a89add4d 100644
--- a/src/compiler/nir/nir_split_var_copies.c
+++ b/src/compiler/nir/nir_split_var_copies.c
@@ -241,6 +241,10 @@ split_var_copies_block(nir_block *block, struct 
split_var_copies_state *state)
 ralloc_steal(state->dead_ctx, instr);
  }
  break;
+  /* for bindless those are uint64 */
+  case GLSL_TYPE_IMAGE:
+  case GLSL_TYPE_SAMPLER:
+ assert(src_head->var->data.bindless);
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT16:
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] nir: add support for bindless_texture samplers

2018-04-03 Thread Karol Herbst
On Tue, Apr 3, 2018 at 3:21 PM, Karol Herbst  wrote:
> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
>  src/compiler/nir/nir.h|  1 +
>  src/compiler/nir/nir_print.c  |  3 +++
>  3 files changed, 19 insertions(+), 2 deletions(-)
>
> diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
> b/src/compiler/glsl/glsl_to_nir.cpp
> index dbb58d82e8f..8e2d96a2361 100644
> --- a/src/compiler/glsl/glsl_to_nir.cpp
> +++ b/src/compiler/glsl/glsl_to_nir.cpp
> @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
>  {
> unsigned num_srcs;
> nir_texop op;
> +   bool bindless = ir->sampler->variable_referenced()->contains_bindless();
> +
> switch (ir->op) {
> case ir_tex:
>op = nir_texop_tex;
> @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
>num_srcs++;
> if (ir->offset != NULL)
>num_srcs++;
> +   if (bindless)
> +  num_srcs=+;

small typo here, should have been "num_srcs++" instead.

>
> nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
>
> @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
>unreachable("not reached");
> }
>
> -   instr->texture = evaluate_deref(&instr->instr, ir->sampler);
> -
> unsigned src_number = 0;
>
> +   /* for bindless we use the handle src */
> +   if (bindless) {
> +  instr->texture = NULL;
> +  instr->src[src_number].src =
> + nir_src_for_ssa(evaluate_rvalue(ir->sampler));
> +  instr->src[src_number].src_type = nir_tex_src_handle;
> +  src_number++;
> +   } else {
> +  instr->texture = evaluate_deref(&instr->instr, ir->sampler);
> +   }
> +
> if (ir->coordinate != NULL) {
>instr->coord_components = ir->coordinate->type->vector_elements;
>instr->src[src_number].src =
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index f33049d7134..e4d626d263e 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1218,6 +1218,7 @@ typedef enum {
> nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
> nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
> nir_tex_src_plane,  /* < selects plane for planar textures */
> +   nir_tex_src_handle, /* < handle for bindless samples */
> nir_num_tex_src_types
>  } nir_tex_src_type;
>
> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
> index 21f13097651..c9431555f2f 100644
> --- a/src/compiler/nir/nir_print.c
> +++ b/src/compiler/nir/nir_print.c
> @@ -778,6 +778,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
>case nir_tex_src_plane:
>   fprintf(fp, "(plane)");
>   break;
> +  case nir_tex_src_handle:
> + fprintf(fp, "(handle)");
> + break;
>
>default:
>   unreachable("Invalid texture source type");
> --
> 2.14.3
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] nir: add support for bindless_texture samplers

2018-04-04 Thread Karol Herbst
On Wed, Apr 4, 2018 at 2:16 AM, Jason Ekstrand  wrote:
> On Tue, Apr 3, 2018 at 6:21 AM, Karol Herbst  wrote:
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
>>  src/compiler/nir/nir.h|  1 +
>>  src/compiler/nir/nir_print.c  |  3 +++
>>  3 files changed, 19 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
>> b/src/compiler/glsl/glsl_to_nir.cpp
>> index dbb58d82e8f..8e2d96a2361 100644
>> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
>>  {
>> unsigned num_srcs;
>> nir_texop op;
>> +   bool bindless =
>> ir->sampler->variable_referenced()->contains_bindless();
>> +
>> switch (ir->op) {
>> case ir_tex:
>>op = nir_texop_tex;
>> @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
>>num_srcs++;
>> if (ir->offset != NULL)
>>num_srcs++;
>> +   if (bindless)
>> +  num_srcs=+;
>>
>> nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
>>
>> @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
>>unreachable("not reached");
>> }
>>
>> -   instr->texture = evaluate_deref(&instr->instr, ir->sampler);
>> -
>> unsigned src_number = 0;
>>
>> +   /* for bindless we use the handle src */
>> +   if (bindless) {
>> +  instr->texture = NULL;
>> +  instr->src[src_number].src =
>> + nir_src_for_ssa(evaluate_rvalue(ir->sampler));
>> +  instr->src[src_number].src_type = nir_tex_src_handle;
>> +  src_number++;
>> +   } else {
>> +  instr->texture = evaluate_deref(&instr->instr, ir->sampler);
>> +   }
>> +
>> if (ir->coordinate != NULL) {
>>instr->coord_components = ir->coordinate->type->vector_elements;
>>instr->src[src_number].src =
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index f33049d7134..e4d626d263e 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -1218,6 +1218,7 @@ typedef enum {
>> nir_tex_src_texture_offset, /* < dynamically uniform indirect offset
>> */
>> nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset
>> */
>> nir_tex_src_plane,  /* < selects plane for planar textures */
>> +   nir_tex_src_handle, /* < handle for bindless samples */
>
>
> Do we want to have separate texture and sampler handles?  We don't care for
> GL but I kind-of think we will for Vulkan.
>

I don't know; I haven't looked into Vulkan yet. I could rename it to
sample_handle for now and we can add the texture handle later for
Vulkan?

>>
>> nir_num_tex_src_types
>>  } nir_tex_src_type;
>>
>> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
>> index 21f13097651..c9431555f2f 100644
>> --- a/src/compiler/nir/nir_print.c
>> +++ b/src/compiler/nir/nir_print.c
>> @@ -778,6 +778,9 @@ print_tex_instr(nir_tex_instr *instr, print_state
>> *state)
>>case nir_tex_src_plane:
>>   fprintf(fp, "(plane)");
>>   break;
>> +  case nir_tex_src_handle:
>> + fprintf(fp, "(handle)");
>> + break;
>>
>>default:
>>   unreachable("Invalid texture source type");
>> --
>> 2.14.3
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-04 Thread Karol Herbst
On Wed, Apr 4, 2018 at 2:23 AM, Jason Ekstrand  wrote:
> I have a very strong feeling that this isn't the only place where
> reading/writing IMAGE and SAMPLER variables is going to cause NIR heartburn.
> For example, we have special cases in nir_validate for SUBROUTINE variables
> and we probably need IMAGE and SAMPLER support everywhere we have SUBROUTINE
> plus some (since you can write to them now as well).
>

yeah. I was just making piglit happy here. I guess I will try to run
it with some games using bindless_textures and fix all the crashes I
encounter there at least. More piglit tests might be useful as well.
Sadly I don't see any bindless_textures tests in the CTS :(

>
> On Tue, Apr 3, 2018 at 6:21 AM, Karol Herbst  wrote:
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/compiler/nir/nir_split_var_copies.c | 4 
>>  1 file changed, 4 insertions(+)
>>
>> diff --git a/src/compiler/nir/nir_split_var_copies.c
>> b/src/compiler/nir/nir_split_var_copies.c
>> index bc3ceedbdb8..231a89add4d 100644
>> --- a/src/compiler/nir/nir_split_var_copies.c
>> +++ b/src/compiler/nir/nir_split_var_copies.c
>> @@ -241,6 +241,10 @@ split_var_copies_block(nir_block *block, struct
>> split_var_copies_state *state)
>>  ralloc_steal(state->dead_ctx, instr);
>>   }
>>   break;
>> +  /* for bindless those are uint64 */
>> +  case GLSL_TYPE_IMAGE:
>> +  case GLSL_TYPE_SAMPLER:
>> + assert(src_head->var->data.bindless);
>>case GLSL_TYPE_INT:
>>case GLSL_TYPE_UINT:
>>case GLSL_TYPE_INT16:
>> --
>> 2.14.3
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] RFC gallium: add 64 bit integer formats

2018-04-09 Thread Karol Herbst
The unsigned formats are needed for ARB_bindless_texture 64-bit vertex attribs;
both the signed and unsigned formats are needed for NV_vertex_attrib_integer64.

Fixes the new piglit sampler-vertex-attrib-input-output test I sent some days
ago for bindless_texture.

The change inside vbo_attrtype_to_double_flag is what I am most concerned
about. Maybe I should add another flag for 64-bit ints, or rework what Doubles
means in gl_array_attributes, or rename that to is64Bit and rework all users of
Doubles.

Any suggestions?

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/svga/svga_format.c |  8 
 src/gallium/include/pipe/p_format.h|  9 +
 src/mesa/main/glformats.c  |  3 +++
 src/mesa/state_tracker/st_atom_array.c | 30 +++---
 src/mesa/vbo/vbo_private.h |  2 +-
 5 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_format.c 
b/src/gallium/drivers/svga/svga_format.c
index 20a6e6b159f..f01a0e79c72 100644
--- a/src/gallium/drivers/svga/svga_format.c
+++ b/src/gallium/drivers/svga/svga_format.c
@@ -369,6 +369,14 @@ static const struct vgpu10_format_entry 
format_conversion_table[] =
{ PIPE_FORMAT_A1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
{ PIPE_FORMAT_X1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
{ PIPE_FORMAT_A4B4G4R4_UNORM,SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64_UINT,  SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64G64_UINT,   SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64G64B64_UINT,SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64G64B64A64_UINT, SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64_SINT,  SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64G64_SINT,   SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64G64B64_SINT,SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
+   { PIPE_FORMAT_R64G64B64A64_SINT, SVGA3D_FORMAT_INVALID,  
SVGA3D_FORMAT_INVALID,   0 },
 };
 
 
diff --git a/src/gallium/include/pipe/p_format.h 
b/src/gallium/include/pipe/p_format.h
index 57399800fa4..df698856b70 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -396,6 +396,15 @@ enum pipe_format {
PIPE_FORMAT_X1B5G5R5_UNORM  = 310,
PIPE_FORMAT_A4B4G4R4_UNORM  = 311,
 
+   PIPE_FORMAT_R64_UINT= 312,
+   PIPE_FORMAT_R64G64_UINT = 313,
+   PIPE_FORMAT_R64G64B64_UINT  = 314,
+   PIPE_FORMAT_R64G64B64A64_UINT   = 315,
+   PIPE_FORMAT_R64_SINT= 316,
+   PIPE_FORMAT_R64G64_SINT = 317,
+   PIPE_FORMAT_R64G64B64_SINT  = 318,
+   PIPE_FORMAT_R64G64B64A64_SINT   = 319,
+
PIPE_FORMAT_COUNT
 };
 
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index 1e797c24c2a..feafd97f5ee 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -543,6 +543,9 @@ _mesa_bytes_per_vertex_attrib(GLint comps, GLenum type)
case GL_INT:
case GL_UNSIGNED_INT:
   return comps * sizeof(GLint);
+   /* ARB_bindless_texture */
+   case GL_UNSIGNED_INT64_ARB:
+  return comps * sizeof(GLuint64EXT);
case GL_FLOAT:
   return comps * sizeof(GLfloat);
case GL_HALF_FLOAT_ARB:
diff --git a/src/mesa/state_tracker/st_atom_array.c 
b/src/mesa/state_tracker/st_atom_array.c
index 2fd67e8d840..1c3f677d4bf 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -230,6 +230,27 @@ static const uint16_t vertex_formats[][4][4] = {
  PIPE_FORMAT_R32G32B32A32_FIXED
   },
},
+   {{0}}, /* gap */
+   { /* GL_INT64_ARB */
+  {0},
+  {0},
+  {
+ PIPE_FORMAT_R64_SINT,
+ PIPE_FORMAT_R64G64_SINT,
+ PIPE_FORMAT_R64G64B64_SINT,
+ PIPE_FORMAT_R64G64B64A64_SINT
+  },
+   },
+   { /* GL_UNSIGNED_INT64_ARB */
+  {0},
+  {0},
+  {
+ PIPE_FORMAT_R64_UINT,
+ PIPE_FORMAT_R64G64_UINT,
+ PIPE_FORMAT_R64G64B64_UINT,
+ PIPE_FORMAT_R64G64B64A64_UINT
+  },
+   },
 };
 
 
@@ -244,7 +265,7 @@ st_pipe_vertex_format(const struct gl_array_attributes 
*attrib)
const bool normalized = attrib->Normalized;
const bool integer = attrib->Integer;
GLenum16 type = attrib->Type;
-   unsigned index;
+   unsigned index = integer*2 + normalized;
 
assert(size >= 1 && size <= 4);
assert(format == GL_RGBA || format == GL_BGRA);
@@ -298,11 +319,14 @@ st_pipe_vertex_format(const struct gl_array_attributes 
*attrib)
  return PIPE_FORMAT_B8G8R8A8_UNORM;
   }
   break;
+   case GL_UNSIGNED_INT64_ARB:
+   case GL_INT64_ARB:
+

Re: [Mesa-dev] [PATCH] RFC gallium: add 64 bit integer formats

2018-04-09 Thread Karol Herbst
On Tue, Apr 10, 2018 at 2:43 AM, Ilia Mirkin  wrote:
> On Mon, Apr 9, 2018 at 8:39 PM, Karol Herbst  wrote:
>> unsigneds are needed by ARB_bindless_texture 64 bit vertex attribs, both for
>> NV_vertex_attrib_integer64.
>>
>> Fixes the new piglit sampler-vertex-attrib-input-output test I sent some days
>> ago for bindless_texture.
>>
>> The change inside vbo_attrtype_to_double_flag is what I am most concerned
>> about. Maybe I should add another flag for 64 bit ints. Or rework what 
>> Doubles
>> mean in gl_array_attributes. Or Rename that to is64Bit and rework all users 
>> of
>> Doubles.
>>
>> Any suggestions?
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/gallium/drivers/svga/svga_format.c |  8 
>>  src/gallium/include/pipe/p_format.h|  9 +
>>  src/mesa/main/glformats.c  |  3 +++
>>  src/mesa/state_tracker/st_atom_array.c | 30 +++---
>>  src/mesa/vbo/vbo_private.h |  2 +-
>>  5 files changed, 48 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/gallium/drivers/svga/svga_format.c 
>> b/src/gallium/drivers/svga/svga_format.c
>> index 20a6e6b159f..f01a0e79c72 100644
>> --- a/src/gallium/drivers/svga/svga_format.c
>> +++ b/src/gallium/drivers/svga/svga_format.c
>> @@ -369,6 +369,14 @@ static const struct vgpu10_format_entry 
>> format_conversion_table[] =
>> { PIPE_FORMAT_A1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> { PIPE_FORMAT_X1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> { PIPE_FORMAT_A4B4G4R4_UNORM,SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64_UINT,  SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64G64_UINT,   SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64G64B64_UINT,SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64G64B64A64_UINT, SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64_SINT,  SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64G64_SINT,   SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64G64B64_SINT,SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>> +   { PIPE_FORMAT_R64G64B64A64_SINT, SVGA3D_FORMAT_INVALID,  
>> SVGA3D_FORMAT_INVALID,   0 },
>>  };
>>
>>
>> diff --git a/src/gallium/include/pipe/p_format.h 
>> b/src/gallium/include/pipe/p_format.h
>> index 57399800fa4..df698856b70 100644
>> --- a/src/gallium/include/pipe/p_format.h
>> +++ b/src/gallium/include/pipe/p_format.h
>> @@ -396,6 +396,15 @@ enum pipe_format {
>> PIPE_FORMAT_X1B5G5R5_UNORM  = 310,
>> PIPE_FORMAT_A4B4G4R4_UNORM  = 311,
>>
>> +   PIPE_FORMAT_R64_UINT= 312,
>> +   PIPE_FORMAT_R64G64_UINT = 313,
>> +   PIPE_FORMAT_R64G64B64_UINT  = 314,
>> +   PIPE_FORMAT_R64G64B64A64_UINT   = 315,
>> +   PIPE_FORMAT_R64_SINT= 316,
>> +   PIPE_FORMAT_R64G64_SINT = 317,
>> +   PIPE_FORMAT_R64G64B64_SINT  = 318,
>> +   PIPE_FORMAT_R64G64B64A64_SINT   = 319,
>> +
>> PIPE_FORMAT_COUNT
>>  };
>>
>> diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
>> index 1e797c24c2a..feafd97f5ee 100644
>> --- a/src/mesa/main/glformats.c
>> +++ b/src/mesa/main/glformats.c
>> @@ -543,6 +543,9 @@ _mesa_bytes_per_vertex_attrib(GLint comps, GLenum type)
>> case GL_INT:
>> case GL_UNSIGNED_INT:
>>return comps * sizeof(GLint);
>> +   /* ARB_bindless_texture */
>> +   case GL_UNSIGNED_INT64_ARB:
>> +  return comps * sizeof(GLuint64EXT);
>> case GL_FLOAT:
>>return comps * sizeof(GLfloat);
>> case GL_HALF_FLOAT_ARB:
>> diff --git a/src/mesa/state_tracker/st_atom_array.c 
>> b/src/mesa/state_tracker/st_atom_array.c
>> index 2fd67e8d840..1c3f677d4bf 100644
>> --- a/src/mesa/state_tracker/st_atom_array.c
>> +++ b/src/mesa/state_tracker/st_atom_array.c
>> @@ -230,6 +230,27 @@ static const uint16_t vertex_formats[][4][4] = {
>>   PIPE_FORMAT_R32G32B32A32_FIXED
>>},
>> },
>> +   {{0}}, /* gap */
>> +   { /* GL_INT64_ARB */
&g

Re: [Mesa-dev] [PATCH v2] nv50/ir: make a copy of tex src if it's referenced multiple times

2018-04-10 Thread Karol Herbst
I guess this fixes a bug somewhere?

On Tue, Apr 10, 2018 at 6:11 AM, Ilia Mirkin  wrote:
> For nv50 we coalesce the srcs and defs into a single node. As such, we
> can end up with impossible constraints if the source is referenced
> after the tex operation (which, due to the coalescing of values, will
> have overwritten it).
>
> This logic already exists for inserting moves for MERGE/UNION sources.
> It's the exact same idea here, so leverage that code, which also
> includes a few optimizations around not extending live ranges
> unnecessarily.
>
> Signed-off-by: Ilia Mirkin 
> ---
>
> v1 -> v2: make use of existing logic in insertConstraintMoves
>
>  src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 86 
> --
>  1 file changed, 49 insertions(+), 37 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> index 3a0e56e1385..7d107aca68d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> @@ -257,6 +257,7 @@ private:
> private:
>virtual bool visit(BasicBlock *);
>
> +  void insertConstraintMove(Instruction *, int s);
>bool insertConstraintMoves();
>
>void condenseDefs(Instruction *);
> @@ -2216,6 +2217,8 @@ 
> RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex)
> for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) {
>if (!tex->srcExists(c))
>   tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue()));
> +  else
> + insertConstraintMove(tex, c);
>if (!tex->defExists(c))
>   tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue()));
> }
> @@ -2288,6 +2291,51 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
> return true;
>  }
>
> +void
> +RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int 
> s)
> +{
> +   const uint8_t size = cst->src(s).getSize();
> +
> +   assert(cst->getSrc(s)->defs.size() == 1); // still SSA
> +
> +   Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
> +   bool imm = defi->op == OP_MOV &&
> +  defi->src(0).getFile() == FILE_IMMEDIATE;
> +   bool load = defi->op == OP_LOAD &&
> +  defi->src(0).getFile() == FILE_MEMORY_CONST &&
> +  !defi->src(0).isIndirect(0);
> +   // catch some cases where don't really need MOVs
> +   if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) {
> +  if (imm || load) {
> + // Move the defi right before the cst. No point in expanding
> + // the range.
> + defi->bb->remove(defi);
> + cst->bb->insertBefore(cst, defi);
> +  }
> +  return;
> +   }
> +
> +   LValue *lval = new_LValue(func, cst->src(s).getFile());
> +   lval->reg.size = size;
> +
> +   Instruction *mov = new_Instruction(func, OP_MOV, typeOfSize(size));
> +   mov->setDef(0, lval);
> +   mov->setSrc(0, cst->getSrc(s));
> +
> +   if (load) {
> +  mov->op = OP_LOAD;
> +  mov->setSrc(0, defi->getSrc(0));
> +   } else if (imm) {
> +  mov->setSrc(0, defi->getSrc(0));
> +   }
> +
> +   if (defi->getPredicate())
> +  mov->setPredicate(defi->cc, defi->getPredicate());
> +
> +   cst->setSrc(s, mov->getDef(0));
> +   cst->bb->insertBefore(cst, mov);
> +}
> +
>  // Insert extra moves so that, if multiple register constraints on a value 
> are
>  // in conflict, these conflicts can be resolved.
>  bool
> @@ -2328,46 +2376,10 @@ 
> RegAlloc::InsertConstraintsPass::insertConstraintMoves()
> cst->bb->insertBefore(cst, mov);
> continue;
>  }
> -assert(cst->getSrc(s)->defs.size() == 1); // still SSA
> -
> -Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
> -bool imm = defi->op == OP_MOV &&
> -   defi->src(0).getFile() == FILE_IMMEDIATE;
> -bool load = defi->op == OP_LOAD &&
> -   defi->src(0).getFile() == FILE_MEMORY_CONST &&
> -   !defi->src(0).isIndirect(0);
> -// catch some cases where don't really need MOVs
> -if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) 
> {
> -   if (imm || load) {
> -  // Move the defi right before the cst. No point in 
> expanding
> -  // the range.
> -  defi->bb->remove(defi);
> -  cst->bb->insertBefore(cst, defi);
> -   }
> -   continue;
> -}
>
> -LValue *lval = new_LValue(func, cst->src(s).getFile());
> -lval->reg.size = size;
> -
> -mov = new_Instruction(func, OP_MOV, typeOfSize(size));
> -mov->setDef(0, lval);
> -mov->setSrc(0, cst->getSrc(s));
> -
> -if (load) {
> -   mov->op = OP_LOAD;
> -   mov->setSrc(0, defi->getSrc(0));
> -} else if (imm) {
> -   mov->setSrc

[Mesa-dev] [PATCH] glsl: properly handle bindless sampler and image parameters

2018-04-10 Thread Karol Herbst
fixes a piglit test I sent to the list:
spec@arb_bindless_texture@execution@samplers@basic-arithmetic-func-call-uvec2-texture2D

Signed-off-by: Karol Herbst 
---
 src/compiler/glsl/opt_function_inlining.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/opt_function_inlining.cpp 
b/src/compiler/glsl/opt_function_inlining.cpp
index 04690b6cf45..3d00074bbc3 100644
--- a/src/compiler/glsl/opt_function_inlining.cpp
+++ b/src/compiler/glsl/opt_function_inlining.cpp
@@ -155,7 +155,7 @@ ir_call::generate_inline(ir_instruction *next_ir)
   ir_rvalue *param = (ir_rvalue *) actual_node;
 
   /* Generate a new variable for the parameter. */
-  if (sig_param->type->contains_opaque()) {
+  if (!sig_param->contains_bindless() && 
sig_param->type->contains_opaque()) {
 /* For opaque types, we want the inlined variable references
  * referencing the passed in variable, since that will have
  * the location information, which an assignment of an opaque
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/3] nir: add support for ARB_bindless_texture texture handles

2018-04-10 Thread Karol Herbst
With this it should be possible to add support for texture handles for backends
using NIR.

changes since v1:
* dropped patch for image handles, still need to work on that

Karol Herbst (3):
  nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars
  nir: add support for bindless_texture samplers
  glsl/nir: fix variable type for image intrinsics and ubos

 src/compiler/glsl/glsl_to_nir.cpp   | 19 ---
 src/compiler/nir/nir.h  |  2 ++
 src/compiler/nir/nir_print.c|  6 ++
 src/compiler/nir/nir_split_var_copies.c |  8 +++-
 4 files changed, 31 insertions(+), 4 deletions(-)

-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-10 Thread Karol Herbst
v2: fix assertion for bindless to non bindless assignments

Signed-off-by: Karol Herbst 
---
 src/compiler/nir/nir_split_var_copies.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_split_var_copies.c 
b/src/compiler/nir/nir_split_var_copies.c
index bc3ceedbdb8..e592754d770 100644
--- a/src/compiler/nir/nir_split_var_copies.c
+++ b/src/compiler/nir/nir_split_var_copies.c
@@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct 
split_var_copies_state *state)
   nir_deref_var *src_head = intrinsic->variables[1];
   nir_deref *dest_tail = nir_deref_tail(&dest_head->deref);
   nir_deref *src_tail = nir_deref_tail(&src_head->deref);
+  enum glsl_base_type base_type = glsl_get_base_type(src_tail->type);
 
-  switch (glsl_get_base_type(src_tail->type)) {
+  switch (base_type) {
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT:
  split_var_copy_instr(intrinsic, dest_head, src_head,
@@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct 
split_var_copies_state *state)
 ralloc_steal(state->dead_ctx, instr);
  }
  break;
+  /* for bindless those are uint64 */
+  case GLSL_TYPE_IMAGE:
+  case GLSL_TYPE_SAMPLER:
+ assert(src_head->var->data.bindless ||
+glsl_get_base_type(src_head->var->type) == base_type);
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT16:
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/3] glsl/nir: fix variable type for image intrinsics and ubos

2018-04-10 Thread Karol Herbst
If the bindless image is passed through a struct, we end up getting the
glsl_type of the struct, not the image.

variable_referenced points to the declaration of the struct, so it won't work
for bindless images. So just drop it.

Signed-off-by: Karol Herbst 
---
 src/compiler/glsl/glsl_to_nir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 9f233637306..bb9ba3af04a 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -893,7 +893,7 @@ nir_visitor::visit(ir_call *ir)
  exec_node *param = ir->actual_parameters.get_head();
  ir_dereference *image = (ir_dereference *)param;
  const glsl_type *type =
-image->variable_referenced()->type->without_array();
+image->type->without_array();
 
  instr->variables[0] = evaluate_deref(&instr->instr, image);
  param = param->get_next();
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/3] nir: add support for bindless_texture samplers

2018-04-10 Thread Karol Herbst
v2: add both texture and sampler handles

Signed-off-by: Karol Herbst 
---
 src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
 src/compiler/nir/nir.h|  2 ++
 src/compiler/nir/nir_print.c  |  6 ++
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index dbb58d82e8f..9f233637306 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
 {
unsigned num_srcs;
nir_texop op;
+   bool bindless = ir->sampler->variable_referenced()->contains_bindless();
+
switch (ir->op) {
case ir_tex:
   op = nir_texop_tex;
@@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
   num_srcs++;
if (ir->offset != NULL)
   num_srcs++;
+   if (bindless)
+  num_srcs++;
 
nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
 
@@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
   unreachable("not reached");
}
 
-   instr->texture = evaluate_deref(&instr->instr, ir->sampler);
-
unsigned src_number = 0;
 
+   /* for bindless we use the texture handle src */
+   if (bindless) {
+  instr->texture = NULL;
+  instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->sampler));
+  instr->src[src_number].src_type = nir_tex_src_texture_handle;
+  src_number++;
+   } else {
+  instr->texture = evaluate_deref(&instr->instr, ir->sampler);
+   }
+
if (ir->coordinate != NULL) {
   instr->coord_components = ir->coordinate->type->vector_elements;
   instr->src[src_number].src =
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f33049d7134..e395352f89c 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1218,6 +1218,8 @@ typedef enum {
nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
nir_tex_src_plane,  /* < selects plane for planar textures */
+   nir_tex_src_texture_handle, /* < handle for bindless texture */
+   nir_tex_src_sampler_handle, /* < handle for bindless sampler */
nir_num_tex_src_types
 } nir_tex_src_type;
 
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 21f13097651..52f20b1eb10 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -778,6 +778,12 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
   case nir_tex_src_plane:
  fprintf(fp, "(plane)");
  break;
+  case nir_tex_src_texture_handle:
+ fprintf(fp, "(texture_handle)");
+ break;
+  case nir_tex_src_sampler_handle:
+ fprintf(fp, "(sampler_handle)");
+ break;
 
   default:
  unreachable("Invalid texture source type");
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-10 Thread Karol Herbst
On Tue, Apr 10, 2018 at 5:12 PM, Jason Ekstrand  wrote:
> I still don't see anything to make nir_validate not fail out on you if it
> sees a read or a write to/from an IMAGE or SAMPLER.
>

What kind of GLSL code are you talking about here? I wrote some tests
and things just seem to work out. I wasn't able to hit any other
issues.

> On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst  wrote:
>>
>> v2: fix assertion for bindless to non bindless assignments
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/compiler/nir/nir_split_var_copies.c | 8 +++-
>>  1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/compiler/nir/nir_split_var_copies.c
>> b/src/compiler/nir/nir_split_var_copies.c
>> index bc3ceedbdb8..e592754d770 100644
>> --- a/src/compiler/nir/nir_split_var_copies.c
>> +++ b/src/compiler/nir/nir_split_var_copies.c
>> @@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct
>> split_var_copies_state *state)
>>nir_deref_var *src_head = intrinsic->variables[1];
>>nir_deref *dest_tail = nir_deref_tail(&dest_head->deref);
>>nir_deref *src_tail = nir_deref_tail(&src_head->deref);
>> +  enum glsl_base_type base_type = glsl_get_base_type(src_tail->type);
>>
>> -  switch (glsl_get_base_type(src_tail->type)) {
>> +  switch (base_type) {
>>case GLSL_TYPE_ARRAY:
>>case GLSL_TYPE_STRUCT:
>>   split_var_copy_instr(intrinsic, dest_head, src_head,
>> @@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct
>> split_var_copies_state *state)
>>  ralloc_steal(state->dead_ctx, instr);
>>   }
>>   break;
>> +  /* for bindless those are uint64 */
>> +  case GLSL_TYPE_IMAGE:
>> +  case GLSL_TYPE_SAMPLER:
>> + assert(src_head->var->data.bindless ||
>> +glsl_get_base_type(src_head->var->type) == base_type);
>>case GLSL_TYPE_INT:
>>case GLSL_TYPE_UINT:
>>case GLSL_TYPE_INT16:
>> --
>> 2.14.3
>>
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 3/3] glsl/nir: fix variable type for image intrinsics and ubos

2018-04-10 Thread Karol Herbst
On Tue, Apr 10, 2018 at 5:11 PM, Jason Ekstrand  wrote:
> On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst  wrote:
>>
>> If the bindless image is passed through a struct we ended up getting the
>> glsl_type of the struct, not the image.
>>
>> variable_referenced points to the declaration of the struct, so it won't
>> work
>> for bindless images. So just drop it.
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/compiler/glsl/glsl_to_nir.cpp | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
>> b/src/compiler/glsl/glsl_to_nir.cpp
>> index 9f233637306..bb9ba3af04a 100644
>> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> @@ -893,7 +893,7 @@ nir_visitor::visit(ir_call *ir)
>>   exec_node *param = ir->actual_parameters.get_head();
>>   ir_dereference *image = (ir_dereference *)param;
>>   const glsl_type *type =
>> -image->variable_referenced()->type->without_array();
>> +image->type->without_array();
>
>
> I asked this question on the last version as well: Do we really need
> without_array()?
>

I don't think so actually, because it should be the sampler type
already. I just forgot about that.

>>
>>   instr->variables[0] = evaluate_deref(&instr->instr, image);
>>   param = param->get_next();
>> --
>> 2.14.3
>>
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-10 Thread Karol Herbst
On Tue, Apr 10, 2018 at 6:01 PM, Jason Ekstrand  wrote:
> On Tue, Apr 10, 2018 at 8:35 AM, Karol Herbst  wrote:
>>
>> On Tue, Apr 10, 2018 at 5:12 PM, Jason Ekstrand 
>> wrote:
>> > I still don't see anything to make nir_validate not fail out on you if
>> > it
>> > sees a read or a write to/from an IMAGE or SAMPLER.
>> >
>>
>> what kind of glsl code are you talking about here? I wrote some tests
>> and things just seem to work out. I wasn't able to hit any other
>> issues.
>
>
> Were they tests where GLSL was able to copy propagate such that NIR never
> saw a write to the image/sampler variable?
>

Well, the trivial one is where you directly consume the uniform.

>>
>> > On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst 
>> > wrote:
>> >>
>> >> v2: fix assertion for bindless to non bindless assignments
>> >>
>> >> Signed-off-by: Karol Herbst 
>> >> ---
>> >>  src/compiler/nir/nir_split_var_copies.c | 8 +++-
>> >>  1 file changed, 7 insertions(+), 1 deletion(-)
>> >>
>> >> diff --git a/src/compiler/nir/nir_split_var_copies.c
>> >> b/src/compiler/nir/nir_split_var_copies.c
>> >> index bc3ceedbdb8..e592754d770 100644
>> >> --- a/src/compiler/nir/nir_split_var_copies.c
>> >> +++ b/src/compiler/nir/nir_split_var_copies.c
>> >> @@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct
>> >> split_var_copies_state *state)
>> >>nir_deref_var *src_head = intrinsic->variables[1];
>> >>nir_deref *dest_tail = nir_deref_tail(&dest_head->deref);
>> >>nir_deref *src_tail = nir_deref_tail(&src_head->deref);
>> >> +  enum glsl_base_type base_type =
>> >> glsl_get_base_type(src_tail->type);
>> >>
>> >> -  switch (glsl_get_base_type(src_tail->type)) {
>> >> +  switch (base_type) {
>> >>case GLSL_TYPE_ARRAY:
>> >>case GLSL_TYPE_STRUCT:
>> >>   split_var_copy_instr(intrinsic, dest_head, src_head,
>> >> @@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct
>> >> split_var_copies_state *state)
>> >>  ralloc_steal(state->dead_ctx, instr);
>> >>   }
>> >>   break;
>> >> +  /* for bindless those are uint64 */
>> >> +  case GLSL_TYPE_IMAGE:
>> >> +  case GLSL_TYPE_SAMPLER:
>> >> + assert(src_head->var->data.bindless ||
>> >> +glsl_get_base_type(src_head->var->type) == base_type);
>> >>case GLSL_TYPE_INT:
>> >>case GLSL_TYPE_UINT:
>> >>case GLSL_TYPE_INT16:
>> >> --
>> >> 2.14.3
>> >>
>> >
>> >
>> > ___
>> > mesa-dev mailing list
>> > mesa-dev@lists.freedesktop.org
>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> >
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/3] nir: add support for bindless_texture samplers

2018-04-12 Thread Karol Herbst
On Tue, Apr 10, 2018 at 5:10 PM, Jason Ekstrand  wrote:
> On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst  wrote:
>>
>> v2: add both texture and sampler handles
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
>>  src/compiler/nir/nir.h|  2 ++
>>  src/compiler/nir/nir_print.c  |  6 ++
>>  3 files changed, 23 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
>> b/src/compiler/glsl/glsl_to_nir.cpp
>> index dbb58d82e8f..9f233637306 100644
>> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
>>  {
>> unsigned num_srcs;
>> nir_texop op;
>> +   bool bindless =
>> ir->sampler->variable_referenced()->contains_bindless();
>
>
> What happens if I have a uniform struct containing both a regular sampler
> and a bindless sampler?  I think this should be possible.
>

Well, currently Mesa just fails to compile, but even if it did, I
don't see how we would know from an ir_dereference whether we
reference a bindless or a bound sampler.

The glsl_type doesn't tell us either, so maybe it makes sense to add an
is_bindless method to glsl_type so that we can use it in places like
this? ir->sampler->type gives me the sampler type, but lacks the
information on whether it is bindless or not. Any thoughts?

>>
>> +
>> switch (ir->op) {
>> case ir_tex:
>>op = nir_texop_tex;
>> @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
>>num_srcs++;
>> if (ir->offset != NULL)
>>num_srcs++;
>> +   if (bindless)
>> +  num_srcs++;
>>
>> nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
>>
>> @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
>>unreachable("not reached");
>> }
>>
>> -   instr->texture = evaluate_deref(&instr->instr, ir->sampler);
>> -
>> unsigned src_number = 0;
>>
>> +   /* for bindless we use the texture handle src */
>> +   if (bindless) {
>> +  instr->texture = NULL;
>> +  instr->src[src_number].src =
>> + nir_src_for_ssa(evaluate_rvalue(ir->sampler));
>> +  instr->src[src_number].src_type = nir_tex_src_texture_handle;
>> +  src_number++;
>> +   } else {
>> +  instr->texture = evaluate_deref(&instr->instr, ir->sampler);
>> +   }
>> +
>> if (ir->coordinate != NULL) {
>>instr->coord_components = ir->coordinate->type->vector_elements;
>>instr->src[src_number].src =
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index f33049d7134..e395352f89c 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -1218,6 +1218,8 @@ typedef enum {
>> nir_tex_src_texture_offset, /* < dynamically uniform indirect offset
>> */
>> nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset
>> */
>> nir_tex_src_plane,  /* < selects plane for planar textures */
>> +   nir_tex_src_texture_handle, /* < handle for bindless texture */
>> +   nir_tex_src_sampler_handle, /* < handle for bindless sampler */
>> nir_num_tex_src_types
>>  } nir_tex_src_type;
>>
>> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
>> index 21f13097651..52f20b1eb10 100644
>> --- a/src/compiler/nir/nir_print.c
>> +++ b/src/compiler/nir/nir_print.c
>> @@ -778,6 +778,12 @@ print_tex_instr(nir_tex_instr *instr, print_state
>> *state)
>>case nir_tex_src_plane:
>>   fprintf(fp, "(plane)");
>>   break;
>> +  case nir_tex_src_texture_handle:
>> + fprintf(fp, "(texture_handle)");
>> + break;
>> +  case nir_tex_src_sampler_handle:
>> + fprintf(fp, "(sampler_handle)");
>> + break;
>>
>>default:
>>   unreachable("Invalid texture source type");
>> --
>> 2.14.3
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/3] nir: add support for bindless_texture samplers

2018-04-12 Thread Karol Herbst
On Thu, Apr 12, 2018 at 6:33 PM, Jason Ekstrand  wrote:
> On Thu, Apr 12, 2018 at 7:36 AM, Karol Herbst  wrote:
>>
>> On Tue, Apr 10, 2018 at 5:10 PM, Jason Ekstrand 
>> wrote:
>> > On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst 
>> > wrote:
>> >>
>> >> v2: add both texture and sampler handles
>> >>
>> >> Signed-off-by: Karol Herbst 
>> >> ---
>> >>  src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
>> >>  src/compiler/nir/nir.h|  2 ++
>> >>  src/compiler/nir/nir_print.c  |  6 ++
>> >>  3 files changed, 23 insertions(+), 2 deletions(-)
>> >>
>> >> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
>> >> b/src/compiler/glsl/glsl_to_nir.cpp
>> >> index dbb58d82e8f..9f233637306 100644
>> >> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> >> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> >> @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
>> >>  {
>> >> unsigned num_srcs;
>> >> nir_texop op;
>> >> +   bool bindless =
>> >> ir->sampler->variable_referenced()->contains_bindless();
>> >
>> >
>> > What happens if I have a uniform struct containing both a regular
>> > sampler
>> > and a bindless sampler?  I think this should be possible.
>> >
>>
>> well currently mesa just fails to compile, but even if it would I
>> don't see a way how we know with a ir_dereference if we reference a
>> bindless or bound sampler.
>>
>> The glsl_type doesn't tell us either and maybe it makes sense to add a
>> is_bindless method to glsl_type so that we can use it in places like
>> here? ir->sampler->type gives me the sampler type, but lacks the
>> information if it is bindless or not. Any thoughts?
>
>
> That seems like it's probably reasonable.  I'm not sure if we really want
> different types.  Another option would be to handle it as a layout qualifier
> on the structure type fields.  I'm not sure which is better.
>

I think we should add a field and add an is_opaque method to fix
glsl_type::contains_opaque, which is also broken, but we could do that
with a new type as well :(

>>
>> >>
>> >> +
>> >> switch (ir->op) {
>> >> case ir_tex:
>> >>op = nir_texop_tex;
>> >> @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
>> >>num_srcs++;
>> >> if (ir->offset != NULL)
>> >>num_srcs++;
>> >> +   if (bindless)
>> >> +  num_srcs++;
>> >>
>> >> nir_tex_instr *instr = nir_tex_instr_create(this->shader,
>> >> num_srcs);
>> >>
>> >> @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
>> >>unreachable("not reached");
>> >> }
>> >>
>> >> -   instr->texture = evaluate_deref(&instr->instr, ir->sampler);
>> >> -
>> >> unsigned src_number = 0;
>> >>
>> >> +   /* for bindless we use the texture handle src */
>> >> +   if (bindless) {
>> >> +  instr->texture = NULL;
>> >> +  instr->src[src_number].src =
>> >> + nir_src_for_ssa(evaluate_rvalue(ir->sampler));
>> >> +  instr->src[src_number].src_type = nir_tex_src_texture_handle;
>> >> +  src_number++;
>> >> +   } else {
>> >> +  instr->texture = evaluate_deref(&instr->instr, ir->sampler);
>> >> +   }
>> >> +
>> >> if (ir->coordinate != NULL) {
>> >>instr->coord_components = ir->coordinate->type->vector_elements;
>> >>instr->src[src_number].src =
>> >> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> >> index f33049d7134..e395352f89c 100644
>> >> --- a/src/compiler/nir/nir.h
>> >> +++ b/src/compiler/nir/nir.h
>> >> @@ -1218,6 +1218,8 @@ typedef enum {
>> >> nir_tex_src_texture_offset, /* < dynamically uniform indirect
>> >> offset
>> >> */
>> >> nir_tex_src_sampler_offset, /* < dynamically uniform indirect
>> >> offset
>> >> */
>> >> nir_tex_src_plane,  /* < selects plane for planar textures
>> >> */
>> >> +   nir_tex_src_texture_handle, /* < handle for bindless texture */
>> >> +   nir_tex_src_sampler_handle, /* < handle for bindless sampler */
>> >> nir_num_tex_src_types
>> >>  } nir_tex_src_type;
>> >>
>> >> diff --git a/src/compiler/nir/nir_print.c
>> >> b/src/compiler/nir/nir_print.c
>> >> index 21f13097651..52f20b1eb10 100644
>> >> --- a/src/compiler/nir/nir_print.c
>> >> +++ b/src/compiler/nir/nir_print.c
>> >> @@ -778,6 +778,12 @@ print_tex_instr(nir_tex_instr *instr, print_state
>> >> *state)
>> >>case nir_tex_src_plane:
>> >>   fprintf(fp, "(plane)");
>> >>   break;
>> >> +  case nir_tex_src_texture_handle:
>> >> + fprintf(fp, "(texture_handle)");
>> >> + break;
>> >> +  case nir_tex_src_sampler_handle:
>> >> + fprintf(fp, "(sampler_handle)");
>> >> + break;
>> >>
>> >>default:
>> >>   unreachable("Invalid texture source type");
>> >> --
>> >> 2.14.3
>> >>
>> >
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 01/35] st/glsl_to_nir: run lower_output_reads on !PIPE_CAP_TGSI_CAN_READ_OUTPUTS

2018-04-16 Thread Karol Herbst
This is required for drivers which don't allow reading from outputs.

Reviewed-by: Timothy Arceri 
Signed-off-by: Karol Herbst 
---
 src/mesa/state_tracker/st_glsl_to_nir.cpp | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index bcf6a7ceb6a..6502aec370f 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -44,6 +44,7 @@
 #include "compiler/glsl_types.h"
 #include "compiler/glsl/glsl_to_nir.h"
 #include "compiler/glsl/ir.h"
+#include "compiler/glsl/ir_optimization.h"
 #include "compiler/glsl/string_to_uint_map.h"
 
 
@@ -553,6 +554,7 @@ st_nir_get_mesa_program(struct gl_context *ctx,
 struct gl_linked_shader *shader)
 {
struct st_context *st = st_context(ctx);
+   struct pipe_screen *pscreen = ctx->st->pipe->screen;
struct gl_program *prog;
 
validate_ir_tree(shader->ir);
@@ -565,6 +567,10 @@ st_nir_get_mesa_program(struct gl_context *ctx,
_mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
prog->Parameters);
 
+   /* Remove reads from output registers. */
+   if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS))
+  lower_output_reads(shader->Stage, shader->ir);
+
if (ctx->_Shader->Flags & GLSL_DUMP) {
   _mesa_log("\n");
   _mesa_log("GLSL IR for linked %s program %d:\n",
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 07/35] nouveau: add env var to make nir default

2018-04-16 Thread Karol Herbst
v2: allow for non debug builds as well
v3: read the env var in a more global place
disable tg4 with multiple offsets with nir
disable caps for 64 bit types
v6: nv50 support
disable MS images
disable bindless textures

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/nouveau_screen.c   |  5 +
 src/gallium/drivers/nouveau/nouveau_screen.h   |  2 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |  4 +++-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 19 +--
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c 
b/src/gallium/drivers/nouveau/nouveau_screen.c
index c144b39b2dd..2598c78a45b 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -176,9 +176,14 @@ nouveau_screen_init(struct nouveau_screen *screen, struct 
nouveau_device *dev)
union nouveau_bo_config mm_config;
 
char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
+   char *use_nir = getenv("NV50_PROG_USE_NIR");
+
if (nv_dbg)
   nouveau_mesa_debug = atoi(nv_dbg);
 
+   if (use_nir)
+  screen->prefer_nir = strtol(use_nir, NULL, 0) == 1;
+
/* These must be set before any failure is possible, as the cleanup
 * paths assume they're responsible for deleting them.
 */
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h 
b/src/gallium/drivers/nouveau/nouveau_screen.h
index e4fbae99ca4..1229b66b26f 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -62,6 +62,8 @@ struct nouveau_screen {
 
struct disk_cache *disk_shader_cache;
 
+   bool prefer_nir;
+
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
union {
   uint64_t v[29];
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index e91ea8d08c1..6f0a30ea026 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -311,6 +311,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen,
  enum pipe_shader_type shader,
  enum pipe_shader_cap param)
 {
+   const struct nouveau_screen *screen = nouveau_screen(pscreen);
+
switch (shader) {
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
@@ -364,7 +366,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
   return MIN2(16, PIPE_MAX_SAMPLERS);
case PIPE_SHADER_CAP_PREFERRED_IR:
-  return PIPE_SHADER_IR_TGSI;
+  return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
   return 32;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 15662093eb6..1f558aeaf4b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -90,9 +90,11 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
 
if (bindings & PIPE_BIND_SHADER_IMAGE) {
   if (sample_count > 0 &&
-  nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS) {
+  (nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS ||
+   nouveau_screen(pscreen)->prefer_nir)) {
  /* MS images are currently unsupported on Maxwell because they have to
   * be handled explicitly. */
+ /* MS images are currently unsupported with NIR */
  return false;
   }
 
@@ -112,7 +114,8 @@ static int
 nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 {
const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
-   struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+   const struct nouveau_screen *screen = nouveau_screen(pscreen);
+   struct nouveau_device *dev = screen->device;
 
switch (param) {
/* non-boolean caps */
@@ -216,7 +219,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
-   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
@@ -257,6 +259,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_QUERY_SO_OVERFLOW:
   return 1;
+   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+  /* TODO: nir doesn't support tg4 with multiple offsets */
+  return screen->prefer_nir ? 0 : 1;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
   return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
case PIPE_CAP_TGSI_FS_FBFETCH:

[Mesa-dev] [PATCH v7 00/35] Nouveau NIR support

2018-04-16 Thread Karol Herbst
Changes to v6:
* fixed automake build
* fixed shader cache collision with TGSI shaders
* fixed handling of inot
* fixed location of images
* fixed load_output of fragment shaders (FBFETCH)
* added load_barycentric_sample handling
* some preparation for bindless_texture support

There will be some follow up patches to add support for:
* 4 constant offsets in TG4
* bindless_texture support for samplers and images

Review is important for patches 1-8; all the other patches touch the from_nir
file only.

Connor Abbott (1):
  nv50/ir/ra: Fix copying compound for moves

Karol Herbst (34):
  st/glsl_to_nir: run lower_output_reads on
!PIPE_CAP_TGSI_CAN_READ_OUTPUTS
  nvir: print the shader type when dumping headers
  nvir: move common converter code in base class
  nvir: add lowering helper
  nouveau: add support for nir
  nouveau: add env var to make nir default
  nouveau: fix nir and TGSI shader cache collision
  nvir/nir: run some passes to make the conversion easier
  nvir/nir: track defs and provide easy access functions
  nvir/nir: add nir type helper functions
  nvir/nir: run assignSlots
  nvir/nir: add loadFrom and storeTo helpler
  nvir/nir: parse NIR shader info
  nvir/nir: implement CFG handling
  nvir/nir: implement nir_load_const_instr
  nvir/nir: add skeleton for nir_intrinsic_instr
  nvir/nir: implement nir_alu_instr handling
  nvir/nir: implement nir_intrinsic_load_uniform
  nvir/nir: implement nir_intrinsic_store_(per_vertex_)output
  nvir/nir: implement load_(interpolated_)input/output
  nvir/nir: implement intrinsic_discard(_if)
  nvir/nir: implement loading system values
  nvir/nir: implement nir_ssa_undef_instr
  nvir/nir: implement nir_instr_type_tex
  nvir/nir: add getOperation for intrinsics
  nvir/nir: implement vote and ballot
  nvir/nir: implement variable indexing
  nvir/nir: implement geometry shader nir_intrinsics
  nvir/nir: implement nir_intrinsic_load_ubo
  nvir/nir: implement ssbo intrinsics
  nvir/nir: implement images
  nvir/nir: add memory barriers
  nvir/nir: implement load_per_vertex_output
  nvir/nir: implement intrinsic shader_clock

 src/gallium/drivers/nouveau/Automake.inc   |3 +
 src/gallium/drivers/nouveau/Makefile.am|5 +
 src/gallium/drivers/nouveau/Makefile.sources   |5 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp|3 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.h  |1 +
 .../nouveau/codegen/nv50_ir_from_common.cpp|  107 +
 .../drivers/nouveau/codegen/nv50_ir_from_common.h  |   58 +
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 3145 
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  106 +-
 .../nouveau/codegen/nv50_ir_lowering_helper.cpp|  275 ++
 .../nouveau/codegen/nv50_ir_lowering_helper.h  |   53 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp |   60 +-
 src/gallium/drivers/nouveau/meson.build|   13 +-
 src/gallium/drivers/nouveau/nouveau_screen.c   |   11 +-
 src/gallium/drivers/nouveau/nouveau_screen.h   |2 +
 src/gallium/drivers/nouveau/nv50/nv50_program.c|   19 +-
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |   44 +-
 src/gallium/drivers/nouveau/nv50/nv50_state.c  |   31 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c|   19 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |   61 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c  |   27 +-
 src/mesa/state_tracker/st_glsl_to_nir.cpp  |6 +
 22 files changed, 3908 insertions(+), 146 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
 create mode 100644 
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp
 create mode 100644 
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.h

-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 02/35] nv50/ir/ra: Fix copying compound for moves

2018-04-16 Thread Karol Herbst
From: Connor Abbott 

In order to reduce moves when coalescing multiple registers into a
larger register, RA will try to coalesce MERGE instructions with their
definitions. For example, for something like this in GLSL:

uint a = ...;
uint b = ...;
uint64 x = packUint2x32(a, b);

The compiler will try to coalesce x with a and b, in the same way as
something like:

uint a = ...;
uint b = ...;
...
uint x = phi(a, b);

with the crucial difference that the definitions of a and b only clobber
part of the register, instead of the whole thing. This information is
carried through the compound flag and compMask bitmask. If compound is
set, then the value has been coalesced in such a way that not all the
defs clobber the entire register. The compMask bitmask describes which
subregister each def clobbers, although it does it in a slightly
convoluted way. It's an invariant that once compound is set on one def,
it must be set for all the defs in a given coalesced value.

In more detail, the constraints pass will first create extra moves:

uint a = ...;
uint b = ...;
uint a' = a;
uint b' = b;
uint64 x = packUint2x32(a', b');

and then RA will merge values involved in MERGE/SPLIT instructions,
merging x with a' and b' and making the combined value compound -- this
is relatively simple, and will always succeed since we just created a'
and b', so they never interfere with x, and x has no other definitions,
since we haven't started coalescing moves yet. Basically, we just replaced
the MERGE instruction with an equivalent sequence of partial writes to the
destination. The tricky part comes when we try to merge a' with a
and b' with b. We need to transfer the compound information from a' to a
and b' to b, which copyCompound() does, but we also need to transfer it
to any defs coalesced with a and b, which the code failed to do. Similarly,
if x is the argument to a phi instruction, then when we try to merge it
with other arguments to the same phi by coalescing moves, we'd have
problems guaranteeing that all the other merged defs stay up-to-date.

One tricky part of fixing this is that in order to properly propagate
the information from a' to a, we need to do it before the defs for a and
a' are merged in coalesceValues(), since we need to know which defs are
merged with a but not a' -- after coalesceValues() returns, all the defs
have been combined, so we don't know which is which. I took the approach
of calling copyCompound() inside coalesceValues(), instead of
afterwards.

Cc: Ilia Mirkin 
Cc: Karol Herbst 
Tested-by: Karol Herbst 
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 60 ++
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 3a0e56e1385..df3116a6d73 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -890,6 +890,35 @@ GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval)
livei.insert(lval->livei);
 }
 
+// Used when coalescing moves. The non-compound value will become one, e.g.:
+// mov b32 $r0 $r2/ merge b64 $r0d { $r0 $r1 }
+// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d
+static inline void copyCompound(Value *dst, Value *src)
+{
+   LValue *ldst = dst->asLValue();
+   LValue *lsrc = src->asLValue();
+
+   if (ldst->compound && !lsrc->compound) {
+  LValue *swap = lsrc;
+  lsrc = ldst;
+  ldst = swap;
+   }
+
+   assert(!ldst->compound);
+
+   if (lsrc->compound) {
+  Value *dstRep = ldst->join;
+  for (Value::DefIterator d = dstRep->defs.begin(); d != 
dstRep->defs.end();
+   ++d) {
+ LValue *ldst = (*d)->get()->asLValue();
+ if (!ldst->compound)
+ldst->compMask = 0xff;
+ ldst->compound = 1;
+ ldst->compMask &= lsrc->compMask;
+  }
+   }
+}
+
 bool
 GCRA::coalesceValues(Value *dst, Value *src, bool force)
 {
@@ -932,9 +961,16 @@ GCRA::coalesceValues(Value *dst, Value *src, bool force)
if (!force && nRep->livei.overlaps(nVal->livei))
   return false;
 
+   // TODO: Handle this case properly.
+   if (!force && rep->compound && val->compound)
+  return false;
+
INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n",
 rep->id, rep->reg.data.id, val->id);
 
+   if (!force)
+  copyCompound(dst, src);
+
// set join pointer of all values joined with val
for (Value::DefIterator def = val->defs.begin(); def != val->defs.end();
 ++def)
@@ -997,24 +1033,6 @@ static inline uint8_t makeCompMask(int compSize, int 
base, int size)
}
 }
 
-// Used when coalescing moves. The non-compou

[Mesa-dev] [PATCH v7 04/35] nvir: move common converter code in base class

2018-04-16 Thread Karol Herbst
v2: remove TGSI related bits

Signed-off-by: Karol Herbst 
Reviewed-by: Pierre Moreau 
---
 src/gallium/drivers/nouveau/Makefile.sources   |   2 +
 .../nouveau/codegen/nv50_ir_from_common.cpp| 107 +
 .../drivers/nouveau/codegen/nv50_ir_from_common.h  |  58 +++
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 106 +---
 src/gallium/drivers/nouveau/meson.build|   2 +
 5 files changed, 172 insertions(+), 103 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index 65f08c7d8d8..fee5e59522e 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -115,6 +115,8 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_build_util.h \
codegen/nv50_ir_driver.h \
codegen/nv50_ir_emit_nv50.cpp \
+   codegen/nv50_ir_from_common.cpp \
+   codegen/nv50_ir_from_common.h \
codegen/nv50_ir_from_tgsi.cpp \
codegen/nv50_ir_graph.cpp \
codegen/nv50_ir_graph.h \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
new file mode 100644
index 000..0ad6087e588
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "codegen/nv50_ir_from_common.h"
+
+namespace nv50_ir {
+
+ConverterCommon::ConverterCommon(Program *prog, nv50_ir_prog_info *info)
+   :  BuildUtil(prog),
+  info(info) {}
+
+ConverterCommon::Subroutine *
+ConverterCommon::getSubroutine(unsigned ip)
+{
+   std::map::iterator it = sub.map.find(ip);
+
+   if (it == sub.map.end())
+  it = sub.map.insert(std::make_pair(
+  ip, Subroutine(new Function(prog, "SUB", ip.first;
+
+   return &it->second;
+}
+
+ConverterCommon::Subroutine *
+ConverterCommon::getSubroutine(Function *f)
+{
+   unsigned ip = f->getLabel();
+   std::map::iterator it = sub.map.find(ip);
+
+   if (it == sub.map.end())
+  it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
+
+   return &it->second;
+}
+
+uint8_t
+ConverterCommon::translateInterpMode(const struct nv50_ir_varying *var, 
operation& op)
+{
+   uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
+
+   if (var->flat)
+  mode = NV50_IR_INTERP_FLAT;
+   else
+   if (var->linear)
+  mode = NV50_IR_INTERP_LINEAR;
+   else
+   if (var->sc)
+  mode = NV50_IR_INTERP_SC;
+
+   op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
+  ? OP_PINTERP : OP_LINTERP;
+
+   if (var->centroid)
+  mode |= NV50_IR_INTERP_CENTROID;
+
+   return mode;
+}
+
+void
+ConverterCommon::handleUserClipPlanes()
+{
+   Value *res[8];
+   int n, i, c;
+
+   for (c = 0; c < 4; ++c) {
+  for (i = 0; i < info->io.genUserClip; ++i) {
+ Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
+TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
+ Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
+ if (c == 0)
+res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
+ else
+mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
+  }
+   }
+
+   const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
+
+   for (i = 0; i < info->io.genUserClip; ++i) {
+  n = i / 4 + first;
+  c = i % 4;
+  Symbol *sym =
+ mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
+  mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
+   }

[Mesa-dev] [PATCH v7 06/35] nouveau: add support for nir

2018-04-16 Thread Karol Herbst
Not all of those nir options are actually required; they just made the work a
little easier.

v2: fix asserts
parse compute shaders
don't lower bitfield_insert
v3: fix memory leak
v4: don't lower fmod32
v5: set lower_all_io_to_temps to false
fix memory leak because we take over ownership of the nir shader
merge: use the lowering helper
v6: include TGSI debug header for proper assert call
add nv50 support
v7: fix Automake build

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/Automake.inc   |  3 +
 src/gallium/drivers/nouveau/Makefile.am|  5 ++
 src/gallium/drivers/nouveau/Makefile.sources   |  1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp|  3 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.h  |  1 +
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 76 ++
 src/gallium/drivers/nouveau/meson.build|  9 +--
 src/gallium/drivers/nouveau/nv50/nv50_program.c| 19 +-
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 40 
 src/gallium/drivers/nouveau/nv50/nv50_state.c  | 31 -
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c| 18 -
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 42 +++-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c  | 27 +++-
 13 files changed, 261 insertions(+), 14 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

diff --git a/src/gallium/drivers/nouveau/Automake.inc 
b/src/gallium/drivers/nouveau/Automake.inc
index 1d383fcb7b1..657790494dc 100644
--- a/src/gallium/drivers/nouveau/Automake.inc
+++ b/src/gallium/drivers/nouveau/Automake.inc
@@ -8,4 +8,7 @@ TARGET_LIB_DEPS += \
$(NOUVEAU_LIBS) \
$(LIBDRM_LIBS)
 
+TARGET_COMPILER_LIB_DEPS = \
+   $(top_builddir)/src/compiler/nir/libnir.la
+
 endif
diff --git a/src/gallium/drivers/nouveau/Makefile.am 
b/src/gallium/drivers/nouveau/Makefile.am
index f6126b54481..478dfcf437b 100644
--- a/src/gallium/drivers/nouveau/Makefile.am
+++ b/src/gallium/drivers/nouveau/Makefile.am
@@ -25,6 +25,10 @@ include $(top_srcdir)/src/gallium/Automake.inc
 
 AM_CPPFLAGS = \
-I$(top_srcdir)/include/drm-uapi \
+   -I$(top_builddir)/src/compiler/nir \
+   -I$(top_srcdir)/src/compiler/nir \
+   -I$(top_srcdir)/src/mapi \
+   -I$(top_srcdir)/src/mesa \
$(GALLIUM_DRIVER_CFLAGS) \
$(LIBDRM_CFLAGS) \
$(NOUVEAU_CFLAGS)
@@ -47,6 +51,7 @@ nouveau_compiler_SOURCES = \
 
 nouveau_compiler_LDADD = \
libnouveau.la \
+   $(top_builddir)/src/compiler/nir/libnir.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/util/libmesautil.la \
$(GALLIUM_COMMON_LIB_DEPS)
diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index ec344c63169..c6a1aff7110 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -117,6 +117,7 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_emit_nv50.cpp \
codegen/nv50_ir_from_common.cpp \
codegen/nv50_ir_from_common.h \
+   codegen/nv50_ir_from_nir.cpp \
codegen/nv50_ir_from_tgsi.cpp \
codegen/nv50_ir_graph.cpp \
codegen/nv50_ir_graph.h \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index c987da99085..b3efef72b0f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1231,6 +1231,9 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
prog->optLevel = info->optLevel;
 
switch (info->bin.sourceRep) {
+   case PIPE_SHADER_IR_NIR:
+  ret = prog->makeFromNIR(info) ? 0 : -2;
+  break;
case PIPE_SHADER_IR_TGSI:
   ret = prog->makeFromTGSI(info) ? 0 : -2;
   break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index f4f3c708886..e5b4592a61e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -1255,6 +1255,7 @@ public:
inline void del(Function *fn, int& id) { allFuncs.remove(id); }
inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }
 
+   bool makeFromNIR(struct nv50_ir_prog_info *);
bool makeFromTGSI(struct nv50_ir_prog_info *);
bool convertToSSA();
bool optimizeSSA(int level);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
new file mode 100644
index 000..b22c62fd434
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2017 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "S

[Mesa-dev] [PATCH v7 03/35] nvir: print the shader type when dumping headers

2018-04-16 Thread Karol Herbst
this makes debugging the shader header a little easier

Signed-off-by: Karol Herbst 
Acked-by: Pierre Moreau 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 9520d984bb3..3a11534df83 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -551,6 +551,7 @@ nvc0_program_dump(struct nvc0_program *prog)
unsigned pos;
 
if (prog->type != PIPE_SHADER_COMPUTE) {
+  debug_printf("dumping HDR for type %i\n", prog->type);
   for (pos = 0; pos < ARRAY_SIZE(prog->hdr); ++pos)
  debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n",
   pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 05/35] nvir: add lowering helper

2018-04-16 Thread Karol Herbst
This is mostly useful for lazy IR converters not wanting to deal with 64-bit
lowering and other illegal stuff.

v5: also handle SAT
v6: rename type variables
fixed lowering of NEG
add lowering of NOT

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/Makefile.sources   |   2 +
 .../nouveau/codegen/nv50_ir_lowering_helper.cpp| 275 +
 .../nouveau/codegen/nv50_ir_lowering_helper.h  |  53 
 src/gallium/drivers/nouveau/meson.build|   2 +
 4 files changed, 332 insertions(+)
 create mode 100644 
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp
 create mode 100644 
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index fee5e59522e..ec344c63169 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -122,6 +122,8 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_graph.h \
codegen/nv50_ir.h \
codegen/nv50_ir_inlines.h \
+   codegen/nv50_ir_lowering_helper.cpp \
+   codegen/nv50_ir_lowering_helper.h \
codegen/nv50_ir_lowering_nv50.cpp \
codegen/nv50_ir_peephole.cpp \
codegen/nv50_ir_print.cpp \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp
new file mode 100644
index 000..9373531b0b1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp
@@ -0,0 +1,275 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Karol Herbst 
+ */
+
+#include "codegen/nv50_ir_lowering_helper.h"
+
+namespace nv50_ir {
+
+bool
+LoweringHelper::visit(Instruction *insn)
+{
+   switch (insn->op) {
+   case OP_ABS:
+  return handleABS(insn);
+   case OP_CVT:
+  return handleCVT(insn);
+   case OP_MAX:
+   case OP_MIN:
+  return handleMAXMIN(insn);
+   case OP_MOV:
+  return handleMOV(insn);
+   case OP_NEG:
+  return handleNEG(insn);
+   case OP_SAT:
+  return handleSAT(insn);
+   case OP_SLCT:
+  return handleSLCT(insn->asCmp());
+   case OP_AND:
+   case OP_NOT:
+   case OP_OR:
+   case OP_XOR:
+  return handleLogOp(insn);
+   default:
+  return true;
+   }
+}
+
+bool
+LoweringHelper::handleABS(Instruction *insn)
+{
+   DataType dTy = insn->dType;
+   if (!(dTy == TYPE_U64 || dTy == TYPE_S64))
+  return true;
+
+   bld.setPosition(insn, false);
+
+   Value *neg = bld.getSSA(8);
+   Value *negComp[2], *srcComp[2];
+   Value *lo = bld.getSSA(), *hi = bld.getSSA();
+   bld.mkOp2(OP_SUB, dTy, neg, bld.mkImm((uint64_t)0), insn->getSrc(0));
+   bld.mkSplit(negComp, 4, neg);
+   bld.mkSplit(srcComp, 4, insn->getSrc(0));
+   bld.mkCmp(OP_SLCT, CC_LT, TYPE_S32, lo, TYPE_S32, negComp[0], srcComp[0], 
srcComp[1]);
+   bld.mkCmp(OP_SLCT, CC_LT, TYPE_S32, hi, TYPE_S32, negComp[1], srcComp[1], 
srcComp[1]);
+   insn->op = OP_MERGE;
+   insn->setSrc(0, lo);
+   insn->setSrc(1, hi);
+
+   return true;
+}
+
+bool
+LoweringHelper::handleCVT(Instruction *insn)
+{
+   DataType dTy = insn->dType;
+   DataType sTy = insn->sType;
+
+   if (typeSizeof(dTy) <= 4 && typeSizeof(sTy) <= 4)
+  return true;
+
+   bld.setPosition(insn, false);
+
+   if ((dTy == TYPE_S32 && sTy == TYPE_S64) ||
+   (dTy == TYPE_U32 && sTy == TYPE_U64)) {
+  Value *src[2];
+  bld.mkSplit(src, 4, insn->getSrc(0));
+  insn->op = OP_MOV;
+  insn->setSrc(0, src[0]);
+   } else if (dTy == TYPE_S64 && sTy == TYPE_S32) {
+  Value *tmp = bld.getSSA();
+  bld.mkOp2(OP_SHR, TYPE_S32, tmp, insn->getSrc(0), 
bld.loadImm(bld.getSSA(), 31));
+  insn->op = OP_MERGE;
+  insn->setSrc(1, tmp);
+   } else if (

[Mesa-dev] [PATCH v7 11/35] nvir/nir: add nir type helper functions

2018-04-16 Thread Karol Herbst
v4: treat imul as unsigned
v5: remove pointless !!
v7: inot is unsigned as well

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 116 +
 1 file changed, 116 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index b61c6e90b1a..89c55a08ef8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -53,6 +53,7 @@ public:
 private:
typedef std::vector LValues;
typedef decltype(nir_ssa_def().index) NirSSADefIdx;
+   typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize;
typedef std::unordered_map NirDefMap;
 
LValues& convert(nir_alu_dest *);
@@ -68,6 +69,18 @@ private:
uint32_t getIndirect(nir_src *, uint8_t, Value*&);
uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&);
 
+   bool isFloatType(nir_alu_type);
+   bool isSignedType(nir_alu_type);
+   bool isResultFloat(nir_op);
+   bool isResultSigned(nir_op);
+
+   DataType getDType(nir_alu_instr*);
+   DataType getDType(nir_intrinsic_instr*);
+   DataType getDType(nir_op, NirSSADefBitSize);
+
+   std::vector getSTypes(nir_alu_instr*);
+   DataType getSType(nir_src&, bool isFloat, bool isSigned);
+
nir_shader *nir;
 
NirDefMap ssaDefs;
@@ -78,6 +91,109 @@ Converter::Converter(Program *prog, nir_shader *nir, 
nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir) {}
 
+bool
+Converter::isFloatType(nir_alu_type type)
+{
+   return nir_alu_type_get_base_type(type) == nir_type_float;
+}
+
+bool
+Converter::isSignedType(nir_alu_type type)
+{
+   return nir_alu_type_get_base_type(type) == nir_type_int;
+}
+
+bool
+Converter::isResultFloat(nir_op op)
+{
+   const nir_op_info &info = nir_op_infos[op];
+   if (info.output_type != nir_type_invalid)
+  return isFloatType(info.output_type);
+
+   ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
+   assert(false);
+   return true;
+}
+
+bool
+Converter::isResultSigned(nir_op op)
+{
+   switch (op) {
+   /* there is no umul and we get wrong results if we treat all muls as 
signed */
+   case nir_op_imul:
+   case nir_op_inot:
+  return false;
+   default:
+  const nir_op_info &info = nir_op_infos[op];
+  if (info.output_type != nir_type_invalid)
+ return isSignedType(info.output_type);
+  ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
+  assert(false);
+  return true;
+   }
+}
+
+DataType
+Converter::getDType(nir_alu_instr *insn)
+{
+   if (insn->dest.dest.is_ssa)
+  return getDType(insn->op, insn->dest.dest.ssa.bit_size);
+   else
+  return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
+}
+
+DataType
+Converter::getDType(nir_intrinsic_instr *insn)
+{
+   if (insn->dest.is_ssa)
+  return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
+   else
+  return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
+}
+
+DataType
+Converter::getDType(nir_op op, Converter::NirSSADefBitSize bitSize)
+{
+   DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), 
isResultSigned(op));
+   if (ty == TYPE_NONE) {
+  ERROR("couldn't get Type for op %s with bitSize %u\n", 
nir_op_infos[op].name, bitSize);
+  assert(false);
+   }
+   return ty;
+}
+
+std::vector<DataType>
+Converter::getSTypes(nir_alu_instr *insn)
+{
+   const nir_op_info &info = nir_op_infos[insn->op];
+   std::vector<DataType> res(info.num_inputs);
+
+   for (auto i = 0u; i < info.num_inputs; ++i) {
+  if (info.input_types[i] != nir_type_invalid) {
+ res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), 
isSignedType(info.input_types[i]));
+  } else {
+ ERROR("getSType not implemented for %s idx %u\n", info.name, i);
+ assert(false);
+ res[i] = TYPE_NONE;
+ break;
+  }
+   }
+
+   return res;
+}
+
+DataType
+Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
+{
+   NirSSADefBitSize bitSize;
+   if (src.is_ssa)
+  bitSize = src.ssa->bit_size;
+   else
+  bitSize = src.reg.reg->bit_size;
+
+   return typeOfSize(bitSize / 8, isFloat, isSigned);
+}
+
 Converter::LValues&
 Converter::convert(nir_dest *dest)
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 08/35] nouveau: fix nir and TGSI shader cache collision

2018-04-16 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/nouveau_screen.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c 
b/src/gallium/drivers/nouveau/nouveau_screen.c
index 2598c78a45b..655d2d090f6 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -156,9 +156,13 @@ nouveau_disk_cache_create(struct nouveau_screen *screen)
  &mesa_timestamp)) {
   res = asprintf(&timestamp_str, "%u", mesa_timestamp);
   if (res != -1) {
+ uint64_t shader_debug_flags = 0;
+ if (screen->prefer_nir)
+shader_debug_flags |= 1 << 0;
+
  screen->disk_shader_cache =
 disk_cache_create(nouveau_screen_get_name(&screen->base),
-  timestamp_str, 0);
+  timestamp_str, shader_debug_flags);
  free(timestamp_str);
   }
}
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 10/35] nvir/nir: track defs and provide easy access functions

2018-04-16 Thread Karol Herbst
v2: add helper function for indirects
v4: add new getIndirect overload for easier use
v5: use getSSA for ssa values
we can just create the values for unassigned registers in getSrc
v6: always create at least 32 bit values

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 132 +
 1 file changed, 132 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 0b7a5981f73..b61c6e90b1a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -31,6 +31,9 @@
 #include "codegen/nv50_ir_lowering_helper.h"
 #include "codegen/nv50_ir_util.h"
 
+#include <unordered_map>
+#include <vector>
+
 static int
 type_size(const struct glsl_type *type)
 {
@@ -48,13 +51,142 @@ public:
 
bool run();
 private:
+   typedef std::vector<LValue*> LValues;
+   typedef decltype(nir_ssa_def().index) NirSSADefIdx;
+   typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap;
+
+   LValues& convert(nir_alu_dest *);
+   LValues& convert(nir_dest *);
+   LValues& convert(nir_register *);
+   LValues& convert(nir_ssa_def *);
+
+   Value* getSrc(nir_alu_src *, uint8_t component = 0);
+   Value* getSrc(nir_register *, uint8_t);
+   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
+   Value* getSrc(nir_ssa_def *, uint8_t);
+
+   uint32_t getIndirect(nir_src *, uint8_t, Value*&);
+   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&);
+
nir_shader *nir;
+
+   NirDefMap ssaDefs;
+   NirDefMap regDefs;
 };
 
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir) {}
 
+Converter::LValues&
+Converter::convert(nir_dest *dest)
+{
+   if (dest->is_ssa)
+  return convert(&dest->ssa);
+   if (dest->reg.indirect) {
+  ERROR("no support for indirects.");
+  assert(false);
+   }
+   return convert(dest->reg.reg);
+}
+
+Converter::LValues&
+Converter::convert(nir_register *reg)
+{
+   NirDefMap::iterator it = regDefs.find(reg->index);
+   if (it != regDefs.end())
+  return (*it).second;
+
+   LValues newDef(reg->num_components);
+   for (auto i = 0u; i < reg->num_components; i++)
+  newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
+   return regDefs[reg->index] = newDef;
+}
+
+Converter::LValues&
+Converter::convert(nir_ssa_def *def)
+{
+   NirDefMap::iterator it = ssaDefs.find(def->index);
+   if (it != ssaDefs.end())
+  return (*it).second;
+
+   LValues newDef(def->num_components);
+   for (auto i = 0; i < def->num_components; i++)
+  newDef[i] = getSSA(std::max(4, def->bit_size / 8));
+   return ssaDefs[def->index] = newDef;
+}
+
+Value*
+Converter::getSrc(nir_alu_src *src, uint8_t component)
+{
+   if (src->abs || src->negate) {
+  ERROR("modifiers currently not supported on nir_alu_src\n");
+  assert(false);
+   }
+   return getSrc(&src->src, src->swizzle[component]);
+}
+
+Value*
+Converter::getSrc(nir_register *reg, uint8_t idx)
+{
+   NirDefMap::iterator it = regDefs.find(reg->index);
+   if (it == regDefs.end())
+  return convert(reg)[idx];
+   return (*it).second[idx];
+}
+
+Value*
+Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
+{
+   if (src->is_ssa)
+  return getSrc(src->ssa, idx);
+
+   if (src->reg.indirect) {
+  if (indirect)
+ return getSrc(src->reg.indirect, idx);
+  ERROR("no support for indirects.");
+  assert(false);
+  return nullptr;
+   }
+
+   return getSrc(src->reg.reg, idx);
+}
+
+Value*
+Converter::getSrc(nir_ssa_def *src, uint8_t idx)
+{
+   NirDefMap::iterator it = ssaDefs.find(src->index);
+   if (it == ssaDefs.end()) {
+  ERROR("SSA value %u not found\n", src->index);
+  assert(false);
+  return nullptr;
+   }
+   return (*it).second[idx];
+}
+
+uint32_t
+Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
+{
+   nir_const_value *offset = nir_src_as_const_value(*src);
+
+   if (offset) {
+  indirect = nullptr;
+  return offset->u32[0];
+   }
+
+   indirect = getSrc(src, idx, true);
+   return 0;
+}
+
+uint32_t
+Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value 
*&indirect)
+{
+   auto idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, 
indirect);
+   if (indirect)
+  indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, 
loadImm(nullptr, 4));
+   return idx;
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 13/35] nvir/nir: add loadFrom and storeTo helper

2018-04-16 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 72 ++
 1 file changed, 72 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 1d1c4526d2b..00ca1ae1512 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -76,6 +76,13 @@ private:
bool centroid,
unsigned semantics);
 
+   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t 
base,
+ uint8_t c, Value *indirect0 = nullptr,
+ Value *indirect1 = nullptr, bool patch = false);
+   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
+Value *src, uint8_t idx, uint8_t c, Value *indirect0 = nullptr,
+Value *indirect1 = nullptr);
+
bool isFloatType(nir_alu_type);
bool isSignedType(nir_alu_type);
bool isResultFloat(nir_op);
@@ -924,6 +931,71 @@ Converter::getSlotAddress(nir_intrinsic_instr *insn, 
uint8_t idx, uint8_t slot)
return vary[idx].slot[slot] * 4;
 }
 
+Instruction *
+Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
+uint32_t base, uint8_t c, Value *indirect0,
+Value *indirect1, bool patch)
+{
+   auto tySize = typeSizeof(ty);
+
+   if (tySize == 8 &&
+   (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) 
{
+  Value *lo = getSSA();
+  Value *hi = getSSA();
+
+  Instruction *loi =
+ mkLoad(TYPE_U32, lo,
+mkSymbol(file, i, TYPE_U32, base + c * tySize),
+indirect0);
+  loi->setIndirect(0, 1, indirect1);
+  loi->perPatch = patch;
+
+  Instruction *hii =
+ mkLoad(TYPE_U32, hi,
+mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
+indirect0);
+  hii->setIndirect(0, 1, indirect1);
+  hii->perPatch = patch;
+
+  return mkOp2(OP_MERGE, ty, def, lo, hi);
+   } else {
+  Instruction *ld =
+ mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
+  ld->setIndirect(0, 1, indirect1);
+  ld->perPatch = patch;
+  return ld;
+   }
+}
+
+void
+Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
+   DataType ty, Value *src, uint8_t idx, uint8_t c,
+   Value *indirect0, Value *indirect1)
+{
+   uint8_t size = typeSizeof(ty);
+   uint32_t address = getSlotAddress(insn, idx, c);
+
+   if (size == 8 && indirect0) {
+  Value *split[2];
+  mkSplit(split, 4, src);
+
+  if (op == OP_EXPORT) {
+ split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
+ split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
+  }
+
+  mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
+  split[0])->perPatch = info->out[idx].patch;
+  mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), 
indirect0,
+  split[1])->perPatch = info->out[idx].patch;
+   } else {
+  if (op == OP_EXPORT)
+ src = mkMov(getSSA(size), src, ty)->getDef(0);
+  mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
+  src)->perPatch = info->out[idx].patch;
+   }
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 12/35] nvir/nir: run assignSlots

2018-04-16 Thread Karol Herbst
v2: add support for geometry shaders
set idx
add some missing mappings
fix for 64bit inputs/outputs
fix up some FP color output index messup
parse centroid flag
v3: fix arrays in outputs as well
fix input/output size calculation for tessellation shaders
v4: add getSlotAddress helper
fix for 64 bit typed inputs
v5: change getSlotAddress interface for easier use
fix sample inputs
fix slot counting for mat
v7: fix driver_location of images

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 626 +
 1 file changed, 626 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 89c55a08ef8..1d1c4526d2b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -69,6 +69,13 @@ private:
uint32_t getIndirect(nir_src *, uint8_t, Value*&);
uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&);
 
+   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
+
+   void setInterpolate(nv50_ir_varying *,
+   decltype(nir_variable().data.interpolation),
+   bool centroid,
+   unsigned semantics);
+
bool isFloatType(nir_alu_type);
bool isSignedType(nir_alu_type);
bool isResultFloat(nir_op);
@@ -81,6 +88,8 @@ private:
std::vector<DataType> getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   bool assignSlots();
+
nir_shader *nir;
 
NirDefMap ssaDefs;
@@ -303,6 +312,618 @@ Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t 
s, uint8_t c, Value *&
return idx;
 }
 
+static void
+vert_attrib_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
+{
+   if (slot >= VERT_ATTRIB_GENERIC0) {
+  *name = TGSI_SEMANTIC_GENERIC;
+  *index = slot - VERT_ATTRIB_GENERIC0;
+  return;
+   }
+
+   if (slot == VERT_ATTRIB_POINT_SIZE) {
+  ERROR("unknown vert attrib slot %u\n", slot);
+  assert(false);
+  return;
+   }
+
+   if (slot >= VERT_ATTRIB_TEX0) {
+  *name = TGSI_SEMANTIC_TEXCOORD;
+  *index = slot - VERT_ATTRIB_TEX0;
+  return;
+   }
+
+   switch (slot) {
+   case VERT_ATTRIB_COLOR0:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_COLOR1:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 1;
+  break;
+   case VERT_ATTRIB_EDGEFLAG:
+  *name = TGSI_SEMANTIC_EDGEFLAG;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_FOG:
+  *name = TGSI_SEMANTIC_FOG;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_NORMAL:
+  *name = TGSI_SEMANTIC_NORMAL;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_POS:
+  *name = TGSI_SEMANTIC_POSITION;
+  *index = 0;
+  break;
+   default:
+  ERROR("unknown vert attrib slot %u\n", slot);
+  assert(false);
+  break;
+   }
+}
+
+static void
+varying_slot_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
+{
+   if (slot >= VARYING_SLOT_PATCH0) {
+  *name = TGSI_SEMANTIC_PATCH;
+  *index = slot - VARYING_SLOT_PATCH0;
+  return;
+   }
+
+   if (slot >= VARYING_SLOT_VAR0) {
+  *name = TGSI_SEMANTIC_GENERIC;
+  *index = slot - VARYING_SLOT_VAR0;
+  return;
+   }
+
+   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
+  *name = TGSI_SEMANTIC_TEXCOORD;
+  *index = slot - VARYING_SLOT_TEX0;
+  return;
+   }
+
+   switch (slot) {
+   case VARYING_SLOT_BFC0:
+  *name = TGSI_SEMANTIC_BCOLOR;
+  *index = 0;
+  break;
+   case VARYING_SLOT_BFC1:
+  *name = TGSI_SEMANTIC_BCOLOR;
+  *index = 1;
+  break;
+   case VARYING_SLOT_CLIP_DIST0:
+  *name = TGSI_SEMANTIC_CLIPDIST;
+  *index = 0;
+  break;
+   case VARYING_SLOT_CLIP_DIST1:
+  *name = TGSI_SEMANTIC_CLIPDIST;
+  *index = 1;
+  break;
+   case VARYING_SLOT_CLIP_VERTEX:
+  *name = TGSI_SEMANTIC_CLIPVERTEX;
+  *index = 0;
+  break;
+   case VARYING_SLOT_COL0:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 0;
+  break;
+   case VARYING_SLOT_COL1:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 1;
+  break;
+   case VARYING_SLOT_EDGE:
+  *name = TGSI_SEMANTIC_EDGEFLAG;
+  *index = 0;
+  break;
+   case VARYING_SLOT_FACE:
+  *name = TGSI_SEMANTIC_FACE;
+  *index = 0;
+  break;
+   case VARYING_SLOT_FOGC:
+  *name = TGSI_SEMANTIC_FOG;
+  *index = 0;
+  break;
+   case VARYING_SLOT_LAYER:
+  *name = TGSI_SEMANTIC_LAYER;
+  *index = 0;
+  break;
+   case VARYING_SLOT_PNTC:
+  *name = TGSI_SEMANTIC_PCOORD;
+  *index = 0;
+  break;
+   case VARYING_SLOT_POS:
+  *name = TGSI_SEMANTIC_POSITION;
+  *index = 0;
+  break;
+   case VARYING_SLOT_PRIMIT

[Mesa-dev] [PATCH v7 15/35] nvir/nir: implement CFG handling

2018-04-16 Thread Karol Herbst
v6: fix loops with blocks at the end nothing points to
skip blocks with no instructions and no predecessors

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 270 -
 1 file changed, 268 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 4bb99c6635c..c2512b01d5a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -55,8 +55,10 @@ private:
typedef decltype(nir_ssa_def().index) NirSSADefIdx;
typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize;
typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap;
+   typedef std::unordered_map 
NirBlockMap;
 
LValues& convert(nir_alu_dest *);
+   BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
@@ -98,15 +100,46 @@ private:
bool assignSlots();
bool parseNIR();
 
+   bool visit(nir_block *);
+   bool visit(nir_cf_node *);
+   bool visit(nir_function *);
+   bool visit(nir_if *);
+   bool visit(nir_instr *);
+   bool visit(nir_jump_instr *);
+   bool visit(nir_loop *);
+
nir_shader *nir;
 
NirDefMap ssaDefs;
NirDefMap regDefs;
+   NirBlockMap blocks;
+   unsigned int curLoopDepth;
+
+   BasicBlock *exit;
+
+   union {
+  struct {
+ Value *position;
+  } fp;
+   };
 };
 
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
- nir(nir) {}
+ nir(nir),
+ curLoopDepth(0) {}
+
+BasicBlock *
+Converter::convert(nir_block *block)
+{
+   NirBlockMap::iterator it = blocks.find(block->index);
+   if (it != blocks.end())
+  return (*it).second;
+
+   BasicBlock *bb = new BasicBlock(func);
+   blocks[block->index] = bb;
+   return bb;
+}
 
 bool
 Converter::isFloatType(nir_alu_type type)
@@ -1051,6 +1084,234 @@ Converter::parseNIR()
return true;
 }
 
+bool
+Converter::visit(nir_function *function)
+{
+   /* we only support emitting the main function for now */
+   assert(!strcmp(function->name, "main"));
+   assert(function->impl);
+
+   /* usually the blocks will set everything up, but main is special */
+   BasicBlock *entry = new BasicBlock(prog->main);
+   exit = new BasicBlock(prog->main);
+   blocks[nir_start_block(function->impl)->index] = entry;
+   prog->main->setEntry(entry);
+   prog->main->setExit(exit);
+
+   setPosition(entry, true);
+
+   switch (prog->getType()) {
+   case Program::TYPE_TESSELLATION_CONTROL:
+  outBase = mkOp2v(
+ OP_SUB, TYPE_U32, getSSA(),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
+  break;
+   case Program::TYPE_FRAGMENT: {
+  Symbol *sv = mkSysVal(SV_POSITION, 3);
+  fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
+  fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+  break;
+   }
+   default:
+  break;
+   }
+
+   nir_index_ssa_defs(function->impl);
+   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
+  if (!visit(node))
+ return false;
+   }
+
+   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
+   setPosition(exit, true);
+
+   /* TODO: for non main function this needs to be a OP_RETURN */
+   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
+   return true;
+}
+
+bool
+Converter::visit(nir_cf_node *node)
+{
+   switch (node->type) {
+   case nir_cf_node_block:
+  if (!visit(nir_cf_node_as_block(node)))
+ return false;
+  break;
+   case nir_cf_node_if:
+  if (!visit(nir_cf_node_as_if(node)))
+ return false;
+  break;
+   case nir_cf_node_loop:
+  if (!visit(nir_cf_node_as_loop(node)))
+ return false;
+  break;
+   default:
+  ERROR("unknown nir_cf_node type %u\n", node->type);
+  return false;
+   }
+   return true;
+}
+
+bool
+Converter::visit(nir_block *block)
+{
+   if (!block->predecessors->entries && block->instr_list.is_empty())
+  return true;
+
+   BasicBlock *bb = convert(block);
+
+   setPosition(bb, true);
+   nir_foreach_instr(insn, block) {
+  if (!visit(insn))
+ return false;
+   }
+   return true;
+}
+
+bool
+Converter::visit(nir_if *nif)
+{
+   DataType sType = getSType(nif->condition, false, false);
+   Value *src = getSrc(&nif->condition, 0);
+
+   nir_block *lastThen = nir_if_last_then_block(nif);
+   nir_block *lastElse = nir_if_last_else_block(nif);
+
+   assert(!lastThen->successors[1]);
+   assert(!lastElse->successors[1]);
+
+   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
+   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
+
+ 

[Mesa-dev] [PATCH v7 16/35] nvir/nir: implement nir_load_const_instr

2018-04-16 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index c2512b01d5a..f4f844021a2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -106,6 +106,7 @@ private:
bool visit(nir_if *);
bool visit(nir_instr *);
bool visit(nir_jump_instr *);
+   bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
 
nir_shader *nir;
@@ -1278,6 +1279,8 @@ Converter::visit(nir_instr *insn)
switch (insn->type) {
case nir_instr_type_jump:
   return visit(nir_instr_as_jump(insn));
+   case nir_instr_type_load_const:
+  return visit(nir_instr_as_load_const(insn));
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -1312,6 +1315,21 @@ Converter::visit(nir_jump_instr *insn)
return true;
 }
 
+bool
+Converter::visit(nir_load_const_instr *insn)
+{
+   assert(insn->def.bit_size <= 64);
+
+   LValues &newDefs = convert(&insn->def);
+   for (int i = 0; i < insn->def.num_components; i++) {
+  if (insn->def.bit_size > 32)
+ loadImm(newDefs[i], insn->value.u64[i]);
+  else
+ loadImm(newDefs[i], insn->value.u32[i]);
+   }
+   return true;
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 14/35] nvir/nir: parse NIR shader info

2018-04-16 Thread Karol Herbst
v2: parse a few more fields
v3: add special handling for GL_ISOLINES

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 60 ++
 1 file changed, 60 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 00ca1ae1512..4bb99c6635c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -96,6 +96,7 @@ private:
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
bool assignSlots();
+   bool parseNIR();
 
nir_shader *nir;
 
@@ -996,6 +997,60 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile 
file, operation op,
}
 }
 
+bool
+Converter::parseNIR()
+{
+   info->io.clipDistances = nir->info.clip_distance_array_size;
+   info->io.cullDistances = nir->info.cull_distance_array_size;
+
+   switch(prog->getType()) {
+   case Program::TYPE_COMPUTE:
+  info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
+  info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
+  info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
+  info->bin.smemSize = nir->info.cs.shared_size;
+  break;
+   case Program::TYPE_FRAGMENT:
+  info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
+  info->prop.fp.persampleInvocation =
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
+  info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
+  info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
+  info->prop.fp.usesSampleMaskIn =
+ !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
+  break;
+   case Program::TYPE_GEOMETRY:
+  info->prop.gp.inputPrim = nir->info.gs.input_primitive;
+  info->prop.gp.instanceCount = nir->info.gs.invocations;
+  info->prop.gp.maxVertices = nir->info.gs.vertices_out;
+  info->prop.gp.outputPrim = nir->info.gs.output_primitive;
+  break;
+   case Program::TYPE_TESSELLATION_CONTROL:
+   case Program::TYPE_TESSELLATION_EVAL:
+  if (nir->info.tess.primitive_mode == GL_ISOLINES)
+ info->prop.tp.domain = GL_LINES;
+  else
+ info->prop.tp.domain = nir->info.tess.primitive_mode;
+  info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
+  info->prop.tp.outputPrim =
+ nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
+  info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
+  info->prop.tp.winding = !nir->info.tess.ccw;
+  break;
+   case Program::TYPE_VERTEX:
+  info->prop.vp.usesDrawParameters =
+ (nir->info.system_values_read & 
BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
+ (nir->info.system_values_read & 
BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
+ (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
+  break;
+   default:
+  break;
+   }
+
+   return true;
+}
+
 bool
 Converter::run()
 {
@@ -1029,6 +1084,11 @@ Converter::run()
/* Garbage collect dead instructions */
nir_sweep(nir);
 
+   if (!parseNIR()) {
+  ERROR("Couldn't prase NIR!\n");
+  return false;
+   }
+
if (!assignSlots()) {
   ERROR("Couldn't assign slots!\n");
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 09/35] nvir/nir: run some passes to make the conversion easier

2018-04-16 Thread Karol Herbst
v2: add constant_folding
v6: print non final NIR only for verbose debugging

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 39 ++
 1 file changed, 39 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index b22c62fd434..0b7a5981f73 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -31,6 +31,12 @@
 #include "codegen/nv50_ir_lowering_helper.h"
 #include "codegen/nv50_ir_util.h"
 
+static int
+type_size(const struct glsl_type *type)
+{
+   return glsl_count_attribute_slots(type, false);
+}
+
 namespace {
 
 using namespace nv50_ir;
@@ -52,6 +58,39 @@ Converter::Converter(Program *prog, nir_shader *nir, 
nv50_ir_prog_info *info)
 bool
 Converter::run()
 {
+   bool progress;
+
+   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
+  nir_print_shader(nir, stderr);
+
+   NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, 
(nir_lower_io_options)0);
+   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
+   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+   NIR_PASS_V(nir, nir_lower_alu_to_scalar);
+   NIR_PASS_V(nir, nir_lower_phis_to_scalar);
+
+   do {
+  progress = false;
+  /* some ops depend on having constants as sources, but those can also
+   * point to expressions made from constants like 0 + 1
+   */
+  NIR_PASS(progress, nir, nir_opt_constant_folding);
+  NIR_PASS(progress, nir, nir_copy_prop);
+  NIR_PASS(progress, nir, nir_opt_dce);
+  NIR_PASS(progress, nir, nir_opt_dead_cf);
+   } while (progress);
+
+   NIR_PASS_V(nir, nir_lower_locals_to_regs);
+   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_local);
+   NIR_PASS_V(nir, nir_convert_from_ssa, true);
+
+   /* Garbage collect dead instructions */
+   nir_sweep(nir);
+
+   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
+  nir_print_shader(nir, stderr);
+
return false;
 }
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 21/35] nvir/nir: implement load_(interpolated_)input/output

2018-04-16 Thread Karol Herbst
v3: and load_output
v4: use smarter getIndirect helper
use new getSlotAddress helper
v5: don't use const_offset directly
fix for indirects
v6: add support for interpolateAt
v7: fix compiler warnings
add load_barycentric_sample
handle load_output for fragment shaders

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 134 +
 1 file changed, 134 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index b34fe7739d8..740dee5c95a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1523,6 +1523,140 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_interpolated_input:
+   case nir_intrinsic_load_output: {
+  LValues &newDefs = convert(&insn->dest);
+
+  /* FBFetch */
+  if (prog->getType() == Program::TYPE_FRAGMENT &&
+  op == nir_intrinsic_load_output) {
+ std::vector<Value*> defs, srcs;
+ uint8_t mask = 0;
+
+ srcs.push_back(getSSA());
+ srcs.push_back(getSSA());
+ Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 
0));
+ Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 
1));
+ mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
+ mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
+
+ srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 
0)));
+ srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), 
mkSysVal(SV_SAMPLE_INDEX, 0)));
+
+ for (auto i = 0u; i < insn->num_components; ++i) {
+defs.push_back(newDefs[i]);
+mask |= 1 << i;
+ }
+
+ TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, 
defs, srcs);
+ texi->tex.levelZero = 1;
+ texi->tex.mask = mask;
+ texi->tex.useOffsets = 0;
+ texi->tex.r = 0xffff;
+ texi->tex.s = 0xffff;
+
+ info->prop.fp.readsFramebuffer = true;
+ break;
+  }
+
+  const DataType dType = getDType(insn);
+  Value *indirect;
+  bool input = op != nir_intrinsic_load_output;
+  operation nvirOp;
+  uint32_t mode = 0;
+
+  auto idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input 
? 1 : 0, 0, indirect);
+  nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
+
+  /* see load_barycentric_* handling */
+  if (prog->getType() == Program::TYPE_FRAGMENT) {
+ mode = translateInterpMode(&vary, nvirOp);
+ if (op == nir_intrinsic_load_interpolated_input) {
+ImmediateValue immMode;
+if (getSrc(&insn->src[0], 
1)->getUniqueInsn()->src(0).getImmediate(immMode))
+   mode |= immMode.reg.data.u32;
+ }
+  }
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : 
FILE_SHADER_OUTPUT, 0, dType, address);
+ if (prog->getType() == Program::TYPE_FRAGMENT) {
+int s = 1;
+if (typeSizeof(dType) == 8) {
+   Value *lo = getSSA();
+   Value *hi = getSSA();
+   Instruction *interp;
+
+   interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s, getSrc(&insn->src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+
+   Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : 
FILE_SHADER_OUTPUT, 0, dType, address + 4);
+   interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s++, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+
+   mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
+} else {
+   Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s++, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+}
+ } else {
+  

[Mesa-dev] [PATCH v7 25/35] nvir/nir: implement nir_instr_type_tex

2018-04-16 Thread Karol Herbst
a lot of those fields are not valid for a lot of tex ops. Not quite sure if
it's worth the effort to check for those or just keep it like that. It seems
to kind of work.

v2: reworked offset handling
add tex support with indirect R/S arguments
handle GLSL_SAMPLER_DIM_EXTERNAL
drop reference in convert(glsl_sampler_dim&, bool, bool)
fix tg4 component selection
v5: fill up coords args with scratch values if coords provided is less than 
TexTarget.getArgCount()
v7: prepare for bindless_texture support

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 232 +
 1 file changed, 232 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index badb2398abb..553f74f7a75 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -82,6 +82,7 @@ private:
typedef std::unordered_map NirDefMap;
typedef std::unordered_map 
NirBlockMap;
 
+   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
LValues& convert(nir_alu_dest *);
BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
@@ -124,6 +125,7 @@ private:
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
operation getOperation(nir_op);
+   operation getOperation(nir_texop);
operation preOperationNeeded(nir_op);
 
int getSubOp(nir_op);
@@ -144,6 +146,10 @@ private:
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
bool visit(nir_ssa_undef_instr *);
+   bool visit(nir_tex_instr *);
+
+   /* tex stuff */
+   Value* applyProjection(Value *src, Value *proj);
 
nir_shader *nir;
 
@@ -369,6 +375,36 @@ Converter::getOperation(nir_op op)
}
 }
 
+operation
+Converter::getOperation(nir_texop op)
+{
+   switch (op) {
+   case nir_texop_tex:
+  return OP_TEX;
+   case nir_texop_lod:
+  return OP_TXLQ;
+   case nir_texop_txb:
+  return OP_TXB;
+   case nir_texop_txd:
+  return OP_TXD;
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+  return OP_TXF;
+   case nir_texop_tg4:
+  return OP_TXG;
+   case nir_texop_txl:
+  return OP_TXL;
+   case nir_texop_query_levels:
+   case nir_texop_texture_samples:
+   case nir_texop_txs:
+  return OP_TXQ;
+   default:
+  ERROR("couldn't get operation for nir_texop %u\n", op);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
 operation
 Converter::preOperationNeeded(nir_op op)
 {
@@ -1466,6 +1502,8 @@ Converter::visit(nir_instr *insn)
   return visit(nir_instr_as_load_const(insn));
case nir_instr_type_ssa_undef:
   return visit(nir_instr_as_ssa_undef(insn));
+   case nir_instr_type_tex:
+  return visit(nir_instr_as_tex(insn));
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -2173,6 +2211,200 @@ Converter::visit(nir_ssa_undef_instr *insn)
return true;
 }
 
+#define CASE_SAMPLER(ty) \
+   case GLSL_SAMPLER_DIM_ ## ty : \
+  if (isArray && !isShadow) \
+ return TEX_TARGET_ ## ty ## _ARRAY; \
+  else if (!isArray && isShadow) \
+ return TEX_TARGET_## ty ## _SHADOW; \
+  else if (isArray && isShadow) \
+ return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
+  else \
+ return TEX_TARGET_ ## ty
+
+TexTarget
+Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
+{
+   switch (dim) {
+   CASE_SAMPLER(1D);
+   CASE_SAMPLER(2D);
+   CASE_SAMPLER(CUBE);
+   case GLSL_SAMPLER_DIM_3D:
+  return TEX_TARGET_3D;
+   case GLSL_SAMPLER_DIM_MS:
+  if (isArray)
+ return TEX_TARGET_2D_MS_ARRAY;
+  return TEX_TARGET_2D_MS;
+   case GLSL_SAMPLER_DIM_RECT:
+  if (isShadow)
+ return TEX_TARGET_RECT_SHADOW;
+  return TEX_TARGET_RECT;
+   case GLSL_SAMPLER_DIM_BUF:
+  return TEX_TARGET_BUFFER;
+   case GLSL_SAMPLER_DIM_EXTERNAL:
+  return TEX_TARGET_2D;
+   default:
+  ERROR("unknown glsl_sampler_dim %u\n", dim);
+  assert(false);
+  return TEX_TARGET_COUNT;
+   }
+}
+#undef CASE_SAMPLER
+
+Value*
+Converter::applyProjection(Value *src, Value *proj)
+{
+   if (!proj)
+  return src;
+   return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
+}
+
+bool
+Converter::visit(nir_tex_instr *insn)
+{
+   switch (insn->op) {
+   case nir_texop_lod:
+   case nir_texop_query_levels:
+   case nir_texop_tex:
+   case nir_texop_texture_samples:
+   case nir_texop_tg4:
+   case nir_texop_txb:
+   case nir_texop_txd:
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+   case nir_texop_txl:
+   case nir_texop_txs: {
+  LValues &newDefs = convert(&insn->dest);
+  std::vector srcs;
+  std::vector defs;
+  std::vector offsets;
+  uint8_t mask = 0;
+  bool lz = false;
+  Value *proj = nullptr;
+  TexInstruction::

[Mesa-dev] [PATCH v7 22/35] nvir/nir: implement intrinsic_discard(_if)

2018-04-16 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 740dee5c95a..40bc00d7e14 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1657,6 +1657,20 @@ Converter::visit(nir_intrinsic_instr *insn)
   loadImm(newDefs[1], mode);
   break;
}
+   case nir_intrinsic_discard:
+  mkOp(OP_DISCARD, TYPE_NONE, NULL);
+  break;
+   case nir_intrinsic_discard_if: {
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  if (insn->num_components > 1) {
+ ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
+ assert(false);
+ return false;
+  }
+  mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), 
zero);
+  mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 19/35] nvir/nir: implement nir_intrinsic_load_uniform

2018-04-16 Thread Karol Herbst
v2: use new getIndirect helper
fix symbols for 64-bit types
v4: use smarter getIndirect helper
simplify address calculation
use loadFrom helper

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 8368bbcc015..8a83a885889 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1467,6 +1467,16 @@ Converter::visit(nir_intrinsic_instr *insn)
nir_intrinsic_op op = insn->intrinsic;
 
switch (op) {
+   case nir_intrinsic_load_uniform: {
+  LValues &newDefs = convert(&insn->dest);
+  const DataType dType = getDType(insn);
+  Value *indirect;
+  auto coffset = getIndirect(insn, 0, 0, indirect);
+  for (auto i = 0; i < insn->num_components; ++i) {
+ loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, 
indirect);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 31/35] nvir/nir: implement ssbo intrinsics

2018-04-16 Thread Karol Herbst
v4: use loadFrom helper
v5: support indirect buffer access

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 90 ++
 1 file changed, 90 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index d65754ec4f6..24c70d0c5ae 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -450,6 +450,24 @@ int
 Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
+   case nir_intrinsic_ssbo_atomic_add:
+  return NV50_IR_SUBOP_ATOM_ADD;
+   case nir_intrinsic_ssbo_atomic_and:
+  return NV50_IR_SUBOP_ATOM_AND;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+  return NV50_IR_SUBOP_ATOM_CAS;
+   case nir_intrinsic_ssbo_atomic_exchange:
+  return NV50_IR_SUBOP_ATOM_EXCH;
+   case nir_intrinsic_ssbo_atomic_or:
+  return NV50_IR_SUBOP_ATOM_OR;
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_umax:
+  return NV50_IR_SUBOP_ATOM_MAX;
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umin:
+  return NV50_IR_SUBOP_ATOM_MIN;
+   case nir_intrinsic_ssbo_atomic_xor:
+  return NV50_IR_SUBOP_ATOM_XOR;
case nir_intrinsic_vote_all:
   return NV50_IR_SUBOP_VOTE_ALL;
case nir_intrinsic_vote_any:
@@ -1952,6 +1970,78 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_get_buffer_size: {
+  LValues &newDefs = convert(&insn->dest);
+  const DataType dType = getDType(insn);
+  Value *indirectBuffer;
+  uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+
+  Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
+  mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, 
indirectBuffer);
+  break;
+   }
+   case nir_intrinsic_store_ssbo: {
+  DataType sType = getSType(insn->src[0], false, false);
+  Value *indirectBuffer;
+  Value *indirectOffset;
+  uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
+  uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+continue;
+ Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
+offset + i * typeSizeof(sType));
+ mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], 
i))
+->setIndirect(0, 1, indirectBuffer);
+  }
+  info->io.globalAccess |= 0x2;
+  break;
+   }
+   case nir_intrinsic_load_ssbo: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectBuffer;
+  Value *indirectOffset;
+  uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+  uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+
+  for (auto i = 0u; i < insn->num_components; ++i)
+ loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
+  indirectOffset, indirectBuffer);
+
+  info->io.globalAccess |= 0x1;
+  break;
+   }
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_xor: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectBuffer;
+  Value *indirectOffset;
+  uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+  uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+
+  Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
+  Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
+getSrc(&insn->src[2], 0));
+  if (op == nir_intrinsic_ssbo_atomic_comp_swap)
+ atom->setSrc(2, getSrc(&insn->src[3], 0));
+  atom->setIndirect(0, 0, indirectOffset);
+  atom->setIndirect(0, 1, indirectBuffer);
+  atom->subOp = getSubOp(op);
+
+  info->io.globalAccess |= 0x2;
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 34/35] nvir/nir: implement load_per_vertex_output

2018-04-16 Thread Karol Herbst
v4: use smarter getIndirect helper
use new getSlotAddress helper
v5: use loadFrom helper

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 23 ++
 1 file changed, 23 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index e5d85940ca7..dd73acb2366 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -2069,6 +2069,29 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_load_per_vertex_output: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectVertex;
+  Value *indirectOffset;
+  auto baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
+  auto idx = getIndirect(insn, 1, 0, indirectOffset);
+  Value *vtxBase = nullptr;
+
+  if (indirectVertex)
+ vtxBase = indirectVertex;
+  else
+ vtxBase = loadImm(nullptr, baseVertex);
+
+  vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, 
vtxBase);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
+  indirectOffset, vtxBase, info->in[idx].patch);
+  }
+  break;
+   }
case nir_intrinsic_emit_vertex:
case nir_intrinsic_end_primitive: {
   auto idx = nir_intrinsic_stream_id(insn);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 23/35] nvir/nir: implement loading system values

2018-04-16 Thread Karol Herbst
v2: support more sys values
fix a bug where, for multi-component reads, all values ended up in x
v3: add load_patch_vertices_in
v4: add subgroup stuff
v5: add helper invocation
v6: fix loading 64-bit system values

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 122 +
 1 file changed, 122 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 40bc00d7e14..4606ffb792e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -85,6 +85,7 @@ private:
LValues& convert(nir_alu_dest *);
BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
+   SVSemantic convert(nir_intrinsic_op);
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
 
@@ -1469,6 +1470,70 @@ Converter::visit(nir_instr *insn)
return true;
 }
 
+SVSemantic
+Converter::convert(nir_intrinsic_op intr)
+{
+   switch (intr) {
+   case nir_intrinsic_load_base_vertex:
+  return SV_BASEVERTEX;
+   case nir_intrinsic_load_base_instance:
+  return SV_BASEINSTANCE;
+   case nir_intrinsic_load_draw_id:
+  return SV_DRAWID;
+   case nir_intrinsic_load_front_face:
+  return SV_FACE;
+   case nir_intrinsic_load_helper_invocation:
+  return SV_THREAD_KILL;
+   case nir_intrinsic_load_instance_id:
+  return SV_INSTANCE_ID;
+   case nir_intrinsic_load_invocation_id:
+  return SV_INVOCATION_ID;
+   case nir_intrinsic_load_local_group_size:
+  return SV_NTID;
+   case nir_intrinsic_load_local_invocation_id:
+  return SV_TID;
+   case nir_intrinsic_load_num_work_groups:
+  return SV_NCTAID;
+   case nir_intrinsic_load_patch_vertices_in:
+  return SV_VERTEX_COUNT;
+   case nir_intrinsic_load_primitive_id:
+  return SV_PRIMITIVE_ID;
+   case nir_intrinsic_load_sample_id:
+  return SV_SAMPLE_INDEX;
+   case nir_intrinsic_load_sample_mask_in:
+  return SV_SAMPLE_MASK;
+   case nir_intrinsic_load_sample_pos:
+  return SV_SAMPLE_POS;
+   case nir_intrinsic_load_subgroup_eq_mask:
+  return SV_LANEMASK_EQ;
+   case nir_intrinsic_load_subgroup_ge_mask:
+  return SV_LANEMASK_GE;
+   case nir_intrinsic_load_subgroup_gt_mask:
+  return SV_LANEMASK_GT;
+   case nir_intrinsic_load_subgroup_le_mask:
+  return SV_LANEMASK_LE;
+   case nir_intrinsic_load_subgroup_lt_mask:
+  return SV_LANEMASK_LT;
+   case nir_intrinsic_load_subgroup_invocation:
+  return SV_LANEID;
+   case nir_intrinsic_load_tess_coord:
+  return SV_TESS_COORD;
+   case nir_intrinsic_load_tess_level_inner:
+  return SV_TESS_INNER;
+   case nir_intrinsic_load_tess_level_outer:
+  return SV_TESS_OUTER;
+   case nir_intrinsic_load_vertex_id:
+  return SV_VERTEX_ID;
+   case nir_intrinsic_load_work_group_id:
+  return SV_CTAID;
+   default:
+  ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
+nir_intrinsic_infos[intr].name);
+  assert(false);
+  return SV_LAST;
+   }
+}
+
 bool
 Converter::visit(nir_intrinsic_instr *insn)
 {
@@ -1671,6 +1736,63 @@ Converter::visit(nir_intrinsic_instr *insn)
   mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
   break;
}
+   case nir_intrinsic_load_base_vertex:
+   case nir_intrinsic_load_base_instance:
+   case nir_intrinsic_load_draw_id:
+   case nir_intrinsic_load_front_face:
+   case nir_intrinsic_load_helper_invocation:
+   case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_invocation_id:
+   case nir_intrinsic_load_local_group_size:
+   case nir_intrinsic_load_local_invocation_id:
+   case nir_intrinsic_load_num_work_groups:
+   case nir_intrinsic_load_patch_vertices_in:
+   case nir_intrinsic_load_primitive_id:
+   case nir_intrinsic_load_sample_id:
+   case nir_intrinsic_load_sample_mask_in:
+   case nir_intrinsic_load_sample_pos:
+   case nir_intrinsic_load_subgroup_eq_mask:
+   case nir_intrinsic_load_subgroup_ge_mask:
+   case nir_intrinsic_load_subgroup_gt_mask:
+   case nir_intrinsic_load_subgroup_le_mask:
+   case nir_intrinsic_load_subgroup_lt_mask:
+   case nir_intrinsic_load_subgroup_invocation:
+   case nir_intrinsic_load_tess_coord:
+   case nir_intrinsic_load_tess_level_inner:
+   case nir_intrinsic_load_tess_level_outer:
+   case nir_intrinsic_load_vertex_id:
+   case nir_intrinsic_load_work_group_id: {
+  const DataType dType = getDType(insn);
+  SVSemantic sv = convert(op);
+  LValues &newDefs = convert(&insn->dest);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ Value *def;
+ if (typeSizeof(dType) == 8)
+def = getSSA();
+ else
+def = newDefs[i];
+
+ if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
+loadImm(def, 0u);
+ } else {
+Symbol *sym 

[Mesa-dev] [PATCH v7 27/35] nvir/nir: implement vote and ballot

2018-04-16 Thread Karol Herbst
v2: add vote_eq support
use the new subop intrinsic helper
add ballot
v3: add read_(first_)invocation

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 42 ++
 1 file changed, 42 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 8088309272b..7dce61a1069 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -444,6 +444,12 @@ int
 Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
+   case nir_intrinsic_vote_all:
+  return NV50_IR_SUBOP_VOTE_ALL;
+   case nir_intrinsic_vote_any:
+  return NV50_IR_SUBOP_VOTE_ANY;
+   case nir_intrinsic_vote_ieq:
+  return NV50_IR_SUBOP_VOTE_UNI;
default:
   return 0;
}
@@ -1856,6 +1862,42 @@ Converter::visit(nir_intrinsic_instr *insn)
   loadImm(newDefs[0], 32u);
   break;
}
+   case nir_intrinsic_vote_all:
+   case nir_intrinsic_vote_any:
+   case nir_intrinsic_vote_ieq: {
+  LValues &newDefs = convert(&insn->dest);
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), 
zero);
+  mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
+  mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
+  break;
+   }
+   case nir_intrinsic_ballot: {
+  LValues &newDefs = convert(&insn->dest);
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), 
zero);
+  Instruction *ballot = mkOp1(OP_VOTE, TYPE_U32, getSSA(), pred);
+  ballot->subOp = NV50_IR_SUBOP_VOTE_ANY;
+  mkOp2(OP_MERGE, TYPE_U64, newDefs[0], ballot->getDef(0), 
loadImm(getSSA(), 0));
+  break;
+   }
+   case nir_intrinsic_read_first_invocation:
+   case nir_intrinsic_read_invocation: {
+  LValues &newDefs = convert(&insn->dest);
+  const DataType dType = getDType(insn);
+  Value *tmp = getScratch();
+
+  if (op == nir_intrinsic_read_first_invocation) {
+ mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = 
NV50_IR_SUBOP_VOTE_ANY;
+ mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = 
NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+  } else
+ tmp = getSrc(&insn->src[1], 0);
+
+  mkOp3(OP_SHFL, dType, newDefs[0], getSrc(&insn->src[0], 0), tmp, 
mkImm(0x1f))
+ ->subOp = NV50_IR_SUBOP_SHFL_IDX;
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 20/35] nvir/nir: implement nir_intrinsic_store_(per_vertex_)output

2018-04-16 Thread Karol Herbst
v3: add workaround for RA issues
indirects have to be multiplied by 0x10
fix indirect access
v4: use smarter getIndirect helper
use storeTo helper
v5: don't use const_offset directly

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 46 ++
 1 file changed, 46 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 8a83a885889..b34fe7739d8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1270,6 +1270,11 @@ Converter::visit(nir_function *function)
 
setPosition(entry, true);
 
+   if (info->io.genUserClip > 0) {
+  for (int c = 0; c < 4; ++c)
+ clipVtx[c] = getScratch();
+   }
+
switch (prog->getType()) {
case Program::TYPE_TESSELLATION_CONTROL:
   outBase = mkOp2v(
@@ -1296,6 +1301,9 @@ Converter::visit(nir_function *function)
bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
setPosition(exit, true);
 
+   if (info->io.genUserClip > 0)
+  handleUserClipPlanes();
+
/* TODO: for non main function this needs to be a OP_RETURN */
mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
return true;
@@ -1477,6 +1485,44 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_vertex_output: {
+  Value *indirect;
+  DataType dType = getSType(insn->src[0], false, false);
+  auto idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 
0, indirect);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+continue;
+
+ uint8_t offset = 0;
+ Value *src = getSrc(&insn->src[0], i);
+ switch (prog->getType()) {
+ case Program::TYPE_FRAGMENT: {
+if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
+   /* TGSI uses a different interface than NIR, TGSI stores that
+* value in the z component, NIR in X
+*/
+   offset += 2;
+   src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
+}
+break;
+ }
+ case Program::TYPE_VERTEX: {
+if (info->io.genUserClip > 0) {
+   mkMov(clipVtx[i], src);
+   src = clipVtx[i];
+}
+break;
+ }
+ default:
+break;
+ }
+
+ storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + 
offset, indirect);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 26/35] nvir/nir: add getOperation for intrinsics

2018-04-16 Thread Karol Herbst
v7: don't assert in default case

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 553f74f7a75..8088309272b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -124,10 +124,12 @@ private:
std::vector getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   operation getOperation(nir_intrinsic_op);
operation getOperation(nir_op);
operation getOperation(nir_texop);
operation preOperationNeeded(nir_op);
 
+   int getSubOp(nir_intrinsic_op);
int getSubOp(nir_op);
 
CondCode getCondCode(nir_op);
@@ -405,6 +407,17 @@ Converter::getOperation(nir_texop op)
}
 }
 
+operation
+Converter::getOperation(nir_intrinsic_op op)
+{
+   switch (op) {
+   default:
+  ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
 operation
 Converter::preOperationNeeded(nir_op op)
 {
@@ -427,6 +440,15 @@ Converter::getSubOp(nir_op op)
}
 }
 
+int
+Converter::getSubOp(nir_intrinsic_op op)
+{
+   switch (op) {
+   default:
+  return 0;
+   }
+}
+
 CondCode
 Converter::getCondCode(nir_op op)
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 18/35] nvir/nir: implement nir_alu_instr handling

2018-04-16 Thread Karol Herbst
v2: use bitfield_insert instead of bfi
rework switch helper macros
remove some lowering code (LoweringHelper is now used for this)
v3: add pack_half_2x16_split
add unpack_half_2x16_split_x/y
v5: replace first argument with nullptr in loadImm calls
prefer getSSA over getScratch

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 489 -
 1 file changed, 488 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 8a474eb1a8c..8368bbcc015 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -34,6 +34,31 @@
 #include 
 #include 
 
+#define CASE_OPFI(ni) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni
+#define CASE_OPFIU(ni) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni
+#define CASE_OPIU(ni) \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni
+
+#define CASE_OPFI_RET(ni, val) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+  return val
+#define CASE_OPFIU_RET(ni, val) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni : \
+  return val
+#define CASE_OPIU_RET(ni, val) \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni : \
+  return val
+
 static int
 type_size(const struct glsl_type *type)
 {
@@ -97,9 +122,17 @@ private:
std::vector getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   operation getOperation(nir_op);
+   operation preOperationNeeded(nir_op);
+
+   int getSubOp(nir_op);
+
+   CondCode getCondCode(nir_op);
+
bool assignSlots();
bool parseNIR();
 
+   bool visit(nir_alu_instr *);
bool visit(nir_block *);
bool visit(nir_cf_node *);
bool visit(nir_function *);
@@ -118,6 +151,7 @@ private:
unsigned int curLoopDepth;
 
BasicBlock *exit;
+   Value *zero;
 
union {
   struct {
@@ -129,7 +163,10 @@ private:
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir),
- curLoopDepth(0) {}
+ curLoopDepth(0)
+{
+   zero = mkImm((uint32_t)0);
+}
 
 BasicBlock *
 Converter::convert(nir_block *block)
@@ -246,6 +283,137 @@ Converter::getSType(nir_src &src, bool isFloat, bool 
isSigned)
return typeOfSize(bitSize / 8, isFloat, isSigned);
 }
 
+operation
+Converter::getOperation(nir_op op)
+{
+   switch (op) {
+   // basic ops with float and int variants
+   CASE_OPFI_RET(abs, OP_ABS);
+   CASE_OPFI_RET(add, OP_ADD);
+   CASE_OPFI_RET(and, OP_AND);
+   CASE_OPFIU_RET(div, OP_DIV);
+   CASE_OPIU_RET(find_msb, OP_BFIND);
+   CASE_OPFIU_RET(max, OP_MAX);
+   CASE_OPFIU_RET(min, OP_MIN);
+   CASE_OPFIU_RET(mod, OP_MOD);
+   CASE_OPFI_RET(rem, OP_MOD);
+   CASE_OPFI_RET(mul, OP_MUL);
+   CASE_OPIU_RET(mul_high, OP_MUL);
+   CASE_OPFI_RET(neg, OP_NEG);
+   CASE_OPFI_RET(not, OP_NOT);
+   CASE_OPFI_RET(or, OP_OR);
+   CASE_OPFI_RET(eq, OP_SET);
+   CASE_OPFIU_RET(ge, OP_SET);
+   CASE_OPFIU_RET(lt, OP_SET);
+   CASE_OPFI_RET(ne, OP_SET);
+   CASE_OPIU_RET(shr, OP_SHR);
+   CASE_OPFI_RET(sub, OP_SUB);
+   CASE_OPFI_RET(xor, OP_XOR);
+   case nir_op_fceil:
+  return OP_CEIL;
+   case nir_op_fcos:
+  return OP_COS;
+   case nir_op_f2f32:
+   case nir_op_f2f64:
+   case nir_op_f2i32:
+   case nir_op_f2i64:
+   case nir_op_f2u32:
+   case nir_op_f2u64:
+   case nir_op_i2f32:
+   case nir_op_i2f64:
+   case nir_op_i2i32:
+   case nir_op_i2i64:
+   case nir_op_u2f32:
+   case nir_op_u2f64:
+   case nir_op_u2u32:
+   case nir_op_u2u64:
+  return OP_CVT;
+   case nir_op_fddx:
+   case nir_op_fddx_coarse:
+   case nir_op_fddx_fine:
+  return OP_DFDX;
+   case nir_op_fddy:
+   case nir_op_fddy_coarse:
+   case nir_op_fddy_fine:
+  return OP_DFDY;
+   case nir_op_fexp2:
+  return OP_EX2;
+   case nir_op_ffloor:
+  return OP_FLOOR;
+   case nir_op_ffma:
+  return OP_FMA;
+   case nir_op_flog2:
+  return OP_LG2;
+   case nir_op_pack_64_2x32_split:
+  return OP_MERGE;
+   case nir_op_frcp:
+  return OP_RCP;
+   case nir_op_frsq:
+  return OP_RSQ;
+   case nir_op_fsat:
+  return OP_SAT;
+   case nir_op_ishl:
+  return OP_SHL;
+   case nir_op_fsin:
+  return OP_SIN;
+   case nir_op_fsqrt:
+  return OP_SQRT;
+   case nir_op_ftrunc:
+  return OP_TRUNC;
+   default:
+  ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
+operation
+Converter::preOperationNeeded(nir_op op)
+{
+   switch (op) {
+   case nir_op_fcos:
+   case nir_op_fsin:
+  return OP_PRESIN;
+   default:
+  return OP_NOP;
+   }
+}
+
+int
+Converter::getSubOp(nir_op op)
+{
+   switch (op) {
+   CASE_OPIU_RET(mul_high, NV50_IR_SUBOP_MUL_HIGH);
+   default:
+  return 0;
+   }
+}

[Mesa-dev] [PATCH v7 35/35] nvir/nir: implement intrinsic shader_clock

2018-04-16 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index dd73acb2366..d9ea665b9e1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -2354,6 +2354,14 @@ Converter::visit(nir_intrinsic_instr *insn)
   bar->subOp = getSubOp(op);
   break;
}
+   case nir_intrinsic_shader_clock: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+
+  loadImm(newDefs[0], 0u);
+  mkOp1v(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0));
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 32/35] nvir/nir: implement images

2018-04-16 Thread Karol Herbst
v3: fix compiler warnings
v4: use loadFrom helper
v5: fix signed min/max
v6: set tex mask
add support for indirect image access
set cache mode
v7: make compatible with 884d27bcf688d36c3bbe01bceca525595add3b33
rework the whole deref thing to prepare for bindless

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 402 -
 1 file changed, 382 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 24c70d0c5ae..15d77256a06 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -91,6 +91,8 @@ private:
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
 
+   ImgFormat convertGLImgFormat(GLuint);
+
Value* getSrc(nir_alu_src *, uint8_t component = 0);
Value* getSrc(nir_register *, uint8_t);
Value* getSrc(nir_src *, uint8_t, bool indirect = false);
@@ -120,6 +122,7 @@ private:
 
DataType getDType(nir_alu_instr*);
DataType getDType(nir_intrinsic_instr*);
+   DataType getDType(nir_intrinsic_instr*, bool isSigned);
DataType getDType(nir_op, NirSSADefBitSize);
 
std::vector getSTypes(nir_alu_instr*);
@@ -153,6 +156,11 @@ private:
 
/* tex stuff */
Value* applyProjection(Value *src, Value *proj);
+   unsigned int getNIRArgCount(TexInstruction::Target&);
+
+   /* image stuff */
+   uint16_t derefImageVar(nir_deref_var *, Value *&indirect, const glsl_type 
*&);
+   CacheMode getCacheModeFromVar(nir_variable *);
 
nir_shader *nir;
 
@@ -245,11 +253,30 @@ Converter::getDType(nir_alu_instr *insn)
 
 DataType
 Converter::getDType(nir_intrinsic_instr *insn)
+{
+   bool isSigned;
+   switch (insn->intrinsic) {
+   case nir_intrinsic_shared_atomic_imax:
+   case nir_intrinsic_shared_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_imin:
+  isSigned = true;
+  break;
+   default:
+  isSigned = false;
+  break;
+   }
+
+   return getDType(insn, isSigned);
+}
+
+DataType
+Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
 {
if (insn->dest.is_ssa)
-  return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
+  return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
else
-  return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
+  return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
 }
 
 DataType
@@ -417,6 +444,22 @@ Converter::getOperation(nir_intrinsic_op op)
   return OP_EMIT;
case nir_intrinsic_end_primitive:
   return OP_RESTART;
+   case nir_intrinsic_image_var_atomic_add:
+   case nir_intrinsic_image_var_atomic_and:
+   case nir_intrinsic_image_var_atomic_comp_swap:
+   case nir_intrinsic_image_var_atomic_exchange:
+   case nir_intrinsic_image_var_atomic_max:
+   case nir_intrinsic_image_var_atomic_min:
+   case nir_intrinsic_image_var_atomic_or:
+   case nir_intrinsic_image_var_atomic_xor:
+  return OP_SUREDP;
+   case nir_intrinsic_image_var_load:
+  return OP_SULDP;
+   case nir_intrinsic_image_var_samples:
+   case nir_intrinsic_image_var_size:
+  return OP_SUQ;
+   case nir_intrinsic_image_var_store:
+  return OP_SUSTP;
default:
   ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
   assert(false);
@@ -446,28 +489,31 @@ Converter::getSubOp(nir_op op)
}
 }
 
+#define CASE_OP_INTR_ATOM(nir, nvir) \
+   case nir_intrinsic_image_var_atomic_ ## nir : \
+   case nir_intrinsic_shared_atomic_ ## nir : \
+   case nir_intrinsic_ssbo_atomic_ ## nir : \
+  return NV50_IR_SUBOP_ATOM_ ## nvir
+#define CASE_OP_INTR_ATOM_S(nir, nvir) \
+   case nir_intrinsic_shared_atomic_ ## nir : \
+   case nir_intrinsic_ssbo_atomic_ ## nir : \
+  return NV50_IR_SUBOP_ATOM_ ## nvir
 int
 Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
-   case nir_intrinsic_ssbo_atomic_add:
-  return NV50_IR_SUBOP_ATOM_ADD;
-   case nir_intrinsic_ssbo_atomic_and:
-  return NV50_IR_SUBOP_ATOM_AND;
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-  return NV50_IR_SUBOP_ATOM_CAS;
-   case nir_intrinsic_ssbo_atomic_exchange:
-  return NV50_IR_SUBOP_ATOM_EXCH;
-   case nir_intrinsic_ssbo_atomic_or:
-  return NV50_IR_SUBOP_ATOM_OR;
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_ssbo_atomic_umax:
-  return NV50_IR_SUBOP_ATOM_MAX;
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_ssbo_atomic_umin:
-  return NV50_IR_SUBOP_ATOM_MIN;
-   case nir_intrinsic_ssbo_atomic_xor:
-  return NV50_IR_SUBOP_ATOM_XOR;
+   CASE_OP_INTR_ATOM(add, ADD);
+   CASE_OP_INTR_ATOM(and, AND);
+   CASE_OP_INTR_ATOM(comp_swap, CAS);
+   CASE_OP_INTR_ATOM(exchange, EXCH);
+   CASE_OP_INTR_ATOM(or, OR);
+   case nir_intrinsic_image_var_atomic_max:
+   CASE_O

[Mesa-dev] [PATCH v7 17/35] nvir/nir: add skeleton for nir_intrinsic_instr

2018-04-16 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp| 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index f4f844021a2..8a474eb1a8c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -105,6 +105,7 @@ private:
bool visit(nir_function *);
bool visit(nir_if *);
bool visit(nir_instr *);
+   bool visit(nir_intrinsic_instr *);
bool visit(nir_jump_instr *);
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
@@ -1277,6 +1278,8 @@ bool
 Converter::visit(nir_instr *insn)
 {
switch (insn->type) {
+   case nir_instr_type_intrinsic:
+  return visit(nir_instr_as_intrinsic(insn));
case nir_instr_type_jump:
   return visit(nir_instr_as_jump(insn));
case nir_instr_type_load_const:
@@ -1288,6 +1291,20 @@ Converter::visit(nir_instr *insn)
return true;
 }
 
+bool
+Converter::visit(nir_intrinsic_instr *insn)
+{
+   nir_intrinsic_op op = insn->intrinsic;
+
+   switch (op) {
+   default:
+  ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
+  return false;
+   }
+
+   return true;
+}
+
 bool
 Converter::visit(nir_jump_instr *insn)
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 28/35] nvir/nir: implement variable indexing

2018-04-16 Thread Karol Herbst
we store those arrays in local memory and reserve some space for each of the
arrays. The arrays are stored in a packed format, because we know quite easily
the context of each index. We don't do that in TGSI so far.

This causes various issues to come up in the MemoryOpt pass, because ld/st with
indirects aren't guaranteed to be aligned to 0x10 anymore.

v3: use fixed size vec4 arrays until we fix MemoryOpt
v4: fix for 64 bit types
v5: use loadFrom helper

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 58 ++
 1 file changed, 58 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 7dce61a1069..604a30db79f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -80,6 +80,7 @@ private:
typedef decltype(nir_ssa_def().index) NirSSADefIdx;
typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize;
typedef std::unordered_map NirDefMap;
+   typedef std::unordered_map NirArrayLMemOffsets;
typedef std::unordered_map 
NirBlockMap;
 
TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
@@ -157,6 +158,7 @@ private:
 
NirDefMap ssaDefs;
NirDefMap regDefs;
+   NirArrayLMemOffsets regToLmemOffset;
NirBlockMap blocks;
unsigned int curLoopDepth;
 
@@ -1269,6 +1271,7 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile 
file, operation op,
 bool
 Converter::parseNIR()
 {
+   info->bin.tlsSpace = 0;
info->io.clipDistances = nir->info.clip_distance_array_size;
info->io.cullDistances = nir->info.cull_distance_array_size;
 
@@ -1358,6 +1361,16 @@ Converter::visit(nir_function *function)
   break;
}
 
+   nir_foreach_register(reg, &function->impl->registers) {
+  if (reg->num_array_elems) {
+ /* TODO: packed variables would be nice, but MemoryOpt fails */
+ /* replace 4 with reg->num_components */
+ uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
+ regToLmemOffset[reg->index] = info->bin.tlsSpace;
+ info->bin.tlsSpace += size;
+  }
+   }
+
nir_index_ssa_defs(function->impl);
foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
   if (!visit(node))
@@ -2082,6 +2095,51 @@ Converter::visit(nir_alu_instr *insn)
 *   2. they basically just merge multiple values into one data type
 */
CASE_OPFI(mov):
+  if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) 
{
+ nir_reg_dest& reg = insn->dest.dest.reg;
+ auto goffset = regToLmemOffset[reg.reg->index];
+ auto comps = reg.reg->num_components;
+ auto size = reg.reg->bit_size / 8;
+ auto csize = 4 * size; /* TODO after fixing MemoryOpts: comps * size; 
*/
+ auto aoffset = csize * reg.base_offset;
+ Value *indirect = nullptr;
+
+ if (reg.indirect)
+indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
+  getSrc(reg.indirect, 0), mkImm(csize));
+
+ for (auto i = 0u; i < comps; ++i) {
+if (!((1u << i) & insn->dest.write_mask))
+   continue;
+
+Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + 
aoffset + i * size);
+mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
+ }
+ break;
+  } else if (!insn->src[0].src.is_ssa && 
insn->src[0].src.reg.reg->num_array_elems) {
+ LValues &newDefs = convert(&insn->dest);
+ nir_reg_src& reg = insn->src[0].src.reg;
+ auto goffset = regToLmemOffset[reg.reg->index];
+ /* auto comps = reg.reg->num_components; */
+ auto size = reg.reg->bit_size / 8;
+ auto csize = 4 * size; /* TODO after fixing MemoryOpts: comps * size; 
*/
+ auto aoffset = csize * reg.base_offset;
+ Value *indirect = nullptr;
+
+ if (reg.indirect)
+indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), 
getSrc(reg.indirect, 0), mkImm(csize));
+
+ for (auto i = 0u; i < newDefs.size(); ++i)
+loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + 
aoffset, i, indirect);
+
+ break;
+  } else {
+ LValues &newDefs = convert(&insn->dest);
+ for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
+mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
+ }
+  }
+  break;
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4: {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 29/35] nvir/nir: implement geometry shader nir_intrinsics

2018-04-16 Thread Karol Herbst
v4: use smarter getIndirect helper
use new getSlotAddress helper
use loadFrom helper

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 27 ++
 1 file changed, 27 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 604a30db79f..618b9ccfcc4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -413,6 +413,10 @@ operation
 Converter::getOperation(nir_intrinsic_op op)
 {
switch (op) {
+   case nir_intrinsic_emit_vertex:
+  return OP_EMIT;
+   case nir_intrinsic_end_primitive:
+  return OP_RESTART;
default:
   ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
   assert(false);
@@ -1911,6 +1915,29 @@ Converter::visit(nir_intrinsic_instr *insn)
  ->subOp = NV50_IR_SUBOP_SHFL_IDX;
   break;
}
+   case nir_intrinsic_load_per_vertex_input: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectVertex;
+  Value *indirectOffset;
+  auto baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
+  auto idx = getIndirect(insn, 1, 0, indirectOffset);
+
+  Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
+  mkImm(baseVertex), indirectVertex);
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
+  indirectOffset, vtxBase, info->in[idx].patch);
+  }
+  break;
+   }
+   case nir_intrinsic_emit_vertex:
+   case nir_intrinsic_end_primitive: {
+  auto idx = nir_intrinsic_stream_id(insn);
+  mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 24/35] nvir/nir: implement nir_ssa_undef_instr

2018-04-16 Thread Karol Herbst
v2: use mkOp

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 4606ffb792e..badb2398abb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -143,6 +143,7 @@ private:
bool visit(nir_jump_instr *);
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
+   bool visit(nir_ssa_undef_instr *);
 
nir_shader *nir;
 
@@ -1463,6 +1464,8 @@ Converter::visit(nir_instr *insn)
   return visit(nir_instr_as_jump(insn));
case nir_instr_type_load_const:
   return visit(nir_instr_as_load_const(insn));
+   case nir_instr_type_ssa_undef:
+  return visit(nir_instr_as_ssa_undef(insn));
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -2160,6 +2163,16 @@ Converter::visit(nir_alu_instr *insn)
 }
 #undef DEFAULT_CHECKS
 
+bool
+Converter::visit(nir_ssa_undef_instr *insn)
+{
+   LValues &newDefs = convert(&insn->def);
+   for (auto i = 0u; i < insn->def.num_components; ++i) {
+  mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
+   }
+   return true;
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 30/35] nvir/nir: implement nir_intrinsic_load_ubo

2018-04-16 Thread Karol Herbst
v4: use loadFrom helper

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 618b9ccfcc4..d65754ec4f6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1938,6 +1938,20 @@ Converter::visit(nir_intrinsic_instr *insn)
   mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
   break;
}
+   case nir_intrinsic_load_ubo: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectIndex;
+  Value *indirectOffset;
+  uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
+  uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
+  indirectOffset, indirectIndex);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 33/35] nvir/nir: add memory barriers

2018-04-16 Thread Karol Herbst
v5: add more barrier intrinsics

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp  | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 15d77256a06..e5d85940ca7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -506,6 +506,14 @@ Converter::getSubOp(nir_intrinsic_op op)
CASE_OP_INTR_ATOM(and, AND);
CASE_OP_INTR_ATOM(comp_swap, CAS);
CASE_OP_INTR_ATOM(exchange, EXCH);
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+  return NV50_IR_SUBOP_MEMBAR(M, GL);
+   case nir_intrinsic_memory_barrier_shared:
+  return NV50_IR_SUBOP_MEMBAR(M, CTA);
CASE_OP_INTR_ATOM(or, OR);
case nir_intrinsic_image_var_atomic_max:
CASE_OP_INTR_ATOM_S(imax, MAX);
@@ -2312,6 +2320,17 @@ Converter::visit(nir_intrinsic_instr *insn)
   bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
   break;
}
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+   case nir_intrinsic_memory_barrier_shared: {
+  Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
+  bar->fixed = 1;
+  bar->subOp = getSubOp(op);
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nouveau: handle new CAPS

2019-07-02 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 13 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 13 +
 2 files changed, 26 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index b84330b4b38..24796aff1ce 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -320,6 +320,13 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_NIR_COMPACT_ARRAYS:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_IMAGE_LOAD_FORMATTED:
+   case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
+   case PIPE_CAP_ATOMIC_FLOAT_MINMAX:
+   case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE:
+   case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK:
+   case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED:
+   case PIPE_CAP_FBFETCH_COHERENT:
+   case PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
@@ -338,8 +345,14 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
   return dev->vram_size >> 20;
case PIPE_CAP_UMA:
   return 0;
+
default:
   debug_printf("%s: unhandled cap %d\n", __func__, param);
+  /* fallthrough */
+   /* caps where we want the default value */
+   case PIPE_CAP_DMABUF:
+   case PIPE_CAP_ESSL_FEATURE_LEVEL:
+   case PIPE_CAP_MAX_FRAMES_IN_FLIGHT:
   return u_pipe_screen_get_param_defaults(pscreen, param);
}
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 3a543e54d1f..bf883631b86 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -355,6 +355,13 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
case PIPE_CAP_NIR_COMPACT_ARRAYS:
case PIPE_CAP_IMAGE_LOAD_FORMATTED:
+   case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
+   case PIPE_CAP_ATOMIC_FLOAT_MINMAX:
+   case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE:
+   case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK:
+   case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED:
+   case PIPE_CAP_FBFETCH_COHERENT:
+   case PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
@@ -373,8 +380,14 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
   return dev->vram_size >> 20;
case PIPE_CAP_UMA:
   return 0;
+
default:
   debug_printf("%s: unhandled cap %d\n", __func__, param);
+  /* fallthrough */
+   /* caps where we want the default value */
+   case PIPE_CAP_DMABUF:
+   case PIPE_CAP_ESSL_FEATURE_LEVEL:
+   case PIPE_CAP_MAX_FRAMES_IN_FLIGHT:
   return u_pipe_screen_get_param_defaults(pscreen, param);
}
 }
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nouveau: handle new CAPS

2019-07-02 Thread Karol Herbst
On Tue, Jul 2, 2019 at 5:54 PM Ilia Mirkin  wrote:
>
> Can you check on PIPE_CAP_COMPUTE_SHADER_DERIVATIVES ? I think we
> should be able to just flip that on for nvc0. Also the
> CS_DERIVED_SYSTEM_VALUES thing might be useful -- I had wanted to do
> that a while ago but laziness defeated me. Now that it's there though
> ... we have sysvals for many of those derived things.
>
> Or at least add commentary about each one, like "should be enabled
> when we get to it" sort of thing.
>

I added a trello card for the PIPE_CAP_COMPUTE_SHADER_DERIVATIVES one,
but I could add another one for CS_DERIVED_SYSTEM_VALUES

> On Tue, Jul 2, 2019 at 11:49 AM Karol Herbst  wrote:
> >
> > Signed-off-by: Karol Herbst 
> > ---
> >  src/gallium/drivers/nouveau/nv50/nv50_screen.c | 13 +
> >  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 13 +
> >  2 files changed, 26 insertions(+)
> >
> > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
> > b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> > index b84330b4b38..24796aff1ce 100644
> > --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> > +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> > @@ -320,6 +320,13 @@ nv50_screen_get_param(struct pipe_screen *pscreen, 
> > enum pipe_cap param)
> > case PIPE_CAP_NIR_COMPACT_ARRAYS:
> > case PIPE_CAP_COMPUTE:
> > case PIPE_CAP_IMAGE_LOAD_FORMATTED:
> > +   case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
> > +   case PIPE_CAP_ATOMIC_FLOAT_MINMAX:
> > +   case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE:
> > +   case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK:
> > +   case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED:
> > +   case PIPE_CAP_FBFETCH_COHERENT:
> > +   case PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS:
> >return 0;
> >
> > case PIPE_CAP_VENDOR_ID:
> > @@ -338,8 +345,14 @@ nv50_screen_get_param(struct pipe_screen *pscreen, 
> > enum pipe_cap param)
> >return dev->vram_size >> 20;
> > case PIPE_CAP_UMA:
> >return 0;
> > +
> > default:
> >debug_printf("%s: unhandled cap %d\n", __func__, param);
> > +  /* fallthrough */
> > +   /* caps where we want the default value */
> > +   case PIPE_CAP_DMABUF:
> > +   case PIPE_CAP_ESSL_FEATURE_LEVEL:
> > +   case PIPE_CAP_MAX_FRAMES_IN_FLIGHT:
> >return u_pipe_screen_get_param_defaults(pscreen, param);
> > }
> >  }
> > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
> > b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> > index 3a543e54d1f..bf883631b86 100644
> > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> > @@ -355,6 +355,13 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, 
> > enum pipe_cap param)
> > case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
> > case PIPE_CAP_NIR_COMPACT_ARRAYS:
> > case PIPE_CAP_IMAGE_LOAD_FORMATTED:
> > +   case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
> > +   case PIPE_CAP_ATOMIC_FLOAT_MINMAX:
> > +   case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE:
> > +   case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK:
> > +   case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED:
> > +   case PIPE_CAP_FBFETCH_COHERENT:
> > +   case PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS:
> >return 0;
> >
> > case PIPE_CAP_VENDOR_ID:
> > @@ -373,8 +380,14 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, 
> > enum pipe_cap param)
> >return dev->vram_size >> 20;
> > case PIPE_CAP_UMA:
> >return 0;
> > +
> > default:
> >debug_printf("%s: unhandled cap %d\n", __func__, param);
> > +  /* fallthrough */
> > +   /* caps where we want the default value */
> > +   case PIPE_CAP_DMABUF:
> > +   case PIPE_CAP_ESSL_FEATURE_LEVEL:
> > +   case PIPE_CAP_MAX_FRAMES_IN_FLIGHT:
> >return u_pipe_screen_get_param_defaults(pscreen, param);
> > }
> >  }
> > --
> > 2.21.0
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nvc0: remove nvc0_program.tp.input_patch_size

2019-07-08 Thread Karol Herbst
right now that's dead code

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h | 1 -
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c  | 4 
 src/gallium/drivers/nouveau/nvc0/nvc0_program.h  | 1 -
 3 files changed, 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 7c835ceab8d..95b3d633ee6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -123,7 +123,6 @@ struct nv50_ir_prog_info
  bool usesDrawParameters;
   } vp;
   struct {
- uint8_t inputPatchSize;
  uint8_t outputPatchSize;
  uint8_t partitioning;/* PIPE_TESS_PART */
  int8_t winding;  /* +1 (clockwise) / -1 (counter-clockwise) */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 1ff9f19f139..180b31ea893 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -343,8 +343,6 @@ nvc0_tcp_gen_header(struct nvc0_program *tcp, struct 
nv50_ir_prog_info *info)
 {
unsigned opcs = 6; /* output patch constants (at least the TessFactors) */
 
-   tcp->tp.input_patch_size = info->prop.tp.inputPatchSize;
-
if (info->numPatchConstants)
   opcs = 8 + info->numPatchConstants * 4;
 
@@ -374,8 +372,6 @@ nvc0_tcp_gen_header(struct nvc0_program *tcp, struct 
nv50_ir_prog_info *info)
 static int
 nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info)
 {
-   tep->tp.input_patch_size = ~0;
-
tep->hdr[0] = 0x20061 | (3 << 10);
tep->hdr[4] = 0xff000;
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index b73822ea9f7..183b14a42c2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -54,7 +54,6 @@ struct nvc0_program {
} fp;
struct {
   uint32_t tess_mode; /* ~0 if defined by the other stage */
-  uint32_t input_patch_size;
} tp;
struct {
   uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nv50/ir: Add mul and mod constant optimizations

2019-07-23 Thread Karol Herbst
On Tue, Jul 23, 2019 at 4:50 PM Ilia Mirkin  wrote:
>
> You handle 1/n but not 1%n? TBH, the 1/n code isn't 100% obvious to
> me... 1/n = |n|-1 > 0 ?  i forget how SLCT works, but I can't
> think of a way to finish that expression in terms of |n|-1 and n. And
> what about n == 0. I'd just as soon drop that case.
>

is 1/0 actually defined in glsl? I thought that the result is
undefined and we can basically do whatever, no? At least intel seems
to return INT_MAX for 1/0

> On Tue, Jul 23, 2019 at 10:20 AM Mark Menzynski  wrote:
> >
> > Optimizations for 0/n, 1/n and 0%n.
> > No changes in shader db tests, because it is never used here, but it
> > should come in handy.
> >
> > Signed-off-by: Mark Menzynski 
> > ---
> >  .../nouveau/codegen/nv50_ir_peephole.cpp  | 30 +--
> >  1 file changed, 28 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > index 0b3220903b9..12069e19808 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > @@ -1177,10 +1177,28 @@ ConstantFolding::opnd(Instruction *i, 
> > ImmediateValue &imm0, int s)
> >break;
> >
> > case OP_DIV:
> > -  if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32))
> > +  if (i->dType != TYPE_S32 && i->dType != TYPE_U32)
> >   break;
> > +
> >bld.setPosition(i, false);
> > -  if (imm0.reg.data.u32 == 0) {
> > +  if (s == 0) {
> > + if (imm0.reg.data.u32 == 0) {
> > +i->op = OP_MOV;
> > +i->setSrc(1, NULL);
> > + }
> > + else if (imm0.reg.data.u32 == 1) {
> > +Value *tA, *tB;
> > +Instruction *slct;
> > +
> > +tA = bld.mkOp1v(OP_ABS, TYPE_U32, bld.getSSA(), i->getSrc(1));
> > +tB = bld.mkOp2v(OP_ADD, TYPE_S32, bld.getSSA(), tA, 
> > bld.loadImm(NULL, -1));
> > +slct = bld.mkCmp(OP_SLCT, CC_GT, i->dType, bld.getSSA(), 
> > TYPE_U32, bld.loadImm(NULL, 0), i->getSrc(1), tB);
> > +i->def(0).replace(slct->getDef(0), false);
> > + }
> > + break;
> > +  }
> > +
> > +  if (s != 1 || imm0.reg.data.u32 == 0) {
> >   break;
> >} else
> >if (imm0.reg.data.u32 == 1) {
> > @@ -1259,6 +1277,14 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> > &imm0, int s)
> >break;
> >
> > case OP_MOD:
> > +  if (s == 0) {
> > + if (imm0.reg.data.u32 == 0) {
> > +i->op = OP_MOV;
> > +i->setSrc(1, NULL);
> > + }
> > + break;
> > +  }
> > +
> >if (s == 1 && imm0.isPow2()) {
> >   bld.setPosition(i, false);
> >   if (i->sType == TYPE_U32) {
> > --
> > 2.21.0
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nv50/ir: Add mul and mod constant optimizations

2019-07-23 Thread Karol Herbst
yeah.. I am not quite sure myself about it. But skipping on the div
emulation seems like a good idea in general. But it's also not common
enough to actually care all that much about it.

On Tue, Jul 23, 2019 at 5:18 PM Ilia Mirkin  wrote:
>
> On Tue, Jul 23, 2019 at 11:15 AM Karol Herbst  wrote:
> >
> > On Tue, Jul 23, 2019 at 4:50 PM Ilia Mirkin  wrote:
> > >
> > > You handle 1/n but not 1%n? TBH, the 1/n code isn't 100% obvious to
> > > me... 1/n = |n|-1 > 0 ?  i forget how SLCT works, but I can't
> > > think of a way to finish that expression in terms of |n|-1 and n. And
> > > what about n == 0. I'd just as soon drop that case.
> > >
> >
> > is 1/0 actually defined in glsl? I thought that the result is
> > undefined and we can basically do whatever, no? At least intel seems
> > to return INT_MAX for 1/0
>
> If you guys really like it, just add more comments that cover my questions.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nvc0/ir: Fix assert accessing null pointer

2019-07-24 Thread Karol Herbst
it's only fixing a crash in a build with asserts enabled, but if
somebody wants to apply those to stable, then go ahead.

On Wed, Jul 24, 2019 at 12:48 PM Juan A. Suarez Romero
 wrote:
>
> On Fri, 2019-07-19 at 13:56 +0200, Mark Menzynski wrote:
> > Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=111007
> > Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=67
> > Signed-off-by: Mark Menzynski 
> > ---
> >  src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
>
>
> Looks like a good candidate for 19.1 stable. WDYT?
>
> J.A.
>
> >
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> > index aca3b0afb1e..1f702a987d8 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> > @@ -51,12 +51,12 @@ NVC0LegalizeSSA::handleDIV(Instruction *i)
> > // Generate movs to the input regs for the call we want to generate
> > for (int s = 0; i->srcExists(s); ++s) {
> >Instruction *ld = i->getSrc(s)->getInsn();
> > -  assert(ld->getSrc(0) != NULL);
> >// check if we are moving an immediate, propagate it in that case
> >if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV) ||
> >  !(ld->src(0).getFile() == FILE_IMMEDIATE))
> >   bld.mkMovToReg(s, i->getSrc(s));
> >else {
> > + assert(ld->getSrc(0) != NULL);
> >   bld.mkMovToReg(s, ld->getSrc(0));
> >   // Clear the src, to make code elimination possible here before we
> >   // delete the instruction i later
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/4] nv50, nvc0: update sampler/view bind functions to accept NULL array

2019-07-26 Thread Karol Herbst
Reviewed-by: Karol Herbst 

On Fri, Jul 26, 2019 at 5:31 AM Ilia Mirkin  wrote:
>
> Apparently vl (or vdpau) wants to pass that in now. Handle it.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111213
> Signed-off-by: Ilia Mirkin 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/gallium/drivers/nouveau/nv50/nv50_state.c | 14 --
>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 18 ++
>  2 files changed, 18 insertions(+), 14 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_state.c
> index 8b294be6d86..a4163aa1713 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
> @@ -599,19 +599,20 @@ nv50_sampler_state_delete(struct pipe_context *pipe, 
> void *hwcso)
>
>  static inline void
>  nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
> -   unsigned nr, void **hwcso)
> +   unsigned nr, void **hwcsos)
>  {
> unsigned highest_found = 0;
> unsigned i;
>
> assert(nr <= PIPE_MAX_SAMPLERS);
> for (i = 0; i < nr; ++i) {
> +  struct nv50_tsc_entry *hwcso = hwcsos ? nv50_tsc_entry(hwcsos[i]) : 
> NULL;
>struct nv50_tsc_entry *old = nv50->samplers[s][i];
>
> -  if (hwcso[i])
> +  if (hwcso)
>   highest_found = i;
>
> -  nv50->samplers[s][i] = nv50_tsc_entry(hwcso[i]);
> +  nv50->samplers[s][i] = hwcso;
>if (old)
>   nv50_screen_tsc_unlock(nv50->screen, old);
> }
> @@ -685,12 +686,13 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, 
> int s,
>
> assert(nr <= PIPE_MAX_SAMPLERS);
> for (i = 0; i < nr; ++i) {
> +  struct pipe_sampler_view *view = views ? views[i] : NULL;
>struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]);
>if (old)
>   nv50_screen_tic_unlock(nv50->screen, old);
>
> -  if (views[i] && views[i]->texture) {
> - struct pipe_resource *res = views[i]->texture;
> +  if (view && view->texture) {
> + struct pipe_resource *res = view->texture;
>   if (res->target == PIPE_BUFFER &&
>   (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
>  nv50->textures_coherent[s] |= 1 << i;
> @@ -700,7 +702,7 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, 
> int s,
>   nv50->textures_coherent[s] &= ~(1 << i);
>}
>
> -  pipe_sampler_view_reference(&nv50->textures[s][i], views[i]);
> +  pipe_sampler_view_reference(&nv50->textures[s][i], view);
> }
>
> assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> index a9ee7b784bd..60dcbe3ec39 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> @@ -463,22 +463,23 @@ nvc0_sampler_state_delete(struct pipe_context *pipe, 
> void *hwcso)
>  static inline void
>  nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0,
> unsigned s,
> -   unsigned nr, void **hwcso)
> +   unsigned nr, void **hwcsos)
>  {
> unsigned highest_found = 0;
> unsigned i;
>
> for (i = 0; i < nr; ++i) {
> +  struct nv50_tsc_entry *hwcso = hwcsos ? nv50_tsc_entry(hwcsos[i]) : 
> NULL;
>struct nv50_tsc_entry *old = nvc0->samplers[s][i];
>
> -  if (hwcso[i])
> +  if (hwcso)
>   highest_found = i;
>
> -  if (hwcso[i] == old)
> +  if (hwcso == old)
>   continue;
>nvc0->samplers_dirty[s] |= 1 << i;
>
> -  nvc0->samplers[s][i] = nv50_tsc_entry(hwcso[i]);
> +  nvc0->samplers[s][i] = hwcso;
>if (old)
>   nvc0_screen_tsc_unlock(nvc0->screen, old);
> }
> @@ -523,14 +524,15 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, 
> int s,
> unsigned i;
>
> for (i = 0; i < nr; ++i) {
> +  struct pipe_sampler_view *view = views ? views[i] : NULL;
>struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
>
> -  if (views[i] == nvc0->textures[s][i])
> +  if (view == nvc0->textures[s][i])
>   continue;
>nvc0->textures_dirty[s] |= 1 << i;
>
> -  if (views[i] && views[i]->texture) {
> - struct pipe_resource *res = views[i]->

Re: [Mesa-dev] [PATCH 3/4] nvc0: allow a non-user buffer to be bound at position 0

2019-07-26 Thread Karol Herbst
Reviewed-by: Karol Herbst 

On Fri, Jul 26, 2019 at 5:31 AM Ilia Mirkin  wrote:
>
> Previously the code only handled it for positions 1 and up (as would be
> for UBO's in GL). It's not a lot of trouble to handle this, and vl or
> vdpau want this.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111213
> Signed-off-by: Ilia Mirkin 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  .../drivers/nouveau/nvc0/nve4_compute.c   | 45 +++
>  1 file changed, 27 insertions(+), 18 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c 
> b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> index c5e4dec20bd..a1c40d1e6b9 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> @@ -393,23 +393,24 @@ nve4_compute_validate_constbufs(struct nvc0_context 
> *nvc0)
>  uint64_t address
> = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
>
> -assert(i > 0); /* we really only want uniform buffer objects */
> -
> -BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
> -PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
> -PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
> -BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
> -PUSH_DATA (push, 4 * 4);
> -PUSH_DATA (push, 0x1);
> -BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4);
> -PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
> -
> -PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
> -PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
> -PUSH_DATA (push, nvc0->constbuf[5][i].size);
> -PUSH_DATA (push, 0);
> -BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
> +/* constbufs above 0 will are fetched via ubo info in the shader 
> */
> +if (i > 0) {
> +   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
> +   PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
> +   PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
> +   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
> +   PUSH_DATA (push, 4 * 4);
> +   PUSH_DATA (push, 0x1);
> +   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4);
> +   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 
> 1));
> +
> +   PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
> +   PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
> +   PUSH_DATA (push, nvc0->constbuf[s][i].size);
> +   PUSH_DATA (push, 0);
> +}
>
> +BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
>  res->cb_bindings[s] |= 1 << i;
>   }
>}
> @@ -554,9 +555,9 @@ nve4_compute_derive_cache_split(struct nvc0_context 
> *nvc0, uint32_t shared_size)
>  static void
>  nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc)
>  {
> -   // only user constant buffers 1-6 can be put in the descriptor, the rest 
> are
> +   // only user constant buffers 0-6 can be put in the descriptor, the rest 
> are
> // loaded through global memory
> -   for (int i = 1; i <= 6; i++) {
> +   for (int i = 0; i <= 6; i++) {
>if (nvc0->constbuf[5][i].user || !nvc0->constbuf[5][i].u.buf)
>   continue;
>
> @@ -609,6 +610,10 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
> if (nvc0->constbuf[5][0].user || cp->parm_size) {
>nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
>   NVC0_CB_USR_INFO(5), 1 << 16);
> +
> +  // Later logic will attempt to bind a real buffer at position 0. That
> +  // should not happen if we've bound a user buffer.
> +  assert(!nvc0->constbuf[5][0].u.buf);
> }
> nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
>NVC0_CB_AUX_INFO(5), 1 << 11);
> @@ -649,6 +654,10 @@ gp100_compute_setup_launch_desc(struct nvc0_context 
> *nvc0,
> if (nvc0->constbuf[5][0].user || cp->parm_size) {
>gp100_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
>NVC0_CB_USR_INFO(5), 1 << 16);
> +
> +  // Later logic will attempt to bind a real buffer at position 0. That
> +  // should not happen if we've bound a user buffer.
> +

Re: [Mesa-dev] [PATCH 4/4] nouveau: flip DEBUG -> !NDEBUG

2019-07-26 Thread Karol Herbst
Reviewed-by: Karol Herbst 

On Fri, Jul 26, 2019 at 5:31 AM Ilia Mirkin  wrote:
>
> The meson conversion chose to change the meaning of DEBUG from "used for
> debugging" to "used for expensive things for debugging", primarily
> for nir_validate. Flip things over so that we get nice things with
> optimizations enabled.
>
> While we're at it, also kill off nouveau_statebuf.h which is unused (and
> has a mention of DEBUG which is how I found it).
>
> Signed-off-by: Ilia Mirkin 
> ---
>  src/gallium/drivers/nouveau/Makefile.sources  |  1 -
>  .../drivers/nouveau/codegen/nv50_ir_driver.h  |  2 +-
>  .../drivers/nouveau/codegen/nv50_ir_inlines.h |  2 +-
>  .../drivers/nouveau/codegen/nv50_ir_util.h|  8 ++---
>  src/gallium/drivers/nouveau/meson.build   |  1 -
>  src/gallium/drivers/nouveau/nouveau_screen.h  |  2 +-
>  .../drivers/nouveau/nouveau_statebuf.h| 32 ---
>  .../drivers/nouveau/nv50/nv50_program.c   |  2 +-
>  .../drivers/nouveau/nvc0/nvc0_program.c   |  8 ++---
>  .../drivers/nouveau/nvc0/nve4_compute.c   |  6 ++--
>  10 files changed, 15 insertions(+), 49 deletions(-)
>  delete mode 100644 src/gallium/drivers/nouveau/nouveau_statebuf.h
>
> diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
> b/src/gallium/drivers/nouveau/Makefile.sources
> index c6a1aff7110..6c360992a53 100644
> --- a/src/gallium/drivers/nouveau/Makefile.sources
> +++ b/src/gallium/drivers/nouveau/Makefile.sources
> @@ -12,7 +12,6 @@ C_SOURCES := \
> nouveau_mm.h \
> nouveau_screen.c \
> nouveau_screen.h \
> -   nouveau_statebuf.h \
> nouveau_video.c \
> nouveau_video.h \
> nouveau_vp3_video_bsp.c \
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index 95b3d633ee6..322bdd02557 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -54,7 +54,7 @@ struct nv50_ir_varying
> ubyte si; /* TGSI semantic index */
>  };
>
> -#ifdef DEBUG
> +#ifndef NDEBUG
>  # define NV50_IR_DEBUG_BASIC (1 << 0)
>  # define NV50_IR_DEBUG_VERBOSE   (2 << 0)
>  # define NV50_IR_DEBUG_REG_ALLOC (1 << 2)
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
> index 4cb53ab42ed..b4ca5ed8215 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
> @@ -222,7 +222,7 @@ Instruction *Value::getUniqueInsn() const
>  return (*it)->getInsn();
>// should be unreachable and trigger assertion at the end
> }
> -#ifdef DEBUG
> +#ifndef NDEBUG
> if (reg.data.id < 0) {
>int n = 0;
>for (DefCIterator it = defs.begin(); n < 2 && it != defs.end(); ++it)
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h
> index affe04a2dd9..307c23d5e03 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h
> @@ -36,14 +36,14 @@
>  #include "util/u_inlines.h"
>  #include "util/u_memory.h"
>
> -#define ERROR(args...) debug_printf("ERROR: " args)
> -#define WARN(args...) debug_printf("WARNING: " args)
> -#define INFO(args...) debug_printf(args)
> +#define ERROR(args...) _debug_printf("ERROR: " args)
> +#define WARN(args...) _debug_printf("WARNING: " args)
> +#define INFO(args...) _debug_printf(args)
>
>  #define INFO_DBG(m, f, args...)  \
> do {  \
>if (m & NV50_IR_DEBUG_##f) \
> - debug_printf(args); \
> + _debug_printf(args); \
> } while(0)
>
>  #define FATAL(args...)  \
> diff --git a/src/gallium/drivers/nouveau/meson.build 
> b/src/gallium/drivers/nouveau/meson.build
> index 64138212b5b..b3e79bf7089 100644
> --- a/src/gallium/drivers/nouveau/meson.build
> +++ b/src/gallium/drivers/nouveau/meson.build
> @@ -32,7 +32,6 @@ files_libnouveau = files(
>'nouveau_mm.h',
>'nouveau_screen.c',
>'nouveau_screen.h',
> -  'nouveau_statebuf.h',
>'nouveau_video.c',
>'nouveau_video.h',
>'nouveau_vp3_video_bsp.c',
> diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h 
> b/src/gallium/drivers/nouveau/nouveau_screen.h
> index 1302c608bec..450c7c466be 100644
> --- a/src/gallium/

Re: [Mesa-dev] [PATCH] nv50/ir: handle insn not being there for definition of CVT arg

2019-07-26 Thread Karol Herbst
Reviewed-by: Karol Herbst 

On Fri, Jul 26, 2019 at 7:03 AM Ilia Mirkin  wrote:
>
> This can happen if it's e.g. a uniform or a function argument.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111217
> Signed-off-by: Ilia Mirkin 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 0b3220903b9..bfdb923379b 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -2080,14 +2080,15 @@ void
>  AlgebraicOpt::handleCVT_CVT(Instruction *cvt)
>  {
> Instruction *insn = cvt->getSrc(0)->getInsn();
> -   RoundMode rnd = insn->rnd;
>
> -   if (insn->saturate ||
> +   if (!insn ||
> +   insn->saturate ||
> insn->subOp ||
> insn->dType != insn->sType ||
> insn->dType != cvt->sType)
>return;
>
> +   RoundMode rnd = insn->rnd;
> switch (insn->op) {
> case OP_CEIL:
>rnd = ROUND_PI;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nv50/ir: don't consider the main compute function as taking arguments

2019-07-26 Thread Karol Herbst
I think this was there for generic support for functions actually and
that for OpenCL + TGSI the idea was to not inline everything by
default, so return values were handled there as well.

The proper way to handle this is to declare kernel inputs as real inputs,
because kernel inputs are fundamentally different from function
arguments and trying to handle them exactly the same will just result
in pain and fun issues like the VDPAU/VA one.

The correct way to handle that on the TGSI side is to never generate a
MAIN function out of the actual source, then add a "main" function
which reads the shader IN variables and passes them as arguments to
the entry point called (which should be a named function inside the
TGSI). This way the now new main function has no parameters and no
return value, the world becomes sane and everybody is happy.

That's also how I implemented that for the OpenCL nir path and that
works out quite nicely as now you can just call different entry points
without having to deal with this "if this function is the entry point,
args get passed differently than being a called function" situation.

Anyway, for the patch itself:
Reviewed-by: Karol Herbst 

On Fri, Jul 26, 2019 at 7:20 AM Ilia Mirkin  wrote:
>
> With OpenCL, kernels can take arguments and return values (?). However
> in practice, there is no more TGSI compute implementation, and even if
> there were, it would probably have named functions and no explicit main.
>
> This improves RA considerably for compute shaders, since temps are not
> kept around as return values.
>
> Signed-off-by: Ilia Mirkin 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index 9d0ab336c75..2dd13e70d0e 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -4298,7 +4298,7 @@ Converter::BindArgumentsPass::visit(Function *f)
>}
> }
>
> -   if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE)
> +   if (func == prog->main /* && prog->getType() != Program::TYPE_COMPUTE */)
>return true;
> updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,
> &Function::buildLiveSets, &Function::ins);
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/4] nvc0: allow a non-user buffer to be bound at position 0

2019-07-26 Thread Karol Herbst
On Fri, Jul 26, 2019 at 2:59 PM Ilia Mirkin  wrote:
>
> Thanks! I had to make a small update to the asserts:
>
> assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
>
> u.buf is not valid to check when .user is set. (in fact it aliases
> with the "data" pointer)
>
> Let me know if you want me to resend.
>

no, that's fine..

> On Fri, Jul 26, 2019 at 5:51 AM Karol Herbst  wrote:
> >
> > Reviewed-by: Karol Herbst 
> >
> > On Fri, Jul 26, 2019 at 5:31 AM Ilia Mirkin  wrote:
> > >
> > > Previously the code only handled it for positions 1 and up (as would be
> > > for UBO's in GL). It's not a lot of trouble to handle this, and vl or
> > > vdpau want this.
> > >
> > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111213
> > > Signed-off-by: Ilia Mirkin 
> > > Cc: mesa-sta...@lists.freedesktop.org
> > > ---
> > >  .../drivers/nouveau/nvc0/nve4_compute.c   | 45 +++
> > >  1 file changed, 27 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c 
> > > b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> > > index c5e4dec20bd..a1c40d1e6b9 100644
> > > --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> > > +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> > > @@ -393,23 +393,24 @@ nve4_compute_validate_constbufs(struct nvc0_context 
> > > *nvc0)
> > >  uint64_t address
> > > = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
> > >
> > > -assert(i > 0); /* we really only want uniform buffer objects 
> > > */
> > > -
> > > -BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
> > > -PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
> > > -PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
> > > -BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
> > > -PUSH_DATA (push, 4 * 4);
> > > -PUSH_DATA (push, 0x1);
> > > -BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4);
> > > -PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 
> > > 1));
> > > -
> > > -PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
> > > -PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
> > > -PUSH_DATA (push, nvc0->constbuf[5][i].size);
> > > -PUSH_DATA (push, 0);
> > > -BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
> > > +/* constbufs above 0 will are fetched via ubo info in the 
> > > shader */
> > > +if (i > 0) {
> > > +   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
> > > +   PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
> > > +   PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
> > > +   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
> > > +   PUSH_DATA (push, 4 * 4);
> > > +   PUSH_DATA (push, 0x1);
> > > +   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4);
> > > +   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 
> > > << 1));
> > > +
> > > +   PUSH_DATA (push, res->address + 
> > > nvc0->constbuf[s][i].offset);
> > > +   PUSH_DATAh(push, res->address + 
> > > nvc0->constbuf[s][i].offset);
> > > +   PUSH_DATA (push, nvc0->constbuf[s][i].size);
> > > +   PUSH_DATA (push, 0);
> > > +}
> > >
> > > +BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
> > >  res->cb_bindings[s] |= 1 << i;
> > >   }
> > >}
> > > @@ -554,9 +555,9 @@ nve4_compute_derive_cache_split(struct nvc0_context 
> > > *nvc0, uint32_t shared_size)
> > >  static void
> > >  nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void 
> > > *desc)
> > >  {
> > > -   // only user constant buffers 1-6 can be put in the descriptor, the 
> > > rest are
> > > +   // only user constant buffers 0-6 can be put in the descriptor, the 
> > > rest are
> > > // loaded through global memory
> > > -   for (int i = 1; i <= 6; i++) {
> &g

[Mesa-dev] [PATCH 1/4] nv50ir: fix memset on non trivial types warning

2019-09-20 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp| 4 +---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h  | 2 +-
 src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index a181a13a3b1..45ee95bb103 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -903,10 +903,8 @@ Instruction::isCommutationLegal(const Instruction *i) const
 }
 
 TexInstruction::TexInstruction(Function *fn, operation op)
-   : Instruction(fn, op, TYPE_F32)
+   : Instruction(fn, op, TYPE_F32), tex()
 {
-   memset(&tex, 0, sizeof(tex));
-
tex.rIndirectSrc = -1;
tex.sIndirectSrc = -1;
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index b19751ab372..5163e1a7ec2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -957,7 +957,7 @@ public:
class Target
{
public:
-  Target(TexTarget targ = TEX_TARGET_2D) : target(targ) { }
+  Target(TexTarget targ = TEX_TARGET_1D) : target(targ) { }
 
   const char *getName() const { return descTable[target].name; }
   unsigned int getArgCount() const { return descTable[target].argc; }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 5c6d0570ae2..609e7b89290 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -455,7 +455,7 @@ CodeEmitter::addInterp(int ipa, int reg, FixupApply apply)
   if (!fixupInfo)
  return false;
   if (n == 0)
- memset(fixupInfo, 0, sizeof(FixupInfo));
+ fixupInfo->count = 0;
}
++fixupInfo->count;
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/4] nv50ir: fix unnecessary parentheses warning

2019-09-20 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h
index 307c23d5e03..b1766f48205 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h
@@ -145,7 +145,7 @@ public:
 #define DLLIST_EMPTY(__list) ((__list)->next == (__list))
 
 #define DLLIST_FOR_EACH(list, it) \
-   for (DLList::Iterator (it) = (list)->iterator(); !(it).end(); (it).next())
+   for (DLList::Iterator it = (list)->iterator(); !(it).end(); (it).next())
 
 class DLList
 {
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/4] nv50ir/nir: comparison of integer expressions of different signedness warning

2019-09-20 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 4e86ab8f8cc..95b60d2c7d0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1957,7 +1957,7 @@ Converter::visit(nir_intrinsic_instr *insn)
  }
  case Program::TYPE_GEOMETRY:
  case Program::TYPE_VERTEX: {
-if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
+if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) 
{
mkMov(clipVtx[i], src);
src = clipVtx[i];
 }
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/4] nv50, nvc0: fix must_check warning of util_dynarray_resize_bytes

2019-09-20 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/nv50/nv50_state.c | 10 +++---
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 10 +++---
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c 
b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index a4163aa1713..9390b61b748 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -1267,9 +1267,13 @@ nv50_set_global_bindings(struct pipe_context *pipe,
 
if (nv50->global_residents.size <= (end * sizeof(struct pipe_resource *))) {
   const unsigned old_size = nv50->global_residents.size;
-  util_dynarray_resize(&nv50->global_residents, struct pipe_resource *, 
end);
-  memset((uint8_t *)nv50->global_residents.data + old_size, 0,
- nv50->global_residents.size - old_size);
+  if (util_dynarray_resize(&nv50->global_residents, struct pipe_resource 
*, end)) {
+ memset((uint8_t *)nv50->global_residents.data + old_size, 0,
+nv50->global_residents.size - old_size);
+  } else {
+ NOUVEAU_ERR("Could not resize global residents array\n");
+ return;
+  }
}
 
if (resources) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 60dcbe3ec39..956bd78defa 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -1374,9 +1374,13 @@ nvc0_set_global_bindings(struct pipe_context *pipe,
 
if (nvc0->global_residents.size <= (end * sizeof(struct pipe_resource *))) {
   const unsigned old_size = nvc0->global_residents.size;
-  util_dynarray_resize(&nvc0->global_residents, struct pipe_resource *, 
end);
-  memset((uint8_t *)nvc0->global_residents.data + old_size, 0,
- nvc0->global_residents.size - old_size);
+  if (util_dynarray_resize(&nvc0->global_residents, struct pipe_resource 
*, end)) {
+ memset((uint8_t *)nvc0->global_residents.data + old_size, 0,
+nvc0->global_residents.size - old_size);
+  } else {
+ NOUVEAU_ERR("Could not resize global residents array\n");
+ return;
+  }
}
 
if (resources) {
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [clover/spirv] radeonsi/NIR (with Nine) - final linking failed on libOpenCL.so.1.0.0

2019-09-26 Thread Karol Herbst
I think you only need to recompile the translator with -fPIC enabled.
At least that's what the error is saying.

On Thu, Sep 26, 2019 at 6:53 AM Aaron Watry  wrote:
>
> Pretty sure I'm running into the same thing trying to build clover
> with llvm-spirv enabled.  If it's a known solution, I wouldn't mind
> having some time saved :)
>
> --Aaron
>
> On Wed, Sep 25, 2019 at 10:30 AM Dieter Nützel  wrote:
> >
> > Hello Karol and Pierre,
> >
> > tried it on radeonsi/NIR with Nine and OpenCL enabled
> > (-Dgallium-nine=true -Dopencl-spirv=true -Dgallium-opencl=standalone).
> >
> > I think I have all SPIRV-LLVM-Translator stuff in place
> > (/opt/llvm/projects/SPIRV-LLVM-Translator/). Resulting lib is installed
> > at /usr/local/lib/libLLVMSPIRVLib.a.
> >
> > Do I need a shared version (*.so ) of it? 'ld' output points at this
> > (relocation R_X86_64_32 against symbol `_ZTVN4SPIR13PrimitiveTypeE' can
> > not be used when making a shared object; recompile with -fPIC).
> >
> > Thanks,
> > Dieter
> >
> > [1384/1384] Linking target
> > src/gallium/targets/opencl/libOpenCL.so.1.0.0.
> > FAILED: src/gallium/targets/opencl/libOpenCL.so.1.0.0
> > ccache c++  -o src/gallium/targets/opencl/libOpenCL.so.1.0.0
> > -Wl,--no-undefined -Wl,--as-needed -Wl,-O1 -shared -fPIC
> > -Wl,--start-group -Wl,-soname,libOpenCL.so.1 -Wl,--whole-archive
> > src/gallium/state_trackers/clover/libclover.a -Wl,--no-whole-archive
> > src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.a
> > src/loader/libloader.a src/util/libxmlconfig.a src/util/libmesa_util.a
> > src/gallium/auxiliary/libgallium.a src/compiler/nir/libnir.a
> > src/compiler/libcompiler.a src/gallium/state_trackers/clover/libclllvm.a
> > src/gallium/state_trackers/clover/libclspirv.a
> > src/gallium/state_trackers/clover/libclnir.a -Wl,--gc-sections
> > -Wl,--version-script /opt/mesa/src/gallium/targets/opencl/opencl.sym
> > /usr/lib64/gcc/x86_64-suse-linux/9/../../../../lib64/libz.so -pthread
> > -lm -ldl /usr/lib64/libunwind.so /usr/lib64/libelf.so
> > /usr/local/lib/libclangCodeGen.a /usr/local/lib/libclangFrontendTool.a
> > /usr/local/lib/libclangFrontend.a /usr/local/lib/libclangDriver.a
> > /usr/local/lib/libclangSerialization.a /usr/local/lib/libclangParse.a
> > /usr/local/lib/libclangSema.a /usr/local/lib/libclangAnalysis.a
> > /usr/local/lib/libclangAST.a /usr/local/lib/libclangASTMatchers.a
> > /usr/local/lib/libclangEdit.a /usr/local/lib/libclangLex.a
> > /usr/local/lib/libclangBasic.a /usr/lib64/libdrm.so
> > /usr/lib64/libexpat.so -L/usr/local/lib -lLLVM-10svn -lsensors
> > -L/usr/local/lib -lLLVM-10svn /usr/local/lib/libLLVMSPIRVLib.a
> > /usr/lib64/gcc/x86_64-suse-linux/9/../../../../lib64/libSPIRV-Tools.so
> > /usr/lib64/gcc/x86_64-suse-linux/9/../../../../lib64/libSPIRV-Tools-link.so
> > /usr/lib64/gcc/x86_64-suse-linux/9/../../../../lib64/libSPIRV-Tools-opt.so
> > -Wl,--end-group
> > '-Wl,-rpath,$ORIGIN/../../auxiliary/pipe-loader:$ORIGIN/../../../loader:$ORIGIN/../../../util:$ORIGIN/../../auxiliary:$ORIGIN/../../../compiler/nir:$ORIGIN/../../../compiler'
> > -Wl,-rpath-link,/opt/mesa/build/src/gallium/auxiliary/pipe-loader
> > -Wl,-rpath-link,/opt/mesa/build/src/loader
> > -Wl,-rpath-link,/opt/mesa/build/src/util
> > -Wl,-rpath-link,/opt/mesa/build/src/gallium/auxiliary
> > -Wl,-rpath-link,/opt/mesa/build/src/compiler/nir
> > -Wl,-rpath-link,/opt/mesa/build/src/compiler
> > /usr/lib64/gcc/x86_64-suse-linux/9/../../../../x86_64-suse-linux/bin/ld:
> > /usr/local/lib/libLLVMSPIRVLib.a(SPIRVWriter.cpp.o): relocation
> > R_X86_64_32 against symbol `__pthread_key_create@@GLIBC_2.2.5' can not
> > be used when making a shared object; recompile with -fPIC
> > /usr/lib64/gcc/x86_64-suse-linux/9/../../../../x86_64-suse-linux/bin/ld:
> > /usr/local/lib/libLLVMSPIRVLib.a(PreprocessMetadata.cpp.o): relocation
> > R_X86_64_32 against `.rodata' can not be used when making a shared
> > object; recompile with -fPIC
> > /usr/lib64/gcc/x86_64-suse-linux/9/../../../../x86_64-suse-linux/bin/ld:
> > /usr/local/lib/libLLVMSPIRVLib.a(SPIRVDebug.cpp.o): relocation
> > R_X86_64_32 against `.bss' can not be used when making a shared object;
> > recompile with -fPIC
> > /usr/lib64/gcc/x86_64-suse-linux/9/../../../../x86_64-suse-linux/bin/ld:
> > /usr/local/lib/libLLVMSPIRVLib.a(SPIRVDecorate.cpp.o): relocation
> > R_X86_64_32 against symbol `_ZTVN5SPIRV20SPIRVDecorateGenericE' can not
> > be used when making a shared object; recompile with -fPIC
> > /usr/lib64/gcc/x86_64-suse-linux/9/../../../../x86_64-suse-linux/bin/ld:
> > /usr/local/lib/libLLVMSPIRVLib.a(SPIRVEntry.cpp.o): relocation
> > R_X86_64_32 against symbol `__pthread_key_create@@GLIBC_2.2.5' can not
> > be used when making a shared object; recompile with -fPIC
> > /usr/lib64/gcc/x86_64-suse-linux/9/../../../../x86_64-suse-linux/bin/ld:
> > /usr/local/lib/libLLVMSPIRVLib.a(SPIRVFunction.cpp.o): relocation
> > R_X86_64_32 against symbol `_ZTVN5SPIRV22SPIRVFunctionParameterE' can
> > not be used when making a shared object; re

Re: [Mesa-dev] [Nouveau] [PATCH] gm107/ir: fix loading z offset for layered 3d image bindings

2019-10-14 Thread Karol Herbst
I don't think this is a good idea overall.

The way simpler solution would be to disable tiling on the z axis for
3d images so that we don't hurt the most common case, 2d images. And
that's what I was seeing nvidia doing anyway.

So with that we would end up adding a bunch of instructions hurting
the 2d image case, just to support something no user will care about
anyway.

On Mon, Oct 14, 2019 at 7:22 AM Ilia Mirkin  wrote:
>
> Unfortunately we don't know if a particular load is a real 2d image (as
> would be a cube face or 2d array element), or a layer of a 3d image.
> Since we pass in the TIC reference, the instruction's type has to match
> what's in the TIC (experimentally). In order to properly support
> bindless images, this also can't be done by looking at the current
> bindings and generating appropriate code.
>
> As a result all plain 2d loads are converted into a pair of 2d/3d loads,
> with appropriate predicates to ensure only one of those actually
> executes, and the values are all merged in.
>
> This goes somewhat against the current flow, so for GM107 we do the OOB
> handling directly in the surface processing logic. Perhaps the other
> gens should do something similar, but that is left to another change.
>
> This fixes dEQP tests like image_load_store.3d.*_single_layer and GL-CTS
> tests like shader_image_load_store.non-layered_binding without breaking
> anything else.
>
> Signed-off-by: Ilia Mirkin 
> ---
>
> OK, first of all -- to whoever thought that binding single layers of a 3d
> image and telling the shader they were regular 2d images was a good idea --
> I disagree.
>
> This change feels super super dirty, but I honestly don't see a materially
> cleaner way of handling it. Instead of being able to reuse the OOB
> handling, it's put in with the coord processing (!), and the surface
> conversion function is seriously hacked up.
>
> But splitting it up is harder, since a lot of information has to flow
> from stage to stage, like when to do what kind of access, and cloning
> the surface op is much easier in the coord processing stage.
>
>  .../nouveau/codegen/nv50_ir_emit_gm107.cpp|  34 ++-
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 206 +-
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.h   |   4 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c   |  10 +-
>  4 files changed, 201 insertions(+), 53 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> index 6eefe8f0025..e244bd0d610 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> @@ -122,6 +122,8 @@ private:
> void emitSAM();
> void emitRAM();
>
> +   void emitPSETP();
> +
> void emitMOV();
> void emitS2R();
> void emitCS2R();
> @@ -690,6 +692,31 @@ CodeEmitterGM107::emitRAM()
>   * predicate/cc
>   
> **/
>
> +void
> +CodeEmitterGM107::emitPSETP()
> +{
> +
> +   emitInsn(0x5090);
> +
> +   switch (insn->op) {
> +   case OP_AND: emitField(0x18, 3, 0); break;
> +   case OP_OR:  emitField(0x18, 3, 1); break;
> +   case OP_XOR: emitField(0x18, 3, 2); break;
> +   default:
> +  assert(!"unexpected operation");
> +  break;
> +   }
> +
> +   // emitINV (0x2a);
> +   emitPRED(0x27); // TODO: support 3-arg
> +   emitINV (0x20, insn->src(1));
> +   emitPRED(0x1d, insn->src(1));
> +   emitINV (0x0f, insn->src(0));
> +   emitPRED(0x0c, insn->src(0));
> +   emitPRED(0x03, insn->def(0));
> +   emitPRED(0x00);
> +}
> +
>  
> /***
>   * movement / conversion
>   
> **/
> @@ -3557,7 +3584,12 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
> case OP_AND:
> case OP_OR:
> case OP_XOR:
> -  emitLOP();
> +  switch (insn->def(0).getFile()) {
> +  case FILE_GPR: emitLOP(); break;
> +  case FILE_PREDICATE: emitPSETP(); break;
> +  default:
> + assert(!"invalid bool op");
> +  }
>break;
> case OP_NOT:
>emitNOT();
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 1f702a987d8..0f68a9a229f 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1802,6 +1802,9 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, 
> uint32_t off, bool bindless
>  {
> uint32_t base = slot * NVC0_SU_INFO__STRIDE;
>
> +   // We don't upload surface info for bindless for GM107+
> +   assert(!bindless || targ->getChipset() < NVISA_GM107_CHIPSET);
> +
> if (ptr) {
>ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot));
>   

Re: [Mesa-dev] [Nouveau] [PATCH] nv50/ir: mark STORE destination inputs as used

2019-10-14 Thread Karol Herbst
Reviewed-by: Karol Herbst 

On Mon, Oct 14, 2019 at 8:47 AM Ilia Mirkin  wrote:
>
> Observed an issue when looking at the code generated by the
> image-vertex-attrib-input-output piglit test. Even though the test
> itself worked fine (due to TIC 0 being used for the image), this needs
> to be fixed.
>
> Signed-off-by: Ilia Mirkin 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index d62d36008e6..8c429026452 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -1591,6 +1591,12 @@ bool Source::scanInstruction(const struct 
> tgsi_full_instruction *inst)
>if (insn.getOpcode() == TGSI_OPCODE_STORE &&
>dst.getFile() != TGSI_FILE_MEMORY) {
>   info->io.globalAccess |= 0x2;
> +
> + if (dst.getFile() == TGSI_FILE_INPUT) {
> +// TODO: Handle indirect somehow?
> +const int i = dst.getIndex(0);
> +info->in[i].mask |= 1;
> + }
>}
>
>if (dst.getFile() == TGSI_FILE_OUTPUT) {
> --
> 2.21.0
>
> ___
> Nouveau mailing list
> nouv...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/nouveau

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nv50/ir: remove DUMMY edge type

2019-10-14 Thread Karol Herbst
it was never used

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp| 3 ---
 src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp | 8 +---
 src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h   | 1 -
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp| 2 --
 4 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
index 9f0e0733326..76fee8c791e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
@@ -536,9 +536,6 @@ Function::printCFGraph(const char *filePath)
  case Graph::Edge::BACK:
 fprintf(out, "\t%i -> %i;\n", idA, idB);
 break;
- case Graph::Edge::DUMMY:
-fprintf(out, "\t%i -> %i [style=dotted];\n", idA, idB);
-break;
  default:
 assert(0);
 break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
index b1076cf4129..e9a9981746a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
@@ -77,7 +77,6 @@ const char *Graph::Edge::typeStr() const
case FORWARD: return "forward";
case BACK:return "back";
case CROSS:   return "cross";
-   case DUMMY:   return "dummy";
case UNKNOWN:
default:
   return "unk";
@@ -184,7 +183,7 @@ Graph::Node::reachableBy(const Node *node, const Node 
*term) const
  continue;
 
   for (EdgeIterator ei = pos->outgoing(); !ei.end(); ei.next()) {
- if (ei.getType() == Edge::BACK || ei.getType() == Edge::DUMMY)
+ if (ei.getType() == Edge::BACK)
 continue;
  if (ei.getNode()->visit(seq))
 stack.push(ei.getNode());
@@ -301,7 +300,6 @@ private:
 switch (ei.getType()) {
 case Graph::Edge::TREE:
 case Graph::Edge::FORWARD:
-case Graph::Edge::DUMMY:
if (++(ei.getNode()->tag) == ei.getNode()->incidentCountFwd())
   bb.push(ei.getNode());
break;
@@ -371,8 +369,6 @@ void Graph::classifyDFS(Node *curr, int& seq)
 
for (edge = curr->out; edge; edge = edge->next[0]) {
   node = edge->target;
-  if (edge->type == Edge::DUMMY)
- continue;
 
   if (node->getSequence() == 0) {
  edge->type = Edge::TREE;
@@ -387,8 +383,6 @@ void Graph::classifyDFS(Node *curr, int& seq)
 
for (edge = curr->in; edge; edge = edge->next[1]) {
   node = edge->origin;
-  if (edge->type == Edge::DUMMY)
- continue;
 
   if (node->getSequence() == 0) {
  edge->type = Edge::TREE;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h
index 115f20e5e99..fc85e78a50c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h
@@ -47,7 +47,6 @@ public:
  FORWARD,
  BACK,
  CROSS, // e.g. loop break
- DUMMY
   };
 
   Edge(Node *dst, Node *src, Type kind);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index f25bce00884..6df2664da22 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -624,8 +624,6 @@ RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock 
*bb)
   // trickery to save a loop of OR'ing liveSets
   // aliasing works fine with BitSet::setOr
   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
- if (ei.getType() == Graph::Edge::DUMMY)
-continue;
  if (bbA) {
 bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
 bbA = bb;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nv50/ir: remove DUMMY edge type

2019-10-14 Thread Karol Herbst
isn't that what "UNKNOWN" is for?

On Mon, Oct 14, 2019 at 11:16 PM Ilia Mirkin  wrote:
>
> The idea was that this type would be used when you're not sure, and
> then run the classifier afterwards. Otherwise the classifier doesn't
> know which edges to classify...
>
> On Mon, Oct 14, 2019 at 5:10 PM Karol Herbst  wrote:
> >
> > it was never used
> >
> > Signed-off-by: Karol Herbst 
> > ---
> >  src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp| 3 ---
> >  src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp | 8 +---
> >  src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h   | 1 -
> >  src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp| 2 --
> >  4 files changed, 1 insertion(+), 13 deletions(-)
> >
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
> > index 9f0e0733326..76fee8c791e 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
> > @@ -536,9 +536,6 @@ Function::printCFGraph(const char *filePath)
> >   case Graph::Edge::BACK:
> >  fprintf(out, "\t%i -> %i;\n", idA, idB);
> >  break;
> > - case Graph::Edge::DUMMY:
> > -fprintf(out, "\t%i -> %i [style=dotted];\n", idA, idB);
> > -break;
> >   default:
> >  assert(0);
> >  break;
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
> > index b1076cf4129..e9a9981746a 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
> > @@ -77,7 +77,6 @@ const char *Graph::Edge::typeStr() const
> > case FORWARD: return "forward";
> > case BACK:return "back";
> > case CROSS:   return "cross";
> > -   case DUMMY:   return "dummy";
> > case UNKNOWN:
> > default:
> >return "unk";
> > @@ -184,7 +183,7 @@ Graph::Node::reachableBy(const Node *node, const Node 
> > *term) const
> >   continue;
> >
> >for (EdgeIterator ei = pos->outgoing(); !ei.end(); ei.next()) {
> > - if (ei.getType() == Edge::BACK || ei.getType() == Edge::DUMMY)
> > + if (ei.getType() == Edge::BACK)
> >  continue;
> >   if (ei.getNode()->visit(seq))
> >  stack.push(ei.getNode());
> > @@ -301,7 +300,6 @@ private:
> >  switch (ei.getType()) {
> >  case Graph::Edge::TREE:
> >  case Graph::Edge::FORWARD:
> > -case Graph::Edge::DUMMY:
> > if (++(ei.getNode()->tag) == 
> > ei.getNode()->incidentCountFwd())
> >bb.push(ei.getNode());
> > break;
> > @@ -371,8 +369,6 @@ void Graph::classifyDFS(Node *curr, int& seq)
> >
> > for (edge = curr->out; edge; edge = edge->next[0]) {
> >node = edge->target;
> > -  if (edge->type == Edge::DUMMY)
> > - continue;
> >
> >if (node->getSequence() == 0) {
> >   edge->type = Edge::TREE;
> > @@ -387,8 +383,6 @@ void Graph::classifyDFS(Node *curr, int& seq)
> >
> > for (edge = curr->in; edge; edge = edge->next[1]) {
> >node = edge->origin;
> > -  if (edge->type == Edge::DUMMY)
> > - continue;
> >
> >if (node->getSequence() == 0) {
> >   edge->type = Edge::TREE;
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h
> > index 115f20e5e99..fc85e78a50c 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h
> > @@ -47,7 +47,6 @@ public:
> >   FORWARD,
> >   BACK,
> >   CROSS, // e.g. loop break
> > - DUMMY
> >};
> >
> >Edge(Node *dst, Node *src, Type kind);
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> > index f25bce00884..6df2664da22 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> > @@ -624,8 +624,6 

[Mesa-dev] [PATCH] nv50/ir: fix crash in isUniform for undefined values

2019-11-02 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index a181a13a3b1..ae07d967221 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -274,6 +274,8 @@ LValue::isUniform() const
if (defs.size() > 1)
   return false;
Instruction *insn = getInsn();
+   if (!insn)
+  return false;
// let's not try too hard here for now ...
return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
 }
-- 
2.23.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nv50/ir/ra: fix memory corruption when spilling

2019-11-12 Thread Karol Herbst
a80075470:[fd]fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x0c2a80075480: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x0c2a80075490: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x0c2a800754a0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fa
  0x0c2a800754b0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c2a800754c0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:   00
  Partially addressable: 01 02 03 04 05 06 07
  Heap left redzone:   fa
  Freed heap region:   fd
  Stack left redzone:  f1
  Stack mid redzone:   f2
  Stack right redzone: f3
  Stack after return:  f5
  Stack use after scope:   f8
  Global redzone:  f9
  Global init order:   f6
  Poisoned by user:f7
  Container overflow:  fc
  Array cookie:ac
  Intra object redzone:bb
  ASan internal:   fe
  Left alloca redzone: ca
  Right alloca redzone:cb
  Shadow gap:  cc
==612087==ABORTING

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_ra.cpp| 34 ++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 6df2664da22..d72932748f1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1745,6 +1745,34 @@ value_cmp(ValueRef *a, ValueRef *b) {
return ai->serial < bi->serial;
 }
 
+class RepairSSAAfterSpillPass : public Pass
+{
+public:
+   RepairSSAAfterSpillPass(Instruction *insn) : insn(insn) {}
+private:
+   void removeStaleRefs(Instruction *it, ValueDef *def) {
+  for (int d = 0; it->defExists(d); ++d) {
+ std::list &defs = it->getDef(d)->defs;
+ std::list::iterator it = std::find(defs.begin(), 
defs.end(), def);
+ if (it != defs.end())
+defs.erase(it);
+  }
+   }
+
+   virtual bool visit(Instruction *it)
+   {
+  if (it == insn)
+ return true;
+
+  for (int d = 0; insn->defExists(d); ++d)
+ removeStaleRefs(it, &insn->def(d));
+
+  return true;
+   }
+
+   Instruction *insn;
+};
+
 // For each value that is to be spilled, go through all its definitions.
 // A value can have multiple definitions if it has been coalesced before.
 // For each definition, first go through all its uses and insert an unspill
@@ -1815,8 +1843,12 @@ SpillCodeInserter::run(const std::list& lst)
   }
 
   for (unordered_set::const_iterator it = to_del.begin();
-   it != to_del.end(); ++it)
+   it != to_del.end(); ++it) {
+ Instruction *insn = *it;
+ RepairSSAAfterSpillPass repair(insn);
+ repair.run(insn->bb->getFunction());
  delete_Instruction(func->getProgram(), *it);
+  }
}
 
// TODO: We're not trying to reuse old slots in a potential next iteration.
-- 
2.23.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] LLVM + SPIRV-LLVM-Translator - compilation errors

2019-11-14 Thread Karol Herbst
might be that those definitions moved elsewhere or the headers were
never directly included.

In llvm 9 they are in llvm/InitializePasses.h, but maybe that's
changed? And if not, maybe that file needs to be included in
SPIRVLowerSPIRBlocks.cpp?

On Fri, Nov 15, 2019 at 2:34 AM Dieter Nützel  wrote:
>
> Hello Karol and Ilya,
>
> do you have any hints/pointers for me to solve these LLVM +
> SPIRV-LLVM-Translator - compilation errors.
>
> llvm-project git taken 'today'.
>
> [-]
> commit 95c770fbfb14b07e1af7c2d427c16745617d9f1f (HEAD -> master,
> origin/master, origin/HEAD)
> Author: Davide Italiano 
> Date:   Thu Nov 14 15:29:28 2019 -0800
>
>  [Utility] Remove a dead header [PPC64LE_ehframe_Registers.h]
> [-]
>
> opt/llvm-project/llvm/projects/SPIRV-LLVM-Translator/lib/SPIRV/SPIRVLowerSPIRBlocks.cpp:617:1:
> note: in expansion of macro ‘INITIALIZE_PASS_DEPENDENCY’
>617 | INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
>| ^~
> /opt/llvm-project/llvm/include/llvm/PassSupport.h:50:45: error:
> ‘initializeAssumptionCacheTrackerPass’ was not declared in this scope
> 50 | #define INITIALIZE_PASS_DEPENDENCY(
> initialize##depName##Pass(Registry);
>| ^~
> /opt/llvm-project/llvm/projects/SPIRV-LLVM-Translator/lib/SPIRV/SPIRVLowerSPIRBlocks.cpp:618:1:
> note: in expansion of macro ‘INITIALIZE_PASS_DEPENDENCY’
>618 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
>| ^~
> /opt/llvm-project/llvm/include/llvm/PassSupport.h:50:45: error:
> ‘initializeAAResultsWrapperPassPass’ was not declared in this scope
> 50 | #define INITIALIZE_PASS_DEPENDENCY(depName)
> initialize##depName##Pass(Registry);
>| ^~
> /opt/llvm-project/llvm/projects/SPIRV-LLVM-Translator/lib/SPIRV/SPIRVLowerSPIRBlocks.cpp:619:1:
> note: in expansion of macro ‘INITIALIZE_PASS_DEPENDENCY’
>619 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
>| ^~
>
>
> Thank you very much in advance.
> Dieter
>

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nv50/ir: implement global atomics and handle it for nir

2019-12-05 Thread Karol Herbst
TGSI doesn't have any concept of global memory right now.

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 43 +--
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp |  2 +
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 08365988069..31f764d63d4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -582,40 +582,47 @@ Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
case nir_intrinsic_bindless_image_atomic_add:
+   case nir_intrinsic_global_atomic_add:
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_deref_atomic_add:
case nir_intrinsic_shared_atomic_add:
case nir_intrinsic_ssbo_atomic_add:
   return  NV50_IR_SUBOP_ATOM_ADD;
case nir_intrinsic_bindless_image_atomic_and:
+   case nir_intrinsic_global_atomic_and:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_deref_atomic_and:
case nir_intrinsic_shared_atomic_and:
case nir_intrinsic_ssbo_atomic_and:
   return  NV50_IR_SUBOP_ATOM_AND;
case nir_intrinsic_bindless_image_atomic_comp_swap:
+   case nir_intrinsic_global_atomic_comp_swap:
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_image_deref_atomic_comp_swap:
case nir_intrinsic_shared_atomic_comp_swap:
case nir_intrinsic_ssbo_atomic_comp_swap:
   return  NV50_IR_SUBOP_ATOM_CAS;
case nir_intrinsic_bindless_image_atomic_exchange:
+   case nir_intrinsic_global_atomic_exchange:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_deref_atomic_exchange:
case nir_intrinsic_shared_atomic_exchange:
case nir_intrinsic_ssbo_atomic_exchange:
   return  NV50_IR_SUBOP_ATOM_EXCH;
case nir_intrinsic_bindless_image_atomic_or:
+   case nir_intrinsic_global_atomic_or:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_deref_atomic_or:
case nir_intrinsic_shared_atomic_or:
case nir_intrinsic_ssbo_atomic_or:
   return  NV50_IR_SUBOP_ATOM_OR;
case nir_intrinsic_bindless_image_atomic_imax:
-   case nir_intrinsic_image_atomic_imax:
-   case nir_intrinsic_image_deref_atomic_imax:
case nir_intrinsic_bindless_image_atomic_umax:
+   case nir_intrinsic_global_atomic_imax:
+   case nir_intrinsic_global_atomic_umax:
+   case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_image_atomic_umax:
+   case nir_intrinsic_image_deref_atomic_imax:
case nir_intrinsic_image_deref_atomic_umax:
case nir_intrinsic_shared_atomic_imax:
case nir_intrinsic_shared_atomic_umax:
@@ -623,10 +630,12 @@ Converter::getSubOp(nir_intrinsic_op op)
case nir_intrinsic_ssbo_atomic_umax:
   return  NV50_IR_SUBOP_ATOM_MAX;
case nir_intrinsic_bindless_image_atomic_imin:
-   case nir_intrinsic_image_atomic_imin:
-   case nir_intrinsic_image_deref_atomic_imin:
case nir_intrinsic_bindless_image_atomic_umin:
+   case nir_intrinsic_global_atomic_imin:
+   case nir_intrinsic_global_atomic_umin:
+   case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_umin:
+   case nir_intrinsic_image_deref_atomic_imin:
case nir_intrinsic_image_deref_atomic_umin:
case nir_intrinsic_shared_atomic_imin:
case nir_intrinsic_shared_atomic_umin:
@@ -634,6 +643,7 @@ Converter::getSubOp(nir_intrinsic_op op)
case nir_intrinsic_ssbo_atomic_umin:
   return  NV50_IR_SUBOP_ATOM_MIN;
case nir_intrinsic_bindless_image_atomic_xor:
+   case nir_intrinsic_global_atomic_xor:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_deref_atomic_xor:
case nir_intrinsic_shared_atomic_xor:
@@ -2379,6 +2389,30 @@ Converter::visit(nir_intrinsic_instr *insn)
   info->io.globalAccess |= 0x2;
   break;
}
+   case nir_intrinsic_global_atomic_add:
+   case nir_intrinsic_global_atomic_and:
+   case nir_intrinsic_global_atomic_comp_swap:
+   case nir_intrinsic_global_atomic_exchange:
+   case nir_intrinsic_global_atomic_or:
+   case nir_intrinsic_global_atomic_imax:
+   case nir_intrinsic_global_atomic_imin:
+   case nir_intrinsic_global_atomic_umax:
+   case nir_intrinsic_global_atomic_umin:
+   case nir_intrinsic_global_atomic_xor: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *address;
+  uint32_t offset = getIndirect(&insn->src[0], 0, address);
+
+  Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
+  Instruction *atom =
+ mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
+  atom->setIndirect(0, 0, address);
+  atom->subOp = getSubOp(op);
+
+  info->io.globalAccess |= 0x2;
+  break;
+   }
case nir_intrinsic_bindless_image_atomic_add:
case nir_int

[Mesa-dev] [PATCH] nouveau: limit reported compute max memory and allocation size

2019-12-05 Thread Karol Herbst
Otherwise applications (like the OpenCL CTS) will try to allocate more memory
than what the GPU is actually able to provide.

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 7 +--
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 7 +--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index ad35bd8cd42..5942458b0b2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -472,6 +472,7 @@ nv50_screen_get_compute_param(struct pipe_screen *pscreen,
   enum pipe_compute_cap param, void *data)
 {
struct nv50_screen *screen = nv50_screen(pscreen);
+   struct nouveau_device *dev = screen->base.device;
 
 #define RET(x) do {  \
if (data) \
@@ -489,7 +490,8 @@ nv50_screen_get_compute_param(struct pipe_screen *pscreen,
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
   RET((uint64_t []) { 512 });
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */
-  RET((uint64_t []) { 1ULL << 32 });
+  // TODO what to do if vram_size is 0?
+  RET((uint64_t []) { MIN2(1ULL << 32, dev->vram_size) });
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
   RET((uint64_t []) { 16 << 10 });
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
@@ -499,7 +501,8 @@ nv50_screen_get_compute_param(struct pipe_screen *pscreen,
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
   RET((uint32_t []) { 32 });
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
-  RET((uint64_t []) { 1ULL << 40 });
+  // TODO what to do if vram_size is 0?
+  RET((uint64_t []) { dev->vram_size });
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
   RET((uint32_t []) { 0 });
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index f5e1373a37e..57b1c70f7b3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -533,6 +533,7 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
 {
struct nvc0_screen *screen = nvc0_screen(pscreen);
const uint16_t obj_class = screen->compute->oclass;
+   struct nouveau_device *dev = screen->base.device;
 
 #define RET(x) do {  \
if (data) \
@@ -560,7 +561,8 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
  RET((uint64_t []) { 512 });
   }
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
-  RET((uint64_t []) { 1ULL << 40 });
+  // TODO what to do when vram_size is 0?
+  RET((uint64_t []) { dev->vram_size });
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
   switch (obj_class) {
   case GM200_COMPUTE_CLASS:
@@ -580,7 +582,8 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
   RET((uint32_t []) { 32 });
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
-  RET((uint64_t []) { 1ULL << 40 });
+  // TODO what to do when vram_size is 0?
+  RET((uint64_t []) { dev->vram_size });
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
   RET((uint32_t []) { 0 });
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
-- 
2.23.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nv50/ir: implement global atomics and handle it for nir

2019-12-05 Thread Karol Herbst
On Thu, Dec 5, 2019 at 11:57 AM Karol Herbst  wrote:
>
> TGSI doesn't have any concept of global memory right now.
>
> Signed-off-by: Karol Herbst 
> ---
>  .../nouveau/codegen/nv50_ir_from_nir.cpp  | 43 +--
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp |  2 +
>  2 files changed, 41 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
> index 08365988069..31f764d63d4 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
> @@ -582,40 +582,47 @@ Converter::getSubOp(nir_intrinsic_op op)
>  {
> switch (op) {
> case nir_intrinsic_bindless_image_atomic_add:
> +   case nir_intrinsic_global_atomic_add:
> case nir_intrinsic_image_atomic_add:
> case nir_intrinsic_image_deref_atomic_add:
> case nir_intrinsic_shared_atomic_add:
> case nir_intrinsic_ssbo_atomic_add:
>return  NV50_IR_SUBOP_ATOM_ADD;
> case nir_intrinsic_bindless_image_atomic_and:
> +   case nir_intrinsic_global_atomic_and:
> case nir_intrinsic_image_atomic_and:
> case nir_intrinsic_image_deref_atomic_and:
> case nir_intrinsic_shared_atomic_and:
> case nir_intrinsic_ssbo_atomic_and:
>return  NV50_IR_SUBOP_ATOM_AND;
> case nir_intrinsic_bindless_image_atomic_comp_swap:
> +   case nir_intrinsic_global_atomic_comp_swap:
> case nir_intrinsic_image_atomic_comp_swap:
> case nir_intrinsic_image_deref_atomic_comp_swap:
> case nir_intrinsic_shared_atomic_comp_swap:
> case nir_intrinsic_ssbo_atomic_comp_swap:
>return  NV50_IR_SUBOP_ATOM_CAS;
> case nir_intrinsic_bindless_image_atomic_exchange:
> +   case nir_intrinsic_global_atomic_exchange:
> case nir_intrinsic_image_atomic_exchange:
> case nir_intrinsic_image_deref_atomic_exchange:
> case nir_intrinsic_shared_atomic_exchange:
> case nir_intrinsic_ssbo_atomic_exchange:
>return  NV50_IR_SUBOP_ATOM_EXCH;
> case nir_intrinsic_bindless_image_atomic_or:
> +   case nir_intrinsic_global_atomic_or:
> case nir_intrinsic_image_atomic_or:
> case nir_intrinsic_image_deref_atomic_or:
> case nir_intrinsic_shared_atomic_or:
> case nir_intrinsic_ssbo_atomic_or:
>return  NV50_IR_SUBOP_ATOM_OR;
> case nir_intrinsic_bindless_image_atomic_imax:
> -   case nir_intrinsic_image_atomic_imax:
> -   case nir_intrinsic_image_deref_atomic_imax:
> case nir_intrinsic_bindless_image_atomic_umax:
> +   case nir_intrinsic_global_atomic_imax:
> +   case nir_intrinsic_global_atomic_umax:
> +   case nir_intrinsic_image_atomic_imax:
> case nir_intrinsic_image_atomic_umax:
> +   case nir_intrinsic_image_deref_atomic_imax:
> case nir_intrinsic_image_deref_atomic_umax:
> case nir_intrinsic_shared_atomic_imax:
> case nir_intrinsic_shared_atomic_umax:
> @@ -623,10 +630,12 @@ Converter::getSubOp(nir_intrinsic_op op)
> case nir_intrinsic_ssbo_atomic_umax:
>return  NV50_IR_SUBOP_ATOM_MAX;
> case nir_intrinsic_bindless_image_atomic_imin:
> -   case nir_intrinsic_image_atomic_imin:
> -   case nir_intrinsic_image_deref_atomic_imin:
> case nir_intrinsic_bindless_image_atomic_umin:
> +   case nir_intrinsic_global_atomic_imin:
> +   case nir_intrinsic_global_atomic_umin:
> +   case nir_intrinsic_image_atomic_imin:
> case nir_intrinsic_image_atomic_umin:
> +   case nir_intrinsic_image_deref_atomic_imin:
> case nir_intrinsic_image_deref_atomic_umin:
> case nir_intrinsic_shared_atomic_imin:
> case nir_intrinsic_shared_atomic_umin:
> @@ -634,6 +643,7 @@ Converter::getSubOp(nir_intrinsic_op op)
> case nir_intrinsic_ssbo_atomic_umin:
>return  NV50_IR_SUBOP_ATOM_MIN;
> case nir_intrinsic_bindless_image_atomic_xor:
> +   case nir_intrinsic_global_atomic_xor:
> case nir_intrinsic_image_atomic_xor:
> case nir_intrinsic_image_deref_atomic_xor:
> case nir_intrinsic_shared_atomic_xor:
> @@ -2379,6 +2389,30 @@ Converter::visit(nir_intrinsic_instr *insn)
>info->io.globalAccess |= 0x2;
>break;
> }
> +   case nir_intrinsic_global_atomic_add:
> +   case nir_intrinsic_global_atomic_and:
> +   case nir_intrinsic_global_atomic_comp_swap:
> +   case nir_intrinsic_global_atomic_exchange:
> +   case nir_intrinsic_global_atomic_or:
> +   case nir_intrinsic_global_atomic_imax:
> +   case nir_intrinsic_global_atomic_imin:
> +   case nir_intrinsic_global_atomic_umax:
> +   case nir_intrinsic_global_atomic_umin:
> +   case nir_intrinsic_global_atomic_xor: {
> +  const DataType dType = getDType(insn);
> +

[Mesa-dev] [PATCH v2 1/2] nv50/ir/ra: convert some for loops to Range-based for loops

2020-01-15 Thread Karol Herbst
I will touch them in the next commit

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_ra.cpp| 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 6df2664da22..d6d3e70cce6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -954,9 +954,8 @@ GCRA::coalesceValues(Value *dst, Value *src, bool force)
 rep->id, rep->reg.data.id, val->id);
 
// set join pointer of all values joined with val
-   for (Value::DefIterator def = val->defs.begin(); def != val->defs.end();
-++def)
-  (*def)->get()->join = rep;
+   for (ValueDef *def : val->defs)
+  def->get()->join = rep;
assert(rep->join == rep && val->join == rep);
 
// add val's definitions to rep and extend the live interval of its RIG node
@@ -1259,10 +1258,8 @@ GCRA::calculateSpillWeights()
 
   if (!val->noSpill) {
  int rc = 0;
- for (Value::DefIterator it = val->defs.begin();
-  it != val->defs.end();
-  ++it)
-rc += (*it)->get()->refCount();
+ for (ValueDef *def : val->defs)
+rc += def->get()->refCount();
 
  nodes[i].weight =
 (float)rc * (float)rc / (float)nodes[i].livei.extent();
@@ -1370,10 +1367,10 @@ GCRA::checkInterference(const RIG_Node *node, 
Graph::EdgeIterator& ei)
 
if (vA->compound | vB->compound) {
   // NOTE: this only works for >aligned< register tuples !
-  for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) 
{
-  for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) 
{
- const LValue *vD = (*D)->get()->asLValue();
- const LValue *vd = (*d)->get()->asLValue();
+  for (const ValueDef *D : vA->defs) {
+  for (const ValueDef *d : vB->defs) {
+ const LValue *vD = D->get()->asLValue();
+ const LValue *vd = d->get()->asLValue();
 
  if (!vD->livei.overlaps(vd->livei)) {
 INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no 
overlap\n",
-- 
2.24.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/2] nv50/ir/ra: fix memory corruption when spilling

2020-01-15 Thread Karol Herbst
0 00 fa
  0x0c2a80075460: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
=>0x0c2a80075470:[fd]fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x0c2a80075480: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x0c2a80075490: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x0c2a800754a0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fa
  0x0c2a800754b0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c2a800754c0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:   00
  Partially addressable: 01 02 03 04 05 06 07
  Heap left redzone:   fa
  Freed heap region:   fd
  Stack left redzone:  f1
  Stack mid redzone:   f2
  Stack right redzone: f3
  Stack after return:  f5
  Stack use after scope:   f8
  Global redzone:  f9
  Global init order:   f6
  Poisoned by user:f7
  Container overflow:  fc
  Array cookie:ac
  Intra object redzone:bb
  ASan internal:   fe
  Left alloca redzone: ca
  Right alloca redzone:cb
  Shadow gap:      cc
==612087==ABORTING

v2: full rework

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_ra.cpp| 87 ++-
 1 file changed, 66 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index d6d3e70cce6..9a106eff2d1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -295,10 +295,48 @@ private:
 
 typedef std::pair ValuePair;
 
+class MergedDefs
+{
+public:
+   std::list operator()(Value *val) {
+  std::list res;
+  res.insert(res.end(), val->defs.begin(), val->defs.end());
+  res.insert(res.end(), defs[val].begin(), defs[val].end());
+  return res;
+   }
+
+   void add(Value *val, std::list &vals) {
+  assert(val);
+  defs[val].insert(defs[val].end(), vals.begin(), vals.end());
+   }
+
+   void remove(Value *val, ValueDef *def) {
+  defs[val].remove(def);
+   }
+
+   void removeDefsOfInstruction(Instruction *insn) {
+  for (int d = 0; insn->defExists(d); ++d) {
+ ValueDef *def = &insn->def(d);
+ defs.erase(def->get());
+ for (auto &p : defs)
+p.second.remove(def);
+  }
+   }
+
+   void merge() {
+  for (auto &p : defs)
+ p.first->defs.insert(p.first->defs.end(), p.second.begin(), 
p.second.end());
+  defs.clear();
+   }
+
+private:
+   std::unordered_map > defs;
+};
+
 class SpillCodeInserter
 {
 public:
-   SpillCodeInserter(Function *fn) : func(fn), stackSize(0), stackBase(0) { }
+   SpillCodeInserter(Function *fn, MergedDefs &mergedDefs) : func(fn), 
mergedDefs(mergedDefs), stackSize(0), stackBase(0) { }
 
bool run(const std::list&);
 
@@ -308,6 +346,7 @@ public:
 
 private:
Function *func;
+   MergedDefs &mergedDefs;
 
struct SpillSlot
{
@@ -708,7 +747,7 @@ RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
 class GCRA
 {
 public:
-   GCRA(Function *, SpillCodeInserter&);
+   GCRA(Function *, SpillCodeInserter&, MergedDefs&);
~GCRA();
 
bool allocateRegisters(ArrayList& insns);
@@ -825,6 +864,8 @@ private:
 
SpillCodeInserter& spill;
std::list mustSpill;
+
+   MergedDefs &mergedDefs;
 };
 
 const GCRA::RelDegree GCRA::relDegree;
@@ -954,12 +995,13 @@ GCRA::coalesceValues(Value *dst, Value *src, bool force)
 rep->id, rep->reg.data.id, val->id);
 
// set join pointer of all values joined with val
-   for (ValueDef *def : val->defs)
+   std::list defs = mergedDefs(val);
+   for (ValueDef *def : defs)
   def->get()->join = rep;
assert(rep->join == rep && val->join == rep);
 
// add val's definitions to rep and extend the live interval of its RIG node
-   rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end());
+   mergedDefs.add(rep, defs);
nRep->livei.unify(nVal->livei);
nRep->degreeLimit = MIN2(nRep->degreeLimit, nVal->degreeLimit);
nRep->maxReg = MIN2(nRep->maxReg, nVal->maxReg);
@@ -1160,10 +1202,11 @@ GCRA::RIG_Node::addRegPreference(RIG_Node *node)
prefRegs.push_back(node);
 }
 
-GCRA::GCRA(Function *fn, SpillCodeInserter& spill) :
+GCRA::GCRA(Function *fn, SpillCodeInserter& spill, MergedDefs& mergedDefs) :
func(fn),
regs(fn->getProgram()->getTarget()),
-   spill(spill)
+   spill(spill),
+   mergedDefs(mergedDefs)
 {
prog = func->getProgram();
 }
@@ -1258,7 +1301,7 @@ GCRA::calculateSpillWeights()
 
   if (!val->noSpill) {
  int rc = 0;
- for (ValueDef *def : val->defs)
+ for (ValueDef *def : mergedDefs(val))
 rc += def->get()->refCount();
 
  nodes[i].weight =
@@ -1360,15 +1403,15 @@ GCR

[Mesa-dev] [PATCH v3 1/2] nv50/ir/ra: convert some for loops to Range-based for loops

2020-01-18 Thread Karol Herbst
I will touch them in the next commit

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_ra.cpp| 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 6df2664da22..d6d3e70cce6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -954,9 +954,8 @@ GCRA::coalesceValues(Value *dst, Value *src, bool force)
 rep->id, rep->reg.data.id, val->id);
 
// set join pointer of all values joined with val
-   for (Value::DefIterator def = val->defs.begin(); def != val->defs.end();
-++def)
-  (*def)->get()->join = rep;
+   for (ValueDef *def : val->defs)
+  def->get()->join = rep;
assert(rep->join == rep && val->join == rep);
 
// add val's definitions to rep and extend the live interval of its RIG node
@@ -1259,10 +1258,8 @@ GCRA::calculateSpillWeights()
 
   if (!val->noSpill) {
  int rc = 0;
- for (Value::DefIterator it = val->defs.begin();
-  it != val->defs.end();
-  ++it)
-rc += (*it)->get()->refCount();
+ for (ValueDef *def : val->defs)
+rc += def->get()->refCount();
 
  nodes[i].weight =
 (float)rc * (float)rc / (float)nodes[i].livei.extent();
@@ -1370,10 +1367,10 @@ GCRA::checkInterference(const RIG_Node *node, 
Graph::EdgeIterator& ei)
 
if (vA->compound | vB->compound) {
   // NOTE: this only works for >aligned< register tuples !
-  for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) 
{
-  for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) 
{
- const LValue *vD = (*D)->get()->asLValue();
- const LValue *vd = (*d)->get()->asLValue();
+  for (const ValueDef *D : vA->defs) {
+  for (const ValueDef *d : vB->defs) {
+ const LValue *vD = D->get()->asLValue();
+ const LValue *vd = d->get()->asLValue();
 
  if (!vD->livei.overlaps(vd->livei)) {
 INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no 
overlap\n",
-- 
2.24.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 1/2] nv50/ir/ra: convert some for loops to Range-based for loops

2020-01-18 Thread Karol Herbst
I will touch them in the next commit

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_ra.cpp| 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 6df2664da22..d6d3e70cce6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -954,9 +954,8 @@ GCRA::coalesceValues(Value *dst, Value *src, bool force)
 rep->id, rep->reg.data.id, val->id);
 
// set join pointer of all values joined with val
-   for (Value::DefIterator def = val->defs.begin(); def != val->defs.end();
-++def)
-  (*def)->get()->join = rep;
+   for (ValueDef *def : val->defs)
+  def->get()->join = rep;
assert(rep->join == rep && val->join == rep);
 
// add val's definitions to rep and extend the live interval of its RIG node
@@ -1259,10 +1258,8 @@ GCRA::calculateSpillWeights()
 
   if (!val->noSpill) {
  int rc = 0;
- for (Value::DefIterator it = val->defs.begin();
-  it != val->defs.end();
-  ++it)
-rc += (*it)->get()->refCount();
+ for (ValueDef *def : val->defs)
+rc += def->get()->refCount();
 
  nodes[i].weight =
 (float)rc * (float)rc / (float)nodes[i].livei.extent();
@@ -1370,10 +1367,10 @@ GCRA::checkInterference(const RIG_Node *node, 
Graph::EdgeIterator& ei)
 
if (vA->compound | vB->compound) {
   // NOTE: this only works for >aligned< register tuples !
-  for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) 
{
-  for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) 
{
- const LValue *vD = (*D)->get()->asLValue();
- const LValue *vd = (*d)->get()->asLValue();
+  for (const ValueDef *D : vA->defs) {
+  for (const ValueDef *d : vB->defs) {
+ const LValue *vD = D->get()->asLValue();
+ const LValue *vd = d->get()->asLValue();
 
  if (!vD->livei.overlaps(vd->livei)) {
 INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no 
overlap\n",
-- 
2.24.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 2/2] nv50/ir/ra: fix memory corruption when spilling

2020-01-18 Thread Karol Herbst
0 00 fa
  0x0c2a80075460: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
=>0x0c2a80075470:[fd]fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x0c2a80075480: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x0c2a80075490: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x0c2a800754a0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fa
  0x0c2a800754b0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c2a800754c0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:   00
  Partially addressable: 01 02 03 04 05 06 07
  Heap left redzone:   fa
  Freed heap region:   fd
  Stack left redzone:  f1
  Stack mid redzone:   f2
  Stack right redzone: f3
  Stack after return:  f5
  Stack use after scope:   f8
  Global redzone:  f9
  Global init order:   f6
  Poisoned by user:f7
  Container overflow:  fc
  Array cookie:ac
  Intra object redzone:bb
  ASan internal:   fe
  Left alloca redzone: ca
  Right alloca redzone:cb
  Shadow gap:  cc
==612087==ABORTING

v2: full rework
v3: manage a full copy instead of recreating new lists on every access

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_ra.cpp| 93 ++-
 1 file changed, 71 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index d6d3e70cce6..dabf0cfacc6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -295,10 +295,53 @@ private:
 
 typedef std::pair ValuePair;
 
+class MergedDefs
+{
+private:
+   std::list& entry(Value *val) {
+  auto it = defs.find(val);
+
+  if (it == defs.end()) {
+ std::list &res = defs[val];
+ res = val->defs;
+ return res;
+  } else {
+ return (*it).second;
+  }
+   }
+
+   std::unordered_map > defs;
+
+public:
+   std::list& operator()(Value *val) {
+  return entry(val);
+   }
+
+   void add(Value *val, const std::list &vals) {
+  assert(val);
+  std::list &valdefs = entry(val);
+  valdefs.insert(valdefs.end(), vals.begin(), vals.end());
+   }
+
+   void removeDefsOfInstruction(Instruction *insn) {
+  for (int d = 0; insn->defExists(d); ++d) {
+ ValueDef *def = &insn->def(d);
+ defs.erase(def->get());
+ for (auto &p : defs)
+p.second.remove(def);
+  }
+   }
+
+   void merge() {
+  for (auto &p : defs)
+ p.first->defs = p.second;
+   }
+};
+
 class SpillCodeInserter
 {
 public:
-   SpillCodeInserter(Function *fn) : func(fn), stackSize(0), stackBase(0) { }
+   SpillCodeInserter(Function *fn, MergedDefs &mergedDefs) : func(fn), 
mergedDefs(mergedDefs), stackSize(0), stackBase(0) { }
 
bool run(const std::list&);
 
@@ -308,6 +351,7 @@ public:
 
 private:
Function *func;
+   MergedDefs &mergedDefs;
 
struct SpillSlot
{
@@ -708,7 +752,7 @@ RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
 class GCRA
 {
 public:
-   GCRA(Function *, SpillCodeInserter&);
+   GCRA(Function *, SpillCodeInserter&, MergedDefs&);
~GCRA();
 
bool allocateRegisters(ArrayList& insns);
@@ -825,6 +869,8 @@ private:
 
SpillCodeInserter& spill;
std::list mustSpill;
+
+   MergedDefs &mergedDefs;
 };
 
 const GCRA::RelDegree GCRA::relDegree;
@@ -954,12 +1000,13 @@ GCRA::coalesceValues(Value *dst, Value *src, bool force)
 rep->id, rep->reg.data.id, val->id);
 
// set join pointer of all values joined with val
-   for (ValueDef *def : val->defs)
+   const std::list &defs = mergedDefs(val);
+   for (ValueDef *def : defs)
   def->get()->join = rep;
assert(rep->join == rep && val->join == rep);
 
// add val's definitions to rep and extend the live interval of its RIG node
-   rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end());
+   mergedDefs.add(rep, defs);
nRep->livei.unify(nVal->livei);
nRep->degreeLimit = MIN2(nRep->degreeLimit, nVal->degreeLimit);
nRep->maxReg = MIN2(nRep->maxReg, nVal->maxReg);
@@ -1160,10 +1207,11 @@ GCRA::RIG_Node::addRegPreference(RIG_Node *node)
prefRegs.push_back(node);
 }
 
-GCRA::GCRA(Function *fn, SpillCodeInserter& spill) :
+GCRA::GCRA(Function *fn, SpillCodeInserter& spill, MergedDefs& mergedDefs) :
func(fn),
regs(fn->getProgram()->getTarget()),
-   spill(spill)
+   spill(spill),
+   mergedDefs(mergedDefs)
 {
prog = func->getProgram();
 }
@@ -1258,7 +1306,7 @@ GCRA::calculateSpillWeights()
 
   if (!val->noSpill) {
  int rc = 0;
- for (ValueDef *def : val->defs)
+ for (ValueDef *def : mergedDefs(val))
 rc += def

Re: [Mesa-dev] [ANNOUNCE] Mesa 20.0 branchpoint planned for 2020/01/29, Milestone opened

2020-01-29 Thread Karol Herbst
On Thu, Jan 30, 2020 at 2:37 AM Dieter Nützel  wrote:
>
> Maybe compilation with '-Dopencl-spirv=true', again.
>
> It is broken, now.
> Even LLVM 10.0 won't compile for me with SPIRV-LLVM-Translator,
> currently.
>

Do you have any more details on that? It could be that the
spirv-llvm-translator diverged somewhere, as I am only compiling
against LLVM 9 right now.

> Greetings,
> Dieter
>
> Am 22.01.2020 19:27, schrieb Dylan Baker:
> > Hi list, due to some last minute changes in plan I'll be managing the
> > 20.0
> > release. The release calendar has been updated, but the gitlab
> > milestone wasn't
> > opened. That has been corrected, and is here
> > https://gitlab.freedesktop.org/mesa/mesa/-/milestones/9, please add any
> > issues
> > or MRs you would like to land before the branchpoint to the milestone.
> >
> > Thanks,
> > Dylan
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/8] util/blob: Add overwrite function for uint8

2020-02-17 Thread Karol Herbst
On Mon, Feb 17, 2020 at 6:41 PM Mark Menzynski  wrote:
>
> The overwrite function for this type was missing and I needed it for my project.
>
> Signed-off-by: Mark Menzynski 
> ---
>  src/util/blob.c |  9 +
>  src/util/blob.h | 15 +++
>  2 files changed, 24 insertions(+)
>
> diff --git a/src/util/blob.c b/src/util/blob.c
> index 94d5a9dea74..5bf4b924c91 100644
> --- a/src/util/blob.c
> +++ b/src/util/blob.c
> @@ -214,6 +214,15 @@ BLOB_WRITE_TYPE(blob_write_intptr, intptr_t)
>  #define ASSERT_ALIGNED(_offset, _align) \
> assert(ALIGN((_offset), (_align)) == (_offset))
>
> +bool
> +blob_overwrite_uint8 (struct blob *blob,
> +  size_t offset,
> +  uint8_t value)
> +{
> +   ASSERT_ALIGNED(offset, sizeof(value));
> +   return blob_overwrite_bytes(blob, offset, &value, sizeof(value));
> +}
> +

I think it would be better to do the same as with the write functions
and define a macro for the implementation.

>  bool
>  blob_overwrite_uint32 (struct blob *blob,
> size_t offset,
> diff --git a/src/util/blob.h b/src/util/blob.h
> index 9113331254a..d5496fef1cd 100644
> --- a/src/util/blob.h
> +++ b/src/util/blob.h
> @@ -209,6 +209,21 @@ blob_write_uint16(struct blob *blob, uint16_t value);
>  bool
>  blob_write_uint32(struct blob *blob, uint32_t value);
>
> +/**
> + * Overwrite a uint8_t previously written to the blob.
> + *
> + * Writes a uint8_t value to an existing portion of the blob at an offset of
> + * \offset.  This data range must have previously been written to the blob by
> + * one of the blob_write_* calls.
> + *
> + * \return True unless the requested position or position+to_write lie 
> outside
> + * the current blob's size.
> + */
> +bool
> +blob_overwrite_uint8(struct blob *blob,
> + size_t offset,
> + uint8_t value);
> +

following the existing pattern, I think this should be moved after the
blob_write_uint8 declaration.

>  /**
>   * Overwrite a uint32_t previously written to the blob.
>   *
> --
> 2.21.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/8] nv50/ir: Add nv50_ir_prog_info_out serialize and deserialize

2020-02-17 Thread Karol Herbst
On Mon, Feb 17, 2020 at 6:41 PM Mark Menzynski  wrote:
>
> Adds functions for serializing and deserializing
> nv50_ir_prog_info_out structure, which are needed for shader caching.
>
> Signed-off-by: Mark Menzynski 
> ---
>  .../drivers/nouveau/codegen/nv50_ir_driver.h  |  44 
>  .../nouveau/codegen/nv50_ir_emit_gk110.cpp|  14 +-
>  .../nouveau/codegen/nv50_ir_emit_gm107.cpp|  14 +-
>  .../nouveau/codegen/nv50_ir_emit_nv50.cpp |   6 +-
>  .../nouveau/codegen/nv50_ir_emit_nvc0.cpp |  14 +-
>  .../nouveau/codegen/nv50_ir_serialize.cpp | 196 ++
>  src/gallium/drivers/nouveau/meson.build   |   1 +
>  7 files changed, 265 insertions(+), 24 deletions(-)
>  create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index f6b5415bc95..bc92a3bc4ee 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -25,6 +25,7 @@
>
>  #include "pipe/p_shader_tokens.h"
>
> +#include "util/blob.h"
>  #include "tgsi/tgsi_util.h"
>  #include "tgsi/tgsi_parse.h"
>  #include "tgsi/tgsi_scan.h"
> @@ -242,6 +243,49 @@ nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
>  extern void nv50_ir_get_target_library(uint32_t chipset,
> const uint32_t **code, uint32_t 
> *size);
>
> +
> +#ifdef __cplusplus
> +namespace nv50_ir
> +{
> +   class FixupEntry;
> +   class FixupData;
> +
> +   void
> +   gk110_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
> + const nv50_ir::FixupData& data);
> +   void
> +   gm107_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
> + const nv50_ir::FixupData& data);
> +   void
> +   nv50_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
> +const nv50_ir::FixupData& data);
> +   void
> +   nvc0_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code,
> +const nv50_ir::FixupData& data);
> +   void
> +   gk110_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
> +  const nv50_ir::FixupData& data);
> +   void
> +   gm107_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
> +  const nv50_ir::FixupData& data);
> +   void
> +   nvc0_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code,
> + const nv50_ir::FixupData& data);
> +
> +}
> +#endif
> +
> +/* Serialize a nv50_ir_prog_info_out structure and save it into blob */
> +extern bool
> +nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out 
> *);
> +
> +/* Deserialize from data and save into a nv50_ir_prog_info_out structure
> + * using a pointer. Size is a total size of the serialized data.
> + * Offset points to where info_out in data is located. */
> +extern bool
> +nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset,
> + struct nv50_ir_prog_info_out *);

Some spaces are missing here. Also, I'd drop the offset argument and require
the caller to pass in an already adjusted pointer.

> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> index 2118c3153f7..e651d7fdcb0 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> @@ -1209,8 +1209,8 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
> }
>  }
>
> -static void
> -selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
> +void
> +gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& 
> data)
>  {
> int loc = entry->loc;
> if (data.force_persample_interp)
> @@ -1227,7 +1227,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i)
>code[1] |= 1 << 13;
>
> if (i->subOp == 1) {
> -  addInterp(0, 0, selpFlip);
> +  addInterp(0, 0, gk110_selpFlip);
> }
>  }
>
> @@ -2042,8 +2042,8 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i)
> code[1] |= (i->ipa & 0xc) << (19 - 2);
>  }
>
> -static void
> -interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
> +void
> +gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const 
> FixupData& data)
>  {
> int ipa = entry->ipa;
> int reg = entry->reg;
> @@ -2078,10 +2078,10 @@ CodeEmitterGK110::emitINTERP(const Instruction *i)
>
> if (i->op == OP_PINTERP) {
>srcId(i->src(1), 23);
> -  addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
> +  addInterp(i->ipa, SDATA(i->src(1)).id, gk110_interpApply);
> } else {
>code[0] |= 0xff << 23;
> -  addInterp(i->ipa, 0xff, interpApply);
> +  addInterp(i->ipa, 0xff, gk110_interpApply);
> }
>
> srcId(i->src(

Re: [Mesa-dev] [PATCH 4/8] nv50/ir: Add prog_info_out print

2020-02-17 Thread Karol Herbst
On Mon, Feb 17, 2020 at 6:41 PM Mark Menzynski  wrote:
>
> Adds a function for printing nv50_ir_prog_info_out structure
> in JSON-like format, which could be used in debugging.
>
> Signed-off-by: Mark Menzynski 
> ---
>  .../drivers/nouveau/codegen/nv50_ir_driver.h  |   3 +
>  .../drivers/nouveau/codegen/nv50_ir_print.cpp | 155 ++
>  2 files changed, 158 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index bc92a3bc4ee..9eb8a4c4798 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -275,6 +275,9 @@ namespace nv50_ir
>  }
>  #endif
>
> +extern void
> +nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *);
> +
>  /* Serialize a nv50_ir_prog_info_out structure and save it into blob */
>  extern bool
>  nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out 
> *);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> index 5dcbf3c3e0c..f19d1a7d280 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> @@ -22,6 +22,7 @@
>
>  #include "codegen/nv50_ir.h"
>  #include "codegen/nv50_ir_target.h"
> +#include "codegen/nv50_ir_driver.h"
>
>  #include 
>
> @@ -852,3 +853,157 @@ Function::printLiveIntervals() const
>  }
>
>  } // namespace nv50_ir
> +
> +extern void
> +nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *info_out)
> +{
> +   int i;
> +
> +   INFO("{\n");
> +   INFO("   \"target\":\"%d\",\n", info_out->target);
> +   INFO("   \"type\":\"%d\",\n", info_out->type);
> +
> +   // Bin
> +   INFO("   \"bin\":{\n");
> +   INFO("  \"maxGPR\":\"%d\",\n", info_out->bin.maxGPR);
> +   INFO("  \"tlsSpace\":\"%d\",\n", info_out->bin.tlsSpace);
> +   INFO("  \"smemSize\":\"%d\",\n", info_out->bin.smemSize);
> +   INFO("  \"codeSize\":\"%d\",\n", info_out->bin.codeSize);
> +   INFO("  \"instructions\":\"%d\",\n", info_out->bin.instructions);
> +
> +   // RelocInfo
> +   INFO("  \"RelocInfo\":");
> +   if (!info_out->bin.relocData) {
> +  INFO("\"NULL\",\n");
> +   }
> +   else {

Please keep the closing brace and the `else` on one line (`} else {`).

> +  nv50_ir::RelocInfo *reloc = (nv50_ir::RelocInfo 
> *)info_out->bin.relocData;
> +  INFO("{\n");
> +  INFO(" \"codePos\":\"%d\",\n", reloc->codePos);
> +  INFO(" \"libPos\":\"%d\",\n", reloc->libPos);
> +  INFO(" \"dataPos\":\"%d\",\n", reloc->dataPos);
> +  INFO(" \"count\":\"%d\",\n", reloc->count);
> +  INFO(" \"RelocEntry\":[\n");
> +  for (unsigned int i = 0; i < reloc->count; i++) {
> + INFO("
> {\"data\":\"%d\",\t\"mask\":\"%d\",\t\"offset\":\"%d\",\t\"bitPos\":\"%d\",\t\"type\":\"%d\"}",
> +   reloc->entry[i].data, reloc->entry[i].mask, 
> reloc->entry[i].offset, reloc->entry[i].bitPos, reloc->entry[i].type
> +   );
> +  }
> +  INFO("\n");
> +  INFO(" ]\n");
> +  INFO("  },\n");
> +   }
> +
> +   // FixupInfo
> +   INFO("  \"FixupInfo\":");
> +   if (!info_out->bin.fixupData) {
> +  INFO("\"NULL\"\n");
> +   }
> +   else {

Here as well: keep `} else {` on one line.

> +  nv50_ir::FixupInfo *fixup = (nv50_ir::FixupInfo 
> *)info_out->bin.fixupData;
> +  INFO("{\n");
> +  INFO(" \"count\":\"%d\"\n", fixup->count);
> +  INFO(" \"FixupEntry\":[\n");
> +  for (unsigned int i = 0; i < fixup->count; i++) {
> + INFO("
> {\"apply\":\"%p\",\t\"ipa\":\"%d\",\t\"reg\":\"%d\",\t\"loc\":\"%d\"}",
> +   fixup->entry[i].apply, fixup->entry[i].ipa, 
> fixup->entry[i].reg, fixup->entry[i].loc);
> +  }
> +  INFO("\n");
> +  INFO(" ]\n");
> +  INFO("  }\n");
> +
> +  INFO("   },\n");
> +   }
> +
> +   if (info_out->numSysVals) {
> +  INFO("   \"sv\":[\n");
> +  for (i = 0; i < info_out->numSysVals; i++) {
> + if (&(info_out->sv[i])) {
> +INFO("  {\"id\":\"%d\", \"sn\":\"%d\", \"si\":\"%d\"}",
> +   info_out->sv[i].id, info_out->sv[i].sn, 
> info_out->sv[i].si);
> + }
> +  }
> +  INFO("\n   ],\n");
> +   }
> +   if (info_out->numInputs) {
> +  INFO("   \"in\":[\n");
> +  for (i = 0; i < info_out->numInputs; i++) {
> + if (&(info_out->in[i])) {
> +INFO("  {\"id\":\"%d\",\t\"sn\":\"%d\",\t\"si\":\"%d\"}",
> +info_out->in[i].id, info_out->in[i].sn, info_out->in[i].si);
> + }
> +  }
> +  INFO("\n   ],\n");
> +   }
> +   if (info_out->numOutputs) {
> +  INFO("   \"out\":[\n");
> +  for (i = 0; i < info_out->numOutputs; i++) {
> + if (&(info_out->out[i])) {
> +INFO("  {\"id\":\"%d\",\t\"sn\":\"%d\",\t\"si\":\"%d\"}",
> +   in

Re: [Mesa-dev] [PATCH 5/8] nv50/ir: Add nv50_ir_prog_info serialize

2020-02-17 Thread Karol Herbst
On Mon, Feb 17, 2020 at 6:41 PM Mark Menzynski  wrote:
>
> Adds a function for serializing a nv50_ir_prog_info structure, which is
> needed for shader caching.
>
> Signed-off-by: Mark Menzynski 
> ---
>  .../drivers/nouveau/codegen/nv50_ir_driver.h  |  4 +
>  .../nouveau/codegen/nv50_ir_serialize.cpp | 81 +++
>  2 files changed, 85 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index 9eb8a4c4798..cdf19eeabcf 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -278,6 +278,10 @@ namespace nv50_ir
>  extern void
>  nv50_ir_prog_info_out_print(struct nv50_ir_prog_info_out *);
>
> +/* Serialize a nv50_ir_prog_info structure and save it into blob */
> +extern bool
> +nv50_ir_prog_info_serialize(struct blob *, struct nv50_ir_prog_info *);
> +
>  /* Serialize a nv50_ir_prog_info_out structure and save it into blob */
>  extern bool
>  nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out 
> *);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
> index 077f3eba6c8..0f47189f10b 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp
> @@ -17,6 +17,87 @@ enum InterpApply {
> FLIP_GM107 = 7
>  };
>
> +extern bool
> +nv50_ir_prog_info_serialize(struct blob *blob, struct nv50_ir_prog_info 
> *info)
> +{
> +   blob_write_uint16(blob, info->target);
> +   blob_write_uint8(blob, info->type);
> +   blob_write_uint8(blob, info->optLevel);
> +   blob_write_uint8(blob, info->dbgFlags);
> +   blob_write_uint8(blob, info->omitLineNum);
> +   blob_write_uint32(blob, info->bin.smemSize);
> +   blob_write_uint16(blob, info->bin.maxOutput);
> +   blob_write_uint8(blob, info->bin.sourceRep);
> +
> +   switch(info->bin.sourceRep) {
> +  case PIPE_SHADER_IR_TGSI: {
> + struct tgsi_token *tokens = (struct tgsi_token *)info->bin.source;
> + unsigned int num_tokens = tgsi_num_tokens(tokens);
> +
> + blob_write_uint32(blob, num_tokens);
> + blob_write_bytes(blob, tokens, num_tokens * sizeof(struct 
> tgsi_token));
> + break;
> +  }
> +  case PIPE_SHADER_IR_NIR: {
> + struct nir_shader *nir = (struct nir_shader *)info->bin.source;
> + nir_serialize(blob, nir, false);
> + break;
> +  }
> +  default:
> + assert(!"unhandled info->bin.sourceRep");
> + return false;
> +   }
> +
> +   blob_write_uint16(blob, info->immd.bufSize);
> +   blob_write_bytes(blob, info->immd.buf, info->immd.bufSize * 
> sizeof(*info->immd.buf));
> +   blob_write_uint16(blob, info->immd.count);
> +   blob_write_bytes(blob, info->immd.data, info->immd.count * 
> sizeof(*info->immd.data));
> +   blob_write_bytes(blob, info->immd.type, info->immd.count * 16); // for 
> each vec4 (128 bit)
> +
> +   switch (info->type) {
> +  case PIPE_SHADER_VERTEX:
> + blob_write_bytes(blob, info->prop.vp.inputMask,
> +  4 * sizeof(*info->prop.vp.inputMask)); /* array of 
> size 4 */

We have an ARRAY_SIZE macro, but sizeof(info->prop.vp.inputMask)
should already give you the size of the full array, no?

> + break;
> +  case PIPE_SHADER_TESS_CTRL:
> + blob_write_uint32(blob, info->prop.cp.inputOffset);
> + blob_write_uint32(blob, info->prop.cp.sharedOffset);
> + blob_write_uint32(blob, info->prop.cp.gridInfoBase);
> + blob_write_bytes(blob, info->prop.cp.numThreads,
> +  3 * sizeof(*info->prop.cp.numThreads)); /* array 
> of size 3 */

Same here: sizeof(info->prop.cp.numThreads) should already give the size of the full array.

> +  case PIPE_SHADER_GEOMETRY:
> + blob_write_uint8(blob, info->prop.gp.inputPrim);
> + break;
> +  case PIPE_SHADER_FRAGMENT:
> + blob_write_uint8(blob, info->prop.fp.persampleInvocation);
> + break;
> +  default:
> + break;
> +   }
> +
> +   blob_write_uint8(blob, info->io.auxCBSlot);
> +   blob_write_uint16(blob, info->io.ucpBase);
> +   blob_write_uint16(blob, info->io.drawInfoBase);
> +   blob_write_uint16(blob, info->io.alphaRefBase);
> +   blob_write_uint8(blob, info->io.pointSize);
> +   blob_write_uint8(blob, info->io.viewportId);
> +   blob_write_bytes(blob, info->io.backFaceColor, 2 * 
> sizeof(*info->io.backFaceColor));

And here: sizeof(info->io.backFaceColor) should already give the size of the full array.

> +   blob_write_uint8(blob, info->io.mul_zero_wins);
> +   blob_write_uint8(blob, info->io.nv50styleSurfaces);
> +   blob_write_uint16(blob, info->io.texBindBase);
> +   blob_write_uint16(blob, info->io.fbtexBindBase);
> +   blob_write_uint16(blob, info->io.suInfoBase);
> +   blob_write_uint16(blob, info->io.bindlessBase);
> +   blob_write_uint16(blob, info->io.bufInfoBase);
> +   blob_write_uint16(blob, info->io.sampleInfoBase);
> +   blob_write_uint8(blob, info->io.msInfoCBS

Re: [Mesa-dev] [PATCH 6/8] tgsi/util: Change boolean for bool

2020-02-17 Thread Karol Herbst
Reviewed-by: Karol Herbst 

On Mon, Feb 17, 2020 at 6:41 PM Mark Menzynski  wrote:
>
> I was getting errors with "boolean" when compiling. This patch changes
> boolean to bool from <stdbool.h>.
>
> Signed-off-by: Mark Menzynski 
> ---
>  src/gallium/auxiliary/tgsi/tgsi_util.c | 2 +-
>  src/gallium/auxiliary/tgsi/tgsi_util.h | 5 +++--
>  2 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c 
> b/src/gallium/auxiliary/tgsi/tgsi_util.c
> index 1e5582ba273..e1b604cff0e 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_util.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
> @@ -537,7 +537,7 @@ tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type 
> tgsi_tex)
>  }
>
>
> -boolean
> +bool
>  tgsi_is_shadow_target(enum tgsi_texture_type target)
>  {
> switch (target) {
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h 
> b/src/gallium/auxiliary/tgsi/tgsi_util.h
> index 686b90f467e..6dc576b1a00 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_util.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
> @@ -28,6 +28,7 @@
>  #ifndef TGSI_UTIL_H
>  #define TGSI_UTIL_H
>
> +#include <stdbool.h>
>  #include "pipe/p_shader_tokens.h"
>
>  #if defined __cplusplus
> @@ -84,11 +85,11 @@ tgsi_util_get_texture_coord_dim(enum tgsi_texture_type 
> tgsi_tex);
>  int
>  tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type tgsi_tex);
>
> -boolean
> +bool
>  tgsi_is_shadow_target(enum tgsi_texture_type target);
>
>
> -static inline boolean
> +static inline bool
>  tgsi_is_msaa_target(enum tgsi_texture_type target)
>  {
> return (target == TGSI_TEXTURE_2D_MSAA ||
> --
> 2.21.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/8] tgsi/util: Change boolean for bool

2020-02-17 Thread Karol Herbst
By the way, would you mind creating an MR on GitLab with this and the 2nd patch?
That way we can get them reviewed, tested, and merged there before
the nouveau-related patches.

On Mon, Feb 17, 2020 at 9:09 PM Karol Herbst  wrote:
>
> Reviewed-by: Karol Herbst 
>
> On Mon, Feb 17, 2020 at 6:41 PM Mark Menzynski  wrote:
> >
> > I was getting errors with "boolean" when compiling. This patch changes
> > boolean to bool from <stdbool.h>.
> >
> > Signed-off-by: Mark Menzynski 
> > ---
> >  src/gallium/auxiliary/tgsi/tgsi_util.c | 2 +-
> >  src/gallium/auxiliary/tgsi/tgsi_util.h | 5 +++--
> >  2 files changed, 4 insertions(+), 3 deletions(-)
> >
> > diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c 
> > b/src/gallium/auxiliary/tgsi/tgsi_util.c
> > index 1e5582ba273..e1b604cff0e 100644
> > --- a/src/gallium/auxiliary/tgsi/tgsi_util.c
> > +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
> > @@ -537,7 +537,7 @@ tgsi_util_get_shadow_ref_src_index(enum 
> > tgsi_texture_type tgsi_tex)
> >  }
> >
> >
> > -boolean
> > +bool
> >  tgsi_is_shadow_target(enum tgsi_texture_type target)
> >  {
> > switch (target) {
> > diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h 
> > b/src/gallium/auxiliary/tgsi/tgsi_util.h
> > index 686b90f467e..6dc576b1a00 100644
> > --- a/src/gallium/auxiliary/tgsi/tgsi_util.h
> > +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
> > @@ -28,6 +28,7 @@
> >  #ifndef TGSI_UTIL_H
> >  #define TGSI_UTIL_H
> >
> > +#include <stdbool.h>
> >  #include "pipe/p_shader_tokens.h"
> >
> >  #if defined __cplusplus
> > @@ -84,11 +85,11 @@ tgsi_util_get_texture_coord_dim(enum tgsi_texture_type 
> > tgsi_tex);
> >  int
> >  tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type tgsi_tex);
> >
> > -boolean
> > +bool
> >  tgsi_is_shadow_target(enum tgsi_texture_type target);
> >
> >
> > -static inline boolean
> > +static inline bool
> >  tgsi_is_msaa_target(enum tgsi_texture_type target)
> >  {
> > return (target == TGSI_TEXTURE_2D_MSAA ||
> > --
> > 2.21.1
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/8] nv50/ir: Move separateFragData

2020-02-17 Thread Karol Herbst
On Mon, Feb 17, 2020 at 6:41 PM Mark Menzynski  wrote:
>
> separateFragData was in the wrong place (the input nv50_ir_prog_info);
> moved it to nv50_ir_prog_info_out.
>
> Signed-off-by: Mark Menzynski 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h  | 2 +-
>  src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp  | 2 +-
>  src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 +-
>  3 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index cdf19eeabcf..30498ceffaf 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -112,7 +112,6 @@ struct nv50_ir_prog_info
>   uint8_t inputPrim;
>} gp;
>struct {
> - bool separateFragData;
>   bool persampleInvocation;
>} fp;
>struct {
> @@ -200,6 +199,7 @@ struct nv50_ir_prog_info_out
>   bool usesSampleMaskIn;
>   bool readsFramebuffer;
>   bool readsSampleLocations;
> + bool separateFragData;
>} fp;
> } prop;
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
> index 3efeaab4569..cf5f3d6d7e7 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
> @@ -2100,7 +2100,7 @@ Converter::visit(nir_intrinsic_instr *insn)
>atom->setIndirect(0, 0, address);
>atom->subOp = getSubOp(op);
>
> -  info->io.globalAccess |= 0x2;
> +  info_out->io.globalAccess |= 0x2;
>break;
> }
> case nir_intrinsic_bindless_image_atomic_add:
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index 5850dc18fec..c2322f3856a 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -1176,7 +1176,7 @@ void Source::scanProperty(const struct 
> tgsi_full_property *prop)
>info_out->prop.gp.instanceCount = prop->u[0].Data;
>break;
> case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
> -  info->prop.fp.separateFragData = true;
> +  info_out->prop.fp.separateFragData = true;
>break;
> case TGSI_PROPERTY_FS_COORD_ORIGIN:
> case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
> --
> 2.21.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

Would you mind merging those changes into the 1st patch? Just add a
"v2 (mark): ..." note or something similar.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 8/8] nvc0: Add shader disk caching

2020-02-17 Thread Karol Herbst
On Mon, Feb 17, 2020 at 6:41 PM Mark Menzynski  wrote:
>
> Adds shader disk caching for nvc0 to reduce the need to compile shaders
> every time. Shaders are saved into disk_shader_cache from the nvc0_screen structure.
>
> It serializes the input nv50_ir_prog_info to compute the hash key and
> also to do a byte compare between the original nv50_ir_prog_info and the one
> saved in the cache. If the keys match and the byte compare also reports that
> they are equal, the shaders are the same, and the compiled nv50_ir_prog_info_out
> from the cache can be used instead of compiling the input info.
>
> Seems to be significantly improving loading times. Piglit tests seem
> to be OK.
>
> Signed-off-by: Mark Menzynski 
> ---
>  .../drivers/nouveau/nvc0/nvc0_context.h   |  1 +
>  .../drivers/nouveau/nvc0/nvc0_program.c   | 49 ---
>  .../drivers/nouveau/nvc0/nvc0_shader_state.c  |  3 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c |  2 +
>  4 files changed, 46 insertions(+), 9 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 8a2a8f2797e..4b83d1afeb4 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -321,6 +321,7 @@ extern struct draw_stage *nvc0_draw_render_stage(struct 
> nvc0_context *);
>
>  /* nvc0_program.c */
>  bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset,
> +struct disk_cache *,
>  struct pipe_debug_callback *);
>  bool nvc0_program_upload(struct nvc0_context *, struct nvc0_program *);
>  void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> index 1a5073292e8..06b6f7b4db5 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> @@ -24,6 +24,7 @@
>
>  #include "compiler/nir/nir.h"
>  #include "tgsi/tgsi_ureg.h"
> +#include "util/blob.h"
>
>  #include "nvc0/nvc0_context.h"
>
> @@ -568,11 +569,19 @@ nvc0_program_dump(struct nvc0_program *prog)
>
>  bool
>  nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
> +   struct disk_cache *disk_shader_cache,
> struct pipe_debug_callback *debug)
>  {
> +   struct blob blob;
> struct nv50_ir_prog_info *info;
> struct nv50_ir_prog_info_out info_out = {};
> -   int ret;
> +
> +   void *cached_data = NULL;
> +   size_t cached_size;
> +   bool shader_found = false;
> +
> +   int ret = 0;
> +   cache_key key;
>
> info = CALLOC_STRUCT(nv50_ir_prog_info);
> if (!info)
> @@ -631,14 +640,38 @@ nvc0_program_translate(struct nvc0_program *prog, 
> uint16_t chipset,
> info->assignSlots = nvc0_program_assign_varying_slots;
>
> /* these fields might be overwritten by the compiler */
> -   info_out.bin.smemSize = prog->cp.smem_size;
> -   info_out.io.genUserClip = prog->vp.num_ucps;
> -
> -   ret = nv50_ir_generate_code(info, &info_out);
> -   if (ret) {
> -  NOUVEAU_ERR("shader translation failed: %i\n", ret);
> -  goto out;
> +   info->bin.smemSize = prog->cp.smem_size;
> +   info->io.genUserClip = prog->vp.num_ucps;
> +
> +   blob_init(&blob);
> +   nv50_ir_prog_info_serialize(&blob, info);
> +
> +   if (disk_shader_cache) {
> +  disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key);
> +  cached_data = disk_cache_get(disk_shader_cache, key, &cached_size);
> +
> +  if (cached_data && cached_size >= blob.size) { // blob.size is the 
> size of serialized "info"
> + if (memcmp(cached_data, blob.data, blob.size) == 0) {
> +shader_found = true;
> +/* Blob contains only "info". In disk cache, "info_out" comes 
> right after it */
> +size_t offset = blob.size;
> +nv50_ir_prog_info_out_deserialize(cached_data, cached_size, 
> offset, &info_out);
> + }

I am still a bit unsure whether we really need this check. Other
drivers don't seem to do it either, but it's definitely safer to keep
it. Let's see what others think about it.

> +  }
> +  free(cached_data);
> +   }
> +   if (!shader_found) {
> +  ret = nv50_ir_generate_code(info, &info_out);
> +  if (ret) {
> + NOUVEAU_ERR("shader translation failed: %i\n", ret);
> + goto out;
> +  }
> +  if (disk_shader_cache) {
> + nv50_ir_prog_info_out_serialize(&blob, &info_out);
> + disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL);
> +  }
> }
> +   blob_finish(&blob);
>
> prog->code = info_out.bin.code;
> prog->code_size = info_out.bin.codeSize;
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
> index 774c5648113..4327a89454b 100644
> --- a/src/gallium/drivers/nouveau

<    4   5   6   7   8   9   10   >