Re: [Mesa-dev] [PATCH 12/13] i965: use nir loop unrolling pass

2016-12-22 Thread Jason Ekstrand
On Wed, Dec 21, 2016 at 6:26 PM, Timothy Arceri <
timothy.arc...@collabora.com> wrote:

> shader-db results for BDW:
>
> total instructions in shared programs: 12589614 -> 12590119 (0.00%)
> instructions in affected programs: 50525 -> 51030 (1.00%)
> helped: 7
> HURT: 145
>
> total cycles in shared programs: 241524604 -> 241490502 (-0.01%)
> cycles in affected programs: 1941404 -> 1907302 (-1.76%)
> helped: 302
> HURT: 449
>
> total loops in shared programs: 4245 -> 2947 (-30.58%)
> loops in affected programs: 1535 -> 237 (-84.56%)
> helped: 1142
> HURT: 0
>
> total spills in shared programs: 14453 -> 14453 (0.00%)
> spills in affected programs: 0 -> 0
> helped: 0
> HURT: 0
>
> total fills in shared programs: 18984 -> 18984 (0.00%)
> fills in affected programs: 0 -> 0
> helped: 0
> HURT: 0
>
> LOST:   26
> GAINED: 15
> ---
>  src/mesa/drivers/dri/i965/brw_compiler.c |  3 +++
>  src/mesa/drivers/dri/i965/brw_nir.c      | 22 +++++++++++++++++-----
>  2 files changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c
> b/src/mesa/drivers/dri/i965/brw_compiler.c
> index 6a73719..d7900a7 100644
> --- a/src/mesa/drivers/dri/i965/brw_compiler.c
> +++ b/src/mesa/drivers/dri/i965/brw_compiler.c
> @@ -55,6 +55,7 @@ static const struct nir_shader_compiler_options
> scalar_nir_options = {
> .lower_unpack_snorm_4x8 = true,
> .lower_unpack_unorm_2x16 = true,
> .lower_unpack_unorm_4x8 = true,
> +   .max_unroll_iterations = 32,
>  };
>
>  static const struct nir_shader_compiler_options vector_nir_options = {
> @@ -75,6 +76,7 @@ static const struct nir_shader_compiler_options
> vector_nir_options = {
> .lower_unpack_unorm_2x16 = true,
> .lower_extract_byte = true,
> .lower_extract_word = true,
> +   .max_unroll_iterations = 32,
>  };
>
>  static const struct nir_shader_compiler_options vector_nir_options_gen6 =
> {
> @@ -92,6 +94,7 @@ static const struct nir_shader_compiler_options
> vector_nir_options_gen6 = {
> .lower_unpack_unorm_2x16 = true,
> .lower_extract_byte = true,
> .lower_extract_word = true,
> +   .max_unroll_iterations = 32,
>  };
>
>  struct brw_compiler *
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
> b/src/mesa/drivers/dri/i965/brw_nir.c
> index b44cbe8..0c1fb44 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -396,8 +396,17 @@ brw_nir_lower_cs_shared(nir_shader *nir)
>  #define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
>
>  static nir_shader *
> -nir_optimize(nir_shader *nir, bool is_scalar)
> +nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
> + bool is_scalar)
>  {
> +   nir_variable_mode indirect_mask = 0;
> +   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
> +  indirect_mask |= nir_var_shader_in;
> +   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput)
> +  indirect_mask |= nir_var_shader_out;
> +   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp)
> +  indirect_mask |= nir_var_local;
>

At some point we should stop setting glsl_compiler_options[].EmitNoIndirect*
to anything other than false and just put your indirect mask in brw_compiler
directly.


> +
> bool progress;
> do {
>progress = false;
> @@ -420,6 +429,9 @@ nir_optimize(nir_shader *nir, bool is_scalar)
>OPT(nir_opt_algebraic);
>OPT(nir_opt_constant_folding);
>OPT(nir_opt_dead_cf);
> +  if (nir->options->max_unroll_iterations != 0) {
> + OPT(nir_opt_loop_unroll, indirect_mask);
> +  }
>OPT(nir_opt_remove_phis);
>OPT(nir_opt_undef);
>OPT_V(nir_lower_doubles, nir_lower_drcp |
> @@ -477,7 +489,7 @@ brw_preprocess_nir(const struct brw_compiler
> *compiler, nir_shader *nir)
>
> OPT(nir_split_var_copies);
>
> -   nir = nir_optimize(nir, is_scalar);
> +   nir = nir_optimize(nir, compiler, is_scalar);
>
> if (is_scalar) {
>OPT_V(nir_lower_load_const_to_scalar);
> @@ -497,7 +509,7 @@ brw_preprocess_nir(const struct brw_compiler
> *compiler, nir_shader *nir)
> nir_lower_indirect_derefs(nir, indirect_mask);
>
> /* Get rid of split copies */
> -   nir = nir_optimize(nir, is_scalar);
> +   nir = nir_optimize(nir, compiler, is_scalar);
>
> OPT_V(nir_lower_clip_cull_distance_arrays);
>
> @@ -524,7 +536,7 @@ brw_postprocess_nir(nir_shader *nir, const struct
> brw_compiler *compiler,
> bool progress; /* Written by OPT and OPT_V */
> (void)progress;
>
> -   nir = nir_optimize(nir, is_scalar);
> +   nir = nir_optimize(nir, compiler, is_scalar);
>
> if (devinfo->gen >= 6) {
>/* Try and fuse multiply-adds */
> @@ -616,7 +628,7 @@ brw_nir_apply_sampler_key(nir_shader *nir,
>
> if (nir_lower_tex(nir, &tex_options)) {
>nir_validate_shader(nir);
> -  nir = nir_optimize(nir, is_scalar);
> +  nir = nir_optimize(nir, compiler, is_scalar);
> }
>
> return nir;
> --
> 2.9.3
>
> 

[Mesa-dev] [PATCH 12/13] i965: use nir loop unrolling pass

2016-12-21 Thread Timothy Arceri
shader-db results for BDW:

total instructions in shared programs: 12589614 -> 12590119 (0.00%)
instructions in affected programs: 50525 -> 51030 (1.00%)
helped: 7
HURT: 145

total cycles in shared programs: 241524604 -> 241490502 (-0.01%)
cycles in affected programs: 1941404 -> 1907302 (-1.76%)
helped: 302
HURT: 449

total loops in shared programs: 4245 -> 2947 (-30.58%)
loops in affected programs: 1535 -> 237 (-84.56%)
helped: 1142
HURT: 0

total spills in shared programs: 14453 -> 14453 (0.00%)
spills in affected programs: 0 -> 0
helped: 0
HURT: 0

total fills in shared programs: 18984 -> 18984 (0.00%)
fills in affected programs: 0 -> 0
helped: 0
HURT: 0

LOST:   26
GAINED: 15
---
 src/mesa/drivers/dri/i965/brw_compiler.c |  3 +++
 src/mesa/drivers/dri/i965/brw_nir.c      | 22 +++++++++++++++++-----
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c 
b/src/mesa/drivers/dri/i965/brw_compiler.c
index 6a73719..d7900a7 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -55,6 +55,7 @@ static const struct nir_shader_compiler_options 
scalar_nir_options = {
.lower_unpack_snorm_4x8 = true,
.lower_unpack_unorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
+   .max_unroll_iterations = 32,
 };
 
 static const struct nir_shader_compiler_options vector_nir_options = {
@@ -75,6 +76,7 @@ static const struct nir_shader_compiler_options 
vector_nir_options = {
.lower_unpack_unorm_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+   .max_unroll_iterations = 32,
 };
 
 static const struct nir_shader_compiler_options vector_nir_options_gen6 = {
@@ -92,6 +94,7 @@ static const struct nir_shader_compiler_options 
vector_nir_options_gen6 = {
.lower_unpack_unorm_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+   .max_unroll_iterations = 32,
 };
 
 struct brw_compiler *
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
b/src/mesa/drivers/dri/i965/brw_nir.c
index b44cbe8..0c1fb44 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -396,8 +396,17 @@ brw_nir_lower_cs_shared(nir_shader *nir)
 #define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
 
 static nir_shader *
-nir_optimize(nir_shader *nir, bool is_scalar)
+nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
+ bool is_scalar)
 {
+   nir_variable_mode indirect_mask = 0;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
+  indirect_mask |= nir_var_shader_in;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput)
+  indirect_mask |= nir_var_shader_out;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp)
+  indirect_mask |= nir_var_local;
+
bool progress;
do {
   progress = false;
@@ -420,6 +429,9 @@ nir_optimize(nir_shader *nir, bool is_scalar)
   OPT(nir_opt_algebraic);
   OPT(nir_opt_constant_folding);
   OPT(nir_opt_dead_cf);
+  if (nir->options->max_unroll_iterations != 0) {
+ OPT(nir_opt_loop_unroll, indirect_mask);
+  }
   OPT(nir_opt_remove_phis);
   OPT(nir_opt_undef);
   OPT_V(nir_lower_doubles, nir_lower_drcp |
@@ -477,7 +489,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
 
OPT(nir_split_var_copies);
 
-   nir = nir_optimize(nir, is_scalar);
+   nir = nir_optimize(nir, compiler, is_scalar);
 
if (is_scalar) {
   OPT_V(nir_lower_load_const_to_scalar);
@@ -497,7 +509,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
nir_lower_indirect_derefs(nir, indirect_mask);
 
/* Get rid of split copies */
-   nir = nir_optimize(nir, is_scalar);
+   nir = nir_optimize(nir, compiler, is_scalar);
 
OPT_V(nir_lower_clip_cull_distance_arrays);
 
@@ -524,7 +536,7 @@ brw_postprocess_nir(nir_shader *nir, const struct 
brw_compiler *compiler,
bool progress; /* Written by OPT and OPT_V */
(void)progress;
 
-   nir = nir_optimize(nir, is_scalar);
+   nir = nir_optimize(nir, compiler, is_scalar);
 
if (devinfo->gen >= 6) {
   /* Try and fuse multiply-adds */
@@ -616,7 +628,7 @@ brw_nir_apply_sampler_key(nir_shader *nir,
 
 if (nir_lower_tex(nir, &tex_options)) {
   nir_validate_shader(nir);
-  nir = nir_optimize(nir, is_scalar);
+  nir = nir_optimize(nir, compiler, is_scalar);
}
 
return nir;
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev