Re: [Mesa-dev] [PATCH v2] i965/fs: Define new shader opcode to set rounding modes

2017-09-06 Thread Chema Casanova

On 05/09/17 23:41, Francisco Jerez wrote:
> Alejandro Piñeiro  writes:
> 
>> Although it is possible to emit them directly as AND/OR on brw_fs_nir,
>> having a specific opcode makes it easier to remove duplicate settings
>> later.
>>
>> v2: (Curro)
>>   - Set thread control to 'switch' when using the control register
>>   - Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate
>> with the rounding mode.
>>   - Avoid magic numbers setting rounding mode field at control register.
>>
>> Signed-off-by:  Alejandro Piñeiro 
>> Signed-off-by:  Jose Maria Casanova Crespo 
>> ---
>>  src/intel/compiler/brw_eu.h |  3 +++
>>  src/intel/compiler/brw_eu_defines.h | 17 +
>>  src/intel/compiler/brw_eu_emit.c| 34 
>> +
>>  src/intel/compiler/brw_fs_generator.cpp |  5 +
>>  src/intel/compiler/brw_shader.cpp   |  4 
>>  5 files changed, 63 insertions(+)
>>
>> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
>> index 8e597b212a6..106bf03530d 100644
>> --- a/src/intel/compiler/brw_eu.h
>> +++ b/src/intel/compiler/brw_eu.h
>> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
>>struct brw_reg src,
>>struct brw_reg idx);
>>  
>> +void
>> +brw_rounding_mode(struct brw_codegen *p,
>> +  enum brw_rnd_mode mode);
> 
> Missing whitespace line.

Ok

> 
>>  /***
>>   * brw_eu_util.c:
>>   */
>> diff --git a/src/intel/compiler/brw_eu_defines.h 
>> b/src/intel/compiler/brw_eu_defines.h
>> index da482b73c58..91d88fe8952 100644
>> --- a/src/intel/compiler/brw_eu_defines.h
>> +++ b/src/intel/compiler/brw_eu_defines.h
>> @@ -388,6 +388,9 @@ enum opcode {
>> SHADER_OPCODE_TYPED_SURFACE_WRITE,
>> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
>>  
>> +
> 
> Redundant whitespace.

OK.

> 
>> +   SHADER_OPCODE_RND_MODE,
>> +
>> SHADER_OPCODE_MEMORY_FENCE,
>>  
>> SHADER_OPCODE_GEN4_SCRATCH_READ,
>> @@ -1214,4 +1217,18 @@ enum brw_message_target {
>>  /* R0 */
>>  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT  27
>>  
>> +/* CR0.0[5:4] Floating-Point Rounding Modes
>> + *  Skylake PRM, Volume 7 Part 1, "Control Register", page 756
>> + */
>> +
>> +#define BRW_CR0_RND_MODE_MASK 0x30
>> +#define BRW_CR0_RND_MODE_SHIFT 4
>> +
>> +enum PACKED brw_rnd_mode {
>> +   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
>> +   BRW_RND_MODE_RU = 1,/* Round Up, toward +inf */
>> +   BRW_RND_MODE_RD = 2,/* Round Down, toward -inf */
>> +   BRW_RND_MODE_RTZ = 3/* Round Toward Zero */
>> +};
>> +
>>  #endif /* BRW_EU_DEFINES_H */
>> diff --git a/src/intel/compiler/brw_eu_emit.c 
>> b/src/intel/compiler/brw_eu_emit.c
>> index 8c952e7da26..12164653e47 100644
>> --- a/src/intel/compiler/brw_eu_emit.c
>> +++ b/src/intel/compiler/brw_eu_emit.c
>> @@ -3530,3 +3530,37 @@ brw_WAIT(struct brw_codegen *p)
>> brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
>> brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
>>  }
>> +
>> +/**
>> + * Changes the floating point rounding mode updating the control register
>> + * field defined at cr0.0[5-6] bits. This function supports the changes to
>> + * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise 
>> operations.
>> + * Only RTNE and RTZ rounding are enabled at nir.
>> + */
>> +
> 
> Redundant whitespace.

OK.

> 
>> +void
>> +brw_rounding_mode(struct brw_codegen *p,
>> +  enum brw_rnd_mode mode)
>> +{
>> +   const unsigned bits  = mode << BRW_CR0_RND_MODE_SHIFT;
>> +
>> +   if (bits != BRW_CR0_RND_MODE_MASK) {
>> +  brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> +   brw_imm_ud(~BRW_CR0_RND_MODE_MASK));
>> +
>> +  /* From the Skylake PRM, Volume 7, page 760:
>> +   *  "Implementation Restriction on Register Access: When the control
>> +   *   register is used as an explicit source and/or destination, 
>> hardware
>> +   *   does not ensure execution pipeline coherency. Software must set 
>> the
>> +   *   thread control field to ‘switch’ for an instruction that uses
>> +   *   control register as an explicit operand."
>> +   */
>> +  brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
>> +}
>> +
>> +   if (bits) {
>> +  brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> +  brw_imm_ud(bits));
>> +  brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
>> +   }
>> +}
>> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
>> b/src/intel/compiler/brw_fs_generator.cpp
>> index afaec5c9497..ff9880ebfe8 100644
>> --- a/src/intel/compiler/brw_fs_generator.cpp
>> +++ b/src/intel/compiler/brw_fs_generator.cpp
>> @@ -2144,6 +2144,11 @@ fs_generator::generate_code(const cfg_t *cfg, int 
>> 

Re: [Mesa-dev] [PATCH v2] i965/fs: Define new shader opcode to set rounding modes

2017-09-05 Thread Francisco Jerez
Alejandro Piñeiro  writes:

> Although it is possible to emit them directly as AND/OR on brw_fs_nir,
> having a specific opcode makes it easier to remove duplicate settings
> later.
>
> v2: (Curro)
>   - Set thread control to 'switch' when using the control register
>   - Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate
> with the rounding mode.
>   - Avoid magic numbers setting rounding mode field at control register.
>
> Signed-off-by:  Alejandro Piñeiro 
> Signed-off-by:  Jose Maria Casanova Crespo 
> ---
>  src/intel/compiler/brw_eu.h |  3 +++
>  src/intel/compiler/brw_eu_defines.h | 17 +
>  src/intel/compiler/brw_eu_emit.c| 34 
> +
>  src/intel/compiler/brw_fs_generator.cpp |  5 +
>  src/intel/compiler/brw_shader.cpp   |  4 
>  5 files changed, 63 insertions(+)
>
> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> index 8e597b212a6..106bf03530d 100644
> --- a/src/intel/compiler/brw_eu.h
> +++ b/src/intel/compiler/brw_eu.h
> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
>struct brw_reg src,
>struct brw_reg idx);
>  
> +void
> +brw_rounding_mode(struct brw_codegen *p,
> +  enum brw_rnd_mode mode);

Missing whitespace line.

>  /***
>   * brw_eu_util.c:
>   */
> diff --git a/src/intel/compiler/brw_eu_defines.h 
> b/src/intel/compiler/brw_eu_defines.h
> index da482b73c58..91d88fe8952 100644
> --- a/src/intel/compiler/brw_eu_defines.h
> +++ b/src/intel/compiler/brw_eu_defines.h
> @@ -388,6 +388,9 @@ enum opcode {
> SHADER_OPCODE_TYPED_SURFACE_WRITE,
> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
>  
> +

Redundant whitespace.

> +   SHADER_OPCODE_RND_MODE,
> +
> SHADER_OPCODE_MEMORY_FENCE,
>  
> SHADER_OPCODE_GEN4_SCRATCH_READ,
> @@ -1214,4 +1217,18 @@ enum brw_message_target {
>  /* R0 */
>  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT   27
>  
> +/* CR0.0[5:4] Floating-Point Rounding Modes
> + *  Skylake PRM, Volume 7 Part 1, "Control Register", page 756
> + */
> +
> +#define BRW_CR0_RND_MODE_MASK 0x30
> +#define BRW_CR0_RND_MODE_SHIFT 4
> +
> +enum PACKED brw_rnd_mode {
> +   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
> +   BRW_RND_MODE_RU = 1,/* Round Up, toward +inf */
> +   BRW_RND_MODE_RD = 2,/* Round Down, toward -inf */
> +   BRW_RND_MODE_RTZ = 3/* Round Toward Zero */
> +};
> +
>  #endif /* BRW_EU_DEFINES_H */
> diff --git a/src/intel/compiler/brw_eu_emit.c 
> b/src/intel/compiler/brw_eu_emit.c
> index 8c952e7da26..12164653e47 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -3530,3 +3530,37 @@ brw_WAIT(struct brw_codegen *p)
> brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
> brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
>  }
> +
> +/**
> + * Changes the floating point rounding mode updating the control register
> + * field defined at cr0.0[5-6] bits. This function supports the changes to
> + * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise 
> operations.
> + * Only RTNE and RTZ rounding are enabled at nir.
> + */
> +

Redundant whitespace.

> +void
> +brw_rounding_mode(struct brw_codegen *p,
> +  enum brw_rnd_mode mode)
> +{
> +   const unsigned bits  = mode << BRW_CR0_RND_MODE_SHIFT;
> +
> +   if (bits != BRW_CR0_RND_MODE_MASK) {
> +  brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
> +   brw_imm_ud(~BRW_CR0_RND_MODE_MASK));
> +
> +  /* From the Skylake PRM, Volume 7, page 760:
> +   *  "Implementation Restriction on Register Access: When the control
> +   *   register is used as an explicit source and/or destination, 
> hardware
> +   *   does not ensure execution pipeline coherency. Software must set 
> the
> +   *   thread control field to ‘switch’ for an instruction that uses
> +   *   control register as an explicit operand."
> +   */
> +  brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
> +}
> +
> +   if (bits) {
> +  brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
> +  brw_imm_ud(bits));
> +  brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
> +   }
> +}
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index afaec5c9497..ff9880ebfe8 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -2144,6 +2144,11 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> dispatch_width)
>   brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F));
>   break;
>  
> +  case SHADER_OPCODE_RND_MODE:
> + assert(src[0].file == BRW_IMMEDIATE_VALUE);
> +   

[Mesa-dev] [PATCH v2] i965/fs: Define new shader opcode to set rounding modes

2017-08-30 Thread Alejandro Piñeiro
Although it is possible to emit them directly as AND/OR on brw_fs_nir,
having a specific opcode makes it easier to remove duplicate settings
later.

v2: (Curro)
  - Set thread control to 'switch' when using the control register
  - Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate
with the rounding mode.
  - Avoid magic numbers setting rounding mode field at control register.

Signed-off-by:  Alejandro Piñeiro 
Signed-off-by:  Jose Maria Casanova Crespo 
---
 src/intel/compiler/brw_eu.h             |  3 +++
 src/intel/compiler/brw_eu_defines.h     | 17 +++++++++++++++++
 src/intel/compiler/brw_eu_emit.c        | 34 ++++++++++++++++++++++++++++++++++
 src/intel/compiler/brw_fs_generator.cpp |  5 +++++
 src/intel/compiler/brw_shader.cpp       |  4 ++++
 5 files changed, 63 insertions(+)

diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 8e597b212a6..106bf03530d 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
   struct brw_reg src,
   struct brw_reg idx);
 
+void
+brw_rounding_mode(struct brw_codegen *p,
+  enum brw_rnd_mode mode);
 /***
  * brw_eu_util.c:
  */
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index da482b73c58..91d88fe8952 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -388,6 +388,9 @@ enum opcode {
SHADER_OPCODE_TYPED_SURFACE_WRITE,
SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
 
+
+   SHADER_OPCODE_RND_MODE,
+
SHADER_OPCODE_MEMORY_FENCE,
 
SHADER_OPCODE_GEN4_SCRATCH_READ,
@@ -1214,4 +1217,18 @@ enum brw_message_target {
 /* R0 */
 # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27
 
+/* CR0.0[5:4] Floating-Point Rounding Modes
+ *  Skylake PRM, Volume 7 Part 1, "Control Register", page 756
+ */
+
+#define BRW_CR0_RND_MODE_MASK 0x30
+#define BRW_CR0_RND_MODE_SHIFT 4
+
+enum PACKED brw_rnd_mode {
+   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
+   BRW_RND_MODE_RU = 1,    /* Round Up, toward +inf */
+   BRW_RND_MODE_RD = 2,    /* Round Down, toward -inf */
+   BRW_RND_MODE_RTZ = 3    /* Round Toward Zero */
+};
+
 #endif /* BRW_EU_DEFINES_H */
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 8c952e7da26..12164653e47 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3530,3 +3530,37 @@ brw_WAIT(struct brw_codegen *p)
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
 }
+
+/**
+ * Changes the floating point rounding mode updating the control register
+ * field defined at cr0.0[5:4] bits. This function supports the changes to
+ * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise operations.
+ * Only RTNE and RTZ rounding are enabled at nir.
+ */
+
+void
+brw_rounding_mode(struct brw_codegen *p,
+  enum brw_rnd_mode mode)
+{
+   const unsigned bits  = mode << BRW_CR0_RND_MODE_SHIFT;
+
+   if (bits != BRW_CR0_RND_MODE_MASK) {
+  brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
+   brw_imm_ud(~BRW_CR0_RND_MODE_MASK));
+
+  /* From the Skylake PRM, Volume 7, page 760:
+   *  "Implementation Restriction on Register Access: When the control
+   *   register is used as an explicit source and/or destination, hardware
+   *   does not ensure execution pipeline coherency. Software must set the
+   *   thread control field to ‘switch’ for an instruction that uses
+   *   control register as an explicit operand."
+   */
+  brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
+}
+
+   if (bits) {
+  brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
+  brw_imm_ud(bits));
+  brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
+   }
+}
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index afaec5c9497..ff9880ebfe8 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2144,6 +2144,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
  brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F));
  break;
 
+  case SHADER_OPCODE_RND_MODE:
+ assert(src[0].file == BRW_IMMEDIATE_VALUE);
+ brw_rounding_mode(p, (brw_rnd_mode) src[0].d);
+ break;
+
   default:
  unreachable("Unsupported opcode");
 
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index c62b8ba6140..19dd960be3a 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -486,6 +486,9 @@