Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes

2017-08-29 Thread Chema Casanova


On 29/08/17 21:18, Francisco Jerez wrote:
> Chema Casanova  writes:
> 
>> El 25/08/17 a las 20:09, Francisco Jerez escribió:
>>> Alejandro Piñeiro  writes:
>>>
 Although it is possible to emit them directly as AND/OR on brw_fs_nir,
 having specific opcodes makes it easier to remove duplicate settings
 later.

 Signed-off-by:  Alejandro Piñeiro 
 Signed-off-by:  Jose Maria Casanova Crespo 
 ---
  src/intel/compiler/brw_eu.h |  3 +++
  src/intel/compiler/brw_eu_defines.h |  9 +
  src/intel/compiler/brw_eu_emit.c| 19 +++
  src/intel/compiler/brw_fs_generator.cpp |  8 
  src/intel/compiler/brw_shader.cpp   |  5 +
  5 files changed, 44 insertions(+)

 diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
 index a3a9c63239d..0a7f8020398 100644
 --- a/src/intel/compiler/brw_eu.h
 +++ b/src/intel/compiler/brw_eu.h
 @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
struct brw_reg src,
struct brw_reg idx);
  
 +void
 +brw_rounding_mode(struct brw_codegen *p,
 +  enum brw_rnd_mode mode);
  /***
   * brw_eu_util.c:
   */
 diff --git a/src/intel/compiler/brw_eu_defines.h 
 b/src/intel/compiler/brw_eu_defines.h
 index 1af835d47ed..50435df2fcf 100644
 --- a/src/intel/compiler/brw_eu_defines.h
 +++ b/src/intel/compiler/brw_eu_defines.h
 @@ -388,6 +388,9 @@ enum opcode {
 SHADER_OPCODE_TYPED_SURFACE_WRITE,
 SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
  
 +   SHADER_OPCODE_RND_MODE_RTE,
 +   SHADER_OPCODE_RND_MODE_RTZ,
 +
>>> We don't need an opcode for each possible rounding mode (there's also RU
>>> and RD).  How about you add a single SHADER_OPCODE_RND_MODE opcode
>>> taking an immediate with the right rounding mode?
>> I like the proposal. It is better having a unique opcode for setting the
>> rounding mode. Changed for v2 of this patch.
>>> Also, you should be marking the rounding mode opcodes as
>>> has_side_effects(), because otherwise you're giving the scheduler the
>>> freedom of moving your rounding mode update instruction past the
>>> instruction you wanted it to have an effect on...
>> Well pointed, we already realized that it was missing while debugging
>> the latency problem of the control register. It was hiding the problem
>> re-scheduling the cr0 modification to the beginning of the shader.
 SHADER_OPCODE_MEMORY_FENCE,
  
 SHADER_OPCODE_GEN4_SCRATCH_READ,
 @@ -1233,4 +1236,10 @@ enum brw_message_target {
  /* R0 */
  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT27
  
 +enum PACKED brw_rnd_mode {
 +   BRW_RND_MODE_UNSPECIFIED,
 +   BRW_RND_MODE_RTE,
 +   BRW_RND_MODE_RTZ,
>>> Since you're introducing a back-end-specific rounding mode enum already,
>>> why not use the hardware values right away so you avoid hard-coding
>>> magic constants below.
>> At the end we removed  MODE_UNSPECIFIED as it isn't really needed and we
>> include Round Up and Down in the enum assignation based using the PRM
>> values. Also renamed RTE for RTNE to maintain coherence with nir
>> conversion modifiers using the same acronym as PRM.
>>
>> +enum PACKED brw_rnd_mode {
>> +   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
>> +   BRW_RND_MODE_RU = 1,/* Round Up, toward +inf */
>> +   BRW_RND_MODE_RD = 2,/* Round Down, toward -inf */
>> +   BRW_RND_MODE_RTZ = 3/* Round Toward Zero */
>> +};
>>
>> I have a doubt about how to avoid the magic constants to close the v2 of
>> this patch. One approach would be using the same code structure and
>> taking advantage using the codification of rounding field. This way we
>> just formula and expect that the C compiler optimizer to guess that the
>> immediate value is a constant.
>>
> 
> Hm, I'm not particularly worried about the C++ compiler behavior, it's
> most likely immaterial.
> 
>> switch (mode) {
>> case BRW_RND_MODE_RTZ:
>> -  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(0x0030u));
>> +  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(((unsigned int) mode << 4)));
>>break;
>> -   case BRW_RND_MODE_RTE:
>> -  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(0xffcfu));
>> +   case BRW_RND_MODE_RTNE:
>> +  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(((unsigned int) mode << 4) | ~0x0030u));
>>break;
>> default:
>>
> 
> I don't think this is particularly advantageous since you still need to
> special-case every possible rounding mode.
> 
>> Another approach could be to implement a general solution for all
>> supported rounding modes by 

Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes

2017-08-29 Thread Francisco Jerez
Francisco Jerez  writes:

> Chema Casanova  writes:
>
>> El 25/08/17 a las 20:09, Francisco Jerez escribió:
>>> Alejandro Piñeiro  writes:
>>>
 Although it is possible to emit them directly as AND/OR on brw_fs_nir,
 having specific opcodes makes it easier to remove duplicate settings
 later.

 Signed-off-by:  Alejandro Piñeiro 
 Signed-off-by:  Jose Maria Casanova Crespo 
 ---
  src/intel/compiler/brw_eu.h |  3 +++
  src/intel/compiler/brw_eu_defines.h |  9 +
  src/intel/compiler/brw_eu_emit.c| 19 +++
  src/intel/compiler/brw_fs_generator.cpp |  8 
  src/intel/compiler/brw_shader.cpp   |  5 +
  5 files changed, 44 insertions(+)

 diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
 index a3a9c63239d..0a7f8020398 100644
 --- a/src/intel/compiler/brw_eu.h
 +++ b/src/intel/compiler/brw_eu.h
 @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
struct brw_reg src,
struct brw_reg idx);
  
 +void
 +brw_rounding_mode(struct brw_codegen *p,
 +  enum brw_rnd_mode mode);
  /***
   * brw_eu_util.c:
   */
 diff --git a/src/intel/compiler/brw_eu_defines.h 
 b/src/intel/compiler/brw_eu_defines.h
 index 1af835d47ed..50435df2fcf 100644
 --- a/src/intel/compiler/brw_eu_defines.h
 +++ b/src/intel/compiler/brw_eu_defines.h
 @@ -388,6 +388,9 @@ enum opcode {
 SHADER_OPCODE_TYPED_SURFACE_WRITE,
 SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
  
 +   SHADER_OPCODE_RND_MODE_RTE,
 +   SHADER_OPCODE_RND_MODE_RTZ,
 +
>>> We don't need an opcode for each possible rounding mode (there's also RU
>>> and RD).  How about you add a single SHADER_OPCODE_RND_MODE opcode
>>> taking an immediate with the right rounding mode?
>> I like the proposal. It is better having a unique opcode for setting the
>> rounding mode. Changed for v2 of this patch.
>>> Also, you should be marking the rounding mode opcodes as
>>> has_side_effects(), because otherwise you're giving the scheduler the
>>> freedom of moving your rounding mode update instruction past the
>>> instruction you wanted it to have an effect on...
>> Well pointed, we already realized that it was missing while debugging
>> the latency problem of the control register. It was hiding the problem
>> re-scheduling the cr0 modification to the beginning of the shader.
 SHADER_OPCODE_MEMORY_FENCE,
  
 SHADER_OPCODE_GEN4_SCRATCH_READ,
 @@ -1233,4 +1236,10 @@ enum brw_message_target {
  /* R0 */
  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT27
  
 +enum PACKED brw_rnd_mode {
 +   BRW_RND_MODE_UNSPECIFIED,
 +   BRW_RND_MODE_RTE,
 +   BRW_RND_MODE_RTZ,
>>> Since you're introducing a back-end-specific rounding mode enum already,
>>> why not use the hardware values right away so you avoid hard-coding
>>> magic constants below.
>> At the end we removed  MODE_UNSPECIFIED as it isn't really needed and we
>> include Round Up and Down in the enum assignation based using the PRM
>> values. Also renamed RTE for RTNE to maintain coherence with nir
>> conversion modifiers using the same acronym as PRM.
>>
>> +enum PACKED brw_rnd_mode {
>> +   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
>> +   BRW_RND_MODE_RU = 1,    /* Round Up, toward +inf */
>> +   BRW_RND_MODE_RD = 2,    /* Round Down, toward -inf */
>> +   BRW_RND_MODE_RTZ = 3    /* Round Toward Zero */
>> +};
>>
>> I have a doubt about how to avoid the magic constants to close the v2 of
>> this patch. One approach would be using the same code structure and
>> taking advantage using the codification of rounding field. This way we
>> just formula and expect that the C compiler optimizer to guess that the
>> immediate value is a constant.
>>
>
> Hm, I'm not particularly worried about the C++ compiler behavior, it's
> most likely immaterial.
>
>> switch (mode) {
>>     case BRW_RND_MODE_RTZ:
>> -  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(0x0030u));
>> +  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(((unsigned int) mode << 4)));
>>    break;
>> -   case BRW_RND_MODE_RTE:
>> -  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(0xffcfu));
>> +   case BRW_RND_MODE_RTNE:
>> +  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(((unsigned int) mode << 4) | ~0x0030u));
>>    break;
>>     default:
>>
>
> I don't think this is particularly advantageous since you still need to
> special-case every possible rounding mode.
>
>> Another approach could be to implement a general solution for all
>> supported rounding modes by 

Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes

2017-08-29 Thread Francisco Jerez
Chema Casanova  writes:

> El 25/08/17 a las 20:09, Francisco Jerez escribió:
>> Alejandro Piñeiro  writes:
>>
>>> Although it is possible to emit them directly as AND/OR on brw_fs_nir,
>>> having specific opcodes makes it easier to remove duplicate settings
>>> later.
>>>
>>> Signed-off-by:  Alejandro Piñeiro 
>>> Signed-off-by:  Jose Maria Casanova Crespo 
>>> ---
>>>  src/intel/compiler/brw_eu.h |  3 +++
>>>  src/intel/compiler/brw_eu_defines.h |  9 +
>>>  src/intel/compiler/brw_eu_emit.c| 19 +++
>>>  src/intel/compiler/brw_fs_generator.cpp |  8 
>>>  src/intel/compiler/brw_shader.cpp   |  5 +
>>>  5 files changed, 44 insertions(+)
>>>
>>> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
>>> index a3a9c63239d..0a7f8020398 100644
>>> --- a/src/intel/compiler/brw_eu.h
>>> +++ b/src/intel/compiler/brw_eu.h
>>> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
>>>struct brw_reg src,
>>>struct brw_reg idx);
>>>  
>>> +void
>>> +brw_rounding_mode(struct brw_codegen *p,
>>> +  enum brw_rnd_mode mode);
>>>  /***
>>>   * brw_eu_util.c:
>>>   */
>>> diff --git a/src/intel/compiler/brw_eu_defines.h 
>>> b/src/intel/compiler/brw_eu_defines.h
>>> index 1af835d47ed..50435df2fcf 100644
>>> --- a/src/intel/compiler/brw_eu_defines.h
>>> +++ b/src/intel/compiler/brw_eu_defines.h
>>> @@ -388,6 +388,9 @@ enum opcode {
>>> SHADER_OPCODE_TYPED_SURFACE_WRITE,
>>> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
>>>  
>>> +   SHADER_OPCODE_RND_MODE_RTE,
>>> +   SHADER_OPCODE_RND_MODE_RTZ,
>>> +
>> We don't need an opcode for each possible rounding mode (there's also RU
>> and RD).  How about you add a single SHADER_OPCODE_RND_MODE opcode
>> taking an immediate with the right rounding mode?
> I like the proposal. It is better having a unique opcode for setting the
> rounding mode. Changed for v2 of this patch.
>> Also, you should be marking the rounding mode opcodes as
>> has_side_effects(), because otherwise you're giving the scheduler the
>> freedom of moving your rounding mode update instruction past the
>> instruction you wanted it to have an effect on...
> Well pointed, we already realized that it was missing while debugging
> the latency problem of the control register. It was hiding the problem
> re-scheduling the cr0 modification to the beginning of the shader.
>>> SHADER_OPCODE_MEMORY_FENCE,
>>>  
>>> SHADER_OPCODE_GEN4_SCRATCH_READ,
>>> @@ -1233,4 +1236,10 @@ enum brw_message_target {
>>>  /* R0 */
>>>  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27
>>>  
>>> +enum PACKED brw_rnd_mode {
>>> +   BRW_RND_MODE_UNSPECIFIED,
>>> +   BRW_RND_MODE_RTE,
>>> +   BRW_RND_MODE_RTZ,
>> Since you're introducing a back-end-specific rounding mode enum already,
>> why not use the hardware values right away so you avoid hard-coding
>> magic constants below.
> At the end we removed  MODE_UNSPECIFIED as it isn't really needed and we
> include Round Up and Down in the enum assignation based using the PRM
> values. Also renamed RTE for RTNE to maintain coherence with nir
> conversion modifiers using the same acronym as PRM.
>
> +enum PACKED brw_rnd_mode {
> +   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
> +   BRW_RND_MODE_RU = 1,    /* Round Up, toward +inf */
> +   BRW_RND_MODE_RD = 2,    /* Round Down, toward -inf */
> +   BRW_RND_MODE_RTZ = 3    /* Round Toward Zero */
> +};
>
> I have a doubt about how to avoid the magic constants to close the v2 of
> this patch. One approach would be using the same code structure and
> taking advantage using the codification of rounding field. This way we
> just formula and expect that the C compiler optimizer to guess that the
> immediate value is a constant.
>

Hm, I'm not particularly worried about the C++ compiler behavior, it's
most likely immaterial.

> switch (mode) {
>     case BRW_RND_MODE_RTZ:
> -  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
> brw_imm_ud(0x0030u));
> +  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
> brw_imm_ud(((unsigned int) mode << 4)));
>    break;
> -   case BRW_RND_MODE_RTE:
> -  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
> brw_imm_ud(0xffcfu));
> +   case BRW_RND_MODE_RTNE:
> +  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
> brw_imm_ud(((unsigned int) mode << 4) | ~0x0030u));
>    break;
>     default:
>

I don't think this is particularly advantageous since you still need to
special-case every possible rounding mode.

> Another approach could be to implement a general solution for all
> supported rounding modes by the hw including Round Up and Down using
> bitwise operations.
>
> /**
>  * Changes the floating point rounding mode updating the control register
>  * field defined at 

Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes

2017-08-29 Thread Chema Casanova
El 25/08/17 a las 20:09, Francisco Jerez escribió:
> Alejandro Piñeiro  writes:
>
>> Although it is possible to emit them directly as AND/OR on brw_fs_nir,
>> having specific opcodes makes it easier to remove duplicate settings
>> later.
>>
>> Signed-off-by:  Alejandro Piñeiro 
>> Signed-off-by:  Jose Maria Casanova Crespo 
>> ---
>>  src/intel/compiler/brw_eu.h |  3 +++
>>  src/intel/compiler/brw_eu_defines.h |  9 +
>>  src/intel/compiler/brw_eu_emit.c| 19 +++
>>  src/intel/compiler/brw_fs_generator.cpp |  8 
>>  src/intel/compiler/brw_shader.cpp   |  5 +
>>  5 files changed, 44 insertions(+)
>>
>> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
>> index a3a9c63239d..0a7f8020398 100644
>> --- a/src/intel/compiler/brw_eu.h
>> +++ b/src/intel/compiler/brw_eu.h
>> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
>>struct brw_reg src,
>>struct brw_reg idx);
>>  
>> +void
>> +brw_rounding_mode(struct brw_codegen *p,
>> +  enum brw_rnd_mode mode);
>>  /***
>>   * brw_eu_util.c:
>>   */
>> diff --git a/src/intel/compiler/brw_eu_defines.h 
>> b/src/intel/compiler/brw_eu_defines.h
>> index 1af835d47ed..50435df2fcf 100644
>> --- a/src/intel/compiler/brw_eu_defines.h
>> +++ b/src/intel/compiler/brw_eu_defines.h
>> @@ -388,6 +388,9 @@ enum opcode {
>> SHADER_OPCODE_TYPED_SURFACE_WRITE,
>> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
>>  
>> +   SHADER_OPCODE_RND_MODE_RTE,
>> +   SHADER_OPCODE_RND_MODE_RTZ,
>> +
> We don't need an opcode for each possible rounding mode (there's also RU
> and RD).  How about you add a single SHADER_OPCODE_RND_MODE opcode
> taking an immediate with the right rounding mode?
I like the proposal. It is better having a unique opcode for setting the
rounding mode. Changed for v2 of this patch.
> Also, you should be marking the rounding mode opcodes as
> has_side_effects(), because otherwise you're giving the scheduler the
> freedom of moving your rounding mode update instruction past the
> instruction you wanted it to have an effect on...
Well pointed, we already realized that it was missing while debugging
the latency problem of the control register. It was hiding the problem
re-scheduling the cr0 modification to the beginning of the shader.
>> SHADER_OPCODE_MEMORY_FENCE,
>>  
>> SHADER_OPCODE_GEN4_SCRATCH_READ,
>> @@ -1233,4 +1236,10 @@ enum brw_message_target {
>>  /* R0 */
>>  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT  27
>>  
>> +enum PACKED brw_rnd_mode {
>> +   BRW_RND_MODE_UNSPECIFIED,
>> +   BRW_RND_MODE_RTE,
>> +   BRW_RND_MODE_RTZ,
> Since you're introducing a back-end-specific rounding mode enum already,
> why not use the hardware values right away so you avoid hard-coding
> magic constants below.
At the end we removed  MODE_UNSPECIFIED as it isn't really needed and we
include Round Up and Down in the enum assignation based using the PRM
values. Also renamed RTE for RTNE to maintain coherence with nir
conversion modifiers using the same acronym as PRM.

+enum PACKED brw_rnd_mode {
+   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
+   BRW_RND_MODE_RU = 1,    /* Round Up, toward +inf */
+   BRW_RND_MODE_RD = 2,    /* Round Down, toward -inf */
+   BRW_RND_MODE_RTZ = 3    /* Round Toward Zero */
+};

I have a doubt about how to avoid the magic constants to close the v2 of
this patch. One approach would be using the same code structure and
taking advantage using the codification of rounding field. This way we
just formula and expect that the C compiler optimizer to guess that the
immediate value is a constant.

switch (mode) {
    case BRW_RND_MODE_RTZ:
-  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
brw_imm_ud(0x0030u));
+  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
brw_imm_ud(((unsigned int) mode << 4)));
   break;
-   case BRW_RND_MODE_RTE:
-  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
brw_imm_ud(0xffcfu));
+   case BRW_RND_MODE_RTNE:
+  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
brw_imm_ud(((unsigned int) mode << 4) | ~0x0030u));
   break;
    default:

Another approach could be to implement a general solution for all
supported rounding modes by the hw including Round Up and Down using
bitwise operations.

/**
 * Changes the floating point rounding mode updating the control register
 * field defined at cr0.0[5-6] bits. This function supports the changes to
 * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise
operations.
 * Only RTNE and RTZ rounding are enabled at nir.
 */

void
brw_rounding_mode(struct brw_codegen *p,
  enum brw_rnd_mode mode)
{

   const unsigned int mask = 0x30u;
   const unsigned int enable_bits =  ((unsigned int) mode) << 4;
   const unsigned int 

Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes

2017-08-28 Thread Alejandro Piñeiro
On 25/08/17 20:09, Francisco Jerez wrote:
> Alejandro Piñeiro  writes:
>
>> Although it is possible to emit them directly as AND/OR on brw_fs_nir,
>> having specific opcodes makes it easier to remove duplicate settings
>> later.
>>
>> Signed-off-by:  Alejandro Piñeiro 
>> Signed-off-by:  Jose Maria Casanova Crespo 
>> ---
>>  src/intel/compiler/brw_eu.h |  3 +++
>>  src/intel/compiler/brw_eu_defines.h |  9 +
>>  src/intel/compiler/brw_eu_emit.c| 19 +++
>>  src/intel/compiler/brw_fs_generator.cpp |  8 
>>  src/intel/compiler/brw_shader.cpp   |  5 +
>>  5 files changed, 44 insertions(+)
>>
>> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
>> index a3a9c63239d..0a7f8020398 100644
>> --- a/src/intel/compiler/brw_eu.h
>> +++ b/src/intel/compiler/brw_eu.h
>> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
>>struct brw_reg src,
>>struct brw_reg idx);
>>  
>> +void
>> +brw_rounding_mode(struct brw_codegen *p,
>> +  enum brw_rnd_mode mode);
>>  /***
>>   * brw_eu_util.c:
>>   */
>> diff --git a/src/intel/compiler/brw_eu_defines.h 
>> b/src/intel/compiler/brw_eu_defines.h
>> index 1af835d47ed..50435df2fcf 100644
>> --- a/src/intel/compiler/brw_eu_defines.h
>> +++ b/src/intel/compiler/brw_eu_defines.h
>> @@ -388,6 +388,9 @@ enum opcode {
>> SHADER_OPCODE_TYPED_SURFACE_WRITE,
>> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
>>  
>> +   SHADER_OPCODE_RND_MODE_RTE,
>> +   SHADER_OPCODE_RND_MODE_RTZ,
>> +
> We don't need an opcode for each possible rounding mode (there's also RU
> and RD).  How about you add a single SHADER_OPCODE_RND_MODE opcode
> taking an immediate with the right rounding mode?
>
> Also, you should be marking the rounding mode opcodes as
> has_side_effects(), because otherwise you're giving the scheduler the
> freedom of moving your rounding mode update instruction past the
> instruction you wanted it to have an effect on...
>
>> SHADER_OPCODE_MEMORY_FENCE,
>>  
>> SHADER_OPCODE_GEN4_SCRATCH_READ,
>> @@ -1233,4 +1236,10 @@ enum brw_message_target {
>>  /* R0 */
>>  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT  27
>>  
>> +enum PACKED brw_rnd_mode {
>> +   BRW_RND_MODE_UNSPECIFIED,
>> +   BRW_RND_MODE_RTE,
>> +   BRW_RND_MODE_RTZ,
> Since you're introducing a back-end-specific rounding mode enum already,
> why not use the hardware values right away so you avoid hard-coding
> magic constants below.
>
>> +};
>> +
>>  #endif /* BRW_EU_DEFINES_H */
>> diff --git a/src/intel/compiler/brw_eu_emit.c 
>> b/src/intel/compiler/brw_eu_emit.c
>> index 0b0d67a5c56..07ad3d9384b 100644
>> --- a/src/intel/compiler/brw_eu_emit.c
>> +++ b/src/intel/compiler/brw_eu_emit.c
>> @@ -3723,3 +3723,22 @@ brw_WAIT(struct brw_codegen *p)
>> brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
>> brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
>>  }
>> +
>> +void
>> +brw_rounding_mode(struct brw_codegen *p,
>> +  enum brw_rnd_mode mode)
>> +{
>> +   switch (mode) {
>> +   case BRW_RND_MODE_UNSPECIFIED:
>> +  /* nothing to do here */
>> +  break;
>> +   case BRW_RND_MODE_RTZ:
>> +  brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(0x0030u));
>> +  break;
>> +   case BRW_RND_MODE_RTE:
>> +  brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(0xffcfu));
> This has undefined behavior because the ALU instructions you use to set
> cr0 have non-zero latency, so the rounding mode change won't take effect
> till ~8 cycles after the instruction is issued.  Any instructions issued
> in that window will pick up the wrong rounding mode.  This is likely one
> of the reasons for your observations off-list regarding some shaders
> using the right or wrong rounding mode non-deterministically depending
> on the scheduler's behaviour.
>
> Here's a spec quote from the SKL PRM suggesting a workaround you should
> probably include in this commit:
>
> | Implementation Restriction on Register Access: When the control
> | register is used as an explicit source and/or destination, hardware
> | does not ensure execution pipeline coherency. Software must set the
> | thread control field to ‘switch’ for an instruction that uses control
> | register as an explicit operand. This is important as the control
> | register is an implicit source for most instructions. For example,
> | fields like FPMode and Accumulator Disable control the arithmetic
> | and/or logic instructions. Therefore, if the instruction updating the
> | control register doesn’t set ‘switch’, subsequent instructions may
> | have undefined results.

Ok, thanks for the spec reference. I will use it to update the patch (or
create a new one).

BR
>
>
>> +  break;
>> +   default:
>> +  unreachable("Not reached");
>> +   }
>> +}

Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes

2017-08-25 Thread Francisco Jerez
Alejandro Piñeiro  writes:

> Although it is possible to emit them directly as AND/OR on brw_fs_nir,
> having specific opcodes makes it easier to remove duplicate settings
> later.
>
> Signed-off-by:  Alejandro Piñeiro 
> Signed-off-by:  Jose Maria Casanova Crespo 
> ---
>  src/intel/compiler/brw_eu.h |  3 +++
>  src/intel/compiler/brw_eu_defines.h |  9 +
>  src/intel/compiler/brw_eu_emit.c| 19 +++
>  src/intel/compiler/brw_fs_generator.cpp |  8 
>  src/intel/compiler/brw_shader.cpp   |  5 +
>  5 files changed, 44 insertions(+)
>
> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> index a3a9c63239d..0a7f8020398 100644
> --- a/src/intel/compiler/brw_eu.h
> +++ b/src/intel/compiler/brw_eu.h
> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
>struct brw_reg src,
>struct brw_reg idx);
>  
> +void
> +brw_rounding_mode(struct brw_codegen *p,
> +  enum brw_rnd_mode mode);
>  /***
>   * brw_eu_util.c:
>   */
> diff --git a/src/intel/compiler/brw_eu_defines.h 
> b/src/intel/compiler/brw_eu_defines.h
> index 1af835d47ed..50435df2fcf 100644
> --- a/src/intel/compiler/brw_eu_defines.h
> +++ b/src/intel/compiler/brw_eu_defines.h
> @@ -388,6 +388,9 @@ enum opcode {
> SHADER_OPCODE_TYPED_SURFACE_WRITE,
> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
>  
> +   SHADER_OPCODE_RND_MODE_RTE,
> +   SHADER_OPCODE_RND_MODE_RTZ,
> +

We don't need an opcode for each possible rounding mode (there's also RU
and RD).  How about you add a single SHADER_OPCODE_RND_MODE opcode
taking an immediate with the right rounding mode?

Also, you should be marking the rounding mode opcodes as
has_side_effects(), because otherwise you're giving the scheduler the
freedom of moving your rounding mode update instruction past the
instruction you wanted it to have an effect on...

> SHADER_OPCODE_MEMORY_FENCE,
>  
> SHADER_OPCODE_GEN4_SCRATCH_READ,
> @@ -1233,4 +1236,10 @@ enum brw_message_target {
>  /* R0 */
>  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT   27
>  
> +enum PACKED brw_rnd_mode {
> +   BRW_RND_MODE_UNSPECIFIED,
> +   BRW_RND_MODE_RTE,
> +   BRW_RND_MODE_RTZ,

Since you're introducing a back-end-specific rounding mode enum already,
why not use the hardware values right away so you avoid hard-coding
magic constants below.

> +};
> +
>  #endif /* BRW_EU_DEFINES_H */
> diff --git a/src/intel/compiler/brw_eu_emit.c 
> b/src/intel/compiler/brw_eu_emit.c
> index 0b0d67a5c56..07ad3d9384b 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -3723,3 +3723,22 @@ brw_WAIT(struct brw_codegen *p)
> brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
> brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
>  }
> +
> +void
> +brw_rounding_mode(struct brw_codegen *p,
> +  enum brw_rnd_mode mode)
> +{
> +   switch (mode) {
> +   case BRW_RND_MODE_UNSPECIFIED:
> +  /* nothing to do here */
> +  break;
> +   case BRW_RND_MODE_RTZ:
> +  brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(0x0030u));
> +  break;
> +   case BRW_RND_MODE_RTE:
> +  brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(0xffcfu));

This has undefined behavior because the ALU instructions you use to set
cr0 have non-zero latency, so the rounding mode change won't take effect
till ~8 cycles after the instruction is issued.  Any instructions issued
in that window will pick up the wrong rounding mode.  This is likely one
of the reasons for your observations off-list regarding some shaders
using the right or wrong rounding mode non-deterministically depending
on the scheduler's behaviour.

Here's a spec quote from the SKL PRM suggesting a workaround you should
probably include in this commit:

| Implementation Restriction on Register Access: When the control
| register is used as an explicit source and/or destination, hardware
| does not ensure execution pipeline coherency. Software must set the
| thread control field to ‘switch’ for an instruction that uses control
| register as an explicit operand. This is important as the control
| register is an implicit source for most instructions. For example,
| fields like FPMode and Accumulator Disable control the arithmetic
| and/or logic instructions. Therefore, if the instruction updating the
| control register doesn’t set ‘switch’, subsequent instructions may
| have undefined results.


> +  break;
> +   default:
> +  unreachable("Not reached");
> +   }
> +}
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index 2ade486705b..e0bd191ea7e 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -2139,6 +2139,14 @@ 

[Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes

2017-08-24 Thread Alejandro Piñeiro
Although it is possible to emit them directly as AND/OR on brw_fs_nir,
having specific opcodes makes it easier to remove duplicate settings
later.

Signed-off-by:  Alejandro Piñeiro 
Signed-off-by:  Jose Maria Casanova Crespo 
---
 src/intel/compiler/brw_eu.h |  3 +++
 src/intel/compiler/brw_eu_defines.h |  9 +
 src/intel/compiler/brw_eu_emit.c| 19 +++
 src/intel/compiler/brw_fs_generator.cpp |  8 
 src/intel/compiler/brw_shader.cpp   |  5 +
 5 files changed, 44 insertions(+)

diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index a3a9c63239d..0a7f8020398 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
   struct brw_reg src,
   struct brw_reg idx);
 
+void
+brw_rounding_mode(struct brw_codegen *p,
+  enum brw_rnd_mode mode);
 /***
  * brw_eu_util.c:
  */
diff --git a/src/intel/compiler/brw_eu_defines.h 
b/src/intel/compiler/brw_eu_defines.h
index 1af835d47ed..50435df2fcf 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -388,6 +388,9 @@ enum opcode {
SHADER_OPCODE_TYPED_SURFACE_WRITE,
SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
 
+   SHADER_OPCODE_RND_MODE_RTE,
+   SHADER_OPCODE_RND_MODE_RTZ,
+
SHADER_OPCODE_MEMORY_FENCE,
 
SHADER_OPCODE_GEN4_SCRATCH_READ,
@@ -1233,4 +1236,10 @@ enum brw_message_target {
 /* R0 */
 # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27
 
+enum PACKED brw_rnd_mode {
+   BRW_RND_MODE_UNSPECIFIED,
+   BRW_RND_MODE_RTE,
+   BRW_RND_MODE_RTZ,
+};
+
 #endif /* BRW_EU_DEFINES_H */
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 0b0d67a5c56..07ad3d9384b 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3723,3 +3723,22 @@ brw_WAIT(struct brw_codegen *p)
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
 }
+
+void
+brw_rounding_mode(struct brw_codegen *p,
+  enum brw_rnd_mode mode)
+{
+   switch (mode) {
+   case BRW_RND_MODE_UNSPECIFIED:
+  /* nothing to do here */
+  break;
+   case BRW_RND_MODE_RTZ:
+  brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(0x0030u));
+  break;
+   case BRW_RND_MODE_RTE:
+  brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(0xffcfu));
+  break;
+   default:
+  unreachable("Not reached");
+   }
+}
diff --git a/src/intel/compiler/brw_fs_generator.cpp 
b/src/intel/compiler/brw_fs_generator.cpp
index 2ade486705b..e0bd191ea7e 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2139,6 +2139,14 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
  brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F));
  break;
 
+  case SHADER_OPCODE_RND_MODE_RTE:
+ brw_rounding_mode(p, BRW_RND_MODE_RTE);
+ break;
+
+  case SHADER_OPCODE_RND_MODE_RTZ:
+ brw_rounding_mode(p, BRW_RND_MODE_RTZ);
+ break;
+
   default:
  unreachable("Unsupported opcode");
 
diff --git a/src/intel/compiler/brw_shader.cpp 
b/src/intel/compiler/brw_shader.cpp
index c62b8ba6140..f22e204e262 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -486,6 +486,11 @@ brw_instruction_name(const struct gen_device_info 
*devinfo, enum opcode op)
   return "tes_add_indirect_urb_offset";
case TES_OPCODE_GET_PRIMITIVE_ID:
   return "tes_get_primitive_id";
+
+   case SHADER_OPCODE_RND_MODE_RTE:
+  return "round_mode_rte";
+   case SHADER_OPCODE_RND_MODE_RTZ:
+  return "round_mode_rtz";
}
 
unreachable("not reached");
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev