On Wed, Dec 19, 2018 at 12:51:03PM +0100, Iago Toral Quiroga wrote:
> Broadwell hardware has a bug that manifests in SIMD8 executions of
> 16-bit MAD instructions when any of the sources is a Y or W component.
> We pack these components in the same SIMD register as components X and
> Z respectively, but starting at offset 16B (so they live in the second
> half of the register). The problem does not exist in SKL or later.
> 
> We work around this issue by moving any such sources to a temporary
> starting at offset 0B. We want to do this after the main optimization loop
> to prevent copy-propagation and friends to undo the fix.
> ---
>  src/intel/compiler/brw_fs.cpp | 48 +++++++++++++++++++++++++++++++++++
>  src/intel/compiler/brw_fs.h   |  1 +
>  2 files changed, 49 insertions(+)
> 
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 933b0b6ffc4..1343c2f4993 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -6449,6 +6449,48 @@ fs_visitor::optimize()
>     validate();
>  }
>  
> +/**
> + * Broadwell hardware has a bug that manifests in SIMD8 executions of 16-bit
> + * MAD instructions when any of the sources is a Y or W component. We pack
> + * these components in the same SIMD register as components X and Z
> + * respectively, but starting at offset 16B (so they live in the second half
> + * of the register).
> + *
> + * We work around this issue by moving any such sources to a temporary
> + * starting at offset 0B. We want to do this after the main optimization loop
> + * to prevent copy-propagation and friends to undo the fix.
> + */
> +void
> +fs_visitor::fixup_hf_mad()
> +{
> +   if (devinfo->gen > 8)

We don't want to run this for gen < 8 either as it would iterate the
instructions in vain. So just:

      if (devinfo->gen == 8)

Otherwise:

Reviewed-by: Topi Pohjolainen <topi.pohjolai...@intel.com>

> +      return;
> +
> +   bool progress = false;
> +
> +   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
> +      if (inst->opcode != BRW_OPCODE_MAD ||
> +          inst->dst.type != BRW_REGISTER_TYPE_HF ||
> +          inst->exec_size > 8)
> +         continue;
> +
> +      for (int i = 0; i < 3; i++) {
> +         if (inst->src[i].offset > 0) {
> +            assert(inst->src[i].type == BRW_REGISTER_TYPE_HF);
> +            const fs_builder ibld =
> +               bld.at(block, inst).exec_all().group(inst->exec_size, 0);
> +            fs_reg tmp = ibld.vgrf(inst->src[i].type);
> +            ibld.MOV(tmp, inst->src[i]);
> +            inst->src[i] = tmp;
> +            progress = true;
> +         }
> +      }
> +   }
> +
> +   if (progress)
> +      invalidate_live_intervals();
> +}
> +
>  /**
>   * Three source instruction must have a GRF/MRF destination register.
>   * ARF NULL is not allowed.  Fix that up by allocating a temporary GRF.
> @@ -6607,6 +6649,7 @@ fs_visitor::run_vs()
>     assign_curb_setup();
>     assign_vs_urb_setup();
>  
> +   fixup_hf_mad();
>     fixup_3src_null_dest();
>     allocate_registers(8, true);
>  
> @@ -6691,6 +6734,7 @@ fs_visitor::run_tcs_single_patch()
>     assign_curb_setup();
>     assign_tcs_single_patch_urb_setup();
>  
> +   fixup_hf_mad();
>     fixup_3src_null_dest();
>     allocate_registers(8, true);
>  
> @@ -6725,6 +6769,7 @@ fs_visitor::run_tes()
>     assign_curb_setup();
>     assign_tes_urb_setup();
>  
> +   fixup_hf_mad();
>     fixup_3src_null_dest();
>     allocate_registers(8, true);
>  
> @@ -6774,6 +6819,7 @@ fs_visitor::run_gs()
>     assign_curb_setup();
>     assign_gs_urb_setup();
>  
> +   fixup_hf_mad();
>     fixup_3src_null_dest();
>     allocate_registers(8, true);
>  
> @@ -6874,6 +6920,7 @@ fs_visitor::run_fs(bool allow_spilling, bool 
> do_rep_send)
>  
>        assign_urb_setup();
>  
> +      fixup_hf_mad();
>        fixup_3src_null_dest();
>        allocate_registers(8, allow_spilling);
>  
> @@ -6918,6 +6965,7 @@ fs_visitor::run_cs(unsigned min_dispatch_width)
>  
>     assign_curb_setup();
>  
> +   fixup_hf_mad();
>     fixup_3src_null_dest();
>     allocate_registers(min_dispatch_width, true);
>  
> diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
> index 163c0008820..f79f8554fb9 100644
> --- a/src/intel/compiler/brw_fs.h
> +++ b/src/intel/compiler/brw_fs.h
> @@ -103,6 +103,7 @@ public:
>     void setup_vs_payload();
>     void setup_gs_payload();
>     void setup_cs_payload();
> +   void fixup_hf_mad();
>     void fixup_3src_null_dest();
>     void assign_curb_setup();
>     void calculate_urb_setup();
> -- 
> 2.17.1
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to