Module: Mesa
Branch: staging/21.0
Commit: da38b604e3f772084faf3b637b44df1a6aff4f1c
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=da38b604e3f772084faf3b637b44df1a6aff4f1c

Author: Lionel Landwerlin <[email protected]>
Date:   Wed Mar 24 09:56:42 2021 +0200

intel/fs/copy_prop: check stride constraints with actual final type

In some cases we will change the type of the destination register of
an instruction. This is the type we should use to verify that we're
allowed to do the replacement.

Otherwise we can hit restrictions (on CHV and the upcoming Xe-HP, for
instance) when copy propagation transforms this:

send(16) (mlen: 2) vgrf10:UD, 0u, 0u, vgrf35:D, null:UD
mov(16) vgrf11:UW, vgrf10<2>:UW
mov(16) vgrf12:UW, vgrf10+0.2<2>:UW
mov(16) vgrf15:HF, |vgrf11|:HF
mov(16) vgrf16:HF, |vgrf12|:HF
mov(8) vgrf41<2>:UW, vgrf15+0.0:UW group0
mov(8) vgrf42<2>:UW, vgrf15+0.16:UW group8
mov(8) vgrf45<2>:UW, vgrf16+0.0:UW group0
mov(8) vgrf46<2>:UW, vgrf16+0.16:UW group8

into this :

send(16) (mlen: 2) vgrf10:UD, 0u, 0u, vgrf35:D, null:UD
mov(8) vgrf41<2>:HF, |vgrf10+0.0|<2>:HF group0
mov(8) vgrf42<2>:HF, |vgrf10+1.0|<2>:HF group8
mov(8) vgrf45<2>:HF, |vgrf10+0.2|<2>:HF group0
mov(8) vgrf46<2>:HF, |vgrf10+1.2|<2>:HF group8

Because of the floating-point use, the stride and offsets should be the
same.

v2: Fix final destination type selection (Curro)

v3: constify (Curro)

Signed-off-by: Lionel Landwerlin <[email protected]>
Cc: <[email protected]>
Reviewed-by: Francisco Jerez <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9832>
(cherry picked from commit aa53665fda63484495d736ddd1d4542c66814e61)

---

 .pick_status.json                              |  2 +-
 src/intel/compiler/brw_fs_copy_propagation.cpp | 14 ++++++++++----
 src/intel/compiler/brw_ir_fs.h                 | 12 ++++++++++--
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 1a075e082da..be1a9e301f1 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -391,7 +391,7 @@
         "description": "intel/fs/copy_prop: check stride constraints with 
actual final type",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp 
b/src/intel/compiler/brw_fs_copy_propagation.cpp
index 6896987055f..2bb70b36de0 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -367,7 +367,8 @@ is_logic_op(enum opcode opcode)
 }
 
 static bool
-can_take_stride(fs_inst *inst, unsigned arg, unsigned stride,
+can_take_stride(fs_inst *inst, brw_reg_type dst_type,
+                unsigned arg, unsigned stride,
                 const gen_device_info *devinfo)
 {
    if (stride > 4)
@@ -377,9 +378,9 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned 
stride,
     * of the corresponding channel of the destination, and the provided stride
     * would break this restriction.
     */
-   if (has_dst_aligned_region_restriction(devinfo, inst) &&
+   if (has_dst_aligned_region_restriction(devinfo, inst, dst_type) &&
        !(type_sz(inst->src[arg].type) * stride ==
-           type_sz(inst->dst.type) * inst->dst.stride ||
+           type_sz(dst_type) * inst->dst.stride ||
          stride == 0))
       return false;
 
@@ -528,10 +529,15 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, 
acp_entry *entry)
    if (instruction_requires_packed_data(inst) && entry_stride != 1)
       return false;
 
+   const brw_reg_type dst_type = (has_source_modifiers &&
+                                  entry->dst.type != inst->src[arg].type) ?
+      entry->dst.type : inst->dst.type;
+
    /* Bail if the result of composing both strides would exceed the
     * hardware limit.
     */
-   if (!can_take_stride(inst, arg, entry_stride * inst->src[arg].stride,
+   if (!can_take_stride(inst, dst_type, arg,
+                        entry_stride * inst->src[arg].stride,
                         devinfo))
       return false;
 
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index 3a4acc1834a..07e59ff8946 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -549,7 +549,8 @@ is_unordered(const fs_inst *inst)
  */
 static inline bool
 has_dst_aligned_region_restriction(const gen_device_info *devinfo,
-                                   const fs_inst *inst)
+                                   const fs_inst *inst,
+                                   brw_reg_type dst_type)
 {
    const brw_reg_type exec_type = get_exec_type(inst);
    /* Even though the hardware spec claims that "integer DWord multiply"
@@ -563,13 +564,20 @@ has_dst_aligned_region_restriction(const gen_device_info 
*devinfo,
        (inst->opcode == BRW_OPCODE_MAD &&
         MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4));
 
-   if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 ||
+   if (type_sz(dst_type) > 4 || type_sz(exec_type) > 4 ||
        (type_sz(exec_type) == 4 && is_dword_multiply))
       return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo);
    else
       return false;
 }
 
+static inline bool
+has_dst_aligned_region_restriction(const gen_device_info *devinfo,
+                                   const fs_inst *inst)
+{
+   return has_dst_aligned_region_restriction(devinfo, inst, inst->dst.type);
+}
+
 /**
  * Return whether the LOAD_PAYLOAD instruction is a plain copy of bits from
  * the specified register file into a VGRF.

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to