Module: Mesa
Branch: main
Commit: eda940c8557dd68d60e085d8d2df5590ee3cf4f8
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=eda940c8557dd68d60e085d8d2df5590ee3cf4f8

Author: Faith Ekstrand <[email protected]>
Date:   Mon Dec  4 09:03:14 2023 -0600

nak: Make barriers SSA-friendly

The NIR intrinsics now take and return a barrier whenever one is
modified instead of modifying in-place.  In NAK, we give the internal
instructions the same treatment and convert everything to use barrier
SSA values and RegRefs.  In nak_from_nir, we move all barriers to/from
GPRs.  We'll clean up the massive pile of OpBMov later.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26463>

---

 src/compiler/nir/nir_intrinsics.py          |   5 +-
 src/nouveau/compiler/nak_encode_sm70.rs     |  10 ++-
 src/nouveau/compiler/nak_from_nir.rs        | 133 +++++++++-------------------
 src/nouveau/compiler/nak_ir.rs              |  46 +++-------
 src/nouveau/compiler/nak_nir_add_barriers.c |  16 +++-
 5 files changed, 77 insertions(+), 133 deletions(-)

diff --git a/src/compiler/nir/nir_intrinsics.py 
b/src/compiler/nir/nir_intrinsics.py
index 183c6e2c989..380c153f0dd 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -2036,8 +2036,9 @@ intrinsic("end_primitive_nv", dest_comp=1, src_comp=[1], 
indices=[STREAM_ID])
 intrinsic("final_primitive_nv", src_comp=[1])
 
 intrinsic("bar_set_nv", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
-intrinsic("bar_break_nv", src_comp=[1])
-intrinsic("bar_sync_nv", src_comp=[1])
+intrinsic("bar_break_nv", dest_comp=1, bit_sizes=[32], src_comp=[1])
+# src[] = { bar, bar_set }
+intrinsic("bar_sync_nv", src_comp=[1, 1])
 
 # In order to deal with flipped render targets, gl_PointCoord may be flipped
 # in the shader requiring a shader key or extra instructions or it may be
diff --git a/src/nouveau/compiler/nak_encode_sm70.rs 
b/src/nouveau/compiler/nak_encode_sm70.rs
index f5922845ce6..f1c15575a18 100644
--- a/src/nouveau/compiler/nak_encode_sm70.rs
+++ b/src/nouveau/compiler/nak_encode_sm70.rs
@@ -1692,7 +1692,7 @@ impl SM70Instr {
         self.set_opcode(0x355);
 
         self.set_dst(Dst::None);
-        self.set_field(24..28, op.dst.idx());
+        self.set_bar_dst(24..28, op.dst);
 
         self.set_bit(84, true); // .CLEAR
     }
@@ -1717,7 +1717,8 @@ impl SM70Instr {
 
     fn encode_break(&mut self, op: &OpBreak) {
         self.set_opcode(0x942);
-        self.set_field(16..20, op.bar.idx());
+        assert!(op.bar_in.src_ref.as_reg() == op.bar_out.as_reg());
+        self.set_bar_dst(16..20, op.bar_out);
         self.set_pred_src(87..90, 90, op.cond);
     }
 
@@ -1728,14 +1729,15 @@ impl SM70Instr {
         labels: &HashMap<Label, usize>,
     ) {
         self.set_opcode(0x945);
-        self.set_field(16..20, op.bar.idx());
+        assert!(op.bar_in.src_ref.as_reg() == op.bar_out.as_reg());
+        self.set_bar_dst(16..20, op.bar_out);
         self.set_rel_offset(34..64, &op.target, ip, labels);
         self.set_pred_src(87..90, 90, op.cond);
     }
 
     fn encode_bsync(&mut self, op: &OpBSync) {
         self.set_opcode(0x941);
-        self.set_field(16..20, op.bar.idx());
+        self.set_bar_src(16..20, op.bar);
         self.set_pred_src(87..90, 90, op.cond);
     }
 
diff --git a/src/nouveau/compiler/nak_from_nir.rs 
b/src/nouveau/compiler/nak_from_nir.rs
index c7fd2523c21..1e58d8cc54a 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -144,53 +144,13 @@ impl<'a> PhiAllocMap<'a> {
     }
 }
 
-struct BarAlloc {
-    used: u16,
-    num_bars: u8,
-}
-
-impl BarAlloc {
-    pub fn new() -> BarAlloc {
-        BarAlloc {
-            used: 0,
-            num_bars: 0,
-        }
-    }
-
-    pub fn num_bars(&self) -> u8 {
-        self.num_bars
-    }
-
-    pub fn reserve(&mut self, idx: u8) {
-        self.num_bars = max(self.num_bars, idx + 1);
-        let bit = 1 << idx;
-        assert!(self.used & bit == 0);
-        self.used |= bit;
-    }
-
-    pub fn alloc(&mut self) -> BarRef {
-        let idx = self.used.trailing_ones();
-        assert!(idx < 16);
-        let idx = idx as u8;
-        self.reserve(idx);
-        BarRef::new(idx)
-    }
-
-    pub fn free(&mut self, bar: BarRef) {
-        let bit = 1 << bar.idx();
-        assert!(self.used & bit != 0);
-        self.used &= !bit;
-    }
-}
-
 struct ShaderFromNir<'a> {
     nir: &'a nir_shader,
     info: ShaderInfo,
     cfg: CFGBuilder<u32, BasicBlock>,
     label_alloc: LabelAllocator,
     block_label: HashMap<u32, Label>,
-    bar_alloc: BarAlloc,
-    bar_ref_label: HashMap<u32, (BarRef, Label)>,
+    bar_label: HashMap<u32, Label>,
     fs_out_regs: [SSAValue; 34],
     end_block_id: u32,
     ssa_map: HashMap<u32, Vec<SSAValue>>,
@@ -205,8 +165,7 @@ impl<'a> ShaderFromNir<'a> {
             cfg: CFGBuilder::new(),
             label_alloc: LabelAllocator::new(),
             block_label: HashMap::new(),
-            bar_alloc: BarAlloc::new(),
-            bar_ref_label: HashMap::new(),
+            bar_label: HashMap::new(),
             fs_out_regs: [SSAValue::NONE; 34],
             end_block_id: 0,
             ssa_map: HashMap::new(),
@@ -1614,50 +1573,53 @@ impl<'a> ShaderFromNir<'a> {
                 self.set_dst(&intrin.def, dst);
             }
             nir_intrinsic_bar_break_nv => {
-                let idx = &srcs[0].as_def().index;
-                let (bar, _) = self.bar_ref_label.get(idx).unwrap();
+                let src = self.get_src(&srcs[0]);
+                let bar_in = b.bmov_to_bar(src);
 
-                let brk = b.push_op(OpBreak {
-                    bar: *bar,
+                let bar_out = b.alloc_ssa(RegFile::Bar, 1);
+                b.push_op(OpBreak {
+                    bar_out: bar_out.into(),
+                    bar_in: bar_in.into(),
                     cond: SrcRef::True.into(),
                 });
-                brk.deps.yld = true;
+
+                self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()));
             }
             nir_intrinsic_bar_set_nv => {
                 let label = self.label_alloc.alloc();
-                let bar = self.bar_alloc.alloc();
+                let old = self.bar_label.insert(intrin.def.index, label);
+                assert!(old.is_none());
 
-                let bmov = b.push_op(OpBClear {
-                    dst: bar,
+                let bar_clear = b.alloc_ssa(RegFile::Bar, 1);
+                b.push_op(OpBClear {
+                    dst: bar_clear.into(),
                 });
-                bmov.deps.yld = true;
 
-                let bssy = b.push_op(OpBSSy {
-                    bar: bar,
+                let bar_out = b.alloc_ssa(RegFile::Bar, 1);
+                b.push_op(OpBSSy {
+                    bar_out: bar_out.into(),
+                    bar_in: bar_clear.into(),
                     cond: SrcRef::True.into(),
                     target: label,
                 });
-                bssy.deps.yld = true;
 
-                let old =
-                    self.bar_ref_label.insert(intrin.def.index, (bar, label));
-                assert!(old.is_none());
+                self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()));
             }
             nir_intrinsic_bar_sync_nv => {
-                let idx = &srcs[0].as_def().index;
-                let (bar, label) = self.bar_ref_label.get(idx).unwrap();
+                let src = self.get_src(&srcs[0]);
 
-                let bsync = b.push_op(OpBSync {
-                    bar: *bar,
+                let bar = b.bmov_to_bar(src);
+                b.push_op(OpBSync {
+                    bar: bar.into(),
                     cond: SrcRef::True.into(),
                 });
-                bsync.deps.yld = true;
-
-                self.bar_alloc.free(*bar);
 
-                b.push_op(OpNop {
-                    label: Some(*label),
-                });
+                let bar_set_idx = &srcs[1].as_def().index;
+                if let Some(label) = self.bar_label.get(bar_set_idx) {
+                    b.push_op(OpNop {
+                        label: Some(*label),
+                    });
+                }
             }
             nir_intrinsic_bindless_image_atomic
             | nir_intrinsic_bindless_image_atomic_swap => {
@@ -2126,6 +2088,9 @@ impl<'a> ShaderFromNir<'a> {
                     SCOPE_NONE => (),
                     SCOPE_WORKGROUP => {
                         if self.nir.info.stage() == MESA_SHADER_COMPUTE {
+                            // OpBar needs num_barriers > 0 but, as far as we
+                            // know, it doesn't actually use a barrier.
+                            self.info.num_barriers = 1;
                             b.push_op(OpBar {});
                             b.push_op(OpNop { label: None });
                         }
@@ -2538,27 +2503,24 @@ impl<'a> ShaderFromNir<'a> {
             // memory.  Perhaps this is to ensure that our allocation is
             // actually available and not in use by another thread?
             let label = self.label_alloc.alloc();
-            let bar = self.bar_alloc.alloc();
+            let bar_clear = b.alloc_ssa(RegFile::Bar, 1);
 
-            let bmov = b.push_op(OpBClear {
-                dst: bar,
+            b.push_op(OpBClear {
+                dst: bar_clear.into(),
             });
-            bmov.deps.yld = true;
 
-            let bssy = b.push_op(OpBSSy {
-                bar: bar,
+            let bar = b.alloc_ssa(RegFile::Bar, 1);
+            b.push_op(OpBSSy {
+                bar_out: bar.into(),
+                bar_in: bar_clear.into(),
                 cond: SrcRef::True.into(),
                 target: label,
             });
-            bssy.deps.yld = true;
 
-            let bsync = b.push_op(OpBSync {
-                bar: bar,
+            b.push_op(OpBSync {
+                bar: bar.into(),
                 cond: SrcRef::True.into(),
             });
-            bsync.deps.yld = true;
-
-            self.bar_alloc.free(bar);
 
             b.push_op(OpNop { label: Some(label) });
         }
@@ -2743,15 +2705,6 @@ impl<'a> ShaderFromNir<'a> {
     }
 
     pub fn parse_shader(mut self) -> Shader {
-        if self.nir.info.stage() == MESA_SHADER_COMPUTE
-            && self.nir.info.uses_control_barrier()
-        {
-            // We know OpBar uses a barrier but we don't know which one.  
Assume
-            // it implicitly uses B0 and reserve it so it doesn't stomp any
-            // other barriers
-            self.bar_alloc.reserve(0);
-        }
-
         let mut functions = Vec::new();
         for nf in self.nir.iter_functions() {
             if let Some(nfi) = nf.get_impl() {
@@ -2760,8 +2713,6 @@ impl<'a> ShaderFromNir<'a> {
             }
         }
 
-        self.info.num_barriers = self.bar_alloc.num_bars();
-
         // Tessellation evaluation shaders MUST claim to read gl_TessCoord or
         // the hardware will throw an SPH error.
         match &self.info.stage {
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index 272f1da4000..ec6350d201a 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -642,28 +642,6 @@ impl fmt::Display for RegRef {
     }
 }
 
-#[derive(Clone, Copy, Eq, Hash, PartialEq)]
-pub struct BarRef {
-    idx: u8,
-}
-
-impl BarRef {
-    pub fn new(idx: u8) -> BarRef {
-        assert!(idx < 16);
-        BarRef { idx: idx }
-    }
-
-    pub fn idx(&self) -> u8 {
-        self.idx
-    }
-}
-
-impl fmt::Display for BarRef {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "B{}", self.idx())
-    }
-}
-
 #[derive(Clone, Copy)]
 pub enum Dst {
     None,
@@ -3633,7 +3611,7 @@ impl_display_for_op!(OpMemBar);
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpBClear {
-    pub dst: BarRef,
+    pub dst: Dst,
 }
 
 impl DisplayOp for OpBClear {
@@ -3665,7 +3643,10 @@ impl_display_for_op!(OpBMov);
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpBreak {
-    pub bar: BarRef,
+    pub bar_out: Dst,
+
+    #[src_type(Bar)]
+    pub bar_in: Src,
 
     #[src_type(Pred)]
     pub cond: Src,
@@ -3673,7 +3654,7 @@ pub struct OpBreak {
 
 impl DisplayOp for OpBreak {
     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "break {} {}", self.cond, self.bar)
+        write!(f, "break {} {}", self.bar_in, self.cond)
     }
 }
 impl_display_for_op!(OpBreak);
@@ -3681,7 +3662,10 @@ impl_display_for_op!(OpBreak);
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpBSSy {
-    pub bar: BarRef,
+    pub bar_out: Dst,
+
+    #[src_type(Pred)]
+    pub bar_in: Src,
 
     #[src_type(Pred)]
     pub cond: Src,
@@ -3691,7 +3675,7 @@ pub struct OpBSSy {
 
 impl DisplayOp for OpBSSy {
     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "bssy {} {} {}", self.cond, self.bar, self.target)
+        write!(f, "bssy {} {} {}", self.bar_in, self.cond, self.target)
     }
 }
 impl_display_for_op!(OpBSSy);
@@ -3699,7 +3683,8 @@ impl_display_for_op!(OpBSSy);
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpBSync {
-    pub bar: BarRef,
+    #[src_type(Bar)]
+    pub bar: Src,
 
     #[src_type(Pred)]
     pub cond: Src,
@@ -3707,7 +3692,7 @@ pub struct OpBSync {
 
 impl DisplayOp for OpBSync {
     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "bsync {} {}", self.cond, self.bar)
+        write!(f, "bsync {} {}", self.bar, self.cond)
     }
 }
 impl_display_for_op!(OpBSync);
@@ -4704,9 +4689,6 @@ impl Instr {
             | Op::MemBar(_)
             | Op::Kill(_)
             | Op::Nop(_)
-            | Op::BClear(_)
-            | Op::Break(_)
-            | Op::BSSy(_)
             | Op::BSync(_)
             | Op::Bra(_)
             | Op::Exit(_)
diff --git a/src/nouveau/compiler/nak_nir_add_barriers.c 
b/src/nouveau/compiler/nak_nir_add_barriers.c
index c8b644ae7f3..b3147313126 100644
--- a/src/nouveau/compiler/nak_nir_add_barriers.c
+++ b/src/nouveau/compiler/nak_nir_add_barriers.c
@@ -10,7 +10,8 @@
 
 struct barrier {
    nir_cf_node *node;
-   nir_def *bar;
+   nir_def *bar_set;
+   nir_def *bar_reg;
 };
 
 struct add_barriers_state {
@@ -30,13 +31,16 @@ add_bar_cf_node(nir_cf_node *node, struct 
add_barriers_state *state)
 
    b->cursor = nir_after_block(before);
    nir_def *bar = nir_bar_set_nv(b);
+   nir_def *bar_reg = nir_decl_reg(b, 1, 32, 0);
+   nir_store_reg(b, bar, bar_reg);
 
    b->cursor = nir_before_block_after_phis(after);
-   nir_bar_sync_nv(b, bar);
+   nir_bar_sync_nv(b, nir_load_reg(b, bar_reg), bar);
 
    struct barrier barrier = {
       .node = node,
-      .bar = bar,
+      .bar_set = bar,
+      .bar_reg = bar_reg,
    };
    util_dynarray_append(&state->barriers, struct barrier, barrier);
 
@@ -72,7 +76,9 @@ break_loop_bars(nir_block *block, struct add_barriers_state 
*state)
       const struct barrier *bar =
          util_dynarray_element(&state->barriers, struct barrier, idx);
       if (bar->node == p) {
-         nir_bar_break_nv(b, bar->bar);
+         nir_def *bar_val = nir_load_reg(b, bar->bar_reg);
+         bar_val = nir_bar_break_nv(b, bar_val);
+         nir_store_reg(b, bar_val, bar->bar_reg);
          idx--;
       }
    }
@@ -281,6 +287,8 @@ nak_nir_add_barriers_impl(nir_function_impl *impl,
       nir_metadata_preserve(impl, nir_metadata_block_index |
                                   nir_metadata_dominance |
                                   nir_metadata_loop_analysis);
+
+      nir_lower_reg_intrinsics_to_ssa_impl(impl);
    } else {
       nir_metadata_preserve(impl, nir_metadata_all);
    }

Reply via email to