Module: Mesa
Branch: main
Commit: 7cd9680554b16e7de07873cd3be428953be9ad07
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7cd9680554b16e7de07873cd3be428953be9ad07

Author: Faith Ekstrand <[email protected]>
Date:   Mon Dec  4 09:14:56 2023 -0600

nak: Add back OpBMov with better semantics

It now takes a regular Src and Dst and we handle both GPR -> Bar vs.
Bar -> GPR forms in the emit code.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26463>

---

 src/nouveau/compiler/nak_builder.rs     | 22 +++++++++++++++++
 src/nouveau/compiler/nak_encode_sm70.rs | 43 +++++++++++++++++++++++++++++++++
 src/nouveau/compiler/nak_ir.rs          | 31 +++++++++++++++++++++++-
 3 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/src/nouveau/compiler/nak_builder.rs b/src/nouveau/compiler/nak_builder.rs
index 7280f96bf76..f06665703a5 100644
--- a/src/nouveau/compiler/nak_builder.rs
+++ b/src/nouveau/compiler/nak_builder.rs
@@ -354,6 +354,28 @@ pub trait SSABuilder: Builder {
         self.copy_to(dst.into(), src);
         dst
     }
+
+    fn bmov_to_bar(&mut self, src: Src) -> SSARef {
+        assert!(src.src_ref.as_ssa().unwrap().file() == RegFile::GPR);
+        let dst = self.alloc_ssa(RegFile::Bar, 1);
+        self.push_op(OpBMov {
+            dst: dst.into(),
+            src: src,
+            clear: false,
+        });
+        dst
+    }
+
+    fn bmov_to_gpr(&mut self, src: Src) -> SSARef {
+        assert!(src.src_ref.as_ssa().unwrap().file() == RegFile::Bar);
+        let dst = self.alloc_ssa(RegFile::GPR, 1);
+        self.push_op(OpBMov {
+            dst: dst.into(),
+            src: src,
+            clear: false,
+        });
+        dst
+    }
 }
 
 pub struct InstrBuilder {
diff --git a/src/nouveau/compiler/nak_encode_sm70.rs b/src/nouveau/compiler/nak_encode_sm70.rs
index f625a27bfb3..f5922845ce6 100644
--- a/src/nouveau/compiler/nak_encode_sm70.rs
+++ b/src/nouveau/compiler/nak_encode_sm70.rs
@@ -51,6 +51,14 @@ fn src_mod_is_bnot(src_mod: SrcMod) -> bool {
     }
 }
 
+fn dst_is_bar(dst: Dst) -> bool {
+    match dst {
+        Dst::None => false,
+        Dst::SSA(ssa) => ssa.file() == RegFile::Bar,
+        Dst::Reg(reg) => reg.file() == RegFile::Bar,
+    }
+}
+
 impl ALUSrc {
     fn from_src_file(src: &Src, file: RegFile) -> ALUSrc {
         match src.src_ref {
@@ -243,6 +251,22 @@ impl SM70Instr {
         }
     }
 
+    fn set_bar_reg(&mut self, range: Range<usize>, reg: RegRef) {
+        assert!(range.len() == 4);
+        assert!(reg.file() == RegFile::Bar);
+        assert!(reg.comps() == 1);
+        self.set_field(range, reg.base_idx());
+    }
+
+    fn set_bar_dst(&mut self, range: Range<usize>, dst: Dst) {
+        self.set_bar_reg(range, *dst.as_reg().unwrap());
+    }
+
+    fn set_bar_src(&mut self, range: Range<usize>, src: Src) {
+        assert!(src.src_mod.is_none());
+        self.set_bar_reg(range, *src.src_ref.as_reg().unwrap());
+    }
+
     fn set_alu_reg(
         &mut self,
         range: Range<usize>,
@@ -1673,6 +1697,24 @@ impl SM70Instr {
         self.set_bit(84, true); // .CLEAR
     }
 
+    fn encode_bmov(&mut self, op: &OpBMov) {
+        if dst_is_bar(op.dst) {
+            self.set_opcode(0x356);
+
+            self.set_bar_dst(24..28, op.dst);
+            self.set_reg_src(32..40, op.src);
+
+            self.set_bit(84, op.clear);
+        } else {
+            self.set_opcode(0x355);
+
+            self.set_dst(op.dst);
+            self.set_bar_src(24..28, op.src);
+
+            self.set_bit(84, op.clear);
+        }
+    }
+
     fn encode_break(&mut self, op: &OpBreak) {
         self.set_opcode(0x942);
         self.set_field(16..20, op.bar.idx());
@@ -1909,6 +1951,7 @@ impl SM70Instr {
             Op::CCtl(op) => si.encode_cctl(&op),
             Op::MemBar(op) => si.encode_membar(&op),
             Op::BClear(op) => si.encode_bclear(&op),
+            Op::BMov(op) => si.encode_bmov(&op),
             Op::Break(op) => si.encode_break(&op),
             Op::BSSy(op) => si.encode_bssy(&op, ip, labels),
             Op::BSync(op) => si.encode_bsync(&op),
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index 75bc352b2c0..272f1da4000 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -3643,6 +3643,25 @@ impl DisplayOp for OpBClear {
 }
 impl_display_for_op!(OpBClear);
 
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpBMov {
+    pub dst: Dst,
+    pub src: Src,
+    pub clear: bool,
+}
+
+impl DisplayOp for OpBMov {
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "bmov.32")?;
+        if self.clear {
+            write!(f, ".clear")?;
+        }
+        write!(f, " {}", self.src)
+    }
+}
+impl_display_for_op!(OpBMov);
+
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpBreak {
@@ -4326,6 +4345,7 @@ pub enum Op {
     CCtl(OpCCtl),
     MemBar(OpMemBar),
     BClear(OpBClear),
+    BMov(OpBMov),
     Break(OpBreak),
     BSSy(OpBSSy),
     BSync(OpBSync),
@@ -4695,12 +4715,13 @@ impl Instr {
             | Op::FSOut(_)
             | Op::Out(_)
             | Op::OutFinal(_) => false,
+            Op::BMov(op) => !op.clear,
             _ => true,
         }
     }
 
     pub fn has_fixed_latency(&self) -> bool {
-        match self.op {
+        match &self.op {
             // Float ALU
             Op::FAdd(_)
             | Op::FFma(_)
@@ -4768,6 +4789,14 @@ impl Instr {
             Op::Bra(_) | Op::Exit(_) => true,
             Op::WarpSync(_) => false,
 
+            // BMOV: barriers only when using gprs (and only valid for the gpr),
+            // no barriers for the others.
+            Op::BMov(op) => match &op.dst {
+                Dst::None => true,
+                Dst::SSA(vec) => vec.file() == RegFile::Bar,
+                Dst::Reg(reg) => reg.file() == RegFile::Bar,
+            },
+
             // Geometry ops
             Op::Out(_) | Op::OutFinal(_) => false,
 

Reply via email to