On Tue, Mar 31, 2026 at 05:20:34PM -0400, Joel Fernandes wrote:
> Add TLB (Translation Lookaside Buffer) flush support for GPU MMU.
> 
> After modifying page table entries, the GPU's TLB must be invalidated
> to ensure the new mappings take effect. The Tlb struct provides flush
> functionality through BAR0 registers.
> 
> The flush operation writes the page directory base address and triggers
> an invalidation, polling for completion with a 2 second timeout matching
> the Nouveau driver.
> 
> Cc: Nikola Djukic <[email protected]>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
>  drivers/gpu/nova-core/mm.rs     |  1 +
>  drivers/gpu/nova-core/mm/tlb.rs | 95 +++++++++++++++++++++++++++++++++
>  drivers/gpu/nova-core/regs.rs   | 42 +++++++++++++++
>  3 files changed, 138 insertions(+)
>  create mode 100644 drivers/gpu/nova-core/mm/tlb.rs
> 
> diff --git a/drivers/gpu/nova-core/mm.rs b/drivers/gpu/nova-core/mm.rs
> index 8f3089a5fa88..cfe9cbe11d57 100644
> --- a/drivers/gpu/nova-core/mm.rs
> +++ b/drivers/gpu/nova-core/mm.rs
> @@ -5,6 +5,7 @@
>  #![expect(dead_code)]
>  
>  pub(crate) mod pramin;
> +pub(crate) mod tlb;
>  
>  use kernel::sizes::SZ_4K;
>  
> diff --git a/drivers/gpu/nova-core/mm/tlb.rs b/drivers/gpu/nova-core/mm/tlb.rs
> new file mode 100644
> index 000000000000..cd3cbcf4c739
> --- /dev/null
> +++ b/drivers/gpu/nova-core/mm/tlb.rs
> @@ -0,0 +1,95 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! TLB (Translation Lookaside Buffer) flush support for GPU MMU.
> +//!
> +//! After modifying page table entries, the GPU's TLB must be flushed to
> +//! ensure the new mappings take effect. This module provides TLB flush
> +//! functionality for virtual memory managers.
> +//!
> +//! # Example
> +//!
> +//! ```ignore
> +//! use crate::mm::tlb::Tlb;
> +//!
> +//! fn page_table_update(tlb: &Tlb, pdb_addr: VramAddress) -> Result<()> {
> +//!     // ... modify page tables ...
> +//!
> +//!     // Flush TLB to make changes visible (polls for completion).
> +//!     tlb.flush(pdb_addr)?;
> +//!
> +//!     Ok(())
> +//! }
> +//! ```
> +
> +use kernel::{
> +    devres::Devres,
> +    io::poll::read_poll_timeout,
> +    io::Io,
> +    new_mutex,
> +    prelude::*,
> +    sync::{
> +        Arc,
> +        Mutex, //
> +    },
> +    time::Delta, //
> +};
> +
> +use crate::{
> +    driver::Bar0,
> +    mm::VramAddress,
> +    regs, //
> +};
> +
> +/// TLB manager for GPU translation buffer operations.
> +#[pin_data]
> +pub(crate) struct Tlb {
> +    bar: Arc<Devres<Bar0>>,
> +    /// TLB flush serialization lock: This lock is acquired during the
> +    /// DMA fence signalling critical path. It must NEVER be held across any
> +    /// reclaimable CPU memory allocations because the memory reclaim path can
> +    /// call `dma_fence_wait()`, which would deadlock with this lock held.
> +    #[pin]
> +    lock: Mutex<()>,
> +}
> +
> +impl Tlb {
> +    /// Create a new TLB manager.
> +    pub(super) fn new(bar: Arc<Devres<Bar0>>) -> impl PinInit<Self> {
> +        pin_init!(Self {
> +            bar,
> +            lock <- new_mutex!((), "tlb_flush"),
> +        })
> +    }
> +
> +    /// Flush the GPU TLB for a specific page directory base.
> +    ///
> +    /// This invalidates all TLB entries associated with the given PDB address.
> +    /// Must be called after modifying page table entries to ensure the GPU sees
> +    /// the updated mappings.
> +    pub(crate) fn flush(&self, pdb_addr: VramAddress) -> Result {

This landed on my list randomly, so I took a look.

Wouldn’t you want to virtualize the invalidation based on your device?

For example, what if the register interface changes on future hardware?

Or, if you’re a VF, can you even do MMIO?

I’d relayer this.

Matt

> +        let _guard = self.lock.lock();
> +
> +        let bar = self.bar.try_access().ok_or(ENODEV)?;
> +
> +        // Write PDB address.
> +        bar.write_reg(regs::NV_TLB_FLUSH_PDB_LO::from_pdb_addr(pdb_addr.raw_u64()));
> +        bar.write_reg(regs::NV_TLB_FLUSH_PDB_HI::from_pdb_addr(pdb_addr.raw_u64()));
> +
> +        // Trigger flush: invalidate all pages and enable.
> +        bar.write_reg(
> +            regs::NV_TLB_FLUSH_CTRL::zeroed()
> +                .with_page_all(true)
> +                .with_enable(true),
> +        );
> +
> +        // Poll for completion - enable bit clears when flush is done.
> +        read_poll_timeout(
> +            || Ok(bar.read(regs::NV_TLB_FLUSH_CTRL)),
> +            |ctrl: &regs::NV_TLB_FLUSH_CTRL| !ctrl.enable(),
> +            Delta::ZERO,
> +            Delta::from_secs(2),
> +        )?;
> +
> +        Ok(())
> +    }
> +}
> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
> index a3ca02345e20..5e3f5933a55c 100644
> --- a/drivers/gpu/nova-core/regs.rs
> +++ b/drivers/gpu/nova-core/regs.rs
> @@ -548,3 +548,45 @@ pub(crate) mod ga100 {
>          }
>      }
>  }
> +
> +// MMU TLB
> +
> +register! {
> +    /// TLB flush register: PDB address bits [39:8].
> +    pub(crate) NV_TLB_FLUSH_PDB_LO(u32) @ 0x00b830a0 {
> +        /// PDB address bits [39:8].
> +        31:0    pdb_lo => u32;
> +    }
> +
> +    /// TLB flush register: PDB address bits [47:40].
> +    pub(crate) NV_TLB_FLUSH_PDB_HI(u32) @ 0x00b830a4 {
> +        /// PDB address bits [47:40].
> +        7:0     pdb_hi => u8;
> +    }
> +
> +    /// TLB flush control register.
> +    pub(crate) NV_TLB_FLUSH_CTRL(u32) @ 0x00b830b0 {
> +        /// Invalidate all pages.
> +        0:0     page_all => bool;
> +        /// Enable/trigger flush (clears when flush completes).
> +        31:31   enable => bool;
> +    }
> +}
> +
> +impl NV_TLB_FLUSH_PDB_LO {
> +    /// Create a register value from a PDB address.
> +    ///
> +    /// Extracts bits [39:8] of the address and shifts it right by 8 bits.
> +    pub(crate) fn from_pdb_addr(addr: u64) -> Self {
> +        Self::zeroed().with_pdb_lo(((addr >> 8) & 0xFFFF_FFFF) as u32)
> +    }
> +}
> +
> +impl NV_TLB_FLUSH_PDB_HI {
> +    /// Create a register value from a PDB address.
> +    ///
> +    /// Extracts bits [47:40] of the address and shifts it right by 40 bits.
> +    pub(crate) fn from_pdb_addr(addr: u64) -> Self {
> +        Self::zeroed().with_pdb_hi(((addr >> 40) & 0xFF) as u8)
> +    }
> +}
> -- 
> 2.34.1
> 

Reply via email to