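This removes a "TODO" item in the code: the DMA address width was hardcoded to 47 bits, which is correct for Ampere and Ada GPUs. Hopper/Blackwell+ GPUs support a larger DMA address width (52 bits), so do an early read of boot42 during probe, in order to pick the correct value for the detected architecture.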
Signed-off-by: John Hubbard <[email protected]> --- drivers/gpu/nova-core/driver.rs | 33 +++++++++++++++++---------------- drivers/gpu/nova-core/gpu.rs | 29 ++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index d91bbc50cde7..3179a4d47af4 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -4,8 +4,10 @@ auxiliary, c_str, device::Core, - dma::Device, - dma::DmaMask, + dma::{ + Device, + DmaMask, // + }, pci, pci::{ Class, @@ -17,7 +19,10 @@ sync::Arc, // }; -use crate::gpu::Gpu; +use crate::gpu::{ + read_architecture, + Gpu, // +}; #[pin_data] pub(crate) struct NovaCore { @@ -28,14 +33,6 @@ pub(crate) struct NovaCore { const BAR0_SIZE: usize = SZ_16M; -// For now we only support Ampere which can use up to 47-bit DMA addresses. -// -// TODO: Add an abstraction for this to support newer GPUs which may support -// larger DMA addresses. Limiting these GPUs to smaller address widths won't -// have any adverse affects, unless installed on systems which require larger -// DMA addresses. These systems should be quite rare. -const GPU_DMA_BITS: u32 = 47; - pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>; kernel::pci_device_table!( @@ -73,11 +70,6 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self pdev.enable_device_mem()?; pdev.set_master(); - // SAFETY: No concurrent DMA allocations or mappings can be made because - // the device is still being probed and therefore isn't being used by - // other threads of execution. - unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? 
}; - let devres_bar = Arc::pin_init( pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0")), GFP_KERNEL, @@ -88,6 +80,15 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self let bar_clone = Arc::clone(&devres_bar); let bar = bar_clone.access(pdev.as_ref())?; + // Read the GPU architecture early to determine the correct DMA address width. + // Hopper/Blackwell+ support 52-bit DMA addresses, earlier architectures use 47-bit. + let arch = read_architecture(bar)?; + + // SAFETY: No concurrent DMA allocations or mappings can be made because + // the device is still being probed and therefore isn't being used by + // other threads of execution. + unsafe { pdev.dma_set_mask_and_coherent(DmaMask::try_new(arch.dma_addr_bits())?)? }; + let this = KBox::pin_init( try_pin_init!(Self { gpu <- Gpu::new(pdev, devres_bar, bar), diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index c21ce91924f5..624bbc2a54e8 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -102,7 +102,7 @@ fn try_from(value: u32) -> Result<Self, Self::Error> { }); impl Chipset { - pub(crate) fn arch(&self) -> Architecture { + pub(crate) const fn arch(&self) -> Architecture { match self { Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116 => { Architecture::Turing @@ -155,6 +155,19 @@ pub(crate) enum Architecture { Blackwell = 0x1b, } +impl Architecture { + /// Returns the number of DMA address bits supported by this architecture. + /// + /// Hopper and Blackwell support 52-bit DMA addresses, while earlier architectures + /// (Turing, Ampere, Ada) support 47-bit DMA addresses. 
+ pub(crate) const fn dma_addr_bits(&self) -> u32 { + match self { + Self::Turing | Self::Ampere | Self::Ada => 47, + Self::Hopper | Self::Blackwell => 52, + } + } +} + impl TryFrom<u8> for Architecture { type Error = Error; @@ -203,6 +216,20 @@ pub(crate) struct Spec { revision: Revision, } +/// Reads the GPU architecture from BAR0 registers. +/// +/// This is a lightweight check used early in probe to determine the correct DMA address width +/// before the full [`Spec`] is constructed. +pub(crate) fn read_architecture(bar: &Bar0) -> Result<Architecture> { + let boot0 = regs::NV_PMC_BOOT_0::read(bar); + + if boot0.is_older_than_fermi() { + return Err(ENODEV); + } + + regs::NV_PMC_BOOT_42::read(bar).architecture() +} + impl Spec { fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> { // Some brief notes about boot0 and boot42, in chronological order: -- 2.52.0
