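This removes a "TODO" item in the code: the DMA address width was hardcoded to 47 bits, which works on Ampere and Ada GPUs. Hopper/Blackwell+ support a larger DMA address width (52 bits), so do an early read of boot42 during probe, in order to pick the correct DMA mask for the detected architecture.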
Cc: Gary Guo <[email protected]> Signed-off-by: John Hubbard <[email protected]> --- drivers/gpu/nova-core/driver.rs | 33 ++++++++++++++-------------- drivers/gpu/nova-core/gpu.rs | 38 ++++++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 5a4cc047bcfc..1babde79aba8 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -5,7 +5,6 @@ device::Core, devres::Devres, dma::Device, - dma::DmaMask, pci, pci::{ Class, @@ -17,7 +16,10 @@ sync::Arc, // }; -use crate::gpu::Gpu; +use crate::gpu::{ + Gpu, + Spec, // +}; #[pin_data] pub(crate) struct NovaCore { @@ -29,14 +31,6 @@ pub(crate) struct NovaCore { const BAR0_SIZE: usize = SZ_16M; -// For now we only support Ampere which can use up to 47-bit DMA addresses. -// -// TODO: Add an abstraction for this to support newer GPUs which may support -// larger DMA addresses. Limiting these GPUs to smaller address widths won't -// have any adverse affects, unless installed on systems which require larger -// DMA addresses. These systems should be quite rare. -const GPU_DMA_BITS: u32 = 47; - pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>; kernel::pci_device_table!( @@ -75,18 +69,23 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, E pdev.enable_device_mem()?; pdev.set_master(); - // SAFETY: No concurrent DMA allocations or mappings can be made because - // the device is still being probed and therefore isn't being used by - // other threads of execution. - unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? }; - - let bar = Arc::pin_init( + let devres_bar = Arc::pin_init( pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0"), GFP_KERNEL, )?; + // Read the GPU spec early to determine the correct DMA address width. + // Hopper/Blackwell+ support 52-bit DMA addresses, earlier architectures use 47-bit. 
+ let spec = Spec::new(pdev.as_ref(), devres_bar.access(pdev.as_ref())?)?; + dev_info!(pdev.as_ref(), "NVIDIA ({})\n", spec); + + // SAFETY: No concurrent DMA allocations or mappings can be made because + // the device is still being probed and therefore isn't being used by + // other threads of execution. + unsafe { pdev.dma_set_mask_and_coherent(spec.chipset().arch().dma_mask())? }; + Ok(try_pin_init!(Self { - gpu <- Gpu::new(pdev, bar.clone(), bar.access(pdev.as_ref())?), + gpu <- Gpu::new(pdev, devres_bar.clone(), devres_bar.access(pdev.as_ref())?, spec), _reg <- auxiliary::Registration::new( pdev.as_ref(), c"nova-drm", diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 289785530ad7..2e7b90b80877 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -3,6 +3,7 @@ use kernel::{ device, devres::Devres, + dma::DmaMask, fmt, pci, prelude::*, @@ -102,7 +103,7 @@ fn try_from(value: u32) -> Result<Self, Self::Error> { }); impl Chipset { - pub(crate) fn arch(&self) -> Architecture { + pub(crate) const fn arch(&self) -> Architecture { match self { Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116 => { Architecture::Turing @@ -155,6 +156,19 @@ pub(crate) enum Architecture { Blackwell = 0x1b, } +impl Architecture { + /// Returns the DMA mask supported by this architecture. + /// + /// Hopper and Blackwell support 52-bit DMA addresses, while earlier architectures + /// (Turing, Ampere, Ada) support 47-bit DMA addresses. 
+ pub(crate) const fn dma_mask(&self) -> DmaMask { + match self { + Self::Turing | Self::Ampere | Self::Ada => DmaMask::new::<47>(), + Self::Hopper | Self::Blackwell => DmaMask::new::<52>(), + } + } +} + impl TryFrom<u8> for Architecture { type Error = Error; @@ -204,7 +218,7 @@ pub(crate) struct Spec { } impl Spec { - fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> { + pub(crate) fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> { // Some brief notes about boot0 and boot42, in chronological order: // // NV04 through NV50: @@ -234,6 +248,10 @@ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> { dev_err!(dev, "Unsupported chipset: {}\n", boot42); }) } + + pub(crate) fn chipset(&self) -> Chipset { + self.chipset + } } impl TryFrom<regs::NV_PMC_BOOT_42> for Spec { @@ -281,33 +299,33 @@ pub(crate) fn new<'a>( pdev: &'a pci::Device<device::Bound>, devres_bar: Arc<Devres<Bar0>>, bar: &'a Bar0, + spec: Spec, ) -> impl PinInit<Self, Error> + 'a { - try_pin_init!(Self { - spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| { - dev_info!(pdev.as_ref(),"NVIDIA ({})\n", spec); - })?, + let chipset = spec.chipset(); + try_pin_init!(Self { // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. _: { gfw::wait_gfw_boot_completion(bar) .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete\n"))?; }, - sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, + sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, chipset)?, gsp_falcon: Falcon::new( pdev.as_ref(), - spec.chipset, + chipset, ) .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, - sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?, + sec2_falcon: Falcon::new(pdev.as_ref(), chipset)?, gsp <- Gsp::new(pdev), - _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? }, + _: { gsp.boot(pdev, bar, chipset, gsp_falcon, sec2_falcon)? }, bar: devres_bar, + spec, }) } -- 2.52.0
