Turing and GA100 use programmed I/O (PIO) instead of DMA to upload firmware images into Falcon memory.
A new firmware called the Generic Bootloader (as opposed to the GSP Bootloader) is used to upload FWSEC. Signed-off-by: Timur Tabi <[email protected]> --- drivers/gpu/nova-core/falcon.rs | 149 +++++++++++++++++++++++- drivers/gpu/nova-core/firmware.rs | 4 +- drivers/gpu/nova-core/firmware/fwsec.rs | 142 +++++++++++++++++++++- drivers/gpu/nova-core/gsp/boot.rs | 10 +- 4 files changed, 293 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 2770d608a2cf..88f65ee7805a 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -12,14 +12,17 @@ io::poll::read_poll_timeout, prelude::*, sync::aref::ARef, - time::{ - Delta, // - }, + time::Delta, + transmute::AsBytes, // }; use crate::{ dma::DmaObject, driver::Bar0, + firmware::fwsec::{ + BootloaderDmemDescV2, + GenericBootloader, // + }, gpu::Chipset, num::{ FromSafeCast, @@ -406,6 +409,146 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result { Ok(()) } + + /// See nvkm_falcon_pio_wr - takes a byte array instead of a FalconFirmware + fn pio_wr_bytes( + &self, + bar: &Bar0, + img: &[u8], + mem_base: u16, + target_mem: FalconMem, + port: u8, + tag: u16 + ) { + let port = usize::from(port); + + match target_mem { + FalconMem::ImemSecure | FalconMem::ImemNonSecure => { + regs::NV_PFALCON_FALCON_IMEMC::default() + .set_secure(target_mem == FalconMem::ImemSecure) + .set_aincw(true) + .set_offs(mem_base) + .write(bar, &E::ID, port); + + let mut tag = tag; + for block in img.chunks(256) { + regs::NV_PFALCON_FALCON_IMEMT::default() + .set_tag(tag) + .write(bar, &E::ID, port); + for word in block.chunks(4) { + let w = u32::from_le_bytes(word.try_into().unwrap()); + regs::NV_PFALCON_FALCON_IMEMD::default() + .set_data(w) + .write(bar, &E::ID, port); + } + tag += 1; + } + }, + FalconMem::Dmem => { + regs::NV_PFALCON_FALCON_DMEMC::default() + .set_aincw(true) + .set_offs(mem_base) + .write(bar, &E::ID, port); + + for block in img.chunks(256) { + 
for word in block.chunks_exact(4) { + regs::NV_PFALCON_FALCON_DMEMD::default() + .set_data(u32::from_le_bytes(word.try_into().unwrap())) + .write(bar, &E::ID, port); + } + } + }, + } + } + + fn pio_wr<F: FalconFirmware<Target = E>>( + &self, + bar: &Bar0, + fw: &F, + target_mem: FalconMem, + load_offsets: &FalconLoadTarget, + port: u8, + tag: u16, + ) -> Result { + let start = usize::from_safe_cast(load_offsets.src_start); + let len = usize::from_safe_cast(load_offsets.len); + + // SAFETY: as_slice() ensures that start+len is within range + let data = unsafe { fw.as_slice(start, len)? }; + + self.pio_wr_bytes(bar, data, u16::try_from(load_offsets.dst_start)?, target_mem, port, tag); + + Ok(()) + } + + /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. + pub(crate) fn pio_load<F: FalconFirmware<Target = E>>( + &self, + bar: &Bar0, + fw: &F, + gbl: Option<&GenericBootloader> + ) -> Result { + let imem_sec = fw.imem_sec_load_params(); + let imem_ns = fw.imem_ns_load_params().unwrap(); + let dmem = fw.dmem_load_params(); + + regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID) + .set_allow_phys_no_ctx(true) + .write(bar, &E::ID); + + regs::NV_PFALCON_FALCON_DMACTL::default() + .write(bar, &E::ID); + + // If the Generic Bootloader was passed, then use it to boot FRTS + if let Some(gbl) = gbl { + let dst_start = u16::try_from(0x10000 - gbl.desc.code_size)?; + let data = &gbl.ucode[..usize::from_safe_cast(gbl.desc.code_size)]; + let tag = u16::try_from(gbl.desc.start_tag)?; + + self.pio_wr_bytes(bar, data, dst_start, FalconMem::ImemNonSecure, 0, tag); + + // This structure tells the generic bootloader where to find the FWSEC + // image. 
+ let dmem_desc = BootloaderDmemDescV2 { + reserved: [0; 4], + signature: [0; 4], + ctx_dma: 4, // FALCON_DMAIDX_PHYS_SYS_NCOH + code_dma_base: fw.dma_handle(), + non_sec_code_off: imem_ns.dst_start, + non_sec_code_size: imem_ns.len, + sec_code_off: imem_sec.dst_start, + sec_code_size: imem_sec.len, + code_entry_point: 0, + data_dma_base: fw.dma_handle() + u64::from(dmem.src_start), + data_size: dmem.len, + argc: 0, + argv: 0, + }; + + regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 4, |v| { + v.set_target(FalconFbifTarget::CoherentSysmem) + .set_mem_type(FalconFbifMemType::Physical) + }); + + self.pio_wr_bytes(bar, dmem_desc.as_bytes(), 0, FalconMem::Dmem, 0, 0); + } else { + self.pio_wr(bar, fw, FalconMem::ImemNonSecure, &imem_ns, 0, + u16::try_from(imem_ns.dst_start >> 8)?)?; + self.pio_wr(bar, fw, FalconMem::ImemSecure, &imem_sec, 0, + u16::try_from(imem_sec.dst_start >> 8)?)?; + self.pio_wr(bar, fw, FalconMem::Dmem, &dmem, 0, 0)?; + } + + self.hal.program_brom(self, bar, &fw.brom_params())?; + + // Set `BootVec` to start of non-secure code. + regs::NV_PFALCON_FALCON_BOOTVEC::default() + .set_value(fw.boot_addr()) + .write(bar, &E::ID); + + Ok(()) + } + /// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's /// `target_mem`. /// diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 169b07ca340a..3008d18f9313 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -31,7 +31,7 @@ pub(crate) const FIRMWARE_VERSION: &str = "570.144"; /// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`. -fn request_firmware( +pub(crate) fn request_firmware( dev: &device::Device, chipset: gpu::Chipset, name: &str, @@ -258,7 +258,7 @@ fn no_patch_signature(self) -> FirmwareDmaObject<F, Signed> { /// Header common to most firmware files. #[repr(C)] #[derive(Debug, Clone)] -struct BinHdr { +pub(crate) struct BinHdr { /// Magic number, must be `0x10de`. 
bin_magic: u32, /// Version of the header. diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 36ff8ed51c23..159aedd221e8 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -40,12 +40,15 @@ FalconLoadTarget, // }, firmware::{ + FIRMWARE_VERSION, + BinHdr, FalconUCodeDesc, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned, // }, + gpu::Chipset, num::{ FromSafeCast, IntoSafeCast, // @@ -213,6 +216,72 @@ unsafe fn transmute_mut<T: Sized + FromBytes + AsBytes>( T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::<T>())? }).ok_or(EINVAL) } +/// Descriptor used by RM to figure out the requirements of the boot loader. +#[repr(C)] +#[derive(Debug, Clone)] +pub(crate) struct BootloaderDesc { + /// Starting tag of bootloader. + pub start_tag: u32, + /// DMEM offset where [`BootloaderDmemDescV2`] is to be loaded. + pub dmem_load_off: u32, + /// Offset of code section in the image. + pub code_off: u32, + /// Size of code section in the image. + pub code_size: u32, + /// Offset of data section in the image. + pub data_off: u32, + /// Size of data section in the image. + pub data_size: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for BootloaderDesc {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for BootloaderDesc {} + +/// Structure used by the boot-loader to load the rest of the code. +/// +/// This has to be filled by the GPU driver and copied into DMEM at offset +/// [`BootloaderDesc::dmem_load_off`]. +#[repr(C, packed)] +#[derive(Debug, Clone)] +pub(crate) struct BootloaderDmemDescV2 { + /// Reserved, should always be first element. + pub reserved: [u32; 4], + /// 16B signature for secure code, 0s if no secure code. + pub signature: [u32; 4], + /// DMA context used by the bootloader while loading code/data. 
+ pub ctx_dma: u32, + /// 256B-aligned physical FB address where code is located. + pub code_dma_base: u64, + /// Offset from `code_dma_base` where the non-secure code is located (must be multiple of 256). + pub non_sec_code_off: u32, + /// Size of the non-secure code part. + pub non_sec_code_size: u32, + /// Offset from `code_dma_base` where the secure code is located (must be multiple of 256). + pub sec_code_off: u32, + /// Size of the secure code part. + pub sec_code_size: u32, + /// Code entry point invoked by the bootloader after code is loaded. + pub code_entry_point: u32, + /// 256B-aligned physical FB address where data is located. + pub data_dma_base: u64, + /// Size of data block (should be multiple of 256B). + pub data_size: u32, + /// Number of arguments to be passed to the target firmware being loaded. + pub argc: u32, + /// Arguments to be passed to the target firmware being loaded. + pub argv: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for BootloaderDmemDescV2 {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for BootloaderDmemDescV2 {} + +pub(crate) struct GenericBootloader { + pub desc: BootloaderDesc, + pub ucode: Vec<u8, kernel::alloc::allocator::Kmalloc>, +} + /// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. /// /// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow. @@ -221,6 +290,8 @@ pub(crate) struct FwsecFirmware { desc: FalconUCodeDesc, /// GPU-accessible DMA object containing the firmware. 
ucode: FirmwareDmaObject<Self, Signed>, + /// Generic bootloader + gen_bootloader: Option<GenericBootloader>, } impl FalconLoadParams for FwsecFirmware { @@ -275,7 +346,19 @@ fn brom_params(&self) -> FalconBromParams { } fn boot_addr(&self) -> u32 { - 0 + match &self.desc { + FalconUCodeDesc::V2(_v2) => { + // On V2 platforms, the boot address is extracted from the + // generic bootloader, because the gbl is what actually copies + // FWSEC into memory, so that is what needs to be booted. + if let Some(ref gbl) = self.gen_bootloader { + gbl.desc.start_tag << 8 + } else { + 0 + } + }, + FalconUCodeDesc::V3(_v3) => 0, + } } } @@ -376,6 +459,7 @@ impl FwsecFirmware { /// command. pub(crate) fn new( dev: &Device<device::Bound>, + chipset: Chipset, falcon: &Falcon<Gsp>, bar: &Bar0, bios: &Vbios, @@ -432,9 +516,49 @@ pub(crate) fn new( ucode_dma.no_patch_signature() }; + // The Generic Bootloader exists only on Turing and GA100. To avoid a bogus + // console error message on other platforms, only try to load it if it's + // supposed to be there. 
+ let gbl_fw = if chipset < Chipset::GA102 { + super::request_firmware(dev, chipset, "gen_bootloader", FIRMWARE_VERSION) + } else { + Err(ENOENT) + }; + + let gbl = match gbl_fw { + Ok(fw) => { + let hdr = fw.data() + .get(0..size_of::<BinHdr>()) + .and_then(BinHdr::from_bytes_copy) + .ok_or(EINVAL)?; + + let desc_offset = usize::from_safe_cast(hdr.header_offset); + let desc = fw.data() + .get(desc_offset..desc_offset + size_of::<BootloaderDesc>()) + .and_then(BootloaderDesc::from_bytes_copy) + .ok_or(EINVAL)?; + + let ucode_start = usize::from_safe_cast(hdr.data_offset); + let ucode_size = usize::from_safe_cast(hdr.data_size); + let ucode_data = fw.data() + .get(ucode_start..ucode_start + ucode_size) + .ok_or(EINVAL)?; + + let mut ucode = KVec::new(); + ucode.extend_from_slice(ucode_data, GFP_KERNEL)?; + + Some(GenericBootloader { + desc, + ucode, + }) + }, + Err(_) => None, + }; + Ok(FwsecFirmware { - desc: desc, + desc, ucode: ucode_signed, + gen_bootloader: gbl, }) } @@ -449,9 +573,17 @@ pub(crate) fn run( falcon .reset(bar) .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; - falcon - .dma_load(bar, self) - .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; + + // If the Generic Bootloader was found, upload it via PIO; otherwise load FWSEC via DMA. + if let Some(ref gbl) = self.gen_bootloader { + falcon + .pio_load(bar, self, Some(gbl)) + .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; + } else { + falcon + .dma_load(bar, self) + .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; + } let (mbox0, _) = falcon .boot(bar, Some(0), None) .inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware: {:?}\n", e))?; diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 54937606b5b0..fda01afda9ed 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -48,6 +48,7 @@ impl super::Gsp { /// created the WPR2 
region. fn run_fwsec_frts( dev: &device::Device<device::Bound>, + chipset: Chipset, falcon: &Falcon<Gsp>, bar: &Bar0, bios: &Vbios, @@ -65,6 +66,7 @@ fn run_fwsec_frts( let fwsec_frts = FwsecFirmware::new( dev, + chipset, falcon, bar, bios, @@ -147,7 +149,7 @@ pub(crate) fn boot( let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?; dev_dbg!(dev, "{:#x?}\n", fb_layout); - Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; + Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, &fb_layout)?; let booter_loader = BooterFirmware::new( dev, @@ -186,7 +188,11 @@ pub(crate) fn boot( ); sec2_falcon.reset(bar)?; - sec2_falcon.dma_load(bar, &booter_loader)?; + if sec2_falcon.supports_dma() { + sec2_falcon.dma_load(bar, &booter_loader)?; + } else { + sec2_falcon.pio_load(bar, &booter_loader, None)?; + } let wpr_handle = wpr_meta.dma_handle(); let (mbox0, mbox1) = sec2_falcon.boot( bar, -- 2.52.0
