Turing and GA100 use programmed I/O (PIO) instead of DMA to upload firmware images into Falcon memory.
A new firmware called the Generic Bootloader (as opposed to the GSP Bootloader) is used to upload FWSEC. Signed-off-by: Timur Tabi <[email protected]> --- drivers/gpu/nova-core/falcon.rs | 183 ++++++++++++++++++++++++ drivers/gpu/nova-core/firmware/fwsec.rs | 129 ++++++++++++++++- drivers/gpu/nova-core/gsp/boot.rs | 6 +- drivers/gpu/nova-core/regs.rs | 30 ++++ 4 files changed, 344 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index d779fcda0e2a..ccb5390ae9c2 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -18,11 +18,17 @@ time::{ Delta, // }, + transmute::AsBytes, // }; use crate::{ dma::DmaObject, driver::Bar0, + falcon::hal::LoadMethod, + firmware::fwsec::{ + BootloaderDmemDescV2, + GenericBootloader, // + }, gpu::Chipset, num::{ FromSafeCast, @@ -409,6 +415,170 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result { Ok(()) } + /// Write a slice to Falcon memory using programmed I/O (PIO). + /// + /// Writes `img` to the specified `target_mem` (IMEM or DMEM) starting at `mem_base`. + /// For IMEM writes, tags are set for each 256-byte block starting from `start_tag`. + /// For DMEM, start_tag is ignored. + /// + /// Returns `EINVAL` if `img.len()` is not a multiple of 4. + fn pio_wr_slice( + &self, + bar: &Bar0, + img: &[u8], + mem_base: u16, + target_mem: FalconMem, + start_tag: u16, + ) -> Result { + // Rejecting misaligned images here allows us to avoid checking + // inside the loops. + if img.len() % 4 != 0 { + return Err(EINVAL); + } + + // NV_PFALCON_FALCON_IMEMC supports up to four ports, + // but we only ever use one, so just hard-code it. 
+ const PORT: usize = 0; + + match target_mem { + FalconMem::ImemSecure | FalconMem::ImemNonSecure => { + regs::NV_PFALCON_FALCON_IMEMC::default() + .set_secure(target_mem == FalconMem::ImemSecure) + .set_aincw(true) + .set_offs(mem_base) + .write(bar, &E::ID, PORT); + + for (n, block) in img.chunks(256).enumerate() { + let n = u16::try_from(n)?; + let tag: u16 = start_tag.checked_add(n).ok_or(ERANGE)?; + regs::NV_PFALCON_FALCON_IMEMT::default() + .set_tag(tag) + .write(bar, &E::ID, PORT); + for word in block.chunks_exact(4) { + let w = [word[0], word[1], word[2], word[3]]; + regs::NV_PFALCON_FALCON_IMEMD::default() + .set_data(u32::from_le_bytes(w)) + .write(bar, &E::ID, PORT); + } + } + } + FalconMem::Dmem => { + regs::NV_PFALCON_FALCON_DMEMC::default() + .set_aincw(true) + .set_offs(mem_base) + .write(bar, &E::ID, PORT); + + for word in img.chunks_exact(4) { + let w = [word[0], word[1], word[2], word[3]]; + regs::NV_PFALCON_FALCON_DMEMD::default() + .set_data(u32::from_le_bytes(w)) + .write(bar, &E::ID, PORT); + } + } + } + + Ok(()) + } + + /// Perform a PIO write of a firmware section to falcon memory. + /// + /// Extracts the data slice specified by `load_offsets` from `fw` and writes it to + /// `target_mem` at `load_offsets.dst_start`, tagging IMEM blocks from `start_tag`. + fn pio_wr<F: FalconFirmware<Target = E>>( + &self, + bar: &Bar0, + fw: &F, + target_mem: FalconMem, + load_offsets: &FalconLoadTarget, + start_tag: u16, + ) -> Result { + let start = usize::from_safe_cast(load_offsets.src_start); + let len = usize::from_safe_cast(load_offsets.len); + let mem_base = u16::try_from(load_offsets.dst_start)?; + + // SAFETY: we are the only user of the firmware image at this stage + let data = unsafe { fw.as_slice(start, len).map_err(|_| EINVAL)? }; + + self.pio_wr_slice(bar, data, mem_base, target_mem, start_tag) + } + + /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it.
+ pub(crate) fn pio_load<F: FalconFirmware<Target = E>>( + &self, + bar: &Bar0, + fw: &F, + gbl: Option<&GenericBootloader>, + ) -> Result { + let imem_sec = fw.imem_sec_load_params(); + let imem_ns = fw.imem_ns_load_params().ok_or(EINVAL)?; + let dmem = fw.dmem_load_params(); + + regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID) + .set_allow_phys_no_ctx(true) + .write(bar, &E::ID); + + regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); + + // If the Generic Bootloader was passed, then use it to boot FRTS + if let Some(gbl) = gbl { + let dst_start = u16::try_from(0x10000 - gbl.desc.code_size)?; + let data = &gbl.ucode[..usize::from_safe_cast(gbl.desc.code_size)]; + let tag = u16::try_from(gbl.desc.start_tag)?; + + self.pio_wr_slice(bar, data, dst_start, FalconMem::ImemNonSecure, tag)?; + + // This structure tells the generic bootloader where to find the FWSEC + // image. + let dmem_desc = BootloaderDmemDescV2 { + reserved: [0; 4], + signature: [0; 4], + ctx_dma: 4, // FALCON_DMAIDX_PHYS_SYS_NCOH + code_dma_base: fw.dma_handle(), + non_sec_code_off: imem_ns.dst_start, + non_sec_code_size: imem_ns.len, + sec_code_off: imem_sec.dst_start, + sec_code_size: imem_sec.len, + code_entry_point: 0, + data_dma_base: fw.dma_handle() + u64::from(dmem.src_start), + data_size: dmem.len, + argc: 0, + argv: 0, + }; + + regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 4, |v| { + v.set_target(FalconFbifTarget::CoherentSysmem) + .set_mem_type(FalconFbifMemType::Physical) + }); + + self.pio_wr_slice(bar, dmem_desc.as_bytes(), 0, FalconMem::Dmem, 0)?; + } else { + self.pio_wr( + bar, + fw, + FalconMem::ImemNonSecure, + &imem_ns, + u16::try_from(imem_ns.dst_start >> 8)?, + )?; + self.pio_wr( + bar, + fw, + FalconMem::ImemSecure, + &imem_sec, + u16::try_from(imem_sec.dst_start >> 8)?, + )?; + self.pio_wr(bar, fw, FalconMem::Dmem, &dmem, 0)?; + } + + self.hal.program_brom(self, bar, &fw.brom_params())?; + + // Set `BootVec` to start of non-secure code. 
+ regs::NV_PFALCON_FALCON_BOOTVEC::default() + .set_value(fw.boot_addr()) + .write(bar, &E::ID); + + Ok(()) + } + /// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's /// `target_mem`. /// @@ -637,6 +807,19 @@ pub(crate) fn is_riscv_active(&self, bar: &Bar0) -> bool { self.hal.is_riscv_active(bar) } + /// Load `fw` into Falcon memory using the method (PIO or DMA) selected by the HAL. + pub(crate) fn load<F: FalconFirmware<Target = E>>( + &self, + bar: &Bar0, + fw: &F, + gbl: Option<&GenericBootloader>, + ) -> Result { + match self.hal.load_method() { + LoadMethod::Pio => self.pio_load(bar, fw, gbl), + LoadMethod::Dma => self.dma_load(bar, fw), + } + } + /// Write the application version to the OS register. pub(crate) fn write_os_version(&self, bar: &Bar0, app_version: u32) { regs::NV_PFALCON_FALCON_OS::default() diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 89dc4526041b..762674ca5087 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -40,12 +40,15 @@ FalconLoadTarget, // }, firmware::{ + BinHdr, FalconUCodeDesc, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned, // + FIRMWARE_VERSION, }, + gpu::Chipset, num::{ FromSafeCast, IntoSafeCast, // @@ -213,6 +216,68 @@ unsafe fn transmute_mut<T: Sized + FromBytes + AsBytes>( T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::<T>())? }).ok_or(EINVAL) } +/// Descriptor used by RM to figure out the requirements of the boot loader. +#[repr(C)] +#[derive(Debug, Clone)] +pub(crate) struct BootloaderDesc { + /// Starting tag of bootloader. + pub start_tag: u32, + /// DMEM offset where [`BootloaderDmemDescV2`] is to be loaded. + pub dmem_load_off: u32, + /// Offset of code section in the image. + pub code_off: u32, + /// Size of code section in the image. + pub code_size: u32, + /// Offset of data section in the image. + pub data_off: u32, + /// Size of data section in the image.
+ pub data_size: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for BootloaderDesc {} + +/// Structure used by the boot-loader to load the rest of the code. +/// +/// This has to be filled by the GPU driver and copied into DMEM at offset +/// [`BootloaderDesc::dmem_load_off`]. +#[repr(C, packed)] +#[derive(Debug, Clone)] +pub(crate) struct BootloaderDmemDescV2 { + /// Reserved, should always be first element. + pub reserved: [u32; 4], + /// 16B signature for secure code, 0s if no secure code. + pub signature: [u32; 4], + /// DMA context used by the bootloader while loading code/data. + pub ctx_dma: u32, + /// 256B-aligned physical FB address where code is located. + pub code_dma_base: u64, + /// Offset from `code_dma_base` where the non-secure code is located (must be multiple of 256). + pub non_sec_code_off: u32, + /// Size of the non-secure code part. + pub non_sec_code_size: u32, + /// Offset from `code_dma_base` where the secure code is located (must be multiple of 256). + pub sec_code_off: u32, + /// Size of the secure code part. + pub sec_code_size: u32, + /// Code entry point invoked by the bootloader after code is loaded. + pub code_entry_point: u32, + /// 256B-aligned physical FB address where data is located. + pub data_dma_base: u64, + /// Size of data block (should be multiple of 256B). + pub data_size: u32, + /// Number of arguments to be passed to the target firmware being loaded. + pub argc: u32, + /// Arguments to be passed to the target firmware being loaded. + pub argv: u32, +} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for BootloaderDmemDescV2 {} + +pub(crate) struct GenericBootloader { + pub desc: BootloaderDesc, + pub ucode: KVec<u8>, +} + /// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. /// /// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow.
@@ -221,6 +286,8 @@ pub(crate) struct FwsecFirmware { desc: FalconUCodeDesc, /// GPU-accessible DMA object containing the firmware. ucode: FirmwareDmaObject<Self, Signed>, + /// Generic bootloader + gen_bootloader: Option<GenericBootloader>, } impl FalconLoadParams for FwsecFirmware { @@ -245,7 +312,19 @@ fn brom_params(&self) -> FalconBromParams { } fn boot_addr(&self) -> u32 { - 0 + match &self.desc { + FalconUCodeDesc::V2(_v2) => { + // On V2 platforms, the boot address is extracted from the + // generic bootloader, because the gbl is what actually copies + // FWSEC into memory, so that is what needs to be booted. + if let Some(ref gbl) = self.gen_bootloader { + gbl.desc.start_tag << 8 + } else { + 0 + } + } + FalconUCodeDesc::V3(_v3) => 0, + } } } @@ -346,6 +425,7 @@ impl FwsecFirmware { /// command. pub(crate) fn new( dev: &Device<device::Bound>, + chipset: Chipset, falcon: &Falcon<Gsp>, bar: &Bar0, bios: &Vbios, @@ -402,9 +482,54 @@ pub(crate) fn new( ucode_dma.no_patch_signature() }; + // The Generic Bootloader exists only on Turing and GA100. To avoid a bogus + // console error message on other platforms, only try to load it if it's + // supposed to be there. + let gbl_fw = if chipset < Chipset::GA102 { + Some(super::request_firmware( + dev, + chipset, + "gen_bootloader", + FIRMWARE_VERSION, + )?) 
+ } else { + None + }; + + let gbl = match gbl_fw { + Some(fw) => { + let hdr = fw + .data() + .get(0..size_of::<BinHdr>()) + .and_then(BinHdr::from_bytes_copy) + .ok_or(EINVAL)?; + + let desc_offset = usize::from_safe_cast(hdr.header_offset); + let desc = fw + .data() + .get(desc_offset..desc_offset + size_of::<BootloaderDesc>()) + .and_then(BootloaderDesc::from_bytes_copy) + .ok_or(EINVAL)?; + + let ucode_start = usize::from_safe_cast(hdr.data_offset); + let ucode_size = usize::from_safe_cast(hdr.data_size); + let ucode_data = fw + .data() + .get(ucode_start..ucode_start + ucode_size) + .ok_or(EINVAL)?; + + let mut ucode = KVec::new(); + ucode.extend_from_slice(ucode_data, GFP_KERNEL)?; + + Some(GenericBootloader { desc, ucode }) + } + None => None, + }; + Ok(FwsecFirmware { desc, ucode: ucode_signed, + gen_bootloader: gbl, }) } @@ -420,7 +545,7 @@ pub(crate) fn run( .reset(bar) .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; falcon - .dma_load(bar, self) + .load(bar, self, self.gen_bootloader.as_ref()) .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; let (mbox0, _) = falcon .boot(bar, Some(0), None) diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 581b412554dc..f253d5f12252 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -48,6 +48,7 @@ impl super::Gsp { /// created the WPR2 region. 
fn run_fwsec_frts( dev: &device::Device<device::Bound>, + chipset: Chipset, falcon: &Falcon<Gsp>, bar: &Bar0, bios: &Vbios, @@ -65,6 +66,7 @@ fn run_fwsec_frts( let fwsec_frts = FwsecFirmware::new( dev, + chipset, falcon, bar, bios, @@ -144,7 +146,7 @@ pub(crate) fn boot( let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?; dev_dbg!(dev, "{:#x?}\n", fb_layout); - Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; + Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, &fb_layout)?; let booter_loader = BooterFirmware::new( dev, @@ -183,7 +185,7 @@ pub(crate) fn boot( ); sec2_falcon.reset(bar)?; - sec2_falcon.dma_load(bar, &booter_loader)?; + sec2_falcon.load(bar, &booter_loader, None)?; let wpr_handle = wpr_meta.dma_handle(); let (mbox0, mbox1) = sec2_falcon.boot( bar, diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index ea0d32f5396c..53f412f0ca32 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -364,6 +364,36 @@ pub(crate) fn with_falcon_mem(self, mem: FalconMem) -> Self { 1:1 startcpu as bool; }); +// IMEM access control register. Up to 4 ports are available for IMEM access. +register!(NV_PFALCON_FALCON_IMEMC @ PFalconBase[0x00000180[4; 16]] { + 15:0 offs as u16, "IMEM block and word offset"; + 24:24 aincw as bool, "Auto-increment on write"; + 28:28 secure as bool, "Access secure IMEM"; +}); + +// IMEM data register. Reading/writing this register accesses IMEM at the address +// specified by the corresponding IMEMC register. +register!(NV_PFALCON_FALCON_IMEMD @ PFalconBase[0x00000184[4; 16]] { + 31:0 data as u32; +}); + +// IMEM tag register. Used to set the tag for the current IMEM block. +register!(NV_PFALCON_FALCON_IMEMT @ PFalconBase[0x00000188[4; 16]] { + 15:0 tag as u16; +}); + +// DMEM access control register. Up to 8 ports are available for DMEM access. 
+register!(NV_PFALCON_FALCON_DMEMC @ PFalconBase[0x000001c0[8; 8]] { + 15:0 offs as u16, "DMEM block and word offset"; + 24:24 aincw as bool, "Auto-increment on write"; +}); + +// DMEM data register. Reading/writing this register accesses DMEM at the address +// specified by the corresponding DMEMC register. +register!(NV_PFALCON_FALCON_DMEMD @ PFalconBase[0x000001c4[8; 8]] { + 31:0 data as u32; +}); + // Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon // instance. register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] { -- 2.52.0
