On Mon Mar 9, 2026 at 5:34 PM CET, Tim Kovalenko via B4 Relay wrote: > From: Tim Kovalenko <[email protected]> > > The `Cmdq::new` function was allocating a `PteArray` struct on the stack > and was causing a stack overflow with 8216 bytes. > > Modify the `PteArray` to calculate and write the Page Table Entries > directly into the coherent DMA buffer one-by-one. This reduces the stack > usage quite a lot. >
Reported-by: Gary Guo <[email protected]> Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/topic/.60Cmdq.3A.3Anew.60.20uses.20excessive.20stack.20size/near/570375549 Fixes: f38b4f105cfc ("gpu: nova-core: Create initial Gsp") > Signed-off-by: Tim Kovalenko <[email protected]> A few nits below, but I can fix them up [1] on apply. > + for (i, chunk) in > pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() { > + let pte_value = start_addr > + .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) > + .ok_or(EOVERFLOW)?; This should use PteArray::entry(). It would also be nice to get rid of the unsafe {} and use dma_write!() instead, but this can be a follow-up patch. > + > + chunk.copy_from_slice(&pte_value.to_ne_bytes()); > + } > + > Ok(obj) > } > } > diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs > b/drivers/gpu/nova-core/gsp/cmdq.rs > index > 0056bfbf0a44cfbc5a0ca08d069f881b877e1edc..c8327d3098f73f9b880eee99038ad10a16e1e32d > 100644 > --- a/drivers/gpu/nova-core/gsp/cmdq.rs > +++ b/drivers/gpu/nova-core/gsp/cmdq.rs > @@ -202,7 +202,20 @@ fn new(dev: &device::Device<device::Bound>) -> > Result<Self> { > > let gsp_mem = > CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL > | __GFP_ZERO)?; > - dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle())?); > + > + const NUM_PTES: usize = GSP_PAGE_SIZE / size_of::<u64>(); We can avoid duplicating this by making it a constant of GspMem. > + let start = gsp_mem.dma_handle(); > + // One by one GSP Page write to the memory to avoid stack overflow > when allocating > + // the whole array at once. > + for i in 0..NUM_PTES { > + dma_write!( > + gsp_mem, > + [0]?.ptes.0[i], > + PteArray::<NUM_PTES>::entry(start, i)? > + ); > + } -- [1] -- diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 20170e483e04..b5ea14b7dad7 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -48,6 +48,7 @@ unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {} impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> { /// Returns the page table entry for `index`, for a mapping starting at `start` DmaAddress. + // TODO: Replace with `IoView` projection once available. fn entry(start: DmaAddress, index: usize) -> Result<u64> { start .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT) @@ -90,12 +91,9 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> { .as_slice_mut(size_of::<u64>(), NUM_PAGES * size_of::<u64>())? }; - // This is a one by one GSP Page write to the memory - // to avoid stack overflow when allocating the whole array at once. + // Write values one by one to avoid an on-stack instance of `PteArray`. for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() { - let pte_value = start_addr - .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) - .ok_or(EOVERFLOW)?; + let pte_value = PteArray::<NUM_PAGES>::entry(start_addr, i)?; chunk.copy_from_slice(&pte_value.to_ne_bytes()); } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs index 9107a1473797..aa42d180f0d5 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -159,7 +159,7 @@ struct Msgq { #[repr(C)] struct GspMem { /// Self-mapping page table entries. - ptes: PteArray<{ GSP_PAGE_SIZE / size_of::<u64>() }>, + ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>, /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the /// write and read pointers that the CPU updates. /// @@ -172,6 +172,10 @@ struct GspMem { gspq: Msgq, } +impl GspMem { + const PTE_ARRAY_SIZE: usize = GSP_PAGE_SIZE / size_of::<u64>(); +} + // SAFETY: These structs don't meet the no-padding requirements of AsBytes but // that is not a problem because they are not used outside the kernel. unsafe impl AsBytes for GspMem {} @@ -202,16 +206,14 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> { let gsp_mem = CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; - const NUM_PTES: usize = GSP_PAGE_SIZE / size_of::<u64>(); - let start = gsp_mem.dma_handle(); // One by one GSP Page write to the memory to avoid stack overflow when allocating // the whole array at once. - for i in 0..NUM_PTES { + for i in 0..GspMem::PTE_ARRAY_SIZE { dma_write!( gsp_mem, [0]?.ptes.0[i], - PteArray::<NUM_PTES>::entry(start, i)? + PteArray::<{ GspMem::PTE_ARRAY_SIZE }>::entry(start, i)? ); }
