On Tue Mar 10, 2026 at 1:40 AM GMT, Alexandre Courbot wrote:
> On Tue Mar 10, 2026 at 1:34 AM JST, Tim Kovalenko via B4 Relay wrote:
>> From: Tim Kovalenko <[email protected]>
>>
>> The `Cmdq::new` function was allocating a `PteArray` struct on the stack
>> and was causing a stack overflow with 8216 bytes.
>>
>> Modify the `PteArray` to calculate and write the Page Table Entries
>> directly into the coherent DMA buffer one-by-one. This reduces the stack
>> usage quite a lot.
>>
>> Signed-off-by: Tim Kovalenko <[email protected]>
>> ---
>>  drivers/gpu/nova-core/gsp.rs      | 34 +++++++++++++++++++---------------
>>  drivers/gpu/nova-core/gsp/cmdq.rs | 15 ++++++++++++++-
>>  2 files changed, 33 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
>> index 
>> 25cd48514c777cb405a2af0acf57196b2e2e7837..20170e483e04c476efce8997b3916b0ad829ed38
>>  100644
>> --- a/drivers/gpu/nova-core/gsp.rs
>> +++ b/drivers/gpu/nova-core/gsp.rs
>> @@ -47,16 +47,11 @@
>>  unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}
>>  
>>  impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> {
>> -    /// Creates a new page table array mapping `NUM_PAGES` GSP pages 
>> starting at address `start`.
>> -    fn new(start: DmaAddress) -> Result<Self> {
>> -        let mut ptes = [0u64; NUM_PAGES];
>> -        for (i, pte) in ptes.iter_mut().enumerate() {
>> -            *pte = start
>> -                .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
>> -                .ok_or(EOVERFLOW)?;
>> -        }
>> -
>> -        Ok(Self(ptes))
>> +    /// Returns the page table entry for `index`, for a mapping starting at 
>> `start` DmaAddress.
>> +    fn entry(start: DmaAddress, index: usize) -> Result<u64> {
>> +        start
>> +            .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT)
>> +            .ok_or(EOVERFLOW)
>>      }
>>  }
>>  
>> @@ -86,16 +81,25 @@ fn new(dev: &device::Device<device::Bound>) -> 
>> Result<Self> {
>>              NUM_PAGES * GSP_PAGE_SIZE,
>>              GFP_KERNEL | __GFP_ZERO,
>>          )?);
>> -        let ptes = PteArray::<NUM_PAGES>::new(obj.0.dma_handle())?;
>> +
>> +        let start_addr = obj.0.dma_handle();
>>  
>>          // SAFETY: `obj` has just been created and we are its sole user.
>> -        unsafe {
>> -            // Copy the self-mapping PTE at the expected location.
>> +        let pte_region = unsafe {
>>              obj.0
>> -                .as_slice_mut(size_of::<u64>(), size_of_val(&ptes))?
>> -                .copy_from_slice(ptes.as_bytes())
>> +                .as_slice_mut(size_of::<u64>(), NUM_PAGES * 
>> size_of::<u64>())?
>>          };
>>  
>> +        // This is a  one by one GSP Page write to the memory
>> +        // to avoid stack overflow when allocating the whole array at once.
>> +        for (i, chunk) in 
>> pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
>> +            let pte_value = start_addr
>> +                .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
>> +                .ok_or(EOVERFLOW)?;
>> +
>> +            chunk.copy_from_slice(&pte_value.to_ne_bytes());
>> +        }
>> +
>>          Ok(obj)
>>      }
>>  }
>> diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs 
>> b/drivers/gpu/nova-core/gsp/cmdq.rs
>> index 
>> 0056bfbf0a44cfbc5a0ca08d069f881b877e1edc..c8327d3098f73f9b880eee99038ad10a16e1e32d
>>  100644
>> --- a/drivers/gpu/nova-core/gsp/cmdq.rs
>> +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
>> @@ -202,7 +202,20 @@ fn new(dev: &device::Device<device::Bound>) -> 
>> Result<Self> {
>>  
>>          let gsp_mem =
>>              CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL 
>> | __GFP_ZERO)?;
>> -        dma_write!(gsp_mem, [0]?.ptes, 
>> PteArray::new(gsp_mem.dma_handle())?);
>> +
>> +        const NUM_PTES: usize = GSP_PAGE_SIZE / size_of::<u64>();
>> +
>> +        let start = gsp_mem.dma_handle();
>> +        // One by one GSP Page write to the memory to avoid stack overflow 
>> when allocating
>> +        // the whole array at once.
>> +        for i in 0..NUM_PTES {
>> +            dma_write!(
>> +                gsp_mem,
>> +                [0]?.ptes.0[i],
>> +                PteArray::<NUM_PTES>::entry(start, i)?
>
> Does `::<NUM_PTES>` need to be mentioned here, or is the compiler able
> to infer it?

The function signature doesn't mention NUM_PTES at all, so no. In fact, perhaps
the `entry` shouldn't be an associated method at all (even if is, it probably
should be of `PteArray::<0>` or something.

Best,
Gary

>
> In any case, the updated patch
>
> Acked-by: Alexandre Courbot <[email protected]>
>
> Thanks!

Reply via email to