On Tue, Mar 10, 2026 at 01:21:27PM +0100, Paolo Abeni wrote:
> This is an AI-generated review of your patch. The human sending this
> email has considered the AI review valid, or at least plausible.
> 
> For local reproduction steps see 
> https://netdev-ai.bots.linux.dev/ai-local.html
> ---
> net: mana: Force full-page RX buffers for 4K page size on specific systems.
> 
> This commit adds processor-specific optimization for ARM64 systems with
> 4K pages where page_pool fragments cause throughput regression. It
> detects affected processors via SMBIOS and forces one RX buffer per page.
>
yes, I assume no action is needed here.
> > diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c 
> > b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > index aef8612b73cb..05fecc00a90c 100644
> > --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> 
> [ ... ]
> 
> > @@ -1959,6 +1960,128 @@ static bool mana_is_pf(unsigned short dev_id)
> >     return dev_id == MANA_PF_DEVICE_ID;
> >  }
> >
> > +/*
> > + * Table for Processor Version strings found from SMBIOS Type 4 
> > information,
> > + * for processors that needs to force single RX buffer per page quirk for
> > + * meeting line rate performance with ARM64 + 4K pages.
> > + * Note: These strings are exactly matched with version fetched from 
> > SMBIOS.
> > + */
> > +static const char * const mana_single_rxbuf_per_page_quirk_tbl[] = {
> > +   "Cobalt 200",
> > +};
> > +
> > +/* On some systems with 4K PAGE_SIZE, page_pool RX fragments can
> > + * trigger a throughput regression. Hence identify those processors
> > + * from the extracted SMBIOS table and apply the quirk to forces one
> > + * RX buffer per page to avoid the fragment allocation/refcounting
> > + * overhead in the RX refill path for those processors only.
> > + */
> > +static bool mana_needs_single_rxbuf_per_page(struct gdma_context *gc)
> > +{
> > +   int i = 0;
> > +   const char *ver = gc->processor_version;
> > +
> > +   if (!ver)
> > +           return false;
> > +
> > +   if (PAGE_SIZE != SZ_4K)
> > +           return false;
> > +
> > +   while (i < ARRAY_SIZE(mana_single_rxbuf_per_page_quirk_tbl)) {
> > +           if (!strcmp(ver, mana_single_rxbuf_per_page_quirk_tbl[i]))
> > +                   return true;
> > +           i++;
> > +   }
> > +
> > +   return false;
> > +}
> > +
> > +static void mana_get_proc_ver_strno(const struct dmi_header *hdr, void 
> > *data)
> > +{
> > +   struct gdma_context *gc = data;
> > +   const u8 *d = (const u8 *)hdr;
> > +
> > +   /* We are only looking for Type 4: Processor Information */
> > +   if (hdr->type != SMBIOS_TYPE_4_PROCESSOR_INFO)
> > +           return;
> > +
> > +   /* Ensure the record is long enough to contain the Processor Version
> > +    * field
> > +    */
> > +   if (hdr->length <= SMBIOS_TYPE4_PROC_VERSION_OFFSET)
> > +           return;
> > +
> > +   /* The 'Processor Version' string is located at index pointed by
> > +    * SMBIOS_TYPE4_PROC_VERSION_OFFSET.  Make a copy of the index.
> > +    * There could be multiple Type 4 tables so read and store the
> > +    * processor version index found the first time.
> > +    */
> > +   if (gc->proc_ver_strno)
> > +           return;
> > +
> > +   gc->proc_ver_strno = d[SMBIOS_TYPE4_PROC_VERSION_OFFSET];
> > +}
> > +
> > +static const char *mana_dmi_string_nosave(const struct dmi_header *hdr, u8 
> > s)
> > +{
> > +   const char *bp = (const char *)hdr + hdr->length;
> > +
> > +   if (!s)
> > +           return NULL;
> > +
> > +   /* String numbers start at 1 */
> > +   while (--s > 0 && *bp)
> > +           bp += strlen(bp) + 1;
> > +
> > +   if (!*bp)
> > +           return NULL;
> > +
> > +   return bp;
> > +}
> > +
> > +static void mana_fetch_proc_ver_string(const struct dmi_header *hdr,
> > +                                  void *data)
> > +{
> > +   struct gdma_context *gc = data;
> > +   const char *ver;
> > +
> > +   /* We are only looking for Type 4: Processor Information */
> > +   if (hdr->type != SMBIOS_TYPE_4_PROCESSOR_INFO)
> > +           return;
> > +
> > +   /* Extract proc version found the first time only */
> > +   if (!gc->proc_ver_strno || gc->processor_version)
> > +           return;
> > +
> > +   ver = mana_dmi_string_nosave(hdr, gc->proc_ver_strno);
> > +   if (ver)
> > +           gc->processor_version = kstrdup(ver, GFP_KERNEL);
> > +}
> > +
> > +/* Check and initialize all processor optimizations/quirks here */
> > +static bool mana_init_processor_optimization(struct gdma_context *gc)
> > +{
> > +   bool opt_initialized = false;
> > +
> > +   gc->proc_ver_strno = 0;
> > +   gc->processor_version = NULL;
> > +
> > +   dmi_walk(mana_get_proc_ver_strno, gc);
> > +   if (!gc->proc_ver_strno)
> > +           return false;
> > +
> > +   dmi_walk(mana_fetch_proc_ver_string, gc);
> > +   if (!gc->processor_version)
> > +           return false;
> > +
> > +   if (mana_needs_single_rxbuf_per_page(gc)) {
> > +           gc->force_full_page_rx_buffer = true;
> > +           opt_initialized = true;
> > +   }
> > +
> > +   return opt_initialized;
> > +}
> > +
> >  static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id 
> > *ent)
> >  {
> >     struct gdma_context *gc;
> 
> [ ... ]
> 
> > @@ -2013,6 +2136,11 @@ static int mana_gd_probe(struct pci_dev *pdev, const 
> > struct pci_device_id *ent)
> >             gc->mana_pci_debugfs = 
> > debugfs_create_dir(pci_slot_name(pdev->slot),
> >                                                       mana_debugfs_root);
> >
> > +   if (mana_init_processor_optimization(gc))
> > +           dev_info(&pdev->dev,
> > +                    "Processor specific optimization initialized on: %s\n",
> > +                   gc->processor_version);
> > +
> >     err = mana_gd_setup(pdev);
> >     if (err)
> >             goto unmap_bar;
> 
> [ ... ]
> 
> > @@ -2055,6 +2183,8 @@ static int mana_gd_probe(struct pci_dev *pdev, const 
> > struct pci_device_id *ent)
> >     pci_iounmap(pdev, bar0_va);
> >  free_gc:
> >     pci_set_drvdata(pdev, NULL);
> > +   kfree(gc->processor_version);
> > +   gc->processor_version = NULL;
> >     vfree(gc);
> >  release_region:
> >     pci_release_regions(pdev);
> 
> [ ... ]
> 
> > @@ -2110,6 +2240,9 @@ static void mana_gd_remove(struct pci_dev *pdev)
> >
> >     pci_iounmap(pdev, gc->bar0_va);
> >
> > +   kfree(gc->processor_version);
> > +   gc->processor_version = NULL;
> > +
> >     vfree(gc);
> >
> >     pci_release_regions(pdev);
> 
> [ ... ]
> 
> > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c 
> > b/drivers/net/ethernet/microsoft/mana/mana_en.c
> > index ea71de39f996..f17cc2cc7dd9 100644
> > --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> > +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> 
> [ ... ]
> 
> > @@ -744,6 +744,26 @@ static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, 
> > dma_addr_t *da)
> >     return va;
> >  }
> >
> > +static bool
> > +mana_use_single_rxbuf_per_page(struct mana_port_context *apc, u32 mtu)
> > +{
> > +   struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
> > +
> > +   /* On some systems with 4K PAGE_SIZE, page_pool RX fragments can
> > +    * trigger a throughput regression. Hence forces one RX buffer per page
> > +    * to avoid the fragment allocation/refcounting overhead in the RX
> > +    * refill path for those processors only.
> > +    */
> > +   if (gc->force_full_page_rx_buffer)
> > +           return true;
> > +
> > +   /* For xdp and jumbo frames make sure only one packet fits per page. */
> > +   if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc))
> > +           return true;
> > +
> > +   return false;
> > +}
> > +
> >  /* Get RX buffer's data size, alloc size, XDP headroom based on MTU */
> >  static void mana_get_rxbuf_cfg(struct mana_port_context *apc,
> >                            int mtu, u32 *datasize, u32 *alloc_size,
> 
> [ ... ]
> 
> > @@ -754,8 +774,7 @@ static void mana_get_rxbuf_cfg(struct mana_port_context 
> > *apc,
> >     /* Calculate datasize first (consistent across all cases) */
> >     *datasize = mtu + ETH_HLEN;
> >
> > -   /* For xdp and jumbo frames make sure only one packet fits per page */
> > -   if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) {
> > +   if (mana_use_single_rxbuf_per_page(apc, mtu)) {
> >             if (mana_xdp_get(apc)) {
> >                     *headroom = XDP_PACKET_HEADROOM;
> >                     *alloc_size = PAGE_SIZE;
> 
> [ ... ]
> 
> > diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
> > index ec17004b10c0..be56b347f3f6 100644
> > --- a/include/net/mana/gdma.h
> > +++ b/include/net/mana/gdma.h
> 
> [ ... ]
> 
> > @@ -9,6 +9,12 @@
> >
> >  #include "shm_channel.h"
> >
> > +/* SMBIOS Type 4: Processor Information table */
> > +#define SMBIOS_TYPE_4_PROCESSOR_INFO 4
> > +
> > +/* Byte offset containing the Processor Version string number.*/
> > +#define SMBIOS_TYPE4_PROC_VERSION_OFFSET 0x10
> > +
> >  #define GDMA_STATUS_MORE_ENTRIES   0x00000105
> >  #define GDMA_STATUS_CMD_UNSUPPORTED        0xffffffff
> >
> 
> [ ... ]
> 
> > @@ -444,6 +450,9 @@ struct gdma_context {
> >     struct workqueue_struct *service_wq;
> >
> >     unsigned long           flags;
> > +   u8                      *processor_version;
> > +   u8                      proc_ver_strno;
> > +   bool                    force_full_page_rx_buffer;
> >  };
> >
> >  static inline bool mana_gd_is_mana(struct gdma_dev *gd)
> 
> Should processor_version be declared as char * instead of u8 *?
> 
> The field is used as a string throughout the code: it's assigned from
> kstrdup() in mana_fetch_proc_ver_string(), read as const char *ver
> in mana_needs_single_rxbuf_per_page(), passed to strcmp(), and printed
> with %s format in dev_info().
> 
> Using u8 * for string data can trigger compiler warnings with
> -Wpointer-sign, since string functions expect char * types.
> 
Ack, I will send a v3 addressing this.


Thanks, Paolo, for the review comments.
I will address this warning in a v3.

Regards
Dipayaan Roy

Reply via email to