On 7/9/21 8:29 PM, Ferruh Yigit wrote:
> There is a confusion on setting max Rx packet length, this patch aims to
> clarify it.
> 
> 'rte_eth_dev_configure()' API accepts max Rx packet size via
> 'uint32_t max_rx_pkt_len' filed of the config struct 'struct
> rte_eth_conf'.
> 
> Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result
> stored into '(struct rte_eth_dev)->data->mtu'.
> 
> These two APIs are related but they work in a disconnected way, they
> store the set values in different variables which makes hard to figure
> out which one to use, also two different related method is confusing for
> the users.
> 
> Other issues causing confusion is:
> * maximum transmission unit (MTU) is payload of the Ethernet frame. And
>   'max_rx_pkt_len' is the size of the Ethernet frame. Difference is
>   Ethernet frame overhead, but this may be different from device to
>   device based on what device supports, like VLAN and QinQ.
> * 'max_rx_pkt_len' is only valid when application requested jumbo frame,
>   which adds additional confusion and some APIs and PMDs already
>   discards this documented behavior.
> * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory
>   field, this adds configuration complexity for application.
> 
> As solution, both APIs gets MTU as parameter, and both saves the result
> in same variable '(struct rte_eth_dev)->data->mtu'. For this
> 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent
> from jumbo frame.
> 
> For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user
> request and it should be used only within configure function and result
> should be stored to '(struct rte_eth_dev)->data->mtu'. After that point
> both application and PMD uses MTU from this variable.
> 
> When application doesn't provide an MTU during 'rte_eth_dev_configure()'
> default 'RTE_ETHER_MTU' value is used.
> 
> As additional clarification, MTU is used to configure the device for
> physical Rx/Tx limitation. Other related issue is size of the buffer to
> store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size.
> And compares MTU against Rx buffer size to decide enabling scattered Rx
> or not, if PMD supports it. If scattered Rx is not supported by device,
> MTU bigger than Rx buffer size should fail.
> 

Do I understand correctly that target is 21.11?

Really huge work. Many thanks.

See my notes below.

> Signed-off-by: Ferruh Yigit <ferruh.yi...@intel.com>

[snip]

> diff --git a/app/test-eventdev/test_pipeline_common.c 
> b/app/test-eventdev/test_pipeline_common.c
> index 6ee530d4cdc9..5fcea74b4d43 100644
> --- a/app/test-eventdev/test_pipeline_common.c
> +++ b/app/test-eventdev/test_pipeline_common.c
> @@ -197,8 +197,9 @@ pipeline_ethdev_setup(struct evt_test *test, struct 
> evt_options *opt)
>               return -EINVAL;
>       }
>  
> -     port_conf.rxmode.max_rx_pkt_len = opt->max_pkt_sz;
> -     if (opt->max_pkt_sz > RTE_ETHER_MAX_LEN)
> +     port_conf.rxmode.mtu = opt->max_pkt_sz - RTE_ETHER_HDR_LEN -
> +             RTE_ETHER_CRC_LEN;

Subtract requires overflow check. May max_pkt_size be 0 or just
smaller that RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN?

> +     if (port_conf.rxmode.mtu > RTE_ETHER_MTU)
>               port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
>  
>       t->internal_port = 1;
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> index 8468018cf35d..8bdc042f6e8e 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -1892,43 +1892,36 @@ cmd_config_max_pkt_len_parsed(void *parsed_result,
>                               __rte_unused void *data)
>  {
>       struct cmd_config_max_pkt_len_result *res = parsed_result;
> -     uint32_t max_rx_pkt_len_backup = 0;
> -     portid_t pid;
> +     portid_t port_id;
>       int ret;
>  
> +     if (strcmp(res->name, "max-pkt-len")) {
> +             printf("Unknown parameter\n");
> +             return;
> +     }
> +
>       if (!all_ports_stopped()) {
>               printf("Please stop all ports first\n");
>               return;
>       }
>  
> -     RTE_ETH_FOREACH_DEV(pid) {
> -             struct rte_port *port = &ports[pid];
> -
> -             if (!strcmp(res->name, "max-pkt-len")) {
> -                     if (res->value < RTE_ETHER_MIN_LEN) {
> -                             printf("max-pkt-len can not be less than %d\n",
> -                                             RTE_ETHER_MIN_LEN);
> -                             return;
> -                     }
> -                     if (res->value == port->dev_conf.rxmode.max_rx_pkt_len)
> -                             return;
> -
> -                     ret = eth_dev_info_get_print_err(pid, &port->dev_info);
> -                     if (ret != 0) {
> -                             printf("rte_eth_dev_info_get() failed for port 
> %u\n",
> -                                     pid);
> -                             return;
> -                     }
> +     RTE_ETH_FOREACH_DEV(port_id) {
> +             struct rte_port *port = &ports[port_id];
>  
> -                     max_rx_pkt_len_backup = 
> port->dev_conf.rxmode.max_rx_pkt_len;
> +             if (res->value < RTE_ETHER_MIN_LEN) {
> +                     printf("max-pkt-len can not be less than %d\n",

fprintf() to stderr, please.
Here and in a number of places below.

> +                                     RTE_ETHER_MIN_LEN);
> +                     return;
> +             }
>  
> -                     port->dev_conf.rxmode.max_rx_pkt_len = res->value;
> -                     if (update_jumbo_frame_offload(pid) != 0)
> -                             port->dev_conf.rxmode.max_rx_pkt_len = 
> max_rx_pkt_len_backup;
> -             } else {
> -                     printf("Unknown parameter\n");
> +             ret = eth_dev_info_get_print_err(port_id, &port->dev_info);
> +             if (ret != 0) {
> +                     printf("rte_eth_dev_info_get() failed for port %u\n",
> +                             port_id);
>                       return;
>               }
> +
> +             update_jumbo_frame_offload(port_id, res->value);
>       }
>  
>       init_port_config();
> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> index 04ae0feb5852..a87265d7638b 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c

[snip]

> @@ -1155,20 +1154,17 @@ port_mtu_set(portid_t port_id, uint16_t mtu)
>               return;
>       }
>       diag = rte_eth_dev_set_mtu(port_id, mtu);
> -     if (diag)
> +     if (diag) {
>               printf("Set MTU failed. diag=%d\n", diag);
> -     else if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) {
> -             /*
> -              * Ether overhead in driver is equal to the difference of
> -              * max_rx_pktlen and max_mtu in rte_eth_dev_info when the
> -              * device supports jumbo frame.
> -              */
> -             eth_overhead = dev_info.max_rx_pktlen - dev_info.max_mtu;
> +             return;
> +     }
> +
> +     rte_port->dev_conf.rxmode.mtu = mtu;
> +
> +     if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) {
>               if (mtu > RTE_ETHER_MTU) {
>                       rte_port->dev_conf.rxmode.offloads |=
>                                               DEV_RX_OFFLOAD_JUMBO_FRAME;
> -                     rte_port->dev_conf.rxmode.max_rx_pkt_len =
> -                                             mtu + eth_overhead;
>               } else

I guess curly brackets should be removed now.

>                       rte_port->dev_conf.rxmode.offloads &=
>                                               ~DEV_RX_OFFLOAD_JUMBO_FRAME;

[snip]

> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index 1cdd3cdd12b6..2c79cae05664 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c

[snip]

> @@ -1465,7 +1473,7 @@ init_config(void)
>                       rte_exit(EXIT_FAILURE,
>                                "rte_eth_dev_info_get() failed\n");
>  
> -             ret = update_jumbo_frame_offload(pid);
> +             ret = update_jumbo_frame_offload(pid, 0);
>               if (ret != 0)
>                       printf("Updating jumbo frame offload failed for port 
> %u\n",
>                               pid);
> @@ -1512,14 +1520,19 @@ init_config(void)
>                */
>               if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
>                               port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) 
> {
> -                     data_size = rx_mode.max_rx_pkt_len /
> -                             port->dev_info.rx_desc_lim.nb_mtu_seg_max;
> +                     uint32_t eth_overhead = 
> get_eth_overhead(&port->dev_info);
> +                     uint16_t mtu;
>  
> -                     if ((data_size + RTE_PKTMBUF_HEADROOM) >
> +                     if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
> +                             data_size = mtu + eth_overhead /
> +                                     
> port->dev_info.rx_desc_lim.nb_mtu_seg_max;
> +
> +                             if ((data_size + RTE_PKTMBUF_HEADROOM) >

Unnecessary parenthesis.

>                                                       mbuf_data_size[0]) {
> -                             mbuf_data_size[0] = data_size +
> -                                              RTE_PKTMBUF_HEADROOM;
> -                             warning = 1;
> +                                     mbuf_data_size[0] = data_size +
> +                                                      RTE_PKTMBUF_HEADROOM;
> +                                     warning = 1;
> +                             }
>                       }
>               }
>       }

[snip]

> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index c515de3bf71d..0a8d29277aeb 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -1627,13 +1627,8 @@ tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
>  {
>       struct pmd_internals *pmd = dev->data->dev_private;
>       struct ifreq ifr = { .ifr_mtu = mtu };
> -     int err = 0;
>  
> -     err = tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE);
> -     if (!err)
> -             dev->data->mtu = mtu;
> -
> -     return err;
> +     return tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE);

The cleanup could be done separately before the patch, since
it just makes the long patch longer and unrelated in fact,
since assignment after callback is already done.

>  }
>  
>  static int

[snip]

> diff --git a/examples/ip_fragmentation/main.c 
> b/examples/ip_fragmentation/main.c
> index 77a6a18d1914..f97287ce2243 100644
> --- a/examples/ip_fragmentation/main.c
> +++ b/examples/ip_fragmentation/main.c
> @@ -146,7 +146,7 @@ struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
>  
>  static struct rte_eth_conf port_conf = {
>       .rxmode = {
> -             .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
> +             .mtu = JUMBO_FRAME_MAX_SIZE,

Before the patch JUMBO_FRAME_MAX_SIZE inluded overhad, but
after the patch it is used as it is does not include overhead.

There a number of similiar cases in other apps.

>               .split_hdr_size = 0,
>               .offloads = (DEV_RX_OFFLOAD_CHECKSUM |
>                            DEV_RX_OFFLOAD_SCATTER |

[snip]

> diff --git a/examples/ip_pipeline/link.c b/examples/ip_pipeline/link.c
> index 16bcffe356bc..8628db22f56b 100644
> --- a/examples/ip_pipeline/link.c
> +++ b/examples/ip_pipeline/link.c
> @@ -46,7 +46,7 @@ static struct rte_eth_conf port_conf_default = {
>       .link_speeds = 0,
>       .rxmode = {
>               .mq_mode = ETH_MQ_RX_NONE,
> -             .max_rx_pkt_len = 9000, /* Jumbo frame max packet len */
> +             .mtu = 9000, /* Jumbo frame MTU */

Strictly speaking 9000 included overhead before the patch and
does not include overhead after the patch.

There a number of similiar cases in other apps.

>               .split_hdr_size = 0, /* Header split buffer size */
>       },
>       .rx_adv_conf = {

[snip]

> diff --git a/examples/l3fwd-acl/main.c b/examples/l3fwd-acl/main.c
> index a1f457b564b6..913037d5f835 100644
> --- a/examples/l3fwd-acl/main.c
> +++ b/examples/l3fwd-acl/main.c

[snip]

> @@ -1833,12 +1832,12 @@ parse_args(int argc, char **argv)
>                                       print_usage(prgname);
>                                       return -1;
>                               }
> -                             port_conf.rxmode.max_rx_pkt_len = ret;
> +                             port_conf.rxmode.mtu = ret - (RTE_ETHER_HDR_LEN 
> +
> +                                     RTE_ETHER_CRC_LEN);
>                       }
> -                     printf("set jumbo frame max packet length "
> -                             "to %u\n",
> -                             (unsigned int)
> -                             port_conf.rxmode.max_rx_pkt_len);
> +                     printf("set jumbo frame max packet length to %u\n",
> +                             (unsigned int)port_conf.rxmode.mtu +
> +                             RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN);


I think that overhead should be obtainded from dev_info with
fallback to value used above.

There are many similar cases in other apps.

[snip]

> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index c607eabb5b0c..3451125639f9 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -1249,15 +1249,15 @@ rte_eth_dev_tx_offload_name(uint64_t offload)
>  
>  static inline int
>  eth_dev_check_lro_pkt_size(uint16_t port_id, uint32_t config_size,
> -                uint32_t max_rx_pkt_len, uint32_t dev_info_size)
> +                uint32_t max_rx_pktlen, uint32_t dev_info_size)
>  {
>       int ret = 0;
>  
>       if (dev_info_size == 0) {
> -             if (config_size != max_rx_pkt_len) {
> +             if (config_size != max_rx_pktlen) {
>                       RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%d max_lro_pkt_size"
>                                      " %u != %u is not allowed\n",
> -                                    port_id, config_size, max_rx_pkt_len);
> +                                    port_id, config_size, max_rx_pktlen);

This patch looks a bit unrelated and make the long patch
even more longer. May be it is better to do the cleanup
first (before the patch).

>                       ret = -EINVAL;
>               }
>       } else if (config_size > dev_info_size) {
> @@ -1325,6 +1325,19 @@ eth_dev_validate_offloads(uint16_t port_id, uint64_t 
> req_offloads,
>       return ret;
>  }
>  
> +static uint16_t
> +eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
> +{
> +     uint16_t overhead_len;
> +
> +     if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
> +             overhead_len = max_rx_pktlen - max_mtu;
> +     else
> +             overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> +
> +     return overhead_len;
> +}
> +
>  int
>  rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>                     const struct rte_eth_conf *dev_conf)
> @@ -1332,6 +1345,7 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t 
> nb_rx_q, uint16_t nb_tx_q,
>       struct rte_eth_dev *dev;
>       struct rte_eth_dev_info dev_info;
>       struct rte_eth_conf orig_conf;
> +     uint32_t max_rx_pktlen;
>       uint16_t overhead_len;
>       int diag;
>       int ret;
> @@ -1375,11 +1389,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t 
> nb_rx_q, uint16_t nb_tx_q,
>               goto rollback;
>  
>       /* Get the real Ethernet overhead length */
> -     if (dev_info.max_mtu != UINT16_MAX &&
> -         dev_info.max_rx_pktlen > dev_info.max_mtu)
> -             overhead_len = dev_info.max_rx_pktlen - dev_info.max_mtu;
> -     else
> -             overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> +     overhead_len = eth_dev_get_overhead_len(dev_info.max_rx_pktlen,
> +                     dev_info.max_mtu);
>  
>       /* If number of queues specified by application for both Rx and Tx is
>        * zero, use driver preferred values. This cannot be done individually
> @@ -1448,49 +1459,45 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t 
> nb_rx_q, uint16_t nb_tx_q,
>       }
>  
>       /*
> -      * If jumbo frames are enabled, check that the maximum RX packet
> -      * length is supported by the configured device.
> +      * Check that the maximum RX packet length is supported by the
> +      * configured device.
>        */
> -     if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
> -             if (dev_conf->rxmode.max_rx_pkt_len > dev_info.max_rx_pktlen) {
> -                     RTE_ETHDEV_LOG(ERR,
> -                             "Ethdev port_id=%u max_rx_pkt_len %u > max 
> valid value %u\n",
> -                             port_id, dev_conf->rxmode.max_rx_pkt_len,
> -                             dev_info.max_rx_pktlen);
> -                     ret = -EINVAL;
> -                     goto rollback;
> -             } else if (dev_conf->rxmode.max_rx_pkt_len < RTE_ETHER_MIN_LEN) 
> {
> -                     RTE_ETHDEV_LOG(ERR,
> -                             "Ethdev port_id=%u max_rx_pkt_len %u < min 
> valid value %u\n",
> -                             port_id, dev_conf->rxmode.max_rx_pkt_len,
> -                             (unsigned int)RTE_ETHER_MIN_LEN);
> -                     ret = -EINVAL;
> -                     goto rollback;
> -             }
> +     if (dev_conf->rxmode.mtu == 0)
> +             dev->data->dev_conf.rxmode.mtu = RTE_ETHER_MTU;
> +     max_rx_pktlen = dev->data->dev_conf.rxmode.mtu + overhead_len;
> +     if (max_rx_pktlen > dev_info.max_rx_pktlen) {
> +             RTE_ETHDEV_LOG(ERR,
> +                     "Ethdev port_id=%u max_rx_pktlen %u > max valid value 
> %u\n",
> +                     port_id, max_rx_pktlen, dev_info.max_rx_pktlen);
> +             ret = -EINVAL;
> +             goto rollback;
> +     } else if (max_rx_pktlen < RTE_ETHER_MIN_LEN) {
> +             RTE_ETHDEV_LOG(ERR,
> +                     "Ethdev port_id=%u max_rx_pktlen %u < min valid value 
> %u\n",
> +                     port_id, max_rx_pktlen, RTE_ETHER_MIN_LEN);
> +             ret = -EINVAL;
> +             goto rollback;
> +     }
>  
> -             /* Scale the MTU size to adapt max_rx_pkt_len */
> -             dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
> -                             overhead_len;
> -     } else {
> -             uint16_t pktlen = dev_conf->rxmode.max_rx_pkt_len;
> -             if (pktlen < RTE_ETHER_MIN_MTU + overhead_len ||
> -                 pktlen > RTE_ETHER_MTU + overhead_len)
> +     if ((dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
> +             if (dev->data->dev_conf.rxmode.mtu < RTE_ETHER_MIN_MTU ||
> +                             dev->data->dev_conf.rxmode.mtu > RTE_ETHER_MTU)
>                       /* Use default value */
> -                     dev->data->dev_conf.rxmode.max_rx_pkt_len =
> -                                             RTE_ETHER_MTU + overhead_len;
> +                     dev->data->dev_conf.rxmode.mtu = RTE_ETHER_MTU;

I don't understand it. It would be good to add comments to
explain logic above.

>       }
>  
> +     dev->data->mtu = dev->data->dev_conf.rxmode.mtu;
> +
>       /*
>        * If LRO is enabled, check that the maximum aggregated packet
>        * size is supported by the configured device.
>        */
>       if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_TCP_LRO) {
>               if (dev_conf->rxmode.max_lro_pkt_size == 0)
> -                     dev->data->dev_conf.rxmode.max_lro_pkt_size =
> -                             dev->data->dev_conf.rxmode.max_rx_pkt_len;
> +                     dev->data->dev_conf.rxmode.max_lro_pkt_size = 
> max_rx_pktlen;
>               ret = eth_dev_check_lro_pkt_size(port_id,
>                               dev->data->dev_conf.rxmode.max_lro_pkt_size,
> -                             dev->data->dev_conf.rxmode.max_rx_pkt_len,
> +                             max_rx_pktlen,
>                               dev_info.max_lro_pkt_size);
>               if (ret != 0)
>                       goto rollback;

[snip]

> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index faf3bd901d75..9f288f98329c 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -410,7 +410,7 @@ enum rte_eth_tx_mq_mode {
>  struct rte_eth_rxmode {
>       /** The multi-queue packet distribution mode to be used, e.g. RSS. */
>       enum rte_eth_rx_mq_mode mq_mode;
> -     uint32_t max_rx_pkt_len;  /**< Only used if JUMBO_FRAME enabled. */
> +     uint32_t mtu;  /**< Requested MTU. */

Maximum Transmit Unit looks a bit confusing in Rx mode
structure.

>       /** Maximum allowed size of LRO aggregated packet. */
>       uint32_t max_lro_pkt_size;
>       uint16_t split_hdr_size;  /**< hdr buf size (header_split enabled).*/

[snip]

Reply via email to