[PATCH net-next 8/8] net: ena: bug fix in lost tx packets detection mechanism
From: Netanel Belgazal check_for_missing_tx_completions() is called from a timer task and looking for lost tx packets. The old implementation accumulate all the lost tx packets and did not check if those packets were retrieved on a later stage. This cause to a situation where the driver reset the device for no reason. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 - drivers/net/ethernet/amazon/ena/ena_netdev.c | 66 +++ drivers/net/ethernet/amazon/ena/ena_netdev.h | 14 +- 3 files changed, 50 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 533b2fb..3ee55e2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -80,7 +80,6 @@ static const struct ena_stats ena_stats_tx_strings[] = { ENA_STAT_TX_ENTRY(tx_poll), ENA_STAT_TX_ENTRY(doorbells), ENA_STAT_TX_ENTRY(prepare_ctx_err), - ENA_STAT_TX_ENTRY(missing_tx_comp), ENA_STAT_TX_ENTRY(bad_req_id), }; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 3c366bf..4f16ed3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1995,6 +1995,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_info->tx_descs = nb_hw_desc; tx_info->last_jiffies = jiffies; + tx_info->print_once = 0; tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, tx_ring->ring_size); @@ -2564,13 +2565,44 @@ static void ena_fw_reset_device(struct work_struct *work) "Reset attempt failed. 
Can not reset the device\n"); } -static void check_for_missing_tx_completions(struct ena_adapter *adapter) +static int check_missing_comp_in_queue(struct ena_adapter *adapter, + struct ena_ring *tx_ring) { struct ena_tx_buffer *tx_buf; unsigned long last_jiffies; + u32 missed_tx = 0; + int i; + + for (i = 0; i < tx_ring->ring_size; i++) { + tx_buf = &tx_ring->tx_buffer_info[i]; + last_jiffies = tx_buf->last_jiffies; + if (unlikely(last_jiffies && +time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) { + if (!tx_buf->print_once) + netif_notice(adapter, tx_err, adapter->netdev, +"Found a Tx that wasn't completed on time, qid %d, index %d.\n", +tx_ring->qid, i); + + tx_buf->print_once = 1; + missed_tx++; + + if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) { + netif_err(adapter, tx_err, adapter->netdev, + "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n", + missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + return -EIO; + } + } + } + + return 0; +} + +static void check_for_missing_tx_completions(struct ena_adapter *adapter) +{ struct ena_ring *tx_ring; - int i, j, budget; - u32 missed_tx; + int i, budget, rc; /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -2586,31 +2618,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter) for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) { tx_ring = &adapter->tx_ring[i]; - for (j = 0; j < tx_ring->ring_size; j++) { - tx_buf = &tx_ring->tx_buffer_info[j]; - last_jiffies = tx_buf->last_jiffies; - if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) { - netif_notice(adapter, tx_err, adapter->netdev, -"Found a Tx that wasn't completed on time, qid %d, index %d.\n", -tx_ring->qid, j); - - u64_stats_update_begin(&tx_ring->syncp); - missed_tx = tx_ring->tx_stats.missing_tx_comp++; - u64_stats_update_end(&tx_ring->syncp); - - /* Clear last 
jiffies so the lost buffer won't -* be counted twice. -*/ - tx_buf->last_jiffies = 0; - - if (unlikely(m
[PATCH net-next 6/8] net: ena: fix theoretical Rx stuck on low memory systems
From: Netanel Belgazal For the rare case where the device runs out of free rx buffer descriptors (in case of pressure on kernel memory), and the napi handler continuously fail to refill new Rx descriptors until device rx queue totally runs out of all free rx buffers to post incoming packet, leading to a deadlock: * The device won't send interrupts since all the new Rx packets will be dropped. * The napi handler won't try to allocate new Rx descriptors since allocation is part of NAPI that's not being invoked any more The fix involves detecting this scenario and rescheduling NAPI (to refill buffers) by the keepalive/watchdog task. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 + drivers/net/ethernet/amazon/ena/ena_netdev.c | 55 +++ drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 + 3 files changed, 58 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 67b2338f..533b2fb 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -94,6 +94,7 @@ static const struct ena_stats ena_stats_rx_strings[] = { ENA_STAT_RX_ENTRY(dma_mapping_err), ENA_STAT_RX_ENTRY(bad_desc_num), ENA_STAT_RX_ENTRY(rx_copybreak_pkt), + ENA_STAT_RX_ENTRY(empty_rx_ring), }; static const struct ena_stats ena_stats_ena_com_strings[] = { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 4e9fbdd..3c366bf 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -190,6 +190,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter) rxr->sgl_size = adapter->max_rx_sgl_size; rxr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + rxr->empty_rx_queue = 0; } } @@ -2619,6 +2620,58 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter) adapter->last_monitored_tx_qid = i % 
adapter->num_queues; } +/* trigger napi schedule after 2 consecutive detections */ +#define EMPTY_RX_REFILL 2 +/* For the rare case where the device runs out of Rx descriptors and the + * napi handler failed to refill new Rx descriptors (due to a lack of memory + * for example). + * This case will lead to a deadlock: + * The device won't send interrupts since all the new Rx packets will be dropped + * The napi handler won't allocate new Rx descriptors so the device will be + * able to send new packets. + * + * This scenario can happen when the kernel's vm.min_free_kbytes is too small. + * It is recommended to have at least 512MB, with a minimum of 128MB for + * constrained environment). + * + * When such a situation is detected - Reschedule napi + */ +static void check_for_empty_rx_ring(struct ena_adapter *adapter) +{ + struct ena_ring *rx_ring; + int i, refill_required; + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return; + + if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) + return; + + for (i = 0; i < adapter->num_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + + refill_required = + ena_com_sq_empty_space(rx_ring->ena_com_io_sq); + if (unlikely(refill_required == (rx_ring->ring_size - 1))) { + rx_ring->empty_rx_queue++; + + if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.empty_rx_ring++; + u64_stats_update_end(&rx_ring->syncp); + + netif_err(adapter, drv, adapter->netdev, + "trigger refill for ring %d\n", i); + + napi_schedule(rx_ring->napi); + rx_ring->empty_rx_queue = 0; + } + } else { + rx_ring->empty_rx_queue = 0; + } + } +} + /* Check for keep alive expiration */ static void check_for_missing_keep_alive(struct ena_adapter *adapter) { @@ -2673,6 +2726,8 @@ static void ena_timer_service(unsigned long data) check_for_missing_tx_completions(adapter); + check_for_empty_rx_ring(adapter); + if (debug_area) ena_dump_stats_to_buf(adapter, debug_area); diff --git 
a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 0e22bce..8828f1d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -184,6 +184,7 @@ struct ena_stats_rx { u64 dma_mapping_err; u64 bad_desc_num; u64 rx_copybrea
[PATCH net-next 7/8] net: ena: disable admin msix while working in polling mode
From: Netanel Belgazal Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_com.c | 8 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index ea60b9e..f5b237e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -61,6 +61,8 @@ #define ENA_MMIO_READ_TIMEOUT 0x +#define ENA_REGS_ADMIN_INTR_MASK 1 + /*/ /*/ /*/ @@ -1454,6 +1456,12 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev) void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling) { + u32 mask_value = 0; + + if (polling) + mask_value = ENA_REGS_ADMIN_INTR_MASK; + + writel(mask_value, ena_dev->reg_bar + ENA_REGS_INTR_MASK_OFF); ena_dev->admin_queue.polling = polling; } -- 2.7.4
[PATCH net-next 5/8] net: ena: add missing unmap bars on device removal
From: Netanel Belgazal This patch also change the mapping functions to devm_ functions Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 1e71e89..4e9fbdd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2853,6 +2853,11 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { int release_bars; + if (ena_dev->mem_bar) + devm_iounmap(&pdev->dev, ena_dev->mem_bar); + + devm_iounmap(&pdev->dev, ena_dev->reg_bar); + release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; pci_release_selected_regions(pdev, release_bars); } @@ -2940,8 +2945,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_free_ena_dev; } - ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR), - pci_resource_len(pdev, ENA_REG_BAR)); + ena_dev->reg_bar = devm_ioremap(&pdev->dev, + pci_resource_start(pdev, ENA_REG_BAR), + pci_resource_len(pdev, ENA_REG_BAR)); if (!ena_dev->reg_bar) { dev_err(&pdev->dev, "failed to remap regs bar\n"); rc = -EFAULT; @@ -2961,8 +2967,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ena_set_push_mode(pdev, ena_dev, &get_feat_ctx); if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { - ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR), - pci_resource_len(pdev, ENA_MEM_BAR)); + ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev, + pci_resource_start(pdev, ENA_MEM_BAR), + pci_resource_len(pdev, ENA_MEM_BAR)); if (!ena_dev->mem_bar) { rc = -EFAULT; goto err_device_destroy; -- 2.7.4
[PATCH net-next 3/8] net: ena: add missing return when ena_com_get_io_handlers() fails
From: Netanel Belgazal Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 0e3c60c7..1e71e89 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1543,6 +1543,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) "Failed to get TX queue handlers. TX queue num %d rc: %d\n", qid, rc); ena_com_destroy_io_queue(ena_dev, ena_qid); + return rc; } ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node); @@ -1607,6 +1608,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) "Failed to get RX queue handlers. RX queue num %d rc: %d\n", qid, rc); ena_com_destroy_io_queue(ena_dev, ena_qid); + return rc; } ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); -- 2.7.4
[PATCH net-next 6/8] net: ena: fix theoretical Rx hang on low memory systems
From: Netanel Belgazal For the rare case where the device runs out of free rx buffer descriptors (in case of pressure on kernel memory), and the napi handler continuously fail to refill new Rx descriptors until device rx queue totally runs out of all free rx buffers to post incoming packet, leading to a deadlock: * The device won't send interrupts since all the new Rx packets will be dropped. * The napi handler won't try to allocate new Rx descriptors since allocation is part of NAPI that's not being invoked any more The fix involves detecting this scenario and rescheduling NAPI (to refill buffers) by the keepalive/watchdog task. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 + drivers/net/ethernet/amazon/ena/ena_netdev.c | 55 +++ drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 + 3 files changed, 58 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 67b2338f..533b2fb 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -94,6 +94,7 @@ static const struct ena_stats ena_stats_rx_strings[] = { ENA_STAT_RX_ENTRY(dma_mapping_err), ENA_STAT_RX_ENTRY(bad_desc_num), ENA_STAT_RX_ENTRY(rx_copybreak_pkt), + ENA_STAT_RX_ENTRY(empty_rx_ring), }; static const struct ena_stats ena_stats_ena_com_strings[] = { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 4e9fbdd..3c366bf 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -190,6 +190,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter) rxr->sgl_size = adapter->max_rx_sgl_size; rxr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + rxr->empty_rx_queue = 0; } } @@ -2619,6 +2620,58 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter) adapter->last_monitored_tx_qid = i % 
adapter->num_queues; } +/* trigger napi schedule after 2 consecutive detections */ +#define EMPTY_RX_REFILL 2 +/* For the rare case where the device runs out of Rx descriptors and the + * napi handler failed to refill new Rx descriptors (due to a lack of memory + * for example). + * This case will lead to a deadlock: + * The device won't send interrupts since all the new Rx packets will be dropped + * The napi handler won't allocate new Rx descriptors so the device will be + * able to send new packets. + * + * This scenario can happen when the kernel's vm.min_free_kbytes is too small. + * It is recommended to have at least 512MB, with a minimum of 128MB for + * constrained environment). + * + * When such a situation is detected - Reschedule napi + */ +static void check_for_empty_rx_ring(struct ena_adapter *adapter) +{ + struct ena_ring *rx_ring; + int i, refill_required; + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return; + + if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) + return; + + for (i = 0; i < adapter->num_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + + refill_required = + ena_com_sq_empty_space(rx_ring->ena_com_io_sq); + if (unlikely(refill_required == (rx_ring->ring_size - 1))) { + rx_ring->empty_rx_queue++; + + if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.empty_rx_ring++; + u64_stats_update_end(&rx_ring->syncp); + + netif_err(adapter, drv, adapter->netdev, + "trigger refill for ring %d\n", i); + + napi_schedule(rx_ring->napi); + rx_ring->empty_rx_queue = 0; + } + } else { + rx_ring->empty_rx_queue = 0; + } + } +} + /* Check for keep alive expiration */ static void check_for_missing_keep_alive(struct ena_adapter *adapter) { @@ -2673,6 +2726,8 @@ static void ena_timer_service(unsigned long data) check_for_missing_tx_completions(adapter); + check_for_empty_rx_ring(adapter); + if (debug_area) ena_dump_stats_to_buf(adapter, debug_area); diff --git 
a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 0e22bce..8828f1d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -184,6 +184,7 @@ struct ena_stats_rx { u64 dma_mapping_err; u64 bad_desc_num; u64 rx_copybrea
[PATCH net-next 2/8] net: ena: fix bug that might cause hang after consecutive open/close interface.
From: Netanel Belgazal Fixing a bug that the driver does not unmask the IO interrupts in ndo_open(): occasionally, the MSI-X interrupt (for one or more IO queues) can be masked when ndo_close() was called. If that is followed by ndo open(), then the MSI-X will be still masked so no interrupt will be received by the driver. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 41 ++-- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 7c1214d..0e3c60c7 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1078,6 +1078,26 @@ inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring, rx_ring->per_napi_bytes = 0; } +static inline void ena_unmask_interrupt(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) +{ + struct ena_eth_io_intr_reg intr_reg; + + /* Update intr register: rx intr delay, +* tx intr delay and interrupt unmask +*/ + ena_com_update_intr_reg(&intr_reg, + rx_ring->smoothed_interval, + tx_ring->smoothed_interval, + true); + + /* It is a shared MSI-X. +* Tx and Rx CQ have pointer to it. 
+* So we use one of them to reach the intr reg +*/ + ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); +} + static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { @@ -1108,7 +1128,6 @@ static int ena_io_poll(struct napi_struct *napi, int budget) { struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); struct ena_ring *tx_ring, *rx_ring; - struct ena_eth_io_intr_reg intr_reg; u32 tx_work_done; u32 rx_work_done; @@ -1149,22 +1168,9 @@ static int ena_io_poll(struct napi_struct *napi, int budget) if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) ena_adjust_intr_moderation(rx_ring, tx_ring); - /* Update intr register: rx intr delay, -* tx intr delay and interrupt unmask -*/ - ena_com_update_intr_reg(&intr_reg, - rx_ring->smoothed_interval, - tx_ring->smoothed_interval, - true); - - /* It is a shared MSI-X. -* Tx and Rx CQ have pointer to it. -* So we use one of them to reach the intr reg -*/ - ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); + ena_unmask_interrupt(tx_ring, rx_ring); } - ena_update_ring_numa_node(tx_ring, rx_ring); ret = rx_work_done; @@ -1485,6 +1491,11 @@ static int ena_up_complete(struct ena_adapter *adapter) ena_napi_enable_all(adapter); + /* Enable completion queues interrupt */ + for (i = 0; i < adapter->num_queues; i++) + ena_unmask_interrupt(&adapter->tx_ring[i], +&adapter->rx_ring[i]); + /* schedule napi in case we had pending packets * from the last time we disable napi */ -- 2.7.4
[PATCH net-next 1/8] net: ena: fix rare uncompleted admin command false alarm
From: Netanel Belgazal The current flow to detect admin completion is: while (command_not_completed) { if (timeout) error check_for_completion() sleep() } So in case the sleep took more than the timeout (in case the thread/workqueue was not scheduled due to higher priority task or prolonged VMexit), the driver can detect a stall even if the completion is present. The fix changes the order of this function to first check for completion and only after that check if the timeout expired. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_com.c | 21 +++-- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 08d11ce..e1c2fab 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -508,15 +508,20 @@ static int ena_com_comp_status_to_errno(u8 comp_status) static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx, struct ena_com_admin_queue *admin_queue) { - unsigned long flags; - u32 start_time; + unsigned long flags, timeout; int ret; - start_time = ((u32)jiffies_to_usecs(jiffies)); + timeout = jiffies + ADMIN_CMD_TIMEOUT_US; + + while (1) { + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_com_handle_admin_completion(admin_queue); + spin_unlock_irqrestore(&admin_queue->q_lock, flags); - while (comp_ctx->status == ENA_CMD_SUBMITTED) { - if u32)jiffies_to_usecs(jiffies)) - start_time) > - ADMIN_CMD_TIMEOUT_US) { + if (comp_ctx->status != ENA_CMD_SUBMITTED) + break; + + if (time_is_before_jiffies(timeout)) { pr_err("Wait for completion (polling) timeout\n"); /* ENA didn't have any completion */ spin_lock_irqsave(&admin_queue->q_lock, flags); @@ -528,10 +533,6 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c goto err; } - spin_lock_irqsave(&admin_queue->q_lock, flags); - ena_com_handle_admin_completion(admin_queue); - 
spin_unlock_irqrestore(&admin_queue->q_lock, flags); - msleep(100); } -- 2.7.4
[PATCH net-next 0/8] Bug fixes in ena ethernet driver
From: Netanel Belgazal This patchset contains fixes for the bugs that were discovered so far. Netanel Belgazal (8): net: ena: fix rare uncompleted admin command false alarm net: ena: fix bug that might cause hang after consecutive open/close interface. net: ena: add missing return when ena_com_get_io_handlers() fails net: ena: fix race condition between submit and completion admin command net: ena: add missing unmap bars on device removal net: ena: fix theoretical Rx hang on low memory systems net: ena: disable admin msix while working in polling mode net: ena: bug fix in lost tx packets detection mechanism drivers/net/ethernet/amazon/ena/ena_com.c | 35 +++-- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 2 +- drivers/net/ethernet/amazon/ena/ena_netdev.c | 179 +++--- drivers/net/ethernet/amazon/ena/ena_netdev.h | 16 ++- 4 files changed, 168 insertions(+), 64 deletions(-) -- 2.7.4
[PATCH net-next 4/8] net: ena: fix race condition between submit and completion admin command
From: Netanel Belgazal Bug: "Completion context is occupied" error printout will be noticed in dmesg. This error will cause the admin command to fail, which will lead to an ena_probe() failure or a watchdog reset (depending on which admin command failed). Root cause: __ena_com_submit_admin_cmd() is the function that submits new entries to the admin queue. The function has a check that makes sure the queue is not full and that the function does not overwrite any outstanding command. It uses the head and tail indexes for this check. The head is increased by ena_com_handle_admin_completion(), which runs from interrupt context, and the tail index is increased by the submit function (the function runs under ->q_lock, so there is no risk of concurrent increments). Each command is associated with a completion context. This context is allocated before the call to __ena_com_submit_admin_cmd() and is freed by ena_com_wait_and_process_admin_cq_interrupts(), right after the command has completed. This can lead to a state where the head was increased and the check passed, but the completion context is still in use. Solution: Use the atomic variable ->outstanding_cmds instead of using the head and the tail indexes. 
This variable is safe for use since it is bumped in get_comp_ctx() in __ena_com_submit_admin_cmd() and is freed by comp_ctxt_release() Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_com.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index e1c2fab..ea60b9e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -232,11 +232,9 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu tail_masked = admin_queue->sq.tail & queue_size_mask; /* In case of queue FULL */ - cnt = admin_queue->sq.tail - admin_queue->sq.head; + cnt = atomic_read(&admin_queue->outstanding_cmds); if (cnt >= admin_queue->q_depth) { - pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n", -admin_queue->sq.tail, admin_queue->sq.head, -admin_queue->q_depth); + pr_debug("admin queue is full.\n"); admin_queue->stats.out_of_space++; return ERR_PTR(-ENOSPC); } -- 2.7.4
Re: [PATCH v2 7/8] net: mvmdio: add xmdio support
Hi Andrew, On Thu, Jun 08, 2017 at 06:03:31PM +0200, Andrew Lunn wrote: > On Thu, Jun 08, 2017 at 11:26:52AM +0200, Antoine Tenart wrote: > > +#define MVMDIO_XSMI_MGNT_REG 0x0 > > +#define MVMDIO_XSMI_READ_VALID BIT(29) > > +#define MVMDIO_XSMI_BUSY BIT(30) > > +#define MVMDIO_XSMI_ADDR_REG 0x8 > > +#define MVMDIO_XSMI_PHYADDR_SHIFT 16 > > +#define MVMDIO_XSMI_DEVADDR_SHIFT 21 > > +#define MVMDIO_XSMI_READ_OPERATION (0x7 << 26) > > +#define MVMDIO_XSMI_WRITE_OPERATION (0x5 << 27) > > These two operations seem odd. Generally ops have the same shift. Indeed, this is odd. I'll have a look at this. Thanks, Antoine -- Antoine Ténart, Free Electrons Embedded Linux and Kernel engineering http://free-electrons.com signature.asc Description: PGP signature
Re: [PATCH v2 7/8] net: mvmdio: add xmdio support
Hello Florian, Andrew, On Thu, Jun 08, 2017 at 06:55:46PM +0200, Andrew Lunn wrote: > On Thu, Jun 08, 2017 at 09:42:21AM -0700, Florian Fainelli wrote: > > On 06/08/2017 02:26 AM, Antoine Tenart wrote: > > > This patch adds the xMDIO interface support in the mvmdio driver. This > > > interface is used in Ethernet controllers on Marvell 370, 7k and 8k (as > > > of now). The xSMI interface supported by this driver complies with the > > > IEEE 802.3 clause 45 (while the SMI interface complies with the clause > > > 22). The xSMI interface is used by 10GbE devices. > > > > In the previous version you were properly defining a new compatible > > string for xmdio, but now you don't and instead you runtime select the > > operations based on whether MII_ADDR_C45 is set in the register which is > > fine from a functional perspective. > > > > If I get this right, the xMDIO controller is actually a superset of the > > MDIO controller and has an extra MVMDIO_XSMI_ADDR_REG register to > > perform C45 accesses? > > > > If that is the case (and looking at patch 8 that seems to be the case), > > you probably still need to define a new compatible string for that > > block, because it has a different register layout than its predecessor. > > Yes, I think you need the compatible string to return -EOPNOTSUPP when > somebody tries to do a C45 access on the older IP which only has C22. That's a very good point. I'll update the series to fix this. Thanks! Antoine -- Antoine Ténart, Free Electrons Embedded Linux and Kernel engineering http://free-electrons.com signature.asc Description: PGP signature
Re: [PATCH net] ipv4: igmp: fix a use after free
On Fri, Jun 9, 2017 at 2:05 PM, Cong Wang wrote: > On Thu, Jun 8, 2017 at 6:37 PM, Eric Dumazet wrote: >> On Thu, 2017-06-08 at 17:59 -0700, Cong Wang wrote: >>> On Thu, Jun 8, 2017 at 1:33 PM, Eric Dumazet wrote: >>> > I mentioned (in https://lkml.org/lkml/2017/5/31/619 ) that we might need >>> > to defer freeing after rcu grace period but for some reason decided it >>> > was not needed. >>> >>> This one makes sense, it is the second time I saw the use-after-free >>> in igmp code, both are because we don't respect the RCU rule to free >>> an element in the list. >>> >>> > >>> > What about : >>> >>> But not sure if all ip_ma_put() callers want ip_mc_clear_src(). >> >> That would lead to a memory leak if this was the case ? > > Maybe, but looking at igmpv3_clear_delrec() again, seems > we can just acquire pmc->lock in ip_mc_clear_src() to serialize > with the readers? I just refreshed the page and saw your reply; it proposes the same approach I suggested in the reply I just sent.
Re: [PATCH net] ipv4: igmp: fix a use after free
On Fri, Jun 9, 2017 at 8:59 AM, Cong Wang wrote: > On Thu, Jun 8, 2017 at 1:33 PM, Eric Dumazet wrote: >> I mentioned (in https://lkml.org/lkml/2017/5/31/619 ) that we might need >> to defer freeing after rcu grace period but for some reason decided it >> was not needed. Yes, this one could fix it. > > This one makes sense, it is the second time I saw the use-after-free > in igmp code, both are because we don't respect the RCU rule to free > an element in the list. > >> >> What about : > > But not sure if all ip_ma_put() callers want ip_mc_clear_src(). If that's a problem, there may be another way: leave ip_mc_clear_src as it is and just take pmc->lock to protect this call. This use-after-free was actually caused by add_grec using pmc->sources/tomb while ip_mc_clear_src is freeing them. add_grec already runs under pmc->lock, so taking pmc->lock in ip_mc_clear_src should be enough to protect the pmc->sources/tomb lists. What do you think?
ixgbe tx hang with XDP_TX beyond queue limit
Hi, I am doing some XDP testing on a dual socket, combined 40 core machine with ixgbe. I have found that with the default settings, depending on which core a packet is received on, the xdp tx queue will hang with: ixgbe :01:00.0 eno1: Detected Tx Unit Hang (XDP) Tx Queue <38> TDH, TDT <0>, <8> next_to_use <8> next_to_clean<0> tx_buffer_info[next_to_clean] time_stamp <0> jiffies <101f21bb8> ixgbe :01:00.0 eno1: tx hang 1 detected on queue 38, resetting adapter ixgbe :01:00.0 eno1: initiating reset due to tx timeout ixgbe :01:00.0 eno1: Reset adapter When the receiving core is such that the xdp queue index falls beyond MAX_TX_QUEUES, the hang occurs. In other words, if I leave `ethtool -L eno1 combined 40` (the default), and a packet is received on core 24 or greater, it hangs. However, if I lower the tx queue count to 24 (since XDP is forced to nr_cpu_ids), or if I force the incoming packets onto core < 24 with an ntuple filter, then no hang occurs. I imagine that some limit on the number of queues is in order here, or some error reporting when loading the xdp program/allocating queues. For now I am working around this by lowering the rx queue count to leave space for the xdp queues. Thanks, Brenden
Re: [PATCH net] ipv4: igmp: fix a use after free
On Thu, Jun 8, 2017 at 6:37 PM, Eric Dumazet wrote: > On Thu, 2017-06-08 at 17:59 -0700, Cong Wang wrote: >> On Thu, Jun 8, 2017 at 1:33 PM, Eric Dumazet wrote: >> > I mentioned (in https://lkml.org/lkml/2017/5/31/619 ) that we might need >> > to defer freeing after rcu grace period but for some reason decided it >> > was not needed. >> >> This one makes sense, it is the second time I saw the use-after-free >> in igmp code, both are because we don't respect the RCU rule to free >> an element in the list. >> >> > >> > What about : >> >> But not sure if all ip_ma_put() callers want ip_mc_clear_src(). > > That would lead to a memory leak if this was the case ? Maybe, but looking at igmpv3_clear_delrec() again, seems we can just acquire pmc->lock in ip_mc_clear_src() to serialize with the readers?
Re: Repeatable inet6_dump_fib crash in stock 4.12.0-rc4+
On Thu, Jun 8, 2017 at 2:27 PM, Ben Greear wrote: > > As far as I can tell, the patch did not help, or at least we still reproduce > the > crash easily. netlink dump is serialized by nlk->cb_mutex so I don't think that patch makes any sense w.r.t race condition. > (gdb) l *(fib6_walk_continue+0x76) > 0x188c6 is in fib6_walk_continue > (/home/greearb/git/linux-2.6/net/ipv6/ip6_fib.c:1593). > 1588if (fn == w->root) > 1589return 0; > 1590pn = fn->parent; > 1591w->node = pn; > 1592#ifdef CONFIG_IPV6_SUBTREES > 1593if (FIB6_SUBTREE(pn) == fn) { Apparently fn->parent is NULL here for some reason, but I don't know if that is expected or not. If a simple NULL check is not enough here, we have to trace why it is NULL. > 1594WARN_ON(!(fn->fn_flags & RTN_ROOT)); > 1595w->state = FWS_L; > 1596continue; > 1597} > (gdb) l *(inet6_dump_fib+0x1ab) > 0x1939b is in inet6_dump_fib > (/home/greearb/git/linux-2.6/net/ipv6/ip6_fib.c:392). > 387 w->skip = w->count; > 388 } else > 389 w->skip = 0; > 390 > 391 res = fib6_walk_continue(w); > 392 read_unlock_bh(&table->tb6_lock); > 393 if (res <= 0) { > 394 fib6_walker_unlink(net, w); > 395 cb->args[4] = 0; > 396 } > (gdb) Thanks.
[PATCH net-next v2] cxgb4: handle interrupt raised when FW crashes
From: Rahul Lakkireddy Handle TIMER0INT when FW crashes. Check for PCIE_FW[FW_EVAL] and if it says "Device FW Crashed", then treat it as fatal. Else, non-fatal. Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar --- v2: Following the reverse chirstmas tree variable ordering --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c| 19 ++- drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 4 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 4 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index da1322d..16af646 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -4040,6 +4040,7 @@ static void cim_intr_handler(struct adapter *adapter) { MBHOSTPARERR_F, "CIM mailbox host parity error", -1, 1 }, { TIEQINPARERRINT_F, "CIM TIEQ outgoing parity error", -1, 1 }, { TIEQOUTPARERRINT_F, "CIM TIEQ incoming parity error", -1, 1 }, + { TIMER0INT_F, "CIM TIMER0 interrupt", -1, 1 }, { 0 } }; static const struct intr_info cim_upintr_info[] = { @@ -4074,11 +4075,27 @@ static void cim_intr_handler(struct adapter *adapter) { 0 } }; + u32 val, fw_err; int fat; - if (t4_read_reg(adapter, PCIE_FW_A) & PCIE_FW_ERR_F) + fw_err = t4_read_reg(adapter, PCIE_FW_A); + if (fw_err & PCIE_FW_ERR_F) t4_report_fw_error(adapter); + /* When the Firmware detects an internal error which normally +* wouldn't raise a Host Interrupt, it forces a CIM Timer0 interrupt +* in order to make sure the Host sees the Firmware Crash. So +* if we have a Timer0 interrupt and don't see a Firmware Crash, +* ignore the Timer0 interrupt. 
+*/ + + val = t4_read_reg(adapter, CIM_HOST_INT_CAUSE_A); + if (val & TIMER0INT_F) + if (!(fw_err & PCIE_FW_ERR_F) || + (PCIE_FW_EVAL_G(fw_err) != PCIE_FW_EVAL_CRASH)) + t4_write_reg(adapter, CIM_HOST_INT_CAUSE_A, +TIMER0INT_F); + fat = t4_handle_intr_status(adapter, CIM_HOST_INT_CAUSE_A, cim_intr_info) + t4_handle_intr_status(adapter, CIM_HOST_UPACC_INT_CAUSE_A, diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 3348d33..3884336 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1077,6 +1077,10 @@ #define TIEQINPARERRINT_V(x) ((x) << TIEQINPARERRINT_S) #define TIEQINPARERRINT_FTIEQINPARERRINT_V(1U) +#define TIMER0INT_S2 +#define TIMER0INT_V(x) ((x) << TIMER0INT_S) +#define TIMER0INT_FTIMER0INT_V(1U) + #define PREFDROPINT_S1 #define PREFDROPINT_V(x) ((x) << PREFDROPINT_S) #define PREFDROPINT_FPREFDROPINT_V(1U) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index c65c33c..f47461a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -3088,6 +3088,10 @@ struct fw_debug_cmd { #define FW_DEBUG_CMD_TYPE_G(x) \ (((x) >> FW_DEBUG_CMD_TYPE_S) & FW_DEBUG_CMD_TYPE_M) +enum pcie_fw_eval { + PCIE_FW_EVAL_CRASH = 0, +}; + #define PCIE_FW_ERR_S 31 #define PCIE_FW_ERR_V(x) ((x) << PCIE_FW_ERR_S) #define PCIE_FW_ERR_F PCIE_FW_ERR_V(1U) -- 2.1.0
[PATCH net-next 1/2] bpf: Fix test_bpf_obj_id() when the bpf_jit_enable sysctl is disabled
test_bpf_obj_id() should not expect a non zero jited_prog_len to be returned by bpf_obj_get_info_by_fd() when net.core.bpf_jit_enable is 0. The patch checks for net.core.bpf_jit_enable and has different expectation on jited_prog_len. This patch also removes the pwd.h header which I forgot to remove after making changes. Fixes: 95b9afd3987f ("bpf: Test for bpf ID") Reported-by: Yonghong Song Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/test_progs.c | 19 +++ 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 8189bfc7e277..fec13ab84fca 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -23,7 +23,7 @@ typedef __u16 __sum16; #include #include #include -#include +#include #include #include @@ -297,6 +297,7 @@ static void test_bpf_obj_id(void) const __u32 array_key = 0; const int nr_iters = 2; const char *file = "./test_obj_id.o"; + const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; struct bpf_object *objs[nr_iters]; int prog_fds[nr_iters], map_fds[nr_iters]; @@ -305,9 +306,18 @@ static void test_bpf_obj_id(void) struct bpf_map_info map_infos[nr_iters + 1]; char jited_insns[128], xlated_insns[128]; __u32 i, next_id, info_len, nr_id_found, duration = 0; - int err = 0; + int sysctl_fd, jit_enabled = 0, err = 0; __u64 array_value; + sysctl_fd = open(jit_sysctl, 0, O_RDONLY); + if (sysctl_fd != -1) { + char tmpc; + + if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) + jit_enabled = (tmpc != '0'); + close(sysctl_fd); + } + err = bpf_prog_get_fd_by_id(0); CHECK(err >= 0 || errno != ENOENT, "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); @@ -339,13 +349,14 @@ static void test_bpf_obj_id(void) if (CHECK(err || prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER || info_len != sizeof(struct bpf_prog_info) || - !prog_infos[i].jited_prog_len || + (jit_enabled && 
!prog_infos[i].jited_prog_len) || !prog_infos[i].xlated_prog_len, "get-prog-info(fd)", - "err %d errno %d i %d type %d(%d) info_len %u(%lu) jited_prog_len %u xlated_prog_len %u\n", + "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u\n", err, errno, i, prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, info_len, sizeof(struct bpf_prog_info), + jit_enabled, prog_infos[i].jited_prog_len, prog_infos[i].xlated_prog_len)) goto done; -- 2.9.3
[PATCH net-next 2/2] bpf: Fix test_obj_id.c for llvm 5.0
llvm 5.0 does not like the section name and the function name to be the same: clang -I. -I./include/uapi -I../../../include/uapi \ -I../../../../samples/bpf/ \ -Wno-compare-distinct-pointer-types \ -O2 -target bpf -c \ linux/tools/testing/selftests/bpf/test_obj_id.c -o \ linux/tools/testing/selftests/bpf/test_obj_id.o fatal error: error in backend: 'test_prog_id' label emitted multiple times to assembly file clang-5.0: error: clang frontend command failed with exit code 70 (use -v to see invocation) clang version 5.0.0 (trunk 304326) (llvm/trunk 304329) This patch makes changes to the section name and the function name. Fixes: 95b9afd3987f ("bpf: Test for bpf ID") Reported-by: Alexei Starovoitov Reported-by: Yonghong Song Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/test_obj_id.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_obj_id.c b/tools/testing/selftests/bpf/test_obj_id.c index d8723aaf827a..880d2963b472 100644 --- a/tools/testing/selftests/bpf/test_obj_id.c +++ b/tools/testing/selftests/bpf/test_obj_id.c @@ -23,8 +23,8 @@ struct bpf_map_def SEC("maps") test_map_id = { .max_entries = 1, }; -SEC("test_prog_id") -int test_prog_id(struct __sk_buff *skb) +SEC("test_obj_id_dummy") +int test_obj_id(struct __sk_buff *skb) { __u32 key = 0; __u64 *value; -- 2.9.3
[PATCH] netfilter: ctnetlink: move CTA_TIMEOUT case to outside
When cda[CTA_TIMEOUT] is zero, ctnetlink_new_conntrack will free allocated ct and return, so move it to outside to optimize this situation. Signed-off-by: Haishuang Yan --- net/netfilter/nf_conntrack_netlink.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a8be9b7..d1e6b1c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1768,9 +1768,6 @@ static int change_seq_adj(struct nf_ct_seqadj *seq, if (IS_ERR(ct)) return ERR_PTR(-ENOMEM); - if (!cda[CTA_TIMEOUT]) - goto err1; - ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; rcu_read_lock(); @@ -1944,7 +1941,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, if (nlh->nlmsg_flags & NLM_F_CREATE) { enum ip_conntrack_events events; - if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY]) + if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY] || !cda[CTA_TIMEOUT]) return -EINVAL; if (otuple.dst.protonum != rtuple.dst.protonum) return -EINVAL; -- 1.8.3.1
[PATCH net-next 2/5] nfp: remove automatic caching of RTsym table
The fact that RTsym table is cached inside nfp_cpp handle is a relic of old times when nfpcore was a library module. All the nfp_cpp "caches" are awkward to deal with because of concurrency and prone to keeping stale information. Make the run time symbol table be an object read out from the device and managed by whoever requested it. Since the driver loads FW at ->probe() and never reloads, we can hold onto the table for ever. Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_main.c | 6 +- drivers/net/ethernet/netronome/nfp/nfp_main.h | 3 + drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 4 +- .../net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h | 4 - .../ethernet/netronome/nfp/nfpcore/nfp_cppcore.c | 26 - .../net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h | 13 ++- .../net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c | 8 +- .../net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c | 111 - 8 files changed, 63 insertions(+), 112 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 0c2e64d217b5..51fe8de34b67 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -77,7 +77,7 @@ static int nfp_pcie_sriov_read_nfd_limit(struct nfp_pf *pf) { int err; - pf->limit_vfs = nfp_rtsym_read_le(pf->cpp, "nfd_vf_cfg_max_vfs", &err); + pf->limit_vfs = nfp_rtsym_read_le(pf->rtbl, "nfd_vf_cfg_max_vfs", &err); if (!err) return pci_sriov_set_totalvfs(pf->pdev, pf->limit_vfs); @@ -373,6 +373,8 @@ static int nfp_pci_probe(struct pci_dev *pdev, if (err) goto err_devlink_unreg; + pf->rtbl = nfp_rtsym_table_read(pf->cpp); + err = nfp_pcie_sriov_read_nfd_limit(pf); if (err) goto err_fw_unload; @@ -394,6 +396,7 @@ static int nfp_pci_probe(struct pci_dev *pdev, err_sriov_unlimit: pci_sriov_set_totalvfs(pf->pdev, 0); err_fw_unload: + kfree(pf->rtbl); if (pf->fw_loaded) nfp_fw_unload(pf); kfree(pf->eth_tbl); @@ -430,6 +433,7 @@ static void 
nfp_pci_remove(struct pci_dev *pdev) devlink_unregister(devlink); + kfree(pf->rtbl); if (pf->fw_loaded) nfp_fw_unload(pf); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h index 37832853b0b3..907852f00423 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h @@ -56,6 +56,7 @@ struct nfp_cpp_area; struct nfp_eth_table; struct nfp_net; struct nfp_nsp_identify; +struct nfp_rtsym_table; /** * struct nfp_pf - NFP PF-specific device structure @@ -70,6 +71,7 @@ struct nfp_nsp_identify; * @num_vfs: Number of SR-IOV VFs enabled * @fw_loaded: Is the firmware loaded? * @ctrl_vnic: Pointer to the control vNIC if available + * @rtbl: RTsym table * @eth_tbl: NSP ETH table * @nspi: NSP identification info * @hwmon_dev: pointer to hwmon device @@ -101,6 +103,7 @@ struct nfp_pf { struct nfp_net *ctrl_vnic; + struct nfp_rtsym_table *rtbl; struct nfp_eth_table *eth_tbl; struct nfp_nsp_identify *nspi; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index cdd25dc5988d..c845049fcff2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -201,7 +201,7 @@ nfp_net_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format, snprintf(name, sizeof(name), format, nfp_cppcore_pcie_unit(pf->cpp)); - val = nfp_rtsym_read_le(pf->cpp, name, &err); + val = nfp_rtsym_read_le(pf->rtbl, name, &err); if (err) { if (err == -ENOENT) return default_val; @@ -234,7 +234,7 @@ nfp_net_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt, snprintf(pf_symbol, sizeof(pf_symbol), sym_fmt, nfp_cppcore_pcie_unit(pf->cpp)); - sym = nfp_rtsym_lookup(pf->cpp, pf_symbol); + sym = nfp_rtsym_lookup(pf->rtbl, pf_symbol); if (!sym) { nfp_err(pf->cpp, "Failed to find PF symbol %s\n", pf_symbol); return (u8 __iomem *)ERR_PTR(-ENOENT); diff --git 
a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h index 0a46c0984e68..e3a2201eb658 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h @@ -224,10 +224,6 @@ int nfp_cpp_serial(struct nfp_cpp *cpp, const u8 **serial); void *nfp_hwinfo_cache(struct nfp_cpp *cpp); void nfp_hwinfo_cache_set(struct nfp_cpp *cpp, void *val); -void *nfp_rtsym_cach
[PATCH net-next 1/5] nfp: make sure to cancel port refresh on the error path
If the very last stages of netdev registering and init fail, some other netdevs and devlink ports may have been visible to user space before we tore them back down. In this case there is a slight chance the user may have triggered port refresh. We need to make sure the async work is cancelled. We have to cancel after releasing pf->lock, so we will always try to cancel, regardless of which part of probe has failed. Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 5f27703060c2..cdd25dc5988d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -813,6 +813,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf) nfp_cpp_area_release_free(pf->data_vnic_bar); err_unlock: mutex_unlock(&pf->lock); + cancel_work_sync(&pf->port_refresh_work); return err; } -- 2.11.0
[PATCH net-next 4/5] nfp: keep MIP object around
Microcode Information Page contains some useful information, like application firmware build name. Keep it around, similar to RTSym and HWInfo. Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_main.c | 5 - drivers/net/ethernet/netronome/nfp/nfp_main.h | 3 +++ drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h | 2 ++ drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c | 16 +--- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 94211e245257..4e59dcb78c36 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -375,7 +375,8 @@ static int nfp_pci_probe(struct pci_dev *pdev, if (err) goto err_devlink_unreg; - pf->rtbl = nfp_rtsym_table_read(pf->cpp); + pf->mip = nfp_mip_open(pf->cpp); + pf->rtbl = __nfp_rtsym_table_read(pf->cpp, pf->mip); err = nfp_pcie_sriov_read_nfd_limit(pf); if (err) @@ -399,6 +400,7 @@ static int nfp_pci_probe(struct pci_dev *pdev, pci_sriov_set_totalvfs(pf->pdev, 0); err_fw_unload: kfree(pf->rtbl); + nfp_mip_close(pf->mip); if (pf->fw_loaded) nfp_fw_unload(pf); kfree(pf->eth_tbl); @@ -437,6 +439,7 @@ static void nfp_pci_remove(struct pci_dev *pdev) devlink_unregister(devlink); kfree(pf->rtbl); + nfp_mip_close(pf->mip); if (pf->fw_loaded) nfp_fw_unload(pf); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h index 041643807f7e..88724f8d0dcd 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h @@ -55,6 +55,7 @@ struct nfp_cpp; struct nfp_cpp_area; struct nfp_eth_table; struct nfp_hwinfo; +struct nfp_mip; struct nfp_net; struct nfp_nsp_identify; struct nfp_rtsym_table; @@ -72,6 +73,7 @@ struct nfp_rtsym_table; * @num_vfs: Number of SR-IOV VFs enabled * @fw_loaded: Is the firmware loaded? 
* @ctrl_vnic: Pointer to the control vNIC if available + * @mip: MIP handle * @rtbl: RTsym table * @hwinfo:HWInfo table * @eth_tbl: NSP ETH table @@ -105,6 +107,7 @@ struct nfp_pf { struct nfp_net *ctrl_vnic; + const struct nfp_mip *mip; struct nfp_rtsym_table *rtbl; struct nfp_hwinfo *hwinfo; struct nfp_eth_table *eth_tbl; diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h index f845cf5dd762..c7266baec0eb 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h @@ -90,6 +90,8 @@ struct nfp_rtsym { struct nfp_rtsym_table; struct nfp_rtsym_table *nfp_rtsym_table_read(struct nfp_cpp *cpp); +struct nfp_rtsym_table * +__nfp_rtsym_table_read(struct nfp_cpp *cpp, const struct nfp_mip *mip); int nfp_rtsym_count(struct nfp_rtsym_table *rtbl); const struct nfp_rtsym *nfp_rtsym_get(struct nfp_rtsym_table *rtbl, int idx); const struct nfp_rtsym * diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c index ef3566163cb0..203f9cbae0fb 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c @@ -109,21 +109,31 @@ nfp_rtsym_sw_entry_init(struct nfp_rtsym_table *cache, u32 strtab_size, struct nfp_rtsym_table *nfp_rtsym_table_read(struct nfp_cpp *cpp) { + struct nfp_rtsym_table *rtbl; + const struct nfp_mip *mip; + + mip = nfp_mip_open(cpp); + rtbl = __nfp_rtsym_table_read(cpp, mip); + nfp_mip_close(mip); + + return rtbl; +} + +struct nfp_rtsym_table * +__nfp_rtsym_table_read(struct nfp_cpp *cpp, const struct nfp_mip *mip) +{ const u32 dram = NFP_CPP_ID(NFP_CPP_TARGET_MU, NFP_CPP_ACTION_RW, 0) | NFP_ISL_EMEM0; u32 strtab_addr, symtab_addr, strtab_size, symtab_size; struct nfp_rtsym_entry *rtsymtab; struct nfp_rtsym_table *cache; - const struct nfp_mip *mip; int err, n, size; - mip = nfp_mip_open(cpp); 
if (!mip) return NULL; nfp_mip_strtab(mip, &strtab_addr, &strtab_size); nfp_mip_symtab(mip, &symtab_addr, &symtab_size); - nfp_mip_close(mip); if (!symtab_size || !strtab_size || symtab_size % sizeof(*rtsymtab)) return NULL; -- 2.11.0
[PATCH net-next 0/5] nfp: FW app build name reporting
Hi! This series adds reporting FW build name in ethtool -i. Most of the patches are restructuring where information caching is done. There is also a minor error path fix. These are last few patches finishing the basic nfp_app support. Jakub Kicinski (5): nfp: make sure to cancel port refresh on the error path nfp: remove automatic caching of RTsym table nfp: remove automatic caching of HWInfo nfp: keep MIP object around nfp: report application FW build name in ethtool -i drivers/net/ethernet/netronome/nfp/nfp_app.c | 8 ++ drivers/net/ethernet/netronome/nfp/nfp_app.h | 1 + drivers/net/ethernet/netronome/nfp/nfp_app_nic.c | 2 +- drivers/net/ethernet/netronome/nfp/nfp_main.c | 29 +++-- drivers/net/ethernet/netronome/nfp/nfp_main.h | 11 +- .../net/ethernet/netronome/nfp/nfp_net_ethtool.c | 4 +- drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 17 +-- drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h | 4 +- .../net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h | 7 -- .../ethernet/netronome/nfp/nfpcore/nfp_cppcore.c | 43 --- .../ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c| 70 +--- .../net/ethernet/netronome/nfp/nfpcore/nfp_mip.c | 7 ++ .../net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h | 16 ++- .../net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c | 8 +- .../net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c | 127 + 15 files changed, 158 insertions(+), 196 deletions(-) -- 2.11.0
[PATCH net-next 5/5] nfp: report application FW build name in ethtool -i
Make sure application FW build name is NULL-terminated and print it as a part of ethtool's firmware version string. Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_app.c | 8 drivers/net/ethernet/netronome/nfp/nfp_app.h | 1 + drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 4 ++-- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mip.c | 7 +++ drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h | 1 + 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index de07517da1bd..396b93f54823 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -35,6 +35,7 @@ #include #include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_nffw.h" #include "nfp_app.h" #include "nfp_main.h" @@ -43,6 +44,13 @@ static const struct nfp_app_type *apps[] = { &app_bpf, }; +const char *nfp_app_mip_name(struct nfp_app *app) +{ + if (!app || !app->pf->mip) + return ""; + return nfp_mip_name(app->pf->mip); +} + struct sk_buff *nfp_app_ctrl_msg_alloc(struct nfp_app *app, unsigned int size) { struct sk_buff *skb; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index 3fbf68f8577c..f5e373fa8c3b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -216,6 +216,7 @@ static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb) app->type->ctrl_msg_rx(app, skb); } +const char *nfp_app_mip_name(struct nfp_app *app); struct sk_buff *nfp_app_ctrl_msg_alloc(struct nfp_app *app, unsigned int size); struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 83664ca25213..6e31355c3567 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ 
b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -166,10 +166,10 @@ static void nfp_net_get_drvinfo(struct net_device *netdev, nfp_net_get_nspinfo(nn->app, nsp_version); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), -"%d.%d.%d.%d %s %s", +"%d.%d.%d.%d %s %s %s", nn->fw_ver.resv, nn->fw_ver.class, nn->fw_ver.major, nn->fw_ver.minor, nsp_version, -nfp_app_name(nn->app)); +nfp_app_mip_name(nn->app), nfp_app_name(nn->app)); strlcpy(drvinfo->bus_info, pci_name(nn->pdev), sizeof(drvinfo->bus_info)); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mip.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mip.c index 3d15dd03647e..5f193fe2d69e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mip.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mip.c @@ -141,6 +141,8 @@ const struct nfp_mip *nfp_mip_open(struct nfp_cpp *cpp) return NULL; } + mip->name[sizeof(mip->name) - 1] = 0; + return mip; } @@ -149,6 +151,11 @@ void nfp_mip_close(const struct nfp_mip *mip) kfree(mip); } +const char *nfp_mip_name(const struct nfp_mip *mip) +{ + return mip->name; +} + /** * nfp_mip_symtab() - Get the address and size of the MIP symbol table * @mip: MIP handle diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h index c7266baec0eb..d27d29782a12 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h @@ -55,6 +55,7 @@ struct nfp_mip; const struct nfp_mip *nfp_mip_open(struct nfp_cpp *cpp); void nfp_mip_close(const struct nfp_mip *mip); +const char *nfp_mip_name(const struct nfp_mip *mip); void nfp_mip_symtab(const struct nfp_mip *mip, u32 *addr, u32 *size); void nfp_mip_strtab(const struct nfp_mip *mip, u32 *addr, u32 *size); -- 2.11.0
[PATCH net-next 3/5] nfp: remove automatic caching of HWInfo
Make callers take care of managing life time of HWInfo. Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_app_nic.c | 2 +- drivers/net/ethernet/netronome/nfp/nfp_main.c | 20 --- drivers/net/ethernet/netronome/nfp/nfp_main.h | 5 +- drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 12 ++-- drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h | 4 +- .../net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h | 3 - .../ethernet/netronome/nfp/nfpcore/nfp_cppcore.c | 17 -- .../ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c| 70 +- 8 files changed, 54 insertions(+), 79 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c index 1a33ad9f4170..83c65e6291ee 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c @@ -80,7 +80,7 @@ int nfp_app_nic_vnic_init(struct nfp_app *app, struct nfp_net *nn, if (err) return err < 0 ? err : 0; - nfp_net_get_mac_addr(nn, app->cpp, id); + nfp_net_get_mac_addr(app->pf, nn, id); return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 51fe8de34b67..94211e245257 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -170,7 +170,7 @@ nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf) return NULL; } - fw_model = nfp_hwinfo_lookup(pf->cpp, "assembly.partno"); + fw_model = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"); if (!fw_model) { dev_err(&pdev->dev, "Error: can't read part number\n"); return NULL; @@ -358,16 +358,18 @@ static int nfp_pci_probe(struct pci_dev *pdev, goto err_disable_msix; } + pf->hwinfo = nfp_hwinfo_read(pf->cpp); + dev_info(&pdev->dev, "Assembly: %s%s%s-%s CPLD: %s\n", -nfp_hwinfo_lookup(pf->cpp, "assembly.vendor"), -nfp_hwinfo_lookup(pf->cpp, "assembly.partno"), -nfp_hwinfo_lookup(pf->cpp, "assembly.serial"), -nfp_hwinfo_lookup(pf->cpp, 
"assembly.revision"), -nfp_hwinfo_lookup(pf->cpp, "cpld.version")); +nfp_hwinfo_lookup(pf->hwinfo, "assembly.vendor"), +nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"), +nfp_hwinfo_lookup(pf->hwinfo, "assembly.serial"), +nfp_hwinfo_lookup(pf->hwinfo, "assembly.revision"), +nfp_hwinfo_lookup(pf->hwinfo, "cpld.version")); err = devlink_register(devlink, &pdev->dev); if (err) - goto err_cpp_free; + goto err_hwinfo_free; err = nfp_nsp_init(pdev, pf); if (err) @@ -403,7 +405,8 @@ static int nfp_pci_probe(struct pci_dev *pdev, kfree(pf->nspi); err_devlink_unreg: devlink_unregister(devlink); -err_cpp_free: +err_hwinfo_free: + kfree(pf->hwinfo); nfp_cpp_free(pf->cpp); err_disable_msix: pci_set_drvdata(pdev, NULL); @@ -438,6 +441,7 @@ static void nfp_pci_remove(struct pci_dev *pdev) nfp_fw_unload(pf); pci_set_drvdata(pdev, NULL); + kfree(pf->hwinfo); nfp_cpp_free(pf->cpp); kfree(pf->eth_tbl); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h index 907852f00423..041643807f7e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h @@ -54,6 +54,7 @@ struct pci_dev; struct nfp_cpp; struct nfp_cpp_area; struct nfp_eth_table; +struct nfp_hwinfo; struct nfp_net; struct nfp_nsp_identify; struct nfp_rtsym_table; @@ -72,6 +73,7 @@ struct nfp_rtsym_table; * @fw_loaded: Is the firmware loaded? 
* @ctrl_vnic: Pointer to the control vNIC if available * @rtbl: RTsym table + * @hwinfo:HWInfo table * @eth_tbl: NSP ETH table * @nspi: NSP identification info * @hwmon_dev: pointer to hwmon device @@ -104,6 +106,7 @@ struct nfp_pf { struct nfp_net *ctrl_vnic; struct nfp_rtsym_table *rtbl; + struct nfp_hwinfo *hwinfo; struct nfp_eth_table *eth_tbl; struct nfp_nsp_identify *nspi; @@ -133,7 +136,7 @@ void nfp_hwmon_unregister(struct nfp_pf *pf); struct nfp_eth_table_port * nfp_net_find_port(struct nfp_eth_table *eth_tbl, unsigned int id); void -nfp_net_get_mac_addr(struct nfp_net *nn, struct nfp_cpp *cpp, unsigned int id); +nfp_net_get_mac_addr(struct nfp_pf *pf, struct nfp_net *nn, unsigned int id); bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index c845049fcff2..bc2bc0886176 10
[PATCH] wireless: wlcore: spi: remove unnecessary variable
Remove unnecessary variable and refactor the code. Addresses-Coverity-ID: 1365000 Signed-off-by: Gustavo A. R. Silva --- drivers/net/wireless/ti/wlcore/spi.c | 7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index fa3547e..d2d899a 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -366,17 +366,14 @@ static int __wl12xx_spi_raw_write(struct device *child, int addr, static int __must_check wl12xx_spi_raw_write(struct device *child, int addr, void *buf, size_t len, bool fixed) { - int ret; - /* The ELP wakeup write may fail the first time due to internal * hardware latency. It is safer to send the wakeup command twice to * avoid unexpected failures. */ if (addr == HW_ACCESS_ELP_CTRL_REG) - ret = __wl12xx_spi_raw_write(child, addr, buf, len, fixed); - ret = __wl12xx_spi_raw_write(child, addr, buf, len, fixed); + __wl12xx_spi_raw_write(child, addr, buf, len, fixed); - return ret; + return __wl12xx_spi_raw_write(child, addr, buf, len, fixed); } /** -- 2.5.0
[PATCH net-next] liquidio: disallow enabling firmware debug from a VF
From: Derek Chickles Disallow enabling firmware debug from a VF. Only PF is allowed to do that. Signed-off-by: Derek Chickles Signed-off-by: Felix Manlunas --- drivers/net/ethernet/cavium/liquidio/lio_ethtool.c | 9 - drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 4 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 579dc73..9a520a7 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -700,6 +700,13 @@ static void lio_set_msglevel(struct net_device *netdev, u32 msglvl) lio->msg_enable = msglvl; } +static void lio_vf_set_msglevel(struct net_device *netdev, u32 msglvl) +{ + struct lio *lio = GET_LIO(netdev); + + lio->msg_enable = msglvl; +} + static void lio_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { @@ -2611,7 +2618,7 @@ static const struct ethtool_ops lio_vf_ethtool_ops = { .get_regs_len = lio_get_regs_len, .get_regs = lio_get_regs, .get_msglevel = lio_get_msglevel, - .set_msglevel = lio_set_msglevel, + .set_msglevel = lio_vf_set_msglevel, .get_sset_count = lio_vf_get_sset_count, .get_coalesce = lio_get_intr_coalesce, .set_coalesce = lio_set_intr_coalesce, diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 31d737c..ad7ea6d 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -2997,10 +2997,6 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE, OCTNIC_LROIPV4 | OCTNIC_LROIPV6); - if ((debug != -1) && (debug & NETIF_MSG_HW)) - liquidio_set_feature(netdev, OCTNET_CMD_VERBOSE_ENABLE, -0); - if (setup_link_status_change_wq(netdev)) goto setup_nic_dev_fail;
Re: [PATCH net] ipv4: igmp: fix a use after free
On Thu, 2017-06-08 at 17:59 -0700, Cong Wang wrote: > On Thu, Jun 8, 2017 at 1:33 PM, Eric Dumazet wrote: > > I mentioned (in https://lkml.org/lkml/2017/5/31/619 ) that we might need > > to defer freeing after rcu grace period but for some reason decided it > > was not needed. > > This one makes sense, it is the second time I saw the use-after-free > in igmp code, both are because we don't respect the RCU rule to free > an element in the list. > > > > > What about : > > But not sure if all ip_ma_put() callers want ip_mc_clear_src(). That would lead to a memory leak if this was the case ?
Re: [PATCH net] ipv4: igmp: fix a use after free
On Thu, Jun 8, 2017 at 1:33 PM, Eric Dumazet wrote: > I mentioned (in https://lkml.org/lkml/2017/5/31/619 ) that we might need > to defer freeing after rcu grace period but for some reason decided it > was not needed. This one makes sense, it is the second time I saw the use-after-free in igmp code, both are because we don't respect the RCU rule to free an element in the list. > > What about : But not sure if all ip_ma_put() callers want ip_mc_clear_src().
Re: [PATCH v3 2/2] ip6_tunnel: fix potential issue in __ip6_tnl_rcv
> On 8 Jun 2017, at 9:59 PM, David Miller wrote: > > From: 严海双 > Date: Thu, 8 Jun 2017 15:33:58 +0800 > >>> On 8 Jun 2017, at 1:00 PM, Alexei Starovoitov >>> wrote: >>> >>> On Thu, Jun 08, 2017 at 12:56:58PM +0800, 严海双 wrote: > On 8 Jun 2017, at 12:38 PM, Alexei Starovoitov > wrote: > > On Thu, Jun 08, 2017 at 12:32:44PM +0800, Haishuang Yan wrote: >> When __ip6_tnl_rcv fails, the tun_dst won't be freed, so call >> dst_release to free it in error code path. >> >> CC: Alexei Starovoitov >> Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") >> Signed-off-by: Haishuang Yan > > I don't get it. Why did you send another version of the patch? > What was wrong with previous approach that myself and Eric acked? > > Sorry for your confusing, because Pravin Shelar give a feedback in ipv4 patch, see below: >>> >>> hmm. right. >>> Then it raises the question: How did you test this and previous patch? >>> >>> since previous version was sort-of fixing the bug, but completely >>> breaking the logic... >>> >>> >> >> Sorry for my previous fault, I tried to fix this problem in theory without >> testing carefully. >> I have tested the latest patches, it works ok now. > > This does not instill a lot of confidence in us. > > I want someone else to test these patches, then you can resubmit them > with proper Tested-by: tags added, since you thought it was OK to submit > a patch without testing in the first place. Ok, thanks.
[PATCH net-next v2] geneve: add missing rx stats accounting
There are few places on the receive path where packet drops and packet errors were not accounted for. This patch fixes that issue. Signed-off-by: Girish Moodalbail --- v0 -> v1: -modified to use canonical post-increment "x++" --- drivers/net/geneve.c | 36 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 6ebb0f5..ff626db 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -212,6 +212,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst = NULL; struct pcpu_sw_netstats *stats; + unsigned int len; int err = 0; void *oiph; @@ -225,8 +226,10 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, vni_to_tunnel_id(gnvh->vni), gnvh->opt_len * 4); - if (!tun_dst) + if (!tun_dst) { + geneve->dev->stats.rx_dropped++; goto drop; + } /* Update tunnel dst according to Geneve options. */ ip_tunnel_info_opts_set(&tun_dst->u.tun_info, gnvh->options, gnvh->opt_len * 4); @@ -234,8 +237,11 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, /* Drop packets w/ critical options, * since we don't support any... 
*/ - if (gnvh->critical) + if (gnvh->critical) { + geneve->dev->stats.rx_frame_errors++; + geneve->dev->stats.rx_errors++; goto drop; + } } skb_reset_mac_header(skb); @@ -246,8 +252,10 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb_dst_set(skb, &tun_dst->dst); /* Ignore packet loops (and multicast echo) */ - if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) + if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) { + geneve->dev->stats.rx_errors++; goto drop; + } oiph = skb_network_header(skb); skb_reset_network_header(skb); @@ -279,13 +287,15 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, } } - stats = this_cpu_ptr(geneve->dev->tstats); - u64_stats_update_begin(&stats->syncp); - stats->rx_packets++; - stats->rx_bytes += skb->len; - u64_stats_update_end(&stats->syncp); - - gro_cells_receive(&geneve->gro_cells, skb); + len = skb->len; + err = gro_cells_receive(&geneve->gro_cells, skb); + if (likely(err == NET_RX_SUCCESS)) { + stats = this_cpu_ptr(geneve->dev->tstats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += len; + u64_stats_update_end(&stats->syncp); + } return; drop: /* Consume bad packet */ @@ -334,7 +344,7 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) struct geneve_sock *gs; int opts_len; - /* Need Geneve and inner Ethernet header to be present */ + /* Need UDP and Geneve header to be present */ if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) goto drop; @@ -357,8 +367,10 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) opts_len = geneveh->opt_len * 4; if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, htons(ETH_P_TEB), -!net_eq(geneve->net, dev_net(geneve->dev +!net_eq(geneve->net, dev_net(geneve->dev { + geneve->dev->stats.rx_dropped++; goto drop; + } geneve_rx(geneve, gs, skb); return 0; -- 1.8.3.1
Re: [PATCH net 2/3] bonding: fix 802.3ad support for 14G speed
On Thu, 2017-06-08 at 11:18 +0200, Nicolas Dichtel wrote: > This patch adds 14 Gbps enum definition, and fixes > aggregated bandwidth calculation based on above slave links. Doesn't 14G need to be added to phy.c? --- diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a4238cb56731..d1c33a8cbaf5 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -54,6 +54,8 @@ static const char *phy_speed_to_str(int speed) return "5Gbps"; case SPEED_10000: return "10Gbps"; + case SPEED_14000: + return "14Gbps"; case SPEED_20000: return "20Gbps"; case SPEED_25000:
Re: [PATCH net-next 1/5] net: dsa: Remove master_netdev and use dst->cpu_dp->netdev
Hi Florian, [auto build test WARNING on net-next/master] url: https://github.com/0day-ci/linux/commits/Florian-Fainelli/net-dsa-Multi-CPU-ground-work/20170607-164203 config: x86_64-randconfig-s0-06090601 (attached as .config) compiler: gcc-4.4 (Debian 4.4.7-8) 4.4.7 reproduce: # save the attached .config to linux build tree make ARCH=x86_64 All warnings (new ones prefixed by >>): net//dsa/slave.c: In function 'dsa_slave_netpoll_setup': >> net//dsa/slave.c:686: warning: unused variable 'ds' vim +/ds +686 net//dsa/slave.c 7905288f Florian Fainelli 2014-09-24 670 afdcf151 Vivien Didelot 2017-01-27 671 ret = ds->ops->get_eee(ds, p->dp->index, e); 7905288f Florian Fainelli 2014-09-24 672 if (ret) 7905288f Florian Fainelli 2014-09-24 673 return ret; 7905288f Florian Fainelli 2014-09-24 674 7905288f Florian Fainelli 2014-09-24 675 if (p->phy) 7905288f Florian Fainelli 2014-09-24 676 ret = phy_ethtool_get_eee(p->phy, e); 7905288f Florian Fainelli 2014-09-24 677 7905288f Florian Fainelli 2014-09-24 678 return ret; 7905288f Florian Fainelli 2014-09-24 679 } 7905288f Florian Fainelli 2014-09-24 680 04ff53f9 Florian Fainelli 2015-07-31 681 #ifdef CONFIG_NET_POLL_CONTROLLER 04ff53f9 Florian Fainelli 2015-07-31 682 static int dsa_slave_netpoll_setup(struct net_device *dev, 04ff53f9 Florian Fainelli 2015-07-31 683 struct netpoll_info *ni) 04ff53f9 Florian Fainelli 2015-07-31 684 { 04ff53f9 Florian Fainelli 2015-07-31 685 struct dsa_slave_priv *p = netdev_priv(dev); afdcf151 Vivien Didelot 2017-01-27 @686 struct dsa_switch *ds = p->dp->ds; 6ab303ef Florian Fainelli 2017-06-06 687 struct net_device *master = dsa_master_netdev(p); 04ff53f9 Florian Fainelli 2015-07-31 688 struct netpoll *netpoll; 04ff53f9 Florian Fainelli 2015-07-31 689 int err = 0; 04ff53f9 Florian Fainelli 2015-07-31 690 04ff53f9 Florian Fainelli 2015-07-31 691 netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); 04ff53f9 Florian Fainelli 2015-07-31 692 if (!netpoll) 04ff53f9 Florian Fainelli 2015-07-31 693 return 
-ENOMEM; 04ff53f9 Florian Fainelli 2015-07-31 694 :: The code at line 686 was first introduced by commit :: afdcf151c1f7346207dcee3f8d6d82991dbbb7e5 net: dsa: store a dsa_port in dsa_slave_priv :: TO: Vivien Didelot :: CC: David S. Miller --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip
Re: [PATCH net-next] net: ethernet: ti: cpdma: do not enable host error misc irq
From: Grygorii Strashko Date: Thu, 8 Jun 2017 13:51:52 -0500 > CPSW driver does not handle this interrupt, so there are no reasons to enable > it in hardware. > > Signed-off-by: Grygorii Strashko Applied.
Re: [PATCH net-next] net: ethernet: ti: cpsw: enable HWTSTAMP_FILTER_PTP_V1_L4_EVENT filter
From: Grygorii Strashko Date: Thu, 8 Jun 2017 13:51:31 -0500 > CPSW driver supports PTP v1 messages, but for unknown reasons this filter > is not advertised. As result, > ./tools/testing/selftests/networking/timestamping/timestamping utility > can't be used for testing of CPSW RX timestamping with option > SOF_TIMESTAMPING_RX_HARDWARE, because it uses > HWTSTAMP_FILTER_PTP_V1_L4_SYNC filter. > > Hence, fix it by advertising HWTSTAMP_FILTER_PTP_V1_L4_XXX filters > in CPSW driver. > > Signed-off-by: Grygorii Strashko Applied.
Re: [PATCH 1/2 net-next] net: stmmac: fix RX routing function name
A patch series should always have a proper "[PATCH 0/N] ..." header posting explaining what the patch series is doing at a high level, how it is doing it, and why it is doing it that way. Thank you.
Re: [PATCH] geneve: add missing rx stats accounting
From: Girish Moodalbail Date: Thu, 8 Jun 2017 11:39:49 -0700 > + ++geneve->dev->stats.rx_dropped; Please use the more canonical post-increment "x++". Please do this in your entire patch. Thanks.
Re: [PATCH] net: vrf: Make add_fib_rules per network namespace flag
From: David Ahern Date: Thu, 8 Jun 2017 11:31:11 -0600 > Commit 1aa6c4f6b8cd8 ("net: vrf: Add l3mdev rules on first device create") > adds the l3mdev FIB rule the first time a VRF device is created. However, > it only creates the rule once and only in the namespace the first device > is created - which may not be init_net. Fix by using the net_generic > capability to make the add_fib_rules flag per network namespace. > > Fixes: 1aa6c4f6b8cd8 ("net: vrf: Add l3mdev rules on first device create") > Reported-by: Petr Machata > Signed-off-by: David Ahern Yeah that's not very helpful behavior. Applied and queued up for -stable, thanks David.
Re: [PATCH net-next 2/8] qed: Revise ll2 Rx completion
From: Yuval Mintz Date: Thu, 8 Jun 2017 19:13:17 +0300 > +struct qed_ll2_comp_rx_data { > + u8 connection_handle; > + void *cookie; > + dma_addr_t rx_buf_addr; > + u16 parse_flags; > + u16 vlan; > + bool b_last_packet; > + > + union { > + u8 placement_offset; > + u8 data_length_error; > + } u; > + union { > + u16 packet_length; > + u16 data_length; > + } length; > + > + u32 opaque_data_0; > + u32 opaque_data_1; > + > + /* GSI only */ > + u32 gid_dst[4]; > + u16 qp_id; > +}; > + Again, a lot of unnecessary padding in the structure due to suboptimal member ordering. Please fix this. Thank you.
Re: [PATCH net-next 1/8] qed: LL2 to use packed information for tx
From: Yuval Mintz Date: Thu, 8 Jun 2017 19:13:16 +0300 > @@ -67,6 +79,21 @@ struct qed_ll2_stats { > u64 sent_bcast_pkts; > }; > > +struct qed_ll2_tx_pkt_info { > + u8 num_of_bds; > + u16 vlan; > + u8 bd_flags; > + u16 l4_hdr_offset_w;/* from start of packet */ > + enum qed_ll2_tx_dest tx_dest; > + enum qed_ll2_roce_flavor_type qed_roce_flavor; > + dma_addr_t first_frag; > + u16 first_frag_len; > + bool enable_ip_cksum; > + bool enable_l4_cksum; > + bool calc_ip_len; > + void *cookie; > +}; > + This layout is extremely inefficient, with lots of padding in between struct members. Group small u8 members and u16 members together so that they consume full 32-bit areas so you can eliminate all of the padding.
[PATCH net-next 0/6] netvsc: small cleanups
These are all small optimizations found during development of later features. Stephen Hemminger (6): netvsc: optimize calculation of number of slots netvsc: use hv_get_bytes_to_read netvsc: use typed pointer for internal state netvsc: mark error cases as unlikely netvsc: pass net_device to netvsc_init_buf and netvsc_connect_vsp netvsc: fold in get_outbound_net_device drivers/net/hyperv/hyperv_net.h | 3 +-- drivers/net/hyperv/netvsc.c | 49 - drivers/net/hyperv/netvsc_drv.c | 53 ++--- 3 files changed, 29 insertions(+), 76 deletions(-) -- 2.11.0
[PATCH net-next 2/6] netvsc: use hv_get_bytes_to_read
Don't need to look at write space in netvsc_close. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc_drv.c | 10 +++--- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index df6d8e28949e..436a3ad55cfd 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -120,7 +120,7 @@ static int netvsc_close(struct net_device *net) struct net_device_context *net_device_ctx = netdev_priv(net); struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); int ret; - u32 aread, awrite, i, msec = 10, retry = 0, retry_max = 20; + u32 aread, i, msec = 10, retry = 0, retry_max = 20; struct vmbus_channel *chn; netif_tx_disable(net); @@ -141,15 +141,11 @@ static int netvsc_close(struct net_device *net) if (!chn) continue; - hv_get_ringbuffer_availbytes(&chn->inbound, &aread, -&awrite); - + aread = hv_get_bytes_to_read(&chn->inbound); if (aread) break; - hv_get_ringbuffer_availbytes(&chn->outbound, &aread, -&awrite); - + aread = hv_get_bytes_to_read(&chn->outbound); if (aread) break; } -- 2.11.0
[PATCH net-next 5/6] netvsc: pass net_device to netvsc_init_buf and netvsc_connect_vsp
Don't need to find netvsc_device structure, caller already had it. Also rearrange declarations. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc.c | 31 +++ 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index caf89a245ba6..4d4fde0c7974 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -243,18 +243,15 @@ static void netvsc_destroy_buf(struct hv_device *device) kfree(net_device->send_section_map); } -static int netvsc_init_buf(struct hv_device *device) +static int netvsc_init_buf(struct hv_device *device, + struct netvsc_device *net_device) { int ret = 0; - struct netvsc_device *net_device; struct nvsp_message *init_packet; struct net_device *ndev; size_t map_words; int node; - net_device = get_outbound_net_device(device); - if (!net_device) - return -ENODEV; ndev = hv_get_drvdata(device); node = cpu_to_node(device->channel->target_cpu); @@ -285,9 +282,7 @@ static int netvsc_init_buf(struct hv_device *device) /* Notify the NetVsp of the gpadl handle */ init_packet = &net_device->channel_init_pkt; - memset(init_packet, 0, sizeof(struct nvsp_message)); - init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF; init_packet->msg.v1_msg.send_recv_buf. 
gpadl_handle = net_device->recv_buf_gpadl_handle; @@ -486,20 +481,15 @@ static int negotiate_nvsp_ver(struct hv_device *device, return ret; } -static int netvsc_connect_vsp(struct hv_device *device) +static int netvsc_connect_vsp(struct hv_device *device, + struct netvsc_device *net_device) { - int ret; - struct netvsc_device *net_device; - struct nvsp_message *init_packet; - int ndis_version; const u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, - NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 }; - int i; - - net_device = get_outbound_net_device(device); - if (!net_device) - return -ENODEV; + NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 + }; + struct nvsp_message *init_packet; + int ndis_version, i, ret; init_packet = &net_device->channel_init_pkt; @@ -549,7 +539,7 @@ static int netvsc_connect_vsp(struct hv_device *device) net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE; - ret = netvsc_init_buf(device); + ret = netvsc_init_buf(device, net_device); cleanup: return ret; @@ -1349,7 +1339,7 @@ int netvsc_device_add(struct hv_device *device, rcu_assign_pointer(net_device_ctx->nvdev, net_device); /* Connect with the NetVsp */ - ret = netvsc_connect_vsp(device); + ret = netvsc_connect_vsp(device, net_device); if (ret != 0) { netdev_err(ndev, "unable to connect to NetVSP - %d\n", ret); @@ -1368,4 +1358,5 @@ int netvsc_device_add(struct hv_device *device, free_netvsc_device(&net_device->rcu); return ret; + } -- 2.11.0
[PATCH net-next 1/6] netvsc: optimize calculation of number of slots
Speed up transmit check for fragmented packets by using existing macros to compute number of pages, and eliminate loop since skb fragments each take a page. Number of slots is also unsigned. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc_drv.c | 43 ++--- 1 file changed, 10 insertions(+), 33 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 2564ac83eb64..df6d8e28949e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -345,34 +345,14 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, return slots_used; } -static int count_skb_frag_slots(struct sk_buff *skb) -{ - int i, frags = skb_shinfo(skb)->nr_frags; - int pages = 0; - - for (i = 0; i < frags; i++) { - skb_frag_t *frag = skb_shinfo(skb)->frags + i; - unsigned long size = skb_frag_size(frag); - unsigned long offset = frag->page_offset; - - /* Skip unused frames from start of page */ - offset &= ~PAGE_MASK; - pages += PFN_UP(offset + size); - } - return pages; -} - -static int netvsc_get_slots(struct sk_buff *skb) +/* Estimate number of page buffers neede to transmit + * Need at most 2 for RNDIS header plus skb body and fragments. + */ +static unsigned int netvsc_get_slots(const struct sk_buff *skb) { - char *data = skb->data; - unsigned int offset = offset_in_page(data); - unsigned int len = skb_headlen(skb); - int slots; - int frag_slots; - - slots = DIV_ROUND_UP(offset + len, PAGE_SIZE); - frag_slots = count_skb_frag_slots(skb); - return slots + frag_slots; + return PFN_UP(offset_in_page(skb->data) + skb_headlen(skb)) + + skb_shinfo(skb)->nr_frags + + 2; } static u32 net_checksum_info(struct sk_buff *skb) @@ -410,21 +390,18 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; struct hv_page_buffer *pb = page_buf; - /* We will atmost need two pages to describe the rndis -* header. 
We can only transmit MAX_PAGE_BUFFER_COUNT number + /* We can only transmit MAX_PAGE_BUFFER_COUNT number * of pages in a single packet. If skb is scattered around * more pages we try linearizing it. */ - - num_data_pgs = netvsc_get_slots(skb) + 2; - + num_data_pgs = netvsc_get_slots(skb); if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) { ++net_device_ctx->eth_stats.tx_scattered; if (skb_linearize(skb)) goto no_memory; - num_data_pgs = netvsc_get_slots(skb) + 2; + num_data_pgs = netvsc_get_slots(skb); if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { ++net_device_ctx->eth_stats.tx_too_big; goto drop; -- 2.11.0
[PATCH net-next 6/6] netvsc: fold in get_outbound_net_device
No longer need common code to find get_outbound_net_device. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc.c | 16 +++- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 4d4fde0c7974..7c5ed8fe7a4f 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -97,16 +97,6 @@ static void free_netvsc_device_rcu(struct netvsc_device *nvdev) call_rcu(&nvdev->rcu, free_netvsc_device); } -static struct netvsc_device *get_outbound_net_device(struct hv_device *device) -{ - struct netvsc_device *net_device = hv_device_to_netvsc_device(device); - - if (net_device && net_device->destroy) - net_device = NULL; - - return net_device; -} - static void netvsc_destroy_buf(struct hv_device *device) { struct nvsp_message *revoke_packet; @@ -833,7 +823,7 @@ int netvsc_send(struct hv_device *device, struct hv_page_buffer **pb, struct sk_buff *skb) { - struct netvsc_device *net_device; + struct netvsc_device *net_device = hv_device_to_netvsc_device(device); int ret = 0; struct netvsc_channel *nvchan; u32 pktlen = packet->total_data_buflen, msd_len = 0; @@ -844,8 +834,8 @@ int netvsc_send(struct hv_device *device, bool try_batch; bool xmit_more = (skb != NULL) ? skb->xmit_more : false; - net_device = get_outbound_net_device(device); - if (unlikely(!net_device)) + /* If device is rescinded, return error and packet will get dropped. */ + if (unlikely(net_device->destroy)) return -ENODEV; /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get -- 2.11.0
[PATCH net-next 4/6] netvsc: mark error cases as unlikely
Mark if() statements used for error handling only as unlikely() Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 652453d9fb08..caf89a245ba6 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -855,14 +855,14 @@ int netvsc_send(struct hv_device *device, bool xmit_more = (skb != NULL) ? skb->xmit_more : false; net_device = get_outbound_net_device(device); - if (!net_device) + if (unlikely(!net_device)) return -ENODEV; /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get * here before the negotiation with the host is finished and * send_section_map may not be allocated yet. */ - if (!net_device->send_section_map) + if (unlikely(!net_device->send_section_map)) return -EAGAIN; nvchan = &net_device->chan_table[packet->q_idx]; -- 2.11.0
[PATCH net-next 3/6] netvsc: use typed pointer for internal state
The element netvsc_device:extension is always a pointer to RNDIS information. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/hyperv_net.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 262b2ea576a3..f82d54e0208c 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -763,8 +763,7 @@ struct netvsc_device { refcount_t sc_offered; - /* Holds rndis device info */ - void *extension; + struct rndis_device *extension; int ring_size; -- 2.11.0
Re: [PATCH net-next 0/8] Bug fixes in ena ethernet driver
Two parallel patch series to the same driver and targeting the same GIT tree is extremely undesirable, please don't do this. Submit one series, and once applied submit the second series. I'm deleting all of your patches from my queue, please resubmit things properly. Thank you.
[PATCH net-next v2 2/7] net: dsa: mv88e6xxx: add egress mode enumeration
As for the frame mode, add a mv88e6xxx_egress_mode enumeration instead of a 16-bit register mask. Reviewed-by: Andrew Lunn Signed-off-by: Vivien Didelot --- drivers/net/dsa/mv88e6xxx/chip.c | 11 ++- drivers/net/dsa/mv88e6xxx/chip.h | 7 +++ drivers/net/dsa/mv88e6xxx/port.c | 20 ++-- drivers/net/dsa/mv88e6xxx/port.h | 2 +- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index bf7ad2e8b4d7..b610429f7516 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1722,8 +1722,8 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) } static int mv88e6xxx_set_port_mode(struct mv88e6xxx_chip *chip, int port, - enum mv88e6xxx_frame_mode frame, u16 egress, - u16 etype) + enum mv88e6xxx_frame_mode frame, + enum mv88e6xxx_egress_mode egress, u16 etype) { int err; @@ -1747,14 +1747,14 @@ static int mv88e6xxx_set_port_mode(struct mv88e6xxx_chip *chip, int port, static int mv88e6xxx_set_port_mode_normal(struct mv88e6xxx_chip *chip, int port) { return mv88e6xxx_set_port_mode(chip, port, MV88E6XXX_FRAME_MODE_NORMAL, - PORT_CONTROL_EGRESS_UNMODIFIED, + MV88E6XXX_EGRESS_MODE_UNMODIFIED, PORT_ETH_TYPE_DEFAULT); } static int mv88e6xxx_set_port_mode_dsa(struct mv88e6xxx_chip *chip, int port) { return mv88e6xxx_set_port_mode(chip, port, MV88E6XXX_FRAME_MODE_DSA, - PORT_CONTROL_EGRESS_UNMODIFIED, + MV88E6XXX_EGRESS_MODE_UNMODIFIED, PORT_ETH_TYPE_DEFAULT); } @@ -1762,7 +1762,8 @@ static int mv88e6xxx_set_port_mode_edsa(struct mv88e6xxx_chip *chip, int port) { return mv88e6xxx_set_port_mode(chip, port, MV88E6XXX_FRAME_MODE_ETHERTYPE, - PORT_CONTROL_EGRESS_ADD_TAG, ETH_P_EDSA); + MV88E6XXX_EGRESS_MODE_ETHERTYPE, + ETH_P_EDSA); } static int mv88e6xxx_setup_port_mode(struct mv88e6xxx_chip *chip, int port) diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 98c24af977fd..fb7dea33a44a 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ 
b/drivers/net/dsa/mv88e6xxx/chip.h @@ -39,6 +39,13 @@ #define MV88E6XXX_MAX_PVT_SWITCHES 32 #define MV88E6XXX_MAX_PVT_PORTS16 +enum mv88e6xxx_egress_mode { + MV88E6XXX_EGRESS_MODE_UNMODIFIED, + MV88E6XXX_EGRESS_MODE_UNTAGGED, + MV88E6XXX_EGRESS_MODE_TAGGED, + MV88E6XXX_EGRESS_MODE_ETHERTYPE, +}; + enum mv88e6xxx_frame_mode { MV88E6XXX_FRAME_MODE_NORMAL, MV88E6XXX_FRAME_MODE_DSA, diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index fc09b26f9b49..09e17131a6bd 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -425,7 +425,7 @@ int mv88e6xxx_port_set_state(struct mv88e6xxx_chip *chip, int port, u8 state) } int mv88e6xxx_port_set_egress_mode(struct mv88e6xxx_chip *chip, int port, - u16 mode) + enum mv88e6xxx_egress_mode mode) { int err; u16 reg; @@ -435,7 +435,23 @@ int mv88e6xxx_port_set_egress_mode(struct mv88e6xxx_chip *chip, int port, return err; reg &= ~PORT_CONTROL_EGRESS_MASK; - reg |= mode; + + switch (mode) { + case MV88E6XXX_EGRESS_MODE_UNMODIFIED: + reg |= PORT_CONTROL_EGRESS_UNMODIFIED; + break; + case MV88E6XXX_EGRESS_MODE_UNTAGGED: + reg |= PORT_CONTROL_EGRESS_UNTAGGED; + break; + case MV88E6XXX_EGRESS_MODE_TAGGED: + reg |= PORT_CONTROL_EGRESS_TAGGED; + break; + case MV88E6XXX_EGRESS_MODE_ETHERTYPE: + reg |= PORT_CONTROL_EGRESS_ADD_TAG; + break; + default: + return -EINVAL; + } return mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); } diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index 4f5e1ccfadc6..1de200074e21 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -212,7 +212,7 @@ int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port, int mv88e6095_port_tag_remap(struct mv88e6xxx_chip *chip, int port); int mv88e6390_port_tag_remap(struct mv88e6xxx_chip *chip, int port); int mv88e6xxx_port_set_egress_mode(struct mv88e6xxx_chip *chip, int port, - u16 mode); + enum
[PATCH net-next v2 4/7] net: dsa: mv88e6xxx: do not prefix ops with g1
The mv88e6xxx_ops describe functionalities, regardless their locations (which can be Global1, Global2, or whatever register set.) Rename the g1_set_cpu_port and g1_set_egress_port ops to set_cpu_port and set_egress_port. No functional changes. Signed-off-by: Vivien Didelot --- drivers/net/dsa/mv88e6xxx/chip.c | 108 +++ drivers/net/dsa/mv88e6xxx/chip.h | 4 +- 2 files changed, 56 insertions(+), 56 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index e25d48ecf880..f33b83bf3ee8 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2015,14 +2015,14 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) u32 upstream_port = dsa_upstream_port(ds); int err; - if (chip->info->ops->g1_set_cpu_port) { - err = chip->info->ops->g1_set_cpu_port(chip, upstream_port); + if (chip->info->ops->set_cpu_port) { + err = chip->info->ops->set_cpu_port(chip, upstream_port); if (err) return err; } - if (chip->info->ops->g1_set_egress_port) { - err = chip->info->ops->g1_set_egress_port(chip, upstream_port); + if (chip->info->ops->set_egress_port) { + err = chip->info->ops->set_egress_port(chip, upstream_port); if (err) return err; } @@ -2369,8 +2369,8 @@ static const struct mv88e6xxx_ops mv88e6085_ops = { .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, - .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, - .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .set_cpu_port = mv88e6095_g1_set_cpu_port, + .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .ppu_enable = mv88e6185_g1_ppu_enable, @@ -2424,8 +2424,8 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, - 
.g1_set_cpu_port = mv88e6095_g1_set_cpu_port, - .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .set_cpu_port = mv88e6095_g1_set_cpu_port, + .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, @@ -2449,8 +2449,8 @@ static const struct mv88e6xxx_ops mv88e6123_ops = { .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, - .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, - .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .set_cpu_port = mv88e6095_g1_set_cpu_port, + .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, @@ -2478,8 +2478,8 @@ static const struct mv88e6xxx_ops mv88e6131_ops = { .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, - .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, - .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .set_cpu_port = mv88e6095_g1_set_cpu_port, + .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .ppu_enable = mv88e6185_g1_ppu_enable, @@ -2513,8 +2513,8 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .stats_get_sset_count = mv88e6320_stats_get_sset_count, .stats_get_strings = mv88e6320_stats_get_strings, .stats_get_stats = mv88e6390_stats_get_stats, - .g1_set_cpu_port = mv88e6390_g1_set_cpu_port, - .g1_set_egress_port = mv88e6390_g1_set_egress_port, + .set_cpu_port = mv88e6390_g1_set_cpu_port, + .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, @@ -2543,8 +2543,8 @@ static const struct mv88e6xxx_ops 
mv88e6161_ops = { .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, - .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, - .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .set_cpu_port = mv88e6095_g1_set_cpu_port, + .set_egress_port = mv88e6095_g1_set_egress_por
[PATCH net-next v2 5/7] net: dsa: mv88e6xxx: rework pause limit operation
All Marvell chips supporting Pause frames limiting use 1-byte value for input and output. Old chips have both bytes adjacent in a 16-bit register. New ones have an indirect table using 8-bit data. The mv88e6xxx library functions (such as in port.c) must not contain driver logic, but only generic helpers. This patch changes the port_pause_config operation for port_pause_limit taking two u8 arguments for input and output limits. There is no functional changes. Reviewed-by: Andrew Lunn Signed-off-by: Vivien Didelot --- drivers/net/dsa/mv88e6xxx/chip.c | 48 drivers/net/dsa/mv88e6xxx/chip.h | 3 ++- drivers/net/dsa/mv88e6xxx/port.c | 12 +- drivers/net/dsa/mv88e6xxx/port.h | 6 +++-- 4 files changed, 37 insertions(+), 32 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index f33b83bf3ee8..a0f450cb45fb 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1901,8 +1901,8 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (err) return err; - if (chip->info->ops->port_pause_config) { - err = chip->info->ops->port_pause_config(chip, port); + if (chip->info->ops->port_pause_limit) { + err = chip->info->ops->port_pause_limit(chip, port, 0, 0); if (err) return err; } @@ -2362,7 +2362,7 @@ static const struct mv88e6xxx_ops mv88e6085_ops = { .port_set_egress_floods = mv88e6352_port_set_egress_floods, .port_set_ether_type = mv88e6351_port_set_ether_type, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, - .port_pause_config = mv88e6097_port_pause_config, + .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, @@ -2417,7 +2417,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .port_set_ether_type = mv88e6351_port_set_ether_type, .port_jumbo_config = mv88e6165_port_jumbo_config, 
.port_egress_rate_limiting = mv88e6095_port_egress_rate_limiting, - .port_pause_config = mv88e6097_port_pause_config, + .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, @@ -2473,7 +2473,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = { .port_set_upstream_port = mv88e6095_port_set_upstream_port, .port_jumbo_config = mv88e6165_port_jumbo_config, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, - .port_pause_config = mv88e6097_port_pause_config, + .port_pause_limit = mv88e6097_port_pause_limit, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, @@ -2506,7 +2506,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .port_set_ether_type = mv88e6351_port_set_ether_type, .port_jumbo_config = mv88e6165_port_jumbo_config, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, - .port_pause_config = mv88e6097_port_pause_config, + .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6390_g1_stats_snapshot, @@ -2536,7 +2536,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .port_set_ether_type = mv88e6351_port_set_ether_type, .port_jumbo_config = mv88e6165_port_jumbo_config, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, - .port_pause_config = mv88e6097_port_pause_config, + .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -2590,7 +2590,7 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { .port_set_ether_type = 
mv88e6351_port_set_ether_type, .port_jumbo_config = mv88e6165_port_jumbo_config, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, - .port_pause_config = mv88e6097_port_pause_config, + .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6320_g1_stats_snapshot, @@ -2623,7 +2623,7 @@ static const struct mv88e6xxx_ops
[PATCH net-next v2 0/7] net: dsa: mv88e6xxx: ops cosmetics
This patchset brings no functional changes. It is a first step in a bigger cosmetic change to the driver. It simplifies print messages and polishes data types and chip operations. The next patches will only prefix and document the port registers macros. Changes in v2: - KISS and simply use dev_* since chip->ds may not be initialized - add reviewers tags Vivien Didelot (7): net: dsa: mv88e6xxx: do not use netdev printing net: dsa: mv88e6xxx: add egress mode enumeration net: dsa: mv88e6xxx: use bridge state values net: dsa: mv88e6xxx: do not prefix ops with g1 net: dsa: mv88e6xxx: rework pause limit operation net: dsa: mv88e6xxx: rework jumbo size operation net: dsa: mv88e6xxx: prefix PHY macros drivers/net/dsa/mv88e6xxx/chip.c | 259 ++- drivers/net/dsa/mv88e6xxx/chip.h | 17 ++- drivers/net/dsa/mv88e6xxx/phy.c | 11 +- drivers/net/dsa/mv88e6xxx/phy.h | 4 +- drivers/net/dsa/mv88e6xxx/port.c | 105 +++- drivers/net/dsa/mv88e6xxx/port.h | 12 +- 6 files changed, 224 insertions(+), 184 deletions(-) -- 2.13.1
[PATCH net-next v2 6/7] net: dsa: mv88e6xxx: rework jumbo size operation
Marvell chips have a Jumbo Mode to set the maximum frame size (MTU). The mv88e6xxx_ops structure is meant to contain generic functionalities, no driver logic. Change port_jumbo_config to port_set_jumbo_size setting the mode from a given maximum size value. There is no functional changes since we still use 10240 bytes. At the same time, correctly clear all Jumbo Mode bits before writing. Reviewed-by: Andrew Lunn Signed-off-by: Vivien Didelot --- drivers/net/dsa/mv88e6xxx/chip.c | 38 +++--- drivers/net/dsa/mv88e6xxx/chip.h | 3 ++- drivers/net/dsa/mv88e6xxx/port.c | 14 -- drivers/net/dsa/mv88e6xxx/port.h | 4 +++- 4 files changed, 36 insertions(+), 23 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index a0f450cb45fb..a4cf0366765f 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1876,8 +1876,8 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (err) return err; - if (chip->info->ops->port_jumbo_config) { - err = chip->info->ops->port_jumbo_config(chip, port); + if (chip->info->ops->port_set_jumbo_size) { + err = chip->info->ops->port_set_jumbo_size(chip, port, 10240); if (err) return err; } @@ -2415,7 +2415,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, .port_set_ether_type = mv88e6351_port_set_ether_type, - .port_jumbo_config = mv88e6165_port_jumbo_config, + .port_set_jumbo_size = mv88e6165_port_set_jumbo_size, .port_egress_rate_limiting = mv88e6095_port_egress_rate_limiting, .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, @@ -2471,7 +2471,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = { .port_set_egress_floods = mv88e6185_port_set_egress_floods, .port_set_ether_type = mv88e6351_port_set_ether_type, .port_set_upstream_port = mv88e6095_port_set_upstream_port, - 
.port_jumbo_config = mv88e6165_port_jumbo_config, + .port_set_jumbo_size = mv88e6165_port_set_jumbo_size, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, .port_pause_limit = mv88e6097_port_pause_limit, .stats_snapshot = mv88e6xxx_g1_stats_snapshot, @@ -2504,7 +2504,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, .port_set_ether_type = mv88e6351_port_set_ether_type, - .port_jumbo_config = mv88e6165_port_jumbo_config, + .port_set_jumbo_size = mv88e6165_port_set_jumbo_size, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, @@ -2534,7 +2534,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, .port_set_ether_type = mv88e6351_port_set_ether_type, - .port_jumbo_config = mv88e6165_port_jumbo_config, + .port_set_jumbo_size = mv88e6165_port_set_jumbo_size, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, @@ -2588,7 +2588,7 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, .port_set_ether_type = mv88e6351_port_set_ether_type, - .port_jumbo_config = mv88e6165_port_jumbo_config, + .port_set_jumbo_size = mv88e6165_port_set_jumbo_size, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, @@ -2621,7 +2621,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .port_set_frame_mode = mv88e6351_port_set_frame_mode, 
.port_set_egress_floods = mv88e6352_port_set_egress_floods, .port_set_ether_type = mv88e6351_port_set_ether_type, - .port_jumbo_config = mv88e6165_port_jumbo_config, + .port_set_jumbo_size = mv88e6165_port_set_jumbo_size, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, @@ -2653,7 +2653,7 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { .port_set_f
[PATCH net-next v2 7/7] net: dsa: mv88e6xxx: prefix PHY macros
Prefix the PHY_* macros with a Marvell specific MV88E6XXX_ prefix. There is no functional changes. Reviewed-by: Andrew Lunn Signed-off-by: Vivien Didelot --- drivers/net/dsa/mv88e6xxx/phy.c | 11 ++- drivers/net/dsa/mv88e6xxx/phy.h | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/phy.c b/drivers/net/dsa/mv88e6xxx/phy.c index 0db624f0993c..3500ac0ea848 100644 --- a/drivers/net/dsa/mv88e6xxx/phy.c +++ b/drivers/net/dsa/mv88e6xxx/phy.c @@ -62,7 +62,7 @@ int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy, int reg, u16 val) static int mv88e6xxx_phy_page_get(struct mv88e6xxx_chip *chip, int phy, u8 page) { - return mv88e6xxx_phy_write(chip, phy, PHY_PAGE, page); + return mv88e6xxx_phy_write(chip, phy, MV88E6XXX_PHY_PAGE, page); } static void mv88e6xxx_phy_page_put(struct mv88e6xxx_chip *chip, int phy) @@ -72,7 +72,8 @@ static void mv88e6xxx_phy_page_put(struct mv88e6xxx_chip *chip, int phy) /* Restore PHY page Copper 0x0 for access via the registered * MDIO bus */ - err = mv88e6xxx_phy_write(chip, phy, PHY_PAGE, PHY_PAGE_COPPER); + err = mv88e6xxx_phy_write(chip, phy, MV88E6XXX_PHY_PAGE, + MV88E6XXX_PHY_PAGE_COPPER); if (unlikely(err)) { dev_err(chip->dev, "failed to restore PHY %d page Copper (%d)\n", @@ -86,7 +87,7 @@ int mv88e6xxx_phy_page_read(struct mv88e6xxx_chip *chip, int phy, int err; /* There is no paging for registers 22 */ - if (reg == PHY_PAGE) + if (reg == MV88E6XXX_PHY_PAGE) return -EINVAL; err = mv88e6xxx_phy_page_get(chip, phy, page); @@ -104,12 +105,12 @@ int mv88e6xxx_phy_page_write(struct mv88e6xxx_chip *chip, int phy, int err; /* There is no paging for registers 22 */ - if (reg == PHY_PAGE) + if (reg == MV88E6XXX_PHY_PAGE) return -EINVAL; err = mv88e6xxx_phy_page_get(chip, phy, page); if (!err) { - err = mv88e6xxx_phy_write(chip, phy, PHY_PAGE, page); + err = mv88e6xxx_phy_write(chip, phy, MV88E6XXX_PHY_PAGE, page); mv88e6xxx_phy_page_put(chip, phy); } diff --git 
a/drivers/net/dsa/mv88e6xxx/phy.h b/drivers/net/dsa/mv88e6xxx/phy.h index 4131a4e8206a..556b74a0502a 100644 --- a/drivers/net/dsa/mv88e6xxx/phy.h +++ b/drivers/net/dsa/mv88e6xxx/phy.h @@ -14,8 +14,8 @@ #ifndef _MV88E6XXX_PHY_H #define _MV88E6XXX_PHY_H -#define PHY_PAGE 0x16 -#define PHY_PAGE_COPPER0x00 +#define MV88E6XXX_PHY_PAGE 0x16 +#define MV88E6XXX_PHY_PAGE_COPPER 0x00 /* PHY Registers accesses implementations */ int mv88e6165_phy_read(struct mv88e6xxx_chip *chip, struct mii_bus *bus, -- 2.13.1
[PATCH net-next v2 3/7] net: dsa: mv88e6xxx: use bridge state values
Reuse the BR_STATE_* values to abstract a port STP state value. This provides shorter names and better control over the DSA switch operation call. Signed-off-by: Vivien Didelot --- drivers/net/dsa/mv88e6xxx/chip.c | 23 ++- drivers/net/dsa/mv88e6xxx/port.c | 20 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index b610429f7516..e25d48ecf880 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -915,28 +915,10 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) { struct mv88e6xxx_chip *chip = ds->priv; - int stp_state; int err; - switch (state) { - case BR_STATE_DISABLED: - stp_state = PORT_CONTROL_STATE_DISABLED; - break; - case BR_STATE_BLOCKING: - case BR_STATE_LISTENING: - stp_state = PORT_CONTROL_STATE_BLOCKING; - break; - case BR_STATE_LEARNING: - stp_state = PORT_CONTROL_STATE_LEARNING; - break; - case BR_STATE_FORWARDING: - default: - stp_state = PORT_CONTROL_STATE_FORWARDING; - break; - } - mutex_lock(&chip->reg_lock); - err = mv88e6xxx_port_set_state(chip, port, stp_state); + err = mv88e6xxx_port_set_state(chip, port, state); mutex_unlock(&chip->reg_lock); if (err) @@ -1694,8 +1676,7 @@ static int mv88e6xxx_disable_ports(struct mv88e6xxx_chip *chip) /* Set all ports to the Disabled state */ for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { - err = mv88e6xxx_port_set_state(chip, i, - PORT_CONTROL_STATE_DISABLED); + err = mv88e6xxx_port_set_state(chip, i, BR_STATE_DISABLED); if (err) return err; } diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 09e17131a6bd..46e73ca0ac4d 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -12,6 +12,7 @@ * (at your option) any later version. 
*/ +#include #include #include "chip.h" @@ -412,6 +413,25 @@ int mv88e6xxx_port_set_state(struct mv88e6xxx_chip *chip, int port, u8 state) return err; reg &= ~PORT_CONTROL_STATE_MASK; + + switch (state) { + case BR_STATE_DISABLED: + state = PORT_CONTROL_STATE_DISABLED; + break; + case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: + state = PORT_CONTROL_STATE_BLOCKING; + break; + case BR_STATE_LEARNING: + state = PORT_CONTROL_STATE_LEARNING; + break; + case BR_STATE_FORWARDING: + state = PORT_CONTROL_STATE_FORWARDING; + break; + default: + return -EINVAL; + } + reg |= state; err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); -- 2.13.1
[PATCH net-next v2 1/7] net: dsa: mv88e6xxx: do not use netdev printing
The mv888e6xxx driver accesses a port's netdev mostly for printing. This is bad for 2 reasons: DSA and CPU ports do not have a netdev pointer; it doesn't give us a correct picture of why a DSA driver might need to access a port's netdev. Instead simply use dev_* printing functions with chip->dev (or ds->dev depending on the scope, both guaranteed to exist), with a p%d prefix for the target port. Signed-off-by: Vivien Didelot --- drivers/net/dsa/mv88e6xxx/chip.c | 31 +++ drivers/net/dsa/mv88e6xxx/port.c | 39 ++- 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 0534eb706caa..bf7ad2e8b4d7 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -489,8 +489,7 @@ static int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, err = 0; restore_link: if (chip->info->ops->port_set_link(chip, port, link)) - netdev_err(chip->ds->ports[port].netdev, - "failed to restore MAC's link\n"); + dev_err(chip->dev, "p%d: failed to restore MAC's link\n", port); return err; } @@ -514,7 +513,7 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, mutex_unlock(&chip->reg_lock); if (err && err != -EOPNOTSUPP) - netdev_err(ds->ports[port].netdev, "failed to configure MAC\n"); + dev_err(ds->dev, "p%d: failed to configure MAC\n", port); } static int mv88e6xxx_stats_snapshot(struct mv88e6xxx_chip *chip, int port) @@ -941,7 +940,7 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, mutex_unlock(&chip->reg_lock); if (err) - netdev_err(ds->ports[port].netdev, "failed to update state\n"); + dev_err(ds->dev, "p%d: failed to update state\n", port); } static int mv88e6xxx_atu_setup(struct mv88e6xxx_chip *chip) @@ -1009,7 +1008,7 @@ static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) mutex_unlock(&chip->reg_lock); if (err) - netdev_err(ds->ports[port].netdev, "failed to flush ATU\n"); + dev_err(ds->dev, "p%d: failed 
to flush ATU\n", port); } static int mv88e6xxx_vtu_setup(struct mv88e6xxx_chip *chip) @@ -1214,10 +1213,9 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, if (!ds->ports[i].bridge_dev) continue; - netdev_warn(ds->ports[port].netdev, - "hardware VLAN %d already used by %s\n", - vlan.vid, - netdev_name(ds->ports[i].bridge_dev)); + dev_err(ds->dev, "p%d: hw VLAN %d already used by %s\n", + port, vlan.vid, + netdev_name(ds->ports[i].bridge_dev)); err = -EOPNOTSUPP; goto unlock; } @@ -1311,13 +1309,12 @@ static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) if (_mv88e6xxx_port_vlan_add(chip, port, vid, member)) - netdev_err(ds->ports[port].netdev, - "failed to add VLAN %d%c\n", - vid, untagged ? 'u' : 't'); + dev_err(ds->dev, "p%d: failed to add VLAN %d%c\n", port, + vid, untagged ? 'u' : 't'); if (pvid && mv88e6xxx_port_set_pvid(chip, port, vlan->vid_end)) - netdev_err(ds->ports[port].netdev, "failed to set PVID %d\n", - vlan->vid_end); + dev_err(ds->dev, "p%d: failed to set PVID %d\n", port, + vlan->vid_end); mutex_unlock(&chip->reg_lock); } @@ -1451,7 +1448,8 @@ static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); if (mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid, GLOBAL_ATU_DATA_STATE_UC_STATIC)) - netdev_err(ds->ports[port].netdev, "failed to load unicast MAC address\n"); + dev_err(ds->dev, "p%d: failed to load unicast MAC address\n", + port); mutex_unlock(&chip->reg_lock); } @@ -3793,7 +3791,8 @@ static void mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); if (mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, GLOBAL_ATU_DATA_STATE_MC_STATIC)) - netdev_err(ds->ports[port].netdev, "failed to load multicast MAC address\n"); + dev_err(ds->dev, "p%d: failed to lo
[PATCH net-next 10/13] net: ena: add mtu limitation in ena_change_mtu()
From: Netanel Belgazal Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 7dee448..7f31f4c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -108,6 +108,13 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu) struct ena_adapter *adapter = netdev_priv(dev); int ret; + if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) { + netif_err(adapter, drv, dev, + "Invalid MTU setting. new_mtu: %d max mtu: %d min mtu: %d\n", + new_mtu, adapter->max_mtu, ENA_MIN_MTU); + return -EINVAL; + } + ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); if (!ret) { netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu); @@ -3008,8 +3015,6 @@ static void ena_set_conf_feat_params(struct ena_adapter *adapter, ena_set_dev_offloads(feat, netdev); adapter->max_mtu = feat->dev_attr.max_mtu; - netdev->max_mtu = adapter->max_mtu; - netdev->min_mtu = ENA_MIN_MTU; } static int ena_rss_init_default(struct ena_adapter *adapter) -- 2.7.4
[PATCH net-next 12/13] net: ena: change validate_tx_req_id() to be inline function
From: Netanel Belgazal For optimization purposes, change validate_tx_req_id() to be an inline function. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 4540cd3..da14b78 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -698,7 +698,7 @@ static void ena_destroy_all_io_queues(struct ena_adapter *adapter) ena_destroy_all_rx_queues(adapter); } -static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) +static inline int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) { struct ena_tx_buffer *tx_info = NULL; -- 2.7.4
[PATCH net-next 13/13] net: ena: update ena driver to version 1.2.0
From: Netanel Belgazal Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.h | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 4518a9d..f309a58 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -44,15 +44,16 @@ #include "ena_eth_com.h" #define DRV_MODULE_VER_MAJOR 1 -#define DRV_MODULE_VER_MINOR 1 -#define DRV_MODULE_VER_SUBMINOR 2 +#define DRV_MODULE_VER_MINOR 2 +#define DRV_MODULE_VER_SUBMINOR 0 #define DRV_MODULE_NAME"ena" #ifndef DRV_MODULE_VERSION #define DRV_MODULE_VERSION \ __stringify(DRV_MODULE_VER_MAJOR) "." \ __stringify(DRV_MODULE_VER_MINOR) "." \ - __stringify(DRV_MODULE_VER_SUBMINOR) + __stringify(DRV_MODULE_VER_SUBMINOR) \ + "k" #endif #define DEVICE_NAME"Elastic Network Adapter (ENA)" -- 2.7.4
[PATCH net-next 07/13] net: ena: use napi_schedule_irqoff when possible
From: Netanel Belgazal Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 04aade8..424b4d7 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1264,7 +1264,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data) { struct ena_napi *ena_napi = data; - napi_schedule(&ena_napi->napi); + napi_schedule_irqoff(&ena_napi->napi); return IRQ_HANDLED; } -- 2.7.4
[PATCH net-next 11/13] net: ena: update driver's rx drop statistics
From: Netanel Belgazal The rx drop counter is reported by the device in the keep-alive event. Update the driver's counter with the device counter. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 7f31f4c..4540cd3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3483,8 +3483,17 @@ static void ena_keep_alive_wd(void *adapter_data, struct ena_admin_aenq_entry *aenq_e) { struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + struct ena_admin_aenq_keep_alive_desc *desc; + u64 rx_drops; + desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e; adapter->last_keep_alive_jiffies = jiffies; + + rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low; + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.rx_drops = rx_drops; + u64_stats_update_end(&adapter->syncp); } static void ena_notification(void *adapter_data, -- 2.7.4
[PATCH net-next 08/13] net: ena: separate skb allocation to dedicated function
From: Netanel Belgazal Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 44 +--- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 424b4d7..7dee448 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -825,6 +825,28 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) return tx_pkts; } +static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags) +{ + struct sk_buff *skb; + + if (frags) + skb = napi_get_frags(rx_ring->napi); + else + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_copybreak); + + if (unlikely(!skb)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.skb_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, + "Failed to allocate skb. frags: %d\n", frags); + return NULL; + } + + return skb; +} + static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, u32 descs, @@ -854,16 +876,9 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, prefetch(va + NET_IP_ALIGN); if (len <= rx_ring->rx_copybreak) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_copybreak); - if (unlikely(!skb)) { - u64_stats_update_begin(&rx_ring->syncp); - rx_ring->rx_stats.skb_alloc_fail++; - u64_stats_update_end(&rx_ring->syncp); - netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, - "Failed to allocate skb\n"); + skb = ena_alloc_skb(rx_ring, false); + if (unlikely(!skb)) return NULL; - } netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "rx allocated small packet. len %d. 
data_len %d\n", @@ -882,20 +897,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, skb_put(skb, len); skb->protocol = eth_type_trans(skb, rx_ring->netdev); + rx_ring->free_rx_ids[*next_to_clean] = req_id; *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, rx_ring->ring_size); return skb; } - skb = napi_get_frags(rx_ring->napi); - if (unlikely(!skb)) { - netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, - "Failed allocating skb\n"); - u64_stats_update_begin(&rx_ring->syncp); - rx_ring->rx_stats.skb_alloc_fail++; - u64_stats_update_end(&rx_ring->syncp); + skb = ena_alloc_skb(rx_ring, true); + if (unlikely(!skb)) return NULL; - } do { dma_unmap_page(rx_ring->dev, -- 2.7.4
[PATCH net-next 09/13] net: ena: adding missing cast in ena_com_mem_addr_set()
From: Netanel Belgazal Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_com.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index f6e1d30..8efb85e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -100,7 +100,7 @@ static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, } ena_addr->mem_addr_low = (u32)addr; - ena_addr->mem_addr_high = (u64)addr >> 32; + ena_addr->mem_addr_high = (u16)((u64)addr >> 32); return 0; } -- 2.7.4
[PATCH net-next 07/13] net: ena: use napi_schedule_irqoff when possible
From: Netanel Belgazal Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 04aade8..424b4d7 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1264,7 +1264,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data) { struct ena_napi *ena_napi = data; - napi_schedule(&ena_napi->napi); + napi_schedule_irqoff(&ena_napi->napi); return IRQ_HANDLED; } -- 2.7.4
[PATCH net-next 04/13] net: ena: add reset reason for each device FLR
From: Netanel Belgazal For each device reset, log to the device what is the cause the reset occur. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_com.c | 5 +++- drivers/net/ethernet/amazon/ena/ena_com.h | 4 +++- drivers/net/ethernet/amazon/ena/ena_netdev.c| 17 + drivers/net/ethernet/amazon/ena/ena_netdev.h| 2 ++ drivers/net/ethernet/amazon/ena/ena_regs_defs.h | 32 + 5 files changed, 54 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 2721c70..f6e1d30 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -1825,7 +1825,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); } -int ena_com_dev_reset(struct ena_com_dev *ena_dev) +int ena_com_dev_reset(struct ena_com_dev *ena_dev, + enum ena_regs_reset_reason_types reset_reason) { u32 stat, timeout, cap, reset_val; int rc; @@ -1853,6 +1854,8 @@ int ena_com_dev_reset(struct ena_com_dev *ena_dev) /* start reset */ reset_val = ENA_REGS_DEV_CTL_DEV_RESET_MASK; + reset_val |= (reset_reason << ENA_REGS_DEV_CTL_RESET_REASON_SHIFT) & +ENA_REGS_DEV_CTL_RESET_REASON_MASK; writel(reset_val, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF); /* Write again the MMIO read request address */ diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 630c09a..7b784f8 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -420,10 +420,12 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev); /* ena_com_dev_reset - Perform device FLR to the device. * @ena_dev: ENA communication layer struct + * @reset_reason: Specify what is the trigger for the reset in case of an error. * * @return - 0 on success, negative value on failure. 
*/ -int ena_com_dev_reset(struct ena_com_dev *ena_dev); +int ena_com_dev_reset(struct ena_com_dev *ena_dev, + enum ena_regs_reset_reason_types reset_reason); /* ena_com_create_io_queue - Create io queue. * @ena_dev: ENA communication layer struct diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 1ee06e1..0d35a4cc 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -87,6 +87,7 @@ static void ena_tx_timeout(struct net_device *dev) if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) return; + adapter->reset_reason = ENA_REGS_RESET_OS_NETDEV_WD; u64_stats_update_begin(&adapter->syncp); adapter->dev_stats.tx_timeout++; u64_stats_update_end(&adapter->syncp); @@ -670,6 +671,7 @@ static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) u64_stats_update_end(&tx_ring->syncp); /* Trigger device reset */ + tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags); return -EFAULT; } @@ -1055,6 +1057,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, u64_stats_update_end(&rx_ring->syncp); /* Too many desc from the device. 
Trigger reset */ + adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); return 0; @@ -1720,7 +1723,7 @@ static void ena_down(struct ena_adapter *adapter) if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) { int rc; - rc = ena_com_dev_reset(adapter->ena_dev); + rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); if (rc) dev_err(&adapter->pdev->dev, "Device reset failed\n"); } @@ -2353,7 +2356,7 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ); ena_com_set_mmio_read_mode(ena_dev, readless_supported); - rc = ena_com_dev_reset(ena_dev); + rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); if (rc) { dev_err(dev, "Can not reset device\n"); goto err_mmio_read_less; @@ -2512,6 +2515,7 @@ static void ena_fw_reset_device(struct work_struct *work) ena_com_mmio_reg_read_request_destroy(ena_dev); + adapter->reset_reason = ENA_REGS_RESET_NORMAL; clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); /* Finish with the destroy part. Start the init part */ @@ -2591,6 +2595,8 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter,
[PATCH net-next 6/8] net: ena: fix theoretical Rx hang on low memory systems
From: Netanel Belgazal For the rare case where the device runs out of free rx buffer descriptors (in case of pressure on kernel memory), and the napi handler continuously fail to refill new Rx descriptors until device rx queue totally runs out of all free rx buffers to post incoming packet, leading to a deadlock: * The device won't send interrupts since all the new Rx packets will be dropped. * The napi handler won't try to allocate new Rx descriptors since allocation is part of NAPI that's not being invoked any more The fix involves detecting this scenario and rescheduling NAPI (to refill buffers) by the keepalive/watchdog task. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 + drivers/net/ethernet/amazon/ena/ena_netdev.c | 55 +++ drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 + 3 files changed, 58 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 67b2338f..533b2fb 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -94,6 +94,7 @@ static const struct ena_stats ena_stats_rx_strings[] = { ENA_STAT_RX_ENTRY(dma_mapping_err), ENA_STAT_RX_ENTRY(bad_desc_num), ENA_STAT_RX_ENTRY(rx_copybreak_pkt), + ENA_STAT_RX_ENTRY(empty_rx_ring), }; static const struct ena_stats ena_stats_ena_com_strings[] = { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 4e9fbdd..3c366bf 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -190,6 +190,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter) rxr->sgl_size = adapter->max_rx_sgl_size; rxr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + rxr->empty_rx_queue = 0; } } @@ -2619,6 +2620,58 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter) adapter->last_monitored_tx_qid = i % 
adapter->num_queues; } +/* trigger napi schedule after 2 consecutive detections */ +#define EMPTY_RX_REFILL 2 +/* For the rare case where the device runs out of Rx descriptors and the + * napi handler failed to refill new Rx descriptors (due to a lack of memory + * for example). + * This case will lead to a deadlock: + * The device won't send interrupts since all the new Rx packets will be dropped + * The napi handler won't allocate new Rx descriptors so the device will be + * able to send new packets. + * + * This scenario can happen when the kernel's vm.min_free_kbytes is too small. + * It is recommended to have at least 512MB, with a minimum of 128MB for + * constrained environment). + * + * When such a situation is detected - Reschedule napi + */ +static void check_for_empty_rx_ring(struct ena_adapter *adapter) +{ + struct ena_ring *rx_ring; + int i, refill_required; + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return; + + if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) + return; + + for (i = 0; i < adapter->num_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + + refill_required = + ena_com_sq_empty_space(rx_ring->ena_com_io_sq); + if (unlikely(refill_required == (rx_ring->ring_size - 1))) { + rx_ring->empty_rx_queue++; + + if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.empty_rx_ring++; + u64_stats_update_end(&rx_ring->syncp); + + netif_err(adapter, drv, adapter->netdev, + "trigger refill for ring %d\n", i); + + napi_schedule(rx_ring->napi); + rx_ring->empty_rx_queue = 0; + } + } else { + rx_ring->empty_rx_queue = 0; + } + } +} + /* Check for keep alive expiration */ static void check_for_missing_keep_alive(struct ena_adapter *adapter) { @@ -2673,6 +2726,8 @@ static void ena_timer_service(unsigned long data) check_for_missing_tx_completions(adapter); + check_for_empty_rx_ring(adapter); + if (debug_area) ena_dump_stats_to_buf(adapter, debug_area); diff --git 
a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 0e22bce..8828f1d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -184,6 +184,7 @@ struct ena_stats_rx { u64 dma_mapping_err; u64 bad_desc_num; u64 rx_copybrea
[PATCH net-next 05/13] net: ena: add support for out of order rx buffers refill
From: Netanel Belgazal The ENA driver posts Rx buffers through the Rx submission queue for the ENA device to fill them with received packets. Each Rx buffer is marked with a req_id in the Rx descriptor. Newer ENA devices could consume the posted Rx buffers out of order, and as a result the corresponding Rx completion queue will have Rx completion descriptors with non-contiguous req_id(s). In this change the driver holds two rings. The first ring (called free_rx_ids) is a mapping ring. It holds all the unused request ids. The values in this ring are from 0 to ring_size - 1. When the driver wants to allocate a new Rx buffer it takes the head of free_rx_ids and uses its value as the index into the rx_buffer_info ring. The req_id is also written to the Rx descriptor. Upon Rx completion, the driver takes the req_id from the completion descriptor and uses it as the index into rx_buffer_info. The req_id is then returned to the free_rx_ids ring. This patch also adds statistics to inform when the driver receives an out-of-range or unused req_id. 
Note: free_rx_ids is only accessible from the napi handler, so no locking is required Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_eth_com.c | 5 ++ drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 + drivers/net/ethernet/amazon/ena/ena_netdev.c | 83 ++- drivers/net/ethernet/amazon/ena/ena_netdev.h | 11 +++- 4 files changed, 83 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index f999305..b11e573 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -493,6 +493,11 @@ int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id) if (cdesc_phase != expected_phase) return -EAGAIN; + if (unlikely(cdesc->req_id >= io_cq->q_depth)) { + pr_err("Invalid req id %d\n", cdesc->req_id); + return -EINVAL; + } + ena_com_cq_inc_head(io_cq); *req_id = READ_ONCE(cdesc->req_id); diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index d51a67f..b1212de 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -93,6 +93,7 @@ static const struct ena_stats ena_stats_rx_strings[] = { ENA_STAT_RX_ENTRY(dma_mapping_err), ENA_STAT_RX_ENTRY(bad_desc_num), ENA_STAT_RX_ENTRY(rx_copybreak_pkt), + ENA_STAT_RX_ENTRY(bad_req_id), ENA_STAT_RX_ENTRY(empty_rx_ring), }; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 0d35a4cc..fcbcd18 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -304,6 +304,24 @@ static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) ena_free_tx_resources(adapter, i); } +static inline int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) +{ + if (likely(req_id < rx_ring->ring_size)) + return 0; + + netif_err(rx_ring->adapter, rx_err, 
rx_ring->netdev, + "Invalid rx req_id: %hu\n", req_id); + + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_req_id++; + u64_stats_update_end(&rx_ring->syncp); + + /* Trigger device reset */ + rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; + set_bit(ENA_FLAG_TRIGGER_RESET, &rx_ring->adapter->flags); + return -EFAULT; +} + /* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors) * @adapter: network interface device structure * @qid: queue index @@ -315,7 +333,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, { struct ena_ring *rx_ring = &adapter->rx_ring[qid]; struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; - int size, node; + int size, node, i; if (rx_ring->rx_buffer_info) { netif_err(adapter, ifup, adapter->netdev, @@ -336,6 +354,20 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, return -ENOMEM; } + size = sizeof(u16) * rx_ring->ring_size; + rx_ring->free_rx_ids = vzalloc_node(size, node); + if (!rx_ring->free_rx_ids) { + rx_ring->free_rx_ids = vzalloc(size); + if (!rx_ring->free_rx_ids) { + vfree(rx_ring->rx_buffer_info); + return -ENOMEM; + } + } + + /* Req id ring for receiving RX pkts out of order */ + for (i = 0; i < rx_ring->ring_size; i++) + rx_ring->free_rx_ids[i] = i; + /* Reset rx statistics */ memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats)); @@ -359,6 +391,9 @@ static void ena_free_rx_resources(struct ena_adapter
[PATCH net-next 02/13] net: ena: add hardware hints capability to the driver
From: Netanel Belgazal With this patch, ENA device can update the ena driver about the desired timeout values: These values are part of the "hardware hints" which are transmitted to the driver as Asynchronous event through ENA async event notification queue. In case the ENA device does not support this capability, the driver will use its own default values. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_admin_defs.h | 31 +++ drivers/net/ethernet/amazon/ena/ena_com.c| 38 +++--- drivers/net/ethernet/amazon/ena/ena_com.h| 6 +++ drivers/net/ethernet/amazon/ena/ena_netdev.c | 66 ++-- drivers/net/ethernet/amazon/ena/ena_netdev.h | 5 ++ drivers/net/ethernet/amazon/ena/ena_regs_defs.h | 2 + 6 files changed, 137 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h index 5b6509d..305dc19 100644 --- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -70,6 +70,8 @@ enum ena_admin_aq_feature_id { ENA_ADMIN_MAX_QUEUES_NUM= 2, + ENA_ADMIN_HW_HINTS = 3, + ENA_ADMIN_RSS_HASH_FUNCTION = 10, ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, @@ -749,6 +751,31 @@ struct ena_admin_feature_rss_ind_table { struct ena_admin_rss_ind_table_entry inline_entry; }; +/* When hint value is 0, driver should use it's own predefined value */ +struct ena_admin_ena_hw_hints { + /* value in ms */ + u16 mmio_read_timeout; + + /* value in ms */ + u16 driver_watchdog_timeout; + + /* Per packet tx completion timeout. 
value in ms */ + u16 missing_tx_completion_timeout; + + u16 missed_tx_completion_count_threshold_to_reset; + + /* value in ms */ + u16 admin_completion_tx_timeout; + + u16 netdev_wd_timeout; + + u16 max_tx_sgl_size; + + u16 max_rx_sgl_size; + + u16 reserved[8]; +}; + struct ena_admin_get_feat_cmd { struct ena_admin_aq_common_desc aq_common_descriptor; @@ -782,6 +809,8 @@ struct ena_admin_get_feat_resp { struct ena_admin_feature_rss_ind_table ind_table; struct ena_admin_feature_intr_moder_desc intr_moderation; + + struct ena_admin_ena_hw_hints hw_hints; } u; }; @@ -857,6 +886,8 @@ enum ena_admin_aenq_notification_syndrom { ENA_ADMIN_SUSPEND = 0, ENA_ADMIN_RESUME= 1, + + ENA_ADMIN_UPDATE_HINTS = 2, }; struct ena_admin_aenq_entry { diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 02752d5..65d53d5 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -511,7 +511,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c unsigned long flags, timeout; int ret; - timeout = jiffies + ADMIN_CMD_TIMEOUT_US; + timeout = jiffies + usecs_to_jiffies(admin_queue->completion_timeout); while (1) { spin_lock_irqsave(&admin_queue->q_lock, flags); @@ -561,7 +561,8 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com int ret; wait_for_completion_timeout(&comp_ctx->wait_event, - usecs_to_jiffies(ADMIN_CMD_TIMEOUT_US)); + usecs_to_jiffies( + admin_queue->completion_timeout)); /* In case the command wasn't completed find out the root cause. 
* There might be 2 kinds of errors @@ -601,12 +602,15 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp = mmio_read->read_resp; - u32 mmio_read_reg, ret; + u32 mmio_read_reg, ret, i; unsigned long flags; - int i; + u32 timeout = mmio_read->reg_read_to; might_sleep(); + if (timeout == 0) + timeout = ENA_REG_READ_TIMEOUT; + /* If readless is disabled, perform regular read */ if (!mmio_read->readless_supported) return readl(ena_dev->reg_bar + offset); @@ -627,14 +631,14 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF); - for (i = 0; i < ENA_REG_READ_TIMEOUT; i++) { + for (i = 0; i < timeout; i++) { if (read_resp->req_id == mmio_read->seq_num) break; udelay(1); } - if (unlikely(i == ENA_REG_READ_TIMEOUT)) { + if (unlikely(i == timeout)) { pr_err(
[PATCH net-next 01/13] net: ena: change return value for unsupported features unsupported return value
From: Netanel Belgazal return -EOPNOTSUPP instead of -EPERM. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_com.c | 22 +++--- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 10 +++--- drivers/net/ethernet/amazon/ena/ena_netdev.c | 20 ++-- 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index f5b237e..02752d5 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -494,7 +494,7 @@ static int ena_com_comp_status_to_errno(u8 comp_status) case ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE: return -ENOMEM; case ENA_ADMIN_UNSUPPORTED_OPCODE: - return -EPERM; + return -EOPNOTSUPP; case ENA_ADMIN_BAD_OPCODE: case ENA_ADMIN_MALFORMED_REQUEST: case ENA_ADMIN_ILLEGAL_PARAMETER: @@ -786,7 +786,7 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) { pr_debug("Feature %d isn't supported\n", feature_id); - return -EPERM; + return -EOPNOTSUPP; } memset(&get_cmd, 0x0, sizeof(get_cmd)); @@ -1324,7 +1324,7 @@ int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag) if ((get_resp.u.aenq.supported_groups & groups_flag) != groups_flag) { pr_warn("Trying to set unsupported aenq events. 
supported flag: %x asked flag: %x\n", get_resp.u.aenq.supported_groups, groups_flag); - return -EPERM; + return -EOPNOTSUPP; } memset(&cmd, 0x0, sizeof(cmd)); @@ -1909,7 +1909,7 @@ int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu) if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) { pr_debug("Feature %d isn't supported\n", ENA_ADMIN_MTU); - return -EPERM; + return -EOPNOTSUPP; } memset(&cmd, 0x0, sizeof(cmd)); @@ -1963,7 +1963,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) ENA_ADMIN_RSS_HASH_FUNCTION)) { pr_debug("Feature %d isn't supported\n", ENA_ADMIN_RSS_HASH_FUNCTION); - return -EPERM; + return -EOPNOTSUPP; } /* Validate hash function is supported */ @@ -1975,7 +1975,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) if (get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func)) { pr_err("Func hash %d isn't supported by device, abort\n", rss->hash_func); - return -EPERM; + return -EOPNOTSUPP; } memset(&cmd, 0x0, sizeof(cmd)); @@ -2034,7 +2034,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) { pr_err("Flow hash function %d isn't supported\n", func); - return -EPERM; + return -EOPNOTSUPP; } switch (func) { @@ -2127,7 +2127,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev) ENA_ADMIN_RSS_HASH_INPUT)) { pr_debug("Feature %d isn't supported\n", ENA_ADMIN_RSS_HASH_INPUT); - return -EPERM; + return -EOPNOTSUPP; } memset(&cmd, 0x0, sizeof(cmd)); @@ -2208,7 +2208,7 @@ int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev) pr_err("hash control doesn't support all the desire configuration. 
proto %x supported %x selected %x\n", i, hash_ctrl->supported_fields[i].fields, hash_ctrl->selected_fields[i].fields); - return -EPERM; + return -EOPNOTSUPP; } } @@ -2286,7 +2286,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) ena_dev, ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG)) { pr_debug("Feature %d isn't supported\n", ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); - return -EPERM; + return -EOPNOTSUPP; } ret = ena_com_ind_tbl_convert_to_device(ena_dev); @@ -2553,7 +2553,7 @@ int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev) ENA_ADMIN_INTERRUPT_MODERATION); if (rc) { - if (rc == -EPERM) { + if (rc == -EOPNOTSUPP) { pr_debug("Feature %d isn't supported\n", ENA_ADMIN_INTERRUPT_MODERATION); rc = 0; diff --git a/drivers/net/
[PATCH net-next 00/13] update ena ethernet driver to version 1.2.0
From: Netanel Belgazal This patchset contains some new features/improvements that were added to the ENA driver to increase its robustness and are based on experience of wide ENA deployment. Depends on: [PATCH net-next 0/8] Bug fixes in ena ethernet driver Netanel Belgazal (13): net: ena: change return value for unsupported features unsupported return value net: ena: add hardware hints capability to the driver net: ena: change sizeof() argument to be the type pointer net: ena: add reset reason for each device FLR net: ena: add support for out of order rx buffers refill net: ena: allow the driver to work with small number of msix vectors net: ena: use napi_schedule_irqoff when possible net: ena: separate skb allocation to dedicated function net: ena: adding missing cast in ena_com_mem_addr_set() net: ena: add mtu limitation in ena_change_mtu() net: ena: update driver's rx drop statistics net: ena: change validate_tx_req_id() to be inline function net: ena: update ena driver to version 1.2.0 drivers/net/ethernet/amazon/ena/ena_admin_defs.h | 31 +++ drivers/net/ethernet/amazon/ena/ena_com.c| 83 -- drivers/net/ethernet/amazon/ena/ena_com.h| 10 +- drivers/net/ethernet/amazon/ena/ena_eth_com.c| 5 + drivers/net/ethernet/amazon/ena/ena_ethtool.c| 11 +- drivers/net/ethernet/amazon/ena/ena_netdev.c | 317 +-- drivers/net/ethernet/amazon/ena/ena_netdev.h | 31 ++- drivers/net/ethernet/amazon/ena/ena_regs_defs.h | 34 +++ 8 files changed, 407 insertions(+), 115 deletions(-) -- 2.7.4
[PATCH net-next 06/13] net: ena: allow the driver to work with small number of msix vectors
From: Netanel Belgazal Current driver tries to allocate msix vectors as the number of the negotiated io queues. (with another msix vector for management). If pci_alloc_irq_vectors() fails, the driver aborts the probe and the ENA network device is never brought up. With this patch, the driver's logic will reduce the number of IO queues to the number of allocated msix vectors (minus one for management) instead of failing probe(). Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 65 drivers/net/ethernet/amazon/ena/ena_netdev.h | 6 ++- 2 files changed, 51 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index fcbcd18..04aade8 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1269,9 +1269,20 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data) return IRQ_HANDLED; } +/* Reserve a single MSI-X vector for management (admin + aenq). + * plus reserve one vector for each potential io queue. + * the number of potential io queues is the minimum of what the device + * supports and the number of vCPUs. 
+ */ static int ena_enable_msix(struct ena_adapter *adapter, int num_queues) { - int msix_vecs, rc; + int msix_vecs, irq_cnt; + + if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { + netif_err(adapter, probe, adapter->netdev, + "Error, MSI-X is already enabled\n"); + return -EPERM; + } /* Reserved the max msix vectors we might need */ msix_vecs = ENA_MAX_MSIX_VEC(num_queues); @@ -1279,25 +1290,28 @@ static int ena_enable_msix(struct ena_adapter *adapter, int num_queues) netif_dbg(adapter, probe, adapter->netdev, "trying to enable MSI-X, vectors %d\n", msix_vecs); - rc = pci_alloc_irq_vectors(adapter->pdev, msix_vecs, msix_vecs, - PCI_IRQ_MSIX); - if (rc < 0) { + irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC, + msix_vecs, PCI_IRQ_MSIX); + + if (irq_cnt < 0) { netif_err(adapter, probe, adapter->netdev, - "Failed to enable MSI-X, vectors %d rc %d\n", - msix_vecs, rc); + "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt); return -ENOSPC; } - netif_dbg(adapter, probe, adapter->netdev, "enable MSI-X, vectors %d\n", - msix_vecs); - - if (msix_vecs >= 1) { - if (ena_init_rx_cpu_rmap(adapter)) - netif_warn(adapter, probe, adapter->netdev, - "Failed to map IRQs to CPUs\n"); + if (irq_cnt != msix_vecs) { + netif_notice(adapter, probe, adapter->netdev, +"enable only %d MSI-X (out of %d), reduce the number of queues\n", +irq_cnt, msix_vecs); + adapter->num_queues = irq_cnt - ENA_ADMIN_MSIX_VEC; } - adapter->msix_vecs = msix_vecs; + if (ena_init_rx_cpu_rmap(adapter)) + netif_warn(adapter, probe, adapter->netdev, + "Failed to map IRQs to CPUs\n"); + + adapter->msix_vecs = irq_cnt; + set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags); return 0; } @@ -1374,6 +1388,12 @@ static int ena_request_io_irq(struct ena_adapter *adapter) struct ena_irq *irq; int rc = 0, i, k; + if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to request I/O IRQ: MSI-X is not enabled\n"); + return -EINVAL; + } + for (i = 
ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { irq = &adapter->irq_tbl[i]; rc = request_irq(irq->vector, irq->handler, flags, irq->name, @@ -1432,6 +1452,12 @@ static void ena_free_io_irq(struct ena_adapter *adapter) } } +static void ena_disable_msix(struct ena_adapter *adapter) +{ + if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) + pci_free_irq_vectors(adapter->pdev); +} + static void ena_disable_io_intr_sync(struct ena_adapter *adapter) { int i; @@ -2520,7 +2546,8 @@ static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter, return 0; err_disable_msix: - pci_free_irq_vectors(adapter->pdev); + ena_disable_msix(adapter); + return rc; } @@ -2558,7 +2585,7 @@ static void ena_fw_reset_device(struct work_struct *work) ena_free_mgmnt_irq(adapter); - pci_free_irq_vectors(adapter->pdev); + ena_disable_msix(adapter); ena_com_abort_admin_commands(ena_dev); @@ -2610,7 +2637,7 @@ static vo
[PATCH net-next 7/8] net: ena: disable admin msix while working in polling mode
From: Netanel Belgazal Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_com.c | 8 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index ea60b9e..f5b237e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -61,6 +61,8 @@ #define ENA_MMIO_READ_TIMEOUT 0x +#define ENA_REGS_ADMIN_INTR_MASK 1 + /*/ /*/ /*/ @@ -1454,6 +1456,12 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev) void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling) { + u32 mask_value = 0; + + if (polling) + mask_value = ENA_REGS_ADMIN_INTR_MASK; + + writel(mask_value, ena_dev->reg_bar + ENA_REGS_INTR_MASK_OFF); ena_dev->admin_queue.polling = polling; } -- 2.7.4
[PATCH net-next 03/13] net: ena: change sizeof() argument to be the type pointer
From: Netanel Belgazal Instead of using: memset(ptr, 0x0, sizeof(struct ...)) use: memset(ptr, 0x0, sizeof(*ptr)) Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_com.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 65d53d5..2721c70 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -329,7 +329,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, size_t size; int dev_node = 0; - memset(&io_sq->desc_addr, 0x0, sizeof(struct ena_com_io_desc_addr)); + memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr)); io_sq->desc_entry_size = (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? @@ -383,7 +383,7 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, size_t size; int prev_node = 0; - memset(&io_cq->cdesc_addr, 0x0, sizeof(struct ena_com_io_desc_addr)); + memset(&io_cq->cdesc_addr, 0x0, sizeof(io_cq->cdesc_addr)); /* Use the basic completion descriptor for Rx */ io_cq->cdesc_entry_size_in_bytes = @@ -685,7 +685,7 @@ static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev, u8 direction; int ret; - memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd)); + memset(&destroy_cmd, 0x0, sizeof(destroy_cmd)); if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) direction = ENA_ADMIN_SQ_DIRECTION_TX; @@ -967,7 +967,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, u8 direction; int ret; - memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_sq_cmd)); + memset(&create_cmd, 0x0, sizeof(create_cmd)); create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_SQ; @@ -1159,7 +1159,7 @@ int ena_com_create_io_cq(struct ena_com_dev *ena_dev, struct ena_admin_acq_create_cq_resp_desc cmd_completion; int ret; - memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_cq_cmd)); + memset(&create_cmd, 0x0, sizeof(create_cmd)); 
create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_CQ; @@ -1267,7 +1267,7 @@ int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, struct ena_admin_acq_destroy_cq_resp_desc destroy_resp; int ret; - memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd)); + memset(&destroy_cmd, 0x0, sizeof(destroy_cmd)); destroy_cmd.cq_idx = io_cq->idx; destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_CQ; @@ -1623,8 +1623,8 @@ int ena_com_create_io_queue(struct ena_com_dev *ena_dev, io_sq = &ena_dev->io_sq_queues[ctx->qid]; io_cq = &ena_dev->io_cq_queues[ctx->qid]; - memset(io_sq, 0x0, sizeof(struct ena_com_io_sq)); - memset(io_cq, 0x0, sizeof(struct ena_com_io_cq)); + memset(io_sq, 0x0, sizeof(*io_sq)); + memset(io_cq, 0x0, sizeof(*io_cq)); /* Init CQ */ io_cq->q_depth = ctx->queue_size; -- 2.7.4
[PATCH net-next 8/8] net: ena: bug fix in lost tx packets detection mechanism
From: Netanel Belgazal check_for_missing_tx_completions() is called from a timer task and looks for lost tx packets. The old implementation accumulated all the lost tx packets and did not check whether those packets were retrieved at a later stage. This led to a situation where the driver reset the device for no reason. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 - drivers/net/ethernet/amazon/ena/ena_netdev.c | 66 +++ drivers/net/ethernet/amazon/ena/ena_netdev.h | 14 +- 3 files changed, 50 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 533b2fb..3ee55e2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -80,7 +80,6 @@ static const struct ena_stats ena_stats_tx_strings[] = { ENA_STAT_TX_ENTRY(tx_poll), ENA_STAT_TX_ENTRY(doorbells), ENA_STAT_TX_ENTRY(prepare_ctx_err), - ENA_STAT_TX_ENTRY(missing_tx_comp), ENA_STAT_TX_ENTRY(bad_req_id), }; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 3c366bf..4f16ed3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1995,6 +1995,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_info->tx_descs = nb_hw_desc; tx_info->last_jiffies = jiffies; + tx_info->print_once = 0; tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, tx_ring->ring_size); @@ -2564,13 +2565,44 @@ static void ena_fw_reset_device(struct work_struct *work) "Reset attempt failed. 
Can not reset the device\n"); } -static void check_for_missing_tx_completions(struct ena_adapter *adapter) +static int check_missing_comp_in_queue(struct ena_adapter *adapter, + struct ena_ring *tx_ring) { struct ena_tx_buffer *tx_buf; unsigned long last_jiffies; + u32 missed_tx = 0; + int i; + + for (i = 0; i < tx_ring->ring_size; i++) { + tx_buf = &tx_ring->tx_buffer_info[i]; + last_jiffies = tx_buf->last_jiffies; + if (unlikely(last_jiffies && +time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) { + if (!tx_buf->print_once) + netif_notice(adapter, tx_err, adapter->netdev, +"Found a Tx that wasn't completed on time, qid %d, index %d.\n", +tx_ring->qid, i); + + tx_buf->print_once = 1; + missed_tx++; + + if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) { + netif_err(adapter, tx_err, adapter->netdev, + "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n", + missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + return -EIO; + } + } + } + + return 0; +} + +static void check_for_missing_tx_completions(struct ena_adapter *adapter) +{ struct ena_ring *tx_ring; - int i, j, budget; - u32 missed_tx; + int i, budget, rc; /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -2586,31 +2618,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter) for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) { tx_ring = &adapter->tx_ring[i]; - for (j = 0; j < tx_ring->ring_size; j++) { - tx_buf = &tx_ring->tx_buffer_info[j]; - last_jiffies = tx_buf->last_jiffies; - if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) { - netif_notice(adapter, tx_err, adapter->netdev, -"Found a Tx that wasn't completed on time, qid %d, index %d.\n", -tx_ring->qid, j); - - u64_stats_update_begin(&tx_ring->syncp); - missed_tx = tx_ring->tx_stats.missing_tx_comp++; - u64_stats_update_end(&tx_ring->syncp); - - /* Clear last 
jiffies so the lost buffer won't -* be counted twice. -*/ - tx_buf->last_jiffies = 0; - - if (unlikely(m
Re: [PATCH v3 1/2] ip_tunnel: fix potential issue in ip_tunnel_rcv
On Wed, Jun 7, 2017 at 9:32 PM, Haishuang Yan wrote: > When ip_tunnel_rcv fails, the tun_dst won't be freed, so call > dst_release to free it in error code path. > > CC: Pravin B Shelar > Fixes: 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata.") > Signed-off-by: Haishuang Yan > > --- > Changes in v2: > - Add the missing Fixes information > Changes in v3: > - Free tun_dst from error code path > --- Acked-by: Pravin B Shelar
[PATCH net-next 2/8] net: ena: fix bug that might cause hang after consecutive open/close interface.
From: Netanel Belgazal Fix a bug where the driver does not unmask the IO interrupts in ndo_open(): occasionally, the MSI-X interrupt (for one or more IO queues) can be masked when ndo_close() is called. If that is followed by ndo_open(), the MSI-X will still be masked, so no interrupt will be received by the driver. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 41 ++-- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 7c1214d..0e3c60c7 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1078,6 +1078,26 @@ inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring, rx_ring->per_napi_bytes = 0; } +static inline void ena_unmask_interrupt(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) +{ + struct ena_eth_io_intr_reg intr_reg; + + /* Update intr register: rx intr delay, +* tx intr delay and interrupt unmask +*/ + ena_com_update_intr_reg(&intr_reg, + rx_ring->smoothed_interval, + tx_ring->smoothed_interval, + true); + + /* It is a shared MSI-X. +* Tx and Rx CQ have pointer to it. 
+* So we use one of them to reach the intr reg +*/ + ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); +} + static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { @@ -1108,7 +1128,6 @@ static int ena_io_poll(struct napi_struct *napi, int budget) { struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); struct ena_ring *tx_ring, *rx_ring; - struct ena_eth_io_intr_reg intr_reg; u32 tx_work_done; u32 rx_work_done; @@ -1149,22 +1168,9 @@ static int ena_io_poll(struct napi_struct *napi, int budget) if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) ena_adjust_intr_moderation(rx_ring, tx_ring); - /* Update intr register: rx intr delay, -* tx intr delay and interrupt unmask -*/ - ena_com_update_intr_reg(&intr_reg, - rx_ring->smoothed_interval, - tx_ring->smoothed_interval, - true); - - /* It is a shared MSI-X. -* Tx and Rx CQ have pointer to it. -* So we use one of them to reach the intr reg -*/ - ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); + ena_unmask_interrupt(tx_ring, rx_ring); } - ena_update_ring_numa_node(tx_ring, rx_ring); ret = rx_work_done; @@ -1485,6 +1491,11 @@ static int ena_up_complete(struct ena_adapter *adapter) ena_napi_enable_all(adapter); + /* Enable completion queues interrupt */ + for (i = 0; i < adapter->num_queues; i++) + ena_unmask_interrupt(&adapter->tx_ring[i], +&adapter->rx_ring[i]); + /* schedule napi in case we had pending packets * from the last time we disable napi */ -- 2.7.4
[PATCH net-next 3/8] net: ena: add missing return when ena_com_get_io_handlers() fails
From: Netanel Belgazal Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 0e3c60c7..1e71e89 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1543,6 +1543,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) "Failed to get TX queue handlers. TX queue num %d rc: %d\n", qid, rc); ena_com_destroy_io_queue(ena_dev, ena_qid); + return rc; } ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node); @@ -1607,6 +1608,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) "Failed to get RX queue handlers. RX queue num %d rc: %d\n", qid, rc); ena_com_destroy_io_queue(ena_dev, ena_qid); + return rc; } ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); -- 2.7.4
[PATCH net-next 6/8] net: ena: fix theoretical Rx stuck on low memory systems
From: Netanel Belgazal For the rare case where the device runs out of free rx buffer descriptors (in case of pressure on kernel memory), and the napi handler continuously fail to refill new Rx descriptors until device rx queue totally runs out of all free rx buffers to post incoming packet, leading to a deadlock: * The device won't send interrupts since all the new Rx packets will be dropped. * The napi handler won't try to allocate new Rx descriptors since allocation is part of NAPI that's not being invoked any more The fix involves detecting this scenario and rescheduling NAPI (to refill buffers) by the keepalive/watchdog task. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 + drivers/net/ethernet/amazon/ena/ena_netdev.c | 55 +++ drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 + 3 files changed, 58 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 67b2338f..533b2fb 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -94,6 +94,7 @@ static const struct ena_stats ena_stats_rx_strings[] = { ENA_STAT_RX_ENTRY(dma_mapping_err), ENA_STAT_RX_ENTRY(bad_desc_num), ENA_STAT_RX_ENTRY(rx_copybreak_pkt), + ENA_STAT_RX_ENTRY(empty_rx_ring), }; static const struct ena_stats ena_stats_ena_com_strings[] = { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 4e9fbdd..3c366bf 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -190,6 +190,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter) rxr->sgl_size = adapter->max_rx_sgl_size; rxr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + rxr->empty_rx_queue = 0; } } @@ -2619,6 +2620,58 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter) adapter->last_monitored_tx_qid = i % 
adapter->num_queues; } +/* trigger napi schedule after 2 consecutive detections */ +#define EMPTY_RX_REFILL 2 +/* For the rare case where the device runs out of Rx descriptors and the + * napi handler failed to refill new Rx descriptors (due to a lack of memory + * for example). + * This case will lead to a deadlock: + * The device won't send interrupts since all the new Rx packets will be dropped + * The napi handler won't allocate new Rx descriptors so the device will be + * able to send new packets. + * + * This scenario can happen when the kernel's vm.min_free_kbytes is too small. + * It is recommended to have at least 512MB, with a minimum of 128MB for + * constrained environment). + * + * When such a situation is detected - Reschedule napi + */ +static void check_for_empty_rx_ring(struct ena_adapter *adapter) +{ + struct ena_ring *rx_ring; + int i, refill_required; + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return; + + if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) + return; + + for (i = 0; i < adapter->num_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + + refill_required = + ena_com_sq_empty_space(rx_ring->ena_com_io_sq); + if (unlikely(refill_required == (rx_ring->ring_size - 1))) { + rx_ring->empty_rx_queue++; + + if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.empty_rx_ring++; + u64_stats_update_end(&rx_ring->syncp); + + netif_err(adapter, drv, adapter->netdev, + "trigger refill for ring %d\n", i); + + napi_schedule(rx_ring->napi); + rx_ring->empty_rx_queue = 0; + } + } else { + rx_ring->empty_rx_queue = 0; + } + } +} + /* Check for keep alive expiration */ static void check_for_missing_keep_alive(struct ena_adapter *adapter) { @@ -2673,6 +2726,8 @@ static void ena_timer_service(unsigned long data) check_for_missing_tx_completions(adapter); + check_for_empty_rx_ring(adapter); + if (debug_area) ena_dump_stats_to_buf(adapter, debug_area); diff --git 
a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 0e22bce..8828f1d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -184,6 +184,7 @@ struct ena_stats_rx { u64 dma_mapping_err; u64 bad_desc_num; u64 rx_copybrea
[PATCH net-next 4/8] net: ena: fix race condition between submit and completion admin command
From: Netanel Belgazal Bug: "Completion context is occupied" error printout will be noticed in dmesg. This error will cause the admin command to fail, which will lead to an ena_probe() failure or a watchdog reset (depending on which admin command failed). Root cause: __ena_com_submit_admin_cmd() is the function that submits new entries to the admin queue. The function has a check that makes sure the queue is not full and the function does not overwrite any outstanding command. It uses head and tail indexes for this check. The head is increased by ena_com_handle_admin_completion() which runs from interrupt context, and the tail index is increased by the submit function (the function is running under ->q_lock, so there is no risk of multithread increment). Each command is associated with a completion context. This context is allocated before the call to __ena_com_submit_admin_cmd() and is freed by ena_com_wait_and_process_admin_cq_interrupts(), right after the command was completed. This can lead to a state where the head was increased, the check passed, but the completion context is still in use. Solution: Use the atomic variable ->outstanding_cmds instead of using the head and the tail indexes. 
This variable is safe for use since it is bumped in get_comp_ctx() in __ena_com_submit_admin_cmd() and is freed by comp_ctxt_release() Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_com.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index e1c2fab..ea60b9e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -232,11 +232,9 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu tail_masked = admin_queue->sq.tail & queue_size_mask; /* In case of queue FULL */ - cnt = admin_queue->sq.tail - admin_queue->sq.head; + cnt = atomic_read(&admin_queue->outstanding_cmds); if (cnt >= admin_queue->q_depth) { - pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n", -admin_queue->sq.tail, admin_queue->sq.head, -admin_queue->q_depth); + pr_debug("admin queue is full.\n"); admin_queue->stats.out_of_space++; return ERR_PTR(-ENOSPC); } -- 2.7.4
[PATCH net-next 5/8] net: ena: add missing unmap bars on device removal
From: Netanel Belgazal This patch also changes the mapping functions to devm_ functions Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 1e71e89..4e9fbdd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2853,6 +2853,11 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { int release_bars; + if (ena_dev->mem_bar) + devm_iounmap(&pdev->dev, ena_dev->mem_bar); + + devm_iounmap(&pdev->dev, ena_dev->reg_bar); + release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; pci_release_selected_regions(pdev, release_bars); } @@ -2940,8 +2945,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_free_ena_dev; } - ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR), - pci_resource_len(pdev, ENA_REG_BAR)); + ena_dev->reg_bar = devm_ioremap(&pdev->dev, + pci_resource_start(pdev, ENA_REG_BAR), + pci_resource_len(pdev, ENA_REG_BAR)); if (!ena_dev->reg_bar) { dev_err(&pdev->dev, "failed to remap regs bar\n"); rc = -EFAULT; @@ -2961,8 +2967,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ena_set_push_mode(pdev, ena_dev, &get_feat_ctx); if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { - ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR), - pci_resource_len(pdev, ENA_MEM_BAR)); + ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev, + pci_resource_start(pdev, ENA_MEM_BAR), + pci_resource_len(pdev, ENA_MEM_BAR)); if (!ena_dev->mem_bar) { rc = -EFAULT; goto err_device_destroy; -- 2.7.4
[PATCH net-next 0/8] Bug fixes in ena ethernet driver
From: Netanel Belgazal This patchset contains fixes for the bugs that were discovered so far. Netanel Belgazal (8): net: ena: fix rare uncompleted admin command false alarm net: ena: fix bug that might cause hang after consecutive open/close interface. net: ena: add missing return when ena_com_get_io_handlers() fails net: ena: fix race condition between submit and completion admin command net: ena: add missing unmap bars on device removal net: ena: fix theoretical Rx hang on low memory systems net: ena: disable admin msix while working in polling mode net: ena: bug fix in lost tx packets detection mechanism drivers/net/ethernet/amazon/ena/ena_com.c | 35 +++-- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 2 +- drivers/net/ethernet/amazon/ena/ena_netdev.c | 179 +++--- drivers/net/ethernet/amazon/ena/ena_netdev.h | 16 ++- 4 files changed, 168 insertions(+), 64 deletions(-) -- 2.7.4
[PATCH net-next 1/8] net: ena: fix rare uncompleted admin command false alarm
From: Netanel Belgazal The current flow to detect admin completion is: while (command_not_completed) { if (timeout) error check_for_completion() sleep() } So in case the sleep took more than the timeout (in case the thread/workqueue was not scheduled due to a higher priority task or prolonged VMexit), the driver can detect a stall even if the completion is present. The fix changes the order of this function to first check for completion and only after that check if the timeout expired. Signed-off-by: Netanel Belgazal --- drivers/net/ethernet/amazon/ena/ena_com.c | 21 +++-- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 08d11ce..e1c2fab 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -508,15 +508,20 @@ static int ena_com_comp_status_to_errno(u8 comp_status) static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx, struct ena_com_admin_queue *admin_queue) { - unsigned long flags; - u32 start_time; + unsigned long flags, timeout; int ret; - start_time = ((u32)jiffies_to_usecs(jiffies)); + timeout = jiffies + ADMIN_CMD_TIMEOUT_US; + + while (1) { + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_com_handle_admin_completion(admin_queue); + spin_unlock_irqrestore(&admin_queue->q_lock, flags); - while (comp_ctx->status == ENA_CMD_SUBMITTED) { - if ((((u32)jiffies_to_usecs(jiffies)) - start_time) > - ADMIN_CMD_TIMEOUT_US) { + if (comp_ctx->status != ENA_CMD_SUBMITTED) + break; + + if (time_is_before_jiffies(timeout)) { pr_err("Wait for completion (polling) timeout\n"); /* ENA didn't have any completion */ spin_lock_irqsave(&admin_queue->q_lock, flags); @@ -528,10 +533,6 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c goto err; } - spin_lock_irqsave(&admin_queue->q_lock, flags); - ena_com_handle_admin_completion(admin_queue); - 
spin_unlock_irqrestore(&admin_queue->q_lock, flags); - msleep(100); } -- 2.7.4
Re: [PATCH net] Fix an intermittent pr_emerg warning about lo becoming free.
> Wei Wan is actually working on a patch series removing all this > dst_garbage list stuff. Yes. I am working on removing the dst garbage collector completely. dst_dev_event() will be removed from the list of callbacks that the netdevice notifiers invoke in my patch series. On Thu, Jun 8, 2017 at 2:06 PM, Eric Dumazet wrote: > On Thu, 2017-06-08 at 13:12 -0700, Krister Johansen wrote: > > ... > >> Looking at the gc_task intervals, they started at 663ms when we invoked >> __dst_free(). After that, they increased to 1663, 3136, 5567, 8191, >> 10751, and 14848. The release that set the refcnt to 0 on our dst entry >> occurred after the gc_task was enqueued for 14 second interval so we had >> to wait longer than the warning time in wait_allrefs in order for the >> dst entry to get free'd and the hold on 'lo' to be released. >> >> A simple solution to this problem is to have dst_dev_event() reset the >> gc timer, which causes us to process this list shortly after the >> gc_mutex is relased when dst_dev_event() completes. >> >> Signed-off-by: Krister Johansen >> --- >> net/core/dst.c | 14 ++ >> 1 file changed, 14 insertions(+) >> >> diff --git a/net/core/dst.c b/net/core/dst.c >> index 6192f11..13ba4a0 100644 >> --- a/net/core/dst.c >> +++ b/net/core/dst.c >> @@ -469,6 +469,20 @@ static int dst_dev_event(struct notifier_block *this, >> unsigned long event, >> spin_lock_bh(&dst_garbage.lock); >> dst = dst_garbage.list; >> dst_garbage.list = NULL; >> + /* The code in dst_ifdown places a hold on the loopback device. >> + * If the gc entry processing is set to expire after a lengthy >> + * interval, this hold can cause netdev_wait_allrefs() to hang >> + * out and wait for a long time -- until the the loopback >> + * interface is released. If we're really unlucky, it'll emit >> + * pr_emerg messages to console too. Reset the interval here, >> + * so dst cleanups occur in a more timely fashion. 
>> + */ >> + if (dst_garbage.timer_inc > DST_GC_INC) { >> + dst_garbage.timer_inc = DST_GC_INC; >> + dst_garbage.timer_expires = DST_GC_MIN; >> + mod_delayed_work(system_wq, &dst_gc_work, >> + dst_garbage.timer_expires); >> + } >> spin_unlock_bh(&dst_garbage.lock); >> >> if (last) > > Looks very nice to me ! > > Acked-by: Eric Dumazet > > Wei Wan is actually working on a patch series removing all this > dst_garbage list stuff. > > >
[PATCH 2/2(net.git)] stmmac: fix for hw timestamp of GMAC3 unit
>From d5c520880a5f6b470cb150b9aae67341089b9395 Mon Sep 17 00:00:00 2001 From: Mario Molitor Date: Thu, 8 Jun 2017 23:03:09 +0200 Subject: [PATCH 2/2] stmmac: fix for hw timestamp of GMAC3 unit 1.) Bugfix of function stmmac_get_tx_hwtstamp. Corrected the tx timestamp available check (same as 4.8 and older) Change printout from info syslevel to debug. 2.) Bugfix of function stmmac_get_rx_hwtstamp. Corrected the rx timestamp available check (same as 4.8 and older) Change printout from info syslevel to debug. Fixes: ba1ffd74df74 ("stmmac: fix PTP support for GMAC4") Signed-off-by: Mario Molitor --- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 11 +++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index aa64764..e0ef02f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -214,13 +214,13 @@ static int dwmac4_wrback_get_tx_timestamp_status(struct dma_desc *p) { /* Context type from W/B descriptor must be zero */ if (le32_to_cpu(p->des3) & TDES3_CONTEXT_TYPE) - return -EINVAL; + return 0; /* Tx Timestamp Status is 1 so des0 and des1'll have valid values */ if (le32_to_cpu(p->des3) & TDES3_TIMESTAMP_STATUS) - return 0; + return 1; - return 1; + return 0; } static inline u64 dwmac4_get_timestamp(void *desc, u32 ats) @@ -282,7 +282,10 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) } } exit: - return ret; + if (likely(ret == 0)) + return 1; + + return 0; } static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d54e5d7..d16d11b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -434,14 +434,14 
@@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv, return; /* check tx tstamp status */ - if (!priv->hw->desc->get_tx_timestamp_status(p)) { + if (priv->hw->desc->get_tx_timestamp_status(p)) { /* get the valid tstamp */ ns = priv->hw->desc->get_timestamp(p, priv->adv_ts); memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); shhwtstamp.hwtstamp = ns_to_ktime(ns); - netdev_info(priv->dev, "get valid TX hw timestamp %llu\n", ns); + netdev_dbg(priv->dev, "get valid TX hw timestamp %llu\n", ns); /* pass tstamp to stack */ skb_tstamp_tx(skb, &shhwtstamp); } @@ -468,19 +468,19 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, return; /* Check if timestamp is available */ - if (!priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) { + if (priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) { /* For GMAC4, the valid timestamp is from CTX next desc. */ if (priv->plat->has_gmac4) ns = priv->hw->desc->get_timestamp(np, priv->adv_ts); else ns = priv->hw->desc->get_timestamp(p, priv->adv_ts); - netdev_info(priv->dev, "get valid RX hw timestamp %llu\n", ns); + netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns); shhwtstamp = skb_hwtstamps(skb); memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); shhwtstamp->hwtstamp = ns_to_ktime(ns); } else { - netdev_err(priv->dev, "cannot get RX hw timestamp\n"); + netdev_dbg(priv->dev, "cannot get RX hw timestamp\n"); } } -- 2.7.4
[PATCH 1/2(net.git)] stmmac: fix ptp header for GMAC3 hw timestamp
>From ce9c334037fce37ccd715124cda57d1fd6d8cfe8 Mon Sep 17 00:00:00 2001 From: Mario Molitor Date: Thu, 8 Jun 2017 22:41:02 +0200 Subject: [PATCH 1/2] stmmac: fix ptp header for GMAC3 hw timestamp According the CYCLON V documention only the bit 16 of snaptypesel should set. (more information see Table 17-20 (cv_5v4.pdf) : Timestamp Snapshot Dependency on Register Bits) Fixes: d2042052a0aa ("stmmac: update the PTP header file") Signed-off-by: Mario Molitor --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 15 --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h | 3 ++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 12236da..d54e5d7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -546,7 +546,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) /* PTP v1, UDP, any kind of event packet */ config.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; /* take time stamp for all event messages */ - snap_type_sel = PTP_TCR_SNAPTYPSEL_1; + if (priv->plat->has_gmac4) + snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1; + else + snap_type_sel = PTP_TCR_SNAPTYPSEL_1; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; @@ -578,7 +581,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; ptp_v2 = PTP_TCR_TSVER2ENA; /* take time stamp for all event messages */ - snap_type_sel = PTP_TCR_SNAPTYPSEL_1; + if (priv->plat->has_gmac4) + snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1; + else + snap_type_sel = PTP_TCR_SNAPTYPSEL_1; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; @@ -612,7 +618,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; ptp_v2 = PTP_TCR_TSVER2ENA; 
/* take time stamp for all event messages */ - snap_type_sel = PTP_TCR_SNAPTYPSEL_1; + if (priv->plat->has_gmac4) + snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1; + else + snap_type_sel = PTP_TCR_SNAPTYPSEL_1; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h index 48fb72f..f4b31d6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h @@ -59,7 +59,8 @@ /* Enable Snapshot for Messages Relevant to Master */ #define PTP_TCR_TSMSTRENA BIT(15) /* Select PTP packets for Taking Snapshots */ -#define PTP_TCR_SNAPTYPSEL_1 GENMASK(17, 16) +#define PTP_TCR_SNAPTYPSEL_1 BIT(16) +#define PTP_GMAC4_TCR_SNAPTYPSEL_1 GENMASK(17, 16) /* Enable MAC address for PTP Frame Filtering */ #define PTP_TCR_TSENMACADDR BIT(18) -- 2.7.4
Re: Repeatable inet6_dump_fib crash in stock 4.12.0-rc4+
On 06/06/2017 09:19 PM, Eric Dumazet wrote: On Tue, 2017-06-06 at 18:34 -0600, David Ahern wrote: On 6/6/17 6:27 PM, Eric Dumazet wrote: Good catch, but it looks like similar fix is needed a few lines before. diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index deea901746c8570c5e801e40592c91e3b62812e0..b214443dc8346cef3690df7f27cc48a864028865 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -372,12 +372,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, read_lock_bh(&table->tb6_lock); res = fib6_walk(net, w); - read_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; cb->args[5] = w->root->fn_sernum; } + read_unlock_bh(&table->tb6_lock); indeed. tunnel vision on Ben's problem BTW, bug was already Ben's problem when Patrick tried to fix it in commit 2bec5a369ee79 ("ipv6: fib: fix crash when changing large fib while dumping it") seven years ago ;) As far as I can tell, the patch did not help, or at least we still reproduce the crash easily. 
ct524-ffb0 login: BUG: unable to handle kernel NULL pointer dereference at 0018 IP: fib6_walk_continue+0x76/0x180 [ipv6] PGD 3ec59a067 P4D 3ec59a067 PUD 3eb939067 PMD 0 Oops: [#1] PREEMPT SMP Modules linked in: nf_conntrack_netlink nf_conntrack nfnetlink nf_defrag_ipv4 libcrc32c bridge stp llc veth bnep fuse macvlan pktgen cfg80211 ipmi_ssif iTCO_wdt iTCO_vendor_support coretemp intel_rapl x86_pkg_temp_thermal intel_powerclamp kvm_intel kvm irqbypass joydev i2c_i801 ie31200_edac intel_pch_thermal shpchp hci_uart ipmi_si btbcm btqca ipmi_devintf btintel ipmi_msghandler pinctrl_sunrisepoint bluetooth intel_lpss_acpi acpi_als video pinctrl_intel intel_lpss kfifo_buf tpm_tis tpm_tis_core industrialio acpi_power_meter tpm acpi_pad sch_fq_codel nfsd auth_rpcgss nfs_acl lockd grace sunrpc ast drm_kms_helper ttm drm igb hwmon ptp pps_core dca i2c_algo_bit i2c_hid i2c_core ipv6 crc_ccitt [last unloaded: nfnetlink] CPU: 3 PID: 2185 Comm: ip Not tainted 4.12.0-rc4+ #32 Hardware name: Supermicro Super Server/X11SSM-F, BIOS 1.0b 12/29/2015 task: 8803e87fd940 task.stack: c90009ae8000 RIP: 0010:fib6_walk_continue+0x76/0x180 [ipv6] RSP: 0018:c90009aebbc0 EFLAGS: 00010287 RAX: 880460df8ca0 RBX: 8803f20a2c60 RCX: RDX: RSI: c90009aebc00 RDI: 81eee280 RBP: c90009aebbc8 R08: 0008 R09: 8803e87b47cd R10: c90009aebb70 R11: R12: 0001 R13: 0001 R14: 8803f20a2c60 R15: 8803ec601f80 FS: 7f43520ee700() GS:8804778c() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 0018 CR3: 0003ebb46000 CR4: 003406e0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: inet6_dump_fib+0x1ab/0x2a0 [ipv6] netlink_dump+0x11d/0x290 netlink_recvmsg+0x260/0x3f0 sock_recvmsg+0x38/0x40 ___sys_recvmsg+0xe9/0x230 ? alloc_pages_vma+0x9d/0x260 ? page_add_new_anon_rmap+0x88/0xc0 ? lru_cache_add_active_or_unevictable+0x31/0xb0 ? __handle_mm_fault+0xce3/0xf70 __sys_recvmsg+0x3d/0x70 ? 
__sys_recvmsg+0x3d/0x70 SyS_recvmsg+0xd/0x20 do_syscall_64+0x56/0xc0 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7f4351a23050 RSP: 002b:7ffdb1bfafb8 EFLAGS: 0246 ORIG_RAX: 002f RAX: ffda RBX: RCX: 7f4351a23050 RDX: RSI: 7ffdb1bfb020 RDI: 0004 RBP: 7ffdb1bff044 R08: 3fe4 R09: R10: 7ffdb1bfb060 R11: 0246 R12: 0064f360 R13: 7ffdb1bff0b0 R14: 3fe4 R15: Code: f6 40 2a 04 74 11 8b 53 30 85 d2 0f 84 02 01 00 00 83 ea 01 89 53 30 c7 43 28 04 00 00 00 48 39 43 10 74 33 48 8b 10 48 89 53 18 <48> 39 42 18 0f 84 a3 00 00 00 48 39 42 08 0f 84 ae 00 00 00 48 RIP: fib6_walk_continue+0x76/0x180 [ipv6] RSP: c90009aebbc0 CR2: 0018 ---[ end trace 06ac9dee8b14db6b ]--- Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: disabled (gdb) l *(fib6_walk_continue+0x76) 0x188c6 is in fib6_walk_continue (/home/greearb/git/linux-2.6/net/ipv6/ip6_fib.c:1593). 1588if (fn == w->root) 1589return 0; 1590pn = fn->parent; 1591w->node = pn; 1592#ifdef CONFIG_IPV6_SUBTREES 1593if (FIB6_SUBTREE(pn) == fn) { 1594WARN_ON(!(fn->fn_flags & RTN_ROOT)); 1595w->state = FWS_L; 1596continue; 1597} (gdb) l *(inet6_dump_fib+0x1ab) 0x1939b is in inet6_dump_fib
Aw: Re: [PATCH 2/2(net.git)] stmmac: fix for hw timestamp of GMAC3 unit
Hello David and Andy, thanks for the review responses. I will fix the patches according to your feedback. Thanks, Mario
Re: [RFC PATCH net-next 2/5] bpf/verifier: rework value tracking
On Thu, Jun 08, 2017 at 08:38:29PM +0100, Edward Cree wrote: > On 08/06/17 17:45, Alexei Starovoitov wrote: > > On Thu, Jun 08, 2017 at 03:53:36PM +0100, Edward Cree wrote: > > -} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) > { > +} else if (reg->type == PTR_TO_STACK) { > +/* stack accesses must be at a fixed offset, so that we > can > + * determine what type of data were returned. > + */ > +if (reg->align.mask) { > +char tn_buf[48]; > + > +tn_strn(tn_buf, sizeof(tn_buf), reg->align); > +verbose("variable stack access align=%s off=%d > size=%d", > +tn_buf, off, size); > +return -EACCES; > >>> hmm. why this restriction? > >>> I thought one of key points of the diff that ptr+var tracking logic > >>> will now apply not only to map_value, but to stack_ptr as well? > >> As the comment above it says, we need to determine what was returned: > >> was it STACK_MISC or STACK_SPILL, and if the latter, what kind of pointer > >> was spilled there? See check_stack_read(), which I should probably > >> mention in the comment. > > this piece of code is not only spill/fill, but normal ldx/stx stack access. > > Consider the frequent pattern that many folks tried to do: > > bpf_prog() > > { > > char buf[64]; > > int len; > > > > bpf_probe_read(&len, sizeof(len), kernel_ptr_to_filename_len); > > bpf_probe_read(buf, sizeof(buf), kernel_ptr_to_filename); > > buf[len & (sizeof(buf) - 1)] = 0; > > ... > > > > currently above is not supported, but when 'buf' is a pointer to map value > > it works fine. Allocating extra bpf map just to do such workaround > > isn't nice and since this patch generalized map_value_adj with ptr_to_stack > > we can support above code too. > > We can check that all bytes of stack for this variable access were > > initialized already. 
> > In the example above it will happen by bpf_probe_read (in the verifier > > code): > > for (i = 0; i < meta.access_size; i++) { > > err = check_mem_access(env, meta.regno, i, BPF_B, > > BPF_WRITE, -1); > > so at the time of > > buf[len & ..] = 0 > > we can check that 'stx' is within the range of inited stack and allow it. > Yes, we could check every byte of the stack within the range [buf, buf+63] > is a STACK_MISC and if so allow it. But since this is not supported by the > existing code (so it's not a regression), I'd prefer to leave that for a > future patch - this one is quite big enough already ;-) of course! just exploring. > +if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == > BPF_READ && > +state->regs[value_regno].type == SCALAR_VALUE) { > +/* b/h/w load zero-extends, mark upper bits as known 0 > */ > +state->regs[value_regno].align.value &= (1ULL << (size > * 8)) - 1; > +state->regs[value_regno].align.mask &= (1ULL << (size * > 8)) - 1; > >>> probably another helper from tnum.h is needed. > >> I could rewrite as > >> reg->align = tn_and(reg->align, tn_const((1ULL << (size * 8)) - 1)) > > yep. that's perfect. > In the end I settled on adding a helper > struct tnum tnum_cast(struct tnum a, u8 size); > since I have a bunch of other places that cast things to 32 bits. sounds good to me > > I see. May be print verifier state in such warn_ons and make error > > more human readable? > Good idea, I'll do that. > +case PTR_TO_MAP_VALUE_OR_NULL: > >>> does this new state comparison logic helps? Do you have any numbers > >>> before/after in the number of insns it had to process for the tests in > >>> selftests ? > >> I don't have the numbers, no (I'll try to collect them). This rewrite was > > Thanks. 
The main concern is that right now some complex programs > > that cilium is using are close to the verifier complexity limit and these > > big changes to amount of info recognized by the verifier can cause pruning > > to be ineffective, so we need to test on big programs. > > I think Daniel will be happy to test your next rev of the patches. > > I'll test them as well. > > At least 'insn_processed' from C code in tools/testing/selftests/bpf/ > > is a good estimate of how these changes affect pruning. > It looks like the only place this gets recorded is as "processed %d insns" > in the log_buf. Is there a convenient way to get at this, or am I going > to have to make bpf_verify_program grovel through the log sscanf()ing for > a matching line? typically we just run the tests with hacked log_level and grep. similar stuff Dave did in test_align.c
Re: [RFC PATCH net-next 3/5] bpf/verifier: feed pointer-to-unknown-scalar casts into scalar ALU path
On Thu, Jun 08, 2017 at 08:07:53PM +0100, Edward Cree wrote: > On 08/06/17 19:41, Alexei Starovoitov wrote: > > On Thu, Jun 08, 2017 at 06:12:39PM +0100, Edward Cree wrote: > >> On 08/06/17 17:50, Alexei Starovoitov wrote: > >>> On Thu, Jun 08, 2017 at 04:25:39PM +0100, Edward Cree wrote: > On 08/06/17 03:35, Alexei Starovoitov wrote: > > such large back and forth move doesn't help reviewing. > > may be just merge it into previous patch? > > Or keep that function in the right place in patch 2 already? > I think 'diff' got a bit confused, and maybe with different options I > could > have got it to produce something more readable. But I think I will just > merge this into patch 2; it's only separate because it started out as an > experiment. > >>> after sleeping on it I'm not sure we should be allowing such pointer > >>> arithmetic. In normal C code people do fancy tricks with lower 3 bits > >>> of the pointer, but in bpf code I cannot see such use case. > >>> What kind of realistic code will be doing ptr & 0x40 ? > >> Well, I didn't support it because I saw a use case. I supported it because > >> it seemed easy to do and the code came out reasonably elegant-looking. > >> Since this is guarded by env->allow_ptr_leaks, I can't see any reason _not_ > >> to let people try fancy tricks with the low bits of pointers. > >> I agree ptr & 0x40 is a crazy thing with no imaginable use case, but... > >> "Unix was not designed to stop its users from doing stupid things, as that > >> would also stop them from doing clever things." ;-) > > well, I agree with the philosophy :) but I also see few reasons not to > > allow it: > > 1. it immediately becomes uapi and if later we find out that it's > > preventing us > > to do something we actually really need we'll be stuck looking for > > workaround > What could it prevent us from doing, though? 
It's basically equivalent to > giving > BPF an opcode that casts a pointer to a u64, which of course is only allowed > if > allow_ptr_leaks is true. And since we don't feed any knowledge about the > pointer > into the verifier, it's just like any other way of filling a register with > arbitrary, unknown bits. > I can fully appreciate why you're being cautious, what with uapi and all. > But I > don't think there's any actual problem here. Open to being convinced, > though. The leaking is not a concern. It's if we started accepting a certain class of programs we need to keep accepting them in the future. Another reason is 'ptr & mask' could have been simply a bug and rejecting it suppose to help users find issues sooner... but I don't have a strong opinion here. > > 2. it's the same pruning concern. probably doesn't fully apply here, but > > the reason we don't track 'if (reg == 1) ...' > Don't we though? > http://elixir.free-electrons.com/linux/v4.12-rc4/source/kernel/bpf/verifier.c#L2127 > > is if we mark that > > register as known const_imm in the true branch, it will screw up > > pruning quite badly. It's trivial to track and may seem useful, > > but hurts instead. > (Thinking out loud...) > > What would be really nice is a way to propagate limits backwards as well as > forwards, so that the verifier can say "when I tested this branch, I used > this part of the state, I read four bytes past this pointer". Then when it > wants to prune, it can say "well, the state this time isn't as strong, but > it still satisfies everything I actually used". > But that sounds like it would be very hard indeed to do. that's more or less what i'm trying to do. liveness info per basic block will trim the state. > Maybe with the basic-block DAG stuff David's been talking about, we could > find all the paths that reach a block, and take the union of their states, > and then run through the block feeding it that combined state. 
But that > could reject code that relies on correlation of the state (i.e. if r1 != 0 > then r2 is valid ptr I can access, etc) so would still need the 'walk with > each individual state' as a fallback. Though at least you'd have all the > states at once so you could find out which ones were subsumed, instead of > hoping you get to them in the right order. I think it's important to optimize verification speed for good programs. If bad program takes slightly longer, not a big deal. Right now we have global lock which needs to go away, but that's a minor fix. In that sense I see that combining the state can help find bad programs sooner, but I don't see it's helping good programs. Also we already have programs like: if (...) { var1 = ptr var2 = size } else { var1 = different ptr var2 = different size } call_helper(...var1, var2) So the state needs to be considered together. Cannot just mix and match. Initially I was thinking to build Use/Def chains for all operands of loads, stores and calls and follow them from Use spot to all Defs recursively to determine validity, but above use case brea
Re: [PATCH net] Fix an intermittent pr_emerg warning about lo becoming free.
On Thu, 2017-06-08 at 13:12 -0700, Krister Johansen wrote: ... > Looking at the gc_task intervals, they started at 663ms when we invoked > __dst_free(). After that, they increased to 1663, 3136, 5567, 8191, > 10751, and 14848. The release that set the refcnt to 0 on our dst entry > occurred after the gc_task was enqueued for 14 second interval so we had > to wait longer than the warning time in wait_allrefs in order for the > dst entry to get free'd and the hold on 'lo' to be released. > > A simple solution to this problem is to have dst_dev_event() reset the > gc timer, which causes us to process this list shortly after the > gc_mutex is relased when dst_dev_event() completes. > > Signed-off-by: Krister Johansen > --- > net/core/dst.c | 14 ++ > 1 file changed, 14 insertions(+) > > diff --git a/net/core/dst.c b/net/core/dst.c > index 6192f11..13ba4a0 100644 > --- a/net/core/dst.c > +++ b/net/core/dst.c > @@ -469,6 +469,20 @@ static int dst_dev_event(struct notifier_block *this, > unsigned long event, > spin_lock_bh(&dst_garbage.lock); > dst = dst_garbage.list; > dst_garbage.list = NULL; > + /* The code in dst_ifdown places a hold on the loopback device. > + * If the gc entry processing is set to expire after a lengthy > + * interval, this hold can cause netdev_wait_allrefs() to hang > + * out and wait for a long time -- until the the loopback > + * interface is released. If we're really unlucky, it'll emit > + * pr_emerg messages to console too. Reset the interval here, > + * so dst cleanups occur in a more timely fashion. > + */ > + if (dst_garbage.timer_inc > DST_GC_INC) { > + dst_garbage.timer_inc = DST_GC_INC; > + dst_garbage.timer_expires = DST_GC_MIN; > + mod_delayed_work(system_wq, &dst_gc_work, > + dst_garbage.timer_expires); > + } > spin_unlock_bh(&dst_garbage.lock); > > if (last) Looks very nice to me ! Acked-by: Eric Dumazet Wei Wan is actually working on a patch series removing all this dst_garbage list stuff.
[PATCHv5] wlcore: add wl1285 compatible
Motorola Droid 4 uses a WL 1285C. With differences between chips not being public let's add explicit binding for wl1285 instead of relying on wl1283 being very similar. Reviewed-by: Rob Herring Acked-by: Kalle Valo Acked-by: Tony Lindgren Signed-off-by: Sebastian Reichel --- Changes since PATCHv4: - Dropped droid4.dts change, patch can go in normally now Changes since PATCHv3: - add netdev@vger.kernel.org to cc - add Acked-By from Tony & Kalle Changes since PATCHv2: - merge patch for DTS and driver - add Acked-By from Rob Changes since PATCHv1: - patches did not exist in patchv1 --- Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt | 1 + drivers/net/wireless/ti/wlcore/sdio.c| 1 + drivers/net/wireless/ti/wlcore/spi.c | 1 + 3 files changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt b/Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt index 2a3d90de18ee..7b2cbb14113e 100644 --- a/Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt +++ b/Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt @@ -10,6 +10,7 @@ Required properties: * "ti,wl1273" * "ti,wl1281" * "ti,wl1283" +* "ti,wl1285" * "ti,wl1801" * "ti,wl1805" * "ti,wl1807" diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index 287023ef4a78..2fb38717346f 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -237,6 +237,7 @@ static const struct of_device_id wlcore_sdio_of_match_table[] = { { .compatible = "ti,wl1273", .data = &wl127x_data }, { .compatible = "ti,wl1281", .data = &wl128x_data }, { .compatible = "ti,wl1283", .data = &wl128x_data }, + { .compatible = "ti,wl1285", .data = &wl128x_data }, { .compatible = "ti,wl1801", .data = &wl18xx_data }, { .compatible = "ti,wl1805", .data = &wl18xx_data }, { .compatible = "ti,wl1807", .data = &wl18xx_data }, diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 
f949ad2bd898..1f5d9ebb0925 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -433,6 +433,7 @@ static const struct of_device_id wlcore_spi_of_match_table[] = { { .compatible = "ti,wl1273", .data = &wl127x_data}, { .compatible = "ti,wl1281", .data = &wl128x_data}, { .compatible = "ti,wl1283", .data = &wl128x_data}, + { .compatible = "ti,wl1285", .data = &wl128x_data}, { .compatible = "ti,wl1801", .data = &wl18xx_data}, { .compatible = "ti,wl1805", .data = &wl18xx_data}, { .compatible = "ti,wl1807", .data = &wl18xx_data}, -- 2.11.0
Re: [PATCH net] ipv4: igmp: fix a use after free
On Fri, 2017-06-09 at 02:22 +0800, Xin Long wrote: > On Thu, Jun 8, 2017 at 9:43 PM, Eric Dumazet wrote: > > From: Eric Dumazet > > > > Andrey reported a use-after-free in add_grec(), courtesy of syzkaller. > > > > Problem here is that igmp_stop_timer() uses a del_timer(), so we can not > > guarantee that another cpu is not servicing the timer. > > > > Therefore, if igmp_group_dropped() call from ip_mc_dec_group() is > > immediately followed by ip_mc_clear_src(), ip_mc_clear_src() might free > > memory that could be used by the other cpu servicing the timer. > > > > To fix this issue, we should defer the memory freeing > > (ip_mc_clear_src()) to the point all references to (struct > > ip_mc_list)->refcnt have been released. > > This happens in ip_ma_put() > > > > > > == > > BUG: KASAN: use-after-free in add_grec+0x101e/0x1090 net/ipv4/igmp.c:473 > > Read of size 8 at addr 88003053c1a0 by task swapper/0/0 > > > > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.12.0-rc3+ #370 > > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 > > Call Trace: > > > > __dump_stack lib/dump_stack.c:16 [inline] > > dump_stack+0x292/0x395 lib/dump_stack.c:52 > > print_address_description+0x73/0x280 mm/kasan/report.c:252 > > kasan_report_error mm/kasan/report.c:351 [inline] > > kasan_report+0x22b/0x340 mm/kasan/report.c:408 > > __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:429 > > add_grec+0x101e/0x1090 net/ipv4/igmp.c:473 > > igmpv3_send_cr net/ipv4/igmp.c:663 [inline] > > igmp_ifc_timer_expire+0x46d/0xa80 net/ipv4/igmp.c:768 > the call trace is igmp_ifc_timer_expire -> igmpv3_send_cr -> add_grec > and the timer should be in_dev->mr_ifc_timer. > but igmp_stop_timer you mentioned is used to stop im->timer > > It's possible that ip_mc_clear_src is done in ip_ma_put() > while igmp_ifc_timer_expire is still using ip_mc_list under > rcu_read_lock(). no ? You might be right. 
I looked at the freeing side > kfree+0xe8/0x2b0 mm/slub.c:3882 > ip_mc_clear_src+0x69/0x1c0 net/ipv4/igmp.c:2078 > ip_mc_dec_group+0x19a/0x470 net/ipv4/igmp.c:1618 > ip_mc_drop_socket+0x145/0x230 net/ipv4/igmp.c:2609 > inet_release+0x4e/0x1c0 net/ipv4/af_inet.c:411 > sock_release+0x8d/0x1e0 net/socket.c:597 > sock_close+0x16/0x20 net/socket.c:1072 Then I tried to catch a problem happening on another cpu, and found one. I mentioned (in https://lkml.org/lkml/2017/5/31/619 ) that we might need to defer freeing after rcu grace period but for some reason decided it was not needed. What about : diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 44fd86de2823dd17de16276a8ec01b190e69b8b4..80932880af861046849d7dbac5f5aa0a1117f166 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -171,12 +171,20 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc); static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, int sfcount, __be32 *psfsrc, int delta); + +static void ip_mc_list_reclaim(struct rcu_head *head) +{ + struct ip_mc_list *im = container_of(head, struct ip_mc_list, rcu); + + ip_mc_clear_src(im); + in_dev_put(im->interface); + kfree(im); +} + static void ip_ma_put(struct ip_mc_list *im) { - if (atomic_dec_and_test(&im->refcnt)) { - in_dev_put(im->interface); - kfree_rcu(im, rcu); - } + if (atomic_dec_and_test(&im->refcnt)) + call_rcu(&im->rcu, ip_mc_list_reclaim); } #define for_each_pmc_rcu(in_dev, pmc) \ @@ -1615,7 +1623,6 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) *ip = i->next_rcu; in_dev->mc_count--; igmp_group_dropped(i); - ip_mc_clear_src(i); if (!in_dev->dead) ip_rt_multicast_event(in_dev);
Re: [RFC PATCH net-next 0/5] bpf: rewrite value tracking in verifier
From: Edward Cree Date: Wed, 7 Jun 2017 15:55:57 +0100 > This series simplifies alignment tracking, generalises bounds tracking and > fixes some bounds-tracking bugs in the BPF verifier. Pointer arithmetic on > packet pointers, stack pointers, map value pointers and context pointers has > been unified, and bounds on these pointers are only checked when the pointer > is dereferenced. > Operations on pointers which destroy all relation to the original pointer > (such as multiplies and shifts) are disallowed if !env->allow_ptr_leaks, > otherwise they convert the pointer to an unknown scalar and feed it to the > normal scalar arithmetic handling. > Pointer types have been unified with the corresponding adjusted-pointer types > where those existed (e.g. PTR_TO_MAP_VALUE[_ADJ] or FRAME_PTR vs > PTR_TO_STACK); similarly, CONST_IMM and UNKNOWN_VALUE have been unified into > SCALAR_VALUE. > Pointer types (except CONST_PTR_TO_MAP, PTR_TO_MAP_VALUE_OR_NULL and > PTR_TO_PACKET_END, which do not allow arithmetic) have a 'fixed offset' and > a 'variable offset'; the former is used when e.g. adding an immediate or a > known-constant register, as long as it does not overflow. Otherwise the > latter is used, and any operation creating a new variable offset creates a > new 'id' (and, for PTR_TO_PACKET, clears the 'range'). > SCALAR_VALUEs use the 'variable offset' fields to track the range of possible > values; the 'fixed offset' should never be set on a scalar. > > Patch 2/5 is rather on the big side, but since it changes the contents and > semantics of a fairly central data structure, I'm not really sure how to go > about splitting it up further without producing broken intermediate states. > > With the changes in patch 5/5, all tools/testing/selftests/bpf/test_verifier > tests pass. Edward, I haven't had a chance to review this yet, but I wanted to thank you for working on this. I will find some time to test your work on sparc too. Thanks again!
Re: [PATCH net] bpf, tests: fix endianness selection
From: Daniel Borkmann Date: Thu, 8 Jun 2017 19:06:25 +0200 > I noticed that test_l4lb was failing in selftests: ... > Tracking down the issue actually revealed that endianness selection > in bpf_endian.h is broken when compiled with clang with bpf target. > test_pkt_access.c, test_l4lb.c is compiled with __BYTE_ORDER as > __BIG_ENDIAN, test_xdp.c as __LITTLE_ENDIAN! test_l4lb noticeably > fails, because the test accounts bytes via bpf_ntohs(ip6h->payload_len) > and bpf_ntohs(iph->tot_len), and compares them against a defined > value and given a wrong endianness, the test outcome is different, > of course. > > Turns out that there are actually two bugs: i) when we do __BYTE_ORDER > comparison with __LITTLE_ENDIAN/__BIG_ENDIAN, then depending on the > include order we see different outcomes. Reason is that __BYTE_ORDER > is undefined due to missing endian.h include. Before we include the > asm/byteorder.h (e.g. through linux/in.h), then __BYTE_ORDER equals > __LITTLE_ENDIAN since both are undefined, after the include which > correctly pulls in linux/byteorder/little_endian.h, __LITTLE_ENDIAN > is defined, but given __BYTE_ORDER is still undefined, we match on > __BYTE_ORDER equals to __BIG_ENDIAN since __BIG_ENDIAN is also > undefined at that point, sigh. ii) But even that would be wrong, > since when compiling the test cases with clang, one can select between > bpfeb and bpfel targets for cross compilation. Hence, we can also not > rely on what the system's endian.h provides, but we need to look at > the compiler's defined endianness. The compiler defines __BYTE_ORDER__, > and we can match __ORDER_LITTLE_ENDIAN__ and __ORDER_BIG_ENDIAN__, > which also reflects targets bpf (native), bpfel, bpfeb correctly, > thus really only rely on that. 
After patch: > > # ./test_progs > test_pkt_access:PASS:ipv4 74 nsec > test_pkt_access:PASS:ipv6 42 nsec > test_xdp:PASS:ipv4 2340 nsec > test_xdp:PASS:ipv6 1461 nsec > test_l4lb:PASS:ipv4 400 nsec > test_l4lb:PASS:ipv6 530 nsec > test_tcp_estats:PASS: 0 nsec > Summary: 7 PASSED, 0 FAILED > > Fixes: 43bcf707ccdc ("bpf: fix _htons occurences in test_progs") > Signed-off-by: Daniel Borkmann > Acked-by: Alexei Starovoitov Applied, thanks Daniel.
Re: [PATCH] ila_xlat: add missing hash secret initialization
On Thu, Jun 8, 2017 at 12:54 AM, Arnd Bergmann wrote: > While discussing the possible merits of clang warning about unused initialized > functions, I found one function that was clearly meant to be called but > never actually is. > > __ila_hash_secret_init() initializes the hash value for the ila locator, > apparently this is intended to prevent hash collision attacks, but this ends > up being a read-only zero constant since there is no caller. I could find > no indication of why it was never called, the earliest patch submission > for the module already was like this. If my interpretation is right, we > certainly want to backport the patch to stable kernels as well. > > I considered adding it to the ila_xlat_init callback, but for best effect > the random data is read as late as possible, just before it is first used. > The underlying net_get_random_once() is already highly optimized to avoid > overhead when called frequently. > > Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") > Cc: sta...@vger.kernel.org > Link: https://www.spinics.net/lists/kernel/msg2527243.html > Signed-off-by: Arnd Bergmann > --- > net/ipv6/ila/ila_xlat.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c > index 2fd5ca151dcf..77f7f8c7d93d 100644 > --- a/net/ipv6/ila/ila_xlat.c > +++ b/net/ipv6/ila/ila_xlat.c > @@ -62,6 +62,7 @@ static inline u32 ila_locator_hash(struct ila_locator loc) > { > u32 *v = (u32 *)loc.v32; > > + __ila_hash_secret_init(); > return jhash_2words(v[0], v[1], hashrnd); > } > > -- > 2.9.0 > Thanks Arnd! Acked-by: Tom Herbert
[PATCH net] Fix an intermittent pr_emerg warning about lo becoming free.
It looks like this: Message from syslogd@flamingo at Apr 26 00:45:00 ... kernel:unregister_netdevice: waiting for lo to become free. Usage count = 4 They seem to coincide with net namespace teardown. The message is emitted by netdev_wait_allrefs(). Forced a kdump in netdev_run_todo, but found that the refcount on the lo device was already 0 at the time we got to the panic. Used bcc to check the blocking in netdev_run_todo. The only places where we're off cpu there are in the rcu_barrier() and msleep() calls. That behavior is expected. The msleep time coincides with the amount of time we spend waiting for the refcount to reach zero; the rcu_barrier() wait times are not excessive. After looking through the list of callbacks that the netdevice notifiers invoke in this path, it appears that the dst_dev_event is the most interesting. The dst_ifdown path places a hold on the loopback_dev as part of releasing the dev associated with the original dst cache entry. Most of our notifier callbacks are straight-forward, but this one a) looks complex, and b) places a hold on the network interface in question. I constructed a new bcc script that watches various events in the lifetime of a dst cache entry. Note that dst_ifdown will take a hold on the loopback device until the invalidated dst entry gets freed.
[ __dst_free] on DST: 883ccabb7900 IF tap1008300eth0 invoked at 1282115677036183 __dst_free rcu_nocb_kthread kthread ret_from_fork --- [ dst_gc_task] completed in 4: start: 1282115783057516 end 1282115783062462 [ dst_gc_task] completed in 5: start: 1282116447054101 end 1282116447059392 [ dst_gc_task] completed in 5: start: 1282118111030391 end 1282118111036041 [ dst_gc_task] completed in 6: start: 1282121247074938 end 1282121247081141 [ dst_gc_task] completed in 5: start: 1282126815053252 end 1282126815058751 [ dst_gc_task] completed in 6: start: 1282135007055673 end 1282135007061877 [ dst_gc_task] completed in 6: start: 1282145759021745 end 1282145759027830 [ dst_release] on DST: 883ccabb7900 IF tap1008300eth0 invoked at 1282147838674257 dst_release ip_queue_xmit tcp_transmit_skb tcp_write_xmit __tcp_push_pending_frames tcp_send_fin tcp_close inet_release sock_release sock_close __fput fput task_work_run do_exit do_group_exit SyS_exit_group do_syscall_64 return_from_SYSCALL_64 [ dst_ifdown] on DST: 883ccabb7900 IF tap1008300eth0 invoked at 1282148067377680 dst_ifdown notifier_call_chain raw_notifier_call_chain call_netdevice_notifiers_info netdev_run_todo kretprobe_trampoline default_device_exit_batch ops_exit_list.isra.4 cleanup_net process_one_work worker_thread kthread ret_from_fork --- [ dst_ifdown] on DST: 883ccabb7900 IF lo invoked at 1282148067411269 dst_ifdown notifier_call_chain raw_notifier_call_chain call_netdevice_notifiers_info netdev_run_todo kretprobe_trampoline default_device_exit_batch ops_exit_list.isra.4 cleanup_net process_one_work worker_thread kthread ret_from_fork --- <...> [ dst_destroy] on DST: 883ccabb7900 IF lo invoked at 1282160607074022 dst_destroy kretprobe_trampoline worker_thread kthread ret_from_fork --- [ dst_gc_task] completed in 30: start: 1282160607047161 end 1282160607077572 The way this works is that if there's still a reference on the dst entry at the time we try to free it, it gets placed in the gc list by __dst_free and 
the dst_destroy() call is invoked by the gc task once the refcount is 0. If the gc task processes a 10th or less of its entries on a single pass, it increases the amount of time it waits between gc intervals. Looking at the gc_task intervals, they started at 663ms when we invoked __dst_free(). After that, they increased to 1663, 3136, 5567, 8191, 10751, and 14848. The release that set the refcnt to 0 on our dst entry occurred after the gc_task was enqueued for 14 second interval so we had to wait longer than the warning time in wait_allrefs in order for the dst entry to get free'd and the hold on 'lo' to be released. A simple solution to this problem is to have dst_dev_event() reset the gc timer, which causes us to process this list shortly after the gc_mutex is released when dst_dev_event() completes. Signed-off-by: Krister Johansen --- net/core/dst.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/net/core/dst.c b/net/core/dst.c index 6192f11..13ba4a0 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -469,6 +469,20 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, spin_lock_bh(&dst_garbage.lock); dst = dst_garbage.list; dst_garbage.list = NULL; + /* The code in
[PATCH v2 net-next] Ipvlan should return an error when an address is already in use.
The ipvlan code already knows how to detect when a duplicate address is about to be assigned to an ipvlan device. However, that failure is not propagated outward and leads to a silent failure. Introduce a validation step at ip address creation time and allow device drivers to register to validate the incoming ip addresses. The ipvlan code is the first consumer. If it detects an address in use, we can return an error to the user before beginning to commit the new ifa in the networking code. This can be especially useful if it is necessary to provision many ipvlans in containers. The provisioning software (or operator) can use this to detect situations where an ip address is unexpectedly in use. Signed-off-by: Krister Johansen --- drivers/net/ipvlan/ipvlan_main.c | 69 include/linux/inetdevice.h | 7 include/net/addrconf.h | 10 +- net/ipv4/devinet.c | 33 +++ net/ipv6/addrconf.c | 17 +- net/ipv6/addrconf_core.c | 19 +++ 6 files changed, 153 insertions(+), 2 deletions(-) Apologies for letting this one languish for so long. This iteration is more code than I hoped it would be, but I believe it addresses the concerns raised in the prior iteration of the review. Changes v1 -> v2: - Add a separate validator chain with ipvlan as the first consumer. [Address Dave M.'s comment about needing all chain users to agree about use of notifier_[to|from]_errno] - Run validator chain only during address creation. [Address Dave M.'s comment about prior version failing to handle primary address promotions.] - Run validator step before the atomic section in the ip address create path. [Address my own dissatisfaction with having to rollback and potentially issue an immediate up and then down RTNETLINK event.] 
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 618ed88..e4141d6 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -824,6 +824,33 @@ static int ipvlan_addr6_event(struct notifier_block *unused, return NOTIFY_OK; } +static int ipvlan_addr6_validator_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr; + struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev; + struct ipvl_dev *ipvlan = netdev_priv(dev); + + /* FIXME IPv6 autoconf calls us from bh without RTNL */ + if (in_softirq()) + return NOTIFY_DONE; + + if (!netif_is_ipvlan(dev)) + return NOTIFY_DONE; + + if (!ipvlan || !ipvlan->port) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: + if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) + return notifier_from_errno(-EADDRINUSE); + break; + } + + return NOTIFY_OK; +} + static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) { if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) { @@ -871,10 +898,37 @@ static int ipvlan_addr4_event(struct notifier_block *unused, return NOTIFY_OK; } +static int ipvlan_addr4_validator_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_validator_info *ivi = (struct in_validator_info *)ptr; + struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev; + struct ipvl_dev *ipvlan = netdev_priv(dev); + + if (!netif_is_ipvlan(dev)) + return NOTIFY_DONE; + + if (!ipvlan || !ipvlan->port) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: + if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) + return notifier_from_errno(-EADDRINUSE); + break; + } + + return NOTIFY_OK; +} + static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = { .notifier_call = ipvlan_addr4_event, }; +static struct notifier_block ipvlan_addr4_vtor_notifier_block __read_mostly = { + 
.notifier_call = ipvlan_addr4_validator_event, +}; + static struct notifier_block ipvlan_notifier_block __read_mostly = { .notifier_call = ipvlan_device_event, }; @@ -883,6 +937,10 @@ static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = { .notifier_call = ipvlan_addr6_event, }; +static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = { + .notifier_call = ipvlan_addr6_validator_event, +}; + static void ipvlan_ns_exit(struct net *net) { struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); @@ -907,7 +965,10 @@ static int __init ipvlan_init_module(void) ipvlan_init_secret(); register_netdevice_
Re: [PATCH 0/5] net: mvpp2: fixes and cleanups
From: Thomas Petazzoni Date: Thu, 8 Jun 2017 17:27:22 +0200 > Here is a small set of fixes/improvements for the mvpp2 driver. > > The first two patches are fixes: they fix bogus usage of > smp_processor_id() in a migration-enabled context. Indeed currently > the driver outputs some fat warnings in CONFIG_DEBUG_PREEMPT-enabled > kernels. Therefore, some fixes should be pushed to stable. > > The last three patches are cleanups and not needed for stable, but > they stack on top of the fixes. Please do not mix genuine bug fixes and cleanups. You must submit them separately. Send the fixes targeting 'net', and once those are accepted and 'net' is merged into 'net-next', you can submit the cleanups. Thank you.