Re: [ovs-dev] [PATCH] dpif-netdev: avoid hw_miss_packet_recover() for devices with no support

2021-11-20 Thread Eli Britstein via dev

Hi Harsha,

It's a clever idea, though have some problems in the implementation. PSB.


On 11/20/2021 11:20 AM, Sriharsha Basavapatna wrote:

The hw_miss_packet_recover() API results in performance degradation, for
ports that are either not offload capable or do not support this specific
offload API.

For example, in the test configuration shown below, the vhost-user port
does not support offloads and the VF port doesn't support hw_miss offload
API. But because tunnel offload needs to be configured in other bridges
(br-vxlan and br-phy), OVS has been built with -DALLOW_EXPERIMENTAL_API.

 br-vhostbr-vxlanbr-phy
vhost-user<-->VFVF-Rep<-->VxLAN   uplink-port

For every packet between the VF and the vhost-user ports, hw_miss API is
called even though it is not supported by the ports involved. This leads
to significant performance drop (~3x in some cases; both cycles and pps).

To fix this, return EOPNOTSUPP when this API fails for a device that

"To fix" -> "To improve"

doesn't support it and avoid this API on that port for subsequent packets.

Signed-off-by: Sriharsha Basavapatna 
---
  lib/dpif-netdev-private.h |  2 +-
  lib/dpif-netdev.c | 29 +
  lib/netdev-offload-dpdk.c |  9 +++--
  3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h
index 4593649bd..e2a6a9d3a 100644
--- a/lib/dpif-netdev-private.h
+++ b/lib/dpif-netdev-private.h
@@ -46,7 +46,7 @@ dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,
  
  int

  dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
-  odp_port_t port_no,
+  void *port,


void * -> struct tx_port *. use a forward declaration.


struct dp_packet *packet,
struct dp_netdev_flow **flow);
  
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c

index 69d7ec26e..207b1961c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -434,6 +434,7 @@ struct tx_port {
  long long last_used;
  struct hmap_node node;
  long long flush_time;
+bool hw_miss_api_supported;
  struct dp_packet_batch output_pkts;
  struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
  };
@@ -6972,6 +6973,7 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread 
*pmd,
  tx->port = port;
  tx->qid = -1;
  tx->flush_time = 0LL;
+tx->hw_miss_api_supported = true;
  dp_packet_batch_init(>output_pkts);
  
  hmap_insert(>tx_ports, >node, hash_port_no(tx->port->port_no));

@@ -7327,22 +7329,28 @@ static struct tx_port * pmd_send_port_cache_lookup(
  
  inline int

  dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
-  odp_port_t port_no OVS_UNUSED,
+  void *port,

don't omit OVS_UNUSED. it is for compiling without ALLOW_EXPERIMENTAL_API

struct dp_packet *packet,
struct dp_netdev_flow **flow)
  {
-struct tx_port *p OVS_UNUSED;
+struct tx_port *p = port;

no need for this local variable, you get it from the function arguments

  uint32_t mark;
  
  #ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */

  /* Restore the packet if HW processing was terminated before completion. 
*/
-p = pmd_send_port_cache_lookup(pmd, port_no);
-if (OVS_LIKELY(p)) {
+if (OVS_LIKELY(p) && p->hw_miss_api_supported) {
  int err = netdev_hw_miss_packet_recover(p->port->netdev, packet);
  
-if (err && err != EOPNOTSUPP) {

-COVERAGE_INC(datapath_drop_hw_miss_recover);
-return -1;
+if (err) {
+if (err != EOPNOTSUPP) {
+COVERAGE_INC(datapath_drop_hw_miss_recover);
+return -1;
+} else {
+/* API unsupported by the port; avoid subsequent calls. */
+VLOG_DBG("hw_miss_api unsupported: port: %d",
+ p->port->port_no);
+p->hw_miss_api_supported = false;
+}
  }
  }
  #endif
@@ -7394,6 +7402,11 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
  uint16_t tcp_flags;
  size_t map_cnt = 0;
  bool batch_enable = true;
+struct tx_port *port = NULL;
+
+#ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */
+port = pmd_send_port_cache_lookup(pmd, port_no);
+#endif
  
  pmd_perf_update_counter(>perf_stats,

  md_is_valid ? PMD_STAT_RECIRC : PMD_STAT_RECV,
@@ -7420,7 +7433,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
  }
  
  if (netdev_flow_api && recirc_depth == 0) {

-if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, port_no, packet, ))) {
+if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, port, packet, ))) {
  /* Packet restoration failed and it was dropped, do not
   * continue processing.
   */
diff --git 

[ovs-dev] [PATCH v3] Add monitor_cond_since support

2021-11-20 Thread Terry Wilson
Add support for monitor_cond_since / update3 to python-ovs to
allow more efficient reconnections when connecting to clustered
OVSDB servers.

Signed-off-by: Terry Wilson 
---
 python/ovs/db/idl.py | 231 ---
 tests/ovsdb-idl.at   |   2 +-
 2 files changed, 197 insertions(+), 36 deletions(-)

diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py
index 87ee06cde..c5d3ccdba 100644
--- a/python/ovs/db/idl.py
+++ b/python/ovs/db/idl.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import collections
+import enum
 import functools
 import uuid
 
@@ -36,6 +37,7 @@ ROW_DELETE = "delete"
 
 OVSDB_UPDATE = 0
 OVSDB_UPDATE2 = 1
+OVSDB_UPDATE3 = 2
 
 CLUSTERED = "clustered"
 RELAY = "relay"
@@ -45,6 +47,60 @@ Notice = collections.namedtuple('Notice', ('event', 'row', 
'updates'))
 Notice.__new__.__defaults__ = (None,)  # default updates=None
 
 
+class Monitor(enum.IntEnum):
+monitor = OVSDB_UPDATE
+monitor_cond = OVSDB_UPDATE2
+monitor_cond_since = OVSDB_UPDATE3
+
+
+class ConditionState(object):
+def __init__(self):
+self._ack_cond = None
+self._req_cond = None
+self._new_cond = [True]
+
+def __iter__(self):
+return iter([self._new_cond, self._req_cond, self._ack_cond])
+
+@property
+def new(self):
+"""The latest freshly initialized condition change"""
+return self._new_cond
+
+@property
+def acked(self):
+"""The last condition change that has been accepted by the server"""
+return self._ack_cond
+
+@property
+def latest(self):
+"""The most recent condition change"""
+return next(cond for cond in self if cond is not None)
+
+@staticmethod
+def is_true(condition):
+return condition == [True]
+
+def init(self, cond):
+"""Signal that a a condition change is being initiated"""
+self._new_cond = cond
+
+def ack(self):
+"""Signal that a condition change has been acked"""
+if self._req_cond is not None:
+self._ack_cond, self._req_cond = (self._req_cond, None)
+
+def request(self):
+"""Signal that a condition change has been requested"""
+if self._new_cond is not None:
+self._req_cond, self._new_cond = (self._new_cond, None)
+
+def reset(self):
+"""Reset a requested condition change back to new"""
+if self._req_cond is not None and self._new_cond is None:
+self._new_cond, self._req_cond = (self._req_cond, None)
+
+
 class Idl(object):
 """Open vSwitch Database Interface Definition Language (OVSDB IDL).
 
@@ -102,7 +158,13 @@ class Idl(object):
 IDL_S_SERVER_MONITOR_REQUESTED = 2
 IDL_S_DATA_MONITOR_REQUESTED = 3
 IDL_S_DATA_MONITOR_COND_REQUESTED = 4
-IDL_S_MONITORING = 5
+IDL_S_DATA_MONITOR_COND_SINCE_REQUESTED = 5
+IDL_S_MONITORING = 6
+
+monitor_map = {
+Monitor.monitor: IDL_S_SERVER_MONITOR_REQUESTED,
+Monitor.monitor_cond: IDL_S_DATA_MONITOR_COND_REQUESTED,
+Monitor.monitor_cond_since: IDL_S_DATA_MONITOR_COND_SINCE_REQUESTED}
 
 def __init__(self, remote, schema_helper, probe_interval=None,
  leader_only=True):
@@ -146,10 +208,12 @@ class Idl(object):
 remotes = self._parse_remotes(remote)
 self._session = ovs.jsonrpc.Session.open_multiple(remotes,
 probe_interval=probe_interval)
+self._request_id = None
 self._monitor_request_id = None
 self._last_seqno = None
 self.change_seqno = 0
 self.uuid = uuid.uuid1()
+self.last_id = str(uuid.UUID(int=0))
 
 # Server monitor.
 self._server_schema_request_id = None
@@ -176,6 +240,9 @@ class Idl(object):
 self.txn = None
 self._outstanding_txns = {}
 
+self.cond_changed = False
+self.cond_seqno = 0
+
 for table in schema.tables.values():
 for column in table.columns.values():
 if not hasattr(column, 'alert'):
@@ -183,8 +250,7 @@ class Idl(object):
 table.need_table = False
 table.rows = custom_index.IndexedRows(table)
 table.idl = self
-table.condition = [True]
-table.cond_changed = False
+table.condition = ConditionState()
 
 def _parse_remotes(self, remote):
 # If remote is -
@@ -222,6 +288,38 @@ class Idl(object):
 update."""
 self._session.close()
 
+def ack_conditions(self):
+"""Mark all requested table conditions as acked"""
+for table in self.tables.values():
+table.condition.ack()
+
+def sync_conditions(self):
+"""Synchronize condition state when the FSM is restarted
+
+If a non-zero last_id is available for the DB, then upon reconnect
+the IDL should first request acked conditions to avoid missing updates
+about records that were added before the transaction with
+

Re: [ovs-dev] 回复: [PATCH v2] netdev-vport : Fix userspace tunnel ioctl(SIOCGIFINDEX) info logs.

2021-11-20 Thread 0-day Robot
Bleep bloop.  Greetings lin huang, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


git-am:
error: cannot convert from eucgb2312_cn to UTF-8
fatal: could not parse patch


Patch skipped due to previous failure.

Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] 回复: [PATCH v2] netdev-vport : Fix userspace tunnel ioctl(SIOCGIFINDEX) info logs.

2021-11-20 Thread lin huang
From: linhuang 

Userspace tunnel doesn't have a valid device in the kernel. So
get_ifindex() function (ioctl) always get error during
adding a port, deleting a port or updating a port status.

The info log is
"2021-08-29T09:17:39.830Z|00059|netdev_linux|INFO|ioctl(SIOCGIFINDEX)
on vxlan_sys_4789 device failed: No such device"

If there are a lot of userspace tunnel ports on a bridge, the
iface_refresh_netdev_status() function will spend a lot of time.

So ignore userspace tunnel port ioctl(SIOCGIFINDEX) operation, just
return -ENODEV.

Signed-off-by: linhuang 
Reviewed-by: Aaron Conole 
---
 lib/dpif.c   | 2 +-
 lib/netdev-offload.c | 2 --
 lib/netdev-vport.c   | 3 ++-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/lib/dpif.c b/lib/dpif.c
index 8c4aed4..7adb620 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -362,8 +362,8 @@ do_open(const char *name, const char *type, bool create, 
struct dpif **dpifp)
 }

 err = netdev_open(dpif_port.name, dpif_port.type, );
-
 if (!err) {
+netdev_set_dpif_type(netdev, dpif_type_str);
 netdev_ports_insert(netdev, dpif_type_str, _port);
 netdev_close(netdev);
 } else {
diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index 8075cfb..1221170 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -596,8 +596,6 @@ netdev_ports_insert(struct netdev *netdev, const char 
*dpif_type,
 data->ifindex = -1;
 }

-netdev_set_dpif_type(netdev, dpif_type);
-
 hmap_insert(_to_netdev, >portno_node,
 netdev_ports_hash(dpif_port->port_no, dpif_type));
 ovs_rwlock_unlock(_hmap_rwlock);
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 499c029..ad24933 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -1151,8 +1151,9 @@ netdev_vport_get_ifindex(const struct netdev *netdev_)
 {
 char buf[NETDEV_VPORT_NAME_BUFSIZE];
 const char *name = netdev_vport_get_dpif_port(netdev_, buf, sizeof(buf));
+const char *dpif_type = netdev_get_dpif_type(netdev_);

-return linux_get_ifindex(name);
+return !strcmp(type, "system") ? linux_get_ifindex(name) : -ENODEV;
 }

 #define NETDEV_VPORT_GET_IFINDEX netdev_vport_get_ifindex
--
2.12.2


发件人: dev  代表 lin huang 
发送时间: 2021年11月17日 12:39
收件人: dev
抄送: Ben Pfaff; i.maximets
主题: Re: [ovs-dev] [PATCH v2] netdev-vport : Fix userspace tunnel 
ioctl(SIOCGIFINDEX) info logs.

hi all,

pls review my code.

Regards,
Lin Huang

From: lin huang
Date: 2021-10-25 13:16
To: d...@openvswitch.org
CC: Aaron Conole; 
i.maxim...@ovn.org
Subject: [PATCH v2] netdev-vport : Fix userspace tunnel ioctl(SIOCGIFINDEX) 
info logs.
From: linhuang 

Userspace tunnel doesn't have a valid device in the kernel. So
get_ifindex() function (ioctl) always get error during
adding a port, deleting a port or updating a port status.

The info log is
"2021-08-29T09:17:39.830Z|00059|netdev_linux|INFO|ioctl(SIOCGIFINDEX)
on vxlan_sys_4789 device failed: No such device"

If there are a lot of userspace tunnel ports on a bridge, the
iface_refresh_netdev_status() function will spend a lot of time.

So ignore userspace tunnel port ioctl(SIOCGIFINDEX) operation, just
return -ENODEV.

Signed-off-by: linhuang 
Reviewed-by: Aaron Conole 
---
lib/netdev-offload.c | 6 --
lib/netdev-vport.c   | 3 ++-
vswitchd/bridge.c| 2 ++
3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index 8075cfbd8..00b7515cf 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -577,7 +577,9 @@ netdev_ports_insert(struct netdev *netdev, const char 
*dpif_type,
 struct dpif_port *dpif_port)
{
 struct port_to_netdev_data *data;
-int ifindex = netdev_get_ifindex(netdev);
+int ifindex;
+
+netdev_set_dpif_type(netdev, dpif_type);
 ovs_rwlock_wrlock(_hmap_rwlock);
 if (netdev_ports_lookup(dpif_port->port_no, dpif_type)) {
@@ -589,6 +591,7 @@ netdev_ports_insert(struct netdev *netdev, const char 
*dpif_type,
 data->netdev = netdev_ref(netdev);
 dpif_port_clone(>dpif_port, dpif_port);
+ifindex = netdev_get_ifindex(netdev);
 if (ifindex >= 0) {
 data->ifindex = ifindex;
 hmap_insert(_to_port, >ifindex_node, ifindex);
@@ -596,7 +599,6 @@ netdev_ports_insert(struct netdev *netdev, const char 
*dpif_type,
 data->ifindex = -1;
 }
-netdev_set_dpif_type(netdev, dpif_type);
 hmap_insert(_to_netdev, >portno_node,
 netdev_ports_hash(dpif_port->port_no, dpif_type));
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 499c0291c..411ac343a 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -1151,8 +1151,9 @@ netdev_vport_get_ifindex(const struct netdev *netdev_)
{
 char buf[NETDEV_VPORT_NAME_BUFSIZE];
 

Re: [ovs-dev] [ovs-dev v4] ipf: add ipf context

2021-11-20 Thread 0-day Robot
Bleep bloop.  Greetings Peng He, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Author Peng He  needs to sign off.
ERROR: Co-author Mike Pattrick  needs to sign off.
WARNING: Unexpected sign-offs from developers who are not authors or co-authors 
or committers: Peng He 
Lines checked: 411, Warnings: 1, Errors: 2


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [ovs-dev v4] ipf: add ipf context

2021-11-20 Thread Peng He
From: Peng He 

ipf_postprocess will emit packets into the datapath pipeline ignoring
the conntrack context, this might casuse weird issues when a packet
batch has less space to hold all the fragments belonging to single
packet.

Given the below ruleest and consider sending a 64K ICMP packet which
is splitted into 64 fragments.

priority=1,action=drop
priority=10,arp,action=normal
priority=100,in_port=1,ct_state=-trk,icmp,action=ct(zone=9,table=0)
priority=100,in_port=1,ct_state=+new+trk,icmp,action=ct(zone=9,commit),2
priority=100,in_port=1,ct_state=-new+est+trk,icmp,action=2
priority=100,in_port=2,ct_state=-trk,icmp,action=ct(table=0,zone=9)
priority=100,in_port=2,ct_state=+trk+est-new,icmp,action=1

Batch 1:
the first 32 packets will be buffered in the ipf preprocessing, nothing
more proceeds.

Batch 2:
the second 32 packets succeed the fragment reassembly and goes to ct
and ipf_post will emits the first 32 packets due to the limit of batch
size.

the first 32 packets goes to the datapath again due to the
recirculation, and again buffered at ipf preprocessing before ct commit,
then the ovs tries to call ct commit as well as ipf_postprocessing which emits
the last 32 packets, in this case the last 32 packets will follow
the current actions which will be sent to port 2 directly without
recirculation and going to ipf preprocssing and ct commit again.

This will cause the first 32 packets never get the chance to
reassemble and evevntually this large ICMP packets fail to transmit.

this patch fixes this issue by adding firstly ipf context to avoid
ipf_postprocessing emits packets in the wrong context. Then by
re-executing the action list again to emit the last 32 packets
in the right context to correctly transmitting multiple fragments.

Last, we reuse the recirc_depth to limit the times of re-execution
as re-execution is implemented by recursive function call.

Signed-off-by: Peng He 
Co-authored-by: Mike Pattrick 
---
 lib/conntrack.c |  9 ---
 lib/conntrack.h | 15 ++--
 lib/dpif-netdev.c   | 52 +
 lib/ipf.c   | 40 ---
 lib/ipf.h   | 11 +++--
 tests/system-traffic.at | 33 ++
 tests/test-conntrack.c  |  6 ++---
 7 files changed, 136 insertions(+), 30 deletions(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 33a1a9295..72999f1ae 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -1434,14 +1434,14 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
  * connection tables.  'setmark', if not NULL, should point to a two
  * elements array containing a value and a mask to set the connection mark.
  * 'setlabel' behaves similarly for the connection label.*/
-int
+bool
 conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
   ovs_be16 dl_type, bool force, bool commit, uint16_t zone,
   const uint32_t *setmark,
   const struct ovs_key_ct_labels *setlabel,
   ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
   const struct nat_action_info_t *nat_action_info,
-  long long now, uint32_t tp_id)
+  long long now, uint32_t tp_id, struct ipf_ctx *ipf_ctx)
 {
 ipf_preprocess_conntrack(ct->ipf, pkt_batch, now, dl_type, zone,
  ct->hash_basis);
@@ -1468,9 +1468,8 @@ conntrack_execute(struct conntrack *ct, struct 
dp_packet_batch *pkt_batch,
 }
 }
 
-ipf_postprocess_conntrack(ct->ipf, pkt_batch, now, dl_type);
-
-return 0;
+return ipf_postprocess_conntrack(ct->ipf, pkt_batch, ipf_ctx, \
+ now, dl_type);
 }
 
 void
diff --git a/lib/conntrack.h b/lib/conntrack.h
index 9553b188a..211efad3f 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -64,6 +64,7 @@
 struct dp_packet_batch;
 
 struct conntrack;
+struct ipf_ctx;
 
 union ct_addr {
 ovs_be32 ipv4;
@@ -88,13 +89,13 @@ struct nat_action_info_t {
 struct conntrack *conntrack_init(void);
 void conntrack_destroy(struct conntrack *);
 
-int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
-  ovs_be16 dl_type, bool force, bool commit, uint16_t zone,
-  const uint32_t *setmark,
-  const struct ovs_key_ct_labels *setlabel,
-  ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
-  const struct nat_action_info_t *nat_action_info,
-  long long now, uint32_t tp_id);
+bool conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
+   ovs_be16 dl_type, bool force, bool commit,
+   uint16_t zone, const uint32_t *setmark,
+   const struct ovs_key_ct_labels *setlabel,
+   ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
+   

Re: [ovs-dev] [PATCH] dpif-netdev: avoid hw_miss_packet_recover() for devices with no support

2021-11-20 Thread 0-day Robot
Bleep bloop.  Greetings Sriharsha Basavapatna, I am a robot and I have tried 
out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


build:
/bin/sh ./libtool  --tag=CC   --mode=compile gcc -std=gnu99 -DHAVE_CONFIG_H -I. 
   -I ./include -I ./include -I ./lib -I ./lib-Wstrict-prototypes -Wall 
-Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security 
-Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align 
-Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes 
-Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror   -g 
-O2 -MT lib/dpif-netdev-lookup.lo -MD -MP -MF $depbase.Tpo -c -o 
lib/dpif-netdev-lookup.lo lib/dpif-netdev-lookup.c &&\
mv -f $depbase.Tpo $depbase.Plo
libtool: compile:  gcc -std=gnu99 -DHAVE_CONFIG_H -I. -I ./include -I ./include 
-I ./lib -I ./lib -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare 
-Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter 
-Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition 
-Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow 
-Werror -Werror -g -O2 -MT lib/dpif-netdev-lookup.lo -MD -MP -MF 
lib/.deps/dpif-netdev-lookup.Tpo -c lib/dpif-netdev-lookup.c -o 
lib/dpif-netdev-lookup.o
depbase=`echo lib/dpif-netdev-lookup-autovalidator.lo | sed 
's|[^/]*$|.deps/&|;s|\.lo$||'`;\
/bin/sh ./libtool  --tag=CC   --mode=compile gcc -std=gnu99 -DHAVE_CONFIG_H -I. 
   -I ./include -I ./include -I ./lib -I ./lib-Wstrict-prototypes -Wall 
-Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security 
-Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align 
-Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes 
-Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror   -g 
-O2 -MT lib/dpif-netdev-lookup-autovalidator.lo -MD -MP -MF $depbase.Tpo -c -o 
lib/dpif-netdev-lookup-autovalidator.lo lib/dpif-netdev-lookup-autovalidator.c 
&&\
mv -f $depbase.Tpo $depbase.Plo
libtool: compile:  gcc -std=gnu99 -DHAVE_CONFIG_H -I. -I ./include -I ./include 
-I ./lib -I ./lib -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare 
-Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter 
-Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition 
-Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow 
-Werror -Werror -g -O2 -MT lib/dpif-netdev-lookup-autovalidator.lo -MD -MP -MF 
lib/.deps/dpif-netdev-lookup-autovalidator.Tpo -c 
lib/dpif-netdev-lookup-autovalidator.c -o lib/dpif-netdev-lookup-autovalidator.o
depbase=`echo lib/dpif-netdev-lookup-generic.lo | sed 
's|[^/]*$|.deps/&|;s|\.lo$||'`;\
/bin/sh ./libtool  --tag=CC   --mode=compile gcc -std=gnu99 -DHAVE_CONFIG_H -I. 
   -I ./include -I ./include -I ./lib -I ./lib-Wstrict-prototypes -Wall 
-Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security 
-Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align 
-Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes 
-Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror   -g 
-O2 -MT lib/dpif-netdev-lookup-generic.lo -MD -MP -MF $depbase.Tpo -c -o 
lib/dpif-netdev-lookup-generic.lo lib/dpif-netdev-lookup-generic.c &&\
mv -f $depbase.Tpo $depbase.Plo
libtool: compile:  gcc -std=gnu99 -DHAVE_CONFIG_H -I. -I ./include -I ./include 
-I ./lib -I ./lib -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare 
-Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter 
-Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition 
-Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow 
-Werror -Werror -g -O2 -MT lib/dpif-netdev-lookup-generic.lo -MD -MP -MF 
lib/.deps/dpif-netdev-lookup-generic.Tpo -c lib/dpif-netdev-lookup-generic.c -o 
lib/dpif-netdev-lookup-generic.o
depbase=`echo lib/dpif-netdev.lo | sed 's|[^/]*$|.deps/&|;s|\.lo$||'`;\
/bin/sh ./libtool  --tag=CC   --mode=compile gcc -std=gnu99 -DHAVE_CONFIG_H -I. 
   -I ./include -I ./include -I ./lib -I ./lib-Wstrict-prototypes -Wall 
-Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security 
-Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align 
-Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes 
-Wmissing-field-initializers -fno-strict-aliasing -Wshadow -Werror -Werror   -g 
-O2 -MT lib/dpif-netdev.lo -MD -MP -MF $depbase.Tpo -c -o lib/dpif-netdev.lo 
lib/dpif-netdev.c &&\
mv -f $depbase.Tpo $depbase.Plo
libtool: compile:  gcc -std=gnu99 -DHAVE_CONFIG_H -I. -I ./include -I ./include 
-I ./lib -I ./lib -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare 
-Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter 
-Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition 
-Wmissing-prototypes -Wmissing-field-initializers -fno-strict-aliasing -Wshadow 
-Werror 

[ovs-dev] [PATCH] dpif-netdev: avoid hw_miss_packet_recover() for devices with no support

2021-11-20 Thread Sriharsha Basavapatna via dev
The hw_miss_packet_recover() API results in performance degradation, for
ports that are either not offload capable or do not support this specific
offload API.

For example, in the test configuration shown below, the vhost-user port
does not support offloads and the VF port doesn't support hw_miss offload
API. But because tunnel offload needs to be configured in other bridges
(br-vxlan and br-phy), OVS has been built with -DALLOW_EXPERIMENTAL_API.

br-vhostbr-vxlanbr-phy
vhost-user<-->VFVF-Rep<-->VxLAN   uplink-port

For every packet between the VF and the vhost-user ports, hw_miss API is
called even though it is not supported by the ports involved. This leads
to significant performance drop (~3x in some cases; both cycles and pps).

To fix this, return EOPNOTSUPP when this API fails for a device that
doesn't support it and avoid this API on that port for subsequent packets.

Signed-off-by: Sriharsha Basavapatna 
---
 lib/dpif-netdev-private.h |  2 +-
 lib/dpif-netdev.c | 29 +
 lib/netdev-offload-dpdk.c |  9 +++--
 3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h
index 4593649bd..e2a6a9d3a 100644
--- a/lib/dpif-netdev-private.h
+++ b/lib/dpif-netdev-private.h
@@ -46,7 +46,7 @@ dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,
 
 int
 dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
-  odp_port_t port_no,
+  void *port,
   struct dp_packet *packet,
   struct dp_netdev_flow **flow);
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 69d7ec26e..207b1961c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -434,6 +434,7 @@ struct tx_port {
 long long last_used;
 struct hmap_node node;
 long long flush_time;
+bool hw_miss_api_supported;
 struct dp_packet_batch output_pkts;
 struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
 };
@@ -6972,6 +6973,7 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread 
*pmd,
 tx->port = port;
 tx->qid = -1;
 tx->flush_time = 0LL;
+tx->hw_miss_api_supported = true;
 dp_packet_batch_init(>output_pkts);
 
 hmap_insert(>tx_ports, >node, hash_port_no(tx->port->port_no));
@@ -7327,22 +7329,28 @@ static struct tx_port * pmd_send_port_cache_lookup(
 
 inline int
 dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
-  odp_port_t port_no OVS_UNUSED,
+  void *port,
   struct dp_packet *packet,
   struct dp_netdev_flow **flow)
 {
-struct tx_port *p OVS_UNUSED;
+struct tx_port *p = port;
 uint32_t mark;
 
 #ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */
 /* Restore the packet if HW processing was terminated before completion. */
-p = pmd_send_port_cache_lookup(pmd, port_no);
-if (OVS_LIKELY(p)) {
+if (OVS_LIKELY(p) && p->hw_miss_api_supported) {
 int err = netdev_hw_miss_packet_recover(p->port->netdev, packet);
 
-if (err && err != EOPNOTSUPP) {
-COVERAGE_INC(datapath_drop_hw_miss_recover);
-return -1;
+if (err) {
+if (err != EOPNOTSUPP) {
+COVERAGE_INC(datapath_drop_hw_miss_recover);
+return -1;
+} else {
+/* API unsupported by the port; avoid subsequent calls. */
+VLOG_DBG("hw_miss_api unsupported: port: %d",
+ p->port->port_no);
+p->hw_miss_api_supported = false;
+}
 }
 }
 #endif
@@ -7394,6 +7402,11 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
 uint16_t tcp_flags;
 size_t map_cnt = 0;
 bool batch_enable = true;
+struct tx_port *port = NULL;
+
+#ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. */
+port = pmd_send_port_cache_lookup(pmd, port_no);
+#endif
 
 pmd_perf_update_counter(>perf_stats,
 md_is_valid ? PMD_STAT_RECIRC : PMD_STAT_RECV,
@@ -7420,7 +7433,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
 }
 
 if (netdev_flow_api && recirc_depth == 0) {
-if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, port_no, packet, ))) {
+if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, port, packet, ))) {
 /* Packet restoration failed and it was dropped, do not
  * continue processing.
  */
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 9fee7570a..8bd2e 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -2292,11 +2292,16 @@ netdev_offload_dpdk_hw_miss_packet_recover(struct 
netdev *netdev,
 odp_port_t vport_odp;
 int ret = 0;
 
-if (netdev_dpdk_rte_flow_get_restore_info(netdev, packet,
-  _restore_info, NULL)) {
+ret =