[ovs-dev] [PATCH] conntrack: remove the nat_action_info from the conn

2021-08-04 Thread wenxu
From: wenxu 

Only the nat_action in the nat_action_info is used for conn
packet forwarding; the other items, such as min/max_ip/port, are
only used when creating a new conn. So the whole nat_action_info
does not need to be stored in the conn. This also avoids an
unnecessary memory allocation.

Signed-off-by: wenxu 
---
 lib/conntrack-private.h |   2 +-
 lib/conntrack.c | 101 +---
 2 files changed, 53 insertions(+), 50 deletions(-)

diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 169ace5..dfdf4e6 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -98,7 +98,7 @@ struct conn {
 struct conn_key parent_key; /* Only used for orig_tuple support. */
 struct ovs_list exp_node;
 struct cmap_node cm_node;
-struct nat_action_info_t *nat_info;
+uint16_t nat_action;
 char *alg;
 struct conn *nat_conn; /* The NAT 'conn' context, if there is one. */
 
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 551c206..33a1a92 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -111,7 +111,8 @@ static void *clean_thread_main(void *f_);
 
 static bool
 nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn,
- struct conn *nat_conn);
+ struct conn *nat_conn,
+ const struct nat_action_info_t *nat_info);
 
 static uint8_t
 reverse_icmp_type(uint8_t type);
@@ -724,7 +725,7 @@ handle_alg_ctl(struct conntrack *ct, const struct 
conn_lookup_ctx *ctx,
 static void
 pat_packet(struct dp_packet *pkt, const struct conn *conn)
 {
-if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+if (conn->nat_action & NAT_ACTION_SRC) {
 if (conn->key.nw_proto == IPPROTO_TCP) {
 struct tcp_header *th = dp_packet_l4(pkt);
 packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst);
@@ -732,7 +733,7 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
 struct udp_header *uh = dp_packet_l4(pkt);
 packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
 }
-} else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+} else if (conn->nat_action & NAT_ACTION_DST) {
 if (conn->key.nw_proto == IPPROTO_TCP) {
 packet_set_tcp_port(pkt, conn->rev_key.dst.port,
 conn->rev_key.src.port);
@@ -746,7 +747,7 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
 static void
 nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
 {
-if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+if (conn->nat_action & NAT_ACTION_SRC) {
 pkt->md.ct_state |= CS_SRC_NAT;
 if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
 struct ip_header *nh = dp_packet_l3(pkt);
@@ -761,7 +762,7 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, 
bool related)
 if (!related) {
 pat_packet(pkt, conn);
 }
-} else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+} else if (conn->nat_action & NAT_ACTION_DST) {
 pkt->md.ct_state |= CS_DST_NAT;
 if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
 struct ip_header *nh = dp_packet_l3(pkt);
@@ -782,7 +783,7 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, 
bool related)
 static void
 un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
 {
-if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+if (conn->nat_action & NAT_ACTION_SRC) {
 if (conn->key.nw_proto == IPPROTO_TCP) {
 struct tcp_header *th = dp_packet_l4(pkt);
 packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
@@ -790,7 +791,7 @@ un_pat_packet(struct dp_packet *pkt, const struct conn 
*conn)
 struct udp_header *uh = dp_packet_l4(pkt);
 packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
 }
-} else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+} else if (conn->nat_action & NAT_ACTION_DST) {
 if (conn->key.nw_proto == IPPROTO_TCP) {
 packet_set_tcp_port(pkt, conn->key.dst.port, conn->key.src.port);
 } else if (conn->key.nw_proto == IPPROTO_UDP) {
@@ -802,7 +803,7 @@ un_pat_packet(struct dp_packet *pkt, const struct conn 
*conn)
 static void
 reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
 {
-if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+if (conn->nat_action & NAT_ACTION_SRC) {
 if (conn->key.nw_proto == IPPROTO_TCP) {
 struct tcp_header *th_in = dp_packet_l4(pkt);
 packet_set_tcp_port(pkt, conn->key.src.port,
@@ -812,7 +813,7 @@ reverse_pat_packet(struct dp_packet *pkt, const struct conn 
*conn)
 packet_set_udp_port(pkt, conn->key.src.port,
 uh_in->udp_dst);
 }
-} else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+} else if (conn->nat_action & NAT_ACTION_DST) {
 if (conn->key.nw_

Re: [ovs-dev] [RFC PATCH ovn 0/4] Use Distributed Gateway Port for ovn-controller scalability.

2021-08-04 Thread Han Zhou
On Wed, Aug 4, 2021 at 3:48 AM Krzysztof Klimonda <
kklimo...@syntaxhighlighted.com> wrote:
>
> Hi Han,
>
> On Tue, Aug 3, 2021, at 20:33, Han Zhou wrote:
> > On Tue, Aug 3, 2021 at 11:09 AM Numan Siddique  wrote:
> > >
> > > On Fri, Jul 30, 2021 at 3:22 AM Han Zhou  wrote:
> > > >
> > > > Note: This patch series is on top of a pending patch that is still
under
> > > > review:
> >
http://patchwork.ozlabs.org/project/ovn/patch/20210729080218.1235041-1-hz...@ovn.org/
> > > >
> > > > It is RFC because: a) it is based on the unmerged patch. b) DDlog
> > > > changes are not done yet. Below is a copy of the commit message of
the
> > last
> > > > patch in this series:
> > > >
> > > > For a fully distributed virtual network dataplane, ovn-controller
> > > > flood-fills datapaths that are connected through patch ports. This
> > > > creates scale problems in ovn-controller when the connected
datapaths
> > > > are too many.
> > > >
> > > > In a particular situation, when distributed gateway ports are used
to
> > > > connect logical routers to logical switches, when there is no need
for
> > > > distributed processing of those gateway ports (e.g. no dnat_and_snat
> > > > configured), the datapaths on the other side of the gateway ports
are
> > > > not needed locally on the current chassis. This patch avoids pulling
> > > > those datapaths to local in those scenarios.
> > > >
> > > > There are two scenarios that can greatly benefit from this
optimization.
> > > >
> > > > 1) When there are multiple tenants, each has its own logical
topology,
> > > >but sharing the same external/provider networks, connected to
their
> > > >own logical routers with DGPs. Without this optimization, each
> > > >ovn-controller would process all logical topology of all tenants
and
> > > >program flows for all of them, even if there are only workloads
of a
> > > >very few number of tenants on the node where the ovn-controller
is
> > > >running, because the shared external network connects all
tenants.
> > > >With this change, only the logical topologies relevant to the
node
> > > >are processed and programmed on the node.
> > > >
> > > > 2) In some deployments, such as ovn-kubernetes, logical switches are
> > > >bound to chassises instead of distributed, because each chassis
is
> > > >assigned dedicated subnets. With the current implementation,
> > > >ovn-controller on each node processes all logical switches and
all
> > > >ports on them, without knowing that they are not distributed at
all.
> > > >At large scale with N nodes (N = hundreds or even more), there
are
> > > >roughly N times processing power wasted for the logical
connectivity
> > > >related flows. With this change, those deployments can utilize
DGP
> > > >to connect the node level logical switches to distributed
router(s),
> > > >with gateway chassis (or HA chassis without really HA) of the DGP
> > > >set to the chassis where the logical switch is bound. This
inherently
> > > >tells OVN the mapping between logical switch and chassis, and
> > > >ovn-controller would smartly avoid processing topologies of other
> > node
> > > >level logical switches, which would hugely save compute cost of
each
> > > >ovn-controller.
> > > >
> > > > For 2), test result for an ovn-kubernetes alike deployment shows
> > > > significant improvement of ovn-controller, both CPU (>90% reduced)
and
> > memory.
> > > >
> > > > Topology:
> > > >
> > > > - 1000 nodes, 1 LS with 10 LSPs per node, connected to a distributed
> > > >   router.
> > > >
> > > > - 2 large port-groups PG1 and PG2, each with 2000 LSPs
> > > >
> > > > - 10 stateful ACLs: 5 from PG1 to PG2, 5 from PG2 to PG1
> > > >
> > > > - 1 GR per node, connected to the distributed router through a join
> > > >   switch. Each GR also connects to an external logical switch per
node.
> > > >   (This part is to keep the test environment close to a real
> > > >ovn-kubernetes setup but shouldn't make much difference for the
> > > >comparison)
> > > >
> > > >  Before the change 
> > > > OVS flows per node: 297408
> > > > ovn-controller memory: 772696 KB
> > > > ovn-controller recompute: 13s
> > > > ovn-controller restart (recompute + reinstall OVS flows): 63s
> > > >
> > > >  After the change (also use DGP to connect node level LSes) 
> > > > OVS flows per node: 81139 (~70% reduced)
> > > > ovn-controller memory: 163464 KB (~80% reduced)
> > > > ovn-controller recompute: 0.86s (>90% reduced)
> > > > ovn-controller restart (recompute + reinstall OVS flows): 5s (>90%
> > reduced)
> > >
> > > Hi Han,
> > >
> > > Thanks for these RFC patches.  The improvements are significant.
> > > That's awesome.
> > >
> > > If I understand this RFC correctly, ovn-k8s will set the
> > > gateway_chassis for each logical
> > > router port of the cluster router (ovn_cluster_router) connecting to
> > > the node logical switch right ?
> > >
> > > If so, instead of using t

Re: [ovs-dev] [PATCH ovn] northd: Fix routing loop in LRs with one-to-many SNAT

2021-08-04 Thread Han Zhou
On Mon, Mar 22, 2021 at 2:16 AM Krzysztof Klimonda <
kklimo...@syntaxhighlighted.com> wrote:
>
> If there are snat entries on the router, and some logical_ip are set to
> network instead of an IP address then given SNAT is masquerade. In such
> case ct_snat action is used in lr_in_unsnat table to ensure that the
> packet is matched against conntrack and destination IP is replaced with
> one from matching conntrack entry.
>
> This however breaks down for new connections sent to router's external IP
> address. In such case, when packet is checked against conntrack table,
> there is no match, and its destination IP remains unchanged. This causes a
> loop in lr_in_ip_routing.
>
> This commit installs a new logical flow in lr_in_ip_routing table for
> routers that have SNAT entry with logical_ip set to network (that being
> masquerade). This flow drops packets that, after going through conntrack
> via ct_snat action in lr_in_unsnat table, are not in established or
> related state (!ct.est && !ct.rel) and which destination IP still matches
> router's external IP. This prevents vswitchd from looping such packets
> until their TTL reaches zero, as well as installing bogus flows in
> datapath that lead to ovs module dropping such packets with "deferred
> action limit reached, drop recirc action" message.
>
> Signed-off-by: Krzysztof Klimonda 

Thanks for the patch. As mentioned in the discussion ML could you post more
details on how the loop happens?
Please see some more comments below.

> ---
>  northd/ovn-northd.c |  57 +++
>  tests/ovn.at|  45 ++
>  tests/system-ovn.at | 108 
>  3 files changed, 210 insertions(+)
>
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 4783e43d7..ea7db3d47 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -11304,6 +11304,7 @@ build_lrouter_nat_defrag_and_lb(struct
ovn_datapath *od,
>  ovs_be32 ip, mask;
>  struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT;
>  bool is_v6 = false;
> +bool is_masquerade = false;
>  bool stateless = lrouter_nat_is_stateless(nat);
>  struct nbrec_address_set *allowed_ext_ips =
>nat->allowed_ext_ips;
> @@ -11343,9 +11344,15 @@ build_lrouter_nat_defrag_and_lb(struct
ovn_datapath *od,
>  if (is_v6) {
>  error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6);
>  cidr_bits = ipv6_count_cidr_bits(&mask_v6);
> +if (cidr_bits < 128) {
> +is_masquerade = true;
> +}
>  } else {
>  error = ip_parse_masked(nat->logical_ip, &ip, &mask);
>  cidr_bits = ip_count_cidr_bits(mask);
> +if (cidr_bits < 32) {
> +is_masquerade = 32;

Typo? s/32/true

> +}
>  }
>  if (!strcmp(nat->type, "snat")) {
>  if (error) {
> @@ -11396,6 +11403,56 @@ build_lrouter_nat_defrag_and_lb(struct
ovn_datapath *od,
>  build_lrouter_in_dnat_flow(lflows, od, nat, match, actions,
distributed,
> mask, is_v6);
>
> +/* When router have SNAT enabled, and logical_ip is a network
(router
> + * is doing masquerade), then we need to make sure that packets
> + * unrelated to any established connection that still have
router's
> + * external IP as a next hop after going through lr_in_unsnat
table
> + * are dropped properly. Otherwise such packets will loop around
> + * between tables until their ttl reaches zero - this
additionally
> + * causes kernel module to drop such packets due to
recirculation
> + * limit being exceeded.
> + *
> + * Install a logical flow in lr_in_ip_routing table that will
> + * match packet with router's external IP that have no related
> + * conntrack entries and drop them. Flow priority must be higher
> + * than any other flow in lr_in_ip_routing that matches router's
> + * external IP.
> + *
> + * The priority for destination routes is calculated as
> + * (prefix length * 2) + 1, and there is an additional flow
> + * for when BFD is in play with priority + 1. Set priority that
> + * is higher than any other potential routing flow for that
> + * network, that is (prefix length * 2) + offset, where offset
> + * is 1 (dst) + 1 (bfd) + 1. */
> +if (is_masquerade) {
> +uint16_t priority, prefix_length, offset;
> +
> +if (is_v6) {
> +prefix_length = 128;
> +} else {
> +prefix_length = 32;
> +}
> +offset = 3;
> +priority = (prefix_length * 2) + offset;
> +
> +ds_clear(match);
> +
> +if (is_v6) {
> +ds_put_format(match,
> +   

Re: [ovs-dev] [PATCH v4 2/2] Match: Do not print "igmp" match keyword

2021-08-04 Thread Ilya Maximets
On 7/23/21 4:58 PM, Salvatore Daniele wrote:
> From: Adrian Moreno 
> 
> The match keyword "igmp" is not supported in ofp-parse, which means
> that flow dumps cannot be restored. Previously a workaround was
> added to ovs-save to avoid changing output in stable branches.
> 
> This patch changes the output to print igmp match in the accepted
> ofp-parse format (ip,nw_proto=2). Tests are added, and NEWS is
> updated to reflect this change.
> 
> The workaround in ovs-save is still included to ensure that flows
> can be restored when upgrading an older ovs-vswitchd. This workaround
> should be removed in later versions.
> 
> Signed-off-by: Adrian Moreno 
> Signed-off-by: Salvatore Daniele 
> Co-authored-by: Salvatore Daniele 
> Acked-by: Flavio Leitner 
> ---
>  NEWS   | 2 ++
>  lib/match.c| 2 --
>  tests/ovs-ofctl.at | 6 ++
>  3 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/NEWS b/NEWS
> index 75045b67d..3d63fd136 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -90,6 +90,8 @@ v2.16.0 - xx xxx 
> space in order to resolve a number of issues with per-vport dispatch.
>   * New vswitchd unixctl command `dpif-netlink/dispatch-mode` will return
> the current dispatch mode for each datapath.
> +   - ovs-ofctl dump-flows no longer prints "igmp". Instead the flag
> +   "ip,nw_proto=2" is used.
>  
>  
>  v2.15.0 - 15 Feb 2021
> diff --git a/lib/match.c b/lib/match.c
> index ba716579d..4a0778c30 100644
> --- a/lib/match.c
> +++ b/lib/match.c
> @@ -1556,8 +1556,6 @@ match_format(const struct match *match,
>  skip_proto = true;
>  if (f->nw_proto == IPPROTO_ICMP) {
>  ds_put_format(s, "%sicmp%s,", colors.value, colors.end);
> -} else if (f->nw_proto == IPPROTO_IGMP) {
> -ds_put_format(s, "%sigmp%s,", colors.value, colors.end);
>  } else if (f->nw_proto == IPPROTO_TCP) {
>  ds_put_format(s, "%stcp%s,", colors.value, colors.end);
>  } else if (f->nw_proto == IPPROTO_UDP) {

Same question here.  This function prints 'igmp_type' and 'igmp_code' as well.
And I don't think that OVS is able to parse them.  Moreover, printing them
and not printing the 'igmp' type doesn't make much sense.  So, the printing
branch should, probably fall to the default case and print generic 'tp_src'
and 'tp_dst' instead.  Will that work?

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 1/2] ovs-save: Save igmp flows in ofp_parse syntax

2021-08-04 Thread Ilya Maximets
On 7/23/21 4:58 PM, Salvatore Daniele wrote:
> match.c generates the keyword "igmp", which is not supported in ofp-parse.
> This means that flow dumps containing 'igmp' can not be restored.
> 
> Removing the 'igmp' keyword entirely could break existing scripts in stable
> branches, so this patch creates a workaround within ovs-save by converting any
> instances of "igmp" within $bridge.flows.dump into "ip, nw_proto=2".
> 
> Signed-off-by: Salvatore Daniele 
> Acked-by: Aaron Conole 
> ---
>  utilities/ovs-save | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/utilities/ovs-save b/utilities/ovs-save
> index 27ce3a9aa..23cb0d9d9 100755
> --- a/utilities/ovs-save
> +++ b/utilities/ovs-save
> @@ -150,7 +150,8 @@ save_flows () {
>  ovs-ofctl -O $ofp_version dump-flows --no-names --no-stats "$bridge" 
> | \
>  sed -e '/NXST_FLOW/d' \
>  -e '/OFPST_FLOW/d' \
> --e 's/\(idle\|hard\)_age=[^,]*,//g' > \
> +-e 's/\(idle\|hard\)_age=[^,]*,//g' \
> +-e 's/igmp/ip,nw_proto=2/g' > \
>  "$workdir/$bridge.flows.dump"
>  done
>  echo "rm -rf \"$workdir\""
> 

Hmm.  What about 'igmp_type' and 'igmp_code'?  I don't think that OVS
will be able to parse them.  Should we replace them with generic
'tp_src' and 'tp_dst' ?  Will that work?

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] conntrack: Extract l4 information for SCTP.

2021-08-04 Thread Paolo Valerio
since a27d70a89 ("conntrack: add generic IP protocol support") all
the unrecognized IP protocols get handled using ct_proto_other ops
and are managed as L3 using 3 tuples.

This patch stores L4 information for SCTP in the conn_key so that
multiple conn instances, instead of one with ports zeroed, will be
created when there are multiple SCTP connections between two hosts.
It also performs crc32c check when not offloaded, and adds SCTP to
pat_enabled.

With this patch, given two SCTP association between two hosts, and
given for example the following rules:

in_port=tap0,ip,action=ct(commit,zone=1,nat(src=10.1.1.240:12345-12346)),tap1
in_port=tap1,ct_state=-trk,ip,action=ct(table=0,zone=1,nat)
in_port=tap1,ct_state=+trk,ct_zone=1,ip,action=tap0

the following entries will be created:

sctp,orig=(src=192.168.100.100,dst=10.1.1.1,sport=55884,dport=5201),reply=(src=10.1.1.1,dst=10.1.1.240,sport=5201,dport=12345),zone=1
sctp,orig=(src=192.168.100.100,dst=10.1.1.1,sport=59874,dport=5202),reply=(src=10.1.1.1,dst=10.1.1.240,sport=5202,dport=12346),zone=1

instead of:

sctp,orig=(src=192.168.100.100,dst=10.1.1.1,sport=0,dport=0),reply=(src=10.1.1.1,dst=10.1.1.240,sport=0,dport=0),zone=1

Signed-off-by: Paolo Valerio 
Acked-by: Gaetan Rivet 

---
v2:
- ordered includes
- while at it, slightly modified the commit subject (capital letter
  and period)
---
 lib/conntrack.c  |   94 ++
 lib/packets.h|   18 +++
 tests/system-kmod-macros.at  |   11 
 tests/system-traffic.at  |   80 
 tests/system-userspace-macros.at |7 +++
 5 files changed, 209 insertions(+), 1 deletion(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 551c2061a..31df0d941 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -27,6 +27,7 @@
 #include "conntrack-private.h"
 #include "conntrack-tp.h"
 #include "coverage.h"
+#include "crc32c.h"
 #include "csum.h"
 #include "ct-dpif.h"
 #include "dp-packet.h"
@@ -40,6 +41,7 @@
 #include "openvswitch/poll-loop.h"
 #include "random.h"
 #include "timeval.h"
+#include "unaligned.h"
 
 VLOG_DEFINE_THIS_MODULE(conntrack);
 
@@ -731,6 +733,9 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
 } else if (conn->key.nw_proto == IPPROTO_UDP) {
 struct udp_header *uh = dp_packet_l4(pkt);
 packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
+} else if (conn->key.nw_proto == IPPROTO_SCTP) {
+struct sctp_header *sh = dp_packet_l4(pkt);
+packet_set_sctp_port(pkt, conn->rev_key.dst.port, sh->sctp_dst);
 }
 } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
 if (conn->key.nw_proto == IPPROTO_TCP) {
@@ -739,6 +744,9 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
 } else if (conn->key.nw_proto == IPPROTO_UDP) {
 packet_set_udp_port(pkt, conn->rev_key.dst.port,
 conn->rev_key.src.port);
+} else if (conn->key.nw_proto == IPPROTO_SCTP) {
+packet_set_sctp_port(pkt, conn->rev_key.dst.port,
+ conn->rev_key.src.port);
 }
 }
 }
@@ -789,12 +797,17 @@ un_pat_packet(struct dp_packet *pkt, const struct conn 
*conn)
 } else if (conn->key.nw_proto == IPPROTO_UDP) {
 struct udp_header *uh = dp_packet_l4(pkt);
 packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
+} else if (conn->key.nw_proto == IPPROTO_SCTP) {
+struct sctp_header *sh = dp_packet_l4(pkt);
+packet_set_sctp_port(pkt, sh->sctp_src, conn->key.src.port);
 }
 } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
 if (conn->key.nw_proto == IPPROTO_TCP) {
 packet_set_tcp_port(pkt, conn->key.dst.port, conn->key.src.port);
 } else if (conn->key.nw_proto == IPPROTO_UDP) {
 packet_set_udp_port(pkt, conn->key.dst.port, conn->key.src.port);
+} else if (conn->key.nw_proto == IPPROTO_SCTP) {
+packet_set_sctp_port(pkt, conn->key.dst.port, conn->key.src.port);
 }
 }
 }
@@ -811,6 +824,10 @@ reverse_pat_packet(struct dp_packet *pkt, const struct 
conn *conn)
 struct udp_header *uh_in = dp_packet_l4(pkt);
 packet_set_udp_port(pkt, conn->key.src.port,
 uh_in->udp_dst);
+} else if (conn->key.nw_proto == IPPROTO_SCTP) {
+struct sctp_header *sh_in = dp_packet_l4(pkt);
+packet_set_sctp_port(pkt, conn->key.src.port,
+ sh_in->sctp_dst);
 }
 } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
 if (conn->key.nw_proto == IPPROTO_TCP) {
@@ -819,6 +836,9 @@ reverse_pat_packet(struct dp_packet *pkt, const struct conn 
*conn)
 } else if (conn->key.nw_proto == IPPROTO_UDP) {
 packet_set_udp_port(pkt, conn->key.src.port,
 

Re: [ovs-dev] [PATCH v5 2/2] Minimize the number of time calls in time_poll()

2021-08-04 Thread Anton Ivanov

On 04/08/2021 15:45, Gaëtan Rivet wrote:

On Wed, Aug 4, 2021, at 10:34, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

time_poll() makes an excessive number of time_msec() calls
which incur a performance penalty.

1. Avoid time_msec() call for timeout calculation when time_poll()
is asked to skip poll()

2. Reuse the time_msec() result from deadline calculation for
last_wakeup and timeout calculation.

Signed-off-by: Anton Ivanov 

To conclude our previous thread, I don't disagree that
reducing time_msec() calls is interesting. Coherency traffic can take
a toll on some architectures, and this code is used by all cores,
even fastpath ones for dpif-netdev.

That being said, there is still the 'else { retval = 0; }' from the previous
version.


Sorry, missed that.




---
  lib/timeval.c | 12 +++-
  1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/lib/timeval.c b/lib/timeval.c
index dbfbcb9e6..45a934513 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -287,7 +287,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds,
HANDLE *handles OVS_UNUSED,
long long int timeout_when, int *elapsed)
  {
  long long int *last_wakeup = last_wakeup_get();
-long long int start;
+long long int start, now;
  bool quiescent;
  int retval = 0;
  
@@ -297,13 +297,12 @@ time_poll(struct pollfd *pollfds, int n_pollfds,

HANDLE *handles OVS_UNUSED,
  if (*last_wakeup && !thread_is_pmd()) {
  log_poll_interval(*last_wakeup);
  }
-start = time_msec();
+now = start = time_msec();
  
  timeout_when = MIN(timeout_when, deadline);

  quiescent = ovsrcu_is_quiescent();
  
  for (;;) {

-long long int now = time_msec();
  int time_left;
  retval = 0;
  
@@ -331,6 +330,8 @@ time_poll(struct pollfd *pollfds, int n_pollfds,

HANDLE *handles OVS_UNUSED,
   */
  if (timeout_when != LLONG_MIN) {
  retval = poll(pollfds, n_pollfds, time_left);
+} else {
+retval = 0;
  }
  if (retval < 0) {
  retval = -errno;
@@ -357,7 +358,8 @@ time_poll(struct pollfd *pollfds, int n_pollfds,
HANDLE *handles OVS_UNUSED,
  ovsrcu_quiesce_end();
  }
  
-if (deadline <= time_msec()) {

+now = time_msec();
+if (deadline <= now) {
  #ifndef _WIN32
  fatal_signal_handler(SIGALRM);
  #else
@@ -374,7 +376,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds,
HANDLE *handles OVS_UNUSED,
  break;
  }
  }
-*last_wakeup = time_msec();
+*last_wakeup = now;
  refresh_rusage();
  *elapsed = *last_wakeup - start;
  return retval;
--
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev





--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v5 2/2] Minimize the number of time calls in time_poll()

2021-08-04 Thread Gaëtan Rivet
On Wed, Aug 4, 2021, at 10:34, anton.iva...@cambridgegreys.com wrote:
> From: Anton Ivanov 
> 
> time_poll() makes an excessive number of time_msec() calls
> which incur a performance penalty.
> 
> 1. Avoid time_msec() call for timeout calculation when time_poll()
> is asked to skip poll()
> 
> 2. Reuse the time_msec() result from deadline calculation for
> last_wakeup and timeout calculation.
> 
> Signed-off-by: Anton Ivanov 

To conclude our previous thread, I don't disagree that
reducing time_msec() calls is interesting. Coherency traffic can take
a toll on some architectures, and this code is used by all cores,
even fastpath ones for dpif-netdev.

That being said, there is still the 'else { retval = 0; }' from the previous
version.

> ---
>  lib/timeval.c | 12 +++-
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/timeval.c b/lib/timeval.c
> index dbfbcb9e6..45a934513 100644
> --- a/lib/timeval.c
> +++ b/lib/timeval.c
> @@ -287,7 +287,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds, 
> HANDLE *handles OVS_UNUSED,
>long long int timeout_when, int *elapsed)
>  {
>  long long int *last_wakeup = last_wakeup_get();
> -long long int start;
> +long long int start, now;
>  bool quiescent;
>  int retval = 0;
>  
> @@ -297,13 +297,12 @@ time_poll(struct pollfd *pollfds, int n_pollfds, 
> HANDLE *handles OVS_UNUSED,
>  if (*last_wakeup && !thread_is_pmd()) {
>  log_poll_interval(*last_wakeup);
>  }
> -start = time_msec();
> +now = start = time_msec();
>  
>  timeout_when = MIN(timeout_when, deadline);
>  quiescent = ovsrcu_is_quiescent();
>  
>  for (;;) {
> -long long int now = time_msec();
>  int time_left;
>  retval = 0;
>  
> @@ -331,6 +330,8 @@ time_poll(struct pollfd *pollfds, int n_pollfds, 
> HANDLE *handles OVS_UNUSED,
>   */
>  if (timeout_when != LLONG_MIN) {
>  retval = poll(pollfds, n_pollfds, time_left);
> +} else {
> +retval = 0;
>  }
>  if (retval < 0) {
>  retval = -errno;
> @@ -357,7 +358,8 @@ time_poll(struct pollfd *pollfds, int n_pollfds, 
> HANDLE *handles OVS_UNUSED,
>  ovsrcu_quiesce_end();
>  }
>  
> -if (deadline <= time_msec()) {
> +now = time_msec();
> +if (deadline <= now) {
>  #ifndef _WIN32
>  fatal_signal_handler(SIGALRM);
>  #else
> @@ -374,7 +376,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds, 
> HANDLE *handles OVS_UNUSED,
>  break;
>  }
>  }
> -*last_wakeup = time_msec();
> +*last_wakeup = now;
>  refresh_rusage();
>  *elapsed = *last_wakeup - start;
>  return retval;
> -- 
> 2.20.1
> 
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> 


-- 
Gaetan Rivet
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v5 1/2] Optimize the poll loop for poll_immediate_wake()

2021-08-04 Thread Gaëtan Rivet
On Wed, Aug 4, 2021, at 10:34, anton.iva...@cambridgegreys.com wrote:
> From: Anton Ivanov 
> 
> If we are not obtaining any useful information out of the poll(),
> such as is a fd busy or not, we do not need to do a poll() if
> an immediate_wake() has been requested.
> 
> This cuts out all the pollfd hash additions, forming the poll
> arguments and the actual poll() after a call to
> poll_immediate_wake()
> 
> Signed-off-by: Anton Ivanov 

Hello Anton, thanks for the explanation regarding fatal_signal_wait()
and the new version.

Acked-by: Gaetan Rivet 

-- 
Gaetan Rivet
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2] ofctrl: Add memory usage statistics.

2021-08-04 Thread Numan Siddique
On Thu, Jul 29, 2021 at 10:52 AM Mark Gray  wrote:
>
> From: Dumitru Ceara 
>
> Track amount of allocated/freed memory for each of the major memory
> consumers in ofctrl.
>
> Co-authored-by: Mark Gray 
> Reported-at: https://bugzilla.redhat.com/1986821
> Signed-off-by: Dumitru Ceara 
> Signed-off-by: Mark Gray 

Thanks.  I applied to the main branch.

Numan

> ---
>  controller/ofctrl.c | 76 +
>  controller/ofctrl.h |  1 +
>  controller/ovn-controller.c |  1 +
>  3 files changed, 78 insertions(+)
>
> diff --git a/controller/ofctrl.c b/controller/ofctrl.c
> index 6d5da60db117..08fcfed8bb21 100644
> --- a/controller/ofctrl.c
> +++ b/controller/ofctrl.c
> @@ -213,6 +213,16 @@ struct installed_flow {
>  struct ovs_list desired_refs;
>  };
>
> +/* Global ofctrl memory usage specific statistics, all in bytes. */
> +struct ofctrl_mem_stats {
> +uint64_t sb_flow_ref_usage;
> +uint64_t desired_flow_usage;
> +uint64_t installed_flow_usage;
> +uint64_t oflow_update_usage;
> +};
> +
> +static struct ofctrl_mem_stats mem_stats;
> +
>  typedef bool
>  (*desired_flow_match_cb)(const struct desired_flow *candidate,
>   const void *arg);
> @@ -223,6 +233,7 @@ static struct desired_flow *desired_flow_alloc(
>  const struct match *match,
>  const struct ofpbuf *actions,
>  uint32_t meter_id);
> +static size_t desired_flow_size(const struct desired_flow *);
>  static struct desired_flow *desired_flow_lookup(
>  struct ovn_desired_flow_table *,
>  const struct ovn_flow *target);
> @@ -239,6 +250,7 @@ static struct installed_flow *installed_flow_lookup(
>  const struct ovn_flow *target, struct hmap *installed_flows);
>  static void installed_flow_destroy(struct installed_flow *);
>  static struct installed_flow *installed_flow_dup(struct desired_flow *);
> +static size_t installed_flow_size(const struct installed_flow *);
>  static struct desired_flow *installed_flow_get_active(struct installed_flow 
> *);
>
>  static uint32_t ovn_flow_match_hash(const struct ovn_flow *);
> @@ -287,6 +299,12 @@ ofctrl_flow_update_from_list_node(const struct ovs_list 
> *list_node)
>  return CONTAINER_OF(list_node, struct ofctrl_flow_update, list_node);
>  }
>
> +static size_t
> +ofctrl_flow_update_size(const struct ofctrl_flow_update *fup)
> +{
> +return sizeof *fup;
> +}
> +
>  /* Currently in-flight updates. */
>  static struct ovs_list flow_updates;
>
> @@ -602,6 +620,7 @@ run_S_CLEAR_FLOWS(void)
>  /* All flow updates are irrelevant now. */
>  struct ofctrl_flow_update *fup, *next;
>  LIST_FOR_EACH_SAFE (fup, next, list_node, &flow_updates) {
> +mem_stats.oflow_update_usage -= ofctrl_flow_update_size(fup);
>  ovs_list_remove(&fup->list_node);
>  free(fup);
>  }
> @@ -643,6 +662,7 @@ recv_S_UPDATE_FLOWS(const struct ofp_header *oh, enum 
> ofptype type,
>  if (fup->req_cfg >= cur_cfg) {
>  cur_cfg = fup->req_cfg;
>  }
> +mem_stats.oflow_update_usage -= ofctrl_flow_update_size(fup);
>  ovs_list_remove(&fup->list_node);
>  free(fup);
>  }
> @@ -980,6 +1000,18 @@ track_or_destroy_for_flow_del(struct 
> ovn_desired_flow_table *flow_table,
>  }
>  }
>
> +static size_t
> +sb_flow_ref_size(const struct sb_flow_ref *sfr)
> +{
> +return sizeof *sfr;
> +}
> +
> +static size_t
> +sb_to_flow_size(const struct sb_to_flow *stf)
> +{
> +return sizeof *stf;
> +}
> +
>  static struct sb_to_flow *
>  sb_to_flow_find(struct hmap *uuid_flow_table, const struct uuid *sb_uuid)
>  {
> @@ -998,6 +1030,7 @@ link_flow_to_sb(struct ovn_desired_flow_table 
> *flow_table,
>  struct desired_flow *f, const struct uuid *sb_uuid)
>  {
>  struct sb_flow_ref *sfr = xmalloc(sizeof *sfr);
> +mem_stats.sb_flow_ref_usage += sb_flow_ref_size(sfr);
>  sfr->flow = f;
>  sfr->sb_uuid = *sb_uuid;
>  ovs_list_insert(&f->references, &sfr->sb_list);
> @@ -1005,6 +1038,7 @@ link_flow_to_sb(struct ovn_desired_flow_table 
> *flow_table,
>   sb_uuid);
>  if (!stf) {
>  stf = xmalloc(sizeof *stf);
> +mem_stats.sb_flow_ref_usage += sb_to_flow_size(stf);
>  stf->sb_uuid = *sb_uuid;
>  ovs_list_init(&stf->flows);
>  hmap_insert(&flow_table->uuid_flow_table, &stf->hmap_node,
> @@ -1108,9 +1142,11 @@ ofctrl_add_or_append_flow(struct 
> ovn_desired_flow_table *desired_flows,
> existing->flow.ofpacts_len);
>  ofpbuf_put(&compound, f->flow.ofpacts, f->flow.ofpacts_len);
>
> +mem_stats.desired_flow_usage -= desired_flow_size(existing);
>  free(existing->flow.ofpacts);
>  existing->flow.ofpacts = xmemdup(compound.data, compound.size);
>  existing->flow.ofpacts_len = compound.size;
> +mem_stats.desired_flow_usage += desired_flow_size(existing);
>
>

[ovs-dev] [PATCH] netdev-offload-dpdk: fix a crash in dump_flow_attr()

2021-08-04 Thread Sriharsha Basavapatna via dev
In netdev_offload_dpdk_flow_create() when an offload request fails,
dump_flow() is called to log a warning message. The 's_tnl' string
in flow_patterns gets initialized in vport_to_rte_tunnel() conditionally
via ds_put_format(). If it is not initialized, it crashes later in
dump_flow_attr()->ds_clone()->memcpy() while dereferencing this string.

Fix this by initializing s_tnl using ds_cstr(). Fix a similar
issue with actions->s_tnl.

Signed-off-by: Sriharsha Basavapatna 
---
 lib/netdev-offload-dpdk.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index f6706ee0c..243e2c430 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -788,6 +788,7 @@ free_flow_patterns(struct flow_patterns *patterns)
 free(CONST_CAST(void *, patterns->items[i].mask));
 }
 }
+ds_destroy(&patterns->s_tnl);
 free(patterns->items);
 patterns->items = NULL;
 patterns->cnt = 0;
@@ -1334,6 +1335,7 @@ netdev_offload_dpdk_mark_rss(struct flow_patterns 
*patterns,
 struct rte_flow_error error;
 struct rte_flow *flow;
 
+ds_cstr(&actions.s_tnl);
 add_flow_mark_rss_actions(&actions, flow_mark, netdev);
 
 flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns,
@@ -1814,6 +1816,7 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
 struct rte_flow_error error;
 int ret;
 
+ds_cstr(&actions.s_tnl);
 ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len);
 if (ret) {
 goto out;
@@ -1838,6 +1841,7 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
 bool actions_offloaded = true;
 struct rte_flow *flow;
 
+ds_cstr(&patterns.s_tnl);
 if (parse_flow_match(netdev, info->orig_in_port, &patterns, match)) {
 VLOG_DBG_RL(&rl, "%s: matches of ufid "UUID_FMT" are not supported",
 netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid));
-- 
2.30.0.349.g30b29f044a


-- 
This electronic communication and the information and any files transmitted 
with it, or attached to it, are confidential and are intended solely for 
the use of the individual or entity to whom it is addressed and may contain 
information that is confidential, legally privileged, protected by privacy 
laws, or otherwise restricted from disclosure to anyone else. If you are 
not the intended recipient or the person responsible for delivering the 
e-mail to the intended recipient, you are hereby notified that any use, 
copying, distributing, dissemination, forwarding, printing, or copying of 
this e-mail is strictly prohibited. If you received this e-mail in error, 
please return the e-mail to the sender, delete it from your computer, and 
destroy any printed copy of it.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] acinclude: Don't set AVX512-related configuration via CFLAGS.

2021-08-04 Thread Ferriter, Cian


> -Original Message-
> From: Ilya Maximets 
> Sent: Tuesday 3 August 2021 18:37
> To: ovs-dev@openvswitch.org; Stokes, Ian 
> Cc: Flavio Leitner ; Amber, Kumar ; 
> Ferriter, Cian
> ; Van Haaren, Harry ; 
> Ilya Maximets
> 
> Subject: [PATCH] acinclude: Don't set AVX512-related configuration via CFLAGS.
> 
> The correct way to pass configuration options is to define them
> inside the config.h.  Additionally, few long lines wrapped and
> fixed the unnecessary double check for -mavx512f.
> 
> Fixes: abb807e27dd4 ("dpif-netdev: Add command to switch dpif 
> implementation.")
> Fixes: 5324b54e606a ("dpif-netdev: Add configure to enable autovalidator at 
> build time.")
> Fixes: e90e115a01af ("dpif-netdev: implement subtable lookup validation.")
> Fixes: 352b6c7116cd ("dpif-lookup: add avx512 gather implementation.")
> Signed-off-by: Ilya Maximets 
> ---
> 
> Only tested that it builds.  Didn't run any runtime checks
> since I have no HW for that.
> 
>  acinclude.m4  | 36 ++--
>  configure.ac  |  4 +---
>  m4/openvswitch.m4 |  5 -
>  3 files changed, 35 insertions(+), 10 deletions(-)
> 



Hi Ilya,

Thanks for the changes, much appreciated.

I've tested out this patch on a system with AVX512 and everything works as 
expected.

I can verify that:
* I see an additional output line in the ./configure step like below:
  checking whether gcc accepts -mavx512f... yes
* When I add " --enable-autovalidator --enable-dpif-default-avx512 
--enable-mfex-default-autovalidator" options to configure, these options still 
work as expected, enabling the respective features at compile time.
* I can see the configuration options in config.h after the patch is applied:
~# diff config.h_before_patch config.h_after_patch
26a27,30
> /* Autovalidator for the userspace datapath classifier is a default
>implementation. */
> #define DPCLS_AUTOVALIDATOR_DEFAULT 1
>
29a34,37
> /* DPIF AVX512 is a default implementation of the userspace datapath
>interface. */
> #define DPIF_AVX512_DEFAULT 1
>
35a44,46
> /* Define to 1 if compiler supports AVX512. */
> #define HAVE_AVX512F 1
>
90a102,104
> /* Define to 1 if binutils correctly supports AVX512. */
> #define HAVE_LD_AVX512_GOOD 1
>
263a278,280
>
> /* Autovalidator for miniflow_extract is a default implementation. */
> #define MFEX_AUTOVALIDATOR_DEFAULT 1

Acked-by: Cian Ferriter 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC PATCH ovn 0/4] Use Distributed Gateway Port for ovn-controller scalability.

2021-08-04 Thread Krzysztof Klimonda
Hi Han,

On Tue, Aug 3, 2021, at 20:33, Han Zhou wrote:
> On Tue, Aug 3, 2021 at 11:09 AM Numan Siddique  wrote:
> >
> > On Fri, Jul 30, 2021 at 3:22 AM Han Zhou  wrote:
> > >
> > > Note: This patch series is on top of a pending patch that is still under
> > > review:
> http://patchwork.ozlabs.org/project/ovn/patch/20210729080218.1235041-1-hz...@ovn.org/
> > >
> > > It is RFC because: a) it is based on the unmerged patch. b) DDlog
> > > changes are not done yet. Below is a copy of the commit message of the
> last
> > > patch in this series:
> > >
> > > For a fully distributed virtual network dataplane, ovn-controller
> > > flood-fills datapaths that are connected through patch ports. This
> > > creates scale problems in ovn-controller when the connected datapaths
> > > are too many.
> > >
> > > In a particular situation, when distributed gateway ports are used to
> > > connect logical routers to logical switches, when there is no need for
> > > distributed processing of those gateway ports (e.g. no dnat_and_snat
> > > configured), the datapaths on the other side of the gateway ports are
> > > not needed locally on the current chassis. This patch avoids pulling
> > > those datapaths to local in those scenarios.
> > >
> > > There are two scenarios that can greatly benefit from this optimization.
> > >
> > > 1) When there are multiple tenants, each has its own logical topology,
> > >but sharing the same external/provider networks, connected to their
> > >own logical routers with DGPs. Without this optimization, each
> > >ovn-controller would process all logical topology of all tenants and
> > >program flows for all of them, even if there are only workloads of a
> > >very few number of tenants on the node where the ovn-controller is
> > >running, because the shared external network connects all tenants.
> > >With this change, only the logical topologies relevant to the node
> > >are processed and programmed on the node.
> > >
> > > 2) In some deployments, such as ovn-kubernetes, logical switches are
> > >bound to chassises instead of distributed, because each chassis is
> > >assigned dedicated subnets. With the current implementation,
> > >ovn-controller on each node processes all logical switches and all
> > >ports on them, without knowing that they are not distributed at all.
> > >At large scale with N nodes (N = hundreds or even more), there are
> > >roughly N times processing power wasted for the logical connectivity
> > >related flows. With this change, those deployments can utilize DGP
> > >to connect the node level logical switches to distributed router(s),
> > >with gateway chassis (or HA chassis without really HA) of the DGP
> > >set to the chassis where the logical switch is bound. This inherently
> > >tells OVN the mapping between logical switch and chassis, and
> > >ovn-controller would smartly avoid processing topologies of other
> node
> > >level logical switches, which would hugely save compute cost of each
> > >ovn-controller.
> > >
> > > For 2), test result for an ovn-kubernetes alike deployment shows
> > > significant improvement of ovn-controller, both CPU (>90% reduced) and
> memory.
> > >
> > > Topology:
> > >
> > > - 1000 nodes, 1 LS with 10 LSPs per node, connected to a distributed
> > >   router.
> > >
> > > - 2 large port-groups PG1 and PG2, each with 2000 LSPs
> > >
> > > - 10 stateful ACLs: 5 from PG1 to PG2, 5 from PG2 to PG1
> > >
> > > - 1 GR per node, connected to the distributed router through a join
> > >   switch. Each GR also connects to an external logical switch per node.
> > >   (This part is to keep the test environment close to a real
> > >ovn-kubernetes setup but shouldn't make much difference for the
> > >comparison)
> > >
> > >  Before the change 
> > > OVS flows per node: 297408
> > > ovn-controller memory: 772696 KB
> > > ovn-controller recompute: 13s
> > > ovn-controller restart (recompute + reinstall OVS flows): 63s
> > >
> > >  After the change (also use DGP to connect node level LSes) 
> > > OVS flows per node: 81139 (~70% reduced)
> > > ovn-controller memory: 163464 KB (~80% reduced)
> > > ovn-controller recompute: 0.86s (>90% reduced)
> > > ovn-controller restart (recompute + reinstall OVS flows): 5s (>90%
> reduced)
> >
> > Hi Han,
> >
> > Thanks for these RFC patches.  The improvements are significant.
> > That's awesome.
> >
> > If I understand this RFC correctly, ovn-k8s will set the
> > gateway_chassis for each logical
> > router port of the cluster router (ovn_cluster_router) connecting to
> > the node logical switch right ?
> >
> > If so, instead of using the multiple gw port feature, why can't
> > ovn-k8s just set the chassis=
> > in the logical switch other_config option ?
> >
> > ovn-controllers can exclude the logical switches from the
> > local_datapaths if they don't belong to the local chassis.
> >
> > I'm not entirely sure if this would

Re: [ovs-dev] [PATCH] conntrack: extract l4 information for SCTP

2021-08-04 Thread Paolo Valerio
Hello Gaëtan,

Gaëtan Rivet  writes:

> On Tue, Jul 13, 2021, at 22:59, Paolo Valerio wrote:
>> since a27d70a89 ("conntrack: add generic IP protocol support") all
>> the unrecognized IP protocols get handled using ct_proto_other ops
>> and are managed as L3 using 3 tuples.
>> 
>> This patch stores L4 information for SCTP in the conn_key so that
>> multiple conn instances, instead of one with ports zeroed, will be
>> created when there are multiple SCTP connections between two hosts.
>> It also performs crc32c check when not offloaded, and adds SCTP to
>> pat_enabled.
>> 
>> With this patch, given two SCTP association between two hosts, and
>> given for example the following rules:
>> 
>> in_port=tap0,ip,action=ct(commit,zone=1,nat(src=10.1.1.240:12345-12346)),tap1
>> in_port=tap1,ct_state=-trk,ip,action=ct(table=0,zone=1,nat)
>> in_port=tap1,ct_state=+trk,ct_zone=1,ip,action=tap0
>> 
>> the following entries will be created:
>> 
>> sctp,orig=(src=192.168.100.100,dst=10.1.1.1,sport=55884,dport=5201),reply=(src=10.1.1.1,dst=10.1.1.240,sport=5201,dport=12345),zone=1
>> sctp,orig=(src=192.168.100.100,dst=10.1.1.1,sport=59874,dport=5202),reply=(src=10.1.1.1,dst=10.1.1.240,sport=5202,dport=12346),zone=1
>> 
>> instead of:
>> 
>> sctp,orig=(src=192.168.100.100,dst=10.1.1.1,sport=0,dport=0),reply=(src=10.1.1.1,dst=10.1.1.240,sport=0,dport=0),zone=1
>> 
>> Signed-off-by: Paolo Valerio 
>> ---
>>  lib/conntrack.c  |   94 
>> ++
>>  lib/packets.h|   18 +++
>>  tests/system-kmod-macros.at  |   11 
>>  tests/system-traffic.at  |   80 
>>  tests/system-userspace-macros.at |7 +++
>>  5 files changed, 209 insertions(+), 1 deletion(-)
>> 
>> diff --git a/lib/conntrack.c b/lib/conntrack.c
>> index 551c2061a..9c628c052 100644
>> --- a/lib/conntrack.c
>> +++ b/lib/conntrack.c
>> @@ -28,8 +28,10 @@
>>  #include "conntrack-tp.h"
>>  #include "coverage.h"
>>  #include "csum.h"
>> +#include "crc32c.h"
>>  #include "ct-dpif.h"
>>  #include "dp-packet.h"
>> +#include "unaligned.h"
>
> Hello Paolo,
>
> The code looks good to me and the test runs properly.
>

thanks for the review.

> I have only a small question. The "unaligned.h" include above is
> not sorted alphabetically. Is there a reason for this that forces
> to put it here?
>

not really, most probably I forgot to reorder :)
Thanks for spotting this. I'll send a v2.

> With either a comment explaining this constraint or the include line moved,
> Acked-by: Gaetan Rivet 
>
> Thanks!
> -- 
> Gaetan Rivet

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn] system-test: Fix "2 LSs IGMP and MLD"

2021-08-04 Thread Xavier Simonart
When ADD_NAMESPACES or ADD_VETH were executed with variables
within the arguments (e.g. ADD_NAMESPACES(sw1-p$i)), the macros were
not expanded properly, resulting in bad on_exit calls.
This caused multiple tests (e.g. 2 LSs IGMP and MLD) to be skipped.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1858221
Signed-off-by: Xavier Simonart 
---
 tests/system-common-macros.at | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at
index b742a2cb9..616a87fcf 100644
--- a/tests/system-common-macros.at
+++ b/tests/system-common-macros.at
@@ -16,7 +16,7 @@ m4_define([ADD_NAMESPACES],
[m4_foreach([ns], [$@],
[DEL_NAMESPACES(ns)
 AT_CHECK([ip netns add ns || return 77])
-on_exit 'DEL_NAMESPACES(ns)'
+on_exit "DEL_NAMESPACES(ns)"
 ip netns exec ns sysctl -w net.netfilter.nf_conntrack_helper=0
])
]
@@ -85,7 +85,7 @@ m4_define([ADD_VETH],
   if test -n "$6"; then
 NS_CHECK_EXEC([$2], [ip route add default via $6])
   fi
-  on_exit 'ip link del ovs-$1'
+  on_exit "ip link del ovs-$1"
 ]
 )
 
-- 
2.31.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 2/2] Minimize the number of time calls in time_poll()

2021-08-04 Thread anton . ivanov
From: Anton Ivanov 

time_poll() makes an excessive number of time_msec() calls
which incur a performance penalty.

1. Avoid time_msec() call for timeout calculation when time_poll()
is asked to skip poll()

2. Reuse the time_msec() result from deadline calculation for
last_wakeup and timeout calculation.

Signed-off-by: Anton Ivanov 
---
 lib/timeval.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/lib/timeval.c b/lib/timeval.c
index dbfbcb9e6..45a934513 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -287,7 +287,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE 
*handles OVS_UNUSED,
   long long int timeout_when, int *elapsed)
 {
 long long int *last_wakeup = last_wakeup_get();
-long long int start;
+long long int start, now;
 bool quiescent;
 int retval = 0;
 
@@ -297,13 +297,12 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE 
*handles OVS_UNUSED,
 if (*last_wakeup && !thread_is_pmd()) {
 log_poll_interval(*last_wakeup);
 }
-start = time_msec();
+now = start = time_msec();
 
 timeout_when = MIN(timeout_when, deadline);
 quiescent = ovsrcu_is_quiescent();
 
 for (;;) {
-long long int now = time_msec();
 int time_left;
 retval = 0;
 
@@ -331,6 +330,8 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE 
*handles OVS_UNUSED,
  */
 if (timeout_when != LLONG_MIN) {
 retval = poll(pollfds, n_pollfds, time_left);
+} else {
+retval = 0;
 }
 if (retval < 0) {
 retval = -errno;
@@ -357,7 +358,8 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE 
*handles OVS_UNUSED,
 ovsrcu_quiesce_end();
 }
 
-if (deadline <= time_msec()) {
+now = time_msec();
+if (deadline <= now) {
 #ifndef _WIN32
 fatal_signal_handler(SIGALRM);
 #else
@@ -374,7 +376,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE 
*handles OVS_UNUSED,
 break;
 }
 }
-*last_wakeup = time_msec();
+*last_wakeup = now;
 refresh_rusage();
 *elapsed = *last_wakeup - start;
 return retval;
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 1/2] Optimize the poll loop for poll_immediate_wake()

2021-08-04 Thread anton . ivanov
From: Anton Ivanov 

If we are not obtaining any useful information out of the poll(),
such as whether an fd is busy or not, we do not need to do a poll() if
an immediate_wake() has been requested.

This cuts out all the pollfd hash additions, forming the poll
arguments and the actual poll() after a call to
poll_immediate_wake()

Signed-off-by: Anton Ivanov 
---
 lib/poll-loop.c | 82 +
 lib/timeval.c   | 15 +++--
 2 files changed, 68 insertions(+), 29 deletions(-)

diff --git a/lib/poll-loop.c b/lib/poll-loop.c
index 4e751ff2c..ce681129c 100644
--- a/lib/poll-loop.c
+++ b/lib/poll-loop.c
@@ -107,6 +107,13 @@ poll_create_node(int fd, HANDLE wevent, short int events, 
const char *where)
 
 COVERAGE_INC(poll_create_node);
 
+if (loop->timeout_when == LLONG_MIN) {
+/* There was a previous request to bail out of this poll loop.
+ * There is no point to engage in yack shaving a poll hmap.
+ */
+return;
+}
+
 /* Both 'fd' and 'wevent' cannot be set. */
 ovs_assert(!fd != !wevent);
 
@@ -181,8 +188,21 @@ poll_wevent_wait_at(HANDLE wevent, const char *where)
 void
 poll_timer_wait_at(long long int msec, const char *where)
 {
-long long int now = time_msec();
+long long int now;
 long long int when;
+struct poll_loop *loop = poll_loop();
+
+/* We have an outstanding request for an immediate wake -
+ * either explicit (from poll_immediate_wake()) or as a
+ * result of a previous timeout calculation which gave
+ * a negative timeout.
+ * No point trying to recalculate the timeout.
+ */
+if (loop->timeout_when == LLONG_MIN) {
+return;
+}
+
+now = time_msec();
 
 if (msec <= 0) {
 /* Wake up immediately. */
@@ -229,7 +249,7 @@ poll_timer_wait_until_at(long long int when, const char 
*where)
 void
 poll_immediate_wake_at(const char *where)
 {
-poll_timer_wait_at(0, where);
+poll_timer_wait_at(LLONG_MIN, where);
 }
 
 /* Logs, if appropriate, that the poll loop was awakened by an event
@@ -320,49 +340,57 @@ poll_block(void)
 {
 struct poll_loop *loop = poll_loop();
 struct poll_node *node;
-struct pollfd *pollfds;
+struct pollfd *pollfds = NULL;
 HANDLE *wevents = NULL;
 int elapsed;
-int retval;
+int retval = 0;
 int i;
 
-/* Register fatal signal events before actually doing any real work for
- * poll_block. */
-fatal_signal_wait();
 
 if (loop->timeout_when == LLONG_MIN) {
 COVERAGE_INC(poll_zero_timeout);
+} else {
+/* Register fatal signal poll events only if we intend to do real
+ * work for poll_block. The fatal signals themselves are handled
+ * in either case by invoking fatal_signal_run() at the end of the
+ * loop.
+ */
+fatal_signal_wait();
 }
 
 timewarp_run();
-pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
+if (loop->timeout_when != LLONG_MIN) {
+pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
 
 #ifdef _WIN32
-wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
+wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
 #endif
 
-/* Populate with all the fds and events. */
-i = 0;
-HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
-pollfds[i] = node->pollfd;
+/* Populate with all the fds and events. */
+i = 0;
+HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
+pollfds[i] = node->pollfd;
 #ifdef _WIN32
-wevents[i] = node->wevent;
-if (node->pollfd.fd && node->wevent) {
-short int wsa_events = 0;
-if (node->pollfd.events & POLLIN) {
-wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
+wevents[i] = node->wevent;
+if (node->pollfd.fd && node->wevent) {
+short int wsa_events = 0;
+if (node->pollfd.events & POLLIN) {
+wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
+}
+if (node->pollfd.events & POLLOUT) {
+wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
+}
+WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
 }
-if (node->pollfd.events & POLLOUT) {
-wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
-}
-WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
-}
 #endif
-i++;
-}
+i++;
+}
 
-retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,
-   loop->timeout_when, &elapsed);
+retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,
+   loop->timeout_when, &elapsed);
+} else {
+retval = time_poll(NULL, 0, NULL, loop->timeout_when, &elapsed);
+}
 if (retv

[ovs-dev] [PATCH ovn v3 2/2] system-ovn.at: Fix flaky "load-balancing" test (all servers targetted).

2021-08-04 Thread Xavier Simonart
This test sends requests to LB and checks that each server receives at
least one request. However, even if 20 requests are sent to 3 servers,
there was a possibility that one server did not get any request (0.3%).

Signed-off-by: Xavier Simonart 
---
 tests/system-ovn.at | 115 +---
 1 file changed, 75 insertions(+), 40 deletions(-)

diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 2d21fb262..456bb0599 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -1449,45 +1449,72 @@ OVS_START_L7([bar2], [http])
 OVS_START_L7([bar3], [http])
 
 dnl Should work with the virtual IP 30.0.0.1 address through NAT
-for i in `seq 1 20`; do
-echo Request $i
-NS_CHECK_EXEC([foo1], [wget 30.0.0.1 -t 5 -T 1 --retry-connrefused -v -o 
wget$i.log])
-done
-
 dnl Each server should have at least one connection.
-AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \
-sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl
-tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=)
-tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=)
-tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=)
+dnl With 20 requests, one server might not receive any connection
+dnl in 0.3% of cases, so run a few times.
+
+OVS_WAIT_UNTIL([
+for i in `seq 1 20`; do
+echo Request $i;
+ip netns exec foo1 wget 30.0.0.1 -t 5 -T 1 --retry-connrefused -v -o 
wget$i.log;
+done
+ct1=$(ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \
+  sed -e 's/zone=[[0-9]]*/zone=/'| \
+  grep 
"tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=)"
 -c)
+ct2=$(ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \
+  sed -e 's/zone=[[0-9]]*/zone=/'| \
+  grep 
"tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=)"
 -c)
+ct3=$(ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \
+  sed -e 's/zone=[[0-9]]*/zone=/'| \
+  grep 
"tcp,orig=(src=192.168.1.2,dst=30.0.0.1,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=)"
 -c)
+test $ct1 -gt 0 && echo "There are flows for 172.16.1.2"
+test $ct2 -gt 0 && echo "There are flows for 172.16.1.3"
+test $ct3 -gt 0 && echo "There are flows for 172.16.1.4"
+test $ct1 -gt 0 && test $ct2 -gt 0 && test $ct3 -gt 0
 ])
 
 dnl Should work with the virtual IP 30.0.0.3 address through NAT
-for i in `seq 1 20`; do
-echo Request $i
-NS_CHECK_EXEC([foo1], [wget 30.0.0.3 -t 5 -T 1 --retry-connrefused -v -o 
wget$i.log])
-done
-
 dnl Each server should have at least one connection.
-AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.3) | \
-sed -e 's/zone=[[0-9]]*/zone=/'], [0], [dnl
-tcp,orig=(src=192.168.1.2,dst=30.0.0.3,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=)
-tcp,orig=(src=192.168.1.2,dst=30.0.0.3,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=)
-tcp,orig=(src=192.168.1.2,dst=30.0.0.3,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=)
+OVS_WAIT_UNTIL([
+for i in `seq 1 20`; do
+echo Request $i;
+ip netns exec foo1 wget 30.0.0.3 -t 5 -T 1 --retry-connrefused -v -o 
wget$i.log;
+done
+ct1=$(ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.3) | \
+  sed -e 's/zone=[[0-9]]*/zone=/'| \
+  grep 
"tcp,orig=(src=192.168.1.2,dst=30.0.0.3,sport=,dport=),reply=(src=172.16.1.2,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=)"
 -c)
+ct2=$(ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.3) | \
+  sed -e 's/zone=[[0-9]]*/zone=/'| \
+  grep 
"tcp,orig=(src=192.168.1.2,dst=30.0.0.3,sport=,dport=),reply=(src=172.16.1.3,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=)"
 -c)
+ct3=$(ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.3) | \
+  sed -e 's/zone=[[0-9]]*/zone=/'| \
+  grep 
"tcp,orig=(src=192.168.1.2,dst=30.0.0.3,sport=,dport=),reply=(src=172.16.1.4,dst=192.168.1.2,sport=,dport=),zone=,labels=0x2,protoinfo=(state=)"
 -c)
+test $ct1 -gt 0 && echo "There are flows for 172.16.1.2"
+test $ct2 -gt 0 && echo "There are flows for 172.16.1.3"
+test $ct3 -gt 0 && echo "There are flows for 172.16.1.4"
+test $ct1 -gt 0 && test $ct2 -gt 0 && test $ct3 -gt 0
 ])
 
 dnl Test load-balancing that includes L4 ports in NAT.
-for i in `seq 1 20`; do
-echo Request $i
-NS_CHECK_EXEC([foo1], [wget 30.0.0.2:8000 -t 5 -T 1 --retry-c

[ovs-dev] [PATCH ovn v3 1/2] system-ovn.at: Fix flaky "load-balancing" test.

2021-08-04 Thread Xavier Simonart
When running load-balancing test, tcpdump is executed in background.
When the background tcpdump stops, it sends to stderr a message
(such as number of packets captured) which might be caught by the
following command (wget) causing the test to fail.

Fixes: ebbcd8e8cc ("northd: add reject action for lb with no backends")
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1989517
Signed-off-by: Xavier Simonart 
---
 tests/system-ovn.at | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 4288d80e5..2d21fb262 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -1593,7 +1593,7 @@ OVS_WAIT_UNTIL([
 ovn-nbctl --reject lb-add lb3 30.0.0.10:80 ""
 ovn-nbctl ls-lb-add foo lb3
 # Filter reset segments
-NS_CHECK_EXEC([foo1], [tcpdump -c 1 -neei foo1 ip[[33:1]]=0x14 > rst.pcap &])
+NS_CHECK_EXEC([foo1], [tcpdump -c 1 -neei foo1 ip[[33:1]]=0x14 > rst.pcap 
2>/dev/null &])
 sleep 1
 NS_CHECK_EXEC([foo1], [wget -q 30.0.0.10],[4])
 
-- 
2.31.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v7 ovn 3/3] northd: add check_pkt_larger lflows for ingress traffic

2021-08-04 Thread Lorenzo Bianconi
[...]
> 
> I think REGBIT_EGRESS_LOOPBACK check is required so that the injected icmp4
> packet generated from ovn-controller is skipped in this stage.
> Otherwise there will
> be recursion of the packet.
> 
> @Lorenzo Bianconi  Please correct me if I'm wrong.

correct, REGBIT_EGRESS_LOOPBACK is used to skip re-injected packets

> 
> 
> > >
> > > (It would be good if this is intentional because it would make my next
> > rebasing easier, but I just want to make sure I understand it correctly)
> > >
> >
> > I checked further on this change, and it seems the outport is not needed
> > for this flow because in the previous stage ( s_ROUTER_IN_CHK_PKT_LEN) it
> > is already matched. (although I am still not sure why adding the LOOPBACK
> > check)
> > However, for the ingress part, check_pkt_larger() is added to the stage
> > s_ROUTER_IN_ADMISSION regardless of whether it is a DGP, but based on
> > whether the gw_mtu option exists. I think that is reasonable. It didn't
> > need to distinguish DGP and ports on gateway routers. So I wonder if we
> > could do the same for the egress, i.e. combining the logic of DGP and ports
> > on gateway routers. This would simply the code and also the flows. I can
> > make that change if it sounds good. (It can also simplify my rebasing for
> > the multiple DGP support)
> 
> I'm fine with it if you think it will not break the existing functionality.
> 
> @Lorenzo Bianconi  If you've any comments here please let us know.

I am fine with the change if existing functionality is ok.
Btw I am not able to trigger any failure in "router - check packet length - icmp
defrag" in unit-test (I tried multiple times running all the tests and
each test individually).

Regards,
Lorenzo

> 
> Thanks
> Numan
> 
> >
> > > > Regarding the test case,  it seems to me it's flaky.  When I run the
> > > > whole test suite with -j5, almost 100% of the
> > > > time the test fails, but it passes when run individually for me.
> > > >
> > > Thanks for confirming. This is interesting, completely reverse behavior
> > on my machine :)
> > >
> > > > The test case added in this patch series, does try to make sure that
> > > > both northd-c and northd-ddlog generate
> > > > the exact same flows.
> > > >
> > > > Thanks
> > > > Numan
> > > >
> > > > >
> > > > > Thanks
> > > > > Numan
> > > > >
> > > > > >
> > > > > > Thanks,
> > > > > > Han
> > > > > > ___
> > > > > > dev mailing list
> > > > > > d...@openvswitch.org
> > > > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > > > > >
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
> 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev