When multiple gateway routers exist, a packet can
enter any gateway router. Once the packet reaches its
destination, its reverse direction should be via the
same gateway router.  This is achieved by doing a SNAT
of the packet in the inward direction (towards logical space)
with a IP address of the gateway router such that packet travels back
to the same gateway router.

The above means that you can have SNAT rules in the NB
database for both directions.  For e.g. if the logical
ip address are of the range 192.168.1.0/24, you will have
one SNAT rule to transform packet from 192.168.1.0/24 to
an external_ip and another SNAT rule to transform
"0.0.0.0/0" (all external initiated traffic) to a gateway_ip.

For a particular connection, we should do SNAT in only one
direction.  And to do that in the pipeline, we check whether a packet
has already been SNATted and if it has a transformation, we should not
do it again.

Signed-off-by: Gurucharan Shetty <g...@ovn.org>
---
 ovn/lib/actions.c           |  8 --------
 ovn/lib/logical-fields.c    |  2 ++
 ovn/lib/logical-fields.h    |  4 ++++
 ovn/northd/ovn-northd.8.xml | 49 +++++++++++++++++++++++++++++++++++----------
 ovn/northd/ovn-northd.c     | 31 ++++++++++++++++------------
 ovn/ovn-sb.xml              |  8 ++++----
 tests/ovn.at                |  2 +-
 7 files changed, 67 insertions(+), 37 deletions(-)

diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
index df526c0..7896d00 100644
--- a/ovn/lib/actions.c
+++ b/ovn/lib/actions.c
@@ -787,14 +787,6 @@ encode_ct_nat(const struct ovnact_ct_nat *cn,
     ct = ofpacts->header;
     if (cn->ip) {
         ct->flags |= NX_CT_F_COMMIT;
-    } else if (snat) {
-        /* XXX: For performance reasons, we try to prevent additional
-         * recirculations.  So far, ct_snat which is used in a gateway router
-         * does not need a recirculation. ct_snat(IP) does need a
-         * recirculation.  Should we consider a method to let the actions
-         * specify whether an action needs recirculation if there more use
-         * cases?. */
-        ct->recirc_table = NX_CT_RECIRC_NONE;
     }
     ofpact_finish(ofpacts, &ct->ofpact);
     ofpbuf_push_uninit(ofpacts, ct_offset);
diff --git a/ovn/lib/logical-fields.c b/ovn/lib/logical-fields.c
index d4578c3..27eba01 100644
--- a/ovn/lib/logical-fields.c
+++ b/ovn/lib/logical-fields.c
@@ -88,6 +88,8 @@ ovn_init_symtab(struct shash *symtab)
     char flags_str[16];
     snprintf(flags_str, sizeof flags_str, "flags[%d]", MLF_ALLOW_LOOPBACK_BIT);
     expr_symtab_add_subfield(symtab, "flags.loopback", NULL, flags_str);
+    snprintf(flags_str, sizeof flags_str, "flags[%d]", MLF_SNAT_DONE);
+    expr_symtab_add_subfield(symtab, "flags.snat", NULL, flags_str);
 
     /* Connection tracking state. */
     expr_symtab_add_field(symtab, "ct_mark", MFF_CT_MARK, NULL, false);
diff --git a/ovn/lib/logical-fields.h b/ovn/lib/logical-fields.h
index a1f1da6..8f12e06 100644
--- a/ovn/lib/logical-fields.h
+++ b/ovn/lib/logical-fields.h
@@ -47,6 +47,7 @@ void ovn_init_symtab(struct shash *symtab);
 enum mff_log_flags_bits {
     MLF_ALLOW_LOOPBACK_BIT = 0,
     MLF_RCV_FROM_VXLAN_BIT = 1,
+    MLF_SNAT_DONE_BIT = 2,
 };
 
 /* MFF_LOG_FLAGS_REG flag assignments */
@@ -59,6 +60,9 @@ enum mff_log_flags {
      * VXLAN encapsulation.  Egress port information is available for
      * Geneve and STT tunnel types. */
     MLF_RCV_FROM_VXLAN = (1 << MLF_RCV_FROM_VXLAN_BIT),
+
+    /* Indicate that a packet has already been transformed in a SNAT zone. */
+    MLF_SNAT_DONE = (1 << MLF_SNAT_DONE_BIT),
 };
 
 #endif /* ovn/lib/logical-fields.h */
diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
index b406db6..abae49c 100644
--- a/ovn/northd/ovn-northd.8.xml
+++ b/ovn/northd/ovn-northd.8.xml
@@ -1149,7 +1149,7 @@ icmp4 {
           to change the source IP address of a packet from <var>A</var> to
           <var>B</var>, a priority-100 flow matches <code>ip &amp;&amp;
           ip4.dst == <var>B</var></code> with an action
-          <code>ct_snat; next;</code>.
+          <code>ct_snat;</code>.
         </p>
 
         <p>
@@ -1159,7 +1159,34 @@ icmp4 {
       </li>
     </ul>
 
-    <h3>Ingress Table 4: DNAT</h3>
+    <h3>Ingress Table 4: POST_UNSNAT</h3>
+
+    <p>
+      This is to get the SNAT state of the packet after having sent the packet
+      through the SNAT zone in the previous table.
+    </p>
+
+    <ul>
+      <li>
+        <p>
+          A priority-100 flow that matches <code>ip &amp;&amp; ct.dnat</code>
+          with an action <code>flags.snat = 1; next;</code> indicating that
+          a UNSNAT happened in the previous table.
+        </p>
+
+        <p>
+          A priority-90 flow with match <code>ip</code>
+          with an action <code>flags.snat = 0; next;</code>.
+        </p>
+
+        <p>
+          A priority-0 logical flow with match <code>1</code> has actions
+          <code>next;</code>.
+        </p>
+      </li>
+    </ul>
+
+    <h3>Ingress Table 5: DNAT</h3>
 
     <p>
       Packets enter the pipeline with destination IP address that needs to
@@ -1208,7 +1235,7 @@ icmp4 {
       </li>
     </ul>
 
-    <h3>Ingress Table 5: IP Routing</h3>
+    <h3>Ingress Table 6: IP Routing</h3>
 
     <p>
       A packet that arrives at this table is an IP packet that should be
@@ -1219,9 +1246,9 @@ icmp4 {
       packet's final destination, unchanged) and advances to the next
       table for ARP resolution.  It also sets <code>reg1</code> (or
       <code>xxreg1</code>) to the IP address owned by the selected router
-      port (Table 7 will generate ARP request, if needed, with
-      <code>reg0</code> as the target protocol address and <code>reg1</code>
-      as the source protocol address).
+      port (Ingress table <code>ARP Request</code> will generate ARP request,
+      if needed, with <code>reg0</code> as the target protocol address and
+      <code>reg1</code> as the source protocol address).
     </p>
 
     <p>
@@ -1300,7 +1327,7 @@ next;
       </li>
     </ul>
 
-    <h3>Ingress Table 6: ARP/ND Resolution</h3>
+    <h3>Ingress Table 7: ARP/ND Resolution</h3>
 
     <p>
       Any packet that reaches this table is an IP packet whose next-hop
@@ -1382,7 +1409,7 @@ next;
       </li>
     </ul>
 
-    <h3>Ingress Table 7: ARP Request</h3>
+    <h3>Ingress Table 8: ARP Request</h3>
 
     <p>
       In the common case where the Ethernet destination has been resolved, this
@@ -1408,9 +1435,9 @@ arp {
         </pre>
 
         <p>
-          (Ingress table 4 initialized <code>reg1</code> with the IP address
-          owned by <code>outport</code> and <code>reg0</code> with the next-hop
-          IP address)
+          (Ingress table <code>IP Routing</code> initialized <code>reg1</code>
+           with the IP address owned by <code>outport</code> and
+           <code>reg0</code> with the next-hop IP address)
         </p>
 
         <p>
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 86504aa..72205e5 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -129,10 +129,11 @@ enum ovn_stage {
     PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,    1, "lr_in_ip_input")     \
     PIPELINE_STAGE(ROUTER, IN,  DEFRAG,      2, "lr_in_defrag")       \
     PIPELINE_STAGE(ROUTER, IN,  UNSNAT,      3, "lr_in_unsnat")       \
-    PIPELINE_STAGE(ROUTER, IN,  DNAT,        4, "lr_in_dnat")         \
-    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  5, "lr_in_ip_routing")   \
-    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE, 6, "lr_in_arp_resolve")  \
-    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST, 7, "lr_in_arp_request")  \
+    PIPELINE_STAGE(ROUTER, IN,  POST_UNSNAT, 4, "lr_in_post_unsnat")  \
+    PIPELINE_STAGE(ROUTER, IN,  DNAT,        5, "lr_in_dnat")         \
+    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  6, "lr_in_ip_routing")   \
+    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE, 7, "lr_in_arp_resolve")  \
+    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST, 8, "lr_in_arp_request")  \
                                                                       \
     /* Logical router egress stages. */                               \
     PIPELINE_STAGE(ROUTER, OUT, SNAT,      0, "lr_out_snat")          \
@@ -3840,6 +3841,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
         /* Packets are allowed by default. */
         ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
         ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_POST_UNSNAT, 0, "1", "next;");
         ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
         ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
 
@@ -3917,6 +3919,13 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
 
         sset_destroy(&all_ips);
 
+        /* When a UNSNAT happens, ct.dnat is set because the destination
+         * IP address is the one that is changed. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_POST_UNSNAT, 100,
+                      "ip && ct.dnat", "flags.snat = 1; next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_POST_UNSNAT, 90,
+                      "ip", "flags.snat = 0; next;");
+
         for (int i = 0; i < od->nbr->n_nat; i++) {
             const struct nbrec_nat *nat;
 
@@ -3976,7 +3985,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
                 ds_clear(&match);
                 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
                 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
-                              ds_cstr(&match), "ct_snat; next;");
+                              ds_cstr(&match), "ct_snat;");
             }
 
             /* Ingress DNAT table: Packets enter the pipeline with destination
@@ -4011,7 +4020,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
             if (!strcmp(nat->type, "snat")
                 || !strcmp(nat->type, "dnat_and_snat")) {
                 ds_clear(&match);
-                ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip);
+                ds_put_format(&match, "ip && ip4.src == %s && flags.snat == 0",
+                              nat->logical_ip);
                 ds_clear(&actions);
                 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
 
@@ -4025,7 +4035,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
         }
 
         /* Re-circulate every packet through the DNAT zone.
-        * This helps with three things.
+        * This helps with two things.
         *
         * 1. Any packet that needs to be unDNATed in the reverse
         * direction gets unDNATed. Ideally this could be done in
@@ -4035,12 +4045,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
         * we can do it here, saving a future re-circulation.
         *
         * 2. Established load-balanced connections automatically get
-        * DNATed.
-        *
-        * 3. Any packet that was sent through SNAT zone in the
-        * previous table automatically gets re-circulated to get
-        * back the new destination IP address that is needed for
-        * routing in the openflow pipeline. */
+        * DNATed. */
         ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
                       "ip", "flags.loopback = 1; ct_dnat;");
     }
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index 65191ed..e564c8a 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -1103,10 +1103,10 @@
         <dd>
           <p>
             <code>ct_snat</code> sends the packet through the SNAT zone to
-            unSNAT any packet that was SNATed in the opposite direction.  If
-            the packet needs to be sent to the next tables, then it should be
-            followed by a <code>next;</code> action.  The next tables will not
-            see the changes in the packet caused by the connection tracker.
+            unSNAT any packet that was SNATed in the opposite direction.
+            The packet is then automatically sent to the next tables as if
+            followed by <code>next;</code> action.  The next tables will see
+            the changes in the packet caused by the connection tracker.
           </p>
           <p>
             <code>ct_snat(<var>IP</var>)</code> sends the packet through the
diff --git a/tests/ovn.at b/tests/ovn.at
index 69f5277..bc8c9b8 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -838,7 +838,7 @@ ct_dnat();
 
 # ct_snat
 ct_snat;
-    encodes as ct(zone=NXM_NX_REG12[0..15],nat)
+    encodes as ct(table=27,zone=NXM_NX_REG12[0..15],nat)
     has prereqs ip
 ct_snat(192.168.1.2);
     encodes as 
ct(commit,table=27,zone=NXM_NX_REG12[0..15],nat(src=192.168.1.2))
-- 
1.9.1

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to