Hi Yin, Thanks for clarifying the comments. I will ack the next version.
@Alin - will you have sometime to review the changes? Thanks, Sairam On 5/16/17, 5:11 PM, "Yin Lin" <[email protected]> wrote: >Thanks Sai for the review! I fixed most of them and explained the remaining >ones in the inline comments. > >-----Original Message----- >From: Sairam Venugopal >Sent: Tuesday, May 16, 2017 4:50 PM >To: Yin Lin <[email protected]>; [email protected] >Subject: Re: [ovs-dev] [PATCH v7 2/4] datapath-windows: Add NAT module in >conntrack > >Hi Yin, > >Thanks for the patch. Please find my comments inline. > >Thanks, >Sairam > > > > >On 5/9/17, 3:59 PM, "[email protected] on behalf of Yin Lin" ><[email protected] on behalf of [email protected]> wrote: > >>Signed-off-by: Yin Lin <[email protected]> >>--- >> datapath-windows/automake.mk | 2 + >> datapath-windows/ovsext/Conntrack-nat.c | 424 >> ++++++++++++++++++++++++++++++++ >> datapath-windows/ovsext/Conntrack-nat.h | 39 +++ >> 3 files changed, 465 insertions(+) >> create mode 100644 datapath-windows/ovsext/Conntrack-nat.c >> create mode 100644 datapath-windows/ovsext/Conntrack-nat.h >> >>diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk >>index 4f7b55a..7177630 100644 >>--- a/datapath-windows/automake.mk >>+++ b/datapath-windows/automake.mk >>@@ -16,7 +16,9 @@ EXTRA_DIST += \ >> datapath-windows/ovsext/Conntrack-icmp.c \ >> datapath-windows/ovsext/Conntrack-other.c \ >> datapath-windows/ovsext/Conntrack-related.c \ >>+ datapath-windows/ovsext/Conntrack-nat.c \ >> datapath-windows/ovsext/Conntrack-tcp.c \ >>+ datapath-windows/ovsext/Conntrack-nat.h \ >> datapath-windows/ovsext/Conntrack.c \ >> datapath-windows/ovsext/Conntrack.h \ >> datapath-windows/ovsext/Datapath.c \ >>diff --git a/datapath-windows/ovsext/Conntrack-nat.c >>b/datapath-windows/ovsext/Conntrack-nat.c >>new file mode 100644 >>index 0000000..edf5f8f >>--- /dev/null >>+++ b/datapath-windows/ovsext/Conntrack-nat.c >>@@ -0,0 +1,424 @@ >>+#include "Conntrack-nat.h" >>+#include "Jhash.h" >>+ >>+PLIST_ENTRY ovsNatTable = NULL; >>+PLIST_ENTRY ovsUnNatTable = NULL; >>+ >>+/* >>+ *--------------------------------------------------------------------------- >>+ * OvsHashNatKey >>+ * Hash NAT related fields in a Conntrack key. >>+ *--------------------------------------------------------------------------- >>+ */ >>+static __inline UINT32 >>+OvsHashNatKey(const OVS_CT_KEY *key) >>+{ >>+ UINT32 hash = 0; >>+#define HASH_ADD(field) \ >>+ hash = OvsJhashBytes(&key->field, sizeof(key->field), hash) >>+ >>+ HASH_ADD(src.addr.ipv4_aligned); >>+ HASH_ADD(dst.addr.ipv4_aligned); >>+ HASH_ADD(src.port); >>+ HASH_ADD(dst.port); >>+ HASH_ADD(zone); >>+#undef HASH_ADD >>+ return hash; >>+} >>+ >>+/* >>+ *--------------------------------------------------------------------------- >>+ * OvsNatKeyAreSame >>+ * Compare NAT related fields in a Conntrack key. >>+ *--------------------------------------------------------------------------- >>+ */ >>+static __inline BOOLEAN >>+OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY *key2) >>+{ >>+ // XXX: Compare IPv6 key as well >>+#define FIELD_COMPARE(field) \ > >[Sai]: Nit: move return statement to next line. >[Yin]: This is a MACRO and I don't want to spread it over lines. (Note that I >don't even add a semicolon at the end to make it look like a function) > >>+ if (key1->field != key2->field) return FALSE >>+ >>+ FIELD_COMPARE(src.addr.ipv4_aligned); >>+ FIELD_COMPARE(dst.addr.ipv4_aligned); >>+ FIELD_COMPARE(src.port); >>+ FIELD_COMPARE(dst.port); >>+ FIELD_COMPARE(zone); >>+ return TRUE; >>+#undef FIELD_COMPARE >>+} >>+ >>+/* >>+ *--------------------------------------------------------------------------- > >[Sai]: Nit - OvsNatGetBucket >[Yin]: Fixed > >>+ * OvsNaGetBucket >>+ * Returns the row of NAT table that has the same hash as the given NAT >>+ * hash key. If isReverse is TRUE, returns the row of reverse NAT table >>+ * instead. >>+ *--------------------------------------------------------------------------- >>+ */ >>+static __inline PLIST_ENTRY >>+OvsNatGetBucket(const OVS_CT_KEY *key, BOOLEAN isReverse) >>+{ >>+ uint32_t hash = OvsHashNatKey(key); >>+ if (isReverse) { >>+ return &ovsUnNatTable[hash & NAT_HASH_TABLE_MASK]; >>+ } else { >>+ return &ovsNatTable[hash & NAT_HASH_TABLE_MASK]; >>+ } >>+} >>+ >>+/* >>+ *--------------------------------------------------------------------------- >>+ * OvsNatInit >>+ * Initialize NAT related resources. >>+ *--------------------------------------------------------------------------- >>+ */ >>+NTSTATUS OvsNatInit() >>+{ >>+ ASSERT(ovsNatTable == NULL); >>+ >>+ /* Init the Hash Buffer */ >>+ ovsNatTable = OvsAllocateMemoryWithTag( >>+ sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, >>+ OVS_CT_POOL_TAG); >>+ if (ovsNatTable == NULL) { >>+ goto failNoMem; >>+ } >>+ >>+ ovsUnNatTable = OvsAllocateMemoryWithTag( >>+ sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, >>+ OVS_CT_POOL_TAG); >>+ if (ovsUnNatTable == NULL) { >>+ goto freeNatTable; >>+ } >>+ >>+ for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { >>+ InitializeListHead(&ovsNatTable[i]); >>+ InitializeListHead(&ovsUnNatTable[i]); >>+ } >>+ return STATUS_SUCCESS; >>+ >>+freeNatTable: >>+ OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); >>+failNoMem: >>+ return STATUS_INSUFFICIENT_RESOURCES; >>+} >>+ >>+/* >>+ >>*---------------------------------------------------------------------------- >>+ * OvsNatFlush >>+ * Flushes out all NAT entries that match the given zone. >>+ >>*---------------------------------------------------------------------------- >>+ */ >>+VOID OvsNatFlush(UINT16 zone) >>+{ >>+ PLIST_ENTRY link, next; >>+ for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { >>+ LIST_FORALL_SAFE(&ovsNatTable[i], link, next) { >>+ POVS_NAT_ENTRY entry = >>+ CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); >>+ /* zone is a non-zero value */ >>+ if (!zone || zone == entry->key.zone) { >>+ OvsNatDeleteEntry(entry); >>+ } >>+ } >>+ } >>+} >>+ >>+/* >>+ >>*---------------------------------------------------------------------------- >>+ * OvsNatCleanup >>+ * Releases all NAT related resources. >>+ >>*---------------------------------------------------------------------------- >>+ */ >>+VOID OvsNatCleanup() >>+{ > >[Sai]: Nit: move return statement to next line. >[Yin] Fixed and also added brackets around it according to coding standard. > > >>+ if (ovsNatTable == NULL) return; >>+ OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); >>+ OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG); >>+ ovsNatTable = NULL; >>+ ovsUnNatTable = NULL; >>+} >>+ >>+/* >>+ >>*---------------------------------------------------------------------------- >>+ * OvsNatPacket >>+ * Performs NAT operation on the packet by replacing the >>source/destinaton >>+ * address/port based on natAction. If reverse is TRUE, perform unNAT >>+ * instead. >>+ >>*---------------------------------------------------------------------------- >>+ */ >>+VOID >>+OvsNatPacket(OvsForwardingContext *ovsFwdCtx, >>+ const OVS_CT_ENTRY *entry, >>+ UINT16 natAction, >>+ OvsFlowKey *key, >>+ BOOLEAN reverse) >>+{ >>+ UINT32 natFlag; >>+ const struct ct_endpoint* endpoint; >>+ /* When it is NAT, only entry->rev_key contains NATTED address; >>+ When it is unNAT, only entry->key contains the UNNATTED address;*/ >>+ const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key; >>+ BOOLEAN isSrcNat; >>+ >>+ if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { >>+ return; >>+ } >>+ isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || >>+ ((natAction & NAT_ACTION_DST) && reverse)); >>+ >>+ if (isSrcNat) { >>+ /* Flag is set to SNAT for SNAT case and the reverse DNAT case */ >>+ natFlag = OVS_CS_F_SRC_NAT; >>+ /* Note that ctKey is the key in the other direction, so >>+ endpoint has to be reverted, i.e. ctKey->dst for SNAT >>+ and ctKey->src for DNAT */ >>+ endpoint = &ctKey->dst; >>+ } else { >>+ natFlag = OVS_CS_F_DST_NAT; >>+ endpoint = &ctKey->src; >>+ } >>+ key->ct.state |= natFlag; >>+ if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) { >>+ OvsUpdateAddressAndPort(ovsFwdCtx, >>+ endpoint->addr.ipv4_aligned, >>+ endpoint->port, isSrcNat, >>+ !reverse); >>+ if (isSrcNat) { >>+ key->ipKey.nwSrc = endpoint->addr.ipv4_aligned; >>+ } else { >>+ key->ipKey.nwDst = endpoint->addr.ipv4_aligned; >>+ } >>+ } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ >>+ // XXX: IPv6 packet not supported yet. >>+ return; >>+ } >>+ if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { >>+ if (isSrcNat) { >>+ if (key->ipKey.l4.tpSrc != 0) { >>+ key->ipKey.l4.tpSrc = endpoint->port; >>+ } >>+ } else { >>+ if (key->ipKey.l4.tpDst != 0) { >>+ key->ipKey.l4.tpDst = endpoint->port; >>+ } >>+ } >>+ } >>+} >>+ >>+ >>+/* >>+ >>*---------------------------------------------------------------------------- >>+ * OvsNatHashRange >>+ * Compute hash for a range of addresses specified in natInfo. >>+ >>*---------------------------------------------------------------------------- >>+ */ >>+static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis) >>+{ >>+ UINT32 hash = basis; >>+#define HASH_ADD(field) \ >>+ hash = OvsJhashBytes(&field, sizeof(field), hash) >>+ >>+ HASH_ADD(entry->natInfo.minAddr); >>+ HASH_ADD(entry->natInfo.maxAddr); >>+ HASH_ADD(entry->key.dl_type); >>+ HASH_ADD(entry->key.nw_proto); >>+ HASH_ADD(entry->key.zone); >>+#undef HASH_ADD >>+ return hash; >>+} >>+ >>+/* >>+ >>*---------------------------------------------------------------------------- >>+ * OvsNatAddEntry >>+ * Add an entry to the NAT table. Also updates the reverse NAT lookup >>+ * table. >>+ >>*---------------------------------------------------------------------------- >>+ */ >>+VOID >>+OvsNatAddEntry(OVS_NAT_ENTRY* entry) >>+{ >>+ InsertHeadList(OvsNatGetBucket(&entry->key, FALSE), >>+ &entry->link); >>+ InsertHeadList(OvsNatGetBucket(&entry->value, TRUE), >>+ &entry->reverseLink); >>+} >>+ >>+/* >>+ >>*---------------------------------------------------------------------------- >>+ * OvsNatCtEntry >>+ * Update an Conntrack entry with NAT information. Translated address and >>+ * port will be generated and write back to the conntrack entry as a >>+ * result. >>+ >>*---------------------------------------------------------------------------- >>+ */ >[Sai] - Can you name this OvsNatUpdateCtEntry? Or something to imply what this >function does. >[Yin] - Update is a little bit confusing as it doesn't explain what's the >update is about. I intended to mean the function NATs a CtEntry and NAT is a >verb here. But I see what you are coming from and understand that the name is >very confusing. Let me rename it OvsNatTranslateCtEntry as a compromise. > >>+BOOLEAN >>+OvsNatCtEntry(OVS_CT_ENTRY *entry) >>+{ >>+ const uint16_t MIN_NAT_EPHEMERAL_PORT = 1024; >>+ const uint16_t MAX_NAT_EPHEMERAL_PORT = 65535; >>+ >>+ uint16_t minPort; >>+ uint16_t maxPort; >>+ uint16_t firstPort; >>+ >>+ uint32_t hash = OvsNatHashRange(entry, 0); >>+ >>+ if ((entry->natInfo.natAction & NAT_ACTION_SRC) && >>+ (!(entry->natInfo.natAction & NAT_ACTION_SRC_PORT))) { >>+ firstPort = minPort = maxPort = ntohs(entry->key.src.port); >>+ } else if ((entry->natInfo.natAction & NAT_ACTION_DST) && >>+ (!(entry->natInfo.natAction & NAT_ACTION_DST_PORT))) { >>+ firstPort = minPort = maxPort = ntohs(entry->key.dst.port); >>+ } else { >>+ uint16_t portDelta = entry->natInfo.maxPort - entry->natInfo.minPort; >>+ uint16_t portIndex = (uint16_t) hash % (portDelta + 1); >>+ firstPort = entry->natInfo.minPort + portIndex; >>+ minPort = entry->natInfo.minPort; >>+ maxPort = entry->natInfo.maxPort; >>+ } >>+ > >[Sai] - Can you move these definitions to the top? >[Yin] Fixed > > >>+ uint32_t addrDelta = 0; >>+ uint32_t addrIndex; >>+ struct ct_addr ctAddr, maxCtAddr; >>+ memset(&ctAddr, 0, sizeof ctAddr); >>+ memset(&maxCtAddr, 0, sizeof maxCtAddr); >>+ maxCtAddr = entry->natInfo.maxAddr; >>+ >>+ if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) { >>+ addrDelta = ntohl(entry->natInfo.maxAddr.ipv4_aligned) - >>+ ntohl(entry->natInfo.minAddr.ipv4_aligned); >>+ addrIndex = hash % (addrDelta + 1); >>+ ctAddr.ipv4_aligned = htonl( >>+ ntohl(entry->natInfo.minAddr.ipv4_aligned) + addrIndex); >>+ } else { >>+ // XXX: IPv6 not supported >>+ return FALSE; >>+ } > >[Sai] - Can you move these definitions to the top? >[Yin] Fixed > > >>+ > >>+ uint16_t port = firstPort; >>+ BOOLEAN allPortsTried = FALSE; >>+ BOOLEAN originalPortsTried = FALSE; >>+ struct ct_addr firstAddr = ctAddr; >>+ for (;;) { >>+ if (entry->natInfo.natAction & NAT_ACTION_SRC) { >>+ entry->rev_key.dst.addr = ctAddr; >>+ entry->rev_key.dst.port = htons(port); >>+ } else { >>+ entry->rev_key.src.addr = ctAddr; >>+ entry->rev_key.src.port = htons(port); >>+ } >>+ >>+ OVS_NAT_ENTRY *natEntry = OvsNatLookup(&entry->rev_key, TRUE); >>+ >>+ if (!natEntry) { >>+ natEntry = OvsAllocateMemoryWithTag(sizeof(*natEntry), >>+ OVS_CT_POOL_TAG); > >[Sai]: Need to check if natEntry is not NULL and handle the Insufficient >resources case. >[Yin] Fixed. > >nit: NdisMoveMemory instead of memcpy > >>+ memcpy(&natEntry->key, &entry->key, >>+ sizeof natEntry->key); >>+ memcpy(&natEntry->value, &entry->rev_key, >>+ sizeof natEntry->value); >>+ natEntry->ctEntry = entry; >>+ OvsNatAddEntry(natEntry); >>+ return TRUE; >>+ } else if (!allPortsTried) { >>+ if (minPort == maxPort) { >>+ allPortsTried = TRUE; >>+ } else if (port == maxPort) { >>+ port = minPort; >>+ } else { >>+ port++; >>+ } >>+ if (port == firstPort) { >>+ allPortsTried = TRUE; >>+ } >>+ } else { >>+ if (memcmp(&ctAddr, &maxCtAddr, sizeof ctAddr)) { >>+ if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) { >>+ ctAddr.ipv4_aligned = htonl( >>+ ntohl(ctAddr.ipv4_aligned) + 1); >>+ } else { >>+ // XXX: IPv6 not supported > >[Sai] - This can be done later. However, it will be good to return NDIS_STATUS > >and return NDIS_STATUS_INVALID. At the very minimum, we should add a log msg. > >[Yin] Shashank raised the similar concern earlier. We don't have IPv6 support >anywhere and feels bad about >spamming error message whenever an address is concerned. We don't have log >rate control so if there is a IPv6 rule then the log >is going to explode and prevent us from seeing the entries we are really >interested in. If we have partial IPv6 support, >then I agree that we should add logs in areas where the support is not >extended yet. > >>+ return FALSE; >>+ } >>+ } else { >>+ ctAddr = entry->natInfo.minAddr; >>+ } >>+ if (!memcmp(&ctAddr, &firstAddr, sizeof ctAddr)) { >>+ if (!originalPortsTried) { >>+ originalPortsTried = TRUE; >>+ ctAddr = entry->natInfo.minAddr; >>+ minPort = MIN_NAT_EPHEMERAL_PORT; >>+ maxPort = MAX_NAT_EPHEMERAL_PORT; >>+ } else { >>+ break; >>+ } >>+ } >>+ firstPort = minPort; >>+ port = firstPort; >>+ allPortsTried = FALSE; >>+ } >>+ } >>+ return FALSE; >>+} >>+ >>+/* >>+ >>*---------------------------------------------------------------------------- >>+ * OvsNatLookup >>+ * Look up a NAT entry with the given key in the NAT table. >>+ * If reverse is TRUE, look up a NAT entry with the given value instead. >>+ >>*---------------------------------------------------------------------------- >>+ */ >>+POVS_NAT_ENTRY >>+OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse) >>+{ >>+ PLIST_ENTRY link; >>+ POVS_NAT_ENTRY entry; >>+ >>+ LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) { >>+ if (reverse) { >>+ entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink); >>+ >>+ if (OvsNatKeyAreSame(ctKey, &entry->value)) { >>+ return entry; >>+ } >>+ } else { >>+ entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); >>+ >>+ if (OvsNatKeyAreSame(ctKey, &entry->key)) { >>+ return entry; >>+ } >>+ } >>+ } >>+ return NULL; >>+} >>+ >>+/* >>+ >>*---------------------------------------------------------------------------- >>+ * OvsNatDeleteEntry >>+ * Delete a NAT entry. >>+ >>*---------------------------------------------------------------------------- >>+ */ >>+VOID >>+OvsNatDeleteEntry(POVS_NAT_ENTRY entry) >>+{ >>+ if (entry == NULL) { >>+ return; >>+ } >>+ RemoveEntryList(&entry->link); >>+ RemoveEntryList(&entry->reverseLink); >>+ OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); >>+} >>+ >>+/* >>+ >>*---------------------------------------------------------------------------- >>+ * OvsNatDeleteKey >>+ * Delete a NAT entry with the given key. >>+ >>*---------------------------------------------------------------------------- >>+ */ >>+VOID >>+OvsNatDeleteKey(const OVS_CT_KEY *key) >>+{ >>+ OvsNatDeleteEntry(OvsNatLookup(key, FALSE)); >>+} >>diff --git a/datapath-windows/ovsext/Conntrack-nat.h >>b/datapath-windows/ovsext/Conntrack-nat.h >>new file mode 100644 >>index 0000000..aaa8ceb >>--- /dev/null >>+++ b/datapath-windows/ovsext/Conntrack-nat.h >>@@ -0,0 +1,39 @@ >>+#ifndef _CONNTRACK_NAT_H >>+#define _CONNTRACK_NAT_H >>+ >>+#include "precomp.h" >>+#include "Flow.h" >>+#include "Debug.h" >>+#include <stddef.h> >>+#include "Conntrack.h" >>+ >>+#define NAT_HASH_TABLE_SIZE ((UINT32)1 << 10) >>+#define NAT_HASH_TABLE_MASK (NAT_HASH_TABLE_SIZE - 1) >>+ >>+typedef struct OVS_NAT_ENTRY { >>+ LIST_ENTRY link; >>+ LIST_ENTRY reverseLink; >>+ OVS_CT_KEY key; >>+ OVS_CT_KEY value; >>+ POVS_CT_ENTRY ctEntry; >>+} OVS_NAT_ENTRY, *POVS_NAT_ENTRY; >>+ >>+__inline static BOOLEAN OvsIsForwardNat(UINT16 natAction) { > >[Sai] - What does the double !! do here? >[Yin] - This is a bitwise masking operation on integer and the return value is >a Boolean, so it needs double !! to ensure return value is either 0 or 1. > > >>+ return !!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST)); >>+} >>+ >>+NTSTATUS OvsNatInit(); >>+VOID OvsNatFlush(UINT16 zone); >>+ >>+VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry); >>+ >>+VOID OvsNatDeleteEntry(POVS_NAT_ENTRY entry); >>+VOID OvsNatDeleteKey(const OVS_CT_KEY *key); >>+VOID OvsNatCleanup(); >>+ >>+POVS_NAT_ENTRY OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse); >>+BOOLEAN OvsNatCtEntry(OVS_CT_ENTRY *ctEntry); >>+VOID OvsNatPacket(OvsForwardingContext *ovsFwdCtx, const OVS_CT_ENTRY *entry, >>+ UINT16 natAction, OvsFlowKey *key, BOOLEAN reverse); >>+ >>+#endif >>-- >>2.10.2.windows.1 >> >>_______________________________________________ >>dev mailing list >>[email protected] >>https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=Z6vowHUOjP5ysP_g372c49Nqc1vEKqHKNBkR5Q5Z7uo&m=veEopphpitZ1dkD8nov8CuLArrdnUU8WDczrz9UClwo&s=U88QV-uHXUzVrccSWR_EYp7F7ySpFOMRBlwZy34qfzA&e= >> _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
