Signed-off-by: Yin Lin <li...@vmware.com> --- datapath-windows/ovsext/Actions.c | 158 ++++++++++++++++++----------- datapath-windows/ovsext/Actions.h | 77 ++++++++++++++ datapath-windows/ovsext/Conntrack.c | 178 ++++++++++++++++++++------------- datapath-windows/ovsext/Conntrack.h | 25 +++-- datapath-windows/ovsext/ovsext.vcxproj | 2 + 5 files changed, 303 insertions(+), 137 deletions(-)
diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index 46f84bc..9f4a22d 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -71,63 +71,6 @@ typedef struct _OVS_ACTION_STATS { OVS_ACTION_STATS ovsActionStats; /* - * There a lot of data that needs to be maintained while executing the pipeline - * as dictated by the actions of a flow, across different functions at different - * levels. Such data is put together in a 'context' structure. Care should be - * exercised while adding new members to the structure - only add ones that get - * used across multiple stages in the pipeline/get used in multiple functions. - */ -typedef struct OvsForwardingContext { - POVS_SWITCH_CONTEXT switchContext; - /* The NBL currently used in the pipeline. */ - PNET_BUFFER_LIST curNbl; - /* NDIS forwarding detail for 'curNbl'. */ - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; - /* Array of destination ports for 'curNbl'. */ - PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts; - /* send flags while sending 'curNbl' into NDIS. */ - ULONG sendFlags; - /* Total number of output ports, used + unused, in 'curNbl'. */ - UINT32 destPortsSizeIn; - /* Total number of used output ports in 'curNbl'. */ - UINT32 destPortsSizeOut; - /* - * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to - * be freed/completed. - */ - OvsCompletionList *completionList; - /* - * vport number of 'curNbl' when it is passed from the PIF bridge to the INT - * bridge. ie. during tunneling on the Rx side. - */ - UINT32 srcVportNo; - - /* - * Tunnel key: - * - specified in actions during tunneling Tx - * - extracted from an NBL during tunneling Rx - */ - OvsIPv4TunnelKey tunKey; - - /* - * Tunneling - Tx: - * To store the output port, when it is a tunneled port. We don't foresee - * multiple tunneled ports as outport for any given NBL. - */ - POVS_VPORT_ENTRY tunnelTxNic; - - /* - * Tunneling - Rx: - * Points to the Internal port on the PIF Bridge, if the packet needs to be - * de-tunneled. - */ - POVS_VPORT_ENTRY tunnelRxNic; - - /* header information */ - OVS_PACKET_HDR_INFO layers; -} OvsForwardingContext; - -/* * -------------------------------------------------------------------------- * OvsInitForwardingCtx -- * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline @@ -1388,7 +1331,7 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx, * based on the specified key. *---------------------------------------------------------------------------- */ -static __inline NDIS_STATUS +NDIS_STATUS OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, const struct ovs_key_udp *udpAttr) { @@ -1435,7 +1378,7 @@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, * based on the specified key. *---------------------------------------------------------------------------- */ -static __inline NDIS_STATUS +NDIS_STATUS OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx, const struct ovs_key_tcp *tcpAttr) { @@ -1474,11 +1417,104 @@ OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx, /* *---------------------------------------------------------------------------- * OvsUpdateIPv4Header -- + * Updates the source/destination IP field of IPv4 header in + * ovsFwdCtx.curNbl inline based on the specified key. + *---------------------------------------------------------------------------- + */ +OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, + UINT32 newAddr, UINT16 newPort, + BOOLEAN isSource) +{ + PUINT8 bufferStart; + UINT32 hdrSize; + OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers; + IPHdr *ipHdr; + TCPHdr *tcpHdr = NULL; + UDPHdr *udpHdr = NULL; + UINT32 *addrField = NULL; + UINT16 *portField = NULL; + UINT16 *checkField = NULL; + + ASSERT(layers->value != 0); + + if (layers->isTcp || layers->isUdp) { + hdrSize = layers->l4Offset + + layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr); + } else { + hdrSize = layers->l3Offset + sizeof (*ipHdr); + } + + bufferStart = OvsGetHeaderBySize(ovsFwdCtx, hdrSize); + if (!bufferStart) { + return NDIS_STATUS_RESOURCES; + } + + ipHdr = (IPHdr *)(bufferStart + layers->l3Offset); + + if (layers->isTcp) { + tcpHdr = (TCPHdr *)(bufferStart + layers->l4Offset); + } else if (layers->isUdp) { + udpHdr = (UDPHdr *)(bufferStart + layers->l4Offset); + } + + /* + * Adjust the IP header inline as dictated by the action, and also update + * the IP and the TCP checksum for the data modified. + * + * In the future, this could be optimized to make one call to + * ChecksumUpdate32(). Ignoring this for now, since for the most common + * case, we only update the TTL. + */ + if (isSource) + { + addrField = &ipHdr->saddr; + if (tcpHdr) { + portField = &tcpHdr->source; + } + else if (udpHdr) { + portField = &udpHdr->source; + } + } else { + addrField = &ipHdr->daddr; + if (tcpHdr) { + portField = &tcpHdr->dest; + checkField = &tcpHdr->check; + } else if (udpHdr) { + portField = &udpHdr->dest; + checkField = &udpHdr->check; + } + } + + if (*addrField != newAddr) { + if (checkField && *checkField != 0) { + *checkField = ChecksumUpdate32(*checkField, *addrField, + newAddr); + } + if (ipHdr->check != 0) { + ipHdr->check = ChecksumUpdate32(ipHdr->check, *addrField, + newAddr); + } + *addrField = newAddr; + } + + if (portField && *portField != newPort) { + if (checkField && *checkField != 0) { + *checkField = ChecksumUpdate16(*checkField, *portField, + newPort); + *portField = newPort; + } + } + return NDIS_STATUS_SUCCESS; +} + +/* + *---------------------------------------------------------------------------- + * OvsUpdateIPv4Header -- * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the * specified key. *---------------------------------------------------------------------------- */ -static __inline NDIS_STATUS +NDIS_STATUS OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, const struct ovs_key_ipv4 *ipAttr) { @@ -2032,7 +2068,7 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext, } } - status = OvsExecuteConntrackAction(ovsFwdCtx.curNbl, layers, + status = OvsExecuteConntrackAction(&ovsFwdCtx, layers, key, (const PNL_ATTR)a); if (status != NDIS_STATUS_SUCCESS) { OVS_LOG_ERROR("CT Action failed"); diff --git a/datapath-windows/ovsext/Actions.h b/datapath-windows/ovsext/Actions.h index c56c260..75bb8db 100644 --- a/datapath-windows/ovsext/Actions.h +++ b/datapath-windows/ovsext/Actions.h @@ -52,4 +52,81 @@ OvsDoRecirc(POVS_SWITCH_CONTEXT switchContext, UINT32 srcPortNo, OVS_PACKET_HDR_INFO *layers); +/* + * There a lot of data that needs to be maintained while executing the pipeline + * as dictated by the actions of a flow, across different functions at different + * levels. Such data is put together in a 'context' structure. Care should be + * exercised while adding new members to the structure - only add ones that get + * used across multiple stages in the pipeline/get used in multiple functions. + */ +typedef struct OvsForwardingContext { + POVS_SWITCH_CONTEXT switchContext; + /* The NBL currently used in the pipeline. */ + PNET_BUFFER_LIST curNbl; + /* NDIS forwarding detail for 'curNbl'. */ + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; + /* Array of destination ports for 'curNbl'. */ + PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts; + /* send flags while sending 'curNbl' into NDIS. */ + ULONG sendFlags; + /* Total number of output ports, used + unused, in 'curNbl'. */ + UINT32 destPortsSizeIn; + /* Total number of used output ports in 'curNbl'. */ + UINT32 destPortsSizeOut; + /* + * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to + * be freed/completed. + */ + OvsCompletionList *completionList; + /* + * vport number of 'curNbl' when it is passed from the PIF bridge to the INT + * bridge. ie. during tunneling on the Rx side. + */ + UINT32 srcVportNo; + + /* + * Tunnel key: + * - specified in actions during tunneling Tx + * - extracted from an NBL during tunneling Rx + */ + OvsIPv4TunnelKey tunKey; + + /* + * Tunneling - Tx: + * To store the output port, when it is a tunneled port. We don't foresee + * multiple tunneled ports as outport for any given NBL. + */ + POVS_VPORT_ENTRY tunnelTxNic; + + /* + * Tunneling - Rx: + * Points to the Internal port on the PIF Bridge, if the packet needs to be + * de-tunneled. + */ + POVS_VPORT_ENTRY tunnelRxNic; + + /* header information */ + OVS_PACKET_HDR_INFO layers; +} OvsForwardingContext; + +PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx, + UINT32 size); + +NDIS_STATUS +OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, + const struct ovs_key_udp *udpAttr); + +NDIS_STATUS +OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx, + const struct ovs_key_tcp *tcpAttr); + +NDIS_STATUS +OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, + const struct ovs_key_ipv4 *ipAttr); + +NDIS_STATUS +OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, + UINT32 newAddr, UINT16 newPort, + BOOLEAN isSource); + #endif /* __ACTIONS_H_ */ diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 8a87dce..b22a99a 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -18,6 +18,7 @@ #include "Jhash.h" #include "PacketParser.h" #include "Event.h" +#include "Conntrack-nat.h" #pragma warning(push) #pragma warning(disable:4311) @@ -26,7 +27,7 @@ #define SEC_TO_UNIX_EPOCH 11644473600LL #define SEC_TO_NANOSEC 1000000000LL -KSTART_ROUTINE ovsConntrackEntryCleaner; +KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX ovsConntrackLockObj; @@ -71,7 +72,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) /* Init CT Cleaner Thread */ KeInitializeEvent(&ctThreadCtx.event, NotificationEvent, FALSE); status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL, - NULL, ovsConntrackEntryCleaner, + NULL, OvsConntrackEntryCleaner, &ctThreadCtx); if (status != STATUS_SUCCESS) { @@ -88,6 +89,13 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) &ctThreadCtx.threadObject, NULL); ZwClose(threadHandle); threadHandle = NULL; + + status = OvsNatInit(context); + + if (status != STATUS_SUCCESS) { + OvsCleanupConntrack(); + return status; + } return STATUS_SUCCESS; } @@ -120,6 +128,7 @@ OvsCleanupConntrack(VOID) NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; + OvsNatCleanup(); } static __inline VOID @@ -159,30 +168,48 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type) OvsPostCtEvent(&ctEventEntry); } -static __inline VOID -OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, UINT64 now) +static __inline BOOLEAN +OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, + PNAT_ACTION_INFO natInfo, UINT64 now) { NdisMoveMemory(&entry->key, &ctx->key, sizeof (OVS_CT_KEY)); - NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof (OVS_CT_KEY)); + NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof(OVS_CT_KEY)); OvsCtKeyReverse(&entry->rev_key); + + // NatInfo is always initialized to be disabled, so that if NAT action + // fails, we will not end up deleting an non-existent NAT entry. + if (natInfo != NULL && OvsIsForwardNat(natInfo->natAction)) { + entry->natInfo = *natInfo; + if (!OvsNatCtEntry(entry)) { + return FALSE; + } + entry->natInfo = *natInfo; + ctx->hash = OvsHashCtKey(&entry->key); + } else { + entry->natInfo.natAction = natInfo->natAction; + } + entry->timestampStart = now; InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], &entry->link); OvsPostCtEventEntry(entry, OVS_EVENT_CT_NEW); ctTotalEntries++; + return TRUE; } static __inline POVS_CT_ENTRY -OvsCtEntryCreate(PNET_BUFFER_LIST curNbl, +OvsCtEntryCreate(OvsForwardingContext *fwdCtx, UINT8 ipProto, UINT32 l4Offset, OvsConntrackKeyLookupCtx *ctx, OvsFlowKey *key, + PNAT_ACTION_INFO natInfo, BOOLEAN commit, UINT64 currentTime) { POVS_CT_ENTRY entry = NULL; UINT32 state = 0; + PNET_BUFFER_LIST curNbl = fwdCtx->curNbl; switch (ipProto) { case IPPROTO_TCP: @@ -210,11 +237,8 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl, if (parentEntry != NULL) { entry->parent = parentEntry; } - OvsCtAddEntry(entry, ctx, currentTime); } - - OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL); - return entry; + break; } case IPPROTO_ICMP: { @@ -228,33 +252,25 @@ OvsCtEntryCreate(PNET_BUFFER_LIST curNbl, state |= OVS_CS_F_NEW; if (commit) { entry = OvsConntrackCreateIcmpEntry(currentTime); - if (!entry) { - return NULL; - } - OvsCtAddEntry(entry, ctx, currentTime); } - - OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL); - return entry; + break; } case IPPROTO_UDP: { state |= OVS_CS_F_NEW; if (commit) { entry = OvsConntrackCreateOtherEntry(currentTime); - if (!entry) { - return NULL; - } - OvsCtAddEntry(entry, ctx, currentTime); } - - OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL); - return entry; + break; } default: goto invalid; } + if (!entry || !OvsCtAddEntry(entry, ctx, natInfo, currentTime)) { + return NULL; + } + OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL); invalid: state |= OVS_CS_F_INVALID; OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL); @@ -263,11 +279,11 @@ invalid: static enum CT_UPDATE_RES OvsCtUpdateEntry(OVS_CT_ENTRY* entry, - PNET_BUFFER_LIST nbl, - UINT8 ipProto, - UINT32 l4Offset, - BOOLEAN reply, - UINT64 now) + PNET_BUFFER_LIST nbl, + UINT8 ipProto, + UINT32 l4Offset, + BOOLEAN reply, + UINT64 now) { switch (ipProto) { @@ -293,6 +309,12 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry, static __inline VOID OvsCtEntryDelete(POVS_CT_ENTRY entry) { + if (entry == NULL) { + return; + } + if (entry->natInfo.natAction) { + OvsNatDeleteKey(&entry->key); + } OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); RemoveEntryList(&entry->link); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); @@ -302,10 +324,6 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry) static __inline BOOLEAN OvsCtEntryExpired(POVS_CT_ENTRY entry) { - if (entry == NULL) { - return TRUE; - } - UINT64 currentTime; NdisGetCurrentSystemTime((LARGE_INTEGER *)¤tTime); return entry->expiration < currentTime; @@ -333,7 +351,7 @@ OvsDetectCtPacket(OvsFlowKey *key) return NDIS_STATUS_NOT_SUPPORTED; } -static __inline BOOLEAN +BOOLEAN OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey) { return ((ctxKey.src.addr.ipv4 == entryKey.src.addr.ipv4) && @@ -359,13 +377,14 @@ OvsCtIncrementCounters(POVS_CT_ENTRY entry, BOOLEAN reply, PNET_BUFFER_LIST nbl) } } -static __inline POVS_CT_ENTRY +POVS_CT_ENTRY OvsCtLookup(OvsConntrackKeyLookupCtx *ctx) { PLIST_ENTRY link; POVS_CT_ENTRY entry; BOOLEAN reply = FALSE; POVS_CT_ENTRY found = NULL; + OVS_CT_KEY key = ctx->key; if (!ctTotalEntries) { return found; @@ -374,13 +393,18 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx) LIST_FORALL(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], link) { entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link); - if (OvsCtKeyAreSame(ctx->key,entry->key)) { + if (OvsCtKeyAreSame(key,entry->key)) { found = entry; reply = FALSE; break; } - if (OvsCtKeyAreSame(ctx->key,entry->rev_key)) { + /* Reverse NAT must be performed before OvsCtLookup, so here + * we simply need to flip the src and dst in key and compare + * they are equal. Note that flipped key is not equal to + * rev_key due to NAT effect. */ + OvsCtKeyReverse(&key); + if (OvsCtKeyAreSame(key, entry->key)) { found = entry; reply = TRUE; break; @@ -399,17 +423,18 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx) return found; } -static __inline UINT32 -OvsExtractLookupCtxHash(OvsConntrackKeyLookupCtx *ctx) +UINT32 +OvsHashCtKey(const OVS_CT_KEY *key) { - UINT32 hsrc, hdst,hash; - hsrc = OvsJhashBytes((UINT32*) &ctx->key.src, sizeof(ctx->key.src), 0); - hdst = OvsJhashBytes((UINT32*) &ctx->key.dst, sizeof(ctx->key.dst), 0); + UINT32 hsrc, hdst, hash; + hsrc = OvsJhashBytes((UINT32*) &key->src, sizeof(key->src), 0); + hdst = OvsJhashBytes((UINT32*) &key->dst, sizeof(key->dst), 0); hash = hsrc ^ hdst; /* TO identify reverse traffic */ - return OvsJhashBytes((uint32_t *) &ctx->key.dst + 1, - ((uint32_t *) (&ctx->key + 1) - - (uint32_t *) (&ctx->key.dst + 1)), + hash = OvsJhashBytes((uint32_t *) &key->dst + 1, + ((uint32_t *) (key + 1) - + (uint32_t *) (&key->dst + 1)), hash); + return hash; } static UINT8 @@ -440,6 +465,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, PNET_BUFFER_LIST curNbl, UINT32 l4Offset) { + const OVS_NAT_ENTRY *natEntry; ctx->key.zone = zone; ctx->key.dl_type = flowKey->l2.dlType; ctx->related = FALSE; @@ -501,7 +527,14 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, return NDIS_STATUS_INVALID_PACKET; } - ctx->hash = OvsExtractLookupCtxHash(ctx); + natEntry = OvsNatLookup(&ctx->key, TRUE); + if (natEntry) { + // Translate address first for reverse NAT + ctx->key = natEntry->ctEntry->key; + OvsCtKeyReverse(&ctx->key); + } + + ctx->hash = OvsHashCtKey(&ctx->key); return NDIS_STATUS_SUCCESS; } @@ -519,16 +552,18 @@ OvsDetectFtpPacket(OvsFlowKey *key) { *---------------------------------------------------------------------------- */ static __inline POVS_CT_ENTRY -OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl, +OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx, UINT32 l4Offset, OvsConntrackKeyLookupCtx *ctx, OvsFlowKey *key, UINT16 zone, + NAT_ACTION_INFO *natInfo, BOOLEAN commit, UINT64 currentTime) { POVS_CT_ENTRY entry = ctx->entry; UINT32 state = 0; + PNET_BUFFER_LIST curNbl = fwdCtx->curNbl; /* If an entry was found, update the state based on TCP flags */ if (ctx->related) { @@ -554,8 +589,8 @@ OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl, //Delete and update the Conntrack OvsCtEntryDelete(ctx->entry); ctx->entry = NULL; - entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto, l4Offset, - ctx, key, commit, currentTime); + entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, l4Offset, + ctx, key, natInfo, commit, currentTime); if (!entry) { return NULL; } @@ -621,7 +656,7 @@ OvsConntrackSetLabels(OvsFlowKey *key, } static __inline NDIS_STATUS -OvsCtExecute_(PNET_BUFFER_LIST curNbl, +OvsCtExecute_(OvsForwardingContext *fwdCtx, OvsFlowKey *key, OVS_PACKET_HDR_INFO *layers, BOOLEAN commit, @@ -633,13 +668,12 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl, { NDIS_STATUS status = NDIS_STATUS_SUCCESS; POVS_CT_ENTRY entry = NULL; + PNET_BUFFER_LIST curNbl = fwdCtx->curNbl; OvsConntrackKeyLookupCtx ctx = { 0 }; LOCK_STATE_EX lockState; UINT64 currentTime; NdisGetCurrentSystemTime((LARGE_INTEGER *) ¤tTime); - /* XXX: Not referenced for now */ - UNREFERENCED_PARAMETER(natInfo); /* Retrieve the Conntrack Key related fields from packet */ OvsCtSetupLookupCtx(key, zone, &ctx, curNbl, layers->l4Offset); @@ -651,14 +685,25 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl, if (!entry) { /* If no matching entry was found, create one and add New state */ - entry = OvsCtEntryCreate(curNbl, key->ipKey.nwProto, + entry = OvsCtEntryCreate(fwdCtx, key->ipKey.nwProto, layers->l4Offset, &ctx, - key, commit, currentTime); + key, natInfo, commit, currentTime); } else { /* Process the entry and update CT flags */ OvsCtIncrementCounters(entry, ctx.reply, curNbl); - entry = OvsProcessConntrackEntry(curNbl, layers->l4Offset, &ctx, key, - zone, commit, currentTime); + entry = OvsProcessConntrackEntry(fwdCtx, layers->l4Offset, &ctx, key, + zone, natInfo, commit, currentTime); + } + + /* Note that natInfo is not the same as entry->natInfo here. natInfo + is decided by action in the openflow rule, entry->natInfo is decided + when the entry is created. In the reverse NAT case, natInfo is + NAT_ACTION_REVERSE, yet entry->natInfo is NAT_ACTION_SRC or + NAT_ACTION_DST without NAT_ACTION_REVERSE */ + if (entry && natInfo->natAction != NAT_ACTION_NONE) + { + OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction, + key, ctx.reply); } if (entry && mark) { @@ -692,7 +737,7 @@ OvsCtExecute_(PNET_BUFFER_LIST curNbl, *--------------------------------------------------------------------------- */ NDIS_STATUS -OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl, +OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, OVS_PACKET_HDR_INFO *layers, OvsFlowKey *key, const PNL_ATTR a) @@ -739,12 +784,12 @@ OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl, BOOLEAN hasMaxIp = FALSE; BOOLEAN hasMaxPort = FALSE; NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) { - enum ovs_nat_attr sub_type_nest = NlAttrType(natAttr); - switch(sub_type_nest) { + enum ovs_nat_attr subtype = NlAttrType(natAttr); + switch(subtype) { case OVS_NAT_ATTR_SRC: case OVS_NAT_ATTR_DST: natActionInfo.natAction |= - ((sub_type_nest == OVS_NAT_ATTR_SRC) + ((subtype == OVS_NAT_ATTR_SRC) ? NAT_ACTION_SRC : NAT_ACTION_DST); break; case OVS_NAT_ATTR_IP_MIN: @@ -802,19 +847,19 @@ OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl, } } - status = OvsCtExecute_(curNbl, key, layers, + status = OvsCtExecute_(fwdCtx, key, layers, commit, zone, mark, labels, helper, &natActionInfo); return status; } /* *---------------------------------------------------------------------------- - * ovsConntrackEntryCleaner + * OvsConntrackEntryCleaner * Runs periodically and cleans up the connection tracker *---------------------------------------------------------------------------- */ VOID -ovsConntrackEntryCleaner(PVOID data) +OvsConntrackEntryCleaner(PVOID data) { POVS_CT_THREAD_CTX context = (POVS_CT_THREAD_CTX)data; @@ -831,15 +876,13 @@ ovsConntrackEntryCleaner(PVOID data) } /* Set the timeout for the thread and cleanup */ - UINT64 currentTime, threadSleepTimeout; - NdisGetCurrentSystemTime((LARGE_INTEGER *)¤tTime); - threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL; + INT64 threadSleepTimeout = -CT_CLEANUP_INTERVAL; if (ctTotalEntries) { for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) { entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link); - if (entry->expiration < currentTime) { + if (entry && OvsCtEntryExpired(entry)) { OvsCtEntryDelete(entry); } } @@ -880,6 +923,7 @@ OvsCtFlush(UINT16 zone) } NdisReleaseRWLock(ovsConntrackLockObj, &lockState); + OvsNatFlush(zone); return NDIS_STATUS_SUCCESS; } diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h index 875c434..584c8ff 100644 --- a/datapath-windows/ovsext/Conntrack.h +++ b/datapath-windows/ovsext/Conntrack.h @@ -20,6 +20,7 @@ #include "precomp.h" #include "Flow.h" #include "Debug.h" +#include "Actions.h" #include <stddef.h> #ifdef OVS_DBG_MOD @@ -86,6 +87,14 @@ typedef struct _OVS_CT_KEY { UINT64 byteCount; } OVS_CT_KEY, *POVS_CT_KEY; +typedef struct _NAT_ACTION_INFO { + struct ct_addr minAddr; + struct ct_addr maxAddr; + uint16_t minPort; + uint16_t maxPort; + uint16_t natAction; +} NAT_ACTION_INFO, *PNAT_ACTION_INFO; + typedef struct OVS_CT_ENTRY { OVS_CT_KEY key; OVS_CT_KEY rev_key; @@ -94,6 +103,7 @@ typedef struct OVS_CT_ENTRY { UINT32 mark; UINT64 timestampStart; struct ovs_key_ct_labels labels; + NAT_ACTION_INFO natInfo; PVOID parent; /* Points to main connection */ } OVS_CT_ENTRY, *POVS_CT_ENTRY; @@ -118,14 +128,6 @@ typedef struct OvsConntrackKeyLookupCtx { BOOLEAN related; } OvsConntrackKeyLookupCtx; -typedef struct _NAT_ACTION_INFO { - struct ct_addr minAddr; - struct ct_addr maxAddr; - uint16_t minPort; - uint16_t maxPort; - uint16_t natAction; -} NAT_ACTION_INFO, *PNAT_ACTION_INFO; - #define CT_HASH_TABLE_SIZE ((UINT32)1 << 10) #define CT_HASH_TABLE_MASK (CT_HASH_TABLE_SIZE - 1) #define CT_INTERVAL_SEC 10000000LL //1s @@ -172,7 +174,7 @@ OvsGetTcpPayloadLength(PNET_BUFFER_LIST nbl) VOID OvsCleanupConntrack(VOID); NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context); -NDIS_STATUS OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl, +NDIS_STATUS OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, OVS_PACKET_HDR_INFO *layers, OvsFlowKey *key, const PNL_ATTR a); @@ -225,4 +227,9 @@ NDIS_STATUS OvsCtHandleFtp(PNET_BUFFER_LIST curNbl, POVS_CT_ENTRY entry, BOOLEAN request); +UINT32 OvsHashCtKey(const OVS_CT_KEY *key); +BOOLEAN OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey); +POVS_CT_ENTRY OvsCtLookup(OvsConntrackKeyLookupCtx *ctx); + + #endif /* __OVS_CONNTRACK_H_ */ diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj index 44aea19..75754fb 100644 --- a/datapath-windows/ovsext/ovsext.vcxproj +++ b/datapath-windows/ovsext/ovsext.vcxproj @@ -103,6 +103,7 @@ <ClInclude Include="Actions.h" /> <ClInclude Include="Atomic.h" /> <ClInclude Include="BufferMgmt.h" /> + <ClInclude Include="Conntrack-nat.h" /> <ClInclude Include="Conntrack.h" /> <ClInclude Include="Datapath.h" /> <ClInclude Include="Debug.h" /> @@ -256,6 +257,7 @@ <ItemGroup> <ClCompile Include="Actions.c" /> <ClCompile Include="BufferMgmt.c" /> + <ClCompile Include="Conntrack-nat.c" /> <ClCompile Include="Conntrack-related.c" /> <ClCompile Include="Conntrack-ftp.c" /> <ClCompile Include="Conntrack-icmp.c" /> -- 2.8.0.windows.1
_______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev