[ovs-dev] [PATCH] datapath-windows:Correct checksum for DNAT action
From: Wilson Peng While testing OVS-Windows flows for the DNAT action, the checksum in the TCP header is set incorrectly when TCP offload is enabled by default. As a result, the packet will be dropped on the receiving Linux VM. >>>sample flow default configuration on both Windows VM and Linux VM (src=40.0.1.2,dst=10.150.0.1) --dnat--> (src=40.0.1.2,dst=30.1.0.2) Without the fix, for some TCP packets (40.0.1.2->30.1.0.2 with payload len 207) the TCP checksum will be the pseudo header checksum and the value is 0x01d6. With the fix the checksum will be 0x47ee, and the receiving Linux VM gets the correct TCP checksum. Signed-off-by: Wilson Peng Signed-off-by: Anand Kumar --- datapath-windows/ovsext/Actions.c | 12 1 file changed, 12 insertions(+) diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index 4f43369..e130c2f 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -1550,9 +1550,21 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, if (tcpHdr) { portField = >dest; checkField = >check; +l4Offload = isTx ? (BOOLEAN)csumInfo.Transmit.TcpChecksum : +((BOOLEAN)csumInfo.Receive.TcpChecksumSucceeded || + (BOOLEAN)csumInfo.Receive.TcpChecksumFailed); } else if (udpHdr) { portField = >dest; checkField = >check; +l4Offload = isTx ? (BOOLEAN)csumInfo.Transmit.UdpChecksum : +((BOOLEAN)csumInfo.Receive.UdpChecksumSucceeded || + (BOOLEAN)csumInfo.Receive.UdpChecksumFailed); +} + + if (l4Offload) { +*checkField = IPPseudoChecksum(>saddr, , +tcpHdr ? IPPROTO_TCP : IPPROTO_UDP, +ntohs(ipHdr->tot_len) - ipHdr->ihl * 4); } } -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [ovs-dev 1/1] datapath-windows: Reset ct_mark/ct_label to support ALG
Hi Jinjun, Thanks for the patch. It looks good to me, just a minor comment on the style. Thanks, Anand Kumar On 16/07/20, 10:13 AM, "Jinjun Gao" wrote: The ct_mark/ct_label setting on related connection keep the same behavior with Linux datapath. If one CT entry has parent/master entry, its ct_mark and ct_label should inherit from the corresponding part of parent/master entry at initialization. Signed-off-by: Jinjun Gao --- datapath-windows/ovsext/Conntrack.c | 87 +++-- 1 file changed, 55 insertions(+), 32 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index d065591..83baf99 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -789,60 +789,83 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx, static __inline VOID OvsConntrackSetMark(OvsFlowKey *key, POVS_CT_ENTRY entry, -UINT32 value, -UINT32 mask, +MD_MARK *mark, BOOLEAN *markChanged) { -UINT32 newMark; -newMark = value | (entry->mark & ~(mask)); -if (entry->mark != newMark) { +POVS_CT_ENTRY parent = entry->parent; +BOOLEAN changed = FALSE; +UINT32 newMark = 0; + +if (parent && parent->mark) { +newMark = parent->mark; +changed = TRUE; +} else if (mark) { +newMark = mark->value | (entry->mark & ~(mark->mask)); +changed = TRUE; +} + +if (changed && entry->mark != newMark) { entry->mark = newMark; key->ct.mark = newMark; *markChanged = TRUE; } } +static __inline BOOLEAN +OvsConntrackIsLabelsNonZero(const struct ovs_key_ct_labels *labels) +{ +UINT8 i; + +for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) { +if (labels->ct_labels_32[i]) [Anand] : Please add braces for the if block. 
+return TRUE; +} + +return FALSE; +} + static __inline void OvsConntrackSetLabels(OvsFlowKey *key, POVS_CT_ENTRY entry, - struct ovs_key_ct_labels *val, - struct ovs_key_ct_labels *mask, + MD_LABELS *labels, BOOLEAN *labelChanged) { -ovs_u128 v, m, pktMdLabel = {0}; -memcpy(, val, sizeof v); -memcpy(, mask, sizeof m); -memcpy(, >labels, sizeof(struct ovs_key_ct_labels)); +POVS_CT_ENTRY parent = entry->parent; -pktMdLabel.u64.lo = v.u64.lo | (pktMdLabel.u64.lo & ~(m.u64.lo)); -pktMdLabel.u64.hi = v.u64.hi | (pktMdLabel.u64.hi & ~(m.u64.hi)); +/* Inherit master's labels at labels initialization, if any. */ +if (!OvsConntrackIsLabelsNonZero(>labels) && +parent && OvsConntrackIsLabelsNonZero(>labels)) { +memcpy(>labels, >labels, OVS_CT_LABELS_LEN); +*labelChanged = TRUE; +} + +/* Use the same computing method with Linux kernel datapath. + * It is more clean and easy understanding. + */ +if (labels && OvsConntrackIsLabelsNonZero(>mask)) { +UINT8 i; +UINT32 * dst = entry->labels.ct_labels_32; +for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) { +dst[i] = (dst[i] & ~(labels->mask.ct_labels_32[i])) | + (labels->value.ct_labels_32[i] & labels->mask.ct_labels_32[i]); +} -if (!NdisEqualMemory(>labels, , - sizeof(struct ovs_key_ct_labels))) { *labelChanged = TRUE; } -NdisMoveMemory(>labels, , - sizeof(struct ovs_key_ct_labels)); -NdisMoveMemory(>ct.labels, , - sizeof(struct ovs_key_ct_labels)); + +/* Update flow key's ct labels */ +NdisMoveMemory(>ct.labels, >labels, OVS_CT_LABELS_LEN); } static void OvsCtSetMarkLabel(OvsFlowKey *key, - POVS_CT_ENTRY entry, - MD_MARK *mark, - MD_LABELS *labels, - BOOLEAN *triggerUpdateEvent) + POVS_CT_ENTRY entry, + MD_MARK *mark, + MD_LABELS *labels, + BOOLEAN *triggerUpdateEvent) { -if (mark) { -OvsConntrackSetMark(key, entry, mark->value, mark->mask, -triggerUpdateEvent); -} - -if (labels) {
Re: [ovs-dev] [PATCH]lib/stream-windows.c: Grant Access Privilege of Named Pipe to Creator
Acked-by: Anand Kumar Thanks, Anand Kumar On 1/22/20, 12:19 AM, "Ning Wu" wrote: From e42950665acee9aab941b26ebdd067ca0de908a3 Mon Sep 17 00:00:00 2001 From: Ning Wu Date: Tue, 21 Jan 2020 23:46:58 -0800 Subject: [PATCH]lib/stream-windows.c: Grant Access Privilege of Named Pipe to Creator Current implementation of ovs on windows only allows LocalSystem and Administrators to access the named pipe created with API of ovs. Thus any service that needs to invoke the API to create named pipe has to run as System account to interactive with ovs. It causes the system more vulnerable if one of those services was break into. The patch adds the creator owner account to allowed ACLs. Signed-off-by: Ning Wu --- Documentation/ref/ovsdb.7.rst | 3 ++- lib/stream-windows.c | 33 - 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/Documentation/ref/ovsdb.7.rst b/Documentation/ref/ovsdb.7.rst index b1f3f5d..da4dbed 100644 --- a/Documentation/ref/ovsdb.7.rst +++ b/Documentation/ref/ovsdb.7.rst @@ -422,7 +422,8 @@ punix: named . On Windows, listens on a local named pipe, creating a named pipe - to mimic the behavior of a Unix domain socket. + to mimic the behavior of a Unix domain socket. The ACLs of the named +pipe include LocalSystem, Administrators, and Creator Owner. All IP-based connection methods accept IPv4 and IPv6 addresses. To specify an IPv6 address, wrap it in square brackets, e.g. ``ssl:[::1]:6640``. Passive diff --git a/lib/stream-windows.c b/lib/stream-windows.c index 34bc610..5c4c55e 100644 --- a/lib/stream-windows.c +++ b/lib/stream-windows.c @@ -41,7 +41,7 @@ static void maybe_unlink_and_free(char *path); #define LOCAL_PREFIX ".\\pipe\\" /* Size of the allowed PSIDs for securing Named Pipe. */ -#define ALLOWED_PSIDS_SIZE 2 +#define ALLOWED_PSIDS_SIZE 3 /* This function has the purpose to remove all the slashes received in s. 
*/ static char * @@ -412,6 +412,9 @@ create_pnpipe(char *name) PACL acl = NULL; PSECURITY_DESCRIPTOR psd = NULL; HANDLE npipe; +HANDLE hToken = NULL; +DWORD dwBufSize = 0; +PTOKEN_USER pTokenUsr = NULL; /* Disable access over network. */ if (!AllocateAndInitializeSid(, 1, SECURITY_NETWORK_RID, @@ -438,6 +441,32 @@ create_pnpipe(char *name) goto handle_error; } +/* Open the access token of calling process */ +if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, )) { +VLOG_ERR_RL(, "Error opening access token of calling process."); +goto handle_error; +} + +/* get the buffer size buffer needed for SID */ +GetTokenInformation(hToken, TokenUser, NULL, 0, ); + +pTokenUsr = xmalloc(dwBufSize); +memset(pTokenUsr, 0, dwBufSize); + +/* Retrieve the token information in a TOKEN_USER structure. */ +if (!GetTokenInformation(hToken, TokenUser, pTokenUsr, dwBufSize, +)) { +VLOG_ERR_RL(, "Error retrieving token information."); +goto handle_error; +} +CloseHandle(hToken); + +if (!IsValidSid(pTokenUsr->User.Sid)) { +VLOG_ERR_RL(, "Invalid SID."); +goto handle_error; +} +allowedPsid[2] = pTokenUsr->User.Sid; + for (int i = 0; i < ALLOWED_PSIDS_SIZE; i++) { aclSize += sizeof(ACCESS_ALLOWED_ACE) + GetLengthSid(allowedPsid[i]) - @@ -490,11 +519,13 @@ create_pnpipe(char *name) npipe = CreateNamedPipe(name, PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED, PIPE_TYPE_MESSAGE | PIPE_READMODE_BYTE | PIPE_WAIT, 64, BUFSIZE, BUFSIZE, 0, ); +free(pTokenUsr); free(acl); free(psd); return npipe; handle_error: +free(pTokenUsr); free(acl); free(psd); return INVALID_HANDLE_VALUE; -- 2.6.2 -Original Message- From: Alin Serdean Sent: Wednesday, January 22, 2020 12:19 To: Ning Wu ; d...@openvswitch.org; Anand Kumar Cc: Lina Li ; Roy Luo Subject: RE: [PATCH] Grant Access Privilege of Named Pipe to Creator Hi, Sorry I missed the email. The direction sounds ok with me. It will surely help with unit tests, since right now they require elevated permissions. Adding also Anand in the loop. 
Anand, do you like the idea? Please also add a few lines to the documentation so users are aware of the change. The patch, as is, fails to apply; please rebase on master.
[ovs-dev] [PATCH v1] datapath-windows: Fix updating ct label when mask is specified
From: kumaranand When an existing label needs to be changed by specifying the bits to be updated using a mask, instead of updating only the masked bits, the whole label was getting overwritten. This patch fixes this issue. Signed-off-by: Anand Kumar --- datapath-windows/ovsext/Conntrack.c | 1 + 1 file changed, 1 insertion(+) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index bc00b60..ba56116 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -807,6 +807,7 @@ OvsConntrackSetLabels(OvsFlowKey *key, ovs_u128 v, m, pktMdLabel = {0}; memcpy(, val, sizeof v); memcpy(, mask, sizeof m); +memcpy(, >labels, sizeof(struct ovs_key_ct_labels)); pktMdLabel.u64.lo = v.u64.lo | (pktMdLabel.u64.lo & ~(m.u64.lo)); pktMdLabel.u64.hi = v.u64.hi | (pktMdLabel.u64.hi & ~(m.u64.hi)); -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH] [windows][wmi] Switch from internal port to all ports defined
Acked-by: Anand Kumar Thanks, Anand Kumar -- Forwarded message - From: Alin Gabriel Serdean mailto:aserd...@ovn.org>> Date: Mon, Mar 25, 2019 at 3:13 AM Subject: [ovs-dev] [PATCH] [windows][wmi] Switch from internal port to all ports defined To: mailto:d...@openvswitch.org>> Cc: Danting Liu mailto:danti...@vmware.com>> This patch changes the way we try to figure out if a port is defined on a given switch. Instead of looking only in the internal ports defined switch to all ports defined. This caused issues when trying to add a Hyper-V container port to a given OVS bridge. Reported-by: Danting Liu mailto:danti...@vmware.com>> Signed-off-by: Alin Gabriel Serdean mailto:aserd...@ovn.org>> --- lib/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/wmi.c b/lib/wmi.c index e6dc63cde..44c1d75e9 100644 --- a/lib/wmi.c +++ b/lib/wmi.c @@ -686,7 +686,7 @@ create_wmi_port(char *name) { /* Check if the element already exists on the switch. */ wchar_t internal_port_query[WMI_QUERY_COUNT] = L"SELECT * FROM " -L"Msvm_InternalEthernetPort WHERE ElementName = \""; +L"CIM_EthernetPort WHERE ElementName = \""; wide_name = xmalloc((strlen(name) + 1) * sizeof(wchar_t)); -- 2.21.0.windows.1 ___ dev mailing list d...@openvswitch.org<mailto:d...@openvswitch.org> https://mail.openvswitch.org/mailman/listinfo/ovs-dev<https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev=02%7C01%7Ckumaranand%40vmware.com%7Cd35c1331377b4dd3e23508d716292b12%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C637002238380040756=po6WCKDv6AScLWTXk7IlQqqpLijzRiLZIuQlTqu%2BTBU%3D=0> ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH] datapath-windows: Copy mru information when cloning a nbl.
When an nbl is cloned, the mru value stored in the original nbl context is lost, which skips refragmenting the cloned nbls. This patch fixes it. Signed-off-by: Anand Kumar --- datapath-windows/ovsext/BufferMgmt.c | 15 --- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/datapath-windows/ovsext/BufferMgmt.c b/datapath-windows/ovsext/BufferMgmt.c index 6627acf..acf3c13 100644 --- a/datapath-windows/ovsext/BufferMgmt.c +++ b/datapath-windows/ovsext/BufferMgmt.c @@ -260,14 +260,15 @@ static VOID OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx, UINT16 flags, UINT32 origDataLength, - UINT32 srcPortNo) + UINT32 srcPortNo, + UINT16 mru) { ctx->magic = OVS_CTX_MAGIC; ctx->refCount = 1; ctx->flags = flags; ctx->srcPortNo = srcPortNo; ctx->origDataLength = origDataLength; -ctx->mru = 0; +ctx->mru = mru; ctx->pendingSend = 0; } @@ -434,7 +435,7 @@ OvsAllocateFixSizeNBL(PVOID ovsContext, OvsInitNBLContext(ctx, OVS_BUFFER_FROM_FIX_SIZE_POOL | OVS_BUFFER_PRIVATE_FORWARD_CONTEXT, size, - OVS_DPPORT_NUMBER_INVALID); + OVS_DPPORT_NUMBER_INVALID, 0); line = __LINE__; allocate_done: OVS_LOG_LOUD("Allocate Fix NBL: %p, line: %d", nbl, line); @@ -547,7 +548,7 @@ OvsAllocateVariableSizeNBL(PVOID ovsContext, OvsInitNBLContext(ctx, OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA | OVS_BUFFER_PRIVATE_FORWARD_CONTEXT | OVS_BUFFER_FROM_ZERO_SIZE_POOL, - size, OVS_DPPORT_NUMBER_INVALID); + size, OVS_DPPORT_NUMBER_INVALID, 0); OVS_LOG_LOUD("Allocate variable size NBL: %p", nbl); return nbl; @@ -600,7 +601,7 @@ OvsInitExternalNBLContext(PVOID ovsContext, * complete. 
*/ OvsInitNBLContext(ctx, flags, NET_BUFFER_DATA_LENGTH(nb), - OVS_DPPORT_NUMBER_INVALID); + OVS_DPPORT_NUMBER_INVALID, 0); return ctx; } @@ -817,7 +818,7 @@ OvsPartialCopyNBL(PVOID ovsContext, srcNb = NET_BUFFER_LIST_FIRST_NB(nbl); ASSERT(srcNb); OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(srcNb) - copySize, - OVS_DPPORT_NUMBER_INVALID); + OVS_DPPORT_NUMBER_INVALID, srcCtx->mru); InterlockedIncrement((LONG volatile *)>refCount); @@ -1074,7 +1075,7 @@ OvsFullCopyNBL(PVOID ovsContext, OVS_BUFFER_PRIVATE_FORWARD_CONTEXT; OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(firstNb), - OVS_DPPORT_NUMBER_INVALID); + OVS_DPPORT_NUMBER_INVALID, srcCtx->mru); #ifdef DBG OvsDumpNetBufferList(nbl); -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v2] datapath-windows: Add Win10Analyze target
Thanks for the adding windows 10 analyze target. Acked-by: Anand Kumar Regards, Anand Kumar On 4/3/19, 10:48 AM, "Alin Gabriel Serdean" wrote: This patch adds a new target called `Win10Analyze` to the driver solution. It enables us to trigger static analysis over the Win10 target. Since the location of the ruleset of drivers is somewhat random starting from 1803: https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.osr.com%2Fblog%2F2018%2F05%2F21%2Fwdk-1803-ca%2Fdata=02%7C01%7Ckumaranand%40vmware.com%7C2580925eae8c4b9a4e4608d6b85c8b60%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636899104959730588sdata=%2FLesq2PWVRbSB55zyEQbRTrsL2W07s6on8SGlhGTg0w%3Dreserved=0 Commit the ruleset inside our repository. This is the same ruleset used for 8,8.1 and 10. Signed-off-by: Alin Gabriel Serdean --- datapath-windows/Package/package.VcxProj | 21 - datapath-windows/Package/package.VcxProj.user | 5 +- datapath-windows/automake.mk | 2 + .../misc/DriverRecommendedRules.ruleset | 86 +++ datapath-windows/ovsext.sln | 5 ++ datapath-windows/ovsext/ovsext.vcxproj| 39 - datapath-windows/ovsext/ovsext.vcxproj.user | 5 +- 7 files changed, 159 insertions(+), 4 deletions(-) create mode 100644 datapath-windows/misc/DriverRecommendedRules.ruleset diff --git a/datapath-windows/Package/package.VcxProj b/datapath-windows/Package/package.VcxProj index de747eed2..5939a8816 100644 --- a/datapath-windows/Package/package.VcxProj +++ b/datapath-windows/Package/package.VcxProj @@ -9,6 +9,10 @@ Win10 Release x64 + + Win10Analyze + x64 + Win8.1 Debug x64 @@ -71,6 +75,13 @@ WindowsKernelModeDriver$(PlatformToolsetVer) Desktop + + +true +10.0 + WindowsKernelModeDriver$(PlatformToolsetVer) +Desktop + Windows8 true @@ -113,6 +124,9 @@ true + +true + DbgengKernelDebugger False @@ -148,6 +162,11 @@ true + + + true + + true @@ -185,4 +204,4 @@ - + \ No newline at end of file diff --git a/datapath-windows/Package/package.VcxProj.user b/datapath-windows/Package/package.VcxProj.user index 
6231d93f7..5b0c53539 100644 --- a/datapath-windows/Package/package.VcxProj.user +++ b/datapath-windows/Package/package.VcxProj.user @@ -15,6 +15,9 @@ TestSign + +TestSign + TestSign @@ -24,4 +27,4 @@ TestSign - + \ No newline at end of file diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 3820041f6..b8cf5dd95 100644 --- a/datapath-windows/automake.mk +++ b/datapath-windows/automake.mk @@ -3,6 +3,7 @@ EXTRA_DIST += \ datapath-windows/Package/package.VcxProj.user \ datapath-windows/include/OvsDpInterfaceExt.h \ datapath-windows/include/OvsDpInterfaceCtExt.h \ + datapath-windows/misc/DriverRecommendedRules.ruleset \ datapath-windows/misc/OVS.psm1 \ datapath-windows/misc/install.cmd \ datapath-windows/misc/uninstall.cmd \ @@ -86,5 +87,6 @@ EXTRA_DIST += \ datapath-windows/ovsext/resource.h datapath_windows_analyze: all + MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win10Analyze" MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win8.1Analyze" MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win8Analyze" diff --git a/datapath-windows/misc/DriverRecommendedRules.ruleset b/datapath-windows/misc/DriverRecommendedRules.ruleset new file mode 100644 index 0..0faae599c --- /dev/null +++ b/datapath-windows/misc/DriverRe
Re: [ovs-dev] [PATCH] compiler: Fix compilation when using VStudio 2015/2017
Acked-by: Anand Kumar Thanks, Anand Kumar On 4/3/19, 5:02 AM, "ovs-dev-boun...@openvswitch.org on behalf of Alin Gabriel Serdean" wrote: This is somewhat a regression of: https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fopenvswitch%2Fovs%2Fcommit%2F27f141d44d95b4cabfd7eac47ace8d1201668b2cdata=02%7C01%7Ckumaranand%40vmware.com%7C8c1a3cec316246469a7408d6b82c379e%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636898897407724956sdata=ALCufRrll3wawrYBVISSBDuI%2FoLONal%2FnkbH9JwrW1c%3Dreserved=0 The main issue using `offsetof` from via the C compiler from MSVC 2015/2017 has issues and is buggy: https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbit.ly%2F2UvWwtidata=02%7C01%7Ckumaranand%40vmware.com%7C8c1a3cec316246469a7408d6b82c379e%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636898897407734950sdata=NqGbZONawrVrvtb57jh%2FYIRxAu4A1blZAYFOpyDtHEQ%3Dreserved=0 Until it is fixed, we define our own definition of `offsetof`. Signed-off-by: Alin Gabriel Serdean --- include/openvswitch/compiler.h | 12 1 file changed, 12 insertions(+) diff --git a/include/openvswitch/compiler.h b/include/openvswitch/compiler.h index c7cb9308d..5289a70f6 100644 --- a/include/openvswitch/compiler.h +++ b/include/openvswitch/compiler.h @@ -236,6 +236,18 @@ #define OVS_PREFETCH_WRITE(addr) #endif +/* Since Visual Studio 2015 there has been an effort to make offsetof a + * builtin_offsetof, unfortunately both implementation (the regular define and + * the built in one) are buggy and cause issues when using them via + * the C compiler. 
+ * e.g.: https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbit.ly%2F2UvWwtidata=02%7C01%7Ckumaranand%40vmware.com%7C8c1a3cec316246469a7408d6b82c379e%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636898897407734950sdata=NqGbZONawrVrvtb57jh%2FYIRxAu4A1blZAYFOpyDtHEQ%3Dreserved=0 + */ +#if _MSC_VER >= 1900 +#undef offsetof +#define offsetof(type, member) \ +((size_t)((char *)&(((type *)0)->member) - (char *)0)) +#endif + /* Build assertions. * * Use BUILD_ASSERT_DECL as a declaration or a statement, or BUILD_ASSERT as -- 2.21.0.windows.1 ___ dev mailing list d...@openvswitch.org https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-devdata=02%7C01%7Ckumaranand%40vmware.com%7C8c1a3cec316246469a7408d6b82c379e%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636898897407734950sdata=MmmiiH7FfvcAfTB7FUybE6icufW5vebGuY0DqKljnZ8%3Dreserved=0 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v2] datapath-windows: Do not send out nbls when cloned nbls are being accessed
As per MSDN documentation, "As soon as a filter driver calls the NdisFSendNetBufferLists function, it relinquishes ownership of the NET_BUFFER_LIST structures and all associated resources. A filter driver should never try to examine the NET_BUFFER_LIST structures or any associated data after calling NdisFSendNetBufferLists". https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/content/ndis/nf-ndis-ndisfsendnetbufferlists When freeing up the memory of a cloned nbl, the parent's nbl and context are accessed, which is incorrect and can cause a BSOD. With this patch, the original nbl is sent out only when the cloned nbl is done with packet processing and its memory is freed. Signed-off-by: Anand Kumar --- v1->v2: - Remove the else block and by default try to send the packet out. --- datapath-windows/ovsext/BufferMgmt.c | 9 - datapath-windows/ovsext/BufferMgmt.h | 2 ++ datapath-windows/ovsext/PacketIO.c | 10 ++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/datapath-windows/ovsext/BufferMgmt.c b/datapath-windows/ovsext/BufferMgmt.c index 47d872d..6627acf 100644 --- a/datapath-windows/ovsext/BufferMgmt.c +++ b/datapath-windows/ovsext/BufferMgmt.c @@ -81,6 +81,7 @@ #include "Flow.h" #include "Offload.h" #include "NetProto.h" +#include "PacketIO.h" #include "PacketParser.h" #include "Switch.h" #include "Vport.h" @@ -267,6 +268,7 @@ OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx, ctx->srcPortNo = srcPortNo; ctx->origDataLength = origDataLength; ctx->mru = 0; +ctx->pendingSend = 0; } @@ -1746,8 +1748,13 @@ OvsCompleteNBL(PVOID switch_ctx, if (parent != NULL) { ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(parent); ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); +UINT16 pendingSend = 1, exchange = 0; value = InterlockedDecrement((LONG volatile *)>refCount); -if (value == 0) { +InterlockedCompareExchange16((SHORT volatile *), exchange, (SHORT)ctx->pendingSend); +if (value == 1 && pendingSend == exchange) { +InterlockedExchange16((SHORT volatile *)>pendingSend, 
0); +OvsSendNBLIngress(context, parent, ctx->sendFlags); +} else if (value == 0){ return OvsCompleteNBL(context, parent, FALSE); } } diff --git a/datapath-windows/ovsext/BufferMgmt.h b/datapath-windows/ovsext/BufferMgmt.h index 2a74988..2ae3272 100644 --- a/datapath-windows/ovsext/BufferMgmt.h +++ b/datapath-windows/ovsext/BufferMgmt.h @@ -55,7 +55,9 @@ typedef union _OVS_BUFFER_CONTEXT { UINT32 origDataLength; UINT32 dataOffsetDelta; }; +ULONG sendFlags; UINT16 mru; +UINT16 pendingSend; /* Indicates packet can be sent or not. */ }; CHAR value[MEM_ALIGN_SIZE(sizeof(struct dummy))]; diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c index 57c583c..cc08407 100644 --- a/datapath-windows/ovsext/PacketIO.c +++ b/datapath-windows/ovsext/PacketIO.c @@ -161,6 +161,16 @@ OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, ASSERT(switchContext->dataFlowState == OvsSwitchRunning); +POVS_BUFFER_CONTEXT ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(netBufferLists); +LONG refCount = 1, exchange = 0; +InterlockedCompareExchange((LONG volatile *), exchange, (LONG)ctx->refCount); +if (refCount != exchange) { +InterlockedExchange((LONG volatile *)>sendFlags, sendFlags); +InterlockedExchange16((SHORT volatile *)>pendingSend, 1); +return; +} + +InterlockedExchange16((SHORT volatile *)>pendingSend, 0); NdisFSendNetBufferLists(switchContext->NdisFilterHandle, netBufferLists, NDIS_DEFAULT_PORT_NUMBER, sendFlags); } -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v1] datapath-windows: Do not send out nbls when cloned nbls are being accessed
As per MSDN documentation, "As soon as a filter driver calls the NdisFSendNetBufferLists function, it relinquishes ownership of the NET_BUFFER_LIST structures and all associated resources. A filter driver should never try to examine the NET_BUFFER_LIST structures or any associated data after calling NdisFSendNetBufferLists". https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/content/ndis/nf-ndis-ndisfsendnetbufferlists When freeing up the memory of a cloned nbl, the parent's nbl and context are accessed, which is incorrect and can cause a BSOD. With this patch, the original nbl is sent out only when the cloned nbl is done with packet processing and its memory is freed. Signed-off-by: Anand Kumar Change-Id: Ie662133a6fcd5a26ca3c87d31c9cee1fc56c2d27 --- datapath-windows/ovsext/BufferMgmt.c | 9 - datapath-windows/ovsext/BufferMgmt.h | 2 ++ datapath-windows/ovsext/PacketIO.c | 13 +++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/datapath-windows/ovsext/BufferMgmt.c b/datapath-windows/ovsext/BufferMgmt.c index 47d872d..6627acf 100644 --- a/datapath-windows/ovsext/BufferMgmt.c +++ b/datapath-windows/ovsext/BufferMgmt.c @@ -81,6 +81,7 @@ #include "Flow.h" #include "Offload.h" #include "NetProto.h" +#include "PacketIO.h" #include "PacketParser.h" #include "Switch.h" #include "Vport.h" @@ -267,6 +268,7 @@ OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx, ctx->srcPortNo = srcPortNo; ctx->origDataLength = origDataLength; ctx->mru = 0; +ctx->pendingSend = 0; } @@ -1746,8 +1748,13 @@ OvsCompleteNBL(PVOID switch_ctx, if (parent != NULL) { ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(parent); ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); +UINT16 pendingSend = 1, exchange = 0; value = InterlockedDecrement((LONG volatile *)>refCount); -if (value == 0) { +InterlockedCompareExchange16((SHORT volatile *), exchange, (SHORT)ctx->pendingSend); +if (value == 1 && pendingSend == exchange) { +InterlockedExchange16((SHORT volatile *)>pendingSend, 0); 
+OvsSendNBLIngress(context, parent, ctx->sendFlags); +} else if (value == 0){ return OvsCompleteNBL(context, parent, FALSE); } } diff --git a/datapath-windows/ovsext/BufferMgmt.h b/datapath-windows/ovsext/BufferMgmt.h index 2a74988..2ae3272 100644 --- a/datapath-windows/ovsext/BufferMgmt.h +++ b/datapath-windows/ovsext/BufferMgmt.h @@ -55,7 +55,9 @@ typedef union _OVS_BUFFER_CONTEXT { UINT32 origDataLength; UINT32 dataOffsetDelta; }; +ULONG sendFlags; UINT16 mru; +UINT16 pendingSend; /* Indicates packet can be sent or not. */ }; CHAR value[MEM_ALIGN_SIZE(sizeof(struct dummy))]; diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c index 57c583c..56876f2 100644 --- a/datapath-windows/ovsext/PacketIO.c +++ b/datapath-windows/ovsext/PacketIO.c @@ -161,8 +161,17 @@ OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, ASSERT(switchContext->dataFlowState == OvsSwitchRunning); -NdisFSendNetBufferLists(switchContext->NdisFilterHandle, netBufferLists, -NDIS_DEFAULT_PORT_NUMBER, sendFlags); +POVS_BUFFER_CONTEXT ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(netBufferLists); +LONG refCount = 1, exchange = 0; +InterlockedCompareExchange((LONG volatile *), exchange, (LONG)ctx->refCount); +if (refCount != exchange) { +InterlockedExchange((LONG volatile *)>sendFlags, sendFlags); +InterlockedExchange16((SHORT volatile *)>pendingSend, 1); +} else { +InterlockedExchange16((SHORT volatile *)>pendingSend, 0); +NdisFSendNetBufferLists(switchContext->NdisFilterHandle, netBufferLists, +NDIS_DEFAULT_PORT_NUMBER, sendFlags); +} } static __inline VOID -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH] datapath-windows: Fix vlan key getting stored in host byte order.
Update flowkey to set vlan information in network byte order. Signed-off-by: Anand Kumar --- datapath-windows/ovsext/Flow.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datapath-windows/ovsext/Flow.c b/datapath-windows/ovsext/Flow.c index 7994786..fdb1010 100644 --- a/datapath-windows/ovsext/Flow.c +++ b/datapath-windows/ovsext/Flow.c @@ -2350,8 +2350,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, } else { if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) { Eth_802_1pq_Tag *tag= (Eth_802_1pq_Tag *)>dix.typeNBO; -flow->l2.vlanKey.vlanTci = ((UINT16)tag->priority << 13) | -OVSWIN_VLAN_CFI | ((UINT16)tag->vidHi << 8) | tag->vidLo; +flow->l2.vlanKey.vlanTci = htons(((UINT16)tag->priority << 13) | +OVSWIN_VLAN_CFI | ((UINT16)tag->vidHi << 8) | tag->vidLo); flow->l2.vlanKey.vlanTpid = htons(ETH_TYPE_802_1PQ); offset = sizeof (Eth_802_1pq_Tag); } else { -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v2] datapath-windows: Address memory allocation issues for OVS_BUFFER_CONTEXT
With the current implementation, when the nbl pool is allocated, the context size is specified as 64 bytes, while the OVS_BUFFER_CONTEXT size is only 32 bytes. Since the context size is never changed, the additional memory is not required. This patch makes it simpler to allocate memory for OVS_BUFFER_CONTEXT so that it is always aligned to MEMORY_ALLOCATION_ALIGNMENT. This is achieved by updating the "value" field in the context structure, so that the number of elements in the array is always a multiple of MEMORY_ALLOCATION_ALIGNMENT. Also change the DEFAULT_CONTEXT_SIZE to accommodate the OVS_BUFFER_CONTEXT size. Signed-off-by: Anand Kumar --- datapath-windows/ovsext/BufferMgmt.h | 11 --- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/datapath-windows/ovsext/BufferMgmt.h b/datapath-windows/ovsext/BufferMgmt.h index dcf310a..2a74988 100644 --- a/datapath-windows/ovsext/BufferMgmt.h +++ b/datapath-windows/ovsext/BufferMgmt.h @@ -20,11 +20,8 @@ #define MEM_ALIGN MEMORY_ALLOCATION_ALIGNMENT #define MEM_ALIGN_SIZE(_x) ((MEM_ALIGN - 1 + (_x))/MEM_ALIGN * MEM_ALIGN) #define OVS_CTX_MAGIC 0xabcd - -#define OVS_DEFAULT_NBL_CONTEXT_SIZEMEM_ALIGN_SIZE(64) -#define OVS_DEFAULT_NBL_CONTEXT_FILL\ - (OVS_DEFAULT_NBL_CONTEXT_SIZE - sizeof (OVS_BUFFER_CONTEXT)) - +#define OVS_DEFAULT_NBL_CONTEXT_SIZEsizeof(OVS_BUFFER_CONTEXT) +#define OVS_DEFAULT_NBL_CONTEXT_FILL0 #define OVS_DEFAULT_DATA_SIZE 256 #define OVS_DEFAULT_HEADROOM_SIZE 128 #define OVS_FIX_NBL_DATA_SIZE(OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE) @@ -49,7 +46,7 @@ enum { }; typedef union _OVS_BUFFER_CONTEXT { -struct { +struct dummy { UINT16 magic; UINT16 flags; UINT32 srcPortNo; @@ -61,7 +58,7 @@ typedef union _OVS_BUFFER_CONTEXT { UINT16 mru; }; -UINT64 value[MEM_ALIGN_SIZE(32) >> 3]; +CHAR value[MEM_ALIGN_SIZE(sizeof(struct dummy))]; } OVS_BUFFER_CONTEXT, *POVS_BUFFER_CONTEXT; typedef struct _OVS_NBL_POOL { -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH] datapath-windows: Address memory allocation issues for OVS_BUFFER_CONTEXT
With the current implementation, when the nbl pool is allocated, the context size is specified as 64 bytes, while the OVS_BUFFER_CONTEXT size is only 32 bytes. Since the context size is never changed, the additional memory is not required. This patch makes it simpler to allocate memory for OVS_BUFFER_CONTEXT so that it is always aligned to MEMORY_ALLOCATION_ALIGNMENT. This is achieved by updating the "value" field in the context structure, so that the number of elements in the array is always a multiple of MEMORY_ALLOCATION_ALIGNMENT. Also change the DEFAULT_CONTEXT_SIZE to accommodate the OVS_BUFFER_CONTEXT size. Signed-off-by: Anand Kumar --- datapath-windows/ovsext/BufferMgmt.h | 11 --- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/datapath-windows/ovsext/BufferMgmt.h b/datapath-windows/ovsext/BufferMgmt.h index dcf310a..714ab3d 100644 --- a/datapath-windows/ovsext/BufferMgmt.h +++ b/datapath-windows/ovsext/BufferMgmt.h @@ -20,11 +20,8 @@ #define MEM_ALIGN MEMORY_ALLOCATION_ALIGNMENT #define MEM_ALIGN_SIZE(_x) ((MEM_ALIGN - 1 + (_x))/MEM_ALIGN * MEM_ALIGN) #define OVS_CTX_MAGIC 0xabcd - -#define OVS_DEFAULT_NBL_CONTEXT_SIZEMEM_ALIGN_SIZE(64) -#define OVS_DEFAULT_NBL_CONTEXT_FILL\ - (OVS_DEFAULT_NBL_CONTEXT_SIZE - sizeof (OVS_BUFFER_CONTEXT)) - +#define OVS_DEFAULT_NBL_CONTEXT_SIZEsizeof(OVS_BUFFER_CONTEXT)) +#define OVS_DEFAULT_NBL_CONTEXT_FILL0 #define OVS_DEFAULT_DATA_SIZE 256 #define OVS_DEFAULT_HEADROOM_SIZE 128 #define OVS_FIX_NBL_DATA_SIZE(OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE) @@ -49,7 +46,7 @@ enum { }; typedef union _OVS_BUFFER_CONTEXT { -struct { +struct dummy { UINT16 magic; UINT16 flags; UINT32 srcPortNo; @@ -61,7 +58,7 @@ typedef union _OVS_BUFFER_CONTEXT { UINT16 mru; }; -UINT64 value[MEM_ALIGN_SIZE(32) >> 3]; +CHAR value[MEM_ALIGN_SIZE(sizeof(struct dummy))]; } OVS_BUFFER_CONTEXT, *POVS_BUFFER_CONTEXT; typedef struct _OVS_NBL_POOL { -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH] datapath-windows: Fix nbl cleanup when memory allocation fails
Acked-by: Anand Kumar Thanks, Anand Kumar On 3/8/19, 1:23 PM, "ovs-dev-boun...@openvswitch.org on behalf of Sairam Venugopal via dev" wrote: StartNblIngressError should be called only when an NBL hasn't been modified. In this case the nbl context was initialized. Rely on existing packet completion mechanism to cleanup the NBL. Found while testing with DriverVerifier with limited memory setting enabled. Signed-off-by: Sairam Venugopal --- datapath-windows/ovsext/PacketIO.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c index 38e3e5f..57c583c 100644 --- a/datapath-windows/ovsext/PacketIO.c +++ b/datapath-windows/ovsext/PacketIO.c @@ -283,9 +283,8 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, RtlInitUnicodeString(, L"Cannot allocate NBLs with single NB."); -OvsStartNBLIngressError(switchContext, curNbl, -sendCompleteFlags, , -NDIS_STATUS_RESOURCES); +OvsAddPktCompletionList(, TRUE, sourcePort, +curNbl, 0, ); continue; } -- 2.9.0.windows.1 ___ dev mailing list d...@openvswitch.org https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-devdata=02%7C01%7Ckumaranand%40vmware.com%7Cf88cb293def34509deba08d6a40c3a25%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636876769821233618sdata=zY5NZ0d2F9esM76HXvWahDOOFLSQ%2FFBUqioPBJ8Tdqo%3Dreserved=0 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH] datapath-windows: Guard vport usage in user.c
Acked-by: Anand Kumar Thanks, Anand Kumar On 2/27/19, 6:10 AM, "ovs-dev-boun...@openvswitch.org on behalf of Alin Gabriel Serdean" wrote: When using a vport we need to guard its usage with the dispatch lock. Signed-off-by: Alin Gabriel Serdean --- datapath-windows/ovsext/User.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/datapath-windows/ovsext/User.c b/datapath-windows/ovsext/User.c index b43d7cc04..ed1fcbea8 100644 --- a/datapath-windows/ovsext/User.c +++ b/datapath-windows/ovsext/User.c @@ -452,14 +452,6 @@ OvsExecuteDpIoctl(OvsPacketExecute *execute) } fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl); -vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort); -if (vport) { -fwdDetail->SourcePortId = vport->portId; -fwdDetail->SourceNicIndex = vport->nicIndex; -} else { -fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; -fwdDetail->SourceNicIndex = 0; -} // XXX: Figure out if any of the other members of fwdDetail need to be set. status = OvsGetFlowMetadata(, execute->keyAttrs); @@ -502,6 +494,14 @@ OvsExecuteDpIoctl(OvsPacketExecute *execute) if (ndisStatus == NDIS_STATUS_SUCCESS) { NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, , 0); +vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort); +if (vport) { +fwdDetail->SourcePortId = vport->portId; +fwdDetail->SourceNicIndex = vport->nicIndex; +} else { +fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; +fwdDetail->SourceNicIndex = 0; +} ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl, vport ? 
vport->portNo : OVS_DPPORT_NUMBER_INVALID, -- 2.16.1.windows.1 ___ dev mailing list d...@openvswitch.org https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-devdata=02%7C01%7Ckumaranand%40vmware.com%7C0bd42693441d43f12bfa08d69cbd589d%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636868734386921985sdata=KCb%2FeQOwJIyXj6C7ZS9QePNUW6CZp7CwBWJ1obSHMvA%3Dreserved=0 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH] datapath-windows: Add annotations to find vport functions
Acked-by: Anand Kumar Thanks, Anand Kumar On 2/27/19, 9:34 AM, "ovs-dev-boun...@openvswitch.org on behalf of Alin Gabriel Serdean" wrote: Add annotations to find vport functions to check if the dispatch lock is held. Signed-off-by: Alin Gabriel Serdean --- datapath-windows/ovsext/Vport.c | 7 ++- datapath-windows/ovsext/Vport.h | 5 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/datapath-windows/ovsext/Vport.c b/datapath-windows/ovsext/Vport.c index e08cb90ce..f79324d28 100644 --- a/datapath-windows/ovsext/Vport.c +++ b/datapath-windows/ovsext/Vport.c @@ -135,7 +135,7 @@ HvCreatePort(POVS_SWITCH_CONTEXT switchContext, * Lookup by port name to see if this port with this name had been added * (and deleted) previously. */ -vport = OvsFindVportByHvNameW(gOvsSwitchContext, +vport = OvsFindVportByHvNameW(switchContext, portParam->PortFriendlyName.String, portParam->PortFriendlyName.Length); if (vport && vport->isAbsentOnHv == FALSE) { @@ -693,6 +693,7 @@ done: /* * OVS Vport related functionality. 
*/ +_Use_decl_annotations_ POVS_VPORT_ENTRY OvsFindVportByPortNo(POVS_SWITCH_CONTEXT switchContext, UINT32 portNo) @@ -787,6 +788,7 @@ OvsFindTunnelVportByPortType(POVS_SWITCH_CONTEXT switchContext, return NULL; } +_Use_decl_annotations_ POVS_VPORT_ENTRY OvsFindVportByOvsName(POVS_SWITCH_CONTEXT switchContext, PSTR name) @@ -810,6 +812,7 @@ OvsFindVportByOvsName(POVS_SWITCH_CONTEXT switchContext, } /* OvsFindVportByHvName: "name" is assumed to be null-terminated */ +_Use_decl_annotations_ POVS_VPORT_ENTRY OvsFindVportByHvNameW(POVS_SWITCH_CONTEXT switchContext, PWSTR wsName, SIZE_T wstrSize) @@ -862,6 +865,7 @@ Cleanup: return vport; } +_Use_decl_annotations_ POVS_VPORT_ENTRY OvsFindVportByHvNameA(POVS_SWITCH_CONTEXT switchContext, PSTR name) @@ -884,6 +888,7 @@ OvsFindVportByHvNameA(POVS_SWITCH_CONTEXT switchContext, return vport; } +_Use_decl_annotations_ POVS_VPORT_ENTRY OvsFindVportByPortIdAndNicIndex(POVS_SWITCH_CONTEXT switchContext, NDIS_SWITCH_PORT_ID portId, diff --git a/datapath-windows/ovsext/Vport.h b/datapath-windows/ovsext/Vport.h index 7d88f86fb..32cbf8bcc 100644 --- a/datapath-windows/ovsext/Vport.h +++ b/datapath-windows/ovsext/Vport.h @@ -122,15 +122,20 @@ typedef struct _OVS_VPORT_ENTRY { struct _OVS_SWITCH_CONTEXT; +_Requires_lock_held_(switchContext->dispatchLock) POVS_VPORT_ENTRY OvsFindVportByPortNo(POVS_SWITCH_CONTEXT switchContext, UINT32 portNo); /* "name" is null-terminated */ +_Requires_lock_held_(switchContext->dispatchLock) POVS_VPORT_ENTRY OvsFindVportByOvsName(POVS_SWITCH_CONTEXT switchContext, PSTR name); +_Requires_lock_held_(switchContext->dispatchLock) POVS_VPORT_ENTRY OvsFindVportByHvNameA(POVS_SWITCH_CONTEXT switchContext, PSTR name); +_Requires_lock_held_(switchContext->dispatchLock) POVS_VPORT_ENTRY OvsFindVportByHvNameW(POVS_SWITCH_CONTEXT switchContext, PWSTR wsName, SIZE_T wstrSize); +_Requires_lock_held_(switchContext->dispatchLock) POVS_VPORT_ENTRY OvsFindVportByPortIdAndNicIndex(POVS_SWITCH_CONTEXT switchContext, 
NDIS_SWITCH_PORT_ID portId, NDIS_SWITCH_NIC_INDEX index); -- 2.16.1.windows.1 ___ dev mailing list d...@openvswitch.org https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-devdata=02%7C01%7Ckumaranand%40vmware.com%7C5ae778841e0345fbaf6608d69cd9d189%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636868856711003678sdata=fUVDFay5I8qBsqoUyiCckwN%2BrEv8BtFIjHgOIwuNMYY%3Dreserved=0 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH] faq: Update features supported on Hyper-V
These features were added a while back, so updating the documentation. Signed-off-by: Anand Kumar --- Documentation/faq/releases.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index fd45efd..cd5aad1 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -110,8 +110,8 @@ Q: Are all features available with all datapaths? == == == = === Connection tracking 4.3YES YES YES Conntrack Fragment Reass. 4.3YES YES YES -NAT 4.6YES YES NO -Conntrack zone limit4.18 YES NO NO +NAT 4.6YES YES YES +Conntrack zone limit4.18 YES NO YES Tunnel - LISP NO YES NO NO Tunnel - STTNO YES NO YES Tunnel - GRE3.11 YES YES YES @@ -125,7 +125,7 @@ Q: Are all features available with all datapaths? QoS - Policing YESYES YES NO QoS - Shaping YESYES NO NO sFlow YESYES YES NO -IPFIX 3.10 YES YES NO +IPFIX 3.10 YES YES YES Set action YESYES YESPARTIAL NIC Bonding YESYES YES YES Multiple VTEPs YESYES YES YES -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH] datapath-windows: Add support for 'OVS_KEY_ATTR_ENCAP' key attribute.
Add a new structure in l2 header to accomodate vlan header, based of commit "d7efce7beff25052bd9083419200e1a47f0d6066 datapath: 802.1AD Flow handling, actions, vlan parsing, netlink attributes" Also reset vlan header in flow key, after deleting vlan tag from nbl With this change a sample vlan flow would look like, eth(src=0a:ea:8a:24:03:86,dst=0a:cd:fa:4d:15:5c),in_port(3),eth_type(0x8100), vlan(vid=2239,pcp=0),encap(eth_type(0x0800),ipv4(src=13.12.11.149,dst=13.12.11.107, proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)) Signed-off-by: Anand Kumar --- datapath-windows/ovsext/Actions.c| 3 + datapath-windows/ovsext/DpInternal.h | 12 +++- datapath-windows/ovsext/Flow.c | 126 +++ datapath-windows/ovsext/User.c | 19 ++ 4 files changed, 144 insertions(+), 16 deletions(-) diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index 6922f05..5c9b5c3 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -2057,6 +2057,9 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext, goto dropit; } } +/* Reset vlan header info in flowkey. */ +key->l2.vlanKey.vlanTci = 0; +key->l2.vlanKey.vlanTpid = 0; break; } diff --git a/datapath-windows/ovsext/DpInternal.h b/datapath-windows/ovsext/DpInternal.h index 3e351b7..58e7ed8 100644 --- a/datapath-windows/ovsext/DpInternal.h +++ b/datapath-windows/ovsext/DpInternal.h @@ -112,6 +112,11 @@ typedef struct Icmp6Key { struct in6_addr ndTarget;/* IPv6 neighbor discovery (ND) target. */ } Icmp6Key; /* Size of 72 byte. */ +typedef struct VlanKey { +ovs_be16 vlanTci;/* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */ +ovs_be16 vlanTpid; /* Vlan type. Generally 802.1q or 802.1ad.*/ +} VlanKey; + typedef struct L2Key { uint32_t inPort; /* Port number of input port. */ union { @@ -123,9 +128,10 @@ typedef struct L2Key { }; uint8_t dlSrc[6];/* Ethernet source address. */ uint8_t dlDst[6];/* Ethernet destination address. */ -ovs_be16 vlanTci;/* If 802.1Q, TCI | VLAN_CFI; otherwise 0. 
*/ ovs_be16 dlType; /* Ethernet frame type. */ -} L2Key; /* Size of 24 byte. */ +struct VlanKey vlanKey; /* VLAN header. */ +uint16_t pad[3]; /* Padding 6 bytes. */ +} L2Key; /* Size of 32 byte. */ /* Number of packet attributes required to store OVS tunnel key. */ #define NUM_PKT_ATTR_REQUIRED 35 @@ -182,7 +188,7 @@ typedef struct MplsKey { typedef __declspec(align(8)) struct OvsFlowKey { OvsIPv4TunnelKey tunKey; /* 280 bytes */ -L2Key l2;/* 24 bytes */ +L2Key l2;/* 32 bytes */ union { /* These headers are mutually exclusive. */ IpKey ipKey; /* size 16 */ diff --git a/datapath-windows/ovsext/Flow.c b/datapath-windows/ovsext/Flow.c index f880987..7994786 100644 --- a/datapath-windows/ovsext/Flow.c +++ b/datapath-windows/ovsext/Flow.c @@ -115,7 +115,7 @@ const NL_POLICY nlFlowKeyPolicy[] = { [OVS_KEY_ATTR_PRIORITY] = {.type = NL_A_UNSPEC, .minLen = 4, .maxLen = 4, .optional = TRUE}, [OVS_KEY_ATTR_IN_PORT] = {.type = NL_A_UNSPEC, .minLen = 4, - .maxLen = 4, .optional = FALSE}, + .maxLen = 4, .optional = TRUE}, [OVS_KEY_ATTR_ETHERNET] = {.type = NL_A_UNSPEC, .minLen = sizeof(struct ovs_key_ethernet), .maxLen = sizeof(struct ovs_key_ethernet), @@ -457,6 +457,7 @@ _FlowNlGetCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, NL_BUFFER nlBuf; PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX]; PNL_ATTR tunnelAttrs[__OVS_TUNNEL_KEY_ATTR_MAX]; +PNL_ATTR encapAttrs[__OVS_KEY_ATTR_MAX]; NlBufInit(, usrParamsCtx->outputBuffer, usrParamsCtx->outputLength); @@ -464,6 +465,7 @@ _FlowNlGetCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, RtlZeroMemory(, sizeof(OvsFlowGetOutput)); UINT32 keyAttrOffset = 0; UINT32 tunnelKeyAttrOffset = 0; +UINT32 encapOffset = 0; BOOLEAN ok; NL_ERROR nlError = NL_ERROR_SUCCESS; @@ -503,6 +505,23 @@ _FlowNlGetCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, goto done; } +if (keyAttrs[OVS_KEY_ATTR_ENCAP]) { +encapOffset = (UINT32)((PCHAR) (keyAttrs[OVS_KEY_ATTR_ENCAP]) + - (PCHAR)nlMsgHdr); + +if ((NlAttrParseNested(nlMsgHdr, encapOffset, + 
NlAttrLen(keyAttrs[OVS_KEY_ATTR_ENCAP]), + nlFlowKeyPolicy, + ARRAY_SIZE(nlFlowKeyPolicy), + encapAttrs,
Re: [ovs-dev] [PATCH] datapath-windows: Fix race condition when deleting internal ports
Acked-by: Anand Kumar Thanks, Anand Kumar On 12/21/18, 5:56 AM, "ovs-dev-boun...@openvswitch.org on behalf of Alin Gabriel Serdean" wrote: We need to hold the port lock until all the operations with a port are completed. Found by inspection. Signed-off-by: Alin Gabriel Serdean --- datapath-windows/ovsext/Vport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datapath-windows/ovsext/Vport.c b/datapath-windows/ovsext/Vport.c index 380870a11..e08cb90ce 100644 --- a/datapath-windows/ovsext/Vport.c +++ b/datapath-windows/ovsext/Vport.c @@ -632,13 +632,13 @@ HvDisconnectNic(POVS_SWITCH_CONTEXT switchContext, OvsRemoveAndDeleteVport(NULL, switchContext, vport, FALSE, TRUE); OvsPostVportEvent(); } -NdisReleaseRWLock(switchContext->dispatchLock, ); if (isInternalPort) { OvsInternalAdapterDown(vport->portNo, vport->netCfgInstanceId); OvsRemoveAndDeleteVport(NULL, switchContext, vport, TRUE, TRUE); OvsPostVportEvent(); } +NdisReleaseRWLock(switchContext->dispatchLock, ); done: VPORT_NIC_EXIT(nicParam); -- 2.16.1.windows.1 ___ dev mailing list d...@openvswitch.org https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-devdata=02%7C01%7Ckumaranand%40vmware.com%7Cebdfb5ed4e7a4d43445108d6674c0aa0%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636809973664095278sdata=VKRorIGV6IFhZ1kd6PPeQ22nNwzJok9WThrZxU0dedQ%3Dreserved=0 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH] datapath-windows: Use layers info to extract IP header in IpFragment
- Rely on layers l3Offset field to get offset of IP header. - Aslo fix passing 'newNbl' to IP fragment which is not required. - Fixed including a header file twice. Signed-off-by: Anand Kumar --- datapath-windows/ovsext/BufferMgmt.c | 16 -- datapath-windows/ovsext/Conntrack.c | 11 +++ datapath-windows/ovsext/Conntrack.h | 1 - datapath-windows/ovsext/IpFragment.c | 62 datapath-windows/ovsext/IpFragment.h | 4 +-- 5 files changed, 41 insertions(+), 53 deletions(-) diff --git a/datapath-windows/ovsext/BufferMgmt.c b/datapath-windows/ovsext/BufferMgmt.c index 448cd76..c163836 100644 --- a/datapath-windows/ovsext/BufferMgmt.c +++ b/datapath-windows/ovsext/BufferMgmt.c @@ -1101,9 +1101,9 @@ nblcopy_error: NDIS_STATUS GetIpHeaderInfo(PNET_BUFFER_LIST curNbl, +const POVS_PACKET_HDR_INFO hdrInfo, UINT32 *hdrSize) { -CHAR *ethBuf[sizeof(EthHdr)]; EthHdr *eth; IPHdr *ipHdr; PNET_BUFFER curNb; @@ -,16 +,14 @@ GetIpHeaderInfo(PNET_BUFFER_LIST curNbl, curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); -eth = (EthHdr *)NdisGetDataBuffer(curNb, ETH_HEADER_LENGTH, - (PVOID), 1, 0); +eth = (EthHdr *)NdisGetDataBuffer(curNb, + hdrInfo->l4Offset, + NULL, 1, 0); if (eth == NULL) { return NDIS_STATUS_INVALID_PACKET; } -ipHdr = (IPHdr *)((PCHAR)eth + ETH_HEADER_LENGTH); -if (ipHdr == NULL) { -return NDIS_STATUS_INVALID_PACKET; -} -*hdrSize = (UINT32)(ETH_HEADER_LENGTH + (ipHdr->ihl * 4)); +ipHdr = (IPHdr *)((PCHAR)eth + hdrInfo->l3Offset); +*hdrSize = (UINT32)(hdrInfo->l3Offset + (ipHdr->ihl * 4)); return NDIS_STATUS_SUCCESS; } @@ -1380,7 +1378,7 @@ OvsFragmentNBL(PVOID ovsContext, /* Figure out the header size */ if (isIpFragment) { -status = GetIpHeaderInfo(nbl, ); +status = GetIpHeaderInfo(nbl, hdrInfo, ); } else { status = GetSegmentHeaderInfo(nbl, hdrInfo, , ); } diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 5be8e4d..bc00b60 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ 
b/datapath-windows/ovsext/Conntrack.c @@ -489,10 +489,8 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) static __inline NDIS_STATUS OvsDetectCtPacket(OvsForwardingContext *fwdCtx, - OvsFlowKey *key, - PNET_BUFFER_LIST *newNbl) + OvsFlowKey *key) { -/* Currently we support only Unfragmented TCP packets */ switch (ntohs(key->l2.dlType)) { case ETH_TYPE_IPV4: if (key->ipKey.nwFrag != OVS_FRAG_TYPE_NONE) { @@ -500,8 +498,8 @@ OvsDetectCtPacket(OvsForwardingContext *fwdCtx, >curNbl, fwdCtx->completionList, fwdCtx->fwdDetail->SourcePortId, - key->tunKey.tunnelId, - newNbl); + >layers, + key->tunKey.tunnelId); } if (key->ipKey.nwProto == IPPROTO_TCP || key->ipKey.nwProto == IPPROTO_UDP @@ -1010,11 +1008,10 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, PCHAR helper = NULL; NAT_ACTION_INFO natActionInfo; OVS_PACKET_HDR_INFO *layers = >layers; -PNET_BUFFER_LIST newNbl = NULL; NDIS_STATUS status; memset(, 0, sizeof natActionInfo); -status = OvsDetectCtPacket(fwdCtx, key, ); +status = OvsDetectCtPacket(fwdCtx, key); if (status != NDIS_STATUS_SUCCESS) { return status; } diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h index c3d317f..bc6580d 100644 --- a/datapath-windows/ovsext/Conntrack.h +++ b/datapath-windows/ovsext/Conntrack.h @@ -21,7 +21,6 @@ #include "Actions.h" #include "Debug.h" #include "Flow.h" -#include "Actions.h" #include #ifdef OVS_DBG_MOD diff --git a/datapath-windows/ovsext/IpFragment.c b/datapath-windows/ovsext/IpFragment.c index bb2cfe0..afb8e50 100644 --- a/datapath-windows/ovsext/IpFragment.c +++ b/datapath-windows/ovsext/IpFragment.c @@ -140,7 +140,7 @@ OvsIpv4Reassemble(POVS_SWITCH_CONTEXT switchContext, OvsCompletionList *completionList, NDIS_SWITCH_PORT_ID sourcePort, POVS_IPFRAG_ENTRY entry, - PNET_BUFFER_LIST *newNbl) + POVS_PACKET_HDR_INFO layers) { NDIS_STATUS status = NDIS_STATUS_SUCCESS; NDIS_STRING filterReason; @@ -148,29 +148,27 @@ OvsIpv4Reassemble(POVS_SWITCH_CONTEXT switc
Re: [ovs-dev] [PATCH v3 2/2] windows: Add set_detach function to daemon-windows.c
Acked-by: Anand Kumar Thanks, Anand Kumar On 10/5/18, 7:55 AM, "ovs-dev-boun...@openvswitch.org on behalf of Alin Gabriel Serdean" wrote: The daemon-windows file is missing a `set_detach` routine, so add it. This will be useful in the long run. Signed-off-by: Alin Gabriel Serdean Acked-by: Ben Pfaff --- v3: no change. v2: Fix typo in title, add Ack --- lib/daemon-windows.c | 10 +- lib/daemon.h | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/daemon-windows.c b/lib/daemon-windows.c index 70c5f5d56..7e5f264f5 100644 --- a/lib/daemon-windows.c +++ b/lib/daemon-windows.c @@ -82,6 +82,14 @@ daemon_usage(void) "unexpected failure. \n"); } +/* Sets up a following call to service_start() to detach from the foreground + * session, running this process in the background. */ +void +set_detach(void) +{ +detach = true; +} + /* Registers the call-back and configures the actions in case of a failure * with the Windows services manager. */ void @@ -357,7 +365,7 @@ detach_process(int argc, char *argv[]) /* We are only interested in the '--detach' and '--pipe-handle'. */ for (i = 0; i < argc; i ++) { -if (!strcmp(argv[i], "--detach")) { +if (!detach && !strcmp(argv[i], "--detach")) { detach = true; } else if (!strncmp(argv[i], "--pipe-handle", 13)) { /* If running as a child, return. 
*/ diff --git a/lib/daemon.h b/lib/daemon.h index f33e9df8d..094157496 100644 --- a/lib/daemon.h +++ b/lib/daemon.h @@ -121,6 +121,7 @@ pid_t read_pidfile(const char *name); #define DAEMON_OPTION_HANDLERS \ case OPT_DETACH:\ +set_detach(); \ break; \ \ case OPT_NO_SELF_CONFINEMENT: \ @@ -139,6 +140,7 @@ pid_t read_pidfile(const char *name); break; \ \ case OPT_SERVICE: \ +set_detach(); \ break; \ \ case OPT_SERVICE_MONITOR: \ @@ -159,6 +161,7 @@ pid_t read_pidfile(const char *name); void control_handler(DWORD request); void set_pipe_handle(const char *pipe_handle); +void set_detach(void); #endif /* _WIN32 */ bool get_detach(void); -- 2.16.1.windows.1 ___ dev mailing list d...@openvswitch.org https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-devdata=02%7C01%7Ckumaranand%40vmware.com%7C8a0d9c61c3f34216ab8408d62ad29470%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C1%7C0%7C636743481259851516sdata=I3H2lLIx4%2FpkmibFxXcB3Z5BkCCY9vNW3I9%2BZpMHt1s%3Dreserved=0 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [RFC PATCH v2] datapath-windows: Remove IP neighbor entries when internal adapter is down.
Remove the IP neighboring entries when adapter is down, so that when 'OVS_IPHELPER_INSTANCE' is deleted, no stale entries are present Also fix accessing iphelper instance without acquiring the lock. Signed-off-by: Anand Kumar --- v1->v2: Rebase and address comments --- datapath-windows/ovsext/IpHelper.c | 36 ++-- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/datapath-windows/ovsext/IpHelper.c b/datapath-windows/ovsext/IpHelper.c index 876da92..d7fa2ca 100644 --- a/datapath-windows/ovsext/IpHelper.c +++ b/datapath-windows/ovsext/IpHelper.c @@ -72,7 +72,7 @@ static OVS_IP_HELPER_THREAD_CONTEXT ovsIpHelperThreadContext; static POVS_IPFORWARD_ENTRY OvsLookupIPForwardEntry(PIP_ADDRESS_PREFIX prefix); static VOID OvsRemoveIPForwardEntry(POVS_IPFORWARD_ENTRY ipf); static VOID OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr); -static VOID OvsRemoveAllFwdEntriesWithPortNo(UINT32 portNo); +static VOID OvsRemoveIPNeighEntriesWithInstance(POVS_IPHELPER_INSTANCE instance); static VOID OvsCleanupIpHelperRequestList(VOID); static VOID OvsCleanupFwdTable(VOID); static VOID OvsAddToSortedNeighList(POVS_IPNEIGH_ENTRY ipn); @@ -1243,18 +1243,16 @@ OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr) static VOID -OvsRemoveAllFwdEntriesWithPortNo(UINT32 portNo) +OvsRemoveIPNeighEntriesWithInstance(POVS_IPHELPER_INSTANCE instance) { -UINT32 i; -PLIST_ENTRY link, next; - -for (i = 0; i < OVS_FWD_HASH_TABLE_SIZE; i++) { -LIST_FORALL_SAFE([i], link, next) { -POVS_FWD_ENTRY fwdEntry; - -fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, link); -if (fwdEntry->info.srcPortNo == portNo) { -OvsRemoveFwdEntry(fwdEntry); +if (ovsNumFwdEntries) { +POVS_IPNEIGH_ENTRY ipn; +PLIST_ENTRY link, next; +LIST_FORALL_SAFE(, link, next) { +ipn = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink); +POVS_IPHELPER_INSTANCE ipnInstance = ipn->instance; +if (ipnInstance == instance) { +OvsRemoveIPNeighEntry(ipn); } } } @@ -1857,7 +1855,7 @@ OvsStartIpHelper(PVOID data) IsEqualGUID(>netCfgId, )) { 
NdisAcquireRWLockWrite(ovsTableLock, , 0); -OvsRemoveAllFwdEntriesWithPortNo(instance->portNo); +OvsRemoveIPNeighEntriesWithInstance(instance); NdisReleaseRWLock(ovsTableLock, ); RemoveEntryList(>link); @@ -1908,14 +1906,16 @@ OvsStartIpHelper(PVOID data) NTSTATUS status; POVS_IPHELPER_INSTANCE instance = ipn->instance; NdisReleaseSpinLock(); -ExAcquireResourceExclusiveLite(, TRUE); -status = OvsGetOrResolveIPNeigh(>internalRow, -ipAddr, ); -OvsUpdateIPNeighEntry(ipAddr, , status); +if (instance) { +ExAcquireResourceExclusiveLite(>lock, TRUE); -ExReleaseResourceLite(); +status = OvsGetOrResolveIPNeigh(>internalRow, +ipAddr, ); +OvsUpdateIPNeighEntry(ipAddr, , status); +ExReleaseResourceLite(>lock); +} NdisAcquireSpinLock(); } if (!IsListEmpty()) { -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH RFC] datapath-windows: Remove neighbor entries when Iphelper instance is deleted
Hi Shashank, Thanks for the review, please find my response inline. Regards, Anand Kumar On 10/4/18, 6:31 PM, "Shashank Ram" wrote: On 10/3/18, 4:30 AM, "ovs-dev-boun...@openvswitch.org on behalf of Anand Kumar" wrote: 'OVS_IPHELPER_INSTANCE' is linked to ovsSortedIPNeighList. So when an Iphelper instance is deleted, also delete the ip neighboring entries associated with that instance. Also fix accessing Iphelper instance without acquiring thelock. Signed-off-by: Anand Kumar --- datapath-windows/ovsext/IpHelper.c | 18 ++ 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/datapath-windows/ovsext/IpHelper.c b/datapath-windows/ovsext/IpHelper.c index 6bbd096..581be61 100644 --- a/datapath-windows/ovsext/IpHelper.c +++ b/datapath-windows/ovsext/IpHelper.c @@ -1446,6 +1446,17 @@ static VOID OvsIpHelperDeleteInstance(POVS_IPHELPER_INSTANCE instance) { if (instance) { +if (ovsNumFwdEntries) { Is this check really needed? If there are no entries, then LIST_FORALL_SAFE will not enter the loop? Yes, this is required. Ip Neighboring entry (ipn) and Ip forwarding entry (ipf) have 1:1 mapping, i.e. each ipf will have ipn associated with it. 
+POVS_IPNEIGH_ENTRY ipn; +PLIST_ENTRY link, next; +LIST_FORALL_SAFE(, link, next) { +ipn = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink); +POVS_IPHELPER_INSTANCE ipnInstance = (POVS_IPHELPER_INSTANCE)ipn->context; +if (ipnInstance == instance) { +OvsRemoveIPNeighEntry(ipn); +} +} +} ExDeleteResourceLite(>lock); OvsFreeMemoryWithTag(instance, OVS_IPHELPER_POOL_TAG); } @@ -1942,13 +1953,13 @@ OvsStartIpHelper(PVOID data) NTSTATUS status; POVS_IPHELPER_INSTANCE instance = (POVS_IPHELPER_INSTANCE)ipn->context; NdisReleaseSpinLock(); -ExAcquireResourceExclusiveLite(, TRUE); +ExAcquireResourceExclusiveLite(>lock, TRUE); status = OvsGetOrResolveIPNeigh(>internalRow, ipAddr, ); OvsUpdateIPNeighEntry(ipAddr, , status); -ExReleaseResourceLite(); +ExReleaseResourceLite(>lock); NdisAcquireSpinLock(); } @@ -2098,11 +2109,10 @@ OvsCleanupIpHelper(VOID) OvsFreeMemoryWithTag(ovsFwdHashTable, OVS_IPHELPER_POOL_TAG); OvsFreeMemoryWithTag(ovsRouteHashTable, OVS_IPHELPER_POOL_TAG); OvsFreeMemoryWithTag(ovsNeighHashTable, OVS_IPHELPER_POOL_TAG); - +OvsIpHelperDeleteAllInstances(); Why is this being changed? This is required because any write operation to 'ovsSortedIPNeighList ' is protected by 'ovsIpHelperLock'. With this patch, ipn entry is removed from sorted list in ' OvsIpHelperDeleteInstance ' . NdisFreeRWLock(ovsTableLock); NdisFreeSpinLock(); -OvsIpHelperDeleteAllInstances(); ExDeleteResourceLite(); } -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-devdata=02%7C01%7Crams%40vmware.com%7Cc2c81b5b031b48819dca08d628bb034a%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C1%7C0%7C636741181029502269sdata=WfD24VN49hX0vKBrxBaxf7FVIF5JhkTpk1YI%2BzdtwX4%3Dreserved=0 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH RFC] datapath-windows: Remove neighbor entries when Iphelper instance is deleted
'OVS_IPHELPER_INSTANCE' is linked to ovsSortedIPNeighList. So when an Iphelper instance is deleted, also delete the ip neighboring entries associated with that instance. Also fix accessing the Iphelper instance without acquiring the lock. Signed-off-by: Anand Kumar --- datapath-windows/ovsext/IpHelper.c | 18 ++ 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/datapath-windows/ovsext/IpHelper.c b/datapath-windows/ovsext/IpHelper.c index 6bbd096..581be61 100644 --- a/datapath-windows/ovsext/IpHelper.c +++ b/datapath-windows/ovsext/IpHelper.c @@ -1446,6 +1446,17 @@ static VOID OvsIpHelperDeleteInstance(POVS_IPHELPER_INSTANCE instance) { if (instance) { +if (ovsNumFwdEntries) { +POVS_IPNEIGH_ENTRY ipn; +PLIST_ENTRY link, next; +LIST_FORALL_SAFE(, link, next) { +ipn = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink); +POVS_IPHELPER_INSTANCE ipnInstance = (POVS_IPHELPER_INSTANCE)ipn->context; +if (ipnInstance == instance) { +OvsRemoveIPNeighEntry(ipn); +} +} +} ExDeleteResourceLite(>lock); OvsFreeMemoryWithTag(instance, OVS_IPHELPER_POOL_TAG); } @@ -1942,13 +1953,13 @@ OvsStartIpHelper(PVOID data) NTSTATUS status; POVS_IPHELPER_INSTANCE instance = (POVS_IPHELPER_INSTANCE)ipn->context; NdisReleaseSpinLock(); -ExAcquireResourceExclusiveLite(, TRUE); +ExAcquireResourceExclusiveLite(>lock, TRUE); status = OvsGetOrResolveIPNeigh(>internalRow, ipAddr, ); OvsUpdateIPNeighEntry(ipAddr, , status); -ExReleaseResourceLite(); +ExReleaseResourceLite(>lock); NdisAcquireSpinLock(); } @@ -2098,11 +2109,10 @@ OvsCleanupIpHelper(VOID) OvsFreeMemoryWithTag(ovsFwdHashTable, OVS_IPHELPER_POOL_TAG); OvsFreeMemoryWithTag(ovsRouteHashTable, OVS_IPHELPER_POOL_TAG); OvsFreeMemoryWithTag(ovsNeighHashTable, OVS_IPHELPER_POOL_TAG); - +OvsIpHelperDeleteAllInstances(); NdisFreeRWLock(ovsTableLock); NdisFreeSpinLock(); -OvsIpHelperDeleteAllInstances(); ExDeleteResourceLite(); } -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH] datapath-windows: Fix payload length calculation in Conntrack.h
Hi Alin, Thanks for fixing this, Acked-by: Anand Kumar Regards, Anand Kumar On 9/19/18, 3:37 PM, "ovs-dev-boun...@openvswitch.org on behalf of Alin Gabriel Serdean" wrote: The payload calculation in OvsGetTcpHeader is wrong: `ntohs(ipHdr->tot_len) - expr` instead of `ntohs((ipHdr->tot_len) - expr)`. We already have a macro for that calculation defined in NetProto.h so use it. Signed-off-by: Alin Gabriel Serdean --- datapath-windows/ovsext/Conntrack.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h index d4152b33a..044fb436c 100644 --- a/datapath-windows/ovsext/Conntrack.h +++ b/datapath-windows/ovsext/Conntrack.h @@ -175,8 +175,7 @@ OvsGetTcpHeader(PNET_BUFFER_LIST nbl, tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4); if (tcp->doff * 4 >= sizeof *tcp) { NdisMoveMemory(dest, tcp, sizeof(TCPHdr)); -*tcpPayloadLen = ntohs((ipHdr->tot_len) - (ipHdr->ihl * 4) - - (TCP_HDR_LEN(tcp))); +*tcpPayloadLen = TCP_DATA_LENGTH(ipHdr, tcp); return storage; } -- 2.16.1.windows.1 ___ dev mailing list d...@openvswitch.org https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-devdata=02%7C01%7Ckumaranand%40vmware.com%7C30dc4304c4934da3166a08d61e807a8f%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C1%7C0%7C636729934502072772sdata=FWmdwckcC%2B5ymgR0Ryt8iBU0YHAc6%2BRFPoFz2w66vlc%3Dreserved=0 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v3] datapath-windows: Add support to configure ct zone limits
Hi Ben, Thanks for the review. I think this bug got introduced when addressing review comments. I have fixed it and sent out a v4. Regards, Anand Kumar On 9/18/18, 1:28 AM, "Ben Pfaff" wrote: I didn't really review this but a glance at it showed one possible issue. Missing () around definition here: > +#define CT_MAX_ZONE UINT16_MAX + 1 Bang? > +zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) * > +CT_MAX_ZONE, OVS_CT_POOL_TAG); Thanks, Ben. ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v4] datapath-windows: Add support to configure ct zone limits
This patch implements limiting conntrack entries per zone using dpctl commands. Example: ovs-appctl dpctl/ct-set-limits default=5 zone=1,limit=2 zone=1,limit=3 ovs-appctl dpct/ct-del-limits zone=4 ovs-appctl dpct/ct-get-limits zone=1,2,3 - Also update the netlink-socket.c to support netlink family 'OVS_WIN_NL_CTLIMIT_FAMILY_ID' for conntrack zone limit. Signed-off-by: Anand Kumar --- v3->v4: - Fix macro defnition v2->v3: - Change loop index variable from UINT16 to UINT32 v1->v2: - Use spinlock to guard against multiple access. - Use Interlock api to update zone counters. - Address review comments. --- datapath-windows/include/OvsDpInterfaceExt.h | 1 + datapath-windows/ovsext/Conntrack.c | 167 ++- datapath-windows/ovsext/Conntrack.h | 12 ++ datapath-windows/ovsext/Datapath.c | 34 +- lib/netlink-socket.c | 5 + 5 files changed, 216 insertions(+), 3 deletions(-) diff --git a/datapath-windows/include/OvsDpInterfaceExt.h b/datapath-windows/include/OvsDpInterfaceExt.h index db91c3e..5fd8000 100644 --- a/datapath-windows/include/OvsDpInterfaceExt.h +++ b/datapath-windows/include/OvsDpInterfaceExt.h @@ -72,6 +72,7 @@ */ #define OVS_WIN_NL_CT_FAMILY_ID (NLMSG_MIN_TYPE + 7) +#define OVS_WIN_NL_CTLIMIT_FAMILY_ID (NLMSG_MIN_TYPE + 8) #define OVS_WIN_NL_INVALID_MCGRP_ID 0 #define OVS_WIN_NL_MCGRP_START_ID100 diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index dd16602..5be8e4d 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -27,13 +27,17 @@ #define WINDOWS_TICK 1000 #define SEC_TO_UNIX_EPOCH 11644473600LL #define SEC_TO_NANOSEC 10LL +#define CT_MAX_ZONE (UINT16_MAX + 1) KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL; +static NDIS_SPIN_LOCK ovsCtZoneLock; +static POVS_CT_ZONE_INFO zoneInfo = NULL; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static ULONG ctTotalEntries; +static 
ULONG defaultCtLimit; static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple); static __inline NDIS_STATUS @@ -94,6 +98,20 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) ZwClose(threadHandle); threadHandle = NULL; +zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) * +CT_MAX_ZONE, OVS_CT_POOL_TAG); +if (zoneInfo == NULL) { +status = STATUS_INSUFFICIENT_RESOURCES; +goto freeBucketLock; +} + +NdisAllocateSpinLock(); +defaultCtLimit = CT_MAX_ENTRIES; +for (UINT32 i = 0; i < CT_MAX_ZONE; i++) { +zoneInfo[i].entries = 0; +zoneInfo[i].limit = defaultCtLimit; +} + status = OvsNatInit(); if (status != STATUS_SUCCESS) { @@ -149,6 +167,25 @@ OvsCleanupConntrack(VOID) OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG); ovsCtBucketLock = NULL; OvsNatCleanup(); +NdisFreeSpinLock(); +if (zoneInfo) { +OvsFreeMemoryWithTag(zoneInfo, OVS_CT_POOL_TAG); +} +} + +VOID +OvsCtSetZoneLimit(int zone, ULONG value) { +NdisAcquireSpinLock(); +if (zone == -1) { +/* Set default limit for all zones. 
*/ +defaultCtLimit = value; +for (UINT32 i = 0; i < CT_MAX_ZONE; i++) { +zoneInfo[i].limit = value; +} +} else { +zoneInfo[(UINT16)zone].limit = value; +} +NdisReleaseSpinLock(); } /* @@ -263,6 +300,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, >link); NdisInterlockedIncrement((PLONG)); +NdisInterlockedIncrement((PLONG)[ctx->key.zone].entries); NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], ); return TRUE; } @@ -437,6 +475,7 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) if (entry->natInfo.natAction) { OvsNatDeleteKey(>key); } +NdisInterlockedDecrement((PLONG)[entry->key.zone].entries); OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); RemoveEntryList(>link); OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql); @@ -877,12 +916,16 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, ); } else { -if (commit && ctTotalEntries >= CT_MAX_ENTRIES) { +if (commit && (ctTotalEntries >= CT_MAX_ENTRIES || +zoneInfo[ctx.key.zone].entries >= zoneInfo[ctx.key.zone].limit)) { /* Don't proceed with processing if the max limit has been hit. * This blocks only new entries from being created and doesn't * affect existing connections. */ -OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries
Re: [ovs-dev] [PATCH v2] datapath-windows: Add support to configure ct zone limits
Hi Shashank, Thanks for catching this. I have addressed this and send out a v3 of the patch. Regards, Anand Kumar On 8/31/18, 10:12 AM, "Shashank Ram" wrote: Hi Anand, one thing looks off to me here. In the signature of OvsCreateNlMsgFromCtLimit(), numAttrs is UINT32, but the loop’s index variable is an UINT16: for (UINT16 i = 0; i < numAttrs; i++) Please address this discrepancy. -- Thanks, Shashank On 8/28/18, 10:37 AM, "ovs-dev-boun...@openvswitch.org on behalf of Anand Kumar" wrote: This patch implements limiting conntrack entries per zone using dpctl commands. Example: ovs-appctl dpctl/ct-set-limits default=5 zone=1,limit=2 zone=1,limit=3 ovs-appctl dpct/ct-del-limits zone=4 ovs-appctl dpct/ct-get-limits zone=1,2,3 - Also update the netlink-socket.c to support netlink family 'OVS_WIN_NL_CTLIMIT_FAMILY_ID' for conntrack zone limit. Signed-off-by: Anand Kumar v1->v2: - Use spinlock to guard against multiple access. - Use Interlock api to update zone counters. - Address review comments. 
--- datapath-windows/include/OvsDpInterfaceExt.h | 1 + datapath-windows/ovsext/Conntrack.c | 167 ++- datapath-windows/ovsext/Conntrack.h | 12 ++ datapath-windows/ovsext/Datapath.c | 34 +- lib/netlink-socket.c | 5 + 5 files changed, 216 insertions(+), 3 deletions(-) diff --git a/datapath-windows/include/OvsDpInterfaceExt.h b/datapath-windows/include/OvsDpInterfaceExt.h index db91c3e..5fd8000 100644 --- a/datapath-windows/include/OvsDpInterfaceExt.h +++ b/datapath-windows/include/OvsDpInterfaceExt.h @@ -72,6 +72,7 @@ */ #define OVS_WIN_NL_CT_FAMILY_ID (NLMSG_MIN_TYPE + 7) +#define OVS_WIN_NL_CTLIMIT_FAMILY_ID (NLMSG_MIN_TYPE + 8) #define OVS_WIN_NL_INVALID_MCGRP_ID 0 #define OVS_WIN_NL_MCGRP_START_ID100 diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index dd16602..d0900bd 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -27,13 +27,17 @@ #define WINDOWS_TICK 1000 #define SEC_TO_UNIX_EPOCH 11644473600LL #define SEC_TO_NANOSEC 10LL +#define CT_MAX_ZONE UINT16_MAX + 1 KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL; +static NDIS_SPIN_LOCK ovsCtZoneLock; +static POVS_CT_ZONE_INFO zoneInfo = NULL; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static ULONG ctTotalEntries; +static ULONG defaultCtLimit; static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple); static __inline NDIS_STATUS @@ -94,6 +98,20 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) ZwClose(threadHandle); threadHandle = NULL; +zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) * +CT_MAX_ZONE, OVS_CT_POOL_TAG); +if (zoneInfo == NULL) { +status = STATUS_INSUFFICIENT_RESOURCES; +goto freeBucketLock; +} + +NdisAllocateSpinLock(); +defaultCtLimit = CT_MAX_ENTRIES; +for (UINT32 i = 0; i < CT_MAX_ZONE; i++) { +zoneInfo[i].entries = 0; +zoneInfo[i].limit = defaultCtLimit; +} + status = 
OvsNatInit(); if (status != STATUS_SUCCESS) { @@ -149,6 +167,25 @@ OvsCleanupConntrack(VOID) OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG); ovsCtBucketLock = NULL; OvsNatCleanup(); +NdisFreeSpinLock(); +if (zoneInfo) { +OvsFreeMemoryWithTag(zoneInfo, OVS_CT_POOL_TAG); +} +} + +VOID +OvsCtSetZoneLimit(int zone, ULONG value) { +NdisAcquireSpinLock(); +if (zone == -1) { +/* Set default limit for all zones. */ +defaultCtLimit = value; +for (UINT32 i = 0; i < CT_MAX_ZONE; i++) { +zoneInfo[i].limit = value; +} +} else { +zoneInfo[(UINT16)zone].limit = value; +} +NdisReleaseSpinLock(); } /* @@ -263,6 +300,7 @@ O
[ovs-dev] [PATCH v3] datapath-windows: Add support to configure ct zone limits
This patch implements limiting conntrack entries per zone using dpctl commands. Example: ovs-appctl dpctl/ct-set-limits default=5 zone=1,limit=2 zone=1,limit=3 ovs-appctl dpct/ct-del-limits zone=4 ovs-appctl dpct/ct-get-limits zone=1,2,3 - Also update the netlink-socket.c to support netlink family 'OVS_WIN_NL_CTLIMIT_FAMILY_ID' for conntrack zone limit. Signed-off-by: Anand Kumar --- v2->v3: - Change loop index variable from UINT16 to UINT32 v1->v2: - Use spinlock to guard against multiple access. - Use Interlock api to update zone counters. - Address review comments. --- datapath-windows/include/OvsDpInterfaceExt.h | 1 + datapath-windows/ovsext/Conntrack.c | 167 ++- datapath-windows/ovsext/Conntrack.h | 12 ++ datapath-windows/ovsext/Datapath.c | 34 +- lib/netlink-socket.c | 5 + 5 files changed, 216 insertions(+), 3 deletions(-) diff --git a/datapath-windows/include/OvsDpInterfaceExt.h b/datapath-windows/include/OvsDpInterfaceExt.h index db91c3e..5fd8000 100644 --- a/datapath-windows/include/OvsDpInterfaceExt.h +++ b/datapath-windows/include/OvsDpInterfaceExt.h @@ -72,6 +72,7 @@ */ #define OVS_WIN_NL_CT_FAMILY_ID (NLMSG_MIN_TYPE + 7) +#define OVS_WIN_NL_CTLIMIT_FAMILY_ID (NLMSG_MIN_TYPE + 8) #define OVS_WIN_NL_INVALID_MCGRP_ID 0 #define OVS_WIN_NL_MCGRP_START_ID100 diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index dd16602..537f1d8 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -27,13 +27,17 @@ #define WINDOWS_TICK 1000 #define SEC_TO_UNIX_EPOCH 11644473600LL #define SEC_TO_NANOSEC 10LL +#define CT_MAX_ZONE UINT16_MAX + 1 KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL; +static NDIS_SPIN_LOCK ovsCtZoneLock; +static POVS_CT_ZONE_INFO zoneInfo = NULL; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static ULONG ctTotalEntries; +static ULONG defaultCtLimit; static 
__inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple); static __inline NDIS_STATUS @@ -94,6 +98,20 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) ZwClose(threadHandle); threadHandle = NULL; +zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) * +CT_MAX_ZONE, OVS_CT_POOL_TAG); +if (zoneInfo == NULL) { +status = STATUS_INSUFFICIENT_RESOURCES; +goto freeBucketLock; +} + +NdisAllocateSpinLock(); +defaultCtLimit = CT_MAX_ENTRIES; +for (UINT32 i = 0; i < CT_MAX_ZONE; i++) { +zoneInfo[i].entries = 0; +zoneInfo[i].limit = defaultCtLimit; +} + status = OvsNatInit(); if (status != STATUS_SUCCESS) { @@ -149,6 +167,25 @@ OvsCleanupConntrack(VOID) OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG); ovsCtBucketLock = NULL; OvsNatCleanup(); +NdisFreeSpinLock(); +if (zoneInfo) { +OvsFreeMemoryWithTag(zoneInfo, OVS_CT_POOL_TAG); +} +} + +VOID +OvsCtSetZoneLimit(int zone, ULONG value) { +NdisAcquireSpinLock(); +if (zone == -1) { +/* Set default limit for all zones. */ +defaultCtLimit = value; +for (UINT32 i = 0; i < CT_MAX_ZONE; i++) { +zoneInfo[i].limit = value; +} +} else { +zoneInfo[(UINT16)zone].limit = value; +} +NdisReleaseSpinLock(); } /* @@ -263,6 +300,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, >link); NdisInterlockedIncrement((PLONG)); +NdisInterlockedIncrement((PLONG)[ctx->key.zone].entries); NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], ); return TRUE; } @@ -437,6 +475,7 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) if (entry->natInfo.natAction) { OvsNatDeleteKey(>key); } +NdisInterlockedDecrement((PLONG)[entry->key.zone].entries); OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); RemoveEntryList(>link); OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql); @@ -877,12 +916,16 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, ); } else { -if (commit && ctTotalEntries >= CT_MAX_ENTRIES) { +if (commit && (ctTotalEntries >= CT_MAX_ENTRIES || +zoneInfo[ctx.key.zone].entries >= zoneInfo[ctx.key.zone].limit)) { /* Don't proceed with processing if 
the max limit has been hit. * This blocks only new entries from being created and doesn't * affect existing connections. */ -OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries); +OVS_LOG_ERROR("Conntrack Limit hit: zone(%u), zone
[ovs-dev] [PATCH v2] datapath-windows: Add support to configure ct zone limits
This patch implements limiting conntrack entries per zone using dpctl commands. Example: ovs-appctl dpctl/ct-set-limits default=5 zone=1,limit=2 zone=1,limit=3 ovs-appctl dpct/ct-del-limits zone=4 ovs-appctl dpct/ct-get-limits zone=1,2,3 - Also update the netlink-socket.c to support netlink family 'OVS_WIN_NL_CTLIMIT_FAMILY_ID' for conntrack zone limit. Signed-off-by: Anand Kumar v1->v2: - Use spinlock to guard against multiple access. - Use Interlock api to update zone counters. - Address review comments. --- datapath-windows/include/OvsDpInterfaceExt.h | 1 + datapath-windows/ovsext/Conntrack.c | 167 ++- datapath-windows/ovsext/Conntrack.h | 12 ++ datapath-windows/ovsext/Datapath.c | 34 +- lib/netlink-socket.c | 5 + 5 files changed, 216 insertions(+), 3 deletions(-) diff --git a/datapath-windows/include/OvsDpInterfaceExt.h b/datapath-windows/include/OvsDpInterfaceExt.h index db91c3e..5fd8000 100644 --- a/datapath-windows/include/OvsDpInterfaceExt.h +++ b/datapath-windows/include/OvsDpInterfaceExt.h @@ -72,6 +72,7 @@ */ #define OVS_WIN_NL_CT_FAMILY_ID (NLMSG_MIN_TYPE + 7) +#define OVS_WIN_NL_CTLIMIT_FAMILY_ID (NLMSG_MIN_TYPE + 8) #define OVS_WIN_NL_INVALID_MCGRP_ID 0 #define OVS_WIN_NL_MCGRP_START_ID100 diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index dd16602..d0900bd 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -27,13 +27,17 @@ #define WINDOWS_TICK 1000 #define SEC_TO_UNIX_EPOCH 11644473600LL #define SEC_TO_NANOSEC 10LL +#define CT_MAX_ZONE UINT16_MAX + 1 KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL; +static NDIS_SPIN_LOCK ovsCtZoneLock; +static POVS_CT_ZONE_INFO zoneInfo = NULL; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static ULONG ctTotalEntries; +static ULONG defaultCtLimit; static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 
*tuple); static __inline NDIS_STATUS @@ -94,6 +98,20 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) ZwClose(threadHandle); threadHandle = NULL; +zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) * +CT_MAX_ZONE, OVS_CT_POOL_TAG); +if (zoneInfo == NULL) { +status = STATUS_INSUFFICIENT_RESOURCES; +goto freeBucketLock; +} + +NdisAllocateSpinLock(); +defaultCtLimit = CT_MAX_ENTRIES; +for (UINT32 i = 0; i < CT_MAX_ZONE; i++) { +zoneInfo[i].entries = 0; +zoneInfo[i].limit = defaultCtLimit; +} + status = OvsNatInit(); if (status != STATUS_SUCCESS) { @@ -149,6 +167,25 @@ OvsCleanupConntrack(VOID) OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG); ovsCtBucketLock = NULL; OvsNatCleanup(); +NdisFreeSpinLock(); +if (zoneInfo) { +OvsFreeMemoryWithTag(zoneInfo, OVS_CT_POOL_TAG); +} +} + +VOID +OvsCtSetZoneLimit(int zone, ULONG value) { +NdisAcquireSpinLock(); +if (zone == -1) { +/* Set default limit for all zones. */ +defaultCtLimit = value; +for (UINT32 i = 0; i < CT_MAX_ZONE; i++) { +zoneInfo[i].limit = value; +} +} else { +zoneInfo[(UINT16)zone].limit = value; +} +NdisReleaseSpinLock(); } /* @@ -263,6 +300,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, >link); NdisInterlockedIncrement((PLONG)); +NdisInterlockedIncrement((PLONG)[ctx->key.zone].entries); NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], ); return TRUE; } @@ -437,6 +475,7 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) if (entry->natInfo.natAction) { OvsNatDeleteKey(>key); } +NdisInterlockedDecrement((PLONG)[entry->key.zone].entries); OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); RemoveEntryList(>link); OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql); @@ -877,12 +916,16 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, ); } else { -if (commit && ctTotalEntries >= CT_MAX_ENTRIES) { +if (commit && (ctTotalEntries >= CT_MAX_ENTRIES || +zoneInfo[ctx.key.zone].entries >= zoneInfo[ctx.key.zone].limit)) { /* Don't proceed with processing if the max limit has been hit. 
* This blocks only new entries from being created and doesn't * affect existing connections. */ -OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries); +OVS_LOG_ERROR("Conntrack Limit hit: zone(%u), zoneLimit(%lu)," + "zoneEntries(%lu),
Re: [ovs-dev] [PATCH] datapath-windows: Add support to configure ct zone limits
Hi Sairam, Thanks for the review, please find my response inline. I send out a v2 addressing review comments. Regards, Anand Kumar On 8/24/18, 2:58 PM, "Sairam Venugopal" wrote: Hi Anand, Thanks for the patch. See comments inline. Thanks, Sairam On 8/21/18, 2:58 PM, "ovs-dev-boun...@openvswitch.org on behalf of Anand Kumar" wrote: This patch implements limiting conntrack entries per zone using dpctl commands. Example: ovs-appctl dpctl/ct-set-limits default=5 zone=1,limit=2 zone=1,limit=3 ovs-appctl dpct/ct-del-limits zone=4 ovs-appctl dpct/ct-get-limits zone=1,2,3 - Also update the netlink-socket.c to support netlink family 'OVS_WIN_NL_CTLIMIT_FAMILY_ID' for conntrack zone limit. Signed-off-by: Anand Kumar --- datapath-windows/include/OvsDpInterfaceExt.h | 1 + datapath-windows/ovsext/Conntrack.c | 163 ++- datapath-windows/ovsext/Conntrack.h | 12 ++ datapath-windows/ovsext/Datapath.c | 34 +- lib/netlink-socket.c | 5 + 5 files changed, 212 insertions(+), 3 deletions(-) diff --git a/datapath-windows/include/OvsDpInterfaceExt.h b/datapath-windows/include/OvsDpInterfaceExt.h index db91c3e..5fd8000 100644 --- a/datapath-windows/include/OvsDpInterfaceExt.h +++ b/datapath-windows/include/OvsDpInterfaceExt.h @@ -72,6 +72,7 @@ */ #define OVS_WIN_NL_CT_FAMILY_ID (NLMSG_MIN_TYPE + 7) +#define OVS_WIN_NL_CTLIMIT_FAMILY_ID (NLMSG_MIN_TYPE + 8) #define OVS_WIN_NL_INVALID_MCGRP_ID 0 #define OVS_WIN_NL_MCGRP_START_ID100 diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index dd16602..b806cd7 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -34,6 +34,8 @@ static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static ULONG ctTotalEntries; +static POVS_CT_ZONE_INFO zoneInfo = NULL; +static ULONG defaultCtLimit; static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple); static __inline NDIS_STATUS @@ -99,6 
+101,19 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) if (status != STATUS_SUCCESS) { OvsCleanupConntrack(); } + Sai: Can you move the following prior to the OvsNatInit or handle OvsCleanupConntrack()? Also, shouldn't zoneInfo have a lock for manipulation? [AK]: Sure, I will move it before OvsNatInit(). A lock is needed only when there are multiple calls to manipulate same zone id/defaults in parallel. I will handle this with a spinlock in v2 patch. +zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) * +(UINT16_MAX + 1), OVS_CT_POOL_TAG); +if (zoneInfo == NULL) { +status = STATUS_INSUFFICIENT_RESOURCES; +goto freeBucketLock; +} + +defaultCtLimit = CT_MAX_ENTRIES; +for (int i = 0; i <= UINT16_MAX; i++) { +zoneInfo[i].entries = 0; +zoneInfo[i].limit = defaultCtLimit; +} return STATUS_SUCCESS; freeBucketLock: @@ -149,6 +164,22 @@ OvsCleanupConntrack(VOID) OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG); ovsCtBucketLock = NULL; OvsNatCleanup(); +if (zoneInfo) { +OvsFreeMemoryWithTag(zoneInfo, OVS_CT_POOL_TAG); +} +} + +VOID Sai: can we set zone to UINT16 instead of int? [AK]: zone cannot be UINT16 since it is set to -1 when ct-set-limits is called with a default argument. +OvsCtSetZoneLimit(int zone, ULONG value) { + if (zone == -1) { +/* Set default limit for all zones. */ +defaultCtLimit = value; +for (UINT32 i = 0; i <= UINT16_MAX; i++) { +zoneInfo[i].limit = value; +} +} else { +zoneInfo[(UINT16)zone].limit = value; +} } /* @@ -263,6 +294,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, >link); NdisInterlockedIncrement((PLONG)); +zoneInfo[ctx->key.zone].entries++; NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], ); return TRUE; } @@ -437,6 +469,7 @
Re: [ovs-dev] [PATCH] datapath-windows: Add support to configure ct zone limits
Hi Shashank, Thanks for reviewing the patch. Please find response inline. Regards, Anand Kumar On 8/27/18, 9:00 AM, "Shashank Ram" wrote: On 08/21/2018 02:57 PM, Anand Kumar wrote: > This patch implements limiting conntrack entries > per zone using dpctl commands. > > Example: > ovs-appctl dpctl/ct-set-limits default=5 zone=1,limit=2 zone=1,limit=3 > ovs-appctl dpct/ct-del-limits zone=4 > ovs-appctl dpct/ct-get-limits zone=1,2,3 > > - Also update the netlink-socket.c to support netlink family >'OVS_WIN_NL_CTLIMIT_FAMILY_ID' for conntrack zone limit. > > Signed-off-by: Anand Kumar > --- > datapath-windows/include/OvsDpInterfaceExt.h | 1 + > datapath-windows/ovsext/Conntrack.c | 163 ++- > datapath-windows/ovsext/Conntrack.h | 12 ++ > datapath-windows/ovsext/Datapath.c | 34 +- > lib/netlink-socket.c | 5 + > 5 files changed, 212 insertions(+), 3 deletions(-) > > diff --git a/datapath-windows/include/OvsDpInterfaceExt.h b/datapath-windows/include/OvsDpInterfaceExt.h > index db91c3e..5fd8000 100644 > --- a/datapath-windows/include/OvsDpInterfaceExt.h > +++ b/datapath-windows/include/OvsDpInterfaceExt.h > @@ -72,6 +72,7 @@ >*/ > > #define OVS_WIN_NL_CT_FAMILY_ID (NLMSG_MIN_TYPE + 7) > +#define OVS_WIN_NL_CTLIMIT_FAMILY_ID (NLMSG_MIN_TYPE + 8) > > #define OVS_WIN_NL_INVALID_MCGRP_ID 0 > #define OVS_WIN_NL_MCGRP_START_ID100 > diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c > index dd16602..b806cd7 100644 > --- a/datapath-windows/ovsext/Conntrack.c > +++ b/datapath-windows/ovsext/Conntrack.c > @@ -34,6 +34,8 @@ static OVS_CT_THREAD_CTX ctThreadCtx; > static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL; > extern POVS_SWITCH_CONTEXT gOvsSwitchContext; > static ULONG ctTotalEntries; > +static POVS_CT_ZONE_INFO zoneInfo = NULL; > +static ULONG defaultCtLimit; > > static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple); > static __inline NDIS_STATUS > @@ -99,6 +101,19 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) > if 
(status != STATUS_SUCCESS) { > OvsCleanupConntrack(); > } > + > +zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) * > +(UINT16_MAX + 1), OVS_CT_POOL_TAG); Please define UINT16_MAX as an appropriate macro in the CT module and use that. It is not only more intuitive in the code, but is also more safe in terms of the abstraction. [AK]: Done > +if (zoneInfo == NULL) { > +status = STATUS_INSUFFICIENT_RESOURCES; > +goto freeBucketLock; > +} > + > +defaultCtLimit = CT_MAX_ENTRIES; > +for (int i = 0; i <= UINT16_MAX; i++) { Please define UINT16_MAX as CT_XXX [AK]: Done > +zoneInfo[i].entries = 0; > +zoneInfo[i].limit = defaultCtLimit; > +} > return STATUS_SUCCESS; > > freeBucketLock: > @@ -149,6 +164,22 @@ OvsCleanupConntrack(VOID) > OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG); > ovsCtBucketLock = NULL; > OvsNatCleanup(); > +if (zoneInfo) { > +OvsFreeMemoryWithTag(zoneInfo, OVS_CT_POOL_TAG); > +} > +} > + > +VOID > +OvsCtSetZoneLimit(int zone, ULONG value) { > + if (zone == -1) { > +/* Set default limit for all zones. */ > +defaultCtLimit = value; > +for (UINT32 i = 0; i <= UINT16_MAX; i++) { Why is a UINT32 being used as the index variable? This is because of not properly making use of a well defined macro in the CT module that abstracts UNINT16_MAX + 1 as the max limit. [AK]: Need a 32 bit variable as the index, since max value exceeds 16 bits. > +zoneInfo[i].limit = value; > +} > +} else { > +zoneInfo[(UINT16)zone].limit = value; > +} > } > > /* > @@ -263,6 +294,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, > >link); > > NdisInterlockedIncrement((PLONG)); > +zoneInfo[ctx->key.zone].entries++; > NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], ); > return TRUE; > } > @@ -437,6 +469,7 @@ OvsCtEntry
[ovs-dev] [PATCH] datapath-windows: Add support to configure ct zone limits
This patch implements limiting conntrack entries per zone using dpctl commands. Example: ovs-appctl dpctl/ct-set-limits default=5 zone=1,limit=2 zone=1,limit=3 ovs-appctl dpct/ct-del-limits zone=4 ovs-appctl dpct/ct-get-limits zone=1,2,3 - Also update the netlink-socket.c to support netlink family 'OVS_WIN_NL_CTLIMIT_FAMILY_ID' for conntrack zone limit. Signed-off-by: Anand Kumar --- datapath-windows/include/OvsDpInterfaceExt.h | 1 + datapath-windows/ovsext/Conntrack.c | 163 ++- datapath-windows/ovsext/Conntrack.h | 12 ++ datapath-windows/ovsext/Datapath.c | 34 +- lib/netlink-socket.c | 5 + 5 files changed, 212 insertions(+), 3 deletions(-) diff --git a/datapath-windows/include/OvsDpInterfaceExt.h b/datapath-windows/include/OvsDpInterfaceExt.h index db91c3e..5fd8000 100644 --- a/datapath-windows/include/OvsDpInterfaceExt.h +++ b/datapath-windows/include/OvsDpInterfaceExt.h @@ -72,6 +72,7 @@ */ #define OVS_WIN_NL_CT_FAMILY_ID (NLMSG_MIN_TYPE + 7) +#define OVS_WIN_NL_CTLIMIT_FAMILY_ID (NLMSG_MIN_TYPE + 8) #define OVS_WIN_NL_INVALID_MCGRP_ID 0 #define OVS_WIN_NL_MCGRP_START_ID100 diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index dd16602..b806cd7 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -34,6 +34,8 @@ static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static ULONG ctTotalEntries; +static POVS_CT_ZONE_INFO zoneInfo = NULL; +static ULONG defaultCtLimit; static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple); static __inline NDIS_STATUS @@ -99,6 +101,19 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) if (status != STATUS_SUCCESS) { OvsCleanupConntrack(); } + +zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) * +(UINT16_MAX + 1), OVS_CT_POOL_TAG); +if (zoneInfo == NULL) { +status = STATUS_INSUFFICIENT_RESOURCES; +goto freeBucketLock; +} + +defaultCtLimit = 
CT_MAX_ENTRIES; +for (int i = 0; i <= UINT16_MAX; i++) { +zoneInfo[i].entries = 0; +zoneInfo[i].limit = defaultCtLimit; +} return STATUS_SUCCESS; freeBucketLock: @@ -149,6 +164,22 @@ OvsCleanupConntrack(VOID) OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG); ovsCtBucketLock = NULL; OvsNatCleanup(); +if (zoneInfo) { +OvsFreeMemoryWithTag(zoneInfo, OVS_CT_POOL_TAG); +} +} + +VOID +OvsCtSetZoneLimit(int zone, ULONG value) { + if (zone == -1) { +/* Set default limit for all zones. */ +defaultCtLimit = value; +for (UINT32 i = 0; i <= UINT16_MAX; i++) { +zoneInfo[i].limit = value; +} +} else { +zoneInfo[(UINT16)zone].limit = value; +} } /* @@ -263,6 +294,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, >link); NdisInterlockedIncrement((PLONG)); +zoneInfo[ctx->key.zone].entries++; NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], ); return TRUE; } @@ -437,6 +469,7 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) if (entry->natInfo.natAction) { OvsNatDeleteKey(>key); } +zoneInfo[entry->key.zone].entries--; OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); RemoveEntryList(>link); OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql); @@ -877,12 +910,16 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, ); } else { -if (commit && ctTotalEntries >= CT_MAX_ENTRIES) { +if (commit && (ctTotalEntries >= CT_MAX_ENTRIES || +zoneInfo[ctx.key.zone].entries >= zoneInfo[ctx.key.zone].limit)) { /* Don't proceed with processing if the max limit has been hit. * This blocks only new entries from being created and doesn't * affect existing connections. 
*/ -OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries); +OVS_LOG_ERROR("Conntrack Limit hit: zone(%u), zoneLimit(%lu)," + "zoneEntries(%lu), ctTotalEntries(%lu),", zone, + zoneInfo[ctx.key.zone].limit, + zoneInfo[ctx.key.zone].entries, ctTotalEntries); return NDIS_STATUS_RESOURCES; } /* If no matching entry was found, create one and add New state */ @@ -1783,4 +1820,126 @@ OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, return STATUS_SUCCESS; } +static NTSTATUS +OvsCreateNlMsgFromCtLimit(POVS_MESSAGE msgIn, + PVOID outBuffer, + UINT32 outBuf
[ovs-dev] [PATCH v5 2/3] datapath-windows: Implement locking in conntrack NAT.
This patch primarily replaces the existing NDIS RW lock based implementation for NAT in conntrack with a spinlock based implementation inside the NAT module, along with some conntrack optimization. - The 'ovsNatTable' and 'ovsUnNatTable' tables are shared between cleanup threads and the packet processing thread. In order to protect these two tables, use a spinlock. Also introduce counters to track the number of NAT entries. - Introduce a new function OvsGetTcpHeader() to retrieve TCP header and payload length, to optimize for TCP traffic. - Optimize conntrack lookup. - Remove 'bucketlockRef' member from conntrack entry structure. Testing: Verified loading/unloading the driver with Driver Verifier enabled. Ran TCP/UDP and ICMP traffic. Signed-off-by: Anand Kumar Acked-by: Alin Gabriel Serdean --- v1->v2: Merge patch 2 and 3 so that NAT locks related changes are in a single patch. v2->v3: No change v3->v4: No change v4->v5: Fix freeing up spinlock during unload. --- datapath-windows/ovsext/Conntrack-ftp.c | 4 +- datapath-windows/ovsext/Conntrack-nat.c | 28 +++- datapath-windows/ovsext/Conntrack-tcp.c | 15 ++--- datapath-windows/ovsext/Conntrack.c | 110 +--- datapath-windows/ovsext/Conntrack.h | 36 +++ 5 files changed, 101 insertions(+), 92 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-ftp.c b/datapath-windows/ovsext/Conntrack-ftp.c index 6830dfa..ce09a65 100644 --- a/datapath-windows/ovsext/Conntrack-ftp.c +++ b/datapath-windows/ovsext/Conntrack-ftp.c @@ -129,14 +129,14 @@ OvsCtHandleFtp(PNET_BUFFER_LIST curNbl, char temp[256] = { 0 }; char ftpMsg[256] = { 0 }; +UINT32 len; TCPHdr tcpStorage; const TCPHdr *tcp; -tcp = OvsGetTcp(curNbl, layers->l4Offset, ); +tcp = OvsGetTcpHeader(curNbl, layers, , ); if (!tcp) { return NDIS_STATUS_INVALID_PACKET; } -UINT32 len = OvsGetTcpPayloadLength(curNbl); if (len > sizeof(temp)) { /* We only care up to 256 */ len = sizeof(temp); diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 
da1814f..1607d4c 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -3,7 +3,8 @@ PLIST_ENTRY ovsNatTable = NULL; PLIST_ENTRY ovsUnNatTable = NULL; - +static NDIS_SPIN_LOCK ovsCtNatLock; +static ULONG ovsNatEntries; /* *--- * OvsHashNatKey @@ -109,6 +110,8 @@ NTSTATUS OvsNatInit() InitializeListHead([i]); } +NdisAllocateSpinLock(); +ovsNatEntries = 0; return STATUS_SUCCESS; } @@ -121,6 +124,11 @@ NTSTATUS OvsNatInit() VOID OvsNatFlush(UINT16 zone) { PLIST_ENTRY link, next; +if (!ovsNatEntries) { +return; +} + +NdisAcquireSpinLock(); for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { POVS_NAT_ENTRY entry = @@ -131,6 +139,7 @@ VOID OvsNatFlush(UINT16 zone) } } } +NdisReleaseSpinLock(); } /* @@ -142,12 +151,17 @@ VOID OvsNatFlush(UINT16 zone) VOID OvsNatCleanup() { if (ovsNatTable == NULL) { + NdisFreeSpinLock(); return; } + +NdisAcquireSpinLock(); OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG); ovsNatTable = NULL; ovsUnNatTable = NULL; +NdisReleaseSpinLock(); +NdisFreeSpinLock(); } /* @@ -250,10 +264,13 @@ static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis) VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry) { +NdisAcquireSpinLock(); InsertHeadList(OvsNatGetBucket(>key, FALSE), >link); InsertHeadList(OvsNatGetBucket(>value, TRUE), >reverseLink); +NdisReleaseSpinLock(); +NdisInterlockedIncrement((PLONG)); } /* @@ -399,21 +416,29 @@ OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse) PLIST_ENTRY link; POVS_NAT_ENTRY entry; +if (!ovsNatEntries) { +return NULL; +} + +NdisAcquireSpinLock(); LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) { if (reverse) { entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink); if (OvsNatKeyAreSame(ctKey, >value)) { +NdisReleaseSpinLock(); return entry; } } else { entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); if (OvsNatKeyAreSame(ctKey, >key)) { +NdisReleaseSpinLock(); 
return entry; } } } +NdisReleaseSpinLock(); return NULL; } @@ -432,6 +457,7 @@ OvsNatDeleteEntry(POVS_NAT_ENTRY entry) RemoveEntryList(>link); RemoveEntryList(>reverseLink); OvsFreeMemoryWithTag(entry, OVS_CT_PO
[ovs-dev] [PATCH v5 3/3] datapath-windows: Compute ct hash based on 5-tuple and zone
Conntrack 5-tuple consists of src address, dst address, src port, dst port and protocol which will be unique to a ct session. Use this information along with zone to compute hash. Also re-factor conntrack code related to parsing netlink attributes. Testing: Verified loading/unloading the driver with driver verified enabled. Ran TCP/UDP and ICMP traffic. Signed-off-by: Anand Kumar Acked-by: Alin Gabriel Serdean --- v1->v2: Updated commit message to include testing done. v2->v3: No change v3->v4: No change v4->v5: No change --- datapath-windows/ovsext/Conntrack.c | 228 ++-- datapath-windows/ovsext/Conntrack.h | 2 - 2 files changed, 116 insertions(+), 114 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 8fa1e07..dd16602 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -151,6 +151,24 @@ OvsCleanupConntrack(VOID) OvsNatCleanup(); } +/* + * + * OvsCtHashKey + * Compute hash using 5-tuple and zone. 
+ * + */ +UINT32 +OvsCtHashKey(const OVS_CT_KEY *key) +{ +UINT32 hsrc, hdst, hash; +hsrc = key->src.addr.ipv4 | ntohl(key->src.port); +hdst = key->dst.addr.ipv4 | ntohl(key->dst.port); +hash = hsrc ^ hdst; /* TO identify reverse traffic */ +hash = hash | (key->zone + key->nw_proto); +hash = OvsJhashWords((uint32_t*) , 1, hash); +return hash; +} + static __inline VOID OvsCtKeyReverse(OVS_CT_KEY *key) { @@ -231,7 +249,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, if (!OvsNatTranslateCtEntry(entry)) { return FALSE; } -ctx->hash = OvsHashCtKey(>key); +ctx->hash = OvsCtHashKey(>key); } else { entry->natInfo.natAction = natInfo->natAction; } @@ -531,20 +549,6 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx) return found; } -UINT32 -OvsHashCtKey(const OVS_CT_KEY *key) -{ -UINT32 hsrc, hdst, hash; -hsrc = OvsJhashBytes((UINT32*) >src, sizeof(key->src), 0); -hdst = OvsJhashBytes((UINT32*) >dst, sizeof(key->dst), 0); -hash = hsrc ^ hdst; /* TO identify reverse traffic */ -hash = OvsJhashBytes((uint32_t *) >dst + 1, - ((uint32_t *) (key + 1) - - (uint32_t *) (>dst + 1)), - hash); -return hash; -} - static UINT8 OvsReverseIcmpType(UINT8 type) { @@ -642,7 +646,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, OvsCtKeyReverse(>key); } -ctx->hash = OvsHashCtKey(>key); +ctx->hash = OvsCtHashKey(>key); return NDIS_STATUS_SUCCESS; } @@ -953,7 +957,6 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, OvsFlowKey *key, const PNL_ATTR a) { -PNL_ATTR ctAttr; BOOLEAN commit = FALSE; BOOLEAN force = FALSE; BOOLEAN postUpdateEvent = FALSE; @@ -973,109 +976,110 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, return status; } -/* XXX Convert this to NL_ATTR_FOR_EACH */ -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_ZONE); -if (ctAttr) { -zone = NlAttrGetU16(ctAttr); -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_COMMIT); -if (ctAttr) { -commit = TRUE; -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_MARK); -if (ctAttr) { -mark = NlAttrGet(ctAttr); -} -ctAttr = NlAttrFindNested(a, 
OVS_CT_ATTR_LABELS); -if (ctAttr) { -labels = NlAttrGet(ctAttr); -} -natActionInfo.natAction = NAT_ACTION_NONE; -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_NAT); -if (ctAttr) { -/* Pares Nested NAT attributes. */ -PNL_ATTR natAttr; -unsigned int left; -BOOLEAN hasMinIp = FALSE; -BOOLEAN hasMinPort = FALSE; -BOOLEAN hasMaxIp = FALSE; -BOOLEAN hasMaxPort = FALSE; -NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) { -enum ovs_nat_attr subtype = NlAttrType(natAttr); -switch(subtype) { -case OVS_NAT_ATTR_SRC: -case OVS_NAT_ATTR_DST: -natActionInfo.natAction |= -((subtype == OVS_NAT_ATTR_SRC) -? NAT_ACTION_SRC : NAT_ACTION_DST); +PNL_ATTR ctAttr = NULL; +INT left; + +NL_NESTED_FOR_EACH (ctAttr, left, a) { +switch(NlAttrType(ctAttr)) { +case OVS_CT_ATTR_ZONE: +zone = NlAttrGetU16(ctAttr); +break; +case OVS_CT_ATTR_COMMIT: +commit = TRUE; +break; +case OVS_CT_ATTR_MARK: +mark = NlAttrGet(ctAttr); break; -case OVS_NAT_
[ovs-dev] [PATCH v5 1/3] datapath-windows: Use spinlock instead of RW lock for ct entry
This patch mainly changes the NDIS RW lock for a conntrack entry to a spinlock, along with some minor refactoring in conntrack. A spinlock is used instead of an RW lock because RW locks cause performance hits when acquired/released multiple times. - Use NdisInterlockedXX wrapper APIs instead of InterlockedXX. - Update 'ctTotalRelatedEntries' using interlocked functions. - Move conntrack lock out of NAT module. Testing: Verified loading/unloading the driver with Driver Verifier enabled. Ran TCP/UDP and ICMP traffic. Signed-off-by: Anand Kumar Acked-by: Alin Gabriel Serdean --- v1->v2: Calculate the dispatch level only in cases where the locks are being acquired multiple times within a given context and minor style change. v2->v3: Fix kernel crash while executing cleanup thread in conntrack-related v3->v4: Fix a bug found through static code analysis v4->v5: No change --- datapath-windows/ovsext/Conntrack-nat.c | 7 +- datapath-windows/ovsext/Conntrack-related.c | 21 ++--- datapath-windows/ovsext/Conntrack.c | 135 ++-- datapath-windows/ovsext/Conntrack.h | 2 +- datapath-windows/ovsext/Util.h | 18 5 files changed, 96 insertions(+), 87 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 316c946..da1814f 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; -LOCK_STATE_EX lockState; -/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ -NdisAcquireRWLockRead(entry->lock, , 0); + /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? 
>key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { -NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. -NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } -NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c index ec4b536..950be98 100644 --- a/datapath-windows/ovsext/Conntrack-related.c +++ b/datapath-windows/ovsext/Conntrack-related.c @@ -18,7 +18,7 @@ #include "Jhash.h" static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */ -static UINT64 ctTotalRelatedEntries; +static ULONG ctTotalRelatedEntries; static OVS_CT_THREAD_CTX ctRelThreadCtx; static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; @@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime) POVS_CT_REL_ENTRY entry; LOCK_STATE_EX lockState; -NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); - if (!ctTotalRelatedEntries) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NULL; } +NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { /* XXX - Scan the table based on the hash instead */ LIST_FORALL_SAFE([i], link, next) { @@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry) { RemoveEntryList(>link); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalRelatedEntries--; +NdisInterlockedDecrement((PLONG)); } NDIS_STATUS @@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto, NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); InsertHeadList([hash & CT_HASH_TABLE_MASK], >link); -ctTotalRelatedEntries++; 
+NdisInterlockedIncrement((PLONG)); NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NDIS_STATUS_SUCCESS; @@ -150,20 +148,19 @@ OvsCtRelatedFlush() { PLIST_ENTRY link, next; POVS_CT_REL_ENTRY entry; - LOCK_STATE_EX lockState; -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); if (ctTotalRelatedEntries) { +NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link); OvsCtRelatedEntryDelete(entry); } } +NdisReleaseRWLock(ovsCtRelatedLockObj, ); } -NdisRelease
[ovs-dev] [PATCH v5 0/3] Optimize conntrack performance
This patch series refactors the conntrack code to improve throughput. With this series applied, TCP throughput with conntrack increased by ~50%. Anand Kumar (3): datapath-windows: Use spinlock instead of RW lock for ct entry datapath-windows: Implement locking in conntrack NAT. datapath-windows: Compute ct hash based on 5-tuple and zone datapath-windows/ovsext/Conntrack-ftp.c | 4 +- datapath-windows/ovsext/Conntrack-nat.c | 35 ++- datapath-windows/ovsext/Conntrack-related.c | 21 +- datapath-windows/ovsext/Conntrack-tcp.c | 15 +- datapath-windows/ovsext/Conntrack.c | 469 +--- datapath-windows/ovsext/Conntrack.h | 40 ++- datapath-windows/ovsext/Util.h | 18 ++ 7 files changed, 311 insertions(+), 291 deletions(-) -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v4 2/3] datapath-windows: Implement locking in conntrack NAT.
Hi Alin, Thanks for reviewing the patch series. I will send out a v5 to address this issue and retain the acks. Thanks, Anand Kumar On 6/22/18, 7:18 AM, "Alin Serdean" wrote: > -Mesaj original- > De la: ovs-dev-boun...@openvswitch.org boun...@openvswitch.org> În numele Anand Kumar > Trimis: Tuesday, June 19, 2018 8:33 PM > Către: d...@openvswitch.org > Subiect: [ovs-dev] [PATCH v4 2/3] datapath-windows: Implement locking in > conntrack NAT. > > This patch primarily replaces existing ndis RWlock based implementaion for > NAT in conntrack with a spinlock based implementation inside NAT, module > along with some conntrack optimization. > > - The 'ovsNatTable' and 'ovsUnNatTable' tables are shared > between cleanup threads and packet processing thread. > In order to protect these two tables use a spinlock. > Also introduce counters to track number of nat entries. > - Introduce a new function OvsGetTcpHeader() to retrieve TCP header > and payload length, to optimize for TCP traffic. > - Optimize conntrack look up. > - Remove 'bucketlockRef' member from conntrack entry structure. > > Testing: > Verified loading/unloading the driver with driver verified enabled. > Ran TCP/UDP and ICMP traffic. > > Signed-off-by: Anand Kumar > --- > v1->v2: Merge patch 2 and 3 so that NAT locks related changes are in a > single patch. 
> v2->v3: No change > v3->v4: No change > --- > datapath-windows/ovsext/Conntrack-ftp.c | 4 +- > datapath-windows/ovsext/Conntrack-nat.c | 27 +++- datapath- > windows/ovsext/Conntrack-tcp.c | 15 ++--- > datapath-windows/ovsext/Conntrack.c | 110 +-- > - > datapath-windows/ovsext/Conntrack.h | 36 +++ > 5 files changed, 100 insertions(+), 92 deletions(-) > Can you please fold in the following: diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 11057e6ed..559a7f689 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -151,7 +151,8 @@ VOID OvsNatFlush(UINT16 zone) VOID OvsNatCleanup() { if (ovsNatTable == NULL) { - return; +NdisFreeSpinLock(); +return; } NdisAcquireSpinLock(); The rest looks good. Acked-by: Alin Gabriel Serdean ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v2 0/3] Optimize conntrack performance
Hi Alin, Thanks for running the code analysis on the patch series. As discussed in the Hyper-V meeting, I have addressed the warnings and sent out a v4 of my patch series. Thanks, Anand Kumar On 6/19/18, 9:14 AM, "Alin Serdean" wrote: Thanks a lot for the series and the benchmarks. This is not an actual review. I applied the patches and ran the code analysis and I get the following: ovs\datapath-windows\ovsext\conntrack-nat.c(151): warning C28167: The function 'OvsNatCleanup' changes the IRQL and does not restore the IRQL before it exits. It should be annotated to reflect the change or the IRQL should be restored. IRQL was last set at line 162. ovs\datapath-windows\ovsext\conntrack-related.c(147): warning C28167: The function 'OvsCtRelatedFlush' changes the IRQL and does not restore the IRQL before it exits. It should be annotated to reflect the change or the IRQL should be restored. IRQL was last set at line 163. ovs\datapath-windows\ovsext\conntrack-related.c(163): warning C6001: Using uninitialized memory 'lockState'. ovs\datapath-windows\ovsext\conntrack-related.c(163): warning C26110: Caller failing to hold lock 'ovsCtRelatedLockObj' before calling function 'NdisReleaseRWLock'. ovs\datapath-windows\ovsext\conntrack-related.c(210): warning C28122: The function 'NdisReleaseRWLock' is not permitted to be called at a low IRQ level. Prior function calls are inconsistent with this constraint: It may be that the error is actually in some prior call that limited the range. Maximum legal IRQL was last set to 1 at line 211. ovs\datapath-windows\ovsext\conntrack-related.c(176): warning C28166: The function 'OvsCtRelatedEntryCleaner' does not restore the IRQL to the value that was current at function entry and is required to do so. IRQL was last set at line 210. ovs\datapath-windows\ovsext\conntrack-related.c(210): warning C6001: Using uninitialized memory 'lockState'. 
ovs\datapath-windows\ovsext\conntrack-related.c(210): warning C26110: Caller failing to hold lock 'ovsCtRelatedLockObj' before calling function 'NdisReleaseRWLock'. Can you please add code annotations where needed? Thanks, Alin. > -Mesaj original- > De la: ovs-dev-boun...@openvswitch.org boun...@openvswitch.org> În numele Anand Kumar > Trimis: Tuesday, June 19, 2018 3:56 AM > Către: d...@openvswitch.org > Subiect: [ovs-dev] [PATCH v2 0/3] Optimize conntrack performance > > This patch series is primarily to refactor conntrack code for better throughput > with conntrack. > > With this patch series TCP throughput with conntrack increased by ~50%. > > Anand Kumar (3): > datapath-windows: Use spinlock instead of RW lock for ct entry > datapath-windows: Implement locking in conntrack NAT. > datapath-windows: Compute ct hash based on 5-tuple and zone > > datapath-windows/ovsext/Conntrack-ftp.c | 4 +- > datapath-windows/ovsext/Conntrack-nat.c | 34 +- > datapath-windows/ovsext/Conntrack-related.c | 17 +- > datapath-windows/ovsext/Conntrack-tcp.c | 15 +- > datapath-windows/ovsext/Conntrack.c | 469 + > --- > datapath-windows/ovsext/Conntrack.h | 40 ++- > datapath-windows/ovsext/Util.h | 18 ++ > 7 files changed, 308 insertions(+), 289 deletions(-) > > -- > 2.9.3.windows.1 > > ___ > dev mailing list > d...@openvswitch.org > https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev=02%7C01%7Ckumaranand%40vmware.com%7C640738ce7d194db0ec1f08d5d5ffce4a%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C1%7C0%7C636650217048180105=afwy4KiFTylRjqBu1K7Qg8cbQdmSfIiXB3%2FrNdkviF0%3D=0 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v4 3/3] datapath-windows: Compute ct hash based on 5-tuple and zone
Conntrack 5-tuple consists of src address, dst address, src port, dst port and protocol which will be unique to a ct session. Use this information along with zone to compute hash. Also re-factor conntrack code related to parsing netlink attributes. Testing: Verified loading/unloading the driver with driver verified enabled. Ran TCP/UDP and ICMP traffic. Signed-off-by: Anand Kumar --- v1->v2: Updated commit message to include testing done. v2->v3: No change v3->v4: No change --- datapath-windows/ovsext/Conntrack.c | 228 ++-- datapath-windows/ovsext/Conntrack.h | 2 - 2 files changed, 116 insertions(+), 114 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 8fa1e07..dd16602 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -151,6 +151,24 @@ OvsCleanupConntrack(VOID) OvsNatCleanup(); } +/* + * + * OvsCtHashKey + * Compute hash using 5-tuple and zone. + * + */ +UINT32 +OvsCtHashKey(const OVS_CT_KEY *key) +{ +UINT32 hsrc, hdst, hash; +hsrc = key->src.addr.ipv4 | ntohl(key->src.port); +hdst = key->dst.addr.ipv4 | ntohl(key->dst.port); +hash = hsrc ^ hdst; /* TO identify reverse traffic */ +hash = hash | (key->zone + key->nw_proto); +hash = OvsJhashWords((uint32_t*) , 1, hash); +return hash; +} + static __inline VOID OvsCtKeyReverse(OVS_CT_KEY *key) { @@ -231,7 +249,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, if (!OvsNatTranslateCtEntry(entry)) { return FALSE; } -ctx->hash = OvsHashCtKey(>key); +ctx->hash = OvsCtHashKey(>key); } else { entry->natInfo.natAction = natInfo->natAction; } @@ -531,20 +549,6 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx) return found; } -UINT32 -OvsHashCtKey(const OVS_CT_KEY *key) -{ -UINT32 hsrc, hdst, hash; -hsrc = OvsJhashBytes((UINT32*) >src, sizeof(key->src), 0); -hdst = OvsJhashBytes((UINT32*) >dst, sizeof(key->dst), 0); -hash = hsrc ^ hdst; /* TO identify reverse traffic */ -hash = OvsJhashBytes((uint32_t *) >dst + 1, - ((uint32_t *) (key + 1) - - 
(uint32_t *) (>dst + 1)), - hash); -return hash; -} - static UINT8 OvsReverseIcmpType(UINT8 type) { @@ -642,7 +646,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, OvsCtKeyReverse(>key); } -ctx->hash = OvsHashCtKey(>key); +ctx->hash = OvsCtHashKey(>key); return NDIS_STATUS_SUCCESS; } @@ -953,7 +957,6 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, OvsFlowKey *key, const PNL_ATTR a) { -PNL_ATTR ctAttr; BOOLEAN commit = FALSE; BOOLEAN force = FALSE; BOOLEAN postUpdateEvent = FALSE; @@ -973,109 +976,110 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, return status; } -/* XXX Convert this to NL_ATTR_FOR_EACH */ -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_ZONE); -if (ctAttr) { -zone = NlAttrGetU16(ctAttr); -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_COMMIT); -if (ctAttr) { -commit = TRUE; -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_MARK); -if (ctAttr) { -mark = NlAttrGet(ctAttr); -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_LABELS); -if (ctAttr) { -labels = NlAttrGet(ctAttr); -} -natActionInfo.natAction = NAT_ACTION_NONE; -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_NAT); -if (ctAttr) { -/* Pares Nested NAT attributes. */ -PNL_ATTR natAttr; -unsigned int left; -BOOLEAN hasMinIp = FALSE; -BOOLEAN hasMinPort = FALSE; -BOOLEAN hasMaxIp = FALSE; -BOOLEAN hasMaxPort = FALSE; -NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) { -enum ovs_nat_attr subtype = NlAttrType(natAttr); -switch(subtype) { -case OVS_NAT_ATTR_SRC: -case OVS_NAT_ATTR_DST: -natActionInfo.natAction |= -((subtype == OVS_NAT_ATTR_SRC) -? NAT_ACTION_SRC : NAT_ACTION_DST); +PNL_ATTR ctAttr = NULL; +INT left; + +NL_NESTED_FOR_EACH (ctAttr, left, a) { +switch(NlAttrType(ctAttr)) { +case OVS_CT_ATTR_ZONE: +zone = NlAttrGetU16(ctAttr); +break; +case OVS_CT_ATTR_COMMIT: +commit = TRUE; +break; +case OVS_CT_ATTR_MARK: +mark = NlAttrGet(ctAttr); break; -case OVS_NAT_ATTR_IP_MIN: -memcpy(, -
[ovs-dev] [PATCH v4 2/3] datapath-windows: Implement locking in conntrack NAT.
This patch primarily replaces the existing NDIS RW-lock-based implementation for NAT in conntrack with a spinlock-based implementation inside the NAT module, along with some conntrack optimizations. - The 'ovsNatTable' and 'ovsUnNatTable' tables are shared between cleanup threads and packet processing thread. In order to protect these two tables use a spinlock. Also introduce counters to track number of nat entries. - Introduce a new function OvsGetTcpHeader() to retrieve TCP header and payload length, to optimize for TCP traffic. - Optimize conntrack look up. - Remove 'bucketlockRef' member from conntrack entry structure. Testing: Verified loading/unloading the driver with Driver Verifier enabled. Ran TCP/UDP and ICMP traffic. Signed-off-by: Anand Kumar --- v1->v2: Merge patch 2 and 3 so that NAT locks related changes are in a single patch. v2->v3: No change v3->v4: No change --- datapath-windows/ovsext/Conntrack-ftp.c | 4 +- datapath-windows/ovsext/Conntrack-nat.c | 27 +++- datapath-windows/ovsext/Conntrack-tcp.c | 15 ++--- datapath-windows/ovsext/Conntrack.c | 110 +--- datapath-windows/ovsext/Conntrack.h | 36 +++ 5 files changed, 100 insertions(+), 92 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-ftp.c b/datapath-windows/ovsext/Conntrack-ftp.c index 6830dfa..ce09a65 100644 --- a/datapath-windows/ovsext/Conntrack-ftp.c +++ b/datapath-windows/ovsext/Conntrack-ftp.c @@ -129,14 +129,14 @@ OvsCtHandleFtp(PNET_BUFFER_LIST curNbl, char temp[256] = { 0 }; char ftpMsg[256] = { 0 }; +UINT32 len; TCPHdr tcpStorage; const TCPHdr *tcp; -tcp = OvsGetTcp(curNbl, layers->l4Offset, ); +tcp = OvsGetTcpHeader(curNbl, layers, , ); if (!tcp) { return NDIS_STATUS_INVALID_PACKET; } -UINT32 len = OvsGetTcpPayloadLength(curNbl); if (len > sizeof(temp)) { /* We only care up to 256 */ len = sizeof(temp); diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index da1814f..11057e6 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ 
b/datapath-windows/ovsext/Conntrack-nat.c @@ -3,7 +3,8 @@ PLIST_ENTRY ovsNatTable = NULL; PLIST_ENTRY ovsUnNatTable = NULL; - +static NDIS_SPIN_LOCK ovsCtNatLock; +static ULONG ovsNatEntries; /* *--- * OvsHashNatKey @@ -109,6 +110,8 @@ NTSTATUS OvsNatInit() InitializeListHead([i]); } +NdisAllocateSpinLock(); +ovsNatEntries = 0; return STATUS_SUCCESS; } @@ -121,6 +124,11 @@ NTSTATUS OvsNatInit() VOID OvsNatFlush(UINT16 zone) { PLIST_ENTRY link, next; +if (!ovsNatEntries) { +return; +} + +NdisAcquireSpinLock(); for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { POVS_NAT_ENTRY entry = @@ -131,6 +139,7 @@ VOID OvsNatFlush(UINT16 zone) } } } +NdisReleaseSpinLock(); } /* @@ -144,10 +153,14 @@ VOID OvsNatCleanup() if (ovsNatTable == NULL) { return; } + +NdisAcquireSpinLock(); OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG); ovsNatTable = NULL; ovsUnNatTable = NULL; +NdisReleaseSpinLock(); +NdisFreeSpinLock(); } /* @@ -250,10 +263,13 @@ static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis) VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry) { +NdisAcquireSpinLock(); InsertHeadList(OvsNatGetBucket(>key, FALSE), >link); InsertHeadList(OvsNatGetBucket(>value, TRUE), >reverseLink); +NdisReleaseSpinLock(); +NdisInterlockedIncrement((PLONG)); } /* @@ -399,21 +415,29 @@ OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse) PLIST_ENTRY link; POVS_NAT_ENTRY entry; +if (!ovsNatEntries) { +return NULL; +} + +NdisAcquireSpinLock(); LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) { if (reverse) { entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink); if (OvsNatKeyAreSame(ctKey, >value)) { +NdisReleaseSpinLock(); return entry; } } else { entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); if (OvsNatKeyAreSame(ctKey, >key)) { +NdisReleaseSpinLock(); return entry; } } } +NdisReleaseSpinLock(); return NULL; } @@ -432,6 +456,7 @@ OvsNatDeleteEntry(POVS_NAT_ENTRY entry) 
RemoveEntryList(>link); RemoveEntryList(>reverseLink); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); +NdisInterlockedDecrement((PLONG)); } /* diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Con
[ovs-dev] [PATCH v4 1/3] datapath-windows: Use spinlock instead of RW lock for ct entry
This patch mainly changes a ndis RW lock for conntrack entry to a spinlock along with some minor refactor in conntrack. Using spinlock instead of RW lock as RW locks causes performance hits when acquired/released multiple times. - Use NdisInterlockedXX wrapper api's instead of InterlockedXX. - Update 'ctTotalRelatedEntries' using interlocked functions. - Move conntrack lock out of NAT module. Testing: Verified loading/unloading the driver with driver verified enabled. Ran TCP/UDP and ICMP traffic. Signed-off-by: Anand Kumar --- v1->v2: Calculate the dispatch level only in cases where the locks are being acquired multiple times within a given context and minor style change. v2->v3: Fix kernel crash while executing cleanup thread in conntrack-related v3->v4: Fix a bug found through code analysis --- datapath-windows/ovsext/Conntrack-nat.c | 7 +- datapath-windows/ovsext/Conntrack-related.c | 21 ++--- datapath-windows/ovsext/Conntrack.c | 135 ++-- datapath-windows/ovsext/Conntrack.h | 2 +- datapath-windows/ovsext/Util.h | 18 5 files changed, 96 insertions(+), 87 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 316c946..da1814f 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; -LOCK_STATE_EX lockState; -/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ -NdisAcquireRWLockRead(entry->lock, , 0); + /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? 
>key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { -NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. -NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } -NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c index ec4b536..950be98 100644 --- a/datapath-windows/ovsext/Conntrack-related.c +++ b/datapath-windows/ovsext/Conntrack-related.c @@ -18,7 +18,7 @@ #include "Jhash.h" static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */ -static UINT64 ctTotalRelatedEntries; +static ULONG ctTotalRelatedEntries; static OVS_CT_THREAD_CTX ctRelThreadCtx; static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; @@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime) POVS_CT_REL_ENTRY entry; LOCK_STATE_EX lockState; -NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); - if (!ctTotalRelatedEntries) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NULL; } +NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { /* XXX - Scan the table based on the hash instead */ LIST_FORALL_SAFE([i], link, next) { @@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry) { RemoveEntryList(>link); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalRelatedEntries--; +NdisInterlockedDecrement((PLONG)); } NDIS_STATUS @@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto, NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); InsertHeadList([hash & CT_HASH_TABLE_MASK], >link); -ctTotalRelatedEntries++; 
+NdisInterlockedIncrement((PLONG)); NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NDIS_STATUS_SUCCESS; @@ -150,20 +148,19 @@ OvsCtRelatedFlush() { PLIST_ENTRY link, next; POVS_CT_REL_ENTRY entry; - LOCK_STATE_EX lockState; -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); if (ctTotalRelatedEntries) { +NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link); OvsCtRelatedEntryDelete(entry); } } +NdisReleaseRWLock(ovsCtRelatedLockObj, ); } -NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NDIS_STATUS_SUCCESS; } @@ -
[ovs-dev] [PATCH v4 0/3] Optimize conntrack performance
This patch series refactors the conntrack code to improve throughput. With this series applied, TCP throughput with conntrack increased by ~50%. Anand Kumar (3): datapath-windows: Use spinlock instead of RW lock for ct entry datapath-windows: Implement locking in conntrack NAT. datapath-windows: Compute ct hash based on 5-tuple and zone datapath-windows/ovsext/Conntrack-ftp.c | 4 +- datapath-windows/ovsext/Conntrack-nat.c | 34 +- datapath-windows/ovsext/Conntrack-related.c | 21 +- datapath-windows/ovsext/Conntrack-tcp.c | 15 +- datapath-windows/ovsext/Conntrack.c | 469 +--- datapath-windows/ovsext/Conntrack.h | 40 ++- datapath-windows/ovsext/Util.h | 18 ++ 7 files changed, 310 insertions(+), 291 deletions(-) -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v3 3/3] datapath-windows: Compute ct hash based on 5-tuple and zone
Conntrack 5-tuple consists of src address, dst address, src port, dst port and protocol which will be unique to a ct session. Use this information along with zone to compute hash. Also re-factor conntrack code related to parsing netlink attributes. Testing: Verified loading/unloading the driver with driver verified enabled. Ran TCP/UDP and ICMP traffic. Signed-off-by: Anand Kumar --- v1->v2: Updated commit message to include testing done. v2->v3: No change. --- datapath-windows/ovsext/Conntrack.c | 228 ++-- datapath-windows/ovsext/Conntrack.h | 2 - 2 files changed, 116 insertions(+), 114 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 8fa1e07..dd16602 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -151,6 +151,24 @@ OvsCleanupConntrack(VOID) OvsNatCleanup(); } +/* + * + * OvsCtHashKey + * Compute hash using 5-tuple and zone. + * + */ +UINT32 +OvsCtHashKey(const OVS_CT_KEY *key) +{ +UINT32 hsrc, hdst, hash; +hsrc = key->src.addr.ipv4 | ntohl(key->src.port); +hdst = key->dst.addr.ipv4 | ntohl(key->dst.port); +hash = hsrc ^ hdst; /* TO identify reverse traffic */ +hash = hash | (key->zone + key->nw_proto); +hash = OvsJhashWords((uint32_t*) , 1, hash); +return hash; +} + static __inline VOID OvsCtKeyReverse(OVS_CT_KEY *key) { @@ -231,7 +249,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, if (!OvsNatTranslateCtEntry(entry)) { return FALSE; } -ctx->hash = OvsHashCtKey(>key); +ctx->hash = OvsCtHashKey(>key); } else { entry->natInfo.natAction = natInfo->natAction; } @@ -531,20 +549,6 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx) return found; } -UINT32 -OvsHashCtKey(const OVS_CT_KEY *key) -{ -UINT32 hsrc, hdst, hash; -hsrc = OvsJhashBytes((UINT32*) >src, sizeof(key->src), 0); -hdst = OvsJhashBytes((UINT32*) >dst, sizeof(key->dst), 0); -hash = hsrc ^ hdst; /* TO identify reverse traffic */ -hash = OvsJhashBytes((uint32_t *) >dst + 1, - ((uint32_t *) (key + 1) - - (uint32_t *) 
(>dst + 1)), - hash); -return hash; -} - static UINT8 OvsReverseIcmpType(UINT8 type) { @@ -642,7 +646,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, OvsCtKeyReverse(>key); } -ctx->hash = OvsHashCtKey(>key); +ctx->hash = OvsCtHashKey(>key); return NDIS_STATUS_SUCCESS; } @@ -953,7 +957,6 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, OvsFlowKey *key, const PNL_ATTR a) { -PNL_ATTR ctAttr; BOOLEAN commit = FALSE; BOOLEAN force = FALSE; BOOLEAN postUpdateEvent = FALSE; @@ -973,109 +976,110 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, return status; } -/* XXX Convert this to NL_ATTR_FOR_EACH */ -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_ZONE); -if (ctAttr) { -zone = NlAttrGetU16(ctAttr); -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_COMMIT); -if (ctAttr) { -commit = TRUE; -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_MARK); -if (ctAttr) { -mark = NlAttrGet(ctAttr); -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_LABELS); -if (ctAttr) { -labels = NlAttrGet(ctAttr); -} -natActionInfo.natAction = NAT_ACTION_NONE; -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_NAT); -if (ctAttr) { -/* Pares Nested NAT attributes. */ -PNL_ATTR natAttr; -unsigned int left; -BOOLEAN hasMinIp = FALSE; -BOOLEAN hasMinPort = FALSE; -BOOLEAN hasMaxIp = FALSE; -BOOLEAN hasMaxPort = FALSE; -NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) { -enum ovs_nat_attr subtype = NlAttrType(natAttr); -switch(subtype) { -case OVS_NAT_ATTR_SRC: -case OVS_NAT_ATTR_DST: -natActionInfo.natAction |= -((subtype == OVS_NAT_ATTR_SRC) -? NAT_ACTION_SRC : NAT_ACTION_DST); +PNL_ATTR ctAttr = NULL; +INT left; + +NL_NESTED_FOR_EACH (ctAttr, left, a) { +switch(NlAttrType(ctAttr)) { +case OVS_CT_ATTR_ZONE: +zone = NlAttrGetU16(ctAttr); +break; +case OVS_CT_ATTR_COMMIT: +commit = TRUE; +break; +case OVS_CT_ATTR_MARK: +mark = NlAttrGet(ctAttr); break; -case OVS_NAT_ATTR_IP_MIN: -memcpy(, - NlA
[ovs-dev] [PATCH v3 2/3] datapath-windows: Implement locking in conntrack NAT.
This patch primarily replaces existing ndis RWlock based implementaion for NAT in conntrack with a spinlock based implementation inside NAT, module along with some conntrack optimization. - The 'ovsNatTable' and 'ovsUnNatTable' tables are shared between cleanup threads and packet processing thread. In order to protect these two tables use a spinlock. Also introduce counters to track number of nat entries. - Introduce a new function OvsGetTcpHeader() to retrieve TCP header and payload length, to optimize for TCP traffic. - Optimize conntrack look up. - Remove 'bucketlockRef' member from conntrack entry structure. Testing: Verified loading/unloading the driver with driver verified enabled. Ran TCP/UDP and ICMP traffic. Signed-off-by: Anand Kumar --- v1->v2: Merge patch 2 and 3 so that NAT locks related changes are in a single patch. v2->v3: No change. --- datapath-windows/ovsext/Conntrack-ftp.c | 4 +- datapath-windows/ovsext/Conntrack-nat.c | 27 +++- datapath-windows/ovsext/Conntrack-tcp.c | 15 ++--- datapath-windows/ovsext/Conntrack.c | 110 +--- datapath-windows/ovsext/Conntrack.h | 36 +++ 5 files changed, 100 insertions(+), 92 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-ftp.c b/datapath-windows/ovsext/Conntrack-ftp.c index 6830dfa..ce09a65 100644 --- a/datapath-windows/ovsext/Conntrack-ftp.c +++ b/datapath-windows/ovsext/Conntrack-ftp.c @@ -129,14 +129,14 @@ OvsCtHandleFtp(PNET_BUFFER_LIST curNbl, char temp[256] = { 0 }; char ftpMsg[256] = { 0 }; +UINT32 len; TCPHdr tcpStorage; const TCPHdr *tcp; -tcp = OvsGetTcp(curNbl, layers->l4Offset, ); +tcp = OvsGetTcpHeader(curNbl, layers, , ); if (!tcp) { return NDIS_STATUS_INVALID_PACKET; } -UINT32 len = OvsGetTcpPayloadLength(curNbl); if (len > sizeof(temp)) { /* We only care up to 256 */ len = sizeof(temp); diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index da1814f..11057e6 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ 
b/datapath-windows/ovsext/Conntrack-nat.c @@ -3,7 +3,8 @@ PLIST_ENTRY ovsNatTable = NULL; PLIST_ENTRY ovsUnNatTable = NULL; - +static NDIS_SPIN_LOCK ovsCtNatLock; +static ULONG ovsNatEntries; /* *--- * OvsHashNatKey @@ -109,6 +110,8 @@ NTSTATUS OvsNatInit() InitializeListHead([i]); } +NdisAllocateSpinLock(); +ovsNatEntries = 0; return STATUS_SUCCESS; } @@ -121,6 +124,11 @@ NTSTATUS OvsNatInit() VOID OvsNatFlush(UINT16 zone) { PLIST_ENTRY link, next; +if (!ovsNatEntries) { +return; +} + +NdisAcquireSpinLock(); for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { POVS_NAT_ENTRY entry = @@ -131,6 +139,7 @@ VOID OvsNatFlush(UINT16 zone) } } } +NdisReleaseSpinLock(); } /* @@ -144,10 +153,14 @@ VOID OvsNatCleanup() if (ovsNatTable == NULL) { return; } + +NdisAcquireSpinLock(); OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG); ovsNatTable = NULL; ovsUnNatTable = NULL; +NdisReleaseSpinLock(); +NdisFreeSpinLock(); } /* @@ -250,10 +263,13 @@ static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis) VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry) { +NdisAcquireSpinLock(); InsertHeadList(OvsNatGetBucket(>key, FALSE), >link); InsertHeadList(OvsNatGetBucket(>value, TRUE), >reverseLink); +NdisReleaseSpinLock(); +NdisInterlockedIncrement((PLONG)); } /* @@ -399,21 +415,29 @@ OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse) PLIST_ENTRY link; POVS_NAT_ENTRY entry; +if (!ovsNatEntries) { +return NULL; +} + +NdisAcquireSpinLock(); LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) { if (reverse) { entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink); if (OvsNatKeyAreSame(ctKey, >value)) { +NdisReleaseSpinLock(); return entry; } } else { entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); if (OvsNatKeyAreSame(ctKey, >key)) { +NdisReleaseSpinLock(); return entry; } } } +NdisReleaseSpinLock(); return NULL; } @@ -432,6 +456,7 @@ OvsNatDeleteEntry(POVS_NAT_ENTRY entry) 
RemoveEntryList(>link); RemoveEntryList(>reverseLink); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); +NdisInterlockedDecrement((PLONG)); } /* diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c index 8cbab24
[ovs-dev] [PATCH v3 0/3] Optimize conntrack performance
This patch series is primarily to refactor conntrack code for better throughput with conntrack. With this patch series TCP throughput with conntrack increased by ~50%. Anand Kumar (3): datapath-windows: Use spinlock instead of RW lock for ct entry datapath-windows: Implement locking in conntrack NAT. datapath-windows: Compute ct hash based on 5-tuple and zone datapath-windows/ovsext/Conntrack-ftp.c | 4 +- datapath-windows/ovsext/Conntrack-nat.c | 34 +- datapath-windows/ovsext/Conntrack-related.c | 19 +- datapath-windows/ovsext/Conntrack-tcp.c | 15 +- datapath-windows/ovsext/Conntrack.c | 469 +--- datapath-windows/ovsext/Conntrack.h | 40 ++- datapath-windows/ovsext/Util.h | 18 ++ 7 files changed, 309 insertions(+), 290 deletions(-) -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v3 1/3] datapath-windows: Use spinlock instead of RW lock for ct entry
This patch mainly changes the NDIS RW lock for conntrack entries to a spinlock, along with some minor refactoring in conntrack. A spinlock is used instead of an RW lock because RW locks cause performance hits when acquired/released multiple times. - Use NdisInterlockedXX wrapper APIs instead of InterlockedXX. - Update 'ctTotalRelatedEntries' using interlocked functions. - Move conntrack lock out of NAT module. Testing: Verified loading/unloading the driver with Driver Verifier enabled. Ran TCP/UDP and ICMP traffic. Signed-off-by: Anand Kumar --- v1->v2: Calculate the dispatch level only in cases where the locks are being acquired multiple times within a given context and minor style change. v2->v3: Fix kernel crash while executing cleanup thread in conntrack-related --- datapath-windows/ovsext/Conntrack-nat.c | 7 +- datapath-windows/ovsext/Conntrack-related.c | 19 ++-- datapath-windows/ovsext/Conntrack.c | 135 ++-- datapath-windows/ovsext/Conntrack.h | 2 +- datapath-windows/ovsext/Util.h | 18 5 files changed, 95 insertions(+), 86 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 316c946..da1814f 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; -LOCK_STATE_EX lockState; -/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ -NdisAcquireRWLockRead(entry->lock, , 0); + /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? 
>key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { -NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. -NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } -NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c index ec4b536..257dd37 100644 --- a/datapath-windows/ovsext/Conntrack-related.c +++ b/datapath-windows/ovsext/Conntrack-related.c @@ -18,7 +18,7 @@ #include "Jhash.h" static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */ -static UINT64 ctTotalRelatedEntries; +static ULONG ctTotalRelatedEntries; static OVS_CT_THREAD_CTX ctRelThreadCtx; static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; @@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime) POVS_CT_REL_ENTRY entry; LOCK_STATE_EX lockState; -NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); - if (!ctTotalRelatedEntries) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NULL; } +NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { /* XXX - Scan the table based on the hash instead */ LIST_FORALL_SAFE([i], link, next) { @@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry) { RemoveEntryList(>link); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalRelatedEntries--; +NdisInterlockedDecrement((PLONG)); } NDIS_STATUS @@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto, NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); InsertHeadList([hash & CT_HASH_TABLE_MASK], >link); -ctTotalRelatedEntries++; 
+NdisInterlockedIncrement((PLONG)); NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NDIS_STATUS_SUCCESS; @@ -150,11 +148,10 @@ OvsCtRelatedFlush() { PLIST_ENTRY link, next; POVS_CT_REL_ENTRY entry; - LOCK_STATE_EX lockState; -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); if (ctTotalRelatedEntries) { +NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link); @@ -189,9 +186,8 @@ OvsCtRelatedEntryCleaner(PVOID data) /* Lock has been freed by 'OvsCleanupCtRelated()' */ break; } -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); + if (context->exit) { -NdisRelea
[ovs-dev] [PATCH v2 3/3] datapath-windows: Compute ct hash based on 5-tuple and zone
A conntrack 5-tuple consists of src address, dst address, src port, dst port and protocol, which will be unique to a ct session. Use this information along with the zone to compute the hash. Also refactor conntrack code related to parsing netlink attributes. Testing: Verified loading/unloading the driver with Driver Verifier enabled. Ran TCP/UDP and ICMP traffic. Signed-off-by: Anand Kumar --- v1->v2: Updated commit message to include testing done. --- datapath-windows/ovsext/Conntrack.c | 228 ++-- datapath-windows/ovsext/Conntrack.h | 2 - 2 files changed, 116 insertions(+), 114 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 8fa1e07..dd16602 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -151,6 +151,24 @@ OvsCleanupConntrack(VOID) OvsNatCleanup(); } +/* + * + * OvsCtHashKey + * Compute hash using 5-tuple and zone. + * + */ +UINT32 +OvsCtHashKey(const OVS_CT_KEY *key) +{ +UINT32 hsrc, hdst, hash; +hsrc = key->src.addr.ipv4 | ntohl(key->src.port); +hdst = key->dst.addr.ipv4 | ntohl(key->dst.port); +hash = hsrc ^ hdst; /* TO identify reverse traffic */ +hash = hash | (key->zone + key->nw_proto); +hash = OvsJhashWords((uint32_t*) , 1, hash); +return hash; +} + static __inline VOID OvsCtKeyReverse(OVS_CT_KEY *key) { @@ -231,7 +249,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, if (!OvsNatTranslateCtEntry(entry)) { return FALSE; } -ctx->hash = OvsHashCtKey(>key); +ctx->hash = OvsCtHashKey(>key); } else { entry->natInfo.natAction = natInfo->natAction; } @@ -531,20 +549,6 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx) return found; } -UINT32 -OvsHashCtKey(const OVS_CT_KEY *key) -{ -UINT32 hsrc, hdst, hash; -hsrc = OvsJhashBytes((UINT32*) >src, sizeof(key->src), 0); -hdst = OvsJhashBytes((UINT32*) >dst, sizeof(key->dst), 0); -hash = hsrc ^ hdst; /* TO identify reverse traffic */ -hash = OvsJhashBytes((uint32_t *) >dst + 1, - ((uint32_t *) (key + 1) - - (uint32_t *) (>dst + 1)), - hash); 
-return hash; -} - static UINT8 OvsReverseIcmpType(UINT8 type) { @@ -642,7 +646,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, OvsCtKeyReverse(>key); } -ctx->hash = OvsHashCtKey(>key); +ctx->hash = OvsCtHashKey(>key); return NDIS_STATUS_SUCCESS; } @@ -953,7 +957,6 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, OvsFlowKey *key, const PNL_ATTR a) { -PNL_ATTR ctAttr; BOOLEAN commit = FALSE; BOOLEAN force = FALSE; BOOLEAN postUpdateEvent = FALSE; @@ -973,109 +976,110 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, return status; } -/* XXX Convert this to NL_ATTR_FOR_EACH */ -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_ZONE); -if (ctAttr) { -zone = NlAttrGetU16(ctAttr); -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_COMMIT); -if (ctAttr) { -commit = TRUE; -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_MARK); -if (ctAttr) { -mark = NlAttrGet(ctAttr); -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_LABELS); -if (ctAttr) { -labels = NlAttrGet(ctAttr); -} -natActionInfo.natAction = NAT_ACTION_NONE; -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_NAT); -if (ctAttr) { -/* Pares Nested NAT attributes. */ -PNL_ATTR natAttr; -unsigned int left; -BOOLEAN hasMinIp = FALSE; -BOOLEAN hasMinPort = FALSE; -BOOLEAN hasMaxIp = FALSE; -BOOLEAN hasMaxPort = FALSE; -NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) { -enum ovs_nat_attr subtype = NlAttrType(natAttr); -switch(subtype) { -case OVS_NAT_ATTR_SRC: -case OVS_NAT_ATTR_DST: -natActionInfo.natAction |= -((subtype == OVS_NAT_ATTR_SRC) -? NAT_ACTION_SRC : NAT_ACTION_DST); +PNL_ATTR ctAttr = NULL; +INT left; + +NL_NESTED_FOR_EACH (ctAttr, left, a) { +switch(NlAttrType(ctAttr)) { +case OVS_CT_ATTR_ZONE: +zone = NlAttrGetU16(ctAttr); +break; +case OVS_CT_ATTR_COMMIT: +commit = TRUE; +break; +case OVS_CT_ATTR_MARK: +mark = NlAttrGet(ctAttr); break; -case OVS_NAT_ATTR_IP_MIN: -memcpy(, - NlAttrData(natAttr), NlAt
[ovs-dev] [PATCH v2 2/3] datapath-windows: Implement locking in conntrack NAT.
This patch primarily replaces the existing NDIS RW lock based implementation for NAT in conntrack with a spinlock based implementation inside the NAT module, along with some conntrack optimizations. - The 'ovsNatTable' and 'ovsUnNatTable' tables are shared between cleanup threads and the packet processing thread. In order to protect these two tables, use a spinlock. Also introduce counters to track the number of NAT entries. - Introduce a new function OvsGetTcpHeader() to retrieve TCP header and payload length, to optimize for TCP traffic. - Optimize conntrack look up. - Remove 'bucketlockRef' member from conntrack entry structure. Testing: Verified loading/unloading the driver with Driver Verifier enabled. Ran TCP/UDP and ICMP traffic. Signed-off-by: Anand Kumar --- v1->v2: Merge patch 2 and 3 so that NAT locks related changes are in a single patch. --- datapath-windows/ovsext/Conntrack-ftp.c | 4 +- datapath-windows/ovsext/Conntrack-nat.c | 27 +++- datapath-windows/ovsext/Conntrack-tcp.c | 15 ++--- datapath-windows/ovsext/Conntrack.c | 110 +--- datapath-windows/ovsext/Conntrack.h | 36 +++ 5 files changed, 100 insertions(+), 92 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-ftp.c b/datapath-windows/ovsext/Conntrack-ftp.c index 6830dfa..ce09a65 100644 --- a/datapath-windows/ovsext/Conntrack-ftp.c +++ b/datapath-windows/ovsext/Conntrack-ftp.c @@ -129,14 +129,14 @@ OvsCtHandleFtp(PNET_BUFFER_LIST curNbl, char temp[256] = { 0 }; char ftpMsg[256] = { 0 }; +UINT32 len; TCPHdr tcpStorage; const TCPHdr *tcp; -tcp = OvsGetTcp(curNbl, layers->l4Offset, ); +tcp = OvsGetTcpHeader(curNbl, layers, , ); if (!tcp) { return NDIS_STATUS_INVALID_PACKET; } -UINT32 len = OvsGetTcpPayloadLength(curNbl); if (len > sizeof(temp)) { /* We only care up to 256 */ len = sizeof(temp); diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index da1814f..11057e6 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ 
-3,7 +3,8 @@ PLIST_ENTRY ovsNatTable = NULL; PLIST_ENTRY ovsUnNatTable = NULL; - +static NDIS_SPIN_LOCK ovsCtNatLock; +static ULONG ovsNatEntries; /* *--- * OvsHashNatKey @@ -109,6 +110,8 @@ NTSTATUS OvsNatInit() InitializeListHead([i]); } +NdisAllocateSpinLock(); +ovsNatEntries = 0; return STATUS_SUCCESS; } @@ -121,6 +124,11 @@ NTSTATUS OvsNatInit() VOID OvsNatFlush(UINT16 zone) { PLIST_ENTRY link, next; +if (!ovsNatEntries) { +return; +} + +NdisAcquireSpinLock(); for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { POVS_NAT_ENTRY entry = @@ -131,6 +139,7 @@ VOID OvsNatFlush(UINT16 zone) } } } +NdisReleaseSpinLock(); } /* @@ -144,10 +153,14 @@ VOID OvsNatCleanup() if (ovsNatTable == NULL) { return; } + +NdisAcquireSpinLock(); OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG); ovsNatTable = NULL; ovsUnNatTable = NULL; +NdisReleaseSpinLock(); +NdisFreeSpinLock(); } /* @@ -250,10 +263,13 @@ static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis) VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry) { +NdisAcquireSpinLock(); InsertHeadList(OvsNatGetBucket(>key, FALSE), >link); InsertHeadList(OvsNatGetBucket(>value, TRUE), >reverseLink); +NdisReleaseSpinLock(); +NdisInterlockedIncrement((PLONG)); } /* @@ -399,21 +415,29 @@ OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse) PLIST_ENTRY link; POVS_NAT_ENTRY entry; +if (!ovsNatEntries) { +return NULL; +} + +NdisAcquireSpinLock(); LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) { if (reverse) { entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink); if (OvsNatKeyAreSame(ctKey, >value)) { +NdisReleaseSpinLock(); return entry; } } else { entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); if (OvsNatKeyAreSame(ctKey, >key)) { +NdisReleaseSpinLock(); return entry; } } } +NdisReleaseSpinLock(); return NULL; } @@ -432,6 +456,7 @@ OvsNatDeleteEntry(POVS_NAT_ENTRY entry) RemoveEntryList(>link); RemoveEntryList(>reverseLink); 
OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); +NdisInterlockedDecrement((PLONG)); } /* diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c index 8cbab24..eda42ac 100644 --- a/dat
[ovs-dev] [PATCH v2 1/3] datapath-windows: Use spinlock instead of RW lock for ct entry
This patch mainly changes the NDIS RW lock for conntrack entries to a spinlock, along with some minor refactoring in conntrack. A spinlock is used instead of an RW lock because RW locks cause performance hits when acquired/released multiple times. - Use NdisInterlockedXX wrapper APIs instead of InterlockedXX. - Update 'ctTotalRelatedEntries' using interlocked functions. - Move conntrack lock out of NAT module. Testing: Verified loading/unloading the driver with Driver Verifier enabled. Ran TCP/UDP and ICMP traffic. Signed-off-by: Anand Kumar --- v1->v2: Calculate the dispatch level only in cases where the locks are being acquired multiple times within a given context and minor style change. --- datapath-windows/ovsext/Conntrack-nat.c | 7 +- datapath-windows/ovsext/Conntrack-related.c | 17 ++-- datapath-windows/ovsext/Conntrack.c | 135 ++-- datapath-windows/ovsext/Conntrack.h | 2 +- datapath-windows/ovsext/Util.h | 18 5 files changed, 94 insertions(+), 85 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 316c946..da1814f 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; -LOCK_STATE_EX lockState; -/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ -NdisAcquireRWLockRead(entry->lock, , 0); + /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? >key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { -NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. 
-NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } -NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c index ec4b536..b798137 100644 --- a/datapath-windows/ovsext/Conntrack-related.c +++ b/datapath-windows/ovsext/Conntrack-related.c @@ -18,7 +18,7 @@ #include "Jhash.h" static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */ -static UINT64 ctTotalRelatedEntries; +static ULONG ctTotalRelatedEntries; static OVS_CT_THREAD_CTX ctRelThreadCtx; static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; @@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime) POVS_CT_REL_ENTRY entry; LOCK_STATE_EX lockState; -NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); - if (!ctTotalRelatedEntries) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NULL; } +NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { /* XXX - Scan the table based on the hash instead */ LIST_FORALL_SAFE([i], link, next) { @@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry) { RemoveEntryList(>link); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalRelatedEntries--; +NdisInterlockedDecrement((PLONG)); } NDIS_STATUS @@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto, NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); InsertHeadList([hash & CT_HASH_TABLE_MASK], >link); -ctTotalRelatedEntries++; +NdisInterlockedIncrement((PLONG)); NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NDIS_STATUS_SUCCESS; @@ -150,11 +148,10 @@ OvsCtRelatedFlush() { PLIST_ENTRY link, next; POVS_CT_REL_ENTRY entry; - LOCK_STATE_EX lockState; -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); if (ctTotalRelatedEntries) { +NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); for (int i = 0; i < 
CT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link); @@ -189,9 +186,8 @@ OvsCtRelatedEntryCleaner(PVOID data) /* Lock has been freed by 'OvsCleanupCtRelated()' */ break; } -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); + if (context->exit) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); break; } @@ -201,6 +197,7 @@
[ovs-dev] [PATCH v2 0/3] Optimize conntrack performance
This patch series is primarily to refactor conntrack code for better throughput with conntrack. With this patch series TCP throughput with conntrack increased by ~50%. Anand Kumar (3): datapath-windows: Use spinlock instead of RW lock for ct entry datapath-windows: Implement locking in conntrack NAT. datapath-windows: Compute ct hash based on 5-tuple and zone datapath-windows/ovsext/Conntrack-ftp.c | 4 +- datapath-windows/ovsext/Conntrack-nat.c | 34 +- datapath-windows/ovsext/Conntrack-related.c | 17 +- datapath-windows/ovsext/Conntrack-tcp.c | 15 +- datapath-windows/ovsext/Conntrack.c | 469 +--- datapath-windows/ovsext/Conntrack.h | 40 ++- datapath-windows/ovsext/Util.h | 18 ++ 7 files changed, 308 insertions(+), 289 deletions(-) -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v4 3/4] datapath-windows: Implement locking in conntrack NAT.
Hi Shashank, I will address this in next version of the patch. Thanks, Anand Kumar On 6/18/18, 2:36 PM, "Shashank Ram" wrote: This patch should be combined with the patch where NAT lock is removed from CT. Keeping this separate will cause the previous patches in this series to break NAT functionality. Thanks, Shashank On 06/17/2018 10:37 PM, Anand Kumar wrote: > The 'ovsNatTable' and 'ovsUnNatTable' tables are shared > between cleanup threads and packet processing thread. > In order to protect these two tables use a spinlock. > > Also introduce counters to track number of nat entries. > > Signed-off-by: Anand Kumar > --- > datapath-windows/ovsext/Conntrack-nat.c | 27 ++- > 1 file changed, 26 insertions(+), 1 deletion(-) > > diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c > index da1814f..11057e6 100644 > --- a/datapath-windows/ovsext/Conntrack-nat.c > +++ b/datapath-windows/ovsext/Conntrack-nat.c > @@ -3,7 +3,8 @@ > > PLIST_ENTRY ovsNatTable = NULL; > PLIST_ENTRY ovsUnNatTable = NULL; > - > +static NDIS_SPIN_LOCK ovsCtNatLock; > +static ULONG ovsNatEntries; > /* > *--- >* OvsHashNatKey > @@ -109,6 +110,8 @@ NTSTATUS OvsNatInit() > InitializeListHead([i]); > } > > +NdisAllocateSpinLock(); > +ovsNatEntries = 0; > return STATUS_SUCCESS; > } > > @@ -121,6 +124,11 @@ NTSTATUS OvsNatInit() > VOID OvsNatFlush(UINT16 zone) > { > PLIST_ENTRY link, next; > +if (!ovsNatEntries) { > +return; > +} > + > +NdisAcquireSpinLock(); > for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { > LIST_FORALL_SAFE([i], link, next) { > POVS_NAT_ENTRY entry = > @@ -131,6 +139,7 @@ VOID OvsNatFlush(UINT16 zone) > } > } > } > +NdisReleaseSpinLock(); > } > > /* > @@ -144,10 +153,14 @@ VOID OvsNatCleanup() > if (ovsNatTable == NULL) { > return; > } > + > +NdisAcquireSpinLock(); > OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); > OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG); > ovsNatTable = NULL; > ovsUnNatTable = NULL; > +NdisReleaseSpinLock(); 
> +NdisFreeSpinLock(); > } > > /* > @@ -250,10 +263,13 @@ static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis) > VOID > OvsNatAddEntry(OVS_NAT_ENTRY* entry) > { > +NdisAcquireSpinLock(); > InsertHeadList(OvsNatGetBucket(>key, FALSE), > >link); > InsertHeadList(OvsNatGetBucket(>value, TRUE), > >reverseLink); > +NdisReleaseSpinLock(); > +NdisInterlockedIncrement((PLONG)); > } > > /* > @@ -399,21 +415,29 @@ OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse) > PLIST_ENTRY link; > POVS_NAT_ENTRY entry; > > +if (!ovsNatEntries) { > +return NULL; > +} > + > +NdisAcquireSpinLock(); > LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) { > if (reverse) { > entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink); > > if (OvsNatKeyAreSame(ctKey, >value)) { > +NdisReleaseSpinLock(); > return entry; > } > } else { > entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); > > if (OvsNatKeyAreSame(ctKey, >key)) { > +NdisReleaseSpinLock(); > return entry; > } > } > } > +NdisReleaseSpinLock(); > return NULL; > } > > @@ -432,6 +456,7 @@ OvsNatDeleteEntry(POVS_NAT_ENTRY entry) > RemoveEntryList(>link); > RemoveEntryList(>reverseLink); > OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); > +NdisInterlockedDecrement((PLONG)); > } > > /* ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v4 2/4] datapath-windows: Remove NAT locks in conntrack.
Hi Shashank, Thanks for the review. Please find my response inline. Thanks, Anand Kumar On 6/18/18, 11:54 AM, "Shashank Ram" wrote: On 06/17/2018 10:37 PM, Anand Kumar wrote: > This patch primarily gets rid of NdisRWLock in conntrack for NAT > functionality along with some conntrack optimization. The subsequent > patch will have a lock implementation inside NAT module. > > - Introduce a new function OvsGetTcpHeader() to retrieve TCP header >and payload length, to optimize for TCP traffic. > - Optimize conntrack look up. > - Remove 'bucketlockRef' member from conntrack entry structure. > > Signed-off-by: Anand Kumar > --- > datapath-windows/ovsext/Conntrack-ftp.c | 4 +- > datapath-windows/ovsext/Conntrack-tcp.c | 15 ++--- > datapath-windows/ovsext/Conntrack.c | 110 +--- > datapath-windows/ovsext/Conntrack.h | 36 +++ > 4 files changed, 74 insertions(+), 91 deletions(-) > > diff --git a/datapath-windows/ovsext/Conntrack-ftp.c b/datapath-windows/ovsext/Conntrack-ftp.c > index 6830dfa..ce09a65 100644 > --- a/datapath-windows/ovsext/Conntrack-ftp.c > +++ b/datapath-windows/ovsext/Conntrack-ftp.c > @@ -129,14 +129,14 @@ OvsCtHandleFtp(PNET_BUFFER_LIST curNbl, > char temp[256] = { 0 }; > char ftpMsg[256] = { 0 }; > > +UINT32 len; > TCPHdr tcpStorage; > const TCPHdr *tcp; > -tcp = OvsGetTcp(curNbl, layers->l4Offset, ); > +tcp = OvsGetTcpHeader(curNbl, layers, , ); > if (!tcp) { > return NDIS_STATUS_INVALID_PACKET; > } > > -UINT32 len = OvsGetTcpPayloadLength(curNbl); > if (len > sizeof(temp)) { > /* We only care up to 256 */ > len = sizeof(temp); > diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c > index 8cbab24..eda42ac 100644 > --- a/datapath-windows/ovsext/Conntrack-tcp.c > +++ b/datapath-windows/ovsext/Conntrack-tcp.c > @@ -194,9 +194,9 @@ OvsCastConntrackEntryToTcpEntry(OVS_CT_ENTRY* conn) > enum CT_UPDATE_RES > OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_, > const TCPHdr *tcp, > - PNET_BUFFER_LIST nbl, > BOOLEAN reply, > 
- UINT64 now) > + UINT64 now, > + UINT32 tcpPayloadLen) > { > struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_); > /* The peer that sent 'pkt' */ > @@ -207,7 +207,6 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_, > UINT16 tcp_flags = ntohs(tcp->flags); > uint16_t win = ntohs(tcp->window); > uint32_t ack, end, seq, orig_seq; > -uint32_t p_len = OvsGetTcpPayloadLength(nbl); > int ackskew; > > if (OvsCtInvalidTcpFlags(tcp_flags)) { > @@ -248,7 +247,7 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_, > > ack = ntohl(tcp->ack_seq); > > -end = seq + p_len; > +end = seq + tcpPayloadLen; > if (tcp_flags & TCP_SYN) { > end++; > if (dst->wscale & CT_WSCALE_FLAG) { > @@ -287,7 +286,7 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_, > > } else { > ack = ntohl(tcp->ack_seq); > -end = seq + p_len; > +end = seq + tcpPayloadLen; > if (tcp_flags & TCP_SYN) { > end++; > } > @@ -469,8 +468,8 @@ OvsConntrackValidateTcpPacket(const TCPHdr *tcp) > > OVS_CT_ENTRY * > OvsConntrackCreateTcpEntry(const TCPHdr *tcp, > - PNET_BUFFER_LIST nbl, > - UINT64 now) > + UINT64 now, > + UINT32 tcpPayloadLen) > { > struct conn_tcp* newconn; > struct tcp_peer *src, *dst; > @@ -486,7 +485,7 @@ OvsConntrackCreateTcpEntry(const TCPHdr *tcp, > dst = >peer[1]; > > src->seqlo = ntohl(tcp->seq); > -src->seqhi = src->seqlo + OvsGetTcpPayloadLength(nbl) + 1; > +src->seqhi = src->seqlo + tcpPayloadLen + 1; > > if (tcp->flags & TCP_SYN) { > src->seqhi++; > diff --git a/datapath-windows/ovsext/Conntrack.c b/dat
Re: [ovs-dev] [PATCH v4 1/4] datapath-windows: Use spinlock instead of RW lock for ct entry
Hi Shashank, Thanks for the review. Please find my response inline. Thanks, Anand Kumar From: Shashank Ram Date: Monday, June 18, 2018 at 11:27 AM To: Anand Kumar , "d...@openvswitch.org" Subject: Re: [ovs-dev] [PATCH v4 1/4] datapath-windows: Use spinlock instead of RW lock for ct entry On 06/17/2018 10:37 PM, Anand Kumar wrote: This patch mainly changes a ndis RW lock for conntrack entry to a spinlock along with some minor refactor in conntrack. Using spinlock instead of RW lock as RW locks causes performance hits when acquired/released multiple times. - Use NdisInterlockedXX wrapper api's instead of InterlockedXX. - Update 'ctTotalRelatedEntries' using interlocked functions. - Move conntrack lock out of NAT module. Signed-off-by: Anand Kumar <mailto:kumaran...@vmware.com> --- datapath-windows/ovsext/Conntrack-nat.c | 7 +- datapath-windows/ovsext/Conntrack-related.c | 17 ++-- datapath-windows/ovsext/Conntrack.c | 134 ++-- datapath-windows/ovsext/Conntrack.h | 2 +- datapath-windows/ovsext/Util.h | 18 5 files changed, 93 insertions(+), 85 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 316c946..da1814f 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; -LOCK_STATE_EX lockState; -/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ -NdisAcquireRWLockRead(entry->lock, , 0); + /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? 
>key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { -NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. -NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } -NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c index ec4b536..b798137 100644 --- a/datapath-windows/ovsext/Conntrack-related.c +++ b/datapath-windows/ovsext/Conntrack-related.c @@ -18,7 +18,7 @@ #include "Jhash.h" static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */ -static UINT64 ctTotalRelatedEntries; +static ULONG ctTotalRelatedEntries; static OVS_CT_THREAD_CTX ctRelThreadCtx; static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; @@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime) POVS_CT_REL_ENTRY entry; LOCK_STATE_EX lockState; -NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); - if (!ctTotalRelatedEntries) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NULL; } +NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { /* XXX - Scan the table based on the hash instead */ LIST_FORALL_SAFE([i], link, next) { @@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry) { RemoveEntryList(>link); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalRelatedEntries--; +NdisInterlockedDecrement((PLONG)); } NDIS_STATUS @@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto, NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); InsertHeadList([hash & CT_HASH_TABLE_MASK], >link); -ctTotalRelatedEntries++; 
+NdisInterlockedIncrement((PLONG)); NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NDIS_STATUS_SUCCESS; @@ -150,11 +148,10 @@ OvsCtRelatedFlush() { PLIST_ENTRY link, next; POVS_CT_REL_ENTRY entry; - LOCK_STATE_EX lockState; -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); if (ctTotalRelatedEntries) { +NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link); @@ -189,9 +186,8 @@ OvsCtRelatedEntryCleaner(PVOID data) /* Lock has been freed by 'OvsCleanupCtRelated()' */
[ovs-dev] [PATCH v4 3/4] datapath-windows: Implement locking in conntrack NAT.
The 'ovsNatTable' and 'ovsUnNatTable' tables are shared between cleanup threads and packet processing thread. In order to protect these two tables use a spinlock. Also introduce counters to track number of nat entries. Signed-off-by: Anand Kumar --- datapath-windows/ovsext/Conntrack-nat.c | 27 ++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index da1814f..11057e6 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -3,7 +3,8 @@ PLIST_ENTRY ovsNatTable = NULL; PLIST_ENTRY ovsUnNatTable = NULL; - +static NDIS_SPIN_LOCK ovsCtNatLock; +static ULONG ovsNatEntries; /* *--- * OvsHashNatKey @@ -109,6 +110,8 @@ NTSTATUS OvsNatInit() InitializeListHead([i]); } +NdisAllocateSpinLock(); +ovsNatEntries = 0; return STATUS_SUCCESS; } @@ -121,6 +124,11 @@ NTSTATUS OvsNatInit() VOID OvsNatFlush(UINT16 zone) { PLIST_ENTRY link, next; +if (!ovsNatEntries) { +return; +} + +NdisAcquireSpinLock(); for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { POVS_NAT_ENTRY entry = @@ -131,6 +139,7 @@ VOID OvsNatFlush(UINT16 zone) } } } +NdisReleaseSpinLock(); } /* @@ -144,10 +153,14 @@ VOID OvsNatCleanup() if (ovsNatTable == NULL) { return; } + +NdisAcquireSpinLock(); OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG); ovsNatTable = NULL; ovsUnNatTable = NULL; +NdisReleaseSpinLock(); +NdisFreeSpinLock(); } /* @@ -250,10 +263,13 @@ static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis) VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry) { +NdisAcquireSpinLock(); InsertHeadList(OvsNatGetBucket(>key, FALSE), >link); InsertHeadList(OvsNatGetBucket(>value, TRUE), >reverseLink); +NdisReleaseSpinLock(); +NdisInterlockedIncrement((PLONG)); } /* @@ -399,21 +415,29 @@ OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse) PLIST_ENTRY link; POVS_NAT_ENTRY entry; +if 
(!ovsNatEntries) { +return NULL; +} + +NdisAcquireSpinLock(); LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) { if (reverse) { entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink); if (OvsNatKeyAreSame(ctKey, >value)) { +NdisReleaseSpinLock(); return entry; } } else { entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); if (OvsNatKeyAreSame(ctKey, >key)) { +NdisReleaseSpinLock(); return entry; } } } +NdisReleaseSpinLock(); return NULL; } @@ -432,6 +456,7 @@ OvsNatDeleteEntry(POVS_NAT_ENTRY entry) RemoveEntryList(>link); RemoveEntryList(>reverseLink); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); +NdisInterlockedDecrement((PLONG)); } /* -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v4 2/4] datapath-windows: Remove NAT locks in conntrack.
This patch primarily gets rid of NdisRWLock in conntrack for NAT functionality along with some conntrack optimization. The subsequent patch will have a lock implementation inside NAT module. - Introduce a new function OvsGetTcpHeader() to retrieve TCP header and payload length, to optimize for TCP traffic. - Optimize conntrack look up. - Remove 'bucketlockRef' member from conntrack entry structure. Signed-off-by: Anand Kumar --- datapath-windows/ovsext/Conntrack-ftp.c | 4 +- datapath-windows/ovsext/Conntrack-tcp.c | 15 ++--- datapath-windows/ovsext/Conntrack.c | 110 +--- datapath-windows/ovsext/Conntrack.h | 36 +++ 4 files changed, 74 insertions(+), 91 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-ftp.c b/datapath-windows/ovsext/Conntrack-ftp.c index 6830dfa..ce09a65 100644 --- a/datapath-windows/ovsext/Conntrack-ftp.c +++ b/datapath-windows/ovsext/Conntrack-ftp.c @@ -129,14 +129,14 @@ OvsCtHandleFtp(PNET_BUFFER_LIST curNbl, char temp[256] = { 0 }; char ftpMsg[256] = { 0 }; +UINT32 len; TCPHdr tcpStorage; const TCPHdr *tcp; -tcp = OvsGetTcp(curNbl, layers->l4Offset, ); +tcp = OvsGetTcpHeader(curNbl, layers, , ); if (!tcp) { return NDIS_STATUS_INVALID_PACKET; } -UINT32 len = OvsGetTcpPayloadLength(curNbl); if (len > sizeof(temp)) { /* We only care up to 256 */ len = sizeof(temp); diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c index 8cbab24..eda42ac 100644 --- a/datapath-windows/ovsext/Conntrack-tcp.c +++ b/datapath-windows/ovsext/Conntrack-tcp.c @@ -194,9 +194,9 @@ OvsCastConntrackEntryToTcpEntry(OVS_CT_ENTRY* conn) enum CT_UPDATE_RES OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_, const TCPHdr *tcp, - PNET_BUFFER_LIST nbl, BOOLEAN reply, - UINT64 now) + UINT64 now, + UINT32 tcpPayloadLen) { struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_); /* The peer that sent 'pkt' */ @@ -207,7 +207,6 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_, UINT16 tcp_flags = ntohs(tcp->flags); uint16_t win = 
ntohs(tcp->window); uint32_t ack, end, seq, orig_seq; -uint32_t p_len = OvsGetTcpPayloadLength(nbl); int ackskew; if (OvsCtInvalidTcpFlags(tcp_flags)) { @@ -248,7 +247,7 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_, ack = ntohl(tcp->ack_seq); -end = seq + p_len; +end = seq + tcpPayloadLen; if (tcp_flags & TCP_SYN) { end++; if (dst->wscale & CT_WSCALE_FLAG) { @@ -287,7 +286,7 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_, } else { ack = ntohl(tcp->ack_seq); -end = seq + p_len; +end = seq + tcpPayloadLen; if (tcp_flags & TCP_SYN) { end++; } @@ -469,8 +468,8 @@ OvsConntrackValidateTcpPacket(const TCPHdr *tcp) OVS_CT_ENTRY * OvsConntrackCreateTcpEntry(const TCPHdr *tcp, - PNET_BUFFER_LIST nbl, - UINT64 now) + UINT64 now, + UINT32 tcpPayloadLen) { struct conn_tcp* newconn; struct tcp_peer *src, *dst; @@ -486,7 +485,7 @@ OvsConntrackCreateTcpEntry(const TCPHdr *tcp, dst = >peer[1]; src->seqlo = ntohl(tcp->seq); -src->seqhi = src->seqlo + OvsGetTcpPayloadLength(nbl) + 1; +src->seqhi = src->seqlo + tcpPayloadLen + 1; if (tcp->flags & TCP_SYN) { src->seqhi++; diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 7b54fba..2a85e57 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -32,7 +32,6 @@ KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL; -static PNDIS_RW_LOCK_EX ovsCtNatLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static ULONG ctTotalEntries; @@ -54,19 +53,11 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) ctTotalEntries = 0; UINT32 numBucketLocks = CT_HASH_TABLE_SIZE; -/* Init the sync-lock */ -ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle); -if (ovsCtNatLockObj == NULL) { -return STATUS_INSUFFICIENT_RESOURCES; -} - /* Init the Hash Buffer */ ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY) * CT_HASH_TABLE_SIZE, 
OVS_CT_POOL_TAG); if (ovsConntrackTable == NULL) { -NdisFreeRWLock(ovsCtNatLockObj); -ovsCtNatLockObj = NULL; return STATUS_
[ovs-dev] [PATCH v4 1/4] datapath-windows: Use spinlock instead of RW lock for ct entry
This patch mainly changes a ndis RW lock for conntrack entry to a spinlock along with some minor refactor in conntrack. Using spinlock instead of RW lock as RW locks causes performance hits when acquired/released multiple times. - Use NdisInterlockedXX wrapper api's instead of InterlockedXX. - Update 'ctTotalRelatedEntries' using interlocked functions. - Move conntrack lock out of NAT module. Signed-off-by: Anand Kumar --- datapath-windows/ovsext/Conntrack-nat.c | 7 +- datapath-windows/ovsext/Conntrack-related.c | 17 ++-- datapath-windows/ovsext/Conntrack.c | 134 ++-- datapath-windows/ovsext/Conntrack.h | 2 +- datapath-windows/ovsext/Util.h | 18 5 files changed, 93 insertions(+), 85 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 316c946..da1814f 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; -LOCK_STATE_EX lockState; -/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ -NdisAcquireRWLockRead(entry->lock, , 0); + /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? >key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { -NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. 
-NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } -NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c index ec4b536..b798137 100644 --- a/datapath-windows/ovsext/Conntrack-related.c +++ b/datapath-windows/ovsext/Conntrack-related.c @@ -18,7 +18,7 @@ #include "Jhash.h" static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */ -static UINT64 ctTotalRelatedEntries; +static ULONG ctTotalRelatedEntries; static OVS_CT_THREAD_CTX ctRelThreadCtx; static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; @@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime) POVS_CT_REL_ENTRY entry; LOCK_STATE_EX lockState; -NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); - if (!ctTotalRelatedEntries) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NULL; } +NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { /* XXX - Scan the table based on the hash instead */ LIST_FORALL_SAFE([i], link, next) { @@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry) { RemoveEntryList(>link); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalRelatedEntries--; +NdisInterlockedDecrement((PLONG)); } NDIS_STATUS @@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto, NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); InsertHeadList([hash & CT_HASH_TABLE_MASK], >link); -ctTotalRelatedEntries++; +NdisInterlockedIncrement((PLONG)); NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NDIS_STATUS_SUCCESS; @@ -150,11 +148,10 @@ OvsCtRelatedFlush() { PLIST_ENTRY link, next; POVS_CT_REL_ENTRY entry; - LOCK_STATE_EX lockState; -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); if (ctTotalRelatedEntries) { +NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); for (int i = 0; i < 
CT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link); @@ -189,9 +186,8 @@ OvsCtRelatedEntryCleaner(PVOID data) /* Lock has been freed by 'OvsCleanupCtRelated()' */ break; } -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); + if (context->exit) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); break; } @@ -201,6 +197,7 @@ OvsCtRelatedEntryCleaner(PVOID data) threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL; if (ctTotalRelatedEntries) { +NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
[ovs-dev] [PATCH v4 4/4] datapath-windows: Compute ct hash based on 5-tuple and zone
Conntrack 5-tuple consists of src address, dst address, src port, dst port and protocol which will be unique to a ct session. Use this information along with zone to compute hash. Also re-factor conntrack code related to parsing netlink attributes. Signed-off-by: Anand Kumar --- datapath-windows/ovsext/Conntrack.c | 228 ++-- datapath-windows/ovsext/Conntrack.h | 2 - 2 files changed, 116 insertions(+), 114 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 2a85e57..91bd638 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -151,6 +151,24 @@ OvsCleanupConntrack(VOID) OvsNatCleanup(); } +/* + * + * OvsCtHashKey + * Compute hash using 5-tuple and zone. + * + */ +UINT32 +OvsCtHashKey(const OVS_CT_KEY *key) +{ +UINT32 hsrc, hdst, hash; +hsrc = key->src.addr.ipv4 | ntohl(key->src.port); +hdst = key->dst.addr.ipv4 | ntohl(key->dst.port); +hash = hsrc ^ hdst; /* TO identify reverse traffic */ +hash = hash | (key->zone + key->nw_proto); +hash = OvsJhashWords((uint32_t*) , 1, hash); +return hash; +} + static __inline VOID OvsCtKeyReverse(OVS_CT_KEY *key) { @@ -232,7 +250,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, if (!OvsNatTranslateCtEntry(entry)) { return FALSE; } -ctx->hash = OvsHashCtKey(>key); +ctx->hash = OvsCtHashKey(>key); } else { entry->natInfo.natAction = natInfo->natAction; } @@ -529,20 +547,6 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx) return found; } -UINT32 -OvsHashCtKey(const OVS_CT_KEY *key) -{ -UINT32 hsrc, hdst, hash; -hsrc = OvsJhashBytes((UINT32*) >src, sizeof(key->src), 0); -hdst = OvsJhashBytes((UINT32*) >dst, sizeof(key->dst), 0); -hash = hsrc ^ hdst; /* TO identify reverse traffic */ -hash = OvsJhashBytes((uint32_t *) >dst + 1, - ((uint32_t *) (key + 1) - - (uint32_t *) (>dst + 1)), - hash); -return hash; -} - static UINT8 OvsReverseIcmpType(UINT8 type) { @@ -640,7 +644,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, OvsCtKeyReverse(>key); } -ctx->hash = 
OvsHashCtKey(>key); +ctx->hash = OvsCtHashKey(>key); return NDIS_STATUS_SUCCESS; } @@ -952,7 +956,6 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, OvsFlowKey *key, const PNL_ATTR a) { -PNL_ATTR ctAttr; BOOLEAN commit = FALSE; BOOLEAN force = FALSE; BOOLEAN postUpdateEvent = FALSE; @@ -972,109 +975,110 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, return status; } -/* XXX Convert this to NL_ATTR_FOR_EACH */ -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_ZONE); -if (ctAttr) { -zone = NlAttrGetU16(ctAttr); -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_COMMIT); -if (ctAttr) { -commit = TRUE; -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_MARK); -if (ctAttr) { -mark = NlAttrGet(ctAttr); -} -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_LABELS); -if (ctAttr) { -labels = NlAttrGet(ctAttr); -} -natActionInfo.natAction = NAT_ACTION_NONE; -ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_NAT); -if (ctAttr) { -/* Pares Nested NAT attributes. */ -PNL_ATTR natAttr; -unsigned int left; -BOOLEAN hasMinIp = FALSE; -BOOLEAN hasMinPort = FALSE; -BOOLEAN hasMaxIp = FALSE; -BOOLEAN hasMaxPort = FALSE; -NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) { -enum ovs_nat_attr subtype = NlAttrType(natAttr); -switch(subtype) { -case OVS_NAT_ATTR_SRC: -case OVS_NAT_ATTR_DST: -natActionInfo.natAction |= -((subtype == OVS_NAT_ATTR_SRC) -? NAT_ACTION_SRC : NAT_ACTION_DST); +PNL_ATTR ctAttr = NULL; +INT left; + +NL_NESTED_FOR_EACH (ctAttr, left, a) { +switch(NlAttrType(ctAttr)) { +case OVS_CT_ATTR_ZONE: +zone = NlAttrGetU16(ctAttr); +break; +case OVS_CT_ATTR_COMMIT: +commit = TRUE; +break; +case OVS_CT_ATTR_MARK: +mark = NlAttrGet(ctAttr); break; -case OVS_NAT_ATTR_IP_MIN: -memcpy(, - NlAttrData(natAttr), NlAttrGetSize(natAttr)); -hasMinIp = TRUE; +case OVS_CT_ATTR_LABELS: +labels = NlAttrGet(ctAttr);
[ovs-dev] [PATCH v1 0/4] Optimize conntrack performance
This patch series is primarily to refactor conntrack code for better throughput with conntrack. With this patch series, TCP throughput with conntrack increased by ~50%. Testing: Verified loading/unloading the driver with driver verifier enabled. Ran TCP/UDP and ICMP traffic. Anand Kumar (4): datapath-windows: Use spinlock instead of RW lock for ct entry datapath-windows: Remove NAT locks in conntrack. datapath-windows: Implement locking in conntrack NAT. datapath-windows: Compute ct hash based on 5-tuple and zone datapath-windows/ovsext/Conntrack-ftp.c | 4 +- datapath-windows/ovsext/Conntrack-nat.c | 34 +- datapath-windows/ovsext/Conntrack-related.c | 17 +- datapath-windows/ovsext/Conntrack-tcp.c | 15 +- datapath-windows/ovsext/Conntrack.c | 468 +--- datapath-windows/ovsext/Conntrack.h | 40 ++- datapath-windows/ovsext/Util.h | 18 ++ 7 files changed, 307 insertions(+), 289 deletions(-) -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v3] datapath-windows: Optimize conntrack performance
Abandoning this change. I have couple of patches to optimize conntrack, will be sending it out as a series. Thanks, Anand Kumar On 6/11/18, 11:39 AM, "Anand Kumar" wrote: - Use spinlock instead of read/write lock for conntrack entry. - Use NdisInterlockedXX wrapper api's instead of InterlockedXX - Update 'ctTotalRelatedEntries' using interlocked functions - Refactor conntrack code to make it simpler to read. Testing: Evaluated TCP performance using iperf3. Before optimization: Native: 6.0Gbps OVS: 5.1-5.75Gbps OVS with conntrack enabled: 3.9-4.0Gbps After optimization: Native: 6.0Gbps OVS: 5.1-5.75Gbps OVS with conntrack enabled:: 4.2-4.4Gbps Tested by loading/unloading driver with driver verifier enabled Signed-off-by: Anand Kumar --- v1->v2: Update commit message v2->v3: Address review comments --- datapath-windows/ovsext/Conntrack-nat.c | 7 +- datapath-windows/ovsext/Conntrack-related.c | 17 ++-- datapath-windows/ovsext/Conntrack.c | 130 +--- datapath-windows/ovsext/Conntrack.h | 20 - 4 files changed, 89 insertions(+), 85 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 316c946..da1814f 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; -LOCK_STATE_EX lockState; -/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ -NdisAcquireRWLockRead(entry->lock, , 0); + /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? 
>key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { -NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. -NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } -NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c index ec4b536..b798137 100644 --- a/datapath-windows/ovsext/Conntrack-related.c +++ b/datapath-windows/ovsext/Conntrack-related.c @@ -18,7 +18,7 @@ #include "Jhash.h" static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */ -static UINT64 ctTotalRelatedEntries; +static ULONG ctTotalRelatedEntries; static OVS_CT_THREAD_CTX ctRelThreadCtx; static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; @@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime) POVS_CT_REL_ENTRY entry; LOCK_STATE_EX lockState; -NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); - if (!ctTotalRelatedEntries) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NULL; } +NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { /* XXX - Scan the table based on the hash instead */ LIST_FORALL_SAFE([i], link, next) { @@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry) { RemoveEntryList(>link); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalRelatedEntries--; +NdisInterlockedDecrement((PLONG)); } NDIS_STATUS @@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto, NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); InsertHeadList([hash & CT_HASH_TABLE_MASK], >link); -ctTotalRelatedEntries++; 
+NdisInterlockedIncrement((PLONG)); NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NDIS_STATUS_SUCCESS; @@ -150,11 +148,10 @@ OvsCtRelatedFlush() { PLIST_ENTRY link, next; POVS_CT_REL_ENTRY entry; - LOCK_STATE_EX lockState; -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); if (ctTotalRelatedEntries) { +
Re: [ovs-dev] [PATCH v2] datapath-windows: Optimize conntrack performance
Hi Shashank, Thanks for the review. I will address your comments and send out a V3. Regards, Anand Kumar From: Shashank Ram Date: Friday, June 8, 2018 at 10:11 AM To: Anand Kumar Cc: "d...@openvswitch.org" Subject: Re: [ovs-dev] [PATCH v2] datapath-windows: Optimize conntrack performance On Thu, Jun 7, 2018, 11:52 AM Anand Kumar mailto:kumaran...@vmware.com>> wrote: - Move conntrack lock out of NAT module - Use spinlock instead of read/write lock for conntrack entry. - Update 'ctTotalRelatedEntries' using interlocked functions - Refactor conntrack code to make it more readable. Testing: Evaluated TCP performance using iperf3. Before optimization: Native: 6.0Gbps OVS: 5.1-5.75Gbps OVS with conntrack enabled: 3.9-4.0Gbps After optimization: Native: 6.0Gbps OVS: 5.1-5.75Gbps OVS with conntrack enabled:: 4.2-4.4Gbps Tested by loading/unloading driver with driver verifier enabled Signed-off-by: Anand Kumar mailto:kumaran...@vmware.com>> --- v1->v2: Update commit message --- datapath-windows/ovsext/Conntrack-nat.c | 7 +- datapath-windows/ovsext/Conntrack-related.c | 17 ++-- datapath-windows/ovsext/Conntrack.c | 115 datapath-windows/ovsext/Conntrack.h | 2 +- 4 files changed, 59 insertions(+), 82 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 316c946..da1814f 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; -LOCK_STATE_EX lockState; -/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ -NdisAcquireRWLockRead(entry->lock, , 0); + /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? 
>key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { -NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. -NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } -NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c index ec4b536..00eac67 100644 --- a/datapath-windows/ovsext/Conntrack-related.c +++ b/datapath-windows/ovsext/Conntrack-related.c @@ -18,7 +18,7 @@ #include "Jhash.h" static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */ -static UINT64 ctTotalRelatedEntries; +static LONG ctTotalRelatedEntries; Please change to ULONG and cast it using PLONG where necessary. static OVS_CT_THREAD_CTX ctRelThreadCtx; static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; @@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime) POVS_CT_REL_ENTRY entry; LOCK_STATE_EX lockState; -NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); - if (!ctTotalRelatedEntries) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NULL; } +NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { /* XXX - Scan the table based on the hash instead */ LIST_FORALL_SAFE([i], link, next) { @@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry) { RemoveEntryList(>link); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalRelatedEntries--; +InterlockedDecrement((LONG volatile*)); Mind wrapping this using NdisInterlockedxxx function? 
} NDIS_STATUS @@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto, NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); InsertHeadList([hash & CT_HASH_TABLE_MASK], >link); -ctTotalRelatedEntries++; +InterlockedIncrement((LONG volatile *)); NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NDIS_STATUS_SUCCESS; @@ -150,11 +148,10 @@ OvsCtRelatedFlush() { PLIST_ENTRY link, next; POVS_CT_REL_ENTRY entry; - LOCK_STATE_EX lockState; -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); if (ctTotalRelatedEntries) { +NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { entry = CONTAI
[ovs-dev] [PATCH v2] datapath-windows: Optimize conntrack performance
- Move conntrack lock out of NAT module - Use spinlock instead of read/write lock for conntrack entry. - Update 'ctTotalRelatedEntries' using interlocked functions - Refactor conntrack code to make it more readable. Testing: Evaluated TCP performance using iperf3. Before optimization: Native: 6.0Gbps OVS: 5.1-5.75Gbps OVS with conntrack enabled: 3.9-4.0Gbps After optimization: Native: 6.0Gbps OVS: 5.1-5.75Gbps OVS with conntrack enabled:: 4.2-4.4Gbps Tested by loading/unloading driver with driver verifier enabled Signed-off-by: Anand Kumar --- v1->v2: Update commit message --- datapath-windows/ovsext/Conntrack-nat.c | 7 +- datapath-windows/ovsext/Conntrack-related.c | 17 ++-- datapath-windows/ovsext/Conntrack.c | 115 datapath-windows/ovsext/Conntrack.h | 2 +- 4 files changed, 59 insertions(+), 82 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 316c946..da1814f 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; -LOCK_STATE_EX lockState; -/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ -NdisAcquireRWLockRead(entry->lock, , 0); + /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? >key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { -NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. 
-NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } -NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c index ec4b536..00eac67 100644 --- a/datapath-windows/ovsext/Conntrack-related.c +++ b/datapath-windows/ovsext/Conntrack-related.c @@ -18,7 +18,7 @@ #include "Jhash.h" static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */ -static UINT64 ctTotalRelatedEntries; +static LONG ctTotalRelatedEntries; static OVS_CT_THREAD_CTX ctRelThreadCtx; static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; @@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime) POVS_CT_REL_ENTRY entry; LOCK_STATE_EX lockState; -NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); - if (!ctTotalRelatedEntries) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NULL; } +NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { /* XXX - Scan the table based on the hash instead */ LIST_FORALL_SAFE([i], link, next) { @@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry) { RemoveEntryList(>link); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalRelatedEntries--; +InterlockedDecrement((LONG volatile*)); } NDIS_STATUS @@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto, NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); InsertHeadList([hash & CT_HASH_TABLE_MASK], >link); -ctTotalRelatedEntries++; +InterlockedIncrement((LONG volatile *)); NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NDIS_STATUS_SUCCESS; @@ -150,11 +148,10 @@ OvsCtRelatedFlush() { PLIST_ENTRY link, next; POVS_CT_REL_ENTRY entry; - LOCK_STATE_EX lockState; -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); if (ctTotalRelatedEntries) { +NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); for (int i = 0; i 
< CT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link); @@ -189,9 +186,8 @@ OvsCtRelatedEntryCleaner(PVOID data) /* Lock has been freed by 'OvsCleanupCtRelated()' */ break; } -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); + if (context->exit) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); break; } @@ -201,6 +197,7 @@ OvsCtRelatedEntryCleaner(PVOID data) threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL; if (ctTotalRelatedEntries) { +N
[ovs-dev] [PATCH] datapath-windows: Optimize conntrack performance
- Use spinlock instead of read/write lock for conntrack entry. - Update 'ctTotalRelatedEntries' using interlocked functions - Refactor conntrack code to make it more readable. Testing: Evaluated TCP performance using iperf3. Before optimization: Native: 6.0Gbps OVS: 5.1-5.75Gbps OVS with conntrack enabled: 3.9-4.0Gbps After optimization: Native: 6.0Gbps OVS: 5.1-5.75Gbps OVS with conntrack enabled:: 4.2-4.4Gbps Signed-off-by: Anand Kumar --- datapath-windows/ovsext/Conntrack-nat.c | 7 +- datapath-windows/ovsext/Conntrack-related.c | 17 ++-- datapath-windows/ovsext/Conntrack.c | 115 datapath-windows/ovsext/Conntrack.h | 2 +- 4 files changed, 59 insertions(+), 82 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 316c946..da1814f 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; -LOCK_STATE_EX lockState; -/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ -NdisAcquireRWLockRead(entry->lock, , 0); + /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? >key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { -NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. 
-NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } -NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c index ec4b536..00eac67 100644 --- a/datapath-windows/ovsext/Conntrack-related.c +++ b/datapath-windows/ovsext/Conntrack-related.c @@ -18,7 +18,7 @@ #include "Jhash.h" static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */ -static UINT64 ctTotalRelatedEntries; +static LONG ctTotalRelatedEntries; static OVS_CT_THREAD_CTX ctRelThreadCtx; static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; @@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime) POVS_CT_REL_ENTRY entry; LOCK_STATE_EX lockState; -NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); - if (!ctTotalRelatedEntries) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NULL; } +NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { /* XXX - Scan the table based on the hash instead */ LIST_FORALL_SAFE([i], link, next) { @@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry) { RemoveEntryList(>link); OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalRelatedEntries--; +InterlockedDecrement((LONG volatile*)); } NDIS_STATUS @@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto, NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); InsertHeadList([hash & CT_HASH_TABLE_MASK], >link); -ctTotalRelatedEntries++; +InterlockedIncrement((LONG volatile *)); NdisReleaseRWLock(ovsCtRelatedLockObj, ); return NDIS_STATUS_SUCCESS; @@ -150,11 +148,10 @@ OvsCtRelatedFlush() { PLIST_ENTRY link, next; POVS_CT_REL_ENTRY entry; - LOCK_STATE_EX lockState; -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); if (ctTotalRelatedEntries) { +NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); for (int i = 0; i 
< CT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link); @@ -189,9 +186,8 @@ OvsCtRelatedEntryCleaner(PVOID data) /* Lock has been freed by 'OvsCleanupCtRelated()' */ break; } -NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); + if (context->exit) { -NdisReleaseRWLock(ovsCtRelatedLockObj, ); break; } @@ -201,6 +197,7 @@ OvsCtRelatedEntryCleaner(PVOID data) threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL; if (ctTotalRelatedEntries) { +NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0); for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { LIST_FOR
[ovs-dev] [PATCH] datapath-windows: Add support for handling DEI bit of VLAN header
The Drop eligible indicator(DEI) is 1 bit wide and it is part of Tag control information (TCI) in VLAN header, which indicates that the frame can be dropped during congestion. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/Actions.c | 1 + datapath-windows/ovsext/User.c| 19 +-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index 9bbc787..6922f05 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -2023,6 +2023,7 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext, vlan = (struct ovs_action_push_vlan *)NlAttrGet((const PNL_ATTR)a); vlanTag->TagHeader.VlanId = ntohs(vlan->vlan_tci) & 0xfff; vlanTag->TagHeader.UserPriority = ntohs(vlan->vlan_tci) >> 13; + vlanTag->TagHeader.CanonicalFormatId = (ntohs(vlan->vlan_tci) >> 12) & 0x1; NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, Ieee8021QNetBufferListInfo) = vlanTagValue; diff --git a/datapath-windows/ovsext/User.c b/datapath-windows/ovsext/User.c index 4693a8b..509472f 100644 --- a/datapath-windows/ovsext/User.c +++ b/datapath-windows/ovsext/User.c @@ -1000,11 +1000,12 @@ OvsCreateQueueNlPacket(PVOID userData, POVS_PACKET_HDR_INFO hdrInfo) { #define VLAN_TAG_SIZE 4 -UINT32 allocLen, dataLen, extraLen; +UINT32 allocLen, dataLen, extraLen = 0; POVS_PACKET_QUEUE_ELEM elem; UINT8 *src, *dst; NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; -NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo; +PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo = NULL; +PVOID vlanTag; OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)>tunKey; UINT32 pid; UINT32 nlMsgSize; @@ -1037,8 +1038,13 @@ OvsCreateQueueNlPacket(PVOID userData, return NULL; } -vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo); -extraLen = vlanInfo.TagHeader.VlanId ? 
VLAN_TAG_SIZE : 0; +vlanTag = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo); +if (vlanTag) { +vlanInfo = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *) +if (vlanInfo->Value) { +extraLen = VLAN_TAG_SIZE; +} +} dataLen = NET_BUFFER_DATA_LENGTH(nb); @@ -1148,8 +1154,9 @@ OvsCreateQueueNlPacket(PVOID userData, ((UINT32 *)dst)[2] = ((UINT32 *)src)[2]; dst += 12; ((UINT16 *)dst)[0] = htons(0x8100); -((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId | -(vlanInfo.TagHeader.UserPriority << 13)); +((UINT16 *)dst)[1] = htons(vlanInfo->TagHeader.VlanId | +(vlanInfo->TagHeader.CanonicalFormatId << 12) | +(vlanInfo->TagHeader.UserPriority << 13)); elem->hdrInfo.l3Offset += VLAN_TAG_SIZE; elem->hdrInfo.l4Offset += VLAN_TAG_SIZE; ovsUserStats.vlanInsert++; -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v2] datapath-windows: Prevent ct-counters from getting redundantly incremented
The conntrack-counters ought to be incremented only if it's a new lookup or if it's recirculated through a different zone for the first time. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/Conntrack.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 678bedb..add1491 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -886,10 +886,11 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, return NDIS_STATUS_RESOURCES; } -/* Increment the counters soon after the lookup, since we set ct.state - * to OVS_CS_F_TRACKED after processing the ct entry. +/* Increment stats for the entry if it wasn't tracked previously or + * if they are on different zones */ -if (entry && (!(key->ct.state & OVS_CS_F_TRACKED))) { +if (entry && (entry->key.zone != key->ct.zone || + (!(key->ct.state & OVS_CS_F_TRACKED { OvsCtIncrementCounters(entry, ctx.reply, curNbl); } -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH] datapath-windows: Update ct stats when packet is processed by conntrack
Hi Sairam, Sure. Will update the commit message and send out V2. Thanks, Anand Kumar On 4/26/18, 3:57 PM, "Sairam Venugopal" <vsai...@vmware.com> wrote: Anand, Thanks for the patch. Can you update the commit message to better describe the underlying issue? Prevent conntrack-counters from getting redundantly incremented for recirculated packets. The counters ought to be incremented only if it's a new lookup or if it's recirculated through a different zone for the first time. Thanks, Sairam On 4/25/18, 1:06 PM, "ovs-dev-boun...@openvswitch.org on behalf of Anand Kumar" <ovs-dev-boun...@openvswitch.org on behalf of kumaran...@vmware.com> wrote: When ct lookup returns a matching ct entry, increment ct stats even if the zone information in conntrack entry does not match with flowkey. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/Conntrack.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 678bedb..add1491 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -886,10 +886,11 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, return NDIS_STATUS_RESOURCES; } -/* Increment the counters soon after the lookup, since we set ct.state - * to OVS_CS_F_TRACKED after processing the ct entry. 
+/* Increment stats for the entry if it wasn't tracked previously or + * if they are on different zones */ -if (entry && (!(key->ct.state & OVS_CS_F_TRACKED))) { +if (entry && (entry->key.zone != key->ct.zone || + (!(key->ct.state & OVS_CS_F_TRACKED { OvsCtIncrementCounters(entry, ctx.reply, curNbl); } -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev=DwICAg=uilaK90D4TOVoH58JNXRgQ=Z6vowHUOjP5ysP_g372c49Nqc1vEKqHKNBkR5Q5Z7uo=0j1dB0xbZjupscaOcQmDzLRgDPo5kjVp8k_-HX4w7P8=HdqtDC96bCuR7w2P7qVPA5skEcXSEeVqVbqefbaA5pw= ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH] datapath-windows: Update ct stats when packet is processed by conntrack
When ct lookup returns a matching ct entry, increment ct stats even if the zone information in conntrack entry does not match with flowkey. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/Conntrack.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 678bedb..add1491 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -886,10 +886,11 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, return NDIS_STATUS_RESOURCES; } -/* Increment the counters soon after the lookup, since we set ct.state - * to OVS_CS_F_TRACKED after processing the ct entry. +/* Increment stats for the entry if it wasn't tracked previously or + * if they are on different zones */ -if (entry && (!(key->ct.state & OVS_CS_F_TRACKED))) { +if (entry && (entry->key.zone != key->ct.zone || + (!(key->ct.state & OVS_CS_F_TRACKED { OvsCtIncrementCounters(entry, ctx.reply, curNbl); } -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v2] datapath-windows: Do not drop Ip fragments less than MIN_FRAGMENT_SIZE
Hi Alin, Thanks for reviewing the patch. Can we get this patch applied? Thanks, Anand Kumar On 3/7/18, 3:42 AM, "ovs-dev-boun...@openvswitch.org on behalf of aserd...@ovn.org" <ovs-dev-boun...@openvswitch.org on behalf of aserd...@ovn.org> wrote: Acked-by: Alin Gabriel Serdean <aserd...@ovn.org> Alin. -Mesaj original- De la: ovs-dev-boun...@openvswitch.org <ovs-dev-boun...@openvswitch.org> În numele Anand Kumar Trimis: Wednesday, March 7, 2018 1:48 AM Către: d...@openvswitch.org Subiect: [ovs-dev] [PATCH v2] datapath-windows: Do not drop Ip fragments less than MIN_FRAGMENT_SIZE Previously ipfragment module would drop any fragments less than MIN_FRAGMENT_SIZE (400 bytes), which was added to safeguard against the vulnerability CVE-2000-0305. This check is incorrect, since minimum size of the Ipfragment is 68 bytes (i.e. max length of Ip Header + 8 bytes of L4 header). So Ip fragments less than MIN_FRAGMENT_SIZE (400 bytes) are not guaranteed to be malformed or illegal. To guard against security vulnerability CVE-2000-0305, for a given ip datagram, ipfragments should be dropped only when number of smallest fragments received reaches a certain threshold. 
Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/IpFragment.c | 9 +++-- 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/datapath-windows/ovsext/IpFragment.c b/datapath-windows/ovsext/IpFragment.c index 3d5277a..d59d7cf 100644 --- a/datapath-windows/ovsext/IpFragment.c +++ b/datapath-windows/ovsext/IpFragment.c @@ -25,10 +25,10 @@ #undef OVS_DBG_MOD #endif #define OVS_DBG_MOD OVS_DBG_IPFRAG -/* Based on MIN_FRAGMENT_SIZE.*/ -#define MAX_FRAGMENTS 164 + #define MIN_FRAGMENT_SIZE 400 #define MAX_IPDATAGRAM_SIZE 65535 +#define MAX_FRAGMENTS MAX_IPDATAGRAM_SIZE/MIN_FRAGMENT_SIZE + 1 /* Function declarations */ static KSTART_ROUTINE OvsIpFragmentEntryCleaner; @@ -275,10 +275,7 @@ OvsProcessIpv4Fragment(POVS_SWITCH_CONTEXT switchContext, offset = ntohs(ipHdr->frag_off) & IP_OFFSET; offset <<= 3; flags = ntohs(ipHdr->frag_off) & IP_MF; -/* Only the last fragment can be of smaller size.*/ -if (flags && ntohs(ipHdr->tot_len) < MIN_FRAGMENT_SIZE) { -return NDIS_STATUS_INVALID_LENGTH; -} + /*Copy fragment specific fields. */ fragKey.protocol = ipHdr->protocol; fragKey.id = ipHdr->id; -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev=DwIFBA=uilaK90D4TOVoH58JNXRgQ=Q5z9tBe-nAOpE7LIHSPV8uy5-437agMXvkeHHMkR8Us=wBaFjfmLsp_IHmGbqcnqFbiVgL9SrUg_OjOmu6mtm18=-RSx_6v5FgOED7n_4PPfYEIrpVkor7g5BhyBt2ygcuU= ___ dev mailing list d...@openvswitch.org https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev=DwIFBA=uilaK90D4TOVoH58JNXRgQ=Q5z9tBe-nAOpE7LIHSPV8uy5-437agMXvkeHHMkR8Us=wBaFjfmLsp_IHmGbqcnqFbiVgL9SrUg_OjOmu6mtm18=-RSx_6v5FgOED7n_4PPfYEIrpVkor7g5BhyBt2ygcuU= ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v2] datapath-windows: Do not drop Ip fragments less than MIN_FRAGMENT_SIZE
Previously ipfragment module would drop any fragments less than MIN_FRAGMENT_SIZE (400 bytes), which was added to safeguard against the vulnerability CVE-2000-0305. This check is incorrect, since minimum size of the Ipfragment is 68 bytes (i.e. max length of Ip Header + 8 bytes of L4 header). So Ip fragments less than MIN_FRAGMENT_SIZE (400 bytes) are not guaranteed to be malformed or illegal. To guard against security vulnerability CVE-2000-0305, for a given ip datagram, ipfragments should be dropped only when number of smallest fragments received reaches a certain threshold. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/IpFragment.c | 9 +++-- 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/datapath-windows/ovsext/IpFragment.c b/datapath-windows/ovsext/IpFragment.c index 3d5277a..d59d7cf 100644 --- a/datapath-windows/ovsext/IpFragment.c +++ b/datapath-windows/ovsext/IpFragment.c @@ -25,10 +25,10 @@ #undef OVS_DBG_MOD #endif #define OVS_DBG_MOD OVS_DBG_IPFRAG -/* Based on MIN_FRAGMENT_SIZE.*/ -#define MAX_FRAGMENTS 164 + #define MIN_FRAGMENT_SIZE 400 #define MAX_IPDATAGRAM_SIZE 65535 +#define MAX_FRAGMENTS MAX_IPDATAGRAM_SIZE/MIN_FRAGMENT_SIZE + 1 /* Function declarations */ static KSTART_ROUTINE OvsIpFragmentEntryCleaner; @@ -275,10 +275,7 @@ OvsProcessIpv4Fragment(POVS_SWITCH_CONTEXT switchContext, offset = ntohs(ipHdr->frag_off) & IP_OFFSET; offset <<= 3; flags = ntohs(ipHdr->frag_off) & IP_MF; -/* Only the last fragment can be of smaller size.*/ -if (flags && ntohs(ipHdr->tot_len) < MIN_FRAGMENT_SIZE) { -return NDIS_STATUS_INVALID_LENGTH; -} + /*Copy fragment specific fields. */ fragKey.protocol = ipHdr->protocol; fragKey.id = ipHdr->id; -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH] datapath-windows: Do not drop Ip fragments less than MIN_FRAGMENT_SIZE
Thanks for the review. MIN_FRAGMENT_SIZE is used to determine maximum number of fragments that are allowed for an IP datagram. I will update the macro MAX_FRAGMENTS to compute the value based on MIN_FRAGMENT_SIZE. Regards, Anand Kumar On 3/6/18, 5:43 AM, "aserd...@ovn.org" <aserd...@ovn.org> wrote: I guess you can also remove the define (https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_openvswitch_ovs_blob_master_datapath-2Dwindows_ovsext_IpFr=DwIFBA=uilaK90D4TOVoH58JNXRgQ=Q5z9tBe-nAOpE7LIHSPV8uy5-437agMXvkeHHMkR8Us=IO3JXN8xQplOxWRufcsmrjAad9LTMz362Yoy7M5ydI0=FWfFKSKtm6492BHmJ_HpEwBo_iYESHGduKFqDgo4ZOU= agment.c#L30) since it is not used anywhere else. Thanks, Alin. -Mesaj original- De la: ovs-dev-boun...@openvswitch.org <ovs-dev-boun...@openvswitch.org> În numele Anand Kumar Trimis: Tuesday, March 6, 2018 1:21 AM Către: d...@openvswitch.org Subiect: [ovs-dev] [PATCH] datapath-windows: Do not drop Ip fragments less than MIN_FRAGMENT_SIZE Previously ipfragment module would drop any fragments less than MIN_FRAGMENT_SIZE (400 bytes), which was added to safeguard against the vulnerability CVE-2000-0305. This check is incorrect, since minimum size of the Ipfragment is 68 bytes (i.e. max length of Ip Header + 8 bytes of L4 header). So Ip fragments less than MIN_FRAGMENT_SIZE (400 bytes) are not guaranteed to be malformed or illegal. To guard against security vulnerability CVE-2000-0305, for a given ip datagram, ipfragments should be dropped only when number of smallest fragments received reaches a certain threshold. 
Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/IpFragment.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/datapath-windows/ovsext/IpFragment.c b/datapath-windows/ovsext/IpFragment.c index 3d5277a..da9d33a 100644 --- a/datapath-windows/ovsext/IpFragment.c +++ b/datapath-windows/ovsext/IpFragment.c @@ -275,10 +275,7 @@ OvsProcessIpv4Fragment(POVS_SWITCH_CONTEXT switchContext, offset = ntohs(ipHdr->frag_off) & IP_OFFSET; offset <<= 3; flags = ntohs(ipHdr->frag_off) & IP_MF; -/* Only the last fragment can be of smaller size.*/ -if (flags && ntohs(ipHdr->tot_len) < MIN_FRAGMENT_SIZE) { -return NDIS_STATUS_INVALID_LENGTH; -} + /*Copy fragment specific fields. */ fragKey.protocol = ipHdr->protocol; fragKey.id = ipHdr->id; -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev=DwIFBA=uilaK90D4TOVoH58JNXRgQ=Q5z9tBe-nAOpE7LIHSPV8uy5-437agMXvkeHHMkR8Us=IO3JXN8xQplOxWRufcsmrjAad9LTMz362Yoy7M5ydI0=lZS0kDScEQSgdBhKx1EABsU7dS-a_f9IMQJv1aQar0I= ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH] datapath-windows: Do not drop Ip fragments less than MIN_FRAGMENT_SIZE
Previously ipfragment module would drop any fragments less than MIN_FRAGMENT_SIZE (400 bytes), which was added to safeguard against the vulnerability CVE-2000-0305. This check is incorrect, since minimum size of the Ipfragment is 68 bytes (i.e. max length of Ip Header + 8 bytes of L4 header). So Ip fragments less than MIN_FRAGMENT_SIZE (400 bytes) are not guaranteed to be malformed or illegal. To guard against security vulnerability CVE-2000-0305, for a given ip datagram, ipfragments should be dropped only when number of smallest fragments received reaches a certain threshold. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/IpFragment.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/datapath-windows/ovsext/IpFragment.c b/datapath-windows/ovsext/IpFragment.c index 3d5277a..da9d33a 100644 --- a/datapath-windows/ovsext/IpFragment.c +++ b/datapath-windows/ovsext/IpFragment.c @@ -275,10 +275,7 @@ OvsProcessIpv4Fragment(POVS_SWITCH_CONTEXT switchContext, offset = ntohs(ipHdr->frag_off) & IP_OFFSET; offset <<= 3; flags = ntohs(ipHdr->frag_off) & IP_MF; -/* Only the last fragment can be of smaller size.*/ -if (flags && ntohs(ipHdr->tot_len) < MIN_FRAGMENT_SIZE) { -return NDIS_STATUS_INVALID_LENGTH; -} + /*Copy fragment specific fields. */ fragKey.protocol = ipHdr->protocol; fragKey.id = ipHdr->id; -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v2] datapath-windows: On Debug builds, dump NBL info based on OVS_DBG_DEFAULT macro
Currently nbl information is getting dumped whenever a nbl is copied or allocated, since OVS_DBG_DEFAULT is set to OVS_DBG_INFO for debug builds, which affects the ovs performance. Instead dump nbl information only when OVS_DBG_DEFAULT is set to OVS_DBG_LOUD or higher. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/BufferMgmt.c | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/datapath-windows/ovsext/BufferMgmt.c b/datapath-windows/ovsext/BufferMgmt.c index 03470d7..448cd76 100644 --- a/datapath-windows/ovsext/BufferMgmt.c +++ b/datapath-windows/ovsext/BufferMgmt.c @@ -273,6 +273,7 @@ OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx, static VOID OvsDumpForwardingDetails(PNET_BUFFER_LIST nbl) { +#if OVS_DBG_DEFAULT >= OVS_DBG_LOUD PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); if (info == NULL) { @@ -284,12 +285,15 @@ OvsDumpForwardingDetails(PNET_BUFFER_LIST nbl) info->SourceNicIndex, info->IsPacketDataSafe ? "TRUE" : "FALSE", info->IsPacketDataSafe ? 
0 : info->SafePacketDataSize); - +#else +UNREFERENCED_PARAMETER(nbl); +#endif } static VOID OvsDumpNBLContext(PNET_BUFFER_LIST nbl) { +#if OVS_DBG_DEFAULT >= OVS_DBG_LOUD PNET_BUFFER_LIST_CONTEXT ctx = nbl->Context; if (ctx == NULL) { OVS_LOG_INFO("No Net Buffer List context"); @@ -300,6 +304,9 @@ OvsDumpNBLContext(PNET_BUFFER_LIST nbl) nbl, ctx, ctx->Size, ctx->Offset); ctx = ctx->Next; } +#else +UNREFERENCED_PARAMETER(nbl); +#endif } @@ -337,6 +344,7 @@ OvsDumpNetBuffer(PNET_BUFFER nb) static VOID OvsDumpNetBufferList(PNET_BUFFER_LIST nbl) { +#if OVS_DBG_DEFAULT >= OVS_DBG_LOUD PNET_BUFFER nb; OVS_LOG_INFO("NBL: %p, parent: %p, SrcHandle: %p, ChildCount:%d " "poolHandle: %p", @@ -349,6 +357,9 @@ OvsDumpNetBufferList(PNET_BUFFER_LIST nbl) OvsDumpNetBuffer(nb); nb = NET_BUFFER_NEXT_NB(nb); } +#else +UNREFERENCED_PARAMETER(nbl); +#endif } /* -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v2] datapath-windows: Support to selectively compile targets
Acked-by: Anand Kumar <kumaran...@vmware.com> Thanks, Anand Kumar On 2/27/18, 11:57 AM, "ovs-dev-boun...@openvswitch.org on behalf of Shashank Ram" <ovs-dev-boun...@openvswitch.org on behalf of r...@vmware.com> wrote: Adds support to selectively compile kernel driver for target versions. This is useful when environments to compile for all targets might not be available on the user's machine, or if the user wants to only compile some targets selectively. Also once appveyor has support to build Win10 targets, we will not pass the "--with-vstudiotargetver" to the configure script. Signed-off-by: Shashank Ram <r...@vmware.com> --- Documentation/intro/install/windows.rst | 5 - Makefile.am | 15 +++ appveyor.yml| 4 ++-- m4/openvswitch.m4 | 28 +++- 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/Documentation/intro/install/windows.rst b/Documentation/intro/install/windows.rst index 2be4eb5..4c1cd0e 100644 --- a/Documentation/intro/install/windows.rst +++ b/Documentation/intro/install/windows.rst @@ -190,9 +190,12 @@ Finally, to the kernel module also: --sysconfdir="C:/openvswitch/etc" \ --with-pthread="C:/pthread" \ --enable-ssl --with-openssl="C:/OpenSSL-Win32" \ - --with-vstudiotarget="" + --with-vstudiotarget="" \ + --with-vstudiotargetver="" Possible values for are: ``Debug`` and ``Release`` +Possible values for is a comma separated list +of target versions to compile among: ``Win8,Win8.1,Win10`` .. 
note:: diff --git a/Makefile.am b/Makefile.am index d397f65..e035a98 100644 --- a/Makefile.am +++ b/Makefile.am @@ -411,14 +411,29 @@ if VSTUDIO_DDK ALL_LOCAL += ovsext ARCH = x64 ovsext: datapath-windows/ovsext.sln $(srcdir)/datapath-windows/include/OvsDpInterface.h +if VSTUDIO_WIN8 MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win8$(VSTUDIO_CONFIG)" /property:Version="$(PACKAGE_VERSION)" //p:Platform=$(ARCH) +endif +if VSTUDIO_WIN8_1 MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win8.1$(VSTUDIO_CONFIG)" /property:Version="$(PACKAGE_VERSION)" //p:Platform=$(ARCH) +endif +if VSTUDIO_WIN10 +MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win10$(VSTUDIO_CONFIG)" /property:Version="$(PACKAGE_VERSION)" //p:Platform=$(ARCH) +endif + CLEAN_LOCAL += ovsext_clean ovsext_clean: datapath-windows/ovsext.sln +if VSTUDIO_WIN8 MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Clean /property:Configuration="Win8$(VSTUDIO_CONFIG)" /property:Version="$(PACKAGE_VERSION)" //p:Platform=$(ARCH) +endif +if VSTUDIO_WIN8_1 MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Clean /property:Configuration="Win8.1$(VSTUDIO_CONFIG)" /property:Version="$(PACKAGE_VERSION)" //p:Platform=$(ARCH) endif +if VSTUDIO_WIN10 +MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Clean /property:Configuration="Win10$(VSTUDIO_CONFIG)" /property:Version="$(PACKAGE_VERSION)" //p:Platform=$(ARCH) +endif +endif .PHONY: ovsext clang-analyze: clean diff --git a/appveyor.yml b/appveyor.yml index 0881e05..da31764 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,6 @@ version: 1.0.{build} branches: - only: + only: - master clone_folder: C:\openvswitch init: @@ -41,6 +41,6 @@ build_script: - C:\MinGW\msys\1.0\bin\bash -lc "cp /c/pthreads-win32/Pre-built.2/dll/x86/*.dll /c/openvswitch/." 
- C:\MinGW\msys\1.0\bin\bash -lc "mv /bin/link.exe /bin/link_copy.exe" - C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./boot.sh" -- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -liphlpapi -lwbemuuid -lole32 -loleaut32\" --with-pthread=C:/pthreads-win32/Pre-built.2 --with-openssl=C:/OpenSSL-Win32 --with-vstudiotarget=\"Debug\"" +- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -liphlpapi -lwbemuuid -lole32 -loleaut32\" --with-pthread=C:/pthreads
Re: [ovs-dev] [PATCH] datapath-windows: fix hash creation on ct mark
Acked-by: Anand Kumar <kumaran...@vmware.com> Thanks, Anand Kumar On 2/21/18, 6:57 AM, "ovs-dev-boun...@openvswitch.org on behalf of Alin Gabriel Serdean" <ovs-dev-boun...@openvswitch.org on behalf of aserd...@ovn.org> wrote: Use key->ct.mark instead of key->ct.zone when generating the hash over the mark. Signed-off-by: Alin Gabriel Serdean <aserd...@ovn.org> --- datapath-windows/ovsext/Flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datapath-windows/ovsext/Flow.c b/datapath-windows/ovsext/Flow.c index bc42714c2..e64d00b8b 100644 --- a/datapath-windows/ovsext/Flow.c +++ b/datapath-windows/ovsext/Flow.c @@ -2571,7 +2571,7 @@ OvsLookupFlow(OVS_DATAPATH *datapath, *hash = OvsJhashWords((UINT32*)hash, 1, key->ct.zone); } if (key->ct.mark) { -*hash = OvsJhashWords((UINT32*)hash, 1, key->ct.zone); +*hash = OvsJhashWords((UINT32*)hash, 1, key->ct.mark); } if (key->ct.labels.ct_labels) { UINT32 lblHash = OvsJhashBytes(>ct.labels, -- 2.16.1.windows.1 ___ dev mailing list d...@openvswitch.org https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev=DwICAg=uilaK90D4TOVoH58JNXRgQ=Q5z9tBe-nAOpE7LIHSPV8uy5-437agMXvkeHHMkR8Us=jef6IDqHh9tL43XOTWwHKdgkCxn1X6N3XtIaWuSxLkc=1HTVWmRloxt1Kzck2vYlu8ZjLpkimoLbkNL0rZopyvc= ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH] datapath-windows: On Debug builds, dump NBL info based on OVS_DBG_DEFAULT macro
Currently nbl information is getting dumped whenever a nbl is copied or allocated, since OVS_DBG_DEFAULT is set to OVS_DBG_INFO for debug builds, which affects the ovs performance. Instead dump nbl information only when OVS_DBG_DEFAULT is set to OVS_DBG_LOUD or higher. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/BufferMgmt.c | 15 ++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/datapath-windows/ovsext/BufferMgmt.c b/datapath-windows/ovsext/BufferMgmt.c index 03470d7..d51dafd 100644 --- a/datapath-windows/ovsext/BufferMgmt.c +++ b/datapath-windows/ovsext/BufferMgmt.c @@ -412,9 +412,11 @@ OvsAllocateFixSizeNBL(PVOID ovsContext, #ifdef DBG InterlockedIncrement((LONG volatile *)>fixNBLCount); +#if OVS_DBG_DEFAULT >= OVS_DBG_LOUD OvsDumpNetBufferList(nbl); OvsDumpForwardingDetails(nbl); #endif +#endif ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); ASSERT(ctx); @@ -525,9 +527,11 @@ OvsAllocateVariableSizeNBL(PVOID ovsContext, #ifdef DBG InterlockedIncrement((LONG volatile *)>zeroNBLCount); +#if OVS_DBG_DEFAULT >= OVS_DBG_LOUD OvsDumpNetBufferList(nbl); OvsDumpForwardingDetails(nbl); #endif +#endif ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); @@ -574,7 +578,9 @@ OvsInitExternalNBLContext(PVOID ovsContext, return NULL; } #ifdef DBG +#if OVS_DBG_DEFAULT >= OVS_DBG_LOUD OvsDumpNBLContext(nbl); +#endif InterlockedIncrement((LONG volatile *)>ovsPool.sysNBLCount); #endif flags = isRecv ? 
OVS_BUFFER_RECV_BUFFER : OVS_BUFFER_SEND_BUFFER; @@ -809,12 +815,14 @@ OvsPartialCopyNBL(PVOID ovsContext, InterlockedIncrement((LONG volatile *)>refCount); #ifdef DBG +#if OVS_DBG_DEFAULT >= OVS_DBG_LOUD OvsDumpNetBufferList(nbl); OvsDumpForwardingDetails(nbl); OvsDumpNetBufferList(newNbl); OvsDumpForwardingDetails(newNbl); #endif +#endif OVS_LOG_LOUD("Partial Copy new NBL: %p", newNbl); return newNbl; @@ -942,9 +950,11 @@ OvsCopySinglePacketNBL(PVOID ovsContext, dstCtx->flags |= srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER); #ifdef DBG +#if OVS_DBG_DEFAULT >= OVS_DBG_LOUD OvsDumpNetBufferList(newNbl); OvsDumpForwardingDetails(newNbl); #endif +#endif OVS_LOG_LOUD("Copy single nb to new NBL: %p", newNbl); return newNbl; } @@ -1064,8 +1074,10 @@ OvsFullCopyNBL(PVOID ovsContext, OVS_DPPORT_NUMBER_INVALID); #ifdef DBG +#if OVS_DBG_DEFAULT >= OVS_DBG_LOUD OvsDumpNetBufferList(nbl); OvsDumpForwardingDetails(nbl); +#endif InterlockedIncrement((LONG volatile *)>nblOnlyCount); #endif OVS_LOG_LOUD("newNbl: %p", newNbl); @@ -1466,13 +1478,14 @@ OvsFragmentNBL(PVOID ovsContext, InterlockedIncrement((LONG volatile *)>refCount); #ifdef DBG InterlockedIncrement((LONG volatile *)>fragNBLCount); - +#if OVS_DBG_DEFAULT >= OVS_DBG_LOUD OvsDumpNetBufferList(nbl); OvsDumpForwardingDetails(nbl); OvsDumpNetBufferList(newNbl); OvsDumpForwardingDetails(newNbl); #endif +#endif OVS_LOG_TRACE("Fragment nbl %p to newNbl: %p", nbl, newNbl); return newNbl; -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH] datapath-windows: Support to selectively compile targets
Acked-by: Anand Kumar <kumaran...@vmware.com> Thanks, Anand Kumar From: Shashank Ram <r...@vmware.com> Sent: Tuesday, February 6, 2018 1:29 PM To: d...@openvswitch.org Cc: Shashank Ram Subject: [PATCH] datapath-windows: Support to selectively compile targets Adds support to selectively compile kernel driver for target versions. This is useful when environments to compile for all targets might not be available on the user's machine, or if the user wants to only compile some targets selectively. Also once appveyor has support to build Win10 targets, we will not pass the "--with-vstudiotargetver" to the configure script. Signed-off-by: Shashank Ram <r...@vmware.com> --- Makefile.am | 15 +++ appveyor.yml | 4 ++-- m4/openvswitch.m4 | 28 +++- 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/Makefile.am b/Makefile.am index d397f65..e035a98 100644 --- a/Makefile.am +++ b/Makefile.am @@ -411,14 +411,29 @@ if VSTUDIO_DDK ALL_LOCAL += ovsext ARCH = x64 ovsext: datapath-windows/ovsext.sln $(srcdir)/datapath-windows/include/OvsDpInterface.h +if VSTUDIO_WIN8 MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win8$(VSTUDIO_CONFIG)" /property:Version="$(PACKAGE_VERSION)" //p:Platform=$(ARCH) +endif +if VSTUDIO_WIN8_1 MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win8.1$(VSTUDIO_CONFIG)" /property:Version="$(PACKAGE_VERSION)" //p:Platform=$(ARCH) +endif +if VSTUDIO_WIN10 +MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win10$(VSTUDIO_CONFIG)" /property:Version="$(PACKAGE_VERSION)" //p:Platform=$(ARCH) +endif + CLEAN_LOCAL += ovsext_clean ovsext_clean: datapath-windows/ovsext.sln +if VSTUDIO_WIN8 MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Clean /property:Configuration="Win8$(VSTUDIO_CONFIG)" /property:Version="$(PACKAGE_VERSION)" //p:Platform=$(ARCH) +endif +if VSTUDIO_WIN8_1 
MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Clean /property:Configuration="Win8.1$(VSTUDIO_CONFIG)" /property:Version="$(PACKAGE_VERSION)" //p:Platform=$(ARCH) endif +if VSTUDIO_WIN10 +MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Clean /property:Configuration="Win10$(VSTUDIO_CONFIG)" /property:Version="$(PACKAGE_VERSION)" //p:Platform=$(ARCH) +endif +endif .PHONY: ovsext clang-analyze: clean diff --git a/appveyor.yml b/appveyor.yml index 0881e05..da31764 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,6 @@ version: 1.0.{build} branches: - only: + only: - master clone_folder: C:\openvswitch init: @@ -41,6 +41,6 @@ build_script: - C:\MinGW\msys\1.0\bin\bash -lc "cp /c/pthreads-win32/Pre-built.2/dll/x86/*.dll /c/openvswitch/." - C:\MinGW\msys\1.0\bin\bash -lc "mv /bin/link.exe /bin/link_copy.exe" - C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./boot.sh" -- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -liphlpapi -lwbemuuid -lole32 -loleaut32\" --with-pthread=C:/pthreads-win32/Pre-built.2 --with-openssl=C:/OpenSSL-Win32 --with-vstudiotarget=\"Debug\"" +- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -liphlpapi -lwbemuuid -lole32 -loleaut32\" --with-pthread=C:/pthreads-win32/Pre-built.2 --with-openssl=C:/OpenSSL-Win32 --with-vstudiotarget=\"Debug\" --with-vstudiotargetver=\"Win8,Win8.1\"" - C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && make" - C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && make datapath_windows_analyze" diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4 index de4d66c..78082d4 100644 --- a/m4/openvswitch.m4 +++ b/m4/op
Re: [ovs-dev] [PATCH] datapath-windows: Support to selectively compile targets
Hi, My thoughts are with Shashank on this, it makes sense to have 1 configure and 1 make command to build a particular target, instead of having flexibility to specify multiple targets. Thanks, Anand Kumar On 2/8/18, 10:56 AM, "ovs-dev-boun...@openvswitch.org on behalf of Shashank Ram" <ovs-dev-boun...@openvswitch.org on behalf of r...@vmware.com> wrote: From: aserd...@ovn.org <aserd...@ovn.org> Sent: Thursday, February 8, 2018 10:43 AM To: Shashank Ram; aserd...@ovn.org; d...@openvswitch.org Subject: RE: [ovs-dev] [PATCH] datapath-windows: Support to selectively compile targets Trimming the message a bit. -Mesaj original- De la: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev-boun...@openvswitch.org] În numele Shashank Ram Trimis: Thursday, February 8, 2018 7:50 PM Către: aserd...@ovn.org; d...@openvswitch.org Subiect: Re: [ovs-dev] [PATCH] datapath-windows: Support to selectively compile targets Hi Alin, thanks for the review. I personally feel we should be consistent and run configure, and have a single make command to build both user space and kernel. What part did you find complicated? [Alin Serdean] I.e. if I configure to target 8. And after I need to target 10 I need to do a reconfigure (similar, for debug and or other platforms). [SR]: In an automated environment, this shouldn't happen. For local compilation, you should be able to manually compile the kernel. The configure part is particularly slow on Windows. For convenience the old part with selecting Debug/Release and trying to build for all the compilers found in the system is still there, so building both userspace and kernel will still be in a single command. I don't see a huge issue to specify two or more make commands to build a particular target of the kernel via the shell. [SR]: I don't think its a big deal either, but its more convenient to run 1 configure and 1 make command. I prefer to keep this as is for now and wait for more reviews. 
___ dev mailing list d...@openvswitch.org https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev=DwIFBA=uilaK90D4TOVoH58JNXRgQ=Q5z9tBe-nAOpE7LIHSPV8uy5-437agMXvkeHHMkR8Us=fKK6KZRD0tZEfwHzLhMsabCH5aXzzYiRP-pJR20Xj9o=nEl_7Q-LhJ74AdsiY85DjA-kWy0uESr5DyFrWDQYKjs= ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v2] datapath-windows: Add trace level logs in conntrack for invalid ct state.
Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/Conntrack-icmp.c | 1 + datapath-windows/ovsext/Conntrack-tcp.c | 4 datapath-windows/ovsext/Conntrack.c | 6 ++ 3 files changed, 11 insertions(+) diff --git a/datapath-windows/ovsext/Conntrack-icmp.c b/datapath-windows/ovsext/Conntrack-icmp.c index 4da0665..28fe2bf 100644 --- a/datapath-windows/ovsext/Conntrack-icmp.c +++ b/datapath-windows/ovsext/Conntrack-icmp.c @@ -61,6 +61,7 @@ BOOLEAN OvsConntrackValidateIcmpPacket(const ICMPHdr *icmp) { if (!icmp) { +OVS_LOG_TRACE("Invalid ICMP packet detected, header cannot be NULL"); return FALSE; } diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c index f8e85a2..8cbab24 100644 --- a/datapath-windows/ovsext/Conntrack-tcp.c +++ b/datapath-windows/ovsext/Conntrack-tcp.c @@ -444,12 +444,14 @@ BOOLEAN OvsConntrackValidateTcpPacket(const TCPHdr *tcp) { if (!tcp) { +OVS_LOG_TRACE("Invalid TCP packet detected, header cannot be NULL"); return FALSE; } UINT16 tcp_flags = ntohs(tcp->flags); if (OvsCtInvalidTcpFlags(tcp_flags)) { +OVS_LOG_TRACE("Invalid TCP packet detected, tcp_flags %hu", tcp_flags); return FALSE; } @@ -457,6 +459,8 @@ OvsConntrackValidateTcpPacket(const TCPHdr *tcp) * totally new connections (syn) or already established, not partially * open (syn+ack). 
*/ if ((tcp_flags & TCP_SYN) && (tcp_flags & TCP_ACK)) { +OVS_LOG_TRACE("Invalid TCP packet detected, SYN+ACK flags not allowed," + "tcp_flags %hu", tcp_flags); return FALSE; } diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 43c9dd3..678bedb 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -317,6 +317,10 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, const ICMPHdr *icmp; icmp = OvsGetIcmp(curNbl, l4Offset, ); if (!OvsConntrackValidateIcmpPacket(icmp)) { +if(icmp) { +OVS_LOG_TRACE("Invalid ICMP packet detected, icmp->type %u", + icmp->type); +} state = OVS_CS_F_INVALID; break; } @@ -334,6 +338,8 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, break; } default: +OVS_LOG_TRACE("Invalid packet detected, protocol not supported" + " ipProto %u", ipProto); state = OVS_CS_F_INVALID; break; } -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH] datapath-windows: Add trace level logs in conntrack for invalid ct state.
Hi Alin, Thanks for the quick review. I will expand log messages to “Invalid XXX packet detected”. I’m not sure if I follow your comment about “s/syn/SYN/g”. Do you want me to change it to Uppercase? Thanks, Anand Kumar On 2/2/18, 2:06 PM, "Alin Serdean" <aserd...@cloudbasesolutions.com> wrote: Looks good just a small nit: s/syn/SYN/g s/ack/ACK/g Also I would prefer if you drop the text `Invalid!`. Either just remove it or expand it, i.e.: "Invalid! ICMPhdr cannot be NULL" => "Invalid ICMP packet detected the header cannot be NULL" Thanks, Alin. -Mesaj original- De la: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev-boun...@openvswitch.org] În numele Anand Kumar Trimis: Friday, February 2, 2018 11:19 PM Către: d...@openvswitch.org Subiect: [ovs-dev] [PATCH] datapath-windows: Add trace level logs in conntrack for invalid ct state. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/Conntrack-icmp.c | 1 + datapath-windows/ovsext/Conntrack-tcp.c | 4 datapath-windows/ovsext/Conntrack.c | 4 3 files changed, 9 insertions(+) diff --git a/datapath-windows/ovsext/Conntrack-icmp.c b/datapath-windows/ovsext/Conntrack-icmp.c index 4da0665..d86feed 100644 --- a/datapath-windows/ovsext/Conntrack-icmp.c +++ b/datapath-windows/ovsext/Conntrack-icmp.c @@ -61,6 +61,7 @@ BOOLEAN OvsConntrackValidateIcmpPacket(const ICMPHdr *icmp) { if (!icmp) { +OVS_LOG_TRACE("Invalid! ICMPhdr cannot be NULL"); return FALSE; } diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c index f8e85a2..65eaac5 100644 --- a/datapath-windows/ovsext/Conntrack-tcp.c +++ b/datapath-windows/ovsext/Conntrack-tcp.c @@ -444,12 +444,14 @@ BOOLEAN OvsConntrackValidateTcpPacket(const TCPHdr *tcp) { if (!tcp) { +OVS_LOG_TRACE("Invalid! TCPHdr cannot be NULL"); return FALSE; } UINT16 tcp_flags = ntohs(tcp->flags); if (OvsCtInvalidTcpFlags(tcp_flags)) { +OVS_LOG_TRACE("Invalid! 
tcp_flags %hu", tcp_flags); return FALSE; } @@ -457,6 +459,8 @@ OvsConntrackValidateTcpPacket(const TCPHdr *tcp) * totally new connections (syn) or already established, not partially * open (syn+ack). */ if ((tcp_flags & TCP_SYN) && (tcp_flags & TCP_ACK)) { +OVS_LOG_TRACE("Invalid! syn+ack flags not allowed, tcp_flags %hu", + tcp_flags); return FALSE; } diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 43c9dd3..7e413c6 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -317,6 +317,9 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, const ICMPHdr *icmp; icmp = OvsGetIcmp(curNbl, l4Offset, ); if (!OvsConntrackValidateIcmpPacket(icmp)) { +if(icmp) { +OVS_LOG_TRACE("Invalid! icmp->type %u", icmp->type); +} state = OVS_CS_F_INVALID; break; } @@ -334,6 +337,7 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, break; } default: +OVS_LOG_TRACE("Invalid! Not supported protocol, ipProto %u", + ipProto); state = OVS_CS_F_INVALID; break; } -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev=DwIFBA=uilaK90D4TOVoH58JNXRgQ=Q5z9tBe-nAOpE7LIHSPV8uy5-437agMXvkeHHMkR8Us=yuxpORhg-xij1o9VvWANs9QOyywwfr7YO_EDT-QvqQ4=ainXigtUgyesCYog0X4639gDSFv1mrY4OTsctKNaH0M= ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH] datapath-windows: Add trace level logs in conntrack for invalid ct state.
Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/Conntrack-icmp.c | 1 + datapath-windows/ovsext/Conntrack-tcp.c | 4 datapath-windows/ovsext/Conntrack.c | 4 3 files changed, 9 insertions(+) diff --git a/datapath-windows/ovsext/Conntrack-icmp.c b/datapath-windows/ovsext/Conntrack-icmp.c index 4da0665..d86feed 100644 --- a/datapath-windows/ovsext/Conntrack-icmp.c +++ b/datapath-windows/ovsext/Conntrack-icmp.c @@ -61,6 +61,7 @@ BOOLEAN OvsConntrackValidateIcmpPacket(const ICMPHdr *icmp) { if (!icmp) { +OVS_LOG_TRACE("Invalid! ICMPhdr cannot be NULL"); return FALSE; } diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c index f8e85a2..65eaac5 100644 --- a/datapath-windows/ovsext/Conntrack-tcp.c +++ b/datapath-windows/ovsext/Conntrack-tcp.c @@ -444,12 +444,14 @@ BOOLEAN OvsConntrackValidateTcpPacket(const TCPHdr *tcp) { if (!tcp) { +OVS_LOG_TRACE("Invalid! TCPHdr cannot be NULL"); return FALSE; } UINT16 tcp_flags = ntohs(tcp->flags); if (OvsCtInvalidTcpFlags(tcp_flags)) { +OVS_LOG_TRACE("Invalid! tcp_flags %hu", tcp_flags); return FALSE; } @@ -457,6 +459,8 @@ OvsConntrackValidateTcpPacket(const TCPHdr *tcp) * totally new connections (syn) or already established, not partially * open (syn+ack). */ if ((tcp_flags & TCP_SYN) && (tcp_flags & TCP_ACK)) { +OVS_LOG_TRACE("Invalid! syn+ack flags not allowed, tcp_flags %hu", + tcp_flags); return FALSE; } diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 43c9dd3..7e413c6 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -317,6 +317,9 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, const ICMPHdr *icmp; icmp = OvsGetIcmp(curNbl, l4Offset, ); if (!OvsConntrackValidateIcmpPacket(icmp)) { +if(icmp) { +OVS_LOG_TRACE("Invalid! 
icmp->type %u", icmp->type); +} state = OVS_CS_F_INVALID; break; } @@ -334,6 +337,7 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, break; } default: +OVS_LOG_TRACE("Invalid! Not supported protocol, ipProto %u", ipProto); state = OVS_CS_F_INVALID; break; } -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v3 3/3] datapath-windows: Optimize conntrack lock implementation.
Hi Alin, Thanks for the review. I’ll address the warning and send out the patch. Thanks, Anand Kumar On 1/29/18, 5:20 AM, "Alin Serdean" <aserd...@cloudbasesolutions.com> wrote: Trimming the patch a bit. Just one small nit from the static analyzer inlined. Rest looks good. Acked-by: Alin Gabriel Serdean <aserd...@ovn.org> <--8<--> /* @@ -124,12 +135,9 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) VOID OvsCleanupConntrack(VOID) { -LOCK_STATE_EX lockState, lockStateNat; -NdisAcquireRWLockWrite(ovsConntrackLockObj, , 0); +LOCK_STATE_EX lockStateNat; ctThreadCtx.exit = 1; KeSetEvent(, 0, FALSE); -NdisReleaseRWLock(ovsConntrackLockObj, ); - KeWaitForSingleObject(ctThreadCtx.threadObject, Executive, KernelMode, FALSE, NULL); ObDereferenceObject(ctThreadCtx.threadObject); @@ -142,8 +150,14 @@ OvsCleanupConntrack(VOID) ovsConntrackTable = NULL; } -NdisFreeRWLock(ovsConntrackLockObj); -ovsConntrackLockObj = NULL; +for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) { +if (ovsCtBucketLock[i] != NULL) { [Alin Serdean] datapath-windows\ovsext\conntrack.c(154): warning C6001: Using uninitialized memory '*ovsCtBucketLock'. +NdisFreeRWLock(ovsCtBucketLock[i]); +} +} +OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG); +ovsCtBucketLock = NULL; + NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatCleanup(); NdisReleaseRWLock(ovsCtNatLockObj, ); @@ -179,11 +193,20 @@ OvsCtUpdateFlowKey(struct OvsFlowKey *key, } } <--8<--> ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v4 3/3] datapath-windows: Optimize conntrack lock implementation.
Currently, there is one global lock for conntrack module, which protects conntrack entries and conntrack table. All the NAT operations are performed holding this lock. This becomes inefficient, as the number of conntrack entries grow. With new implementation, we will have two PNDIS_RW_LOCK_EX locks in conntrack. 1. ovsCtBucketLock - one rw lock per bucket of the conntrack table, which is shared by all the ct entries that belong to the same bucket. 2. lock - a rw lock in OVS_CT_ENTRY structure that protects the members of conntrack entry. Also, OVS_CT_ENTRY structure will have a lock reference(bucketLockRef) to the corresponding OvsCtBucketLock of conntrack table. We need this reference to retrieve ovsCtBucketLock from ct entry for delete operation. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- v1->v2: Address potential memory leak in conntrack initialization. v2->v3: Fix invalid memory access after deleting ct entry. v3->v4: Address warning "uninitialized memory" --- datapath-windows/ovsext/Conntrack-nat.c | 6 + datapath-windows/ovsext/Conntrack.c | 233 datapath-windows/ovsext/Conntrack.h | 3 + 3 files changed, 157 insertions(+), 85 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 7975770..316c946 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,12 +167,16 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; +LOCK_STATE_EX lockState; +/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ +NdisAcquireRWLockRead(entry->lock, , 0); /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? 
>key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { +NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -202,6 +206,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. +NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -215,6 +220,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } +NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 7d56a50..c90c000 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -31,7 +31,7 @@ KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; -static PNDIS_RW_LOCK_EX ovsConntrackLockObj; +static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL; static PNDIS_RW_LOCK_EX ovsCtNatLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static LONG ctTotalEntries; @@ -49,20 +49,14 @@ MapNlToCtTuple(POVS_MESSAGE msgIn, PNL_ATTR attr, NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context) { -NTSTATUS status; +NTSTATUS status = STATUS_SUCCESS; HANDLE threadHandle = NULL; ctTotalEntries = 0; +UINT32 numBucketLocks = CT_HASH_TABLE_SIZE; /* Init the sync-lock */ -ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle); -if (ovsConntrackLockObj == NULL) { -return STATUS_INSUFFICIENT_RESOURCES; -} - ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle); if (ovsCtNatLockObj == NULL) { -NdisFreeRWLock(ovsConntrackLockObj); -ovsConntrackLockObj = NULL; return STATUS_INSUFFICIENT_RESOURCES; } @@ -71,15 +65,27 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) * CT_HASH_TABLE_SIZE, OVS_CT_POOL_TAG); if (ovsConntrackTable == NULL) { -NdisFreeRWLock(ovsConntrackLockObj); -ovsConntrackLockObj = NULL; 
NdisFreeRWLock(ovsCtNatLockObj); ovsCtNatLockObj = NULL; return STATUS_INSUFFICIENT_RESOURCES; } -for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { +ovsCtBucketLock = OvsAllocateMemoryWithTag(sizeof(PNDIS_RW_LOCK_EX) + * CT_HASH_TABLE_SIZE, + OVS_CT_POOL_TAG); +if (ovsCtBucketLock == NULL) { +status = STATUS_INSUFFICIENT_RESOURCES; +goto freeTable; +} + +for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) { InitializeListHead([i]); +ovsCtBucketLock[i] = NdisAllocateRWLock(contex
[ovs-dev] [PATCH v4 1/3] datapath-windows: Refactor conntrack code.
Some of the functions and code are refactored so that new conntrack lock can be implemented Signed-off-by: Anand Kumar <kumaran...@vmware.com> Acked-by: Alin Gabriel Serdean <aserd...@ovn.org> --- datapath-windows/ovsext/Conntrack-nat.c | 11 +- datapath-windows/ovsext/Conntrack.c | 174 ++-- datapath-windows/ovsext/Conntrack.h | 4 - 3 files changed, 103 insertions(+), 86 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index c778f12..7975770 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -93,26 +93,23 @@ NTSTATUS OvsNatInit() sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, OVS_CT_POOL_TAG); if (ovsNatTable == NULL) { -goto failNoMem; +return STATUS_INSUFFICIENT_RESOURCES; } ovsUnNatTable = OvsAllocateMemoryWithTag( sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, OVS_CT_POOL_TAG); if (ovsUnNatTable == NULL) { -goto freeNatTable; +OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); +return STATUS_INSUFFICIENT_RESOURCES; } for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { InitializeListHead([i]); InitializeListHead([i]); } -return STATUS_SUCCESS; -freeNatTable: -OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); -failNoMem: -return STATUS_INSUFFICIENT_RESOURCES; +return STATUS_SUCCESS; } /* diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 169ec4f..3cde836 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -33,7 +33,7 @@ static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX ovsConntrackLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; -static UINT64 ctTotalEntries; +static LONG ctTotalEntries; static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple); static __inline NDIS_STATUS @@ -212,7 +212,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, InsertHeadList([ctx->hash & CT_HASH_TABLE_MASK], 
>link); -ctTotalEntries++; +InterlockedIncrement((LONG volatile *)); return TRUE; } @@ -235,11 +235,6 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, *entryCreated = FALSE; state |= OVS_CS_F_NEW; -parentEntry = OvsCtRelatedLookup(ctx->key, currentTime); -if (parentEntry != NULL) { -state |= OVS_CS_F_RELATED; -} - switch (ipProto) { case IPPROTO_TCP: { @@ -283,6 +278,11 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, break; } +parentEntry = OvsCtRelatedLookup(ctx->key, currentTime); +if (parentEntry != NULL && state != OVS_CS_F_INVALID) { +state |= OVS_CS_F_RELATED; +} + if (state != OVS_CS_F_INVALID && commit) { if (entry) { entry->parent = parentEntry; @@ -315,6 +315,7 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry, BOOLEAN reply, UINT64 now) { +CT_UPDATE_RES status; switch (ipProto) { case IPPROTO_TCP: { @@ -322,32 +323,23 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry, const TCPHdr *tcp; tcp = OvsGetTcp(nbl, l4Offset, ); if (!tcp) { -return CT_UPDATE_INVALID; +status = CT_UPDATE_INVALID; +break; } -return OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now); +status = OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now); +break; } case IPPROTO_ICMP: -return OvsConntrackUpdateIcmpEntry(entry, reply, now); +status = OvsConntrackUpdateIcmpEntry(entry, reply, now); +break; case IPPROTO_UDP: -return OvsConntrackUpdateOtherEntry(entry, reply, now); +status = OvsConntrackUpdateOtherEntry(entry, reply, now); +break; default: -return CT_UPDATE_INVALID; -} -} - -static __inline VOID -OvsCtEntryDelete(POVS_CT_ENTRY entry) -{ -if (entry == NULL) { -return; -} -if (entry->natInfo.natAction) { -OvsNatDeleteKey(>key); +status = CT_UPDATE_INVALID; +break; } -OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); -RemoveEntryList(>link); -OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalEntries--; +return status; } static __inline BOOLEAN @@ -358,6 +350,24 @@ OvsCtEntryExpired(POVS_CT_ENTRY entry) return entry->expiration < currentTime; } +static __inline VOID 
+OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) +{ +if (entry == NULL) { +return; +} +if (forceDelete || OvsCtEntryExpired(entry)) { +if (entry->natInfo.natAction) { +OvsNatDeleteKey(>key); +} +OvsPostC
[ovs-dev] [PATCH v4 0/3] datapath-windows: New lock implementation in conntrack
This patch series replaces the existing single RW lock implementation in conntrack with two RW locks in conntrack and one RW lock in NAT. --- v1->v2: - Patch 3, address review comments v2->v3: - Patch 3, fix invalid memory access after deleting ct entry v3->v4: - Patch 3, address static analyzer warning message --- Anand Kumar (3): datapath-windows: Refactor conntrack code. datapath-windows: Add a global level RW lock for NAT datapath-windows: Optimize conntrack lock implementation. datapath-windows/ovsext/Conntrack-nat.c | 17 +- datapath-windows/ovsext/Conntrack.c | 413 datapath-windows/ovsext/Conntrack.h | 7 +- 3 files changed, 279 insertions(+), 158 deletions(-) -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v4 2/3] datapath-windows: Add a global level RW lock for NAT
Currently NAT module relies on the existing conntrack lock. This patch provides a basic lock implementation for NAT module in conntrack. Signed-off-by: Anand Kumar <kumaran...@vmware.com> Acked-by: Alin Gabriel Serdean <aserd...@ovn.org> --- datapath-windows/ovsext/Conntrack.c | 36 ++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 3cde836..7d56a50 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -32,6 +32,7 @@ KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX ovsConntrackLockObj; +static PNDIS_RW_LOCK_EX ovsCtNatLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static LONG ctTotalEntries; @@ -58,6 +59,13 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) return STATUS_INSUFFICIENT_RESOURCES; } +ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle); +if (ovsCtNatLockObj == NULL) { +NdisFreeRWLock(ovsConntrackLockObj); +ovsConntrackLockObj = NULL; +return STATUS_INSUFFICIENT_RESOURCES; +} + /* Init the Hash Buffer */ ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY) * CT_HASH_TABLE_SIZE, @@ -65,6 +73,8 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) if (ovsConntrackTable == NULL) { NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; +NdisFreeRWLock(ovsCtNatLockObj); +ovsCtNatLockObj = NULL; return STATUS_INSUFFICIENT_RESOURCES; } @@ -82,6 +92,9 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; +NdisFreeRWLock(ovsCtNatLockObj); +ovsCtNatLockObj = NULL; + OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG); ovsConntrackTable = NULL; @@ -111,7 +124,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) VOID OvsCleanupConntrack(VOID) { -LOCK_STATE_EX lockState; +LOCK_STATE_EX lockState, lockStateNat; 
NdisAcquireRWLockWrite(ovsConntrackLockObj, , 0); ctThreadCtx.exit = 1; KeSetEvent(, 0, FALSE); @@ -131,7 +144,11 @@ OvsCleanupConntrack(VOID) NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatCleanup(); +NdisReleaseRWLock(ovsCtNatLockObj, ); +NdisFreeRWLock(ovsCtNatLockObj); +ovsCtNatLockObj = NULL; } static __inline VOID @@ -197,15 +214,19 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, if (natInfo == NULL) { entry->natInfo.natAction = NAT_ACTION_NONE; } else { +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); if (OvsIsForwardNat(natInfo->natAction)) { entry->natInfo = *natInfo; if (!OvsNatTranslateCtEntry(entry)) { +NdisReleaseRWLock(ovsCtNatLockObj, ); return FALSE; } ctx->hash = OvsHashCtKey(>key); } else { entry->natInfo.natAction = natInfo->natAction; } +NdisReleaseRWLock(ovsCtNatLockObj, ); } entry->timestampStart = now; @@ -358,7 +379,10 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) } if (forceDelete || OvsCtEntryExpired(entry)) { if (entry->natInfo.natAction) { +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatDeleteKey(>key); +NdisReleaseRWLock(ovsCtNatLockObj, ); } OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); RemoveEntryList(>link); @@ -560,7 +584,10 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, return NDIS_STATUS_INVALID_PACKET; } +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockRead(ovsCtNatLockObj, , 0); natEntry = OvsNatLookup(>key, TRUE); +NdisReleaseRWLock(ovsCtNatLockObj, ); if (natEntry) { /* Translate address first for reverse NAT */ ctx->key = natEntry->ctEntry->key; @@ -813,8 +840,11 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, */ if (natInfo->natAction != NAT_ACTION_NONE) { +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction, key, ctx.reply); +NdisReleaseRWLock(ovsCtNatLockObj, ); } 
OvsCtSetMarkLabel(key, entry, mark, labels, ); @@ -1052,7 +1082,7 @@ OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple) PLIST_ENTRY link, next; POVS_CT_ENTRY entry; -LOCK_STATE_EX lockState; +LOCK_STATE_EX lockState,
[ovs-dev] [PATCH v3 2/3] datapath-windows: Add a global level RW lock for NAT
Currently NAT module relies on the existing conntrack lock. This patch provides a basic lock implementation for NAT module in conntrack. Signed-off-by: Anand Kumar <kumaran...@vmware.com> Acked-by: Alin Gabriel Serdean <aserd...@ovn.org> --- datapath-windows/ovsext/Conntrack.c | 36 ++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 3cde836..7d56a50 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -32,6 +32,7 @@ KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX ovsConntrackLockObj; +static PNDIS_RW_LOCK_EX ovsCtNatLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static LONG ctTotalEntries; @@ -58,6 +59,13 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) return STATUS_INSUFFICIENT_RESOURCES; } +ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle); +if (ovsCtNatLockObj == NULL) { +NdisFreeRWLock(ovsConntrackLockObj); +ovsConntrackLockObj = NULL; +return STATUS_INSUFFICIENT_RESOURCES; +} + /* Init the Hash Buffer */ ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY) * CT_HASH_TABLE_SIZE, @@ -65,6 +73,8 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) if (ovsConntrackTable == NULL) { NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; +NdisFreeRWLock(ovsCtNatLockObj); +ovsCtNatLockObj = NULL; return STATUS_INSUFFICIENT_RESOURCES; } @@ -82,6 +92,9 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; +NdisFreeRWLock(ovsCtNatLockObj); +ovsCtNatLockObj = NULL; + OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG); ovsConntrackTable = NULL; @@ -111,7 +124,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) VOID OvsCleanupConntrack(VOID) { -LOCK_STATE_EX lockState; +LOCK_STATE_EX lockState, lockStateNat; 
NdisAcquireRWLockWrite(ovsConntrackLockObj, , 0); ctThreadCtx.exit = 1; KeSetEvent(, 0, FALSE); @@ -131,7 +144,11 @@ OvsCleanupConntrack(VOID) NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatCleanup(); +NdisReleaseRWLock(ovsCtNatLockObj, ); +NdisFreeRWLock(ovsCtNatLockObj); +ovsCtNatLockObj = NULL; } static __inline VOID @@ -197,15 +214,19 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, if (natInfo == NULL) { entry->natInfo.natAction = NAT_ACTION_NONE; } else { +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); if (OvsIsForwardNat(natInfo->natAction)) { entry->natInfo = *natInfo; if (!OvsNatTranslateCtEntry(entry)) { +NdisReleaseRWLock(ovsCtNatLockObj, ); return FALSE; } ctx->hash = OvsHashCtKey(>key); } else { entry->natInfo.natAction = natInfo->natAction; } +NdisReleaseRWLock(ovsCtNatLockObj, ); } entry->timestampStart = now; @@ -358,7 +379,10 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) } if (forceDelete || OvsCtEntryExpired(entry)) { if (entry->natInfo.natAction) { +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatDeleteKey(>key); +NdisReleaseRWLock(ovsCtNatLockObj, ); } OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); RemoveEntryList(>link); @@ -560,7 +584,10 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, return NDIS_STATUS_INVALID_PACKET; } +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockRead(ovsCtNatLockObj, , 0); natEntry = OvsNatLookup(>key, TRUE); +NdisReleaseRWLock(ovsCtNatLockObj, ); if (natEntry) { /* Translate address first for reverse NAT */ ctx->key = natEntry->ctEntry->key; @@ -813,8 +840,11 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, */ if (natInfo->natAction != NAT_ACTION_NONE) { +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction, key, ctx.reply); +NdisReleaseRWLock(ovsCtNatLockObj, ); } 
OvsCtSetMarkLabel(key, entry, mark, labels, ); @@ -1052,7 +1082,7 @@ OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple) PLIST_ENTRY link, next; POVS_CT_ENTRY entry; -LOCK_STATE_EX lockState; +LOCK_STATE_EX lockState,
[ovs-dev] [PATCH v3 3/3] datapath-windows: Optimize conntrack lock implementation.
Currently, there is one global lock for conntrack module, which protects conntrack entries and conntrack table. All the NAT operations are performed holding this lock. This becomes inefficient, as the number of conntrack entries grow. With new implementation, we will have two PNDIS_RW_LOCK_EX locks in conntrack. 1. ovsCtBucketLock - one rw lock per bucket of the conntrack table, which is shared by all the ct entries that belong to the same bucket. 2. lock - a rw lock in OVS_CT_ENTRY structure that protects the members of conntrack entry. Also, OVS_CT_ENTRY structure will have a lock reference(bucketLockRef) to the corresponding OvsCtBucketLock of conntrack table. We need this reference to retrieve ovsCtBucketLock from ct entry for delete operation. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- v1->v2: Address potential memory leak in conntrack initialization. v2->v3: Fix invalid memory access after deleting ct entry. --- datapath-windows/ovsext/Conntrack-nat.c | 6 + datapath-windows/ovsext/Conntrack.c | 233 datapath-windows/ovsext/Conntrack.h | 3 + 3 files changed, 157 insertions(+), 85 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 7975770..316c946 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,12 +167,16 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; +LOCK_STATE_EX lockState; +/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ +NdisAcquireRWLockRead(entry->lock, , 0); /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? 
>key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { +NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -202,6 +206,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. +NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -215,6 +220,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } +NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 7d56a50..7f75eb2 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -31,7 +31,7 @@ KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; -static PNDIS_RW_LOCK_EX ovsConntrackLockObj; +static PNDIS_RW_LOCK_EX *ovsCtBucketLock; static PNDIS_RW_LOCK_EX ovsCtNatLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static LONG ctTotalEntries; @@ -49,20 +49,14 @@ MapNlToCtTuple(POVS_MESSAGE msgIn, PNL_ATTR attr, NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context) { -NTSTATUS status; +NTSTATUS status = STATUS_SUCCESS; HANDLE threadHandle = NULL; ctTotalEntries = 0; +UINT32 numBucketLocks = CT_HASH_TABLE_SIZE; /* Init the sync-lock */ -ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle); -if (ovsConntrackLockObj == NULL) { -return STATUS_INSUFFICIENT_RESOURCES; -} - ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle); if (ovsCtNatLockObj == NULL) { -NdisFreeRWLock(ovsConntrackLockObj); -ovsConntrackLockObj = NULL; return STATUS_INSUFFICIENT_RESOURCES; } @@ -71,15 +65,27 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) * CT_HASH_TABLE_SIZE, OVS_CT_POOL_TAG); if (ovsConntrackTable == NULL) { -NdisFreeRWLock(ovsConntrackLockObj); -ovsConntrackLockObj = NULL; NdisFreeRWLock(ovsCtNatLockObj); 
ovsCtNatLockObj = NULL; return STATUS_INSUFFICIENT_RESOURCES; } -for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { +ovsCtBucketLock = OvsAllocateMemoryWithTag(sizeof(PNDIS_RW_LOCK_EX) + * CT_HASH_TABLE_SIZE, + OVS_CT_POOL_TAG); +if (ovsCtBucketLock == NULL) { +status = STATUS_INSUFFICIENT_RESOURCES; +goto freeTable; +} + +for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) { InitializeListHead([i]); +ovsCtBucketLock[i] = NdisAllocateRWLock(context->NdisFilterHandle); +if (ovsCtBucketLock[i] == NULL) { +
[ovs-dev] [PATCH v3 1/3] datapath-windows: Refactor conntrack code.
Some of the functions and code are refactored so that new conntrack lock can be implemented Signed-off-by: Anand Kumar <kumaran...@vmware.com> Acked-by: Alin Gabriel Serdean <aserd...@ovn.org> --- datapath-windows/ovsext/Conntrack-nat.c | 11 +- datapath-windows/ovsext/Conntrack.c | 174 ++-- datapath-windows/ovsext/Conntrack.h | 4 - 3 files changed, 103 insertions(+), 86 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index c778f12..7975770 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -93,26 +93,23 @@ NTSTATUS OvsNatInit() sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, OVS_CT_POOL_TAG); if (ovsNatTable == NULL) { -goto failNoMem; +return STATUS_INSUFFICIENT_RESOURCES; } ovsUnNatTable = OvsAllocateMemoryWithTag( sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, OVS_CT_POOL_TAG); if (ovsUnNatTable == NULL) { -goto freeNatTable; +OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); +return STATUS_INSUFFICIENT_RESOURCES; } for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { InitializeListHead([i]); InitializeListHead([i]); } -return STATUS_SUCCESS; -freeNatTable: -OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); -failNoMem: -return STATUS_INSUFFICIENT_RESOURCES; +return STATUS_SUCCESS; } /* diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 169ec4f..3cde836 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -33,7 +33,7 @@ static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX ovsConntrackLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; -static UINT64 ctTotalEntries; +static LONG ctTotalEntries; static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple); static __inline NDIS_STATUS @@ -212,7 +212,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, InsertHeadList([ctx->hash & CT_HASH_TABLE_MASK], 
>link); -ctTotalEntries++; +InterlockedIncrement((LONG volatile *)); return TRUE; } @@ -235,11 +235,6 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, *entryCreated = FALSE; state |= OVS_CS_F_NEW; -parentEntry = OvsCtRelatedLookup(ctx->key, currentTime); -if (parentEntry != NULL) { -state |= OVS_CS_F_RELATED; -} - switch (ipProto) { case IPPROTO_TCP: { @@ -283,6 +278,11 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, break; } +parentEntry = OvsCtRelatedLookup(ctx->key, currentTime); +if (parentEntry != NULL && state != OVS_CS_F_INVALID) { +state |= OVS_CS_F_RELATED; +} + if (state != OVS_CS_F_INVALID && commit) { if (entry) { entry->parent = parentEntry; @@ -315,6 +315,7 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry, BOOLEAN reply, UINT64 now) { +CT_UPDATE_RES status; switch (ipProto) { case IPPROTO_TCP: { @@ -322,32 +323,23 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry, const TCPHdr *tcp; tcp = OvsGetTcp(nbl, l4Offset, ); if (!tcp) { -return CT_UPDATE_INVALID; +status = CT_UPDATE_INVALID; +break; } -return OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now); +status = OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now); +break; } case IPPROTO_ICMP: -return OvsConntrackUpdateIcmpEntry(entry, reply, now); +status = OvsConntrackUpdateIcmpEntry(entry, reply, now); +break; case IPPROTO_UDP: -return OvsConntrackUpdateOtherEntry(entry, reply, now); +status = OvsConntrackUpdateOtherEntry(entry, reply, now); +break; default: -return CT_UPDATE_INVALID; -} -} - -static __inline VOID -OvsCtEntryDelete(POVS_CT_ENTRY entry) -{ -if (entry == NULL) { -return; -} -if (entry->natInfo.natAction) { -OvsNatDeleteKey(>key); +status = CT_UPDATE_INVALID; +break; } -OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); -RemoveEntryList(>link); -OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalEntries--; +return status; } static __inline BOOLEAN @@ -358,6 +350,24 @@ OvsCtEntryExpired(POVS_CT_ENTRY entry) return entry->expiration < currentTime; } +static __inline VOID 
+OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) +{ +if (entry == NULL) { +return; +} +if (forceDelete || OvsCtEntryExpired(entry)) { +if (entry->natInfo.natAction) { +OvsNatDeleteKey(>key); +} +OvsPostC
[ovs-dev] [PATCH v3 0/3] datapath-windows: New lock implementation in conntrack
This patch series replaces the existing single RW lock implementation in conntrack with two RW locks in conntrack and one RW lock in NAT. --- v1->v2: - Patch 3, address review comments v2->v3: - Patch 3, fix invalid memory access after deleting ct entry --- Anand Kumar (3): datapath-windows: Refactor conntrack code. datapath-windows: Add a global level RW lock for NAT datapath-windows: Optimize conntrack lock implementation. datapath-windows/ovsext/Conntrack-nat.c | 17 +- datapath-windows/ovsext/Conntrack.c | 413 datapath-windows/ovsext/Conntrack.h | 7 +- 3 files changed, 279 insertions(+), 158 deletions(-) -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v2 3/3] datapath-windows: Optimize conntrack lock implementation.
Currently, there is one global lock for conntrack module, which protects conntrack entries and conntrack table. All the NAT operations are performed holding this lock. This becomes inefficient, as the number of conntrack entries grow. With new implementation, we will have two PNDIS_RW_LOCK_EX locks in conntrack. 1. ovsCtBucketLock - one rw lock per bucket of the conntrack table, which is shared by all the ct entries that belong to the same bucket. 2. lock - a rw lock in OVS_CT_ENTRY structure that protects the members of conntrack entry. Also, OVS_CT_ENTRY structure will have a lock reference(bucketLockRef) to the corresponding OvsCtBucketLock of conntrack table. We need this reference to retrieve ovsCtBucketLock from ct entry for delete operation. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- v1->v2: Address potential memory leak in conntrack initialization. --- datapath-windows/ovsext/Conntrack-nat.c | 6 + datapath-windows/ovsext/Conntrack.c | 230 datapath-windows/ovsext/Conntrack.h | 3 + 3 files changed, 154 insertions(+), 85 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 7975770..316c946 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,12 +167,16 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; +LOCK_STATE_EX lockState; +/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */ +NdisAcquireRWLockRead(entry->lock, , 0); /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? 
>key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { +NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -202,6 +206,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. +NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -215,6 +220,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } +NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 7d56a50..f0ef5c5 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -31,7 +31,7 @@ KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; -static PNDIS_RW_LOCK_EX ovsConntrackLockObj; +static PNDIS_RW_LOCK_EX *ovsCtBucketLock; static PNDIS_RW_LOCK_EX ovsCtNatLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static LONG ctTotalEntries; @@ -49,20 +49,14 @@ MapNlToCtTuple(POVS_MESSAGE msgIn, PNL_ATTR attr, NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context) { -NTSTATUS status; +NTSTATUS status = STATUS_SUCCESS; HANDLE threadHandle = NULL; ctTotalEntries = 0; +UINT32 numBucketLocks = CT_HASH_TABLE_SIZE; /* Init the sync-lock */ -ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle); -if (ovsConntrackLockObj == NULL) { -return STATUS_INSUFFICIENT_RESOURCES; -} - ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle); if (ovsCtNatLockObj == NULL) { -NdisFreeRWLock(ovsConntrackLockObj); -ovsConntrackLockObj = NULL; return STATUS_INSUFFICIENT_RESOURCES; } @@ -71,15 +65,27 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) * CT_HASH_TABLE_SIZE, OVS_CT_POOL_TAG); if (ovsConntrackTable == NULL) { -NdisFreeRWLock(ovsConntrackLockObj); -ovsConntrackLockObj = NULL; NdisFreeRWLock(ovsCtNatLockObj); 
ovsCtNatLockObj = NULL; return STATUS_INSUFFICIENT_RESOURCES; } -for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { +ovsCtBucketLock = OvsAllocateMemoryWithTag(sizeof(PNDIS_RW_LOCK_EX) + * CT_HASH_TABLE_SIZE, + OVS_CT_POOL_TAG); +if (ovsCtBucketLock == NULL) { +status = STATUS_INSUFFICIENT_RESOURCES; +goto freeTable; +} + +for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) { InitializeListHead([i]); +ovsCtBucketLock[i] = NdisAllocateRWLock(context->NdisFilterHandle); +if (ovsCtBucketLock[i] == NULL) { +status = STATUS_INSUFFICIENT_RESOU
[ovs-dev] [PATCH v2 2/3] datapath-windows: Add a global level RW lock for NAT
Currently NAT module relies on the existing conntrack lock. This patch provides a basic lock implementation for NAT module in conntrack. Signed-off-by: Anand Kumar <kumaran...@vmware.com> Acked-by: Alin Gabriel Serdean <aserd...@ovn.org> --- datapath-windows/ovsext/Conntrack.c | 36 ++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 3cde836..7d56a50 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -32,6 +32,7 @@ KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX ovsConntrackLockObj; +static PNDIS_RW_LOCK_EX ovsCtNatLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static LONG ctTotalEntries; @@ -58,6 +59,13 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) return STATUS_INSUFFICIENT_RESOURCES; } +ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle); +if (ovsCtNatLockObj == NULL) { +NdisFreeRWLock(ovsConntrackLockObj); +ovsConntrackLockObj = NULL; +return STATUS_INSUFFICIENT_RESOURCES; +} + /* Init the Hash Buffer */ ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY) * CT_HASH_TABLE_SIZE, @@ -65,6 +73,8 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) if (ovsConntrackTable == NULL) { NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; +NdisFreeRWLock(ovsCtNatLockObj); +ovsCtNatLockObj = NULL; return STATUS_INSUFFICIENT_RESOURCES; } @@ -82,6 +92,9 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; +NdisFreeRWLock(ovsCtNatLockObj); +ovsCtNatLockObj = NULL; + OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG); ovsConntrackTable = NULL; @@ -111,7 +124,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) VOID OvsCleanupConntrack(VOID) { -LOCK_STATE_EX lockState; +LOCK_STATE_EX lockState, lockStateNat; 
NdisAcquireRWLockWrite(ovsConntrackLockObj, , 0); ctThreadCtx.exit = 1; KeSetEvent(, 0, FALSE); @@ -131,7 +144,11 @@ OvsCleanupConntrack(VOID) NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatCleanup(); +NdisReleaseRWLock(ovsCtNatLockObj, ); +NdisFreeRWLock(ovsCtNatLockObj); +ovsCtNatLockObj = NULL; } static __inline VOID @@ -197,15 +214,19 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, if (natInfo == NULL) { entry->natInfo.natAction = NAT_ACTION_NONE; } else { +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); if (OvsIsForwardNat(natInfo->natAction)) { entry->natInfo = *natInfo; if (!OvsNatTranslateCtEntry(entry)) { +NdisReleaseRWLock(ovsCtNatLockObj, ); return FALSE; } ctx->hash = OvsHashCtKey(>key); } else { entry->natInfo.natAction = natInfo->natAction; } +NdisReleaseRWLock(ovsCtNatLockObj, ); } entry->timestampStart = now; @@ -358,7 +379,10 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) } if (forceDelete || OvsCtEntryExpired(entry)) { if (entry->natInfo.natAction) { +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatDeleteKey(>key); +NdisReleaseRWLock(ovsCtNatLockObj, ); } OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); RemoveEntryList(>link); @@ -560,7 +584,10 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, return NDIS_STATUS_INVALID_PACKET; } +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockRead(ovsCtNatLockObj, , 0); natEntry = OvsNatLookup(>key, TRUE); +NdisReleaseRWLock(ovsCtNatLockObj, ); if (natEntry) { /* Translate address first for reverse NAT */ ctx->key = natEntry->ctEntry->key; @@ -813,8 +840,11 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, */ if (natInfo->natAction != NAT_ACTION_NONE) { +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction, key, ctx.reply); +NdisReleaseRWLock(ovsCtNatLockObj, ); } 
OvsCtSetMarkLabel(key, entry, mark, labels, ); @@ -1052,7 +1082,7 @@ OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple) PLIST_ENTRY link, next; POVS_CT_ENTRY entry; -LOCK_STATE_EX lockState; +LOCK_STATE_EX lockState,
[ovs-dev] [PATCH v2 1/3] datapath-windows: Refactor conntrack code.
Some of the functions and code are refactored so that new conntrack lock can be implemented Signed-off-by: Anand Kumar <kumaran...@vmware.com> Acked-by: Alin Gabriel Serdean <aserd...@ovn.org> --- datapath-windows/ovsext/Conntrack-nat.c | 11 +- datapath-windows/ovsext/Conntrack.c | 174 ++-- datapath-windows/ovsext/Conntrack.h | 4 - 3 files changed, 103 insertions(+), 86 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index c778f12..7975770 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -93,26 +93,23 @@ NTSTATUS OvsNatInit() sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, OVS_CT_POOL_TAG); if (ovsNatTable == NULL) { -goto failNoMem; +return STATUS_INSUFFICIENT_RESOURCES; } ovsUnNatTable = OvsAllocateMemoryWithTag( sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, OVS_CT_POOL_TAG); if (ovsUnNatTable == NULL) { -goto freeNatTable; +OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); +return STATUS_INSUFFICIENT_RESOURCES; } for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { InitializeListHead([i]); InitializeListHead([i]); } -return STATUS_SUCCESS; -freeNatTable: -OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); -failNoMem: -return STATUS_INSUFFICIENT_RESOURCES; +return STATUS_SUCCESS; } /* diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 169ec4f..3cde836 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -33,7 +33,7 @@ static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX ovsConntrackLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; -static UINT64 ctTotalEntries; +static LONG ctTotalEntries; static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple); static __inline NDIS_STATUS @@ -212,7 +212,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, InsertHeadList([ctx->hash & CT_HASH_TABLE_MASK], 
>link); -ctTotalEntries++; +InterlockedIncrement((LONG volatile *)); return TRUE; } @@ -235,11 +235,6 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, *entryCreated = FALSE; state |= OVS_CS_F_NEW; -parentEntry = OvsCtRelatedLookup(ctx->key, currentTime); -if (parentEntry != NULL) { -state |= OVS_CS_F_RELATED; -} - switch (ipProto) { case IPPROTO_TCP: { @@ -283,6 +278,11 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, break; } +parentEntry = OvsCtRelatedLookup(ctx->key, currentTime); +if (parentEntry != NULL && state != OVS_CS_F_INVALID) { +state |= OVS_CS_F_RELATED; +} + if (state != OVS_CS_F_INVALID && commit) { if (entry) { entry->parent = parentEntry; @@ -315,6 +315,7 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry, BOOLEAN reply, UINT64 now) { +CT_UPDATE_RES status; switch (ipProto) { case IPPROTO_TCP: { @@ -322,32 +323,23 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry, const TCPHdr *tcp; tcp = OvsGetTcp(nbl, l4Offset, ); if (!tcp) { -return CT_UPDATE_INVALID; +status = CT_UPDATE_INVALID; +break; } -return OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now); +status = OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now); +break; } case IPPROTO_ICMP: -return OvsConntrackUpdateIcmpEntry(entry, reply, now); +status = OvsConntrackUpdateIcmpEntry(entry, reply, now); +break; case IPPROTO_UDP: -return OvsConntrackUpdateOtherEntry(entry, reply, now); +status = OvsConntrackUpdateOtherEntry(entry, reply, now); +break; default: -return CT_UPDATE_INVALID; -} -} - -static __inline VOID -OvsCtEntryDelete(POVS_CT_ENTRY entry) -{ -if (entry == NULL) { -return; -} -if (entry->natInfo.natAction) { -OvsNatDeleteKey(>key); +status = CT_UPDATE_INVALID; +break; } -OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); -RemoveEntryList(>link); -OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalEntries--; +return status; } static __inline BOOLEAN @@ -358,6 +350,24 @@ OvsCtEntryExpired(POVS_CT_ENTRY entry) return entry->expiration < currentTime; } +static __inline VOID 
+OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) +{ +if (entry == NULL) { +return; +} +if (forceDelete || OvsCtEntryExpired(entry)) { +if (entry->natInfo.natAction) { +OvsNatDeleteKey(>key); +} +OvsPostC
[ovs-dev] [PATCH v2 0/3] datapath-windows: New lock implementation in conntrack
This patch series replaces the existing single RW lock implementation in conntrack with two RW locks in conntrack and one RW lock in NAT. --- v1->v2: - Patch 3, address review comments --- Anand Kumar (3): datapath-windows: Refactor conntrack code. datapath-windows: Add a global level RW lock for NAT datapath-windows: Optimize conntrack lock implementation. datapath-windows/ovsext/Conntrack-nat.c | 17 +- datapath-windows/ovsext/Conntrack.c | 410 datapath-windows/ovsext/Conntrack.h | 7 +- 3 files changed, 276 insertions(+), 158 deletions(-) -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v1] datapath-windows: Add support for deleting conntrack entry by 5-tuple.
To delete a conntrack entry specified by 5-tuple pass an additional conntrack 5-tuple parameter to flush-conntrack. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/Conntrack.c | 146 +--- 1 file changed, 134 insertions(+), 12 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 3203411..dc268b3 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -35,8 +35,10 @@ static PNDIS_RW_LOCK_EX ovsConntrackLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static UINT64 ctTotalEntries; -static __inline NDIS_STATUS OvsCtFlush(UINT16 zone); - +static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple); +static __inline NDIS_STATUS +MapNlToCtTuple(POVS_MESSAGE msgIn, PNL_ATTR attr, + struct ovs_key_ct_tuple_ipv4 *ct_tuple); /* * * OvsInitConntrack @@ -120,7 +122,7 @@ OvsCleanupConntrack(VOID) ObDereferenceObject(ctThreadCtx.threadObject); /* Force flush all entries before removing */ -OvsCtFlush(0); +OvsCtFlush(0, NULL); if (ovsConntrackTable) { OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG); @@ -1018,11 +1020,11 @@ OvsConntrackEntryCleaner(PVOID data) /* * * OvsCtFlush - * Flushes out all Conntrack Entries that match the given zone + * Flushes out all Conntrack Entries that match any of the arguments * */ static __inline NDIS_STATUS -OvsCtFlush(UINT16 zone) +OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple) { PLIST_ENTRY link, next; POVS_CT_ENTRY entry; @@ -1034,9 +1036,26 @@ OvsCtFlush(UINT16 zone) for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { LIST_FORALL_SAFE([i], link, next) { entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link); -/* zone is a non-zero value */ -if (!zone || zone == entry->key.zone) +if (tuple) { +if (tuple->ipv4_proto != IPPROTO_ICMP && +tuple->ipv4_src == entry->key.src.addr.ipv4_aligned && +tuple->ipv4_dst == entry->key.dst.addr.ipv4_aligned && +tuple->ipv4_proto == entry->key.nw_proto && 
+tuple->src_port == entry->key.src.port && +tuple->dst_port == entry->key.dst.port && +(zone ? entry->key.zone == zone: TRUE)) { +OvsCtEntryDelete(entry); +} else if (tuple->ipv4_src == entry->key.src.addr.ipv4_aligned && +tuple->ipv4_dst == entry->key.dst.addr.ipv4_aligned && +tuple->ipv4_proto == entry->key.nw_proto && +tuple->src_port == entry->key.src.icmp_type && +tuple->dst_port == entry->key.src.icmp_code && +(zone ? entry->key.zone == zone: TRUE)) { +OvsCtEntryDelete(entry); +} +} else if (!zone || zone == entry->key.zone) { OvsCtEntryDelete(entry); +} } } } @@ -1058,19 +1077,21 @@ OvsCtDeleteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, NL_ERROR nlError = NL_ERROR_SUCCESS; NTSTATUS status; UINT16 zone = 0; +struct ovs_key_ct_tuple_ipv4 *ct_tuple = NULL; NL_BUFFER nlBuf; UINT16 nlmsgType; PNL_MSG_HDR nlMsg; -static const NL_POLICY ctZonePolicy[] = { -[CTA_ZONE] = { .type = NL_A_BE16, .optional = TRUE }, +static const NL_POLICY ctAttrPolicy[] = { +[CTA_TUPLE_ORIG] = {.type = NL_A_NESTED, .optional = TRUE}, +[CTA_ZONE] = {.type = NL_A_BE16, .optional = TRUE }, }; if ((NlAttrParse(nlMsgHdr, attrOffset, NlNfMsgAttrsLen(nlMsgHdr), -ctZonePolicy, ARRAY_SIZE(ctZonePolicy), +ctAttrPolicy, ARRAY_SIZE(ctAttrPolicy), ctAttrs, ARRAY_SIZE(ctAttrs))) != TRUE) { -OVS_LOG_ERROR("Zone attr parsing failed for msg: %p", nlMsgHdr); +OVS_LOG_ERROR("Ct attr parsing failed for msg: %p", nlMsgHdr); status = STATUS_INVALID_PARAMETER; goto done; } @@ -1079,7 +1100,21 @@ OvsCtDeleteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, zone = NlAttrGetU16(ctAttrs[CTA_ZONE]); } -status = OvsCtFlush(zone); +if (ctAttrs[CTA_TUPLE_ORIG]) { +ct_tuple = OvsAllocateMemoryWithTag(sizeof(struct ovs
[ovs-dev] [PATCH v1] Add support to delete a conntrack entry by 5-tuple
This patch adds support for deleting a conntrack entry by conntrack 5-tuple in the Windows kernel. The related userspace changes are currently being reviewed at https://patchwork.ozlabs.org/project/openvswitch/list/?series=13345 The Windows kernel changes can go in independently of the userspace changes since the conntrack 5-tuple is an optional parameter. Anand Kumar (1): datapath-windows: Add support for deleting conntrack entry by 5 tuple. datapath-windows/ovsext/Conntrack.c | 146 +--- 1 file changed, 134 insertions(+), 12 deletions(-) -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH] datapath-windows: Account for VLAN tag in tunnel Decap
Acked-by: Anand Kumar <kumaran...@vmware.com> Thanks, Anand Kumar On 11/20/17, 3:06 PM, "ovs-dev-boun...@openvswitch.org on behalf of Shashank Ram" <ovs-dev-boun...@openvswitch.org on behalf of r...@vmware.com> wrote: Decap functions for tunneling protocols do not compute the packet header offsets correctly when there is a VLAN tag in the L2 header. This results in incorrect checksum computation causing the packet to be dropped. This patch adds support to account for the VLAN tag in the packet if its present, and makes use of the OvsExtractLayers() function to correctly compute the header offsets for different layers. Testing done: - Tested Geneve, STT, Vxlan and Gre and verified that there are no regressions. - Verified that packets with VLAN tags are correctly handled in the decap code of all tunneling protocols. Previously, this would result in packet drops due to invalid checksums being computed. - Verified that non-VLAN tagged packets are handled correctly. Signed-off-by: Shashank Ram <r...@vmware.com> --- datapath-windows/ovsext/Geneve.c | 14 + datapath-windows/ovsext/Geneve.h | 6 ++ datapath-windows/ovsext/Gre.c | 29 -- datapath-windows/ovsext/Gre.h | 16 ++ datapath-windows/ovsext/Offload.c | 10 + datapath-windows/ovsext/Offload.h | 3 ++- datapath-windows/ovsext/Stt.c | 44 +++ datapath-windows/ovsext/Stt.h | 6 ++ datapath-windows/ovsext/Vxlan.c | 14 + datapath-windows/ovsext/Vxlan.h | 6 ++ 10 files changed, 111 insertions(+), 37 deletions(-) diff --git a/datapath-windows/ovsext/Geneve.c b/datapath-windows/ovsext/Geneve.c index 6dca69b..210716d 100644 --- a/datapath-windows/ovsext/Geneve.c +++ b/datapath-windows/ovsext/Geneve.c @@ -262,10 +262,16 @@ NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT switchContext, PUINT8 bufferStart; PVOID optStart; NDIS_STATUS status; +OVS_PACKET_HDR_INFO layers = { 0 }; + +status = OvsExtractLayers(curNbl, ); +if (status != NDIS_STATUS_SUCCESS) { +return status; +} /* Check the length of the UDP payload */ curNb = 
NET_BUFFER_LIST_FIRST_NB(curNbl); -tunnelSize = OvsGetGeneveTunHdrMinSize(); +tunnelSize = OvsGetGeneveTunHdrSizeFromLayers(); packetLength = NET_BUFFER_DATA_LENGTH(curNb); if (packetLength <= tunnelSize) { return NDIS_STATUS_INVALID_LENGTH; @@ -295,13 +301,13 @@ NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT switchContext, ethHdr = (EthHdr *)bufferStart; /* XXX: Handle IP options. */ -ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); +ipHdr = (IPHdr *)(bufferStart + layers.l3Offset); tunKey->src = ipHdr->saddr; tunKey->dst = ipHdr->daddr; tunKey->tos = ipHdr->tos; tunKey->ttl = ipHdr->ttl; tunKey->pad = 0; -udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); +udpHdr = (UDPHdr *)(bufferStart + layers.l4Offset); /* Validate if NIC has indicated checksum failure. */ status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0); @@ -312,7 +318,7 @@ NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT switchContext, /* Calculate and verify UDP checksum if NIC didn't do it. */ if (udpHdr->check != 0) { status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr, - packetLength); + packetLength, ); tunKey->flags |= OVS_TNL_F_CSUM; if (status != NDIS_STATUS_SUCCESS) { goto dropNbl; diff --git a/datapath-windows/ovsext/Geneve.h b/datapath-windows/ovsext/Geneve.h index 019c0dd..db758dd 100644 --- a/datapath-windows/ovsext/Geneve.h +++ b/datapath-windows/ovsext/Geneve.h @@ -113,6 +113,12 @@ OvsGetGeneveTunHdrMaxSize(VOID) return OvsGetGeneveTunHdrMinSize() + TUN_OPT_MAX_LEN; } +static __inline UINT32 +OvsGetGeneveTunHdrSizeFromLayers(POVS_PACKET_HDR_INFO layers) +{ +return layers->l7Offset + sizeof(GeneveHdr); +} + #define GENEVE_UDP_PORT 6081 #define GENEVE_UDP_PORT_NBO 0xC117 #define GENEVE_VER 0 diff --git a/datapath-windows/ovsext/Gre.c b/datapath-windows/ovsext/Gre.c index f095742..1f38ee7 100644 --- a/datapath-windows/ovsext/Gre.c +++ b/datapath-windows/ovsext/Gre.c @@ -317,35 +317,42 @@ OvsDecapGre(POVS_SWITCH_CONTEXT switchContext, GREHdr *greHdr; UINT32 tunnelSize, 
packetLength; UINT32 headRoom = 0; +UINT
[ovs-dev] [PATCH v1 3/3] datapath-windows: Optimize conntrack lock implementation.
Currently, there is one global lock for conntrack module, which protects conntrack entries and conntrack table. All the NAT operations are performed holding this lock. This becomes inefficient, as the number of conntrack entries grow. With new implementation, we will have two PNDIS_RW_LOCK_EX locks in conntrack. 1. ovsCtBucketLock - one rw lock per bucket of the conntrack table, which is shared by all the ct entries that belong to the same bucket. 2. lock - a rw lock in OVS_CT_ENTRY structure that protects the members of conntrack entry. Also, OVS_CT_ENTRY structure will have a lock reference(bucketLockRef) to the corresponding OvsCtBucketLock of conntrack table. We need this reference to retrieve ovsCtBucketLock from ct entry for delete operation. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/Conntrack-nat.c | 6 + datapath-windows/ovsext/Conntrack.c | 231 datapath-windows/ovsext/Conntrack.h | 3 + 3 files changed, 154 insertions(+), 86 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index 7975770..33a86cf 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -167,12 +167,16 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, { UINT32 natFlag; const struct ct_endpoint* endpoint; +LOCK_STATE_EX lockState; +/* XXX Move conntrack locks out of NAT after implementing lock in NAT. */ +NdisAcquireRWLockRead(entry->lock, , 0); /* When it is NAT, only entry->rev_key contains NATTED address; When it is unNAT, only entry->key contains the UNNATTED address;*/ const OVS_CT_KEY *ctKey = reverse ? >key : >rev_key; BOOLEAN isSrcNat; if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { +NdisReleaseRWLock(entry->lock, ); return; } isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -202,6 +206,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ // XXX: IPv6 packet not supported yet. 
+NdisReleaseRWLock(entry->lock, ); return; } if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { @@ -215,6 +220,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, } } } +NdisReleaseRWLock(entry->lock, ); } diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index ba0dc88..f5e4996 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -31,7 +31,7 @@ KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; -static PNDIS_RW_LOCK_EX ovsConntrackLockObj; +static PNDIS_RW_LOCK_EX *ovsCtBucketLock; static PNDIS_RW_LOCK_EX ovsCtNatLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static LONG ctTotalEntries; @@ -47,20 +47,13 @@ static __inline NDIS_STATUS OvsCtFlush(UINT16 zone); NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context) { -NTSTATUS status; +NTSTATUS status = STATUS_SUCCESS; HANDLE threadHandle = NULL; ctTotalEntries = 0; /* Init the sync-lock */ -ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle); -if (ovsConntrackLockObj == NULL) { -return STATUS_INSUFFICIENT_RESOURCES; -} - ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle); if (ovsCtNatLockObj == NULL) { -NdisFreeRWLock(ovsConntrackLockObj); -ovsConntrackLockObj = NULL; return STATUS_INSUFFICIENT_RESOURCES; } @@ -69,15 +62,26 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) * CT_HASH_TABLE_SIZE, OVS_CT_POOL_TAG); if (ovsConntrackTable == NULL) { -NdisFreeRWLock(ovsConntrackLockObj); -ovsConntrackLockObj = NULL; NdisFreeRWLock(ovsCtNatLockObj); ovsCtNatLockObj = NULL; return STATUS_INSUFFICIENT_RESOURCES; } -for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { +ovsCtBucketLock = OvsAllocateMemoryWithTag(sizeof(PNDIS_RW_LOCK_EX) + * CT_HASH_TABLE_SIZE, + OVS_CT_POOL_TAG); +if (ovsCtBucketLock == NULL) { +status = STATUS_INSUFFICIENT_RESOURCES; +goto freeTable; +} + +for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) { 
InitializeListHead([i]); +ovsCtBucketLock[i] = NdisAllocateRWLock(context->NdisFilterHandle); +if (ovsCtBucketLock[i] == NULL) { +status = STATUS_INSUFFICIENT_RESOURCES; +goto freeBucketLock; +} } /* Init CT Cleaner Thread */ @@ -87,16 +91,7 @@ OvsInitConntrack(POVS_SWITCH_CONT
[ovs-dev] [PATCH v1 2/3] datapath-windows: Add a global level RW lock for NAT
Currently NAT module relies on the existing conntrack lock. This patch provides a basic lock implementation for NAT module in conntrack. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/Conntrack.c | 36 ++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 48d4abf..ba0dc88 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -32,6 +32,7 @@ KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX ovsConntrackLockObj; +static PNDIS_RW_LOCK_EX ovsCtNatLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static LONG ctTotalEntries; @@ -56,6 +57,13 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) return STATUS_INSUFFICIENT_RESOURCES; } +ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle); +if (ovsCtNatLockObj == NULL) { +NdisFreeRWLock(ovsConntrackLockObj); +ovsConntrackLockObj = NULL; +return STATUS_INSUFFICIENT_RESOURCES; +} + /* Init the Hash Buffer */ ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY) * CT_HASH_TABLE_SIZE, @@ -63,6 +71,8 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) if (ovsConntrackTable == NULL) { NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; +NdisFreeRWLock(ovsCtNatLockObj); +ovsCtNatLockObj = NULL; return STATUS_INSUFFICIENT_RESOURCES; } @@ -80,6 +90,9 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; +NdisFreeRWLock(ovsCtNatLockObj); +ovsCtNatLockObj = NULL; + OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG); ovsConntrackTable = NULL; @@ -109,7 +122,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) VOID OvsCleanupConntrack(VOID) { -LOCK_STATE_EX lockState; +LOCK_STATE_EX lockState, lockStateNat; NdisAcquireRWLockWrite(ovsConntrackLockObj, , 0); ctThreadCtx.exit = 1; KeSetEvent(, 0, 
FALSE); @@ -129,7 +142,11 @@ OvsCleanupConntrack(VOID) NdisFreeRWLock(ovsConntrackLockObj); ovsConntrackLockObj = NULL; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatCleanup(); +NdisReleaseRWLock(ovsCtNatLockObj, ); +NdisFreeRWLock(ovsCtNatLockObj); +ovsCtNatLockObj = NULL; } static __inline VOID @@ -195,15 +212,19 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, if (natInfo == NULL) { entry->natInfo.natAction = NAT_ACTION_NONE; } else { +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); if (OvsIsForwardNat(natInfo->natAction)) { entry->natInfo = *natInfo; if (!OvsNatTranslateCtEntry(entry)) { +NdisReleaseRWLock(ovsCtNatLockObj, ); return FALSE; } ctx->hash = OvsHashCtKey(>key); } else { entry->natInfo.natAction = natInfo->natAction; } +NdisReleaseRWLock(ovsCtNatLockObj, ); } entry->timestampStart = now; @@ -356,7 +377,10 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) } if (forceDelete || OvsCtEntryExpired(entry)) { if (entry->natInfo.natAction) { +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatDeleteKey(>key); +NdisReleaseRWLock(ovsCtNatLockObj, ); } OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); RemoveEntryList(>link); @@ -558,7 +582,10 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, return NDIS_STATUS_INVALID_PACKET; } +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockRead(ovsCtNatLockObj, , 0); natEntry = OvsNatLookup(>key, TRUE); +NdisReleaseRWLock(ovsCtNatLockObj, ); if (natEntry) { /* Translate address first for reverse NAT */ ctx->key = natEntry->ctEntry->key; @@ -811,8 +838,11 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, */ if (natInfo->natAction != NAT_ACTION_NONE) { +LOCK_STATE_EX lockStateNat; +NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0); OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction, key, ctx.reply); +NdisReleaseRWLock(ovsCtNatLockObj, ); } OvsCtSetMarkLabel(key, entry, mark, labels, ); @@ -1050,7 +1080,7 @@ OvsCtFlush(UINT16 zone) PLIST_ENTRY 
link, next; POVS_CT_ENTRY entry; -LOCK_STATE_EX lockState; +LOCK_STATE_EX lockState, lockStateNat; NdisAcquireRWLockWrite(ovsConntrackLockObj, , 0); if (ct
[ovs-dev] [PATCH v1 1/3] datapath-windows: Refactor conntrack code.
Some of the functions and code are refactored so that new conntrack lock can be implemented Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- datapath-windows/ovsext/Conntrack-nat.c | 11 +-- datapath-windows/ovsext/Conntrack.c | 170 ++-- datapath-windows/ovsext/Conntrack.h | 4 - 3 files changed, 101 insertions(+), 84 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-windows/ovsext/Conntrack-nat.c index c778f12..7975770 100644 --- a/datapath-windows/ovsext/Conntrack-nat.c +++ b/datapath-windows/ovsext/Conntrack-nat.c @@ -93,26 +93,23 @@ NTSTATUS OvsNatInit() sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, OVS_CT_POOL_TAG); if (ovsNatTable == NULL) { -goto failNoMem; +return STATUS_INSUFFICIENT_RESOURCES; } ovsUnNatTable = OvsAllocateMemoryWithTag( sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, OVS_CT_POOL_TAG); if (ovsUnNatTable == NULL) { -goto freeNatTable; +OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); +return STATUS_INSUFFICIENT_RESOURCES; } for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { InitializeListHead([i]); InitializeListHead([i]); } -return STATUS_SUCCESS; -freeNatTable: -OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); -failNoMem: -return STATUS_INSUFFICIENT_RESOURCES; +return STATUS_SUCCESS; } /* diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 3203411..48d4abf 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -33,7 +33,7 @@ static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX ovsConntrackLockObj; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; -static UINT64 ctTotalEntries; +static LONG ctTotalEntries; static __inline NDIS_STATUS OvsCtFlush(UINT16 zone); @@ -210,7 +210,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx *ctx, InsertHeadList([ctx->hash & CT_HASH_TABLE_MASK], >link); -ctTotalEntries++; +InterlockedIncrement((LONG volatile *)); return TRUE; } @@ -233,11 +233,6 @@ 
OvsCtEntryCreate(OvsForwardingContext *fwdCtx, *entryCreated = FALSE; state |= OVS_CS_F_NEW; -parentEntry = OvsCtRelatedLookup(ctx->key, currentTime); -if (parentEntry != NULL) { -state |= OVS_CS_F_RELATED; -} - switch (ipProto) { case IPPROTO_TCP: { @@ -281,6 +276,11 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx, break; } +parentEntry = OvsCtRelatedLookup(ctx->key, currentTime); +if (parentEntry != NULL && state != OVS_CS_F_INVALID) { +state |= OVS_CS_F_RELATED; +} + if (state != OVS_CS_F_INVALID && commit) { if (entry) { entry->parent = parentEntry; @@ -313,6 +313,7 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry, BOOLEAN reply, UINT64 now) { +CT_UPDATE_RES status; switch (ipProto) { case IPPROTO_TCP: { @@ -320,32 +321,23 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry, const TCPHdr *tcp; tcp = OvsGetTcp(nbl, l4Offset, ); if (!tcp) { -return CT_UPDATE_INVALID; +status = CT_UPDATE_INVALID; +break; } -return OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now); +status = OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now); +break; } case IPPROTO_ICMP: -return OvsConntrackUpdateIcmpEntry(entry, reply, now); +status = OvsConntrackUpdateIcmpEntry(entry, reply, now); +break; case IPPROTO_UDP: -return OvsConntrackUpdateOtherEntry(entry, reply, now); +status = OvsConntrackUpdateOtherEntry(entry, reply, now); +break; default: -return CT_UPDATE_INVALID; -} -} - -static __inline VOID -OvsCtEntryDelete(POVS_CT_ENTRY entry) -{ -if (entry == NULL) { -return; -} -if (entry->natInfo.natAction) { -OvsNatDeleteKey(>key); +status = CT_UPDATE_INVALID; +break; } -OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); -RemoveEntryList(>link); -OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); -ctTotalEntries--; +return status; } static __inline BOOLEAN @@ -356,6 +348,24 @@ OvsCtEntryExpired(POVS_CT_ENTRY entry) return entry->expiration < currentTime; } +static __inline VOID +OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) +{ +if (entry == NULL) { +return; +} +if (forceDelete || 
OvsCtEntryExpired(entry)) { +if (entry->natInfo.natAction) { +OvsNatDeleteKey(>key); +} +OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); +RemoveEntryList(>link); +OvsFreeMemoryWithTag(e
[ovs-dev] [PATCH v1 0/3] datapath-windows: New lock implementation in conntrack
This patch series replaces the existing single RW lock implementation in conntrack with two RW locks in conntrack and one RW lock in NAT. Anand Kumar (3): datapath-windows: Refactor conntrack code. datapath-windows: Add a global level RW lock for NAT datapath-windows: Optimize conntrack lock implementation. datapath-windows/ovsext/Conntrack-nat.c | 17 +- datapath-windows/ovsext/Conntrack.c | 407 datapath-windows/ovsext/Conntrack.h | 7 +- 3 files changed, 274 insertions(+), 157 deletions(-) -- 2.9.3.windows.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev