Hi Anand, Thanks a lot for implementing the series.
I saw some small nits while initializing and cleaning `ovsCtBucketLock`, otherwise it looks good. I think I'm comfortable to acknowledge the next revision. Thanks, Alin. > -----Original Message----- > From: [email protected] [mailto:ovs-dev- > [email protected]] On Behalf Of Anand Kumar > Sent: Tuesday, November 14, 2017 8:48 PM > To: [email protected] > Subject: [ovs-dev] [PATCH v1 3/3] datapath-windows: Optimize conntrack > lock implementation. > > Currently, there is one global lock for conntrack module, which protects > conntrack entries and conntrack table. All the NAT operations are performed > holding this lock. > > This becomes inefficient, as the number of conntrack entries grow. > With new implementation, we will have two PNDIS_RW_LOCK_EX locks in > conntrack. > > 1. ovsCtBucketLock - one rw lock per bucket of the conntrack table, which is > shared by all the ct entries that belong to the same bucket. > 2. lock - a rw lock in OVS_CT_ENTRY structure that protects the members of > conntrack entry. > > Also, OVS_CT_ENTRY structure will have a lock reference(bucketLockRef) to > the corresponding OvsCtBucketLock of conntrack table. > We need this reference to retrieve ovsCtBucketLock from ct entry for delete > operation. > > Signed-off-by: Anand Kumar <[email protected]> > --- > datapath-windows/ovsext/Conntrack-nat.c | 6 + > datapath-windows/ovsext/Conntrack.c | 231 ++++++++++++++++++++--- > --------- > datapath-windows/ovsext/Conntrack.h | 3 + > 3 files changed, 154 insertions(+), 86 deletions(-) > > diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath- > windows/ovsext/Conntrack-nat.c > index 7975770..33a86cf 100644 > --- a/datapath-windows/ovsext/Conntrack-nat.c > +++ b/datapath-windows/ovsext/Conntrack-nat.c > @@ -167,12 +167,16 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, > { > UINT32 natFlag; > const struct ct_endpoint* endpoint; > + LOCK_STATE_EX lockState; > + /* XXX Move conntrack locks out of NAT after implementing lock in NAT. > */ > + NdisAcquireRWLockRead(entry->lock, &lockState, 0); > /* When it is NAT, only entry->rev_key contains NATTED address; > When it is unNAT, only entry->key contains the UNNATTED address;*/ > const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key; > BOOLEAN isSrcNat; > > if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { > + NdisReleaseRWLock(entry->lock, &lockState); > return; > } > isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || @@ -202,6 > +206,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, > } > } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ > // XXX: IPv6 packet not supported yet. > + NdisReleaseRWLock(entry->lock, &lockState); > return; > } > if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { > @@ -215,6 +220,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx, > } > } > } > + NdisReleaseRWLock(entry->lock, &lockState); > } > > > diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath- > windows/ovsext/Conntrack.c > index ba0dc88..f5e4996 100644 > --- a/datapath-windows/ovsext/Conntrack.c > +++ b/datapath-windows/ovsext/Conntrack.c > @@ -31,7 +31,7 @@ > KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY > ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; -static > PNDIS_RW_LOCK_EX ovsConntrackLockObj; > +static PNDIS_RW_LOCK_EX *ovsCtBucketLock; > static PNDIS_RW_LOCK_EX ovsCtNatLockObj; extern > POVS_SWITCH_CONTEXT gOvsSwitchContext; static LONG ctTotalEntries; > @@ -47,20 +47,13 @@ static __inline NDIS_STATUS OvsCtFlush(UINT16 > zone); NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context) { > - NTSTATUS status; > + NTSTATUS status = STATUS_SUCCESS; > HANDLE threadHandle = NULL; > ctTotalEntries = 0; > > /* Init the sync-lock */ > - ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle); > - if (ovsConntrackLockObj == NULL) { > - return STATUS_INSUFFICIENT_RESOURCES; > - } > - > ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle); > if (ovsCtNatLockObj == NULL) { > - NdisFreeRWLock(ovsConntrackLockObj); > - ovsConntrackLockObj = NULL; > return STATUS_INSUFFICIENT_RESOURCES; > } > > @@ -69,15 +62,26 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) > * CT_HASH_TABLE_SIZE, > OVS_CT_POOL_TAG); > if (ovsConntrackTable == NULL) { > - NdisFreeRWLock(ovsConntrackLockObj); > - ovsConntrackLockObj = NULL; > NdisFreeRWLock(ovsCtNatLockObj); > ovsCtNatLockObj = NULL; > return STATUS_INSUFFICIENT_RESOURCES; > } > > - for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) { > + ovsCtBucketLock = > OvsAllocateMemoryWithTag(sizeof(PNDIS_RW_LOCK_EX) > + * CT_HASH_TABLE_SIZE, > + OVS_CT_POOL_TAG); > + if (ovsCtBucketLock == NULL) { > + status = STATUS_INSUFFICIENT_RESOURCES; > + goto freeTable; > + } > + > + for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) { > InitializeListHead(&ovsConntrackTable[i]); > + ovsCtBucketLock[i] = NdisAllocateRWLock(context->NdisFilterHandle); > + if (ovsCtBucketLock[i] == NULL) { > + status = STATUS_INSUFFICIENT_RESOURCES; > + goto freeBucketLock; > + } > } > > /* Init CT Cleaner Thread */ > @@ -87,16 +91,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) > &ctThreadCtx); > > if (status != STATUS_SUCCESS) { > - NdisFreeRWLock(ovsConntrackLockObj); > - ovsConntrackLockObj = NULL; > - > - NdisFreeRWLock(ovsCtNatLockObj); > - ovsCtNatLockObj = NULL; > - > - OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG); > - ovsConntrackTable = NULL; > - > - return status; > + goto freeBucketLock; > } > > ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, > KernelMode, @@ -104,13 +99,28 @@ > OvsInitConntrack(POVS_SWITCH_CONTEXT context) > ZwClose(threadHandle); > threadHandle = NULL; > > - status = OvsNatInit(); > + status = OvsNatInit(context); > > if (status != STATUS_SUCCESS) { > OvsCleanupConntrack(); > - return status; > + goto freeBucketLock; [Alin Serdean] Double free of ovsCtBucketLock can happen. > } > return STATUS_SUCCESS; > + > +freeBucketLock: > + for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) { > + if (ovsCtBucketLock[i] != NULL) { [Alin Serdean] The list may not be initialized at this point. You can leave the loop that contains `ovsCtBucketLock[i] = NdisAllocateRWLock(context->NdisFilterHandle);` without a goto and check the status after. > + NdisFreeRWLock(ovsCtBucketLock[i]); > + } > + } > + OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG); > + ovsCtBucketLock = NULL; > +freeTable: > + OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG); > + ovsConntrackTable = NULL; > + NdisFreeRWLock(ovsCtNatLockObj); > + ovsCtNatLockObj = NULL; > + return status; > } > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
