With this patch, OvsTcpSegmentNBL supports not only segmenting an NBL into TCP segments but also fragmenting it into IPv4 fragments.
To reflect the new behavior, the function OvsTcpSegmentNBL is renamed to OvsFragmentNBL, and OvsTcpSegmentNBL is kept as a wrapper around it. Signed-off-by: Anand Kumar <kumaran...@vmware.com> --- v5->v6: No Change v4->v5: Renamed the variable mss to fragmentSize. v3->v4: No Change v2->v3: - Updated log message and function summary v1->v2: - Fix compile error for release mode. --- datapath-windows/ovsext/BufferMgmt.c | 194 +++++++++++++++++++++++++---------- datapath-windows/ovsext/BufferMgmt.h | 10 +- datapath-windows/ovsext/Geneve.c | 2 +- datapath-windows/ovsext/Gre.c | 2 +- datapath-windows/ovsext/Stt.c | 2 +- datapath-windows/ovsext/User.c | 2 +- datapath-windows/ovsext/Vxlan.c | 2 +- 7 files changed, 152 insertions(+), 62 deletions(-) diff --git a/datapath-windows/ovsext/BufferMgmt.c b/datapath-windows/ovsext/BufferMgmt.c index d99052d..0011c10 100644 --- a/datapath-windows/ovsext/BufferMgmt.c +++ b/datapath-windows/ovsext/BufferMgmt.c @@ -1084,6 +1084,31 @@ nblcopy_error: return NULL; } +NDIS_STATUS +GetIpHeaderInfo(PNET_BUFFER_LIST curNbl, + UINT32 *hdrSize) +{ + CHAR *ethBuf[sizeof(EthHdr)]; + EthHdr *eth; + IPHdr *ipHdr; + PNET_BUFFER curNb; + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); + + eth = (EthHdr *)NdisGetDataBuffer(curNb, ETH_HEADER_LENGTH, + (PVOID)&ethBuf, 1, 0); + if (eth == NULL) { + return NDIS_STATUS_INVALID_PACKET; + } + ipHdr = (IPHdr *)((PCHAR)eth + ETH_HEADER_LENGTH); + if (ipHdr == NULL) { + return NDIS_STATUS_INVALID_PACKET; + } + *hdrSize = (UINT32)(ETH_HEADER_LENGTH + (ipHdr->ihl * 4)); + return NDIS_STATUS_SUCCESS; +} + /* * -------------------------------------------------------------------------- * GetSegmentHeaderInfo @@ -1113,15 +1138,16 @@ GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl, /* * -------------------------------------------------------------------------- - * FixSegmentHeader + * FixPacketHeader * - * Fix IP length, IP checksum, TCP sequence number and TCP checksum - * in the segment. 
+ * Fix IP length, Offset, IP checksum, TCP sequence number and TCP checksum + * in the netbuffer if applicable. * -------------------------------------------------------------------------- */ static NDIS_STATUS -FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber, - BOOLEAN lastPacket, UINT16 packetCounter) +FixPacketHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber, + BOOLEAN lastPacket, UINT16 packetCounter, UINT16 offset, + BOOLEAN isFragment) { EthHdr *dstEth = NULL; TCPHdr *dstTCP = NULL; @@ -1140,41 +1166,55 @@ FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber, case ETH_TYPE_IPV4_NBO: { IPHdr *dstIP = NULL; - - ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb) + if (!isFragment) { + ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb) >= sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr)); - dstIP = (IPHdr *)((PCHAR)dstEth + sizeof(*dstEth)); - dstTCP = (TCPHdr *)((PCHAR)dstIP + dstIP->ihl * 4); - ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb) + dstIP = (IPHdr *)((PCHAR)dstEth + sizeof(*dstEth)); + dstTCP = (TCPHdr *)((PCHAR)dstIP + dstIP->ihl * 4); + ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb) >= sizeof(EthHdr) + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP)); - /* Fix IP length and checksum */ - ASSERT(dstIP->protocol == IPPROTO_TCP); - dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP)); - dstIP->id += packetCounter; + /* Fix IP length and checksum */ + ASSERT(dstIP->protocol == IPPROTO_TCP); + dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP)); + dstIP->id += packetCounter; + dstTCP->seq = htonl(seqNumber); + + /* + * Set the TCP FIN and PSH bit only for the last packet + * More information can be found under: + * https://msdn.microsoft.com/en-us/library/windows/hardware/ff568840%28v=vs.85%29.aspx + */ + if (dstTCP->fin) { + dstTCP->fin = lastPacket; + } + if (dstTCP->psh) { + 
dstTCP->psh = lastPacket; + } + UINT16 csumLength = segmentSize + TCP_HDR_LEN(dstTCP); + dstTCP->check = IPPseudoChecksum(&dstIP->saddr, + &dstIP->daddr, + IPPROTO_TCP, + csumLength); + dstTCP->check = CalculateChecksumNB(nb, + csumLength, + sizeof(*dstEth) + dstIP->ihl * 4); + } else { + ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb) + >= sizeof(EthHdr) + sizeof(IPHdr)); + + dstIP = (IPHdr *)((PCHAR)dstEth + sizeof(*dstEth)); + ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb) + >= sizeof(EthHdr) + dstIP->ihl * 4); + dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4); + if (lastPacket) { + dstIP->frag_off = htons(offset & IP_OFFSET); + } else { + dstIP->frag_off = htons((offset & IP_OFFSET) | IP_MF); + } + } dstIP->check = 0; dstIP->check = IPChecksum((UINT8 *)dstIP, dstIP->ihl * 4, 0); - dstTCP->seq = htonl(seqNumber); - - /* - * Set the TCP FIN and PSH bit only for the last packet - * More information can be found under: - * https://msdn.microsoft.com/en-us/library/windows/hardware/ff568840%28v=vs.85%29.aspx - */ - if (dstTCP->fin) { - dstTCP->fin = lastPacket; - } - if (dstTCP->psh) { - dstTCP->psh = lastPacket; - } - UINT16 csumLength = segmentSize + TCP_HDR_LEN(dstTCP); - dstTCP->check = IPPseudoChecksum(&dstIP->saddr, - &dstIP->daddr, - IPPROTO_TCP, - csumLength); - dstTCP->check = CalculateChecksumNB(nb, - csumLength, - sizeof(*dstEth) + dstIP->ihl * 4); break; } case ETH_TYPE_IPV6_NBO: @@ -1218,11 +1258,29 @@ FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber, return STATUS_SUCCESS; } + /* + * -------------------------------------------------------------------------- + * OvsTcpSegmentNBL -- + * Wrapper function to Fragment a given NBL based on MSS + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsTcpSegmentNBL(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + POVS_PACKET_HDR_INFO hdrInfo, + UINT32 mss, + UINT32 headRoom, + BOOLEAN 
isIpFragment) +{ + return OvsFragmentNBL(ovsContext, nbl, hdrInfo, mss, headRoom, isIpFragment); +} + + /* * -------------------------------------------------------------------------- - * OvsTcpSegmentNBL -- + * OvsFragmentNBL -- * - * Segment TCP payload, and prepend each segment with ether/IP/TCP header. + * Fragment NBL payload, and prepend each segment with either/IP/TCP header. * Leave headRoom for additional encap. * * Please note, @@ -1235,24 +1293,25 @@ FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber, * -------------------------------------------------------------------------- */ PNET_BUFFER_LIST -OvsTcpSegmentNBL(PVOID ovsContext, - PNET_BUFFER_LIST nbl, - POVS_PACKET_HDR_INFO hdrInfo, - UINT32 mss, - UINT32 headRoom) +OvsFragmentNBL(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + POVS_PACKET_HDR_INFO hdrInfo, + UINT32 fragmentSize, + UINT32 headRoom, + BOOLEAN isIpFragment) { POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; #ifdef DBG POVS_NBL_POOL ovsPool = &context->ovsPool; #endif POVS_BUFFER_CONTEXT dstCtx, srcCtx; - UINT32 size, hdrSize, seqNumber; + UINT32 size, hdrSize, nblSize, seqNumber = 0; PNET_BUFFER_LIST newNbl; PNET_BUFFER nb, newNb; NDIS_STATUS status; UINT16 segmentSize; ULONG copiedSize; - UINT16 packetCounter = 0; + UINT16 offset = 0, packetCounter = 0; srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { @@ -1264,18 +1323,28 @@ OvsTcpSegmentNBL(PVOID ovsContext, nb = NET_BUFFER_LIST_FIRST_NB(nbl); ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL); - /* Figure out the segment header size */ - status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber); + /* Figure out the header size */ + if (isIpFragment) { + status = GetIpHeaderInfo(nbl, &hdrSize); + } else { + status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber); + } if (status != NDIS_STATUS_SUCCESS) { OVS_LOG_INFO("Cannot parse NBL header"); return NULL; } - + /* Get the 
NBL size. */ + if (isIpFragment) { + nblSize = fragmentSize - hdrSize; + } else { + nblSize = fragmentSize; + } size = NET_BUFFER_DATA_LENGTH(nb) - hdrSize; /* XXX add to ovsPool counters? */ - newNbl = NdisAllocateFragmentNetBufferList(nbl, NULL, - NULL, hdrSize, mss, hdrSize + headRoom , 0, 0); + newNbl = NdisAllocateFragmentNetBufferList(nbl, NULL, NULL, hdrSize, + nblSize, hdrSize + headRoom , + 0, 0); if (newNbl == NULL) { return NULL; } @@ -1283,7 +1352,7 @@ OvsTcpSegmentNBL(PVOID ovsContext, /* Now deal with TCP payload */ for (newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); newNb != NULL; newNb = NET_BUFFER_NEXT_NB(newNb)) { - segmentSize = (size > mss ? mss : size) & 0xffff; + segmentSize = (size > nblSize ? nblSize : size) & 0xffff; if (headRoom) { NdisAdvanceNetBufferDataStart(newNb, headRoom, FALSE, NULL); } @@ -1295,17 +1364,21 @@ OvsTcpSegmentNBL(PVOID ovsContext, goto nblcopy_error; } - status = FixSegmentHeader(newNb, segmentSize, seqNumber, - NET_BUFFER_NEXT_NB(newNb) == NULL, - packetCounter); + status = FixPacketHeader(newNb, segmentSize, seqNumber, + NET_BUFFER_NEXT_NB(newNb) == NULL, + packetCounter, offset, isIpFragment); + if (status != NDIS_STATUS_SUCCESS) { goto nblcopy_error; } - /* Move on to the next segment */ + if (isIpFragment) { + offset += (segmentSize) / 8; + } else { + seqNumber += segmentSize; + } size -= segmentSize; - seqNumber += segmentSize; packetCounter++; } @@ -1319,6 +1392,15 @@ OvsTcpSegmentNBL(PVOID ovsContext, goto nbl_context_error; } + if (isIpFragment) { + /* Copy with Flag - NDIS_SWITCH_COPY_NBL_INFO_FLAGS_PRESERVE_DESTINATIONS. */ + status = context->NdisSwitchHandlers. 
+ CopyNetBufferListInfo(context->ovsPool.ndisContext, newNbl, nbl, 1); + + if (status != NDIS_STATUS_SUCCESS) { + goto nbl_context_error; + } + } newNbl->ParentNetBufferList = nbl; /* Remember it's a fragment NBL so we can free it properly */ @@ -1340,7 +1422,7 @@ OvsTcpSegmentNBL(PVOID ovsContext, OvsDumpNetBufferList(newNbl); OvsDumpForwardingDetails(newNbl); #endif - OVS_LOG_TRACE("Segment nbl %p to newNbl: %p", nbl, newNbl); + OVS_LOG_TRACE("Fragmnet nbl %p to newNbl: %p", nbl, newNbl); return newNbl; nbl_context_error: diff --git a/datapath-windows/ovsext/BufferMgmt.h b/datapath-windows/ovsext/BufferMgmt.h index 77b2854..52fe532 100644 --- a/datapath-windows/ovsext/BufferMgmt.h +++ b/datapath-windows/ovsext/BufferMgmt.h @@ -116,7 +116,15 @@ PNET_BUFFER_LIST OvsTcpSegmentNBL(PVOID context, PNET_BUFFER_LIST nbl, POVS_PACKET_HDR_INFO hdrInfo, UINT32 MSS, - UINT32 headRoom); + UINT32 headRoom, + BOOLEAN isIpFragment); + +PNET_BUFFER_LIST OvsFragmentNBL(PVOID context, + PNET_BUFFER_LIST nbl, + POVS_PACKET_HDR_INFO hdrInfo, + UINT32 MSS, + UINT32 headRoom, + BOOLEAN isIpFragment); PNET_BUFFER_LIST OvsAllocateNBLFromBuffer(PVOID context, PVOID buffer, diff --git a/datapath-windows/ovsext/Geneve.c b/datapath-windows/ovsext/Geneve.c index 1938aaa..43374e2 100644 --- a/datapath-windows/ovsext/Geneve.c +++ b/datapath-windows/ovsext/Geneve.c @@ -118,7 +118,7 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, if (mss) { OVS_LOG_TRACE("l4Offset %d", layers->l4Offset); *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers, - mss, headRoom); + mss, headRoom, FALSE); if (*newNbl == NULL) { OVS_LOG_ERROR("Unable to segment NBL"); return NDIS_STATUS_FAILURE; diff --git a/datapath-windows/ovsext/Gre.c b/datapath-windows/ovsext/Gre.c index c5da064..f095742 100644 --- a/datapath-windows/ovsext/Gre.c +++ b/datapath-windows/ovsext/Gre.c @@ -158,7 +158,7 @@ OvsDoEncapGre(POVS_VPORT_ENTRY vport, if (mss) { OVS_LOG_TRACE("l4Offset %d", layers->l4Offset); *newNbl = 
OvsTcpSegmentNBL(switchContext, curNbl, layers, - mss, headRoom); + mss, headRoom, FALSE); if (*newNbl == NULL) { OVS_LOG_ERROR("Unable to segment NBL"); return NDIS_STATUS_FAILURE; diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c index 5aa8652..1f36835 100644 --- a/datapath-windows/ovsext/Stt.c +++ b/datapath-windows/ovsext/Stt.c @@ -185,7 +185,7 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, if ((innerFrameLen > OVS_MAX_STT_PACKET_LENGTH) || (layers->l4Offset > OVS_MAX_STT_L4_OFFSET_LENGTH)) { *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers, - mss - headRoom, headRoom); + mss - headRoom, headRoom, FALSE); if (*newNbl == NULL) { OVS_LOG_ERROR("Unable to segment NBL"); return NDIS_STATUS_FAILURE; diff --git a/datapath-windows/ovsext/User.c b/datapath-windows/ovsext/User.c index 3154640..7880220 100644 --- a/datapath-windows/ovsext/User.c +++ b/datapath-windows/ovsext/User.c @@ -787,7 +787,7 @@ OvsCreateAndAddPackets(PVOID userData, if (tsoInfo.LsoV1Transmit.MSS) { OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset); newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo, - tsoInfo.LsoV1Transmit.MSS , 0); + tsoInfo.LsoV1Transmit.MSS , 0, FALSE); if (newNbl == NULL) { return NDIS_STATUS_FAILURE; } diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c index 84c2f2f..427f31c 100644 --- a/datapath-windows/ovsext/Vxlan.c +++ b/datapath-windows/ovsext/Vxlan.c @@ -205,7 +205,7 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, if (mss) { OVS_LOG_TRACE("l4Offset %d", layers->l4Offset); *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers, - mss, headRoom); + mss, headRoom, FALSE); if (*newNbl == NULL) { OVS_LOG_ERROR("Unable to segment NBL"); return NDIS_STATUS_FAILURE; -- 2.9.3.windows.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev