From: Frank Guo <fra...@vmware.com> When traffic is sent through a tunnel, packets are dropped until the MAC is learned.
I tested by installing the ovsext driver. Normally, after the driver is installed and everything is set up, the first ping packet is dropped; with this patch, the first ping packet is OK. Reported-at:openvswitch/ovs-issues#253 Signed-off-by: Frank Guo <fra...@vmware.com> --- datapath-windows/ovsext/Actions.c | 148 ++++++++++++++++++++++++++++++ datapath-windows/ovsext/Actions.h | 9 ++ datapath-windows/ovsext/Geneve.c | 64 +++++++------ datapath-windows/ovsext/Vxlan.c | 70 +++++++------- 4 files changed, 232 insertions(+), 59 deletions(-) diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index 20de4db4c..484a01710 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -620,6 +620,154 @@ OvsDoFlowLookupOutput(OvsForwardingContext* ovsFwdCtx) return status; } +VOID +OvsEncapPktCB(PNET_BUFFER_LIST nbl, + UINT32 inPort, + PVOID tunnelKey, + PVOID cbData1, + PVOID cbData2, + NTSTATUS status, + POVS_FWD_INFO fwdInfo) +{ + POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)cbData1; + OvsIPTunnelKey *tunKey = (OvsIPTunnelKey *)tunnelKey; + OvsForwardingContext ovsFwdCtx = { 0 }; + BOOLEAN isDispatchLevel = KeGetCurrentIrql() == DISPATCH_LEVEL; + LOCK_STATE_EX lockState, dpLockState; + PNET_BUFFER curNb; + char ipAddrStr[64] = { 0 }; + + UNREFERENCED_PARAMETER(inPort); + UNREFERENCED_PARAMETER(cbData2); + + if (fwdInfo->dstIphAddr.si_family == AF_INET) { + RtlIpv4AddressToStringA(&fwdInfo->dstIphAddr.Ipv4.sin_addr, + ipAddrStr); + } else if (fwdInfo->dstIphAddr.si_family == AF_INET6) { + RtlIpv6AddressToStringA(&fwdInfo->dstIphAddr.Ipv6.sin6_addr, + ipAddrStr); + } + OVS_LOG_INFO("Resolve IP %s MAC %02x:%02x:%02x:%02x:%02x:%02x " + "status %x, nbl %p, vport %p", ipAddrStr, + fwdInfo->dstMacAddr[0], fwdInfo->dstMacAddr[1], + fwdInfo->dstMacAddr[2], fwdInfo->dstMacAddr[3], + fwdInfo->dstMacAddr[4], fwdInfo->dstMacAddr[5], + status, nbl, fwdInfo->vport); + + if (!nbl) { + return; + } + + /* XXX - switchContext should not 
be released */ + if (isDispatchLevel) { + NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + } else { + NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState, 0); + } + if (fwdInfo->vport == NULL || status != STATUS_SUCCESS) { + goto unlock_free_out; + } + ASSERT(OvsIphAddrEquals(&tunKey->dst, &fwdInfo->dstIphAddr)); + ASSERT(OvsIphAddrEquals(&tunKey->src, &fwdInfo->srcIphAddr) || + OvsIphIsZero(&tunKey->src)); + + /* Update each header of NB */ + for (curNb = NET_BUFFER_LIST_FIRST_NB(nbl); curNb != NULL; + curNb = curNb->Next) { + EthHdr *ethHdr; + PMDL curMdl = NET_BUFFER_CURRENT_MDL(curNb); + PUINT8 bufferStart = (PUINT8)OvsGetMdlWithLowPriority(curMdl); + if (!bufferStart) { + status = NDIS_STATUS_RESOURCES; + OVS_LOG_ERROR("nbl %p nb %p buffer error", nbl, curNb); + goto unlock_free_out; + } + + bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + if (NET_BUFFER_NEXT_NB(curNb)) { + OVS_LOG_TRACE("nbl %p nb %p length %u next %u", nbl, curNb, + NET_BUFFER_DATA_LENGTH(curNb), + NET_BUFFER_DATA_LENGTH(curNb->Next)); + } + + /* L2 header */ + ethHdr = (EthHdr *)bufferStart; + NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, + sizeof ethHdr->Destination); + NdisMoveMemory(ethHdr->Source, fwdInfo->srcMacAddr, + sizeof ethHdr->Source); + OVS_LOG_INFO("nbl %p nb %p flags %x", nbl, curNb, tunKey->flags); + if (tunKey->flags & OVS_TNL_F_CSUM) { + if (ethHdr->Type == htons(ETH_TYPE_IPV4)) { + IPHdr *ipHdr; + /* IP header */ + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + if (ipHdr->saddr == 0) { + UDPHdr *udpHdr; + + ipHdr->saddr = fwdInfo->srcIphAddr.Ipv4.sin_addr.s_addr; + /* UDP header */ + udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + OVS_LOG_INFO("nbl %p nb %p len %u src %d.%d.%d.%d, csum %u", + nbl, curNb, NET_BUFFER_DATA_LENGTH(curNb), + ipHdr->saddr & 0xff, (ipHdr->saddr >> 8) & 0xff, + (ipHdr->saddr >> 16) & 0xff, + (ipHdr->saddr >> 24) & 0xff, udpHdr->check); + + /* Update checksum */ + 
ASSERT(udpHdr->check); + udpHdr->check = ChecksumUpdate32(udpHdr->check, 0, ipHdr->saddr); + } + } else if (ethHdr->Type == htons(ETH_TYPE_IPV6)) { + IPv6Hdr *ipv6Hdr; + UINT32 *srcIpv6Addr; + /* IP header */ + ipv6Hdr = (IPv6Hdr *)((PCHAR)ethHdr + sizeof *ethHdr); + srcIpv6Addr = (UINT32 *)&ipv6Hdr->saddr; + if (srcIpv6Addr[0] == 0 && srcIpv6Addr[1] == 0 && + srcIpv6Addr[2] == 0 && srcIpv6Addr[3] == 0) { + UDPHdr *udpHdr; + UINT16 udpChksumLen = 0; + + RtlCopyMemory(&ipv6Hdr->saddr, + &fwdInfo->srcIphAddr.Ipv6.sin6_addr, + sizeof(ipv6Hdr->saddr)); + /* UDP header */ + udpHdr = (UDPHdr *)((PCHAR)ipv6Hdr + sizeof *ipv6Hdr); + RtlIpv6AddressToStringA(&fwdInfo->srcIphAddr.Ipv6.sin6_addr, + ipAddrStr); + OVS_LOG_INFO("nbl %p nb %p len %u src %s, csum %u", + nbl, curNb, NET_BUFFER_DATA_LENGTH(curNb), + ipAddrStr, udpHdr->check); + + udpChksumLen = (UINT16) NET_BUFFER_DATA_LENGTH(curNb) - + sizeof *ipv6Hdr - sizeof *ethHdr; + udpHdr->check = IPv6PseudoChecksum((UINT32*)&ipv6Hdr->saddr, + (UINT32*)&ipv6Hdr->daddr, + IPPROTO_UDP, udpChksumLen); + } + } + } + } + + OvsAcquireDatapathRead(&switchContext->datapath, &dpLockState, + isDispatchLevel); + OvsInitForwardingCtx(&ovsFwdCtx, switchContext, nbl, + fwdInfo->vport->portNo, 0, + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl), + NULL, &ovsFwdCtx.layers, TRUE); + OvsDoFlowLookupOutput(&ovsFwdCtx); + OvsReleaseDatapath(&switchContext->datapath, &dpLockState); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + + return; + +unlock_free_out: + OvsCompleteNBL(switchContext, nbl, TRUE); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); +} + /* * -------------------------------------------------------------------------- * OvsTunnelPortTx -- diff --git a/datapath-windows/ovsext/Actions.h b/datapath-windows/ovsext/Actions.h index 7329d0ed0..1604415bd 100644 --- a/datapath-windows/ovsext/Actions.h +++ b/datapath-windows/ovsext/Actions.h @@ -20,6 +20,7 @@ #include "Switch.h" #include "PacketIO.h" +typedef union 
_OVS_FWD_INFO *POVS_FWD_INFO; /* * There a lot of data that needs to be maintained while executing the pipeline @@ -138,4 +139,12 @@ OvsUpdateAddressAndPortForIpv6(OvsForwardingContext *ovsFwdCtx, struct in6_addr newAddr, UINT16 newPort, BOOLEAN isSource, BOOLEAN isTx); +VOID +OvsEncapPktCB(PNET_BUFFER_LIST nbl, + UINT32 inPort, + PVOID tunnelKey, + PVOID cbData1, + PVOID cbData2, + NTSTATUS status, + POVS_FWD_INFO fwdInfo); #endif /* __ACTIONS_H_ */ diff --git a/datapath-windows/ovsext/Geneve.c b/datapath-windows/ovsext/Geneve.c index e4e81c157..ec61c030b 100644 --- a/datapath-windows/ovsext/Geneve.c +++ b/datapath-windows/ovsext/Geneve.c @@ -16,6 +16,7 @@ #include "precomp.h" +#include "Actions.h" #include "Atomic.h" #include "Debug.h" #include "Flow.h" @@ -92,6 +93,7 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, UINT32 packetLength; ULONG mss = 0; NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + BOOLEAN firstPkt = FALSE; if (tunKey->dst.si_family == AF_INET) { headRoom = OvsGetGeneveTunHdrMinSize() + tunKey->tunOptLen; @@ -101,19 +103,18 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, } status = OvsLookupIPhFwdInfo(tunKey->src, tunKey->dst, &fwdInfo); - if (status != STATUS_SUCCESS) { - OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); - // return NDIS_STATUS_PENDING; - /* - * XXX: Don't know if the completionList will make any sense when - * accessed in the callback. Make sure the caveats are known. - * - * XXX: This code will work once we are able to grab locks in the - * callback. - */ - return NDIS_STATUS_FAILURE; + /* + * Only support the first packet, if more packets are coming before + * FwdInfo is learned, drop them. 
+ */ + if (status == STATUS_NOT_FOUND) { + firstPkt = TRUE; + } else if (fwdInfo.vport == NULL) { + return NDIS_STATUS_PENDING; + } else { + RtlCopyMemory(switchFwdInfo->value, fwdInfo.value, + sizeof fwdInfo.value); } - RtlCopyMemory(switchFwdInfo->value, fwdInfo.value, sizeof fwdInfo.value); curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); packetLength = NET_BUFFER_DATA_LENGTH(curNb); @@ -141,8 +142,13 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, /* If we didn't split the packet above, make a copy now */ if (*newNbl == NULL) { - *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, - FALSE /*NBL info*/); + if (firstPkt == TRUE) { + *newNbl = OvsFullCopyNBL(switchContext, curNbl, headRoom, + FALSE /*NBL info*/); + } else { + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, + FALSE /*NBL info*/); + } if (*newNbl == NULL) { OVS_LOG_ERROR("Unable to copy NBL"); return NDIS_STATUS_FAILURE; @@ -180,11 +186,12 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, /* L2 header */ ethHdr = (EthHdr *)bufferStart; - NdisMoveMemory(ethHdr->Destination, fwdInfo.dstMacAddr, - sizeof ethHdr->Destination); - NdisMoveMemory(ethHdr->Source, fwdInfo.srcMacAddr, - sizeof ethHdr->Source); - + if (firstPkt == FALSE) { + NdisMoveMemory(ethHdr->Destination, fwdInfo.dstMacAddr, + sizeof ethHdr->Destination); + NdisMoveMemory(ethHdr->Source, fwdInfo.srcMacAddr, + sizeof ethHdr->Source); + } if (tunKey->dst.si_family == AF_INET) { ethHdr->Type = htons(ETH_TYPE_IPV4); } else if (tunKey->dst.si_family == AF_INET6) { @@ -205,10 +212,8 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, IP_DF_NBO : 0; ipHdr->ttl = tunKey->ttl ? 
tunKey->ttl : GENEVE_DEFAULT_TTL; ipHdr->protocol = IPPROTO_UDP; - ASSERT(OvsIphAddrEquals(&tunKey->dst, &fwdInfo.dstIphAddr)); - ASSERT(OvsIphAddrEquals(&tunKey->src, &fwdInfo.srcIphAddr) || OvsIphIsZero(&tunKey->src)); - ipHdr->saddr = fwdInfo.srcIphAddr.Ipv4.sin_addr.s_addr; - ipHdr->daddr = fwdInfo.dstIphAddr.Ipv4.sin_addr.s_addr; + ipHdr->saddr = tunKey->src.Ipv4.sin_addr.s_addr; + ipHdr->daddr = tunKey->dst.Ipv4.sin_addr.s_addr; ipHdr->check = 0; } else if (tunKey->dst.si_family == AF_INET6) { /* IPv6 header */ @@ -222,11 +227,9 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, ipv6Hdr->payload_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr - sizeof *ipv6Hdr); ipv6Hdr->hop_limit = tunKey->ttl ? tunKey->ttl : GENEVE_DEFAULT_TTL; ipv6Hdr->nexthdr = IPPROTO_UDP; - ASSERT(OvsIphAddrEquals(&(tunKey->dst), &(fwdInfo.dstIphAddr))); - ASSERT(OvsIphAddrEquals(&(tunKey->src), &(fwdInfo.srcIphAddr)) || OvsIphIsZero(&(tunKey->src))); - RtlCopyMemory(&ipv6Hdr->saddr, &fwdInfo.srcIphAddr.Ipv6.sin6_addr, + RtlCopyMemory(&ipv6Hdr->saddr, &tunKey->src.Ipv6.sin6_addr, sizeof(ipv6Hdr->saddr)); - RtlCopyMemory(&ipv6Hdr->daddr, &fwdInfo.dstIphAddr.Ipv6.sin6_addr, + RtlCopyMemory(&ipv6Hdr->daddr, &tunKey->dst.Ipv6.sin6_addr, sizeof(ipv6Hdr->daddr)); } @@ -294,7 +297,12 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; } - + if (firstPkt == TRUE) { + OvsFwdIPHelperRequest(*newNbl, 0, tunKey, OvsEncapPktCB, + switchContext, NULL); + *newNbl = NULL; + return NDIS_STATUS_PENDING; + } return STATUS_SUCCESS; ret_error: diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c index b268e7de2..c2f40a81e 100644 --- a/datapath-windows/ovsext/Vxlan.c +++ b/datapath-windows/ovsext/Vxlan.c @@ -16,6 +16,7 @@ #include "precomp.h" +#include "Actions.h" #include "Atomic.h" #include "Debug.h" #include "Flow.h" @@ -177,7 +178,8 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, 
POVS_FWD_INFO fwdInfo, POVS_PACKET_HDR_INFO layers, POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST *newNbl) + PNET_BUFFER_LIST *newNbl, + BOOLEAN firstPkt) { NDIS_STATUS status; PNET_BUFFER curNb; @@ -194,9 +196,6 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, UINT32 headRoom = OvsGetVxlanTunHdrSize(fwdInfo->dstIphAddr.si_family == AF_INET ? TRUE : FALSE); - ASSERT(OvsIphAddrEquals(&tunKey->dst, &fwdInfo->dstIphAddr)); - ASSERT(OvsIphAddrEquals(&tunKey->src, &fwdInfo->srcIphAddr) || - OvsIphIsZero(&tunKey->src)); curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); packetLength = NET_BUFFER_DATA_LENGTH(curNb); @@ -224,8 +223,13 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, /* If we didn't split the packet above, make a copy now */ if (*newNbl == NULL) { - *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, - FALSE /*NBL info*/); + if (firstPkt == TRUE) { + *newNbl = OvsFullCopyNBL(switchContext, curNbl, headRoom, + FALSE /*NBL info*/); + } else { + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, + FALSE /*NBL info*/); + } if (*newNbl == NULL) { OVS_LOG_ERROR("Unable to copy NBL"); return NDIS_STATUS_FAILURE; @@ -263,14 +267,16 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, /* L2 header */ ethHdr = (EthHdr *)bufferStart; - NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, - sizeof ethHdr->Destination); - NdisMoveMemory(ethHdr->Source, fwdInfo->srcMacAddr, - sizeof ethHdr->Source); + if (firstPkt == FALSE) { + NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, + sizeof ethHdr->Destination); + NdisMoveMemory(ethHdr->Source, fwdInfo->srcMacAddr, + sizeof ethHdr->Source); + } ethHdr->Type = htons(ETH_TYPE_IPV4); /* IP header */ - if (fwdInfo->dstIphAddr.si_family == AF_INET) { + if (tunKey->dst.si_family == AF_INET) { ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); ipHdr->ihl = sizeof *ipHdr / 4; @@ -283,12 +289,9 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, IP_DF_NBO : 0; ipHdr->ttl = tunKey->ttl ? 
tunKey->ttl : VXLAN_DEFAULT_TTL; ipHdr->protocol = IPPROTO_UDP; - ASSERT(OvsIphAddrEquals(&tunKey->dst, &fwdInfo->dstIphAddr)); - ASSERT(OvsIphAddrEquals(&tunKey->src, &fwdInfo->srcIphAddr) || - OvsIphIsZero(&tunKey->src)); - ipHdr->saddr = fwdInfo->srcIphAddr.Ipv4.sin_addr.s_addr; - ipHdr->daddr = fwdInfo->dstIphAddr.Ipv4.sin_addr.s_addr; + ipHdr->saddr = tunKey->src.Ipv4.sin_addr.s_addr; + ipHdr->daddr = tunKey->dst.Ipv4.sin_addr.s_addr; ipHdr->check = 0; @@ -323,7 +326,7 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, } csumInfo.Value = 0; - if (fwdInfo->dstIphAddr.si_family == AF_INET) { + if (tunKey->dst.si_family == AF_INET) { csumInfo.Transmit.IpHeaderChecksum = 1; csumInfo.Transmit.IsIPv4 = 1; } else { @@ -334,7 +337,12 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, } NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; - + if (firstPkt == TRUE) { + OvsFwdIPHelperRequest(*newNbl, 0, tunKey, OvsEncapPktCB, + switchContext, NULL); + *newNbl = NULL; + return NDIS_STATUS_PENDING; + } return STATUS_SUCCESS; @@ -363,6 +371,7 @@ OvsEncapVxlan(POVS_VPORT_ENTRY vport, { NTSTATUS status; OVS_FWD_INFO fwdInfo; + BOOLEAN firstPkt = FALSE; if (tunKey->dst.si_family != AF_INET) { /*V6 tunnel support will be supported later*/ @@ -370,22 +379,21 @@ OvsEncapVxlan(POVS_VPORT_ENTRY vport, } status = OvsLookupIPhFwdInfo(tunKey->src, tunKey->dst, &fwdInfo); - if (status != STATUS_SUCCESS) { - OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); - /* - * XXX: Don't know if the completionList will make any sense when - * accessed in the callback. Make sure the caveats are known. - * - * XXX: This code will work once we are able to grab locks in the - * callback. - */ - return NDIS_STATUS_FAILURE; + /* + * Only support the first packet, if more packets are coming before + * FwdInfo is learned, drop them. 
+ */ + if (status == STATUS_NOT_FOUND) { + firstPkt = TRUE; + } else if (fwdInfo.vport == NULL) { + return NDIS_STATUS_PENDING; + } else { + RtlCopyMemory(switchFwdInfo->value, fwdInfo.value, + sizeof fwdInfo.value); } - RtlCopyMemory(switchFwdInfo->value, fwdInfo.value, sizeof fwdInfo.value); - return OvsDoEncapVxlan(vport, curNbl, tunKey, &fwdInfo, layers, - switchContext, newNbl); + switchContext, newNbl, firstPkt); } -- 2.25.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev