Re: [ovs-dev] same tcp session encapsulated with different udp src port in kernel mode if packet has do ip_forward

2019-11-08 Thread ychen



I have tested this patch on Linux with OVS version 2.8.2, and it seems to work.




>From fa24d308d40f37f890fead0b79ac1f0f7baa28ba Mon Sep 17 00:00:00 2001
From: hzchenyuefang 
Date: Sat, 9 Nov 2019 10:14:23 +0800
Subject: [PATCH 1/1] fix skb_hash problem when sending from internal port


The first packet needs to go to userspace first, and when it is later executed
in the datapath, its skb->hash value is reset to 0, so the packet's skb->hash
value needs to be recalculated when used.  This may happen under the following
conditions: a packet is sent from an internal port and then vxlan-encapsulated;
the outer udp header source port is calculated from skb->hash (see function
udp_flow_src_port), so packets of the same tcp session may get different outer
udp source ports, which can affect load balancing.
---
 datapath-windows/ovsext/BufferMgmt.h  |  1 +
 datapath-windows/ovsext/DpInternal.h  |  1 +
 datapath-windows/ovsext/User.c| 11 ++-
 datapath/datapath.c   | 17 -
 datapath/datapath.h   |  0
 datapath/linux/compat/include/linux/openvswitch.h |  1 +
 lib/dpif-netlink.c|  7 ++-
 lib/dpif.h|  2 ++
 ofproto/ofproto-dpif-upcall.c | 11 ++-
 9 files changed, 47 insertions(+), 4 deletions(-)
 mode change 100644 => 100755 datapath-windows/ovsext/BufferMgmt.h
 mode change 100644 => 100755 datapath-windows/ovsext/DpInternal.h
 mode change 100644 => 100755 datapath-windows/ovsext/User.c
 mode change 100644 => 100755 datapath/datapath.c
 mode change 100644 => 100755 datapath/datapath.h
 mode change 100644 => 100755 lib/dpif-netlink.c
 mode change 100644 => 100755 lib/dpif.h
 mode change 100644 => 100755 ofproto/ofproto-dpif-upcall.c


diff --git a/datapath-windows/ovsext/BufferMgmt.h 
b/datapath-windows/ovsext/BufferMgmt.h
old mode 100644
new mode 100755
index dcf310a..34f50a1
--- a/datapath-windows/ovsext/BufferMgmt.h
+++ b/datapath-windows/ovsext/BufferMgmt.h
@@ -59,6 +59,7 @@ typedef union _OVS_BUFFER_CONTEXT {
 UINT32 dataOffsetDelta;
 };
 UINT16 mru;
+UINT32 skb_hash;
 };


 UINT64 value[MEM_ALIGN_SIZE(32) >> 3];
diff --git a/datapath-windows/ovsext/DpInternal.h 
b/datapath-windows/ovsext/DpInternal.h
old mode 100644
new mode 100755
index 3e351b7..8bb7110
--- a/datapath-windows/ovsext/DpInternal.h
+++ b/datapath-windows/ovsext/DpInternal.h
@@ -300,6 +300,7 @@ typedef struct OvsPacketExecute {
uint32_t dpNo;
uint32_t inPort;
uint16 mru;
+   uint32 skb_hash;
uint32_t packetLen;
uint32_t actionsLen;
PNL_MSG_HDR nlMsgHdr;
diff --git a/datapath-windows/ovsext/User.c b/datapath-windows/ovsext/User.c
old mode 100644
new mode 100755
index 4693a8b..4439379
--- a/datapath-windows/ovsext/User.c
+++ b/datapath-windows/ovsext/User.c
@@ -292,7 +292,8 @@ OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT 
usrParamsCtx,
 [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
 .optional = TRUE},
-[OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = TRUE }
+[OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = TRUE },
+[OVS_PACKET_ATTR_SKB_HASH] = { .type = NL_A_U32, .optional = TRUE }
 };


 RtlZeroMemory(, sizeof(OvsPacketExecute));
@@ -394,6 +395,9 @@ _MapNlAttrToOvsPktExec(PNL_MSG_HDR nlMsgHdr, PNL_ATTR 
*nlAttrs,
 if (nlAttrs[OVS_PACKET_ATTR_MRU]) {
 execute->mru = NlAttrGetU16(nlAttrs[OVS_PACKET_ATTR_MRU]);
 }
+if (nlAttrs[OVS_PACKET_ATTR_SKB_HASH]){
+execute->skb_hash = NlAttrGetU32(nlAttrs[OVS_PACKET_ATTR_SKB_HASH]);
+}
 }


 NTSTATUS
@@ -1101,6 +1105,11 @@ OvsCreateQueueNlPacket(PVOID userData,
 goto fail;
 }
 }
+if (ctx->skb_hash) {
+if (!NlMsgPutTailU32(, OVS_PACKET_ATTR_SKB_HASH, 
(UINT32)ctx->skb_hash)){
+goto fail;
+}
+}


 /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswtchd */
 if (userData){
diff --git a/datapath/datapath.c b/datapath/datapath.c
old mode 100644
new mode 100755
index b565fc5..9559b4d
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -287,7 +287,6 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct 
sw_flow_key *key)
stats_counter = >n_missed;
goto out;
}
-
ovs_flow_stats_update(flow, key->tp.flags, skb);
sf_acts = rcu_dereference(flow->sf_acts);
ovs_execute_actions(dp, skb, sf_acts, key);
@@ -521,6 +520,14 @@ static int queue_userspace_packet(struct datapath *dp, 
struct sk_buff *skb,
pad_packet(dp, user_skb);
}


+if (skb->hash) {
+if (nla_put_u32(user_skb, OVS_PACKET_ATTR_SKB_HASH, skb->hash)) {
+err = -ENOBUFS;
+goto out;
+}
+pad_packet(dp, 

Re: [ovs-dev] same tcp session encapsulated with different udp src port in kernel mode if packet has do ip_forward

2019-11-05 Thread Tonghao Zhang
On Mon, Nov 4, 2019 at 7:44 PM ychen  wrote:
>
>
>
> we can easily reproduce this phenomenon by using tcp socket stream sending 
> from ovs internal port.
>
>
>
>
> At 2019-10-30 19:49:16, "ychen"  wrote:
>
> Hi,
>when we use docker to establish tcp session, we found that the packet 
> which must do upcall to userspace has different encapsulated udp source port
>with packet that only needs do datapath flow forwarding.
>
>
>After some code research and kprobe debug,  we found the following:
>1.  use udp_flow_src_port() to get the port
> so when both skb->l4_hash==0 and skb->sw_hash==0, 5 tuple data will 
> be used to calculate the skb->hash
> 2. when first packet of tcp session coming,  packet needs do upcall to 
> userspace, and then ovs_packet_cmd_execute() called
> new skb is allocated with both l4_hash and sw_hash set to 0
> 3. when a non-first packet of the tcp session comes, function 
> ovs_dp_process_packet()->ovs_execute_actions() called,
> and this time original skb is reserved.
> when packet has do ip_forward(), kprobe debug prints skb->l4_hash=1, 
> sw_hash=0
> 4. we searched kernel code, and found such code:
>  skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
> {  if (sk->sk_txhash) {
> skb->l4_hash = 1;
> skb->hash = sk->sk_txhash;
> }
>}
>   static inline void sk_set_txhash(struct sock *sk)
>   {sk->sk_txhash = net_tx_rndhash();  ==>it is a random 
> value!!}
>5. so let's have a summary:
>when packet is processing only in datapath flow, skb->hash is random 
> value for the same tcp session?
>when packet needs processing first to userspace, then kernel space, 
> skb->hash is calculated by 5 tuple?
>
>Our testing environment:
>debian 9, kernel 4.9.65
>ovs version: 2.8.2
>
>
>Simple topo is like this:
>docker_eth0<---+
>   | veth ip_forward
>  
> +host_veth0<->port-eth(ovs-internal)
> host_veth0 and port-eth device stay in physical host.
>
>
>So can we treat skb->hash as an attribute: when sending the packet to userspace, 
> encode this attribute;
>and then do ovs_packet_cmd_execute(), retrieve the same hash value from 
> userspace?
>
>
>   another important tips:
>  if we send packets from qemu based tap device, vxlan source port is always 
> same for the same tcp session;
>  only when send packets from docker in which packets will do ip_forward, 
> vxlan source port may different for same tcp session.
Should be fixed. The patch will be sent.
>
>
>
>
>
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] same tcp session encapsulated with different udp src port in kernel mode if packet has do ip_forward

2019-11-04 Thread ychen



we can easily reproduce this phenomenon by using tcp socket stream sending from 
ovs internal port.




At 2019-10-30 19:49:16, "ychen"  wrote:

Hi, 
   when we use docker to establish tcp session, we found that the packet which 
must do upcall to userspace has different encapsulated udp source port 
   with packet that only needs do datapath flow forwarding.


   After some code research and kprobe debug,  we found the following:
   1.  use udp_flow_src_port() to get the port
so when both skb->l4_hash==0 and skb->sw_hash==0, 5 tuple data will be 
used to calculate the skb->hash
2. when first packet of tcp session coming,  packet needs do upcall to 
userspace, and then ovs_packet_cmd_execute() called
new skb is allocated with both l4_hash and sw_hash set to 0
3. when a non-first packet of the tcp session comes, function 
ovs_dp_process_packet()->ovs_execute_actions() called,
and this time original skb is reserved. 
when packet has do ip_forward(), kprobe debug prints skb->l4_hash=1, 
sw_hash=0
4. we searched kernel code, and found such code:
 skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
{  if (sk->sk_txhash) {
skb->l4_hash = 1;
skb->hash = sk->sk_txhash;
}
   }
  static inline void sk_set_txhash(struct sock *sk)
  {sk->sk_txhash = net_tx_rndhash();  ==>it is a random value!!}
   5. so let's have a summary:
   when packet is processing only in datapath flow, skb->hash is random 
value for the same tcp session?
   when packet needs processing first to userspace, then kernel space, 
skb->hash is calculated by 5 tuple?

   Our testing environment:
   debian 9, kernel 4.9.65
   ovs version: 2.8.2


   Simple topo is like this:
   docker_eth0<---+
  | veth ip_forward
 
+host_veth0<->port-eth(ovs-internal)
host_veth0 and port-eth device stay in physical host.


   So can we treat skb->hash as an attribute: when sending the packet to userspace, 
encode this attribute; 
   and then do ovs_packet_cmd_execute(), retrieve the same hash value from 
userspace?


  another important tips:
 if we send packets from qemu based tap device, vxlan source port is always 
same for the same tcp session;
 only when send packets from docker in which packets will do ip_forward, vxlan 
source port may different for same tcp session.






 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] same tcp session encapsulated with different udp src port in kernel mode if packet has do ip_forward

2019-10-30 Thread ychen
Hi, 
   when we use docker to establish tcp session, we found that the packet which 
must do upcall to userspace has different encapsulated udp source port 
   with packet that only needs do datapath flow forwarding.


   After some code research and kprobe debug,  we found the following:
   1.  use udp_flow_src_port() to get the port
so when both skb->l4_hash==0 and skb->sw_hash==0, 5 tuple data will be 
used to calculate the skb->hash
2. when first packet of tcp session coming,  packet needs do upcall to 
userspace, and then ovs_packet_cmd_execute() called
new skb is allocated with both l4_hash and sw_hash set to 0
3. when a non-first packet of the tcp session comes, function 
ovs_dp_process_packet()->ovs_execute_actions() called,
and this time original skb is reserved. 
when packet has do ip_forward(), kprobe debug prints skb->l4_hash=1, 
sw_hash=0
4. we searched kernel code, and found such code:
 skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
{  if (sk->sk_txhash) {
skb->l4_hash = 1;
skb->hash = sk->sk_txhash;
}
   }
  static inline void sk_set_txhash(struct sock *sk)
  {sk->sk_txhash = net_tx_rndhash();  ==>it is a random value!!}
   5. so let's have a summary:
   when packet is processing only in datapath flow, skb->hash is random 
value for the same tcp session?
   when packet needs processing first to userspace, then kernel space, 
skb->hash is calculated by 5 tuple?

   Our testing environment:
   debian 9, kernel 4.9.65
   ovs version: 2.8.2


   Simple topo is like this:
   docker_eth0<---+
  | veth ip_forward
 
+host_veth0<->port-eth(ovs-internal)
host_veth0 and port-eth device stay in physical host.


   So can we treat skb->hash as an attribute: when sending the packet to userspace, 
encode this attribute; 
   and then do ovs_packet_cmd_execute(), retrieve the same hash value from 
userspace?


  another important tips:
 if we send packets from qemu based tap device, vxlan source port is always 
same for the same tcp session;
 only when send packets from docker in which packets will do ip_forward, vxlan 
source port may different for same tcp session.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev