The reason HOST-2 got DUP ICMP replies is that HOST-1 got DUP ICMP requests. The reason HOST-1 got DUP ICMP requests is that the actions of megaflow-2 contain two tnl_pop(2).
So I have only 1 question: Why megaflow-2 on HOST-1 has two tnl_pop(2) in actions ? ---- Simon Jones Simon Jones <batmanu...@gmail.com> 于2023年6月9日周五 11:45写道: > Hi all, > > I'm using OVS-DPDK of version 2.17.1 . > > ** 1. This is my vxlan topology ** > > ``` > ### HOST-1, use ovs-dpdk > > > HOST-1 > ------ > > br-int > | > --------- (br-int) > | | > vxlan1 pf0hpf > > > br-phy(192.168.10.20) > | > -------- (br-phy) > | > p0 > > [root@localhost ~]# ovs-vsctl show > 62001a07-219d-48d8-ac1d-fb378a3ad231 > Bridge br-int > datapath_type: netdev > Port vxlan1 > Interface vxlan1 > type: vxlan > options: {key=flow, remote_ip=flow} > Port pf0hpf > Interface pf0hpf > type: dpdk > options: {dpdk-devargs="00:0a.0,representor=[65535]"} > Port br-int > Interface br-int > type: internal > Bridge br-phy > datapath_type: netdev > Port p0 > Interface p0 > type: dpdk > options: {dpdk-devargs="0000:00:0a.0"} > Port br-phy > Interface br-phy > type: internal > ovs_version: "2.17.2" > > [root@localhost ~]# ip a > 21: br-phy: <BROADCAST,MULTICAST,PROMISC,UP,LOWER_UP> mtu 1500 qdisc > fq_codel state UP group default qlen 1000 > link/ether da:99:52:a3:c0:4b brd ff:ff:ff:ff:ff:ff > inet 192.168.10.20/24 scope global br-phy > valid_lft forever preferred_lft forever > inet6 fe80::d899:52ff:fea3:c04b/64 scope link > valid_lft forever preferred_lft forever > 22: br-int: <BROADCAST,MULTICAST,PROMISC> mtu 1500 qdisc noop state DOWN > group default qlen 1000 > link/ether 2e:98:7d:95:01:44 brd ff:ff:ff:ff:ff:ff > > [root@localhost ~]# ovs-ofctl dump-flows br-int > cookie=0x0, duration=85007.416s, table=0, n_packets=16375, > n_bytes=1359974, in_port=pf0hpf > actions=set_tunnel:0x64,load:0xc0a80a14->NXM_NX_TUN_IPV4_SRC[],load:0xc0a80a0a->NXM_NX_TUN_IPV4_DST[],output:vxlan1 > cookie=0x0, duration=68310.837s, table=0, n_packets=12694, > n_bytes=1205960, actions=NORMAL > [root@localhost ~]# ovs-ofctl dump-flows br-phy > cookie=0x0, duration=85111.116s, table=0, n_packets=17693, > n_bytes=2374117, 
in_port=p0 actions=LOCAL > cookie=0x0, duration=146898.807s, table=0, n_packets=17110, > n_bytes=2216776, priority=0 actions=NORMAL > > > ### HOST-2, use ovs kernel > > root@gyw:~# ovs-vsctl show > 65c74aa0-a9f4-4009-bc2d-cb06613dfd43 > Bridge br-phy > Port br-phy > Interface br-phy > type: internal > Port enp7s0 > Interface enp7s0 > Bridge br-int > Port vxlan1 > Interface vxlan1 > type: vxlan > options: {key="100", local_ip="192.168.10.10", > remote_ip="192.168.10.20"} > Port veth0 > Interface veth0 > Port br-int > Interface br-int > type: internal > ovs_version: "2.17.5" > > root@gyw:~# ovs-ofctl dump-flows br-phy > cookie=0x0, duration=2.250s, table=0, n_packets=0, n_bytes=0, > in_port=enp7s0 actions=LOCAL > cookie=0x0, duration=69739.938s, table=0, n_packets=13632, > n_bytes=1904248, priority=0 actions=NORMAL > root@gyw:~# ovs-ofctl dump-flows br-int > cookie=0x0, duration=69709.632s, table=0, n_packets=25874, > n_bytes=2427556, actions=NORMAL > cookie=0x0, duration=36.920s, table=0, n_packets=0, n_bytes=0, > in_port=veth0 > actions=set_tunnel:0x64,load:0xc0a80a0a->NXM_NX_TUN_IPV4_SRC[],load:0xc0a80a14->NXM_NX_TUN_IPV4_DST[],output:vxlan1 > cookie=0x0, duration=16.354s, table=0, n_packets=0, n_bytes=0, > in_port=vxlan1 actions=output:veth0 > > > ### start ping > > ping from namespace of HOST-2, and this namespace is connect to veth0 by > veth-pair NIC, to provide a simple description, you could think I send ARP > and ICMP request into veth0. > > Then these packets is vxlan encap and send to HOST-1. > > Then packets is decap by HOST-1 and send to pf0hpf at last. > > The pf0hpf is connect to a namespace, and its kernel stack response ARP > and ICMP request. 
> > > ### megaflow of HOST-1 > > [root@localhost ~]# ovs-appctl dpctl/dump-flows > flow-dump from the main thread: > recirc_id(0),in_port(5),packet_type(ns=0,id=0),eth(src=da:99:52:a3:c0:4b,dst=58:cf:eb:8e:0d:01),eth_type(0x0806), > packets:0, bytes:0, offload_packets:0, offload_bytes:0, used:never, > actions:4 > flow-dump from pmd on cpu core: 11 > tunnel(tun_id=0x64,src=192.168.10.10,dst=192.168.10.20,flags(-df-csum+key)),recirc_id(0),in_port(2),packet_type(ns=0,id=0),eth(src=ba:dc:f5:5c:dc:77,dst=82:4f:d9:d2:8a:65),eth_type(0x0800),ipv4(src=1.1.1.2,dst=1.1.1.1,proto=1,frag=no), > packets:157, bytes:15386, offload_packets:0, offload_bytes:0, used:0.052s, > actions:3,1 > recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=58:cf:eb:8e:0d:01,dst=da:99:52:a3:c0:4b),eth_type(0x0800),ipv4(src=192.168.10.10,dst=192.168.10.20,proto=17,frag=no),udp(src=49091,dst=4789), > packets:157, bytes:23236, offload_packets:0, offload_bytes:0, used:0.052s, > actions:tnl_pop(2) > recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=82:4f:d9:d2:8a:65,dst=ba:dc:f5:5c:dc:77),eth_type(0x0800),ipv4(src=1.1.1.1,dst=1.1.1.2,proto=1,tos=0/0x3,frag=no), > packets:157, bytes:15386, offload_packets:0, offload_bytes:0, used:0.052s, > actions:clone(tnl_push(tnl_port(2),header(size=50,type=4,eth(dst=58:cf:eb:8e:0d:01,src=da:99:52:a3:c0:4b,dl_type=0x0800),ipv4(src=192.168.10.20,dst=192.168.10.10,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=44687,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x64)),out_port(5)),4) > recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=58:cf:eb:8e:0d:01,dst=da:99:52:a3:c0:4b),eth_type(0x0806),arp(sip=192.168.10.10,tip=192.168.10.20,op=1), > packets:0, bytes:0, offload_packets:0, offload_bytes:0, used:never, > actions:5 > > [root@localhost ~]# ovs-appctl dpctl/show > netdev@ovs-netdev: > lookups: hit:7976 missed:535 lost:1 > flows: 4 > port 0: ovs-netdev (tap) > port 1: br-int (tap) > port 2: vxlan_sys_4789 (vxlan: packet_type=ptap) > port 3: pf0hpf (dpdk: 
configured_rx_queues=1, > configured_rxq_descriptors=2048, configured_tx_queues=2, > configured_txq_descriptors=2048, dpdk-vf-mac=00:00:00:00:00:00, > lsc_interrupt_mode=false, mtu=1500, requested_rx_queues=1, > requested_rxq_descriptors=2048, requested_tx_queues=2, > requested_txq_descriptors=2048, rx_csum_offload=false, tx_tso_offload=false) > port 4: p0 (dpdk: configured_rx_queues=1, > configured_rxq_descriptors=2048, configured_tx_queues=2, > configured_txq_descriptors=2048, lsc_interrupt_mode=false, mtu=1500, > requested_rx_queues=1, requested_rxq_descriptors=2048, > requested_tx_queues=2, requested_txq_descriptors=2048, > rx_csum_offload=false, tx_tso_offload=false) > port 5: br-phy (tap) > ``` > > > ** 2. Then I start ovs-tcpdump on p0 port on HOST-1 ** > > ``` > [root@localhost ~]# ovs-tcpdump -i p0 > dropped privs to tcpdump > tcpdump: verbose output suppressed, use -v[v]... for full protocol decode > listening on mip0, link-type EN10MB (Ethernet), snapshot length 262144 > bytes > 16:09:39.437730 IP localhost.localdomain.44687 > 192.168.10.10.vxlan: > VXLAN, flags [I] (0x08), vni 100 > IP one.one.one.one > 1.1.1.2: ICMP echo reply, id 37262, seq 1231, length > 64 > 16:09:39.438262 IP localhost.localdomain.44687 > 192.168.10.10.vxlan: > VXLAN, flags [I] (0x08), vni 100 > IP one.one.one.one > 1.1.1.2: ICMP echo reply, id 37262, seq 1231, length > 64 > 16:09:40.438761 IP localhost.localdomain.44687 > 192.168.10.10.vxlan: > VXLAN, flags [I] (0x08), vni 100 > IP one.one.one.one > 1.1.1.2: ICMP echo reply, id 37262, seq 1232, length > 64 > > ### topology changes, add mip0 on br-phy, this is reasonable as > ovs-tcpdump script do this. 
> > [root@localhost ~]# ip a > 33: mip0: <BROADCAST,NOARP,PROMISC,UP,LOWER_UP> mtu 1500 qdisc noqueue > state UNKNOWN group default qlen 1000 > link/ether 9e:5f:ca:f4:18:52 brd ff:ff:ff:ff:ff:ff > inet6 fe80::9c5f:caff:fef4:1852/64 scope link > valid_lft forever preferred_lft forever > [root@localhost ~]# ovs-vsctl show > 62001a07-219d-48d8-ac1d-fb378a3ad231 > Bridge br-int > datapath_type: netdev > Port vxlan1 > Interface vxlan1 > type: vxlan > options: {key=flow, remote_ip=flow} > Port pf0hpf > Interface pf0hpf > type: dpdk > options: {dpdk-devargs="00:0a.0,representor=[65535]"} > Port br-int > Interface br-int > type: internal > Bridge br-phy > datapath_type: netdev > Port p0 > Interface p0 > type: dpdk > options: {dpdk-devargs="0000:00:0a.0"} > Port mip0 > Interface mip0 > Port br-phy > Interface br-phy > type: internal > ovs_version: "2.17.2" > > ### megaflow changes > > [root@localhost ~]# ovs-appctl dpctl/dump-flows > flow-dump from pmd on cpu core: 11 > tunnel(tun_id=0x64,src=192.168.10.10,dst=192.168.10.20,flags(-df-csum+key)),recirc_id(0),in_port(2),packet_type(ns=0,id=0),eth(src=ba:dc:f5:5c:dc:77,dst=82:4f:d9:d2:8a:65),eth_type(0x0800),ipv4(src=1.1.1.2,dst=1.1.1.1,proto=1,frag=no), > packets:418, bytes:40964, offload_packets:0, offload_bytes:0, used:0.918s, > actions:3,1 > recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=02:42:ac:11:00:02,dst=02:42:1a:42:a5:68),eth_type(0x0800),ipv4(src=172.17.0.2,dst=114.114.114.114,proto=17,frag=no),udp(src=34673,dst=53), > packets:0, bytes:0, offload_packets:0, offload_bytes:0, used:never, > actions:6,5 > recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=58:cf:eb:8e:0d:01,dst=da:99:52:a3:c0:4b),eth_type(0x0800),ipv4(src=192.168.10.10,dst=192.168.10.20,proto=17,frag=no),udp(src=49091,dst=4789), > packets:416, bytes:61568, offload_packets:0, offload_bytes:0, used:0.918s, > actions:tnl_pop(2),tnl_pop(2) > 
recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=82:4f:d9:d2:8a:65,dst=ba:dc:f5:5c:dc:77),eth_type(0x0800),ipv4(src=1.1.1.1,dst=1.1.1.2,proto=1,tos=0/0x3,frag=no), > packets:418, bytes:40964, offload_packets:0, offload_bytes:0, used:0.917s, > actions:clone(tnl_push(tnl_port(2),header(size=50,type=4,eth(dst=58:cf:eb:8e:0d:01,src=da:99:52:a3:c0:4b,dl_type=0x0800),ipv4(src=192.168.10.20,dst=192.168.10.10,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=44687,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x64)),out_port(5)),4,6) > > ### Got DUP ICMP replies on HOST-2's veth0, like this > > 64 bytes from 1.1.1.1: icmp_seq=13 ttl=64 time=1.169 ms (DUP!) > 64 bytes from 1.1.1.1: icmp_seq=14 ttl=64 time=0.939 ms > 64 bytes from 1.1.1.1: icmp_seq=14 ttl=64 time=1.037 ms (DUP!) > 64 bytes from 1.1.1.1: icmp_seq=15 ttl=64 time=0.965 ms > ``` > > Additionally, after I stop ovs-tcpdump or bring mip0 down (ip link set mip0 > down), there are no more DUP ICMP replies. > > > ** 3. Here I have 2 questions ** > > 1. Why does megaflow-2 on HOST-1 have two tnl_pop(2) in its actions? > 2. Why are there DUP ICMP replies? > > So, why? Thank you. > > > > > > ---- > Simon Jones > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev