This test creates a raw IPv4 socket, fragments a largish UDP
datagram and sends the fragments out of order.

Then repeats in a loop with different message and fragment lengths.

Then does the same with overlapping fragments (with overlapping
fragments the expectation is that the recv times out).

Tested:

root@<host># time ./ip_defrag.sh
ipv4 defrag
PASS
ipv4 defrag with overlaps
PASS

real    1m7.679s
user    0m0.628s
sys     0m2.242s

A similar test for IPv6 is to follow.

Signed-off-by: Peter Oskolkov <p...@google.com>
Reviewed-by: Willem de Bruijn <will...@google.com>
---
 tools/testing/selftests/net/.gitignore   |   2 +
 tools/testing/selftests/net/Makefile     |   4 +-
 tools/testing/selftests/net/ip_defrag.c  | 313 +++++++++++++++++++++++
 tools/testing/selftests/net/ip_defrag.sh |  29 +++
 4 files changed, 346 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/net/ip_defrag.c
 create mode 100755 tools/testing/selftests/net/ip_defrag.sh

diff --git a/tools/testing/selftests/net/.gitignore 
b/tools/testing/selftests/net/.gitignore
index 78b24cf76f40..2836e0cf2d81 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -14,3 +14,5 @@ udpgso_bench_rx
 udpgso_bench_tx
 tcp_inq
 tls
+ip_defrag
+
diff --git a/tools/testing/selftests/net/Makefile 
b/tools/testing/selftests/net/Makefile
index 9cca68e440a0..cccdb2295567 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,13 +5,13 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh 
rtnetlink.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
 TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
 TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
-TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
+TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 
diff --git a/tools/testing/selftests/net/ip_defrag.c 
b/tools/testing/selftests/net/ip_defrag.c
new file mode 100644
index 000000000000..55fdcdc78eef
--- /dev/null
+++ b/tools/testing/selftests/net/ip_defrag.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+static bool            cfg_do_ipv4;
+static bool            cfg_do_ipv6;
+static bool            cfg_verbose;
+static bool            cfg_overlap;
+static unsigned short  cfg_port = 9000;
+
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) 
};
+
+#define IP4_HLEN       (sizeof(struct iphdr))
+#define IP6_HLEN       (sizeof(struct ip6_hdr))
+#define UDP_HLEN       (sizeof(struct udphdr))
+
+static int msg_len;
+static int max_frag_len;
+
+#define MSG_LEN_MAX    60000   /* Max UDP payload length. */
+
+#define IP4_MF         (1u << 13)  /* IPv4 MF flag. */
+
+static uint8_t udp_payload[MSG_LEN_MAX];
+static uint8_t ip_frame[IP_MAXPACKET];
+static uint16_t ip_id = 0xabcd;
+static int msg_counter;
+static int frag_counter;
+static unsigned int seed;
+
+/* Receive a UDP packet. Validate it matches udp_payload. */
+static void recv_validate_udp(int fd_udp)
+{
+       ssize_t ret;
+       static uint8_t recv_buff[MSG_LEN_MAX];
+
+       ret = recv(fd_udp, recv_buff, msg_len, 0);
+       msg_counter++;
+
+       if (cfg_overlap) {
+               if (ret != -1)
+                       error(1, 0, "recv: expected timeout; got %d; seed = %u",
+                               (int)ret, seed);
+               if (errno != ETIMEDOUT && errno != EAGAIN)
+                       error(1, errno, "recv: expected timeout: %d; seed = %u",
+                                errno, seed);
+               return;  /* OK */
+       }
+
+       if (ret == -1)
+               error(1, errno, "recv: msg_len = %d max_frag_len = %d",
+                       msg_len, max_frag_len);
+       if (ret != msg_len)
+               error(1, 0, "recv: wrong size: %d vs %d", (int)ret, msg_len);
+       if (memcmp(udp_payload, recv_buff, msg_len))
+               error(1, 0, "recv: wrong data");
+}
+
+static uint32_t raw_checksum(uint8_t *buf, int len, uint32_t sum)
+{
+       int i;
+
+       for (i = 0; i < (len & ~1U); i += 2) {
+               sum += (u_int16_t)ntohs(*((u_int16_t *)(buf + i)));
+               if (sum > 0xffff)
+                       sum -= 0xffff;
+       }
+
+       if (i < len) {
+               sum += buf[i] << 8;
+               if (sum > 0xffff)
+                       sum -= 0xffff;
+       }
+
+       return sum;
+}
+
+static uint16_t udp_checksum(struct ip *iphdr, struct udphdr *udphdr)
+{
+       uint32_t sum = 0;
+
+       sum = raw_checksum((uint8_t *)&iphdr->ip_src, 2 * sizeof(iphdr->ip_src),
+                               IPPROTO_UDP + (uint32_t)(UDP_HLEN + msg_len));
+       sum = raw_checksum((uint8_t *)udp_payload, msg_len, sum);
+       sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum);
+       return htons(0xffff & ~sum);
+}
+
+static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen,
+                               struct ip *iphdr, int offset)
+{
+       int frag_len;
+       int res;
+
+       if (msg_len - offset <= max_frag_len) {
+               /* This is the last fragment. */
+               frag_len = IP4_HLEN + msg_len - offset;
+               iphdr->ip_off = htons((offset + UDP_HLEN) / 8);
+       } else {
+               frag_len = IP4_HLEN + max_frag_len;
+               iphdr->ip_off = htons((offset + UDP_HLEN) / 8 | IP4_MF);
+       }
+       iphdr->ip_len = htons(frag_len);
+       memcpy(ip_frame + IP4_HLEN, udp_payload + offset,
+                frag_len - IP4_HLEN);
+
+       res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
+       if (res < 0)
+               error(1, errno, "send_fragment");
+       if (res != frag_len)
+               error(1, 0, "send_fragment: %d vs %d", res, frag_len);
+
+       frag_counter++;
+}
+
+static void send_udp_frags_v4(int fd_raw, struct sockaddr *addr, socklen_t 
alen)
+{
+       struct ip *iphdr = (struct ip *)ip_frame;
+       struct udphdr udphdr;
+       int res;
+       int offset;
+       int frag_len;
+
+       /* Send the UDP datagram using raw IP fragments: the 0th fragment
+        * has the UDP header; other fragments are pieces of udp_payload
+        * split in chunks of frag_len size.
+        *
+        * Odd fragments (1st, 3rd, 5th, etc.) are sent out first, then
+        * even fragments (0th, 2nd, etc.) are sent out.
+        */
+       memset(iphdr, 0, sizeof(*iphdr));
+       iphdr->ip_hl = 5;
+       iphdr->ip_v = 4;
+       iphdr->ip_tos = 0;
+       iphdr->ip_id = htons(ip_id++);
+       iphdr->ip_ttl = 0x40;
+       iphdr->ip_p = IPPROTO_UDP;
+       iphdr->ip_src.s_addr = htonl(INADDR_LOOPBACK);
+       iphdr->ip_dst = addr4;
+       iphdr->ip_sum = 0;
+
+       /* Odd fragments. */
+       offset = 0;
+       while (offset < msg_len) {
+               send_fragment(fd_raw, addr, alen, iphdr, offset);
+               offset += 2 * max_frag_len;
+       }
+
+       if (cfg_overlap) {
+               /* Send an extra random fragment. */
+               offset = rand() % (UDP_HLEN + msg_len - 1);
+               /* sendto() returns EINVAL if offset + frag_len is too small. */
+               frag_len = IP4_HLEN + UDP_HLEN + rand() % 256;
+               iphdr->ip_off = htons(offset / 8 | IP4_MF);
+               iphdr->ip_len = htons(frag_len);
+               res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
+               if (res < 0)
+                       error(1, errno, "sendto overlap");
+               if (res != frag_len)
+                       error(1, 0, "sendto overlap: %d vs %d", (int)res, 
frag_len);
+               frag_counter++;
+       }
+
+       /* Zeroth fragment (UDP header). */
+       frag_len = IP4_HLEN + UDP_HLEN;
+       iphdr->ip_len = htons(frag_len);
+       iphdr->ip_off = htons(IP4_MF);
+
+       udphdr.source = htons(cfg_port + 1);
+       udphdr.dest = htons(cfg_port);
+       udphdr.len = htons(UDP_HLEN + msg_len);
+       udphdr.check = 0;
+       udphdr.check = udp_checksum(iphdr, &udphdr);
+
+       memcpy(ip_frame + IP4_HLEN, &udphdr, UDP_HLEN);
+       res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
+       if (res < 0)
+               error(1, errno, "sendto UDP header");
+       if (res != frag_len)
+               error(1, 0, "sendto UDP header: %d vs %d", (int)res, frag_len);
+       frag_counter++;
+
+       /* Even fragments. */
+       offset = max_frag_len;
+       while (offset < msg_len) {
+               send_fragment(fd_raw, addr, alen, iphdr, offset);
+               offset += 2 * max_frag_len;
+       }
+}
+
+static void run_test(struct sockaddr *addr, socklen_t alen)
+{
+       int fd_tx_udp, fd_tx_raw, fd_rx_udp;
+       struct timeval tv = { .tv_sec = 0, .tv_usec = 10 * 1000 };
+       int idx;
+
+       /* Initialize the payload. */
+       for (idx = 0; idx < MSG_LEN_MAX; ++idx)
+               udp_payload[idx] = idx % 256;
+
+       /* Open sockets. */
+       fd_tx_udp = socket(addr->sa_family, SOCK_DGRAM, 0);
+       if (fd_tx_udp == -1)
+               error(1, errno, "socket tx_udp");
+
+       fd_tx_raw = socket(addr->sa_family, SOCK_RAW, IPPROTO_RAW);
+       if (fd_tx_raw == -1)
+               error(1, errno, "socket tx_raw");
+
+       fd_rx_udp = socket(addr->sa_family, SOCK_DGRAM, 0);
+       if (fd_rx_udp == -1)
+               error(1, errno, "socket rx_udp");
+       if (bind(fd_rx_udp, addr, alen))
+               error(1, errno, "bind");
+       /* Fail fast. */
+       if (setsockopt(fd_rx_udp, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+               error(1, errno, "setsockopt rcv timeout");
+
+       for (msg_len = 1; msg_len < MSG_LEN_MAX; msg_len += (rand() % 4096)) {
+               if (cfg_verbose)
+                       printf("msg_len: %d\n", msg_len);
+               max_frag_len = addr->sa_family == AF_INET ? 8 : 1280;
+               for (; max_frag_len < 1500 && max_frag_len <= msg_len;
+                               max_frag_len += 8) {
+                       send_udp_frags_v4(fd_tx_raw, addr, alen);
+                       recv_validate_udp(fd_rx_udp);
+               }
+       }
+
+       /* Cleanup. */
+       if (close(fd_tx_raw))
+               error(1, errno, "close tx_raw");
+       if (close(fd_tx_udp))
+               error(1, errno, "close tx_udp");
+       if (close(fd_rx_udp))
+               error(1, errno, "close rx_udp");
+
+       if (cfg_verbose)
+               printf("processed %d messages, %d fragments\n",
+                       msg_counter, frag_counter);
+
+       fprintf(stderr, "PASS\n");
+}
+
+
+static void run_test_v4(void)
+{
+       struct sockaddr_in addr = {0};
+
+       addr.sin_family = AF_INET;
+       addr.sin_port = htons(cfg_port);
+       addr.sin_addr = addr4;
+
+       run_test((void *)&addr, sizeof(addr));
+}
+
+static void run_test_v6(void)
+{
+       fprintf(stderr, "NOT IMPL.\n");
+       exit(1);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+       int c;
+
+       while ((c = getopt(argc, argv, "46ov")) != -1) {
+               switch (c) {
+               case '4':
+                       cfg_do_ipv4 = true;
+                       break;
+               case '6':
+                       cfg_do_ipv6 = true;
+                       break;
+               case 'o':
+                       cfg_overlap = true;
+                       break;
+               case 'v':
+                       cfg_verbose = true;
+                       break;
+               default:
+                       error(1, 0, "%s: parse error", argv[0]);
+               }
+       }
+}
+
+int main(int argc, char **argv)
+{
+       parse_opts(argc, argv);
+       seed = time(NULL);
+       srand(seed);
+
+       if (cfg_do_ipv4)
+               run_test_v4();
+       if (cfg_do_ipv6)
+               run_test_v6();
+
+       return 0;
+}
diff --git a/tools/testing/selftests/net/ip_defrag.sh 
b/tools/testing/selftests/net/ip_defrag.sh
new file mode 100755
index 000000000000..53a1ed46790d
--- /dev/null
+++ b/tools/testing/selftests/net/ip_defrag.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a couple of IP defragmentation tests.
+
+set +x
+set -e
+
+echo "ipv4 defrag"
+
+run_v4() {
+sysctl -w net.ipv4.ipfrag_high_thresh=9000000 &> /dev/null
+sysctl -w net.ipv4.ipfrag_low_thresh=7000000 &> /dev/null
+./ip_defrag -4
+}
+export -f run_v4
+
+./in_netns.sh "run_v4"
+
+echo "ipv4 defrag with overlaps"
+run_v4o() {
+sysctl -w net.ipv4.ipfrag_high_thresh=9000000 &> /dev/null
+sysctl -w net.ipv4.ipfrag_low_thresh=7000000 &> /dev/null
+./ip_defrag -4o
+}
+export -f run_v4o
+
+./in_netns.sh "run_v4o"
+
-- 
2.19.0.rc0.228.g281dcd1b4d0-goog

Reply via email to