Re: [PATCH v4 1/1] xdp: Sample xdp program implementing ip forward

2017-11-14 Thread Christina Jacob
On Thu, Nov 9, 2017 at 7:08 AM, Jesper Dangaard Brouer
<bro...@redhat.com> wrote:
> On Wed, 08 Nov 2017 10:40:24 +0900 (KST)
> David Miller <da...@davemloft.net> wrote:
>
>> From: Christina Jacob <christina.jacob.koik...@gmail.com>
>> Date: Sun,  5 Nov 2017 08:52:30 +0530
>>
>> > From: Christina Jacob <christina.ja...@cavium.com>
>> >
>> > Implements port to port forwarding with route table and arp table
>> > lookup for ipv4 packets using bpf_redirect helper function and
>> > lpm_trie  map.
>> >
>> > Signed-off-by: Christina Jacob <christina.ja...@cavium.com>
>>
>> Applied to net-next, thank you.
>
> I've not had time to proper test (and review) this V4 patch, but I
> guess I'll have to do so when I get home from Seoul...
>
> I especially want to measure the effect of using bpf_redirect_map().
> To Christina: what performance improvement did you see on your
> board/arch when switching from bpf_redirect() to bpf_redirect_map()?

ndo_xdp_flush is yet to be implemented in our driver.
So I don't see any difference moving from bpf_redirect to bpf_redirect_map.

>
> --
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer


[PATCH v4 1/1] xdp: Sample xdp program implementing ip forward

2017-11-04 Thread Christina Jacob
From: Christina Jacob <christina.ja...@cavium.com>

Implements port to port forwarding with route table and arp table
lookup for ipv4 packets using bpf_redirect helper function and
lpm_trie  map.

Signed-off-by: Christina Jacob <christina.ja...@cavium.com>
---
 samples/bpf/Makefile   |   4 +
 samples/bpf/xdp_router_ipv4_kern.c | 186 +++
 samples/bpf/xdp_router_ipv4_user.c | 659 +
 3 files changed, 849 insertions(+)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index cf17c79..8504ebb 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -28,6 +28,7 @@ hostprogs-y += test_cgrp2_sock
 hostprogs-y += test_cgrp2_sock2
 hostprogs-y += xdp1
 hostprogs-y += xdp2
+hostprogs-y += xdp_router_ipv4
 hostprogs-y += test_current_task_under_cgroup
 hostprogs-y += trace_event
 hostprogs-y += sampleip
@@ -73,6 +74,7 @@ test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) 
test_cgrp2_sock2.o
 xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
+xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o
 test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \
   test_current_task_under_cgroup_user.o
 trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
@@ -114,6 +116,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o
 always += test_cgrp2_tc_kern.o
 always += xdp1_kern.o
 always += xdp2_kern.o
+always += xdp_router_ipv4_kern.o
 always += test_current_task_under_cgroup_kern.o
 always += trace_event_kern.o
 always += sampleip_kern.o
@@ -160,6 +163,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt
 HOSTLOADLIBES_test_overhead += -lelf -lrt
 HOSTLOADLIBES_xdp1 += -lelf
 HOSTLOADLIBES_xdp2 += -lelf
+HOSTLOADLIBES_xdp_router_ipv4 += -lelf
 HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
 HOSTLOADLIBES_trace_event += -lelf
 HOSTLOADLIBES_sampleip += -lelf
diff --git a/samples/bpf/xdp_router_ipv4_kern.c 
b/samples/bpf/xdp_router_ipv4_kern.c
new file mode 100644
index 000..993f56b
--- /dev/null
+++ b/samples/bpf/xdp_router_ipv4_kern.c
@@ -0,0 +1,186 @@
+/* Copyright (C) 2017 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+#define KBUILD_MODNAME "foo"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "bpf_helpers.h"
+#include 
+#include 
+
+struct trie_value {
+   __u8 prefix[4];
+   __be64 value;
+   int ifindex;
+   int metric;
+   __be32 gw;
+};
+
+/* Key for lpm_trie*/
+union key_4 {
+   u32 b32[2];
+   u8 b8[8];
+};
+
+struct arp_entry {
+   __be64 mac;
+   __be32 dst;
+};
+
+struct direct_map {
+   struct arp_entry arp;
+   int ifindex;
+   __be64 mac;
+};
+
+/* Map for trie implementation*/
+struct bpf_map_def SEC("maps") lpm_map = {
+   .type = BPF_MAP_TYPE_LPM_TRIE,
+   .key_size = 8,
+   .value_size = sizeof(struct trie_value),
+   .max_entries = 50,
+   .map_flags = BPF_F_NO_PREALLOC,
+};
+
+/* Map for counter*/
+struct bpf_map_def SEC("maps") rxcnt = {
+   .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+   .key_size = sizeof(u32),
+   .value_size = sizeof(u64),
+   .max_entries = 256,
+};
+
+/* Map for ARP table*/
+struct bpf_map_def SEC("maps") arp_table = {
+   .type = BPF_MAP_TYPE_HASH,
+   .key_size = sizeof(__be32),
+   .value_size = sizeof(__be64),
+   .max_entries = 50,
+};
+
+/* Map to keep the exact match entries in the route table*/
+struct bpf_map_def SEC("maps") exact_match = {
+   .type = BPF_MAP_TYPE_HASH,
+   .key_size = sizeof(__be32),
+   .value_size = sizeof(struct direct_map),
+   .max_entries = 50,
+};
+
+struct bpf_map_def SEC("maps") tx_port = {
+   .type = BPF_MAP_TYPE_DEVMAP,
+   .key_size = sizeof(int),
+   .value_size = sizeof(int),
+   .max_entries = 100,
+};
+
+/* Function to set source and destination mac of the packet */
+static inline void set_src_dst_mac(void *data, void *src, void *dst)
+{
+   unsigned short *source = src;
+   unsigned short *dest  = dst;
+   unsigned short *p = data;
+
+   __builtin_memcpy(p, dest, 6);
+   __builtin_memcpy(p + 3, source, 6);
+}
+
+/* Parse IPV4 packet to get SRC, DST IP and protocol */
+static inline int parse_ipv4(void *data, u64 nh_off, void *data_end,
+__be32 *src, __be32 *dest)
+{
+   struct iphdr *iph = data + nh_off;
+
+   if (iph + 1 > data_end)
+   return 0;
+   *src = iph->saddr;
+   *dest = iph->daddr;
+   return iph->protocol;
+}
+
+SEC("xdp_router_ipv4")
+int xdp_router_ipv4_prog(struct xdp_md *ctx)
+

[PATCH v4 0/1] XDP program for ip forward

2017-11-04 Thread Christina Jacob
From: Christina Jacob <christina.ja...@cavium.com>

The patch below implements port to port forwarding through route table and arp
table lookup for ipv4 packets using bpf_redirect helper function and lpm_trie
map.  This has an improved performance over the normal kernel stack ip forward.

Implementation details.
---
The program uses one map each for arp table, route table and packet count.
The number of entries the program can process is limited by the size of the
map used.

In the xdp_router_ipv4_user.c,

Initially, the routing table is read and stored in an lpm trie map.
The arp table is read and stored in an array map. There are two netlink sockets
that listen for any change in the route table and arp table.
There are two types of changes to the route table.
1.New

The new entries are added to the lpm_trie with the proper key and prefix
length. If there is another entry in the route table with a different
metric (only the metric is considered), the values are compared and the
one with the lowest metric is added to the node.

2.Deletion

On deletion from the route table, the particular node is removed and the
entire route table is read again to check if there is another entry with
a different metric.

This implementation depends on  bpf: Implement map_delete_elem for
BPF_MAP_TYPE_LPM_TRIE which is not yet upstreamed.

There are two types of changes to the arp table.

1.New

The new arp entries are added to the array map directly, with the
ip address as the key and the destination mac address as the value.

2.Delete

The entry corresponding to the particular ip is deleted from the
arp table map.

Another map is maintained for entries in the route table having a 32 bit mask.
Such entries can have a corresponding arp entry, which is stored together with
the route entry in an array map and can be accessed in O(1) time, eliminating
the trie lookup and arp lookup.

In the xdp_router_ipv4_kern.c,

The array map for the 32 bit mask entries is checked to see if there is a key
that exactly matches the destination ip. If it has a non-zero destination mac
entry, then the xdp data is updated accordingly. Otherwise a proper route and
arp table lookup is done using the lpm_trie and the arp table array map.

Usage: as ./xdp_router_ipv4 -S  (-S for
generic xdp implementation ifindex- the index of the interface to which
the xdp program has to be attached.) in 4.14-rc3 kernel.

Changes from v1 to v2
-

* As suggested by Jesper Dangaard Brouer
1. Changed the program name to  list xdp_router_ipv4
2. Changed the commandline arguments from ifindex list to interface name
Usage : ./xdp_router_ipv4 [-S] 
-S for generic xdp implementation
-interface name list is the list of interfaces to which
the xdp program should attach to

* As suggested by Daniel Borkmann
1. Using __builtin_memcpy to update source and destination mac in the bpf
  kernel program.

2. Started using __be32 in the kernel program to be inline with the data
   type used in user program

3. Rectified few style issues.

* Corrected the copyright issue pointed out by David Ahern

* Fixed the bug: The already attached interfaces are not detached from the
  xdp program if the program fails to attach to an interface later in the list.


Changes from v2 to v3
-
* As pointed out by Jesper Dangaard Brouer
   1. Changed the program name in the cover letter.
   2. Changed variable declarations to follow Reverse-xmas tree
  rule.
   3. Reduced the nesting in code for readability.
   4. Fixed bug: incorrect mac address being set for source and
  destination mac.
   5. Fixed comment style.

* As suggested by Stephen Hemminger 
Changed all the bzeros' to memset.

* As suggested by David Laight
removed the signed remainders calculation.

* As suggested by Stephen Hemminger and David Daney 
1. Added checks for the ioctl return value.
2. Changed data types to be64 to be sure about the size of the
   data type.
3. Verified byte order. Using the mac address from ioctl in
   network byte order; not casting to a long data type
   anymore.
4. Fixed returning address of local variable.

Changes from v3 to v4
-
* As suggested by Jesper,
1. Removed redundant typecastings.
2. Modified program to use bpf_redirect_map for better
   performance.
3. Changed program name in the code as well.


Christina Jacob (1):
  xdp: Sample xdp program implement

[PATCH v3 0/1] XDP program for ip forward

2017-11-01 Thread Christina Jacob
The patch below implements port to port forwarding through route table and arp
table lookup for ipv4 packets using bpf_redirect helper function and lpm_trie
map.  This has an improved performance over the normal kernel stack ip forward.

Implementation details.
---
The program uses one map each for arp table, route table and packet count.
The number of entries the program can process is limited by the size of the
map used.

In the xdp_router_ipv4_user.c,

Initially, the routing table is read and stored in an lpm trie map.
The arp table is read and stored in an array map. There are two netlink sockets
that listen for any change in the route table and arp table.
There are two types of changes to the route table.
1.New

The new entries are added to the lpm_trie with the proper key and prefix
length. If there is another entry in the route table with a different
metric (only the metric is considered), the values are compared and the
one with the lowest metric is added to the node.

2.Deletion

On deletion from the route table, the particular node is removed and the
entire route table is read again to check if there is another entry with
a different metric.

This implementation depends on  bpf: Implement map_delete_elem for
BPF_MAP_TYPE_LPM_TRIE which is not yet upstreamed.

There are two types of changes to the arp table.

1.New

The new arp entries are added to the array map directly, with the
ip address as the key and the destination mac address as the value.

2.Delete

The entry corresponding to the particular ip is deleted from the
arp table map.

Another map is maintained for entries in the route table having a 32 bit mask.
Such entries can have a corresponding arp entry, which is stored together with
the route entry in an array map and can be accessed in O(1) time, eliminating
the trie lookup and arp lookup.

In the xdp_router_ipv4_kern.c,

The array map for the 32 bit mask entries is checked to see if there is a key
that exactly matches the destination ip. If it has a non-zero destination mac
entry, then the xdp data is updated accordingly. Otherwise a proper route and
arp table lookup is done using the lpm_trie and the arp table array map.

Usage: as ./xdp_router_ipv4 -S  (-S for
generic xdp implementation ifindex- the index of the interface to which
the xdp program has to be attached.) in 4.14-rc3 kernel.

Changes from v1 to v2
-

* As suggested by Jesper Dangaard Brouer
1. Changed the program name to  list xdp_router_ipv4
2. Changed the commandline arguments from ifindex list to interface name
Usage : ./xdp_router_ipv4 [-S] 
-S for generic xdp implementation
-interface name list is the list of interfaces to which
the xdp program should attach to

* As suggested by Daniel Borkmann
1. Using __builtin_memcpy to update source and destination mac in the bpf
  kernel program.

2. Started using __be32 in the kernel program to be inline with the data
   type used in user program

3. Rectified few style issues.

* Corrected the copyright issue pointed out by David Ahern

* Fixed the bug: The already attached interfaces are not detached from the
  xdp program if the program fails to attach to an interface later in the list.


Changes from v2 to v3
-
* As pointed out by Jesper Dangaard Brouer
   1. Changed the program name in the cover letter.
   2. Changed variable declarations to follow Reverse-xmas tree
  rule.
   3. Reduced the nesting in code for readability.
   4. Fixed bug: incorrect mac address being set for source and
  destination mac.
   5. Fixed comment style.

* As suggested by Stephen Hemminger 
Changed all the bzeros' to memset.

* As suggested by David Laight
removed the signed remainders calculation.

* As suggested by Stephen Hemminger and David Daney 
1. Added checks for the ioctl return value.
2. Changed data types to be64 to be sure about the size of the
   data type.
3. Verified byte order. Using the mac address from ioctl in
   network byte order; not casting to a long data type
   anymore.
4. Fixed returning address of local variable.


Christina Jacob (1):
  xdp: Sample xdp program implementing ip forward

 samples/bpf/Makefile   |   4 +
 samples/bpf/xdp_router_ipv4_kern.c | 181 ++
 samples/bpf/xdp_router_ipv4_user.c | 657 +
 3 files changed, 842 insertions(+)
 create mode 100644 samples/bpf/xdp_router_ipv4_kern.c
 create mode 100644 samples/bpf/xdp_router_ipv4_user.c

-- 
2.7.4



[PATCH v3 1/1] xdp: Sample xdp program implementing ip forward

2017-11-01 Thread Christina Jacob
From: Christina Jacob <christina.ja...@cavium.com>

Implements port to port forwarding with route table and arp table
lookup for ipv4 packets using bpf_redirect helper function and
lpm_trie  map.
Signed-off-by: Christina Jacob <christina.ja...@cavium.com>
---
 samples/bpf/Makefile   |   4 +
 samples/bpf/xdp_router_ipv4_kern.c | 181 ++
 samples/bpf/xdp_router_ipv4_user.c | 657 +
 3 files changed, 842 insertions(+)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index cf17c79..8504ebb 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -28,6 +28,7 @@ hostprogs-y += test_cgrp2_sock
 hostprogs-y += test_cgrp2_sock2
 hostprogs-y += xdp1
 hostprogs-y += xdp2
+hostprogs-y += xdp_router_ipv4
 hostprogs-y += test_current_task_under_cgroup
 hostprogs-y += trace_event
 hostprogs-y += sampleip
@@ -73,6 +74,7 @@ test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) 
test_cgrp2_sock2.o
 xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
+xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o
 test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \
   test_current_task_under_cgroup_user.o
 trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
@@ -114,6 +116,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o
 always += test_cgrp2_tc_kern.o
 always += xdp1_kern.o
 always += xdp2_kern.o
+always += xdp_router_ipv4_kern.o
 always += test_current_task_under_cgroup_kern.o
 always += trace_event_kern.o
 always += sampleip_kern.o
@@ -160,6 +163,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt
 HOSTLOADLIBES_test_overhead += -lelf -lrt
 HOSTLOADLIBES_xdp1 += -lelf
 HOSTLOADLIBES_xdp2 += -lelf
+HOSTLOADLIBES_xdp_router_ipv4 += -lelf
 HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
 HOSTLOADLIBES_trace_event += -lelf
 HOSTLOADLIBES_sampleip += -lelf
diff --git a/samples/bpf/xdp_router_ipv4_kern.c 
b/samples/bpf/xdp_router_ipv4_kern.c
new file mode 100644
index 000..70a5907
--- /dev/null
+++ b/samples/bpf/xdp_router_ipv4_kern.c
@@ -0,0 +1,181 @@
+/* Copyright (C) 2017 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+#define KBUILD_MODNAME "foo"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "bpf_helpers.h"
+#include 
+#include 
+
+struct trie_value {
+   __u8 prefix[4];
+   __be32 ifindex;
+   __be32 metric;
+   __be64 value;
+   __be32 gw;
+};
+
+/* Key for lpm_trie*/
+union key_4 {
+   u32 b32[2];
+   u8 b8[8];
+};
+
+struct arp_entry {
+   __be64 mac;
+   __be32 dst;
+};
+
+struct direct_map {
+   struct arp_entry arp;
+   int ifindex;
+   __be64 mac;
+};
+
+/* Map for trie implementation*/
+struct bpf_map_def SEC("maps") lpm_map = {
+   .type = BPF_MAP_TYPE_LPM_TRIE,
+   .key_size = 8,
+   .value_size = sizeof(struct trie_value),
+   .max_entries = 50,
+   .map_flags = BPF_F_NO_PREALLOC,
+};
+
+/* Map for counter*/
+struct bpf_map_def SEC("maps") rxcnt = {
+   .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+   .key_size = sizeof(u32),
+   .value_size = sizeof(u64),
+   .max_entries = 256,
+};
+
+/* Map for ARP table*/
+struct bpf_map_def SEC("maps") arp_table = {
+   .type = BPF_MAP_TYPE_HASH,
+   .key_size = sizeof(__be32),
+   .value_size = sizeof(__be64),
+   .max_entries = 50,
+};
+
+/* Map to keep the exact match entries in the route table*/
+struct bpf_map_def SEC("maps") exact_match = {
+   .type = BPF_MAP_TYPE_HASH,
+   .key_size = sizeof(__be32),
+   .value_size = sizeof(struct direct_map),
+   .max_entries = 50,
+};
+
+/* Function to set source and destination mac of the packet */
+static inline void set_src_dst_mac(void *data, void *src, void *dst)
+{
+   unsigned short *source = src;
+   unsigned short *dest  = dst;
+   unsigned short *p = data;
+
+   __builtin_memcpy(p, dest, 6);
+   __builtin_memcpy(p + 3, source, 6);
+}
+
+/* Parse IPV4 packet to get SRC, DST IP and protocol */
+static inline int parse_ipv4(void *data, u64 nh_off, void *data_end,
+__be32 *src, __be32 *dest)
+{
+   struct iphdr *iph = data + nh_off;
+
+   if (iph + 1 > data_end)
+   return 0;
+   *src = iph->saddr;
+   *dest = iph->daddr;
+   return iph->protocol;
+}
+
+SEC("xdp3")
+int xdp_prog3(struct xdp_md *ctx)
+{
+   void *data_end = (void *)(long)ctx->data_end;
+   __be64 *dest_mac = NULL, *src_mac = NULL;
+   void *data = (void *)(long)ctx->data;
+   struct trie_value *prefix_value;
+   int rc = XD

Re: [PATCH v2] xdp: Sample xdp program implementing ip forward

2017-10-28 Thread Christina Jacob
On Wed, Oct 11, 2017 at 3:07 AM, David Daney <dda...@caviumnetworks.com> wrote:
> On 10/10/2017 10:19 AM, Stephen Hemminger wrote:
>>
>> On Tue, 10 Oct 2017 12:58:52 +0530
>> Christina Jacob <christina.jacob.koik...@gmail.com> wrote:
>>
>>> +/* Get the mac address of the interface given interface name */
>>> +static long *getmac(char *iface)
>>> +{
>>> +   int fd;
>>> +   struct ifreq ifr;
>>> +   long *mac = NULL;
>>> +
>>> +   fd = socket(AF_INET, SOCK_DGRAM, 0);
>>> +   ifr.ifr_addr.sa_family = AF_INET;
>>> +   strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1);
>>> +   ioctl(fd, SIOCGIFHWADDR, );
>>> +   mac = (long *)ifr.ifr_hwaddr.sa_data;
>>> +   close(fd);
>>> +   return mac;
>>
>>
>> Always check return value of ioctl.
>> You are assuming sizeof(long) > 6 bytes.
>> Also the byte order.
>
>
>
> Also:
>
> Returning the address of a local variable (ifr.ifr_hwaddr.sa_data), and then
> dereferencing it outside of the function is not correct.
>
> The casting of the char sa_data[] to a long * may cause alignment faults on
> some architectures.  The may also be endinaness issues depending on how the
> data are manipulated if you pack all those chars into a long.
>
> If we think that a MAC address is char[6], then it may be best to define the
> data structures as such and manipulate it as an array instead of trying to
> pack it into a long.

How do I feed the MAC address to xdp.data? Is it ok to do a manual
left shift + bitwise AND for the purpose?

>
> Keep working on this though, this program will surely be useful.
>
> David Daney


Re: [PATCH v2] XDP Program for Ip forward

2017-10-11 Thread Christina Jacob
On Tue, Oct 10, 2017 at 7:30 PM, Jesper Dangaard Brouer
 wrote:
>
> On Tue, 10 Oct 2017 15:12:31 +0200
> Jesper Dangaard Brouer  wrote:
>
> > I'll try to test/benchmark your program...
>
> In my initial testing, I cannot get this to work...
>

What is the test setup you are using, so that I can also test it?
I verified the program in this minimal test setup and did not see any
issue with the mac addresses being set.

Below is the test setup.

machine 1 machine 2
(90.0.0.2)port 1 ===>port3(90.0.0.1)
   ||
(80.0.0.2)port 2 You do seem to XDP_REDIRECT out the right interface, but you have an
> error with setting the correct MAC address.
>
> --
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer


[PATCH v2] XDP Program for Ip forward

2017-10-10 Thread Christina Jacob
The patch below implements port to port forwarding through route table and arp
table lookup for ipv4 packets using bpf_redirect helper function and lpm_trie
map.  This has an improved performance over the normal kernel stack ip forward.

Implementation details.
---
The program uses one map each for arp table, route table and packet count.
The number of entries the program can process is limited by the size of the
map used.

In the xdp3_user.c,

Initially, the routing table is read and stored in an lpm trie map.
The arp table is read and stored in an array map. There are two netlink sockets
that listen for any change in the route table and arp table.
There are two types of changes to the route table.
1.New

The new entries are added to the lpm_trie with the proper key and prefix
length. If there is another entry in the route table with a different
metric (only the metric is considered), the values are compared and the
one with the lowest metric is added to the node.

2.Deletion 

On deletion from the route table, the particular node is removed and the
entire route table is read again to check if there is another entry with
a different metric.

This implementation depends on  bpf: Implement map_delete_elem for
BPF_MAP_TYPE_LPM_TRIE which is not yet upstreamed.

There are two types of changes to the arp table.

1.New

The new arp entries are added to the array map directly, with the
ip address as the key and the destination mac address as the value.

2.Delete 

The entry corresponding to the particular ip is deleted from the 
arp table map.

Another map is maintained for entries in the route table having a 32 bit mask.
Such entries can have a corresponding arp entry, which is stored together with
the route entry in an array map and can be accessed in O(1) time, eliminating
the trie lookup and arp lookup.

In the xdp3_kern.c,

The array map for the 32 bit mask entries is checked to see if there is a key
that exactly matches the destination ip. If it has a non-zero destination mac
entry, then the xdp data is updated accordingly. Otherwise a proper route and
arp table lookup is done using the lpm_trie and the arp table array map.

Usage: as ./xdp3 -S  (-S for
generic xdp implementation ifindex- the index of the interface to which
the xdp program has to be attached.) in 4.14-rc3 kernel.

Changes from v1 to v2
-
 
* As suggested by Jesper Dangaard Brouer
1. Changed the program name to  list xdp_router_ipv4
2. Changed the commandline arguments from ifindex list to interface name
Usage : ./xdp_router_ipv4 [-S] 
-S for generic xdp implementation
-interface name list is the list of interfaces to which
the xdp program should attach to

* As suggested by Daniel Borkmann
1. Using __builtin_memcpy to update source and destination mac in the bpf
  kernel program.

2. Started using __be32 in the kernel program to be inline with the data
   type used in user program

3. Rectified few style issues.

* Corrected the copyright issue pointed out by David Ahern 

* Fixed the bug: The already attached interfaces are not detached from the 
  xdp program if the program fails to attach to an interface later in the list.


Christina Jacob (1):
  xdp: Sample xdp program implementing ip forward

 samples/bpf/Makefile   |4 +
 samples/bpf/xdp_router_ipv4_kern.c |  189 +++
 samples/bpf/xdp_router_ipv4_user.c |  655 
 3 files changed, 848 insertions(+), 0 deletions(-)
 create mode 100644 samples/bpf/xdp_router_ipv4_kern.c
 create mode 100644 samples/bpf/xdp_router_ipv4_user.c


[PATCH v2] xdp: Sample xdp program implementing ip forward

2017-10-10 Thread Christina Jacob
Implements port to port forwarding with route table and arp table
lookup for ipv4 packets using bpf_redirect helper function and
lpm_trie  map.

Signed-off-by: Christina Jacob <christina.ja...@cavium.com>
---
 samples/bpf/Makefile   |4 +
 samples/bpf/xdp_router_ipv4_kern.c |  189 +++
 samples/bpf/xdp_router_ipv4_user.c |  655 
 3 files changed, 848 insertions(+), 0 deletions(-)
 create mode 100644 samples/bpf/xdp_router_ipv4_kern.c
 create mode 100644 samples/bpf/xdp_router_ipv4_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index cf17c79..8504ebb 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -28,6 +28,7 @@ hostprogs-y += test_cgrp2_sock
 hostprogs-y += test_cgrp2_sock2
 hostprogs-y += xdp1
 hostprogs-y += xdp2
+hostprogs-y += xdp_router_ipv4
 hostprogs-y += test_current_task_under_cgroup
 hostprogs-y += trace_event
 hostprogs-y += sampleip
@@ -73,6 +74,7 @@ test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) 
test_cgrp2_sock2.o
 xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
+xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o
 test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \
   test_current_task_under_cgroup_user.o
 trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
@@ -114,6 +116,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o
 always += test_cgrp2_tc_kern.o
 always += xdp1_kern.o
 always += xdp2_kern.o
+always += xdp_router_ipv4_kern.o
 always += test_current_task_under_cgroup_kern.o
 always += trace_event_kern.o
 always += sampleip_kern.o
@@ -160,6 +163,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt
 HOSTLOADLIBES_test_overhead += -lelf -lrt
 HOSTLOADLIBES_xdp1 += -lelf
 HOSTLOADLIBES_xdp2 += -lelf
+HOSTLOADLIBES_xdp_router_ipv4 += -lelf
 HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
 HOSTLOADLIBES_trace_event += -lelf
 HOSTLOADLIBES_sampleip += -lelf
diff --git a/samples/bpf/xdp_router_ipv4_kern.c 
b/samples/bpf/xdp_router_ipv4_kern.c
new file mode 100644
index 000..c2bfe40
--- /dev/null
+++ b/samples/bpf/xdp_router_ipv4_kern.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2017 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+#define KBUILD_MODNAME "foo"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "bpf_helpers.h"
+#include 
+#include 
+
+struct trie_value {
+   __u8 prefix[4];
+   long value;
+   int gw;
+   int ifindex;
+   int metric;
+};
+
+/*Key for lpm_trie*/
+union key_4 {
+   u32 b32[2];
+   u8 b8[8];
+};
+
+struct arp_entry {
+   int dst;
+   long mac;
+};
+
+struct direct_map {
+   long mac;
+   int ifindex;
+   struct arp_entry arp;
+};
+
+/* Map for trie implementation*/
+struct bpf_map_def SEC("maps") lpm_map = {
+   .type = BPF_MAP_TYPE_LPM_TRIE,
+   .key_size = 8,
+   .value_size = sizeof(struct trie_value),
+   .max_entries = 50,
+   .map_flags = BPF_F_NO_PREALLOC,
+};
+
+/* Map for counter*/
+struct bpf_map_def SEC("maps") rxcnt = {
+   .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+   .key_size = sizeof(u32),
+   .value_size = sizeof(long),
+   .max_entries = 256,
+};
+
+/* Map for ARP table*/
+struct bpf_map_def SEC("maps") arp_table = {
+   .type = BPF_MAP_TYPE_HASH,
+   .key_size = sizeof(int),
+   .value_size = sizeof(long),
+   .max_entries = 50,
+};
+
+/* Map to keep the exact match entries in the route table*/
+struct bpf_map_def SEC("maps") exact_match = {
+   .type = BPF_MAP_TYPE_HASH,
+   .key_size = sizeof(int),
+   .value_size = sizeof(struct direct_map),
+   .max_entries = 50,
+};
+
+/* Function to set source and destination mac of the packet */
+static inline void set_src_dst_mac(void *data, void *src, void *dst)
+{
+   unsigned short *p  = data;
+   unsigned short *dest   = dst;
+   unsigned short *source = src;
+
+   __builtin_memcpy(p, dest, 3);
+   __builtin_memcpy(p + 3, source, 3);
+}
+
+/* Parse IPV4 packet to get SRC, DST IP and protocol */
+static inline int parse_ipv4(void *data, u64 nh_off, void *data_end,
+__be32 *src, __be32 *dest)
+{
+   struct iphdr *iph = data + nh_off;
+
+   if (iph + 1 > data_end)
+   return 0;
+   *src = (__be32)iph->saddr;
+   *dest = (__be32)iph->daddr;
+   return iph->protocol;
+}
+
+SEC("xdp3")
+int xdp_prog3(struct xdp_md *ctx)
+{
+   void *data_end = (void *)(long)ctx->data_end;
+   void *data = (void *)(long)ctx->data;
+   struct ethhdr *eth = data;
+