Hi all,

On behalf of Cisco Systems I am authorized to contribute a new transport
to the network subsystem in qemu.

Specifically, we would like to contribute a new transport:

L2TPv3 static tunnel network transport.

Earlier versions of this (we have patchsets going back to qemu-1.0) have
been used extensively as a testing tool in our group and will be shipped
in product at a later date. We also use its counterpart designed for UML
in a soon to ship product.

The idea is quite simple - instead of inventing "yet another weird and
wonderful" encapsulation of the day for virtual transports, we use a
well established industry standard. Specifically we use L2TPv3 static
tunnels. This is very similar to what qemu has been doing in the storage
area for a long time (the iSCSI storage driver).

This allows:

1. Qemu to communicate with any other qemu, UML, router, firewall or
network device directly with no mediation by a virtual switch. Local and
remote.

2. Qemu to communicate with any modern linux distribution directly
(including communicating with containers). Local and remote.

3. Qemu to communicate with the local host, remote vms, network devices,
etc at speeds which for a number of use cases exceed the speed of the
legacy tap driver.

Our suggestion would be that this over time obsoletes the UDP variety of
the "socket" driver. It has a number of advantages over that driver:

    * Standard compliant - RFC 3931 updated recently by RFC 5641
    * Supported on most network equipment
    * Allows to move virtual switching off-host to a high performance
physical switch if needed
    * Allows to mix virtual and physical switching (well supported on
modern Linuxes and other OSes)
    * Well researched security profile as well as established
interactions with IPSEC allowing to extend virtual networks outside the
datacenter to remote physical devices and/or VMs.

This transport uses a linux specific call to get several GBit RX rate.
This call can be wrapped (at some cost) in a compatibility loop using
posix compliant recvmsg instead for other systems. As I am not familiar
with the fine details on how to add linux specific, windows specific,
etc bits to qemu I have decided to leave that bit out for the time being
and submit it "as we use it".

Patch attached.

Best Regards,

Anton

diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
index 45588d7..865f1c2 100644
--- a/include/qemu/sockets.h
+++ b/include/qemu/sockets.h
@@ -79,6 +79,7 @@ int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp);
 
 /* Old, ipv4 only bits.  Don't use for new code. */
 int parse_host_port(struct sockaddr_in *saddr, const char *str);
+int parse_host_port6(struct sockaddr_in6 *saddr, const char *str);
 int socket_init(void);
 
 #endif /* QEMU_SOCKET_H */
diff --git a/net/Makefile.objs b/net/Makefile.objs
index 4854a14..364c785 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,6 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
 common-obj-y += socket.o
 common-obj-y += dump.o
 common-obj-y += eth.o
+common-obj-y += l2tpv3.o
 common-obj-$(CONFIG_POSIX) += tap.o
 common-obj-$(CONFIG_LINUX) += tap-linux.o
 common-obj-$(CONFIG_WIN32) += tap-win32.o
diff --git a/net/clients.h b/net/clients.h
index 7793294..ea2a831 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -47,6 +47,11 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
 int net_init_bridge(const NetClientOptions *opts, const char *name,
                     NetClientState *peer);
 
+int net_init_raw(const NetClientOptions *opts, const char *name,
+                    NetClientState *peer);
+
+int net_init_l2tpv3(const NetClientOptions *opts, const char *name,
+                    NetClientState *peer);
 #ifdef CONFIG_VDE
 int net_init_vde(const NetClientOptions *opts, const char *name,
                  NetClientState *peer);
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
new file mode 100644
index 0000000..4b5d8ee
--- /dev/null
+++ b/net/l2tpv3.c
@@ -0,0 +1,630 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2012 Cisco Systems
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "net/l2tpv3.h"
+#include "config-host.h"
+#include "config-host.h"
+#include "net/net.h"
+#include "clients.h"
+#include "monitor/monitor.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+#include <linux/ip.h>
+
+
+
+
+#define PAGE_SIZE 4096
+#define MAX_L2TPV3_IOVCNT 64
+
+typedef struct NetL2TPV3State {
+    NetClientState nc;
+    int fd;
+    int state; /* 0 = getting length, 1 = getting data */
+    unsigned int index;
+    unsigned int packet_len;
+    /* 
+	these are used for xmit - that happens packet a time
+	and for first sign of life packet (easier to parse that once)
+
+    */ 
+    uint8_t * header_buf;
+    uint8_t * buf;
+    struct iovec * vec;
+    /* 
+	these are used for receive - try to "eat" up to 32 packets at a time
+    */
+    struct mmsghdr * msgvec;
+    /*
+      contains inet host and port destination if 
+      connectionless (SOCK_DGRAM). Will be NULL if 
+      in listen mode waiting for first connect
+
+    */
+    struct sockaddr_storage * dgram_dst; 
+    uint64_t rx_cookie;
+    uint64_t tx_cookie;
+    uint32_t rx_session;
+    uint32_t tx_session;
+    uint32_t header_size;
+    uint32_t counter;
+    uint32_t dst_size;
+    uint32_t new_mode;
+   
+    /* offsets */ 
+
+    uint32_t offset;   /* main offset == header offset */
+    uint32_t cookie_offset;
+    uint32_t counter_offset;
+    uint32_t session_offset;
+
+} NetL2TPV3State;
+
+typedef struct NetL2TPV3ListenState {
+    NetClientState nc;
+    char *model;
+    char *name;
+    int fd;
+} NetL2TPV3ListenState;
+
+static int l2tpv3_parse_cookie32(const char *src , void * dst) {
+
+  if (
+	 (src == NULL) || 
+	 (sscanf(src, "%x", (unsigned int *) dst) != 1)
+      ) { 
+	 fprintf(stderr, "cannot parse cookie!!!: %s\n", src);
+	 return -1;
+   } 
+
+   return 0;
+}
+
+static int l2tpv3_parse_cookie64(const char *src , void * dst) {
+
+   struct temphtonl temph;
+   uint32_t temp;
+   const int num = 42;
+    if (
+	 (src == NULL) || 
+	 (sscanf(src, "%lx", (long unsigned int *) &temph) != 1)
+      ) { 
+	 fprintf(stderr, "cannot parse cookie!!!: %s\n", src);
+	 return -1;
+   } 
+
+   if(*(char *)&num == 42) {
+      // why oh why there is no htonll
+      temp = htonl(temph.high);
+      temph.high = htonl(temph.low);
+      temph.low = temp;
+   } else {
+      temph.low = htonl(temph.low); 
+      temph.high = htonl(temph.high);
+   }	
+
+   memcpy(dst, &temph, sizeof (uint64_t));
+   return 0;
+}
+
+
+static int l2tpv3_form_header(NetL2TPV3State *s) {
+   
+   uint32_t *header;
+   uint32_t *session;
+   uint64_t *cookie64;
+   uint32_t *cookie32;
+   uint32_t *counter;
+
+   if (s->new_mode & NEW_MODE_UDP) {
+     header = (uint32_t *) s->header_buf;
+     * header = htonl(0x30000);
+   }
+   session = (uint32_t *) (s->header_buf + s->session_offset);
+   * session = s->tx_session;	
+
+   if (s->new_mode & NEW_MODE_COOKIE) {
+      if (s->new_mode & NEW_MODE_COOKIE_SIZE) {
+	 cookie64 = (uint64_t *)(s->header_buf + s->cookie_offset);
+	 * cookie64 = s->tx_cookie;
+      } else {
+	 cookie32 = (uint32_t *) (s->header_buf + s->cookie_offset);
+	 * cookie32 = * ((uint32_t *) s->tx_cookie);
+      }
+   }
+   
+   if (!(s->new_mode & NEW_MODE_NO_COUNTER)) {
+      counter = (uint32_t *)(s->header_buf + s->counter_offset);
+      * counter = htonl(++ s->counter);
+   }
+
+   return 0;
+}
+
+
+static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+
+    struct msghdr message;
+    int ret;
+    if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
+	fprintf(stderr, "iovec too long %d > %d, change l2tpv3.h\n", iovcnt, MAX_L2TPV3_IOVCNT);
+	return -1;
+    }
+    l2tpv3_form_header(s);
+    memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
+    s->vec->iov_base = s->header_buf;
+    s->vec->iov_len = s->offset;
+    if (s->dgram_dst) {
+       message.msg_name = s->dgram_dst;
+       message.msg_namelen = s->dst_size;
+       message.msg_iov = (struct iovec *) s->vec;
+       message.msg_iovlen = iovcnt + 1; 
+       message.msg_control = NULL;
+       message.msg_controllen = 0;
+       message.msg_flags = 0;
+       ret = sendmsg(s->fd, &message, MSG_DONTWAIT) - s->offset;
+       if (ret == 0) {
+	    ret = iov_size (iov, iovcnt);
+       }
+   } else {
+      fprintf(stderr, "no dst yet\n");
+      ret = iov_size (iov, iovcnt); /* silently drop, otherwise it is being stupid */
+   }
+    return ret;
+}
+
+
+static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc, const uint8_t *buf, size_t size)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    struct iovec * vec;
+
+    struct msghdr message;
+    l2tpv3_form_header(s);
+    vec = s->vec;
+    vec->iov_base = s->header_buf;
+    vec->iov_len = s->offset;
+    vec++;
+    vec->iov_base = (void *) buf;
+    vec->iov_len = size;
+
+    if (s->dgram_dst) {
+       message.msg_name = s->dgram_dst;
+       message.msg_namelen = s->dst_size;
+       message.msg_iov = (struct iovec *) s->vec;
+       message.msg_iovlen = 2;
+       message.msg_control = NULL;
+       message.msg_controllen = 0;
+       message.msg_flags = 0;
+       size = sendmsg(s->fd, &message, 0) - s->offset;
+   } else {
+      fprintf(stderr, "no dst yet\n");
+   }
+   return size; /* silently drop, otherwise it is being stupid */
+}
+
+static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf) {
+   uint64_t *cookie64;
+   uint32_t *cookie32;
+   uint32_t *session;
+
+
+    if ((!(s->new_mode & NEW_MODE_IP_VERSION)) && (!(s->new_mode & NEW_MODE_UDP))){
+	buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
+    } 
+
+
+   if (s->new_mode & NEW_MODE_COOKIE) {
+      if (s->new_mode & NEW_MODE_COOKIE_SIZE) {
+	 /* 64 bit cookie */
+	 cookie64 = (uint64_t *)(buf + s->cookie_offset);
+	 if (*cookie64 != s->rx_cookie) {
+	    fprintf(stderr, "unknown cookie id\n");
+	    return -1; /* we need to return 0, otherwise barfus */
+	 }
+      } else {
+	 cookie32 = (uint32_t *)(buf + s->cookie_offset);
+	 if (*cookie32 != * (uint32_t *) &s->rx_cookie) {
+	    fprintf(stderr,"unknown cookie id\n");
+	    return -1 ; /* we need to return 0, otherwise barfus */
+	 }
+      }
+   }
+   session = (uint32_t *) (buf + s->session_offset);
+   if (* session != s->rx_session) {
+      fprintf(stderr,"session mismatch\n");
+      return -1;
+   }	
+   return 0;
+}
+
+static void net_l2tpv3_send(void *opaque)
+{
+    NetL2TPV3State *s = opaque;
+    
+    int i, count, offset;
+    
+    struct mmsghdr * msgvec;
+    struct iovec * vec;
+    
+    
+    msgvec = s->msgvec;
+    offset = s->offset;
+    if ((!(s->new_mode & NEW_MODE_IP_VERSION)) && (!(s->new_mode & NEW_MODE_UDP))){
+	offset +=   sizeof(struct iphdr);
+    }  
+    count = recvmmsg(s->fd, msgvec, MAX_L2TPV3_IOVCNT/2, MSG_DONTWAIT, NULL);
+    for (i=0;i<count;i++) {
+	if (msgvec->msg_len > 0) {
+	    vec = msgvec->msg_hdr.msg_iov;
+	    if (l2tpv3_verify_header(s, vec->iov_base) == 0) {
+		//vec->iov_len = offset; /* reset size just in case*/
+		vec++;
+		qemu_send_packet(&s->nc, vec->iov_base, msgvec->msg_len - offset);
+	    } else {
+		fprintf(stderr, "l2tpv3 header verification failed\n");
+		vec->iov_len = offset;
+		vec++;
+	    }
+	    //vec->iov_len = PAGE_SIZE; /* reset for next read */
+	} 
+	msgvec++;
+    }
+}
+
+static void net_l2tpv3_cleanup(NetClientState *nc) 
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+    close(s->fd);
+}
+
+static NetClientInfo net_l2tpv3_info = {
+    .type = NET_CLIENT_OPTIONS_KIND_L2TPV3,
+    .size = sizeof(NetL2TPV3State),
+    .receive = net_l2tpv3_receive_dgram,
+    .receive_iov = net_l2tpv3_receive_dgram_iov,
+    .cleanup = net_l2tpv3_cleanup,
+};
+
+static int net_l2tpv3_init(NetClientState *peer,
+                                 const char *name,
+                                 const char *src,
+                                 const char *dst,
+                                 const char *new_mode,
+                                 const char *tx_cookie,
+                                 const char *rx_cookie,
+                                 const char *tx_session,
+                                 const char *rx_session
+)
+{
+   NetL2TPV3State *s;
+   NetClientState *nc;
+
+   int fd, i;
+
+   int sock_family, sock_type, sock_proto;
+   unsigned int temp;
+   
+   struct mmsghdr * msgvec;
+   struct iovec * iov;
+
+
+   struct sockaddr_storage LocalSock;
+
+       
+   fprintf(stderr, "l2tpv3 user init mode %s\n", new_mode);
+   memset(&LocalSock, 0 , sizeof(LocalSock));
+
+   nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
+
+   s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+   s->header_buf = g_malloc(256);    
+
+   if (s->header_buf == NULL) {
+	fprintf(stderr, "Failed to allocate header buffer \n");
+	return -1;
+   } 
+   s->buf = g_malloc(PAGE_SIZE);    
+
+   if (s->buf == NULL) {
+	fprintf(stderr, "Failed to allocate packet buffer \n");
+	return -1;
+   } 
+   s->vec = g_malloc(sizeof(struct iovec) * MAX_L2TPV3_IOVCNT);;    
+   if (s->vec == NULL) {
+	fprintf(stderr, "Failed to allocate packet vec \n");
+	return -1;
+   } 
+
+   
+   s->offset = 4;
+   s->session_offset = 0;
+   s->cookie_offset = 4;
+   s->counter_offset = 4;
+   s->dst_size = sizeof(struct sockaddr_storage);
+
+   sscanf(new_mode, "%x", &s->new_mode);
+
+
+   fprintf(stderr, "type is: %s -> %x\n", new_mode, s->new_mode);
+
+   /* Cookies */ 
+
+  if (
+	 (tx_session == NULL) || 
+	 (sscanf(tx_session, "%x", &temp) != 1)
+      ) { 
+	 fprintf(stderr, "cannot parse tx_session!!!: %s\n", tx_session);
+	 return -1;
+   } else {
+      s->tx_session = htonl(temp);
+      fprintf(stderr, "l2tpv3_open :  parsed tx session 32, %0x\n", s->tx_session);
+   }
+
+  if (
+	 (tx_session == NULL) || 
+	 (sscanf(rx_session, "%x", &temp) != 1)
+      ) { 
+	 fprintf(stderr, "cannot parse rx_session!!!: %s\n", rx_session);
+	 return -1;
+   }  else {
+      s->rx_session = htonl(temp);
+      fprintf(stderr, "l2tpv3_open :  parsed rx session 32, %0x\n", s->rx_session);
+   }
+   if (s->new_mode & NEW_MODE_COOKIE) {
+      if (s->new_mode & NEW_MODE_COOKIE_SIZE) {
+	 /* 64 bit cookie */
+	 s->offset += 8;
+	 s->counter_offset += 8;
+	 if (l2tpv3_parse_cookie64(tx_cookie,&s->tx_cookie) !=0) {
+	    fprintf(stderr, "l2tpv3_open :  failed to parse tx cookie 64\n");
+	    return -1;
+	 } else {
+	    fprintf(stderr, "l2tpv3_open :  parsed tx cookie 64, %0lx\n", s->tx_cookie);
+	 }
+	 if (l2tpv3_parse_cookie64(rx_cookie,&s->rx_cookie) !=0) {
+	    fprintf(stderr, "l2tpv3_open :  failed to parse rx cookie 64\n");
+	    return -1;
+	 } else {
+	    fprintf(stderr, "l2tpv3_open :  parsed rx cookie 64, %0lx\n", s->rx_cookie);
+	 }
+      } else {
+	 /* 32 bit cookie */
+	 s->offset += 4;
+	 s->counter_offset +=4;
+	 s->tx_cookie = 0;
+	 if (l2tpv3_parse_cookie32(tx_cookie,&s->tx_cookie) !=0) {
+	    fprintf(stderr, "l2tpv3_open :  failed to parse tx cookie 32\n");
+	    return -1;
+	 }
+	 s->rx_cookie = 0;
+	 if (l2tpv3_parse_cookie32(rx_cookie,&s->rx_cookie) !=0) {
+	    fprintf(stderr, "l2tpv3_open :  failed to parse rx cookie 32\n");
+	    return -1;
+	 }
+      }
+   }
+
+         
+     
+   if (dst && (strlen(dst) > 0 )) {         
+      /* we now allocate it only if it we are not "listening" */
+      s->dgram_dst = malloc(sizeof(struct sockaddr_storage));
+      memset(s->dgram_dst, 0 , sizeof(struct sockaddr_storage));
+      fprintf(stderr, "l2tpv3_open : setting dst at init\n");
+   } else {
+      s->dgram_dst = NULL;
+   }
+
+   if (s->new_mode & NEW_MODE_IP_VERSION) {
+      /* IPv6 */
+      sock_family = AF_INET6;
+      if (parse_host_port6((struct sockaddr_in6 *) &LocalSock, src) !=0) {
+	 fprintf(stderr, "l2tpv3_open :  failed to parse v6 dst\n");
+	 return -1;
+      };
+   } else {
+      /* IPv4 */
+      sock_family = AF_INET;
+      if (parse_host_port((struct sockaddr_in*) &LocalSock, src) != 0) {
+	 fprintf(stderr, "l2tpv3_open :  failed to parse v4 src\n");
+	 return -1;
+      };
+   }
+   if (s->new_mode & NEW_MODE_UDP) {
+      sock_type = SOCK_DGRAM;
+      sock_proto = 0;
+      /* space for header */
+
+      s->offset += 4;
+      s->counter_offset += 4;
+      s->session_offset += 4;
+      s->cookie_offset += 4;
+    } else {
+      sock_type = SOCK_RAW;
+      sock_proto = 0x73;
+      if (s->new_mode & NEW_MODE_IP_VERSION) {
+	 /* IPv6 */
+	 ((struct sockaddr_in6 *) &LocalSock)->sin6_port = htons(0x73);
+      } else {
+	 /* IPv4 */
+	 ((struct sockaddr_in *) &LocalSock)->sin_port = htons(0x73);
+      }
+   }
+
+   if (!(s->new_mode & NEW_MODE_NO_COUNTER)) {
+      s->offset += 4;
+   }
+   if ((fd = socket(sock_family, sock_type, sock_proto)) == -1) {
+      fd = -errno;
+      fprintf(stderr, "l2tpv3_open : socket creation failed, "
+	       "errno = %d\n", -fd);
+      return fd;
+   } 
+
+
+   if (bind(fd, (struct sockaddr *) &LocalSock, sizeof(LocalSock))) {
+      fprintf(stderr, "l2tpv3_open :  could not bind socket err=%i\n", errno);
+      close(fd);
+      return -1;
+   } else {
+      fprintf(stderr, "l2tpv3_open : socket bound successfully\n");
+   }
+   if (s->dgram_dst) {
+      if (s->new_mode & NEW_MODE_IP_VERSION) {
+	 /* IPv6 */
+	 if (parse_host_port6((struct sockaddr_in6 *) s->dgram_dst, dst) !=0) {
+	    fprintf(stderr, "l2tpv3_open :  failed to parse v6 dst\n");
+	    return -1;
+	 };
+	 s->dst_size = sizeof(struct sockaddr_in6);
+      } else {
+	 /* IPv4 */
+	 if (parse_host_port((struct sockaddr_in *) s->dgram_dst, dst) != 0) {
+	    fprintf(stderr, "l2tpv3_open :  failed to parse v4 dst\n");
+	    return -1;
+	 };
+	 s->dst_size = sizeof(struct sockaddr_in);
+      }
+   }
+
+   s->msgvec = g_malloc(sizeof(struct mmsghdr) * (MAX_L2TPV3_IOVCNT/2));
+   if (s->msgvec == NULL) {
+	fprintf(stderr, "Failed to allocate receive packet vec \n");
+	return -1;
+   }
+   msgvec = s->msgvec;
+    
+   for (i=0;i<MAX_L2TPV3_IOVCNT/2;i++) {
+	/* we need to allocate these only for the first time and first buffer */
+	msgvec->msg_hdr.msg_name = NULL;
+	msgvec->msg_hdr.msg_namelen = 0;
+	iov =  g_malloc(sizeof(struct iovec) * 2);
+	if (iov == NULL) {
+	    fprintf(stderr, "Failed to allocate receive packet vec \n");
+	    return -1;
+	}
+	msgvec->msg_hdr.msg_iov = iov;
+	if ((!(s->new_mode & NEW_MODE_IP_VERSION)) && (!(s->new_mode & NEW_MODE_UDP))){
+	    iov->iov_base = g_malloc(s->offset + sizeof(struct iphdr)); /* fix for ipv4 raw */;
+	} else {
+	    iov->iov_base = g_malloc(s->offset);
+	}
+	
+	if (iov->iov_base == NULL) {
+	    fprintf(stderr, "Failed to allocate receive packet vec \n");
+	    return -1;
+	}
+	if ((!(s->new_mode & NEW_MODE_IP_VERSION)) && (!(s->new_mode & NEW_MODE_UDP))){
+	    iov->iov_len = s->offset + sizeof (struct iphdr);
+	} else {
+	    iov->iov_len = s->offset;
+	}
+	iov++ ;
+	iov->iov_base = qemu_memalign(PAGE_SIZE, PAGE_SIZE);
+	if (iov->iov_base == NULL) {
+	    fprintf(stderr, "Failed to allocate receive packet vec \n");
+	    return -1;
+	}
+	iov->iov_len = PAGE_SIZE;
+	msgvec->msg_hdr.msg_iovlen = 2;
+	msgvec->msg_hdr.msg_control = NULL;
+	msgvec->msg_hdr.msg_controllen = 0;
+	msgvec->msg_hdr.msg_flags = 0;
+	msgvec++;
+   }
+ 
+//   sendbuff = 90000;
+//
+//  printf("sets the send buffer to %d\n", sendbuff);
+// setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sendbuff, sizeof(sendbuff));
+
+   qemu_set_nonblock(fd);
+
+   if (fd < 0)
+	return -1;
+
+   s->fd = fd;
+   s->counter = 0;
+
+   qemu_set_fd_handler(s->fd, net_l2tpv3_send, NULL, s);
+
+   if (!s) {
+      fprintf (stderr, "l2tpv3_open : failed to set fd handler\n");
+      return -1;
+   }
+
+   /* this needs fixing too */
+
+   snprintf(s->nc.info_str, sizeof(s->nc.info_str),
+             "l2tpv3: connected");
+   return 0;
+
+}
+
+int net_init_l2tpv3(const NetClientOptions *opts,
+                    const char *name,
+                    NetClientState *peer) {
+
+
+    const NetdevL2TPv3Options * l2tpv3;
+    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3);
+    l2tpv3 = opts->l2tpv3;
+
+
+  if (!(l2tpv3->has_src && l2tpv3->has_mode && l2tpv3->has_txsession && l2tpv3->has_rxsession)) {
+      error_report("src=, dst=, txsession=, rxsession= and mode= are required for l2tpv3");
+      return -1;
+  }
+
+
+  /* this needs cleaning up for new API */
+
+  if (net_l2tpv3_init(
+	 peer, 
+         name,
+	 l2tpv3->src, 
+	 l2tpv3->dst, 
+	 l2tpv3->mode, 
+	 l2tpv3->txcookie,
+	 l2tpv3->rxcookie,
+	 l2tpv3->txsession,
+	 l2tpv3->rxsession
+      
+   ) == -1) {
+      return -1;
+  }
+  return 0;
+}
diff --git a/net/l2tpv3.h b/net/l2tpv3.h
new file mode 100644
index 0000000..822e0b0
--- /dev/null
+++ b/net/l2tpv3.h
@@ -0,0 +1,65 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2012 Cisco Systems
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+
+#ifndef QEMU_NET_L2TPV3_H
+#define QEMU_NET_L2TPV3_H
+
+#define L2TPV3_BUFSIZE 2048
+
+#define NEW_MODE_IP_VERSION   1		  /* on for v6, off for v4 */
+#define NEW_MODE_UDP	      2		  /* on for udp, off for raw ip */
+#define NEW_MODE_COOKIE	      4		  /* cookie present */
+#define NEW_MODE_COOKIE_SIZE  8		  /* on for 64 bit */
+#define NEW_MODE_NO_COUNTER   16	  /* DT - no counter */
+
+/* legacy modes */
+
+/* mode 0 */
+
+#define LEGACY_UDP6_64_NO_COUNTER (NEW_MODE_IP_VERSION + NEW_MODE_UDP + NEW_MODE_COOKIE + NEW_MODE_COOKIE_SIZE + NEW_MODE_NO_COUNTER)
+
+#define LEGACY_MODE0 LEGACY_UDP6_64_NO_COUNTER
+
+/* mode 1 */
+
+#define LEGACY_IP6_64_NO_COUNTER (NEW_MODE_IP_VERSION + NEW_MODE_COOKIE + NEW_MODE_COOKIE_SIZE + NEW_MODE_NO_COUNTER)
+
+#define LEGACY_MODE1 LEGACY_IP6_64_NO_COUNTER
+
+/* mode 2 */
+
+#define LEGACY_UDP4_64_COUNTER (NEW_MODE_COOKIE + NEW_MODE_UDP + NEW_MODE_COOKIE_SIZE )
+
+#define LEGACY_MODE2 LEGACY_UDP4_64_COUNTER
+
+struct temphtonl {
+   uint32_t low; 
+   uint32_t high;
+};
+
+
+#endif /* QEMU_NET_SOCKET_H */
diff --git a/net/net.c b/net/net.c
index 0a88e68..ba5f51b 100644
--- a/net/net.c
+++ b/net/net.c
@@ -132,6 +132,43 @@ int parse_host_port(struct sockaddr_in *saddr, const char *str)
     return 0;
 }
 
+int parse_host_port6(struct sockaddr_in6 *saddr, const char *str)
+{
+    char buf[512];
+    char *p, *ip, *port;
+
+    strncpy((char *) &buf, str, 511);
+    ip = (char *) &buf;
+    port = NULL;
+    int portint;
+
+
+    for (p = (char *) &buf; (p < (char *) &buf + strlen(str)) || (p < (char *) &buf + 512); p++){
+      if (*p == '[') {
+	 ip = p + 1;
+      }
+      if (*p == ']') {
+	 *p = '\0';
+        if (*(p + 1) == ':') {
+	    port = p + 2;
+	}
+	break;
+      }
+    } 
+
+    saddr->sin6_family = AF_INET6;
+    if (!inet_pton(AF_INET6, ip, &saddr->sin6_addr)) {
+         return -1;
+    } 
+    if (port) {
+      sscanf(port, "%i", &portint);
+      saddr->sin6_port = htons(portint);
+    }
+    return 0;
+}
+
+
+
 void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6])
 {
     snprintf(nc->info_str, sizeof(nc->info_str),
@@ -731,6 +768,7 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
         [NET_CLIENT_OPTIONS_KIND_BRIDGE]    = net_init_bridge,
 #endif
         [NET_CLIENT_OPTIONS_KIND_HUBPORT]   = net_init_hubport,
+        [NET_CLIENT_OPTIONS_KIND_L2TPV3]       = net_init_l2tpv3,
 };
 
 
diff --git a/net/raw.c b/net/raw.c
new file mode 100644
index 0000000..04d244f
--- /dev/null
+++ b/net/raw.c
@@ -0,0 +1,244 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Cisco Systems
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <netinet/ether.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+
+#include "net/raw.h"
+
+#include "config-host.h"
+#include "net/net.h"
+#include "clients.h"
+#include "monitor/monitor.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+
+#define RAW_BUFSIZE 2048
+
+
+typedef struct NetRAWState {
+    NetClientState nc;
+    int fd;
+    int state; /* 0 = getting length, 1 = getting data */
+    unsigned int ring_index;
+    unsigned int packet_len;
+    uint8_t * multiread_buffer;
+} NetRAWState;
+
+
+static ssize_t net_raw_receive_dgram(NetClientState *nc, const uint8_t *buf, size_t size)
+{
+    NetRAWState *s = DO_UPCAST(NetRAWState, nc, nc);
+    return write(s->fd, buf, size);
+}
+
+static ssize_t net_raw_receive_dgram_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
+{
+    NetRAWState *s = DO_UPCAST(NetRAWState, nc, nc);
+    return writev(s->fd, iov, iovcnt);
+}
+
+static inline struct tpacket_hdr * current_header (NetRAWState *s) {
+    uint8_t * buffer;
+    buffer = s->multiread_buffer + (s->ring_index * RAW_TP_FRAME_SIZE);
+    return (struct tpacket_hdr *) buffer;
+}
+
+
+static struct tpacket_hdr * raw_advance_ring(NetRAWState *s) {
+
+    struct tpacket_hdr * header = current_header(s);
+    header->tp_status = TP_STATUS_KERNEL; /* mark as free */
+    s->ring_index = (s->ring_index + 1) % RAW_TP_FRAME_NR;
+    return current_header(s);
+}
+
+static void net_raw_send(void *opaque)
+{
+    NetRAWState *s = opaque;
+    struct tpacket_hdr * header;
+    
+    header = current_header(s);	
+    while ((header->tp_status & TP_STATUS_USER) > 0) {
+	qemu_send_packet(&s->nc, ((uint8_t *) header) + header->tp_mac, header->tp_snaplen);
+	header = raw_advance_ring(s);
+    }
+}
+
+
+static void net_raw_cleanup(NetClientState *nc)
+{
+    NetRAWState *s = DO_UPCAST(NetRAWState, nc, nc);
+    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+    close(s->fd);
+}
+
+static NetClientInfo net_raw_info = {
+    .type = NET_CLIENT_OPTIONS_KIND_RAW,
+    .size = sizeof(NetRAWState),
+    .receive = net_raw_receive_dgram,
+    .receive_iov = net_raw_receive_dgram_iov,
+    .cleanup = net_raw_cleanup,
+};
+
+static int net_raw_init(NetClientState *peer,
+                                 const char *name,
+                                 const char *host_interface
+                 )
+{
+    NetRAWState *s;
+    NetClientState *nc;
+
+    int fd;
+
+
+
+    struct ifreq ifr;
+    struct sockaddr_ll sock;
+
+    int err;
+    struct tpacket_req tpacket;
+
+
+    nc = qemu_new_net_client(&net_raw_info, peer, "raw", name);
+
+    s = DO_UPCAST(NetRAWState, nc, nc);
+
+    s->ring_index = 0;
+	 
+
+    if ((fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL))) == -1) {
+	err = -errno;
+	fprintf(stderr, "raw_open : raw socket creation failed, errno = %d\n", -err);
+	return err;
+    }
+
+    tpacket.tp_block_size = RAW_TP_BLOCK_SIZE; 
+    tpacket.tp_frame_size = RAW_TP_FRAME_SIZE; 
+    tpacket.tp_block_nr = RAW_TP_BLOCK_NR ; 
+    tpacket.tp_frame_nr = RAW_TP_FRAME_NR;
+
+    if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &tpacket, sizeof(struct tpacket_req))) {
+	fprintf(stderr, "uml_raw: failed to request packet mmap");
+	return -errno;
+    } else {
+	fprintf(stderr, "uml_raw: requested packet mmap\n");
+    }
+
+    s->multiread_buffer = (uint8_t *) mmap(
+	    NULL, 
+	    RAW_TP_FRAME_SIZE * RAW_TP_FRAME_NR, 
+	    PROT_READ | PROT_WRITE, MAP_SHARED, 
+	    fd, 
+	    0
+    );
+
+    if (!(s->multiread_buffer)) {
+	fprintf(stderr, "raw: failed to map buffer");
+	return -1;
+    } else {
+	fprintf(stderr, "raw: mmmap-ed buffer at %p\n", s->multiread_buffer);
+    }
+
+
+    
+
+    memset(&ifr, 0, sizeof(struct ifreq));
+    strncpy((char *) &ifr.ifr_name, host_interface, sizeof(ifr.ifr_name) - 1);
+    if(ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+	err = -errno;
+	fprintf(stderr, "SIOCGIFINDEX, failed to get raw interface index for %s", host_interface);
+	close(fd);
+	return(-1);
+    } else {
+    }
+
+    sock.sll_family = AF_PACKET;
+    sock.sll_protocol = htons(ETH_P_ALL);
+    sock.sll_ifindex = ifr.ifr_ifindex;
+
+    fprintf(stderr, "raw: binding raw on interface index: %i\n", ifr.ifr_ifindex);
+    if (bind(fd, (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+	fprintf(stderr, "raw: failed to bind raw socket");
+	close(fd);
+	return(-1);
+    }
+
+
+    qemu_set_nonblock(fd);
+
+    if (fd < 0)
+	return -1;
+
+    s->ring_index = 0;
+
+    s->fd = fd;
+
+    qemu_set_fd_handler(s->fd, net_raw_send, NULL, s);
+
+    if (!s) {
+      fprintf (stderr, "raw_open : failed to set fd handler\n");
+      return -1;
+    }
+
+/* this needs fixing too */
+
+    snprintf(s->nc.info_str, sizeof(s->nc.info_str), "raw: connected");
+    return 0;
+
+}
+
+int net_init_raw(const NetClientOptions *opts,
+                    const char *name,
+                    NetClientState *peer) {
+
+
+    const NetdevRawOptions * raw;
+    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_RAW);
+    raw = opts->raw;
+
+    if (raw->has_fd) {
+	error_report("passing socket fd not yet supported");
+	return -1 ;
+    }
+    
+    if (!(raw->has_ifname)) {
+      error_report("iface required for raw");
+      return -1;
+    }
+
+    if (net_raw_init( peer, name, raw->ifname) == -1) {
+      return -1;
+    }
+    return 0;
+}
diff --git a/net/raw.h b/net/raw.h
new file mode 100644
index 0000000..80ab43e
--- /dev/null
+++ b/net/raw.h
@@ -0,0 +1,33 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Cisco Systems
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef QEMU_NET_RAW_H
+#define QEMU_NET_RAW_H
+
+#define RAW_TP_BLOCK_SIZE 4096
+#define RAW_TP_FRAME_SIZE 2048
+#define RAW_TP_BLOCK_NR 32
+#define RAW_TP_FRAME_NR 64
+
+#endif /* QEMU_NET_RAW_H */
diff --git a/qapi-schema.json b/qapi-schema.json
index 83fa485..192d19c 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2941,6 +2941,45 @@
     '*udp':       'str' } }
 
 ##
+# @NetdevL2TPv3Options
+#
+# Connect the VLAN to Ethernet over L2TPv3 Static tunnel
+#
+# @fd: #optional file descriptor of an already opened socket
+#
+# @src: source address
+#
+# @dst: #optional destination address
+#
+# @mode: bitmask mode for the tunnel (see l2tpv3.h for actual definition)
+#
+# @txcookie:#optional 32 or 64 bit tx cookie for the tunnel (set mode for actual type selection)
+#
+# @rxcookie:#optional 32 or 64 bit rx cookie for the tunnel (set mode for actual type selection)
+#
+# @txsession: tx 32 bit session
+#
+# @rxsession: rx 32 bit session
+#
+#
+# Since 1.0
+##
+##
+{ 'type': 'NetdevL2TPv3Options',
+  'data': {
+    '*fd':        'str',
+    '*src':	  'str', 
+    '*dst':	  'str', 
+    '*mode':	  'str', 
+    '*txcookie':  'str',
+    '*rxcookie':  'str',
+    '*txsession': 'str',
+    '*rxsession': 'str'
+
+} }
+
+##
+##
 # @NetdevVdeOptions
 #
 # Connect the VLAN to a vde switch running on the host.
@@ -3021,6 +3060,7 @@
     'nic':      'NetLegacyNicOptions',
     'user':     'NetdevUserOptions',
     'tap':      'NetdevTapOptions',
+    'l2tpv3':   'NetdevL2TPv3Options',
     'socket':   'NetdevSocketOptions',
     'vde':      'NetdevVdeOptions',
     'dump':     'NetdevDumpOptions',

Reply via email to