This patch partially isolates ipv4 by adding a pointer to the network
namespace structure to struct sock, the bind bucket and the skbuff. When
a socket is created, the pointer to the network namespace is stored in
its struct sock; this is how the socket is associated with the
namespace. That makes it possible to identify the sockets belonging to
a namespace for lookup and procfs.
The lookup is extended with a network namespace pointer in
order to identify listen points bound to the same port. That allows
several applications to be bound to INADDR_ANY:port in different
network namespaces without conflicting. The bind is checked against
both the port and the network namespace.
When an outgoing packet has a loopback destination address, the
skbuff is filled with the network namespace, so loopback packets
never go outside the namespace. This approach facilitates the migration
of loopback because identification is done by network namespace and
not by address. The loopback has been benchmarked with tbench and the
overhead is roughly 1.5%.
Replace-Subject: [Network namespace] ipv4 isolation
Signed-off-by: Daniel Lezcano <[EMAIL PROTECTED]>
--
include/linux/skbuff.h |2 ++
include/net/inet_hashtables.h| 34 --
include/net/inet_timewait_sock.h |1 +
include/net/sock.h |4
net/dccp/ipv4.c |7 ---
net/ipv4/af_inet.c |2 ++
net/ipv4/inet_connection_sock.c |3 ++-
net/ipv4/inet_diag.c |3 ++-
net/ipv4/inet_hashtables.c |6 +-
net/ipv4/inet_timewait_sock.c|1 +
net/ipv4/ip_output.c |4
net/ipv4/tcp_ipv4.c | 25 -
net/ipv4/udp.c |7 +--
13 files changed, 72 insertions(+), 27 deletions(-)
Index: 2.6-mm/include/linux/skbuff.h
===
--- 2.6-mm.orig/include/linux/skbuff.h
+++ 2.6-mm/include/linux/skbuff.h
@@ -27,6 +27,7 @@
#include
#include
#include
+#include
#include
#include
@@ -301,6 +302,7 @@
*data,
*tail,
*end;
+ struct net_namespace*net_ns;
};
#ifdef __KERNEL__
Index: 2.6-mm/include/net/inet_hashtables.h
===
--- 2.6-mm.orig/include/net/inet_hashtables.h
+++ 2.6-mm/include/net/inet_hashtables.h
@@ -23,6 +23,8 @@
#include
#include
#include
+#include
+#include
#include
#include
@@ -78,6 +80,7 @@
signed shortfastreuse;
struct hlist_node node;
struct hlist_head owners;
+ struct net_namespace*net_ns;
};
#define inet_bind_bucket_for_each(tb, node, head) \
@@ -274,13 +277,15 @@
extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
const u32 daddr,
const unsigned short hnum,
- const int dif);
+ const int dif,
+ const struct net_namespace *net_ns);
/* Optimize the common listener case. */
static inline struct sock *
inet_lookup_listener(struct inet_hashinfo *hashinfo,
const u32 daddr,
-const unsigned short hnum, const int dif)
+const unsigned short hnum, const int dif,
+const struct net_namespace *net_ns)
{
struct sock *sk = NULL;
const struct hlist_head *head;
@@ -294,8 +299,9 @@
(!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
(sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
!sk->sk_bound_dev_if)
- goto sherry_cache;
- sk = __inet_lookup_listener(head, daddr, hnum, dif);
+ if (sk->sk_net_ns == net_ns && LOOPBACK(daddr))
+ goto sherry_cache;
+ sk = __inet_lookup_listener(head, daddr, hnum, dif, net_ns);
}
if (sk) {
sherry_cache:
@@ -358,7 +364,8 @@
__inet_lookup_established(struct inet_hashinfo *hashinfo,
const u32 saddr, const u16 sport,
const u32 daddr, const u16 hnum,
- const int dif)
+ const int dif,
+ const struct net_namespace *net_ns)
{
INET_ADDR_COOKIE(acookie, saddr, daddr)
const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
@@ -373,12 +380,16 @@
prefetch(head->chain.first);
read_lock(&head->lock);
sk_for_each(sk, node, &head->chain) {
+ if (sk->sk_net_ns != net_ns && LOOPBACK(dadd