On Sun, Jan 11, 2009 at 2:19 AM, Grzegorz Nosek <[email protected]> wrote:
>
> So if I understand you right, your proposed solution would be something
> akin to ipt_cgroup (matching packets originating from a cgroup, like
> ipt_owner matches uid/gid) plus netfilter hooks for blocking/remapping
> addresses passed to connect() and/or bind()? Or maybe a dedicated
> netfilter table with per-cgroup chains?
Yes, something like one of those options. But it would never need to
be actually matching real packets in the data path - just
connect/bind/accept requests in the control path.
>
> Using the iptables API with connect() sending a fake packet, how would
> you represent "allow this connection, but bind() to 10.0.0.1 first"?
> Rewrite the source address in an iptables target?
Hmm, I hadn't considered that - I'd just been thinking of permit/deny
decisions. But you're right, a rewrite rule might be a natural way to
do this.
Clearly this feature would only use a small subset of the available
iptables API, so in that sense it might be overkill. But avoiding
inventing a complex new API seems worth the potential overkill.
I've attached the vague prototype that I was playing with a few months
ago. It's missing some of the bits that it would need:
- it attaches to the existing NF_INET_LOCAL_OUT hook rather than a new
NF_INET_CONTROL hook, because trying to edit/recompile the iptables
userspace binary to handle a new hook proved to be too painful for
this prototype. (i.e. it is currently also invoked on the packet fast
path, where it just accepts everything, but it really shouldn't be ...)
- it only currently handles connect() - no bind() or accept()
- it doesn't have a cgroup-specific iptables filter yet - it just
provides a system-wide control over connections. Adding a per-group
filter would be pretty easy, I think
As it stands, it's sufficient to express complex rules like "disallow
connections to a remote sshd port, except on host H", etc.
Paul
---
include/linux/netfilter.h | 1
include/linux/netfilter_ipv4.h | 7 +
include/net/netns/ipv4.h | 3
net/ipv4/netfilter/Kconfig | 7 +
net/ipv4/netfilter/Makefile | 3
net/ipv4/netfilter/ip_tables.c | 7 +
net/ipv4/netfilter/iptable_control.c | 167 +++++++++++++++++++++++++++++++++++
net/ipv4/tcp_ipv4.c | 6 +
net/netfilter/xt_tcpudp.c | 16 +++
9 files changed, 214 insertions(+), 3 deletions(-)
Index: netfilter-2.6.25-rc3/include/linux/netfilter.h
===================================================================
--- netfilter-2.6.25-rc3.orig/include/linux/netfilter.h
+++ netfilter-2.6.25-rc3/include/linux/netfilter.h
@@ -47,6 +47,7 @@ enum nf_inet_hooks {
NF_INET_FORWARD,
NF_INET_LOCAL_OUT,
NF_INET_POST_ROUTING,
+// NF_INET_CONTROL,
NF_INET_NUMHOOKS
};
Index: netfilter-2.6.25-rc3/include/net/netns/ipv4.h
===================================================================
--- netfilter-2.6.25-rc3.orig/include/net/netns/ipv4.h
+++ netfilter-2.6.25-rc3/include/net/netns/ipv4.h
@@ -33,5 +33,8 @@ struct netns_ipv4 {
struct xt_table *iptable_raw;
struct xt_table *arptable_filter;
#endif
+#ifdef CONFIG_IP_NF_CONTROL
+ struct xt_table *iptable_control;
+#endif
};
#endif
Index: netfilter-2.6.25-rc3/net/ipv4/netfilter/Kconfig
===================================================================
--- netfilter-2.6.25-rc3.orig/net/ipv4/netfilter/Kconfig
+++ netfilter-2.6.25-rc3/net/ipv4/netfilter/Kconfig
@@ -281,6 +281,13 @@ config NF_NAT_SIP
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_SIP
+config IP_NF_CONTROL
+ tristate "Connection control"
+ depends on IP_NF_IPTABLES
+ default m
+ help
+ Adds the 'control' iptables table, checked on connect() rather than per packet.
+
# mangle + specific targets
config IP_NF_MANGLE
tristate "Packet mangling"
Index: netfilter-2.6.25-rc3/net/ipv4/netfilter/Makefile
===================================================================
--- netfilter-2.6.25-rc3.orig/net/ipv4/netfilter/Makefile
+++ netfilter-2.6.25-rc3/net/ipv4/netfilter/Makefile
@@ -34,11 +34,12 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
-# the three instances of ip_tables
+# the five instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
obj-$(CONFIG_NF_NAT) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
+obj-$(CONFIG_IP_NF_CONTROL) += iptable_control.o
# matches
obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
Index: netfilter-2.6.25-rc3/net/ipv4/netfilter/iptable_control.c
===================================================================
--- /dev/null
+++ netfilter-2.6.25-rc3/net/ipv4/netfilter/iptable_control.c
@@ -0,0 +1,167 @@
+/*
+ * 'control' table, used for controlling operations such as bind() or connect()
+ *
+ * Copyright (C) 2007 Paul Menage <[email protected]>
+ * Cloned from code originally by Jozsef Kadlecsik <[email protected]>
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+
+#define CONTROL_VALID_HOOKS (1 << NF_INET_LOCAL_OUT) /* bitmask: only the LOCAL_OUT hook */
+
+static struct
+{
+ struct ipt_replace repl;
+ struct ipt_standard entries[1];
+ struct ipt_error term;
+} initial_table __net_initdata = { /* template passed to ipt_register_table() in iptable_control_net_init() */
+ .repl = {
+ .name = "control",
+ .valid_hooks = CONTROL_VALID_HOOKS,
+ .num_entries = 2, /* entries[0] plus the error terminator */
+ .size = sizeof(struct ipt_standard) * 1 + sizeof(struct ipt_error),
+ .hook_entry = {
+ [NF_INET_LOCAL_OUT] = 0, /* 0 = start of the entry blob, i.e. entries[0] */
+ },
+ .underflow = {
+ [NF_INET_LOCAL_OUT] = 0, /* likewise entries[0] */
+ },
+ },
+ .entries = {
+ IPT_STANDARD_INIT(NF_ACCEPT), /* CONTROL */
+ },
+ .term = IPT_ERROR_INIT, /* ERROR */
+};
+
+static struct xt_table packet_control = { /* table descriptor registered by iptable_control_net_init() */
+ .name = "control", /* same name as initial_table.repl.name */
+ .valid_hooks = CONTROL_VALID_HOOKS,
+ .lock = RW_LOCK_UNLOCKED,
+ .me = THIS_MODULE,
+ .af = AF_INET, /* IPv4 only */
+};
+
+/* Stub for the real LOCAL_OUT hook: accepts every packet; the table itself is evaluated in ipt_control_check(). */
+static unsigned int
+ipt_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ /* We don't actually want to do anything in the real hook. (We
+ * should actually have a separate hook, but handling that
+ * from userspace is non-trivial.) */
+ return NF_ACCEPT;
+}
+
+/*
+ * Run the "control" table over a fake packet describing a connect() from
+ * @sock (local address/port, also set as skb->sk) to @remote, carrying
+ * @protocol in the IP header. Returns 0 if the verdict is NF_ACCEPT, -EPERM
+ * for any other verdict, or -ENOMEM if the fake packet can't be allocated.
+ */
+int ipt_control_check(int protocol,
+		      struct inet_sock *sock,
+		      struct sockaddr_in *remote)
+{
+	struct sk_buff *skb = alloc_skb(MAX_TCP_HEADER, GFP_USER);
+	struct iphdr *iph;
+	struct tcphdr *th;
+	int verdict;
+
+	if (skb == NULL)
+		return -ENOMEM;
+
+	/* Allow the "owner" module to work */
+	skb->sk = &sock->sk;
+
+	/* Fake IP header; multi-byte fields must be in network byte order. */
+	iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
+	skb_reset_network_header(skb);
+	memset(iph, 0, sizeof(*iph));
+	iph->version = 4;
+	iph->ihl = sizeof(*iph) / 4;
+	iph->protocol = protocol;
+	iph->tot_len = htons(sizeof(*iph) + sizeof(*th));
+	iph->saddr = sock->rcv_saddr;
+	iph->daddr = remote->sin_addr.s_addr;
+
+	/* Fake TCP header; UDP keeps its ports at the same offsets. */
+	th = (struct tcphdr *)skb_put(skb, sizeof(*th));
+	memset(th, 0, sizeof(*th));
+	skb_set_transport_header(skb, sizeof(*iph));
+	th->source = htons(sock->num);	/* inet_sock.num is host order */
+	th->dest = remote->sin_port;	/* already network order */
+	th->doff = sizeof(*th) / 4;	/* tcp_mt() hot-drops short doff on option rules */
+
+	verdict = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, NULL,
+			       init_net.ipv4.iptable_control);
+	kfree_skb(skb);
+	return verdict == NF_ACCEPT ? 0 : -EPERM;
+}
+
+static struct nf_hook_ops control_ipt_ops[] __read_mostly = { /* (un)registered by iptable_control_init()/_fini() */
+ {
+ .hook = ipt_hook, /* always returns NF_ACCEPT */
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .owner = THIS_MODULE,
+ },
+};
+
+/* Per-namespace init: register @net's "control" table; 0 or a negative error. */
+static int __net_init iptable_control_net_init(struct net *net)
+{
+	struct xt_table *table;
+
+	table = ipt_register_table(net, &packet_control, &initial_table.repl);
+	net->ipv4.iptable_control = table;
+	if (!IS_ERR(table))
+		return 0;
+	printk(KERN_ERR "Failed to register control table: %d\n",
+	       (int)PTR_ERR(table));
+	return PTR_ERR(table);
+}
+
+static void __net_exit iptable_control_net_exit(struct net *net)
+{
+ ipt_unregister_table(net->ipv4.iptable_control); /* table stored by iptable_control_net_init() */
+}
+
+static struct pernet_operations iptable_control_net_ops = { /* passed to (un)register_pernet_subsys() */
+ .init = iptable_control_net_init,
+ .exit = iptable_control_net_exit,
+};
+
+/*
+ * Module entry point: register the per-namespace operations, then the
+ * LOCAL_OUT hook. Returns the negative value from whichever registration
+ * failed, otherwise the result of nf_register_hooks().
+ */
+static int __init iptable_control_init(void)
+{
+	int rc = register_pernet_subsys(&iptable_control_net_ops);
+
+	if (rc < 0)
+		return rc;
+
+	rc = nf_register_hooks(control_ipt_ops, ARRAY_SIZE(control_ipt_ops));
+	/* Hook registration failed: undo the pernet registration. */
+	if (rc < 0)
+		unregister_pernet_subsys(&iptable_control_net_ops);
+
+	return rc;
+}
+
+static void __exit iptable_control_fini(void)
+{
+ nf_unregister_hooks(control_ipt_ops, ARRAY_SIZE(control_ipt_ops)); /* reverse of the init order */
+ unregister_pernet_subsys(&iptable_control_net_ops);
+}
+
+module_init(iptable_control_init);
+module_exit(iptable_control_fini);
+MODULE_LICENSE("GPL");
Index: netfilter-2.6.25-rc3/include/linux/netfilter_ipv4.h
===================================================================
--- netfilter-2.6.25-rc3.orig/include/linux/netfilter_ipv4.h
+++ netfilter-2.6.25-rc3/include/linux/netfilter_ipv4.h
@@ -48,6 +48,7 @@
#define NF_IP_LOCAL_OUT 3
/* Packets about to hit the wire. */
#define NF_IP_POST_ROUTING 4
+//#define NF_IP_CONTROL 5
#define NF_IP_NUMHOOKS 5
#endif /* ! __KERNEL__ */
@@ -79,6 +80,12 @@ extern int ip_route_me_harder(struct sk_
extern int ip_xfrm_me_harder(struct sk_buff *skb);
extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
+
+struct inet_sock;
+int ipt_control_check(int protocol,
+ struct inet_sock *sock,
+ struct sockaddr_in *remote);
+
#endif /*__KERNEL__*/
#endif /*__LINUX_IP_NETFILTER_H*/
Index: netfilter-2.6.25-rc3/net/ipv4/netfilter/ip_tables.c
===================================================================
--- netfilter-2.6.25-rc3.orig/net/ipv4/netfilter/ip_tables.c
+++ netfilter-2.6.25-rc3/net/ipv4/netfilter/ip_tables.c
@@ -32,9 +32,9 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <[email protected]>");
MODULE_DESCRIPTION("IPv4 packet filter");
-/*#define DEBUG_IP_FIREWALL*/
+#define DEBUG_IP_FIREWALL
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
-/*#define DEBUG_IP_FIREWALL_USER*/
+#define DEBUG_IP_FIREWALL_USER
#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...) printk(format , ## args)
@@ -231,6 +231,9 @@ static const char *const hooknames[] = {
[NF_INET_FORWARD] = "FORWARD",
[NF_INET_LOCAL_OUT] = "OUTPUT",
[NF_INET_POST_ROUTING] = "POSTROUTING",
+#ifdef CONFIG_IP_NF_CONTROL
+ [NF_INET_CONTROL] = "CONTROL",
+#endif
};
enum nf_ip_trace_comments {
Index: netfilter-2.6.25-rc3/net/ipv4/tcp_ipv4.c
===================================================================
--- netfilter-2.6.25-rc3.orig/net/ipv4/tcp_ipv4.c
+++ netfilter-2.6.25-rc3/net/ipv4/tcp_ipv4.c
@@ -82,6 +82,8 @@
#include <linux/crypto.h>
#include <linux/scatterlist.h>
+#include <linux/netfilter_ipv4.h>
+
int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
@@ -166,6 +168,10 @@ int tcp_v4_connect(struct sock *sk, stru
if (usin->sin_family != AF_INET)
return -EAFNOSUPPORT;
+ if ((err = ipt_control_check(IPPROTO_TCP, inet, usin))) {
+ return err;
+ }
+
nexthop = daddr = usin->sin_addr.s_addr;
if (inet->opt && inet->opt->srr) {
if (!daddr)
Index: netfilter-2.6.25-rc3/net/netfilter/xt_tcpudp.c
===================================================================
--- netfilter-2.6.25-rc3.orig/net/netfilter/xt_tcpudp.c
+++ netfilter-2.6.25-rc3/net/netfilter/xt_tcpudp.c
@@ -19,6 +19,7 @@ MODULE_ALIAS("ipt_tcp");
MODULE_ALIAS("ip6t_udp");
MODULE_ALIAS("ip6t_tcp");
+#define DEBUG_IP_FIREWALL_USER
#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
@@ -75,6 +76,8 @@ tcp_mt(const struct sk_buff *skb, const
struct tcphdr _tcph, *th;
const struct xt_tcp *tcpinfo = matchinfo;
+ printk(KERN_ERR "In tcp_mt\n");
+
if (offset) {
/* To quote Alan:
@@ -93,6 +96,8 @@ tcp_mt(const struct sk_buff *skb, const
#define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg)))
th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+
+ printk(KERN_ERR "th=%p\n", th);
if (th == NULL) {
/* We've been asked to examine this packet, and we
can't. Hence, no choice but to drop. */
@@ -101,18 +106,29 @@ tcp_mt(const struct sk_buff *skb, const
return false;
}
+ duprintf("Checking source ports\n");
+
if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
ntohs(th->source),
!!(tcpinfo->invflags & XT_TCP_INV_SRCPT)))
return false;
+
+ duprintf("Checking dest ports %d - %d vs %d\n", tcpinfo->dpts[0], tcpinfo->dpts[1], ntohs(th->dest));
+
if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
ntohs(th->dest),
!!(tcpinfo->invflags & XT_TCP_INV_DSTPT)))
return false;
+
+ duprintf("Checking flags\n");
+
if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
== tcpinfo->flg_cmp,
XT_TCP_INV_FLAGS))
return false;
+
+ duprintf("Checking options\n");
+
if (tcpinfo->option) {
if (th->doff * 4 < sizeof(_tcph)) {
*hotdrop = true;
_______________________________________________
Containers mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/containers
_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel