Hi,
Linux kernel recently got a bugfix 1fde6f21d90f ("proc: fix /proc/net/* after
setns(2)"),
but unfortunately it only solves the issue for procfs net file inodes so they
get correct
content after a process change namespace.
Checking on a v5.2-rc6 kernel :
sh-4.4# sh netns_procfs_test.sh
[ 16.451640] ip (108) used greatest stack depth: 12264 bytes left
Before net namespace change :
==== /proc/net/dev ====
Inter-| Receive | Transmit
face |bytes packets errs drop fifo frame compressed multicast|bytes packd
eth0: 0 0 0 0 0 0 0 0 0 0
lo: 0 0 0 0 0 0 0 0 0 0
if_default: 0 0 0 0 0 0 0 0 0 0
sit0: 0 0 0 0 0 0 0 0 0 0
==== files in /proc/net/dev_snmp6 ====
.
..
lo
eth0
sit0
if_default
After net namespace change :
==== /proc/net/dev ====
Inter-| Receive | Transmit
face |bytes packets errs drop fifo frame compressed multicast|bytes packd
sit0: 0 0 0 0 0 0 0 0 0 0
if_other: 0 0 0 0 0 0 0 0 0 0
lo: 0 0 0 0 0 0 0 0 0 0
==== files in /proc/net/dev_snmp6 ====
.
..
lo
eth0
sit0
if_default
This kernel is fixed for file inode bug but suffers dir inode bug
sh-4.4#
As can be seen above, after the namespace change we see new content in procfs
net/dev
but the listing of procfs net/dev_snmp6 still shows entries from previous
namespace.
We need to apply similar bugfix for directory creation in procfs net as the
mentioned
commit do for files.
Checking on a v5.2-rc6 kernel with bugfixes :
sh-4.4# sh netns_procfs_test.sh
[ 745.993882] ip (108) used greatest stack depth: 12264 bytes left
Before net namespace change :
==== /proc/net/dev ====
Inter-| Receive | Transmit
face |bytes packets errs drop fifo frame compressed multicast|bytes packd
lo: 0 0 0 0 0 0 0 0 0 0
sit0: 0 0 0 0 0 0 0 0 0 0
eth0: 0 0 0 0 0 0 0 0 0 0
if_default: 0 0 0 0 0 0 0 0 0 0
==== files in /proc/net/dev_snmp6 ====
.
..
lo
eth0
sit0
if_default
After net namespace change :
==== /proc/net/dev ====
Inter-| Receive | Transmit
face |bytes packets errs drop fifo frame compressed multicast|bytes packd
if_other: 0 0 0 0 0 0 0 0 0 0
sit0: 0 0 0 0 0 0 0 0 0 0
lo: 0 0 0 0 0 0 0 0 0 0
==== files in /proc/net/dev_snmp6 ====
.
..
lo
sit0
if_other
This kernel is fixed for both file and dir inode bug
sh-4.4#
Here we see that the directory procfs net/dev_snmp6 is updated according to the
namespace
change.
The fix is two commits, first updates proc_net_mkdir() entries similar to
mentioned patch
and second one is changing net/ipv6/proc.c to use proc_net_mkdir() instead.
Speaking about proc_net_mkdir()...
[phallsma@arn-phallsma-l3 linux]$ git grep proc_mkdir | grep proc_net
drivers/isdn/divert/divert_procfs.c: isdn_proc_entry = proc_mkdir("isdn",
init_net.proc_net);
drivers/isdn/hysdn/hysdn_procconf.c: hysdn_proc_entry =
proc_mkdir(PROC_SUBDIR_NAME, init_net.proc_net);
drivers/net/bonding/bond_procfs.c: bn->proc_dir =
proc_mkdir(DRV_NAME, bn->net->proc_net);
drivers/net/wireless/intel/ipw2x00/libipw_module.c: libipw_proc =
proc_mkdir(DRV_PROCNAME, init_net.proc_net);
drivers/net/wireless/intersil/hostap/hostap_main.c: hostap_proc =
proc_mkdir("hostap", init_net.proc_net);
drivers/staging/rtl8192u/ieee80211/ieee80211_module.c: ieee80211_proc =
proc_mkdir(DRV_NAME, init_net.proc_net);
drivers/staging/rtl8192u/r8192U_core.c: rtl8192_proc =
proc_mkdir(RTL819XU_MODULE_NAME, init_net.proc_net);
net/appletalk/atalk_proc.c: if (!proc_mkdir("atalk", init_net.proc_net))
net/core/pktgen.c: pn->proc_dir = proc_mkdir(PG_PROC_DIR,
pn->net->proc_net);
net/ipv4/netfilter/ipt_CLUSTERIP.c: cn->procdir =
proc_mkdir("ipt_CLUSTERIP", net->proc_net);
net/ipv6/proc.c: net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6",
net->proc_net);
net/llc/llc_proc.c: llc_proc_dir = proc_mkdir("llc", init_net.proc_net);
net/netfilter/xt_hashlimit.c: hashlimit_net->ipt_hashlimit =
proc_mkdir("ipt_hashlimit", net->proc_net);
net/netfilter/xt_hashlimit.c: hashlimit_net->ip6t_hashlimit =
proc_mkdir("ip6t_hashlimit", net->proc_net);
net/netfilter/xt_recent.c: recent_net->xt_recent = proc_mkdir("xt_recent",
net->proc_net);
net/sunrpc/cache.c: cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc);
net/sunrpc/stats.c: sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net);
net/x25/x25_proc.c: if (!proc_mkdir("x25", init_net.proc_net))
[phallsma@arn-phallsma-l3 linux]$
IMHO all code should use proc_net_mkdir() instead of proc_mkdir() for procfs
net entries,
or am I missing something here? If not possible to changeover to
proc_net_mkdir() there
is a need for duplicating my first commit at those places. I'm fixing the one
for dev_snmp6()
which is what I've tested as well.
Also wonder if it all is optimal. Wouldn't it be better to re-enable dcache for
these (files as well as directories)
and in addition have kernel drop dcache in case of a namespace change?
Attaching patches and app/script for verifying.
I'm not on the mailing lists so please keep me on CC in case of responding.
Best regards,
Per
--
Per Hallsmark [email protected]
Senior Member Technical Staff Wind River AB
Mobile: +46733249340 Office: +46859461127
Torshamnsgatan 27 164 40 Kista
From 66769af1f931124da1aa04b34350b3623a9003e3 Mon Sep 17 00:00:00 2001 From: Per Hallsmark <[email protected]> Date: Wed, 19 Jun 2019 15:46:39 +0200 Subject: [PATCH 1/2] Make directory inodes in /proc/net adhere to net namespace This patch fixes /proc/net directory inodes in similar way as commit 1fde6f21d90f ("proc: fix /proc/net/* after setns(2)") fixes file inodes. Signed-off-by: Per Hallsmark <[email protected]> --- fs/proc/proc_net.c | 12 ++++++++++++ include/linux/proc_fs.h | 7 ++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 76ae278df1c4..1c4231a60c9e 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -55,6 +55,18 @@ static void pde_force_lookup(struct proc_dir_entry *pde) pde->proc_dops = &proc_net_dentry_ops; } +struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, + struct proc_dir_entry *parent) +{ + struct proc_dir_entry *pde; + + pde = proc_mkdir_data(name, 0, parent, net); + pde->proc_dops = &proc_net_dentry_ops; + + return pde; +} +EXPORT_SYMBOL(proc_net_mkdir); + static int seq_open_net(struct inode *inode, struct file *file) { unsigned int state_size = PDE(inode)->state_size; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 52a283ba0465..608b8a10e338 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -124,11 +124,8 @@ static inline struct pid *tgid_pidfd_to_pid(const struct file *file) struct net; -static inline struct proc_dir_entry *proc_net_mkdir( - struct net *net, const char *name, struct proc_dir_entry *parent) -{ - return proc_mkdir_data(name, 0, parent, net); -} +extern struct proc_dir_entry *proc_net_mkdir( + struct net *net, const char *name, struct proc_dir_entry *parent); struct ns_common; int open_related_ns(struct ns_common *ns, -- 2.20.1
From 6925a5408ffce37dc71871c2ee05db96e60cae0d Mon Sep 17 00:00:00 2001 From: Per Hallsmark <[email protected]> Date: Thu, 20 Jun 2019 10:21:31 +0200 Subject: [PATCH 2/2] net: Directories created in /proc/net should be done via proc_net_mkdir() Directories created in /proc/net should be done via proc_net_mkdir() Signed-off-by: Per Hallsmark <[email protected]> --- net/ipv6/proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 4a8da679866e..3728c57e93dc 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -282,7 +282,8 @@ static int __net_init ipv6_proc_init_net(struct net *net) snmp6_seq_show, NULL)) goto proc_snmp6_fail; - net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net); + net->mib.proc_net_devsnmp6 = proc_net_mkdir(net, + "dev_snmp6", net->proc_net); if (!net->mib.proc_net_devsnmp6) goto proc_dev_snmp6_fail; return 0; -- 2.20.1
/* Verifier of /proc/net and net namespace consistency Author : Per Hallsmark <[email protected]> First setup a net namespace and add a veth so we get something to test with ip netns add netns_other ip link add if_default type veth peer name if_other ip link set if_other netns netns_other Run test app without first reading some info from /proc/net in default namespace ./netns_procfs_test Run test app again, this time reading some info from /proc/net in default namespace before changing net namespace ./netns_procfs_test cached On a bugfree kernel, the output after "After net namespace change" should be same as in first test run, meaning we should see if_other in the /proc/net/dev dump and we should see if_other in the directory. The issue is not visible if it is different processes doing the readings since then the inode's aren't cached. */ #define _GNU_SOURCE #include <sched.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <errno.h> #include <string.h> #include <assert.h> #include <fcntl.h> #include <errno.h> #include <limits.h> #include <sys/types.h> #include <sys/mount.h> #include <errno.h> #include <dirent.h> #define NETNS_RUN_DIR "/var/run/netns" static int get_nsfd(const char *ns) { char path[strlen(NETNS_RUN_DIR) + strlen(ns) + 2]; int fd; snprintf(path, sizeof(path), "%s/%s", NETNS_RUN_DIR, ns); fd = open(path, O_RDONLY, 0); return fd; } static int dump_file(const char *fn, const int checkforbug) { int fd; char buf[512]; ssize_t len; char *wholebuf = NULL; ssize_t wholelen = 0; int status = 0; printf("==== %s ====\n", fn); fd = open(fn, O_RDONLY); if (fd == -1) { printf("Couldn't open %s (%s)\n", fn, strerror(errno)); return -1; } do { len = read(fd, buf, sizeof(buf)-1); buf[len] = 0; wholebuf = realloc(wholebuf, wholelen + len + 1); sprintf(&wholebuf[wholelen], "%s", buf); wholelen += len; } while (len > 0); printf("%s\n", wholebuf); if (checkforbug && strstr(wholebuf, "if_other")) { status = 1; } free(wholebuf); close(fd); return status; } static int dump_dir(const char *dn, const int checkforbug) { DIR *dir; struct dirent *dir_entry; int status = 0; printf("==== files in %s ====\n", dn); dir = opendir(dn); if (!dir) { printf("Couldn't open %s (%s)\n", dn, strerror(errno)); return -1; } while ((dir_entry = readdir(dir)) != NULL) { printf(" %s", dir_entry->d_name); if (checkforbug && !strcmp(dir_entry->d_name, "if_other")) { status = 2; } printf("\n"); } closedir(dir); return status; } static int switch_ns(const char *ns) { int nsfd = get_nsfd(ns); if (setns(nsfd, CLONE_NEWNET) < 0) { fprintf(stderr, "Unable to switch to namespace(fd=%d): %s.\n", nsfd, strerror(errno)); exit(-1); } return 0; } int main (int argc, char **argv) { int bugcheck = 0; int checkforbug = 1; if (argc == 2 && !strcmp(argv[1], "cached")) { // access /proc/net a bit so we dcache file and // directory inodes printf("Before net namespace change :\n"); dump_file("/proc/net/dev", 0); dump_dir("/proc/net/dev_snmp6", 0); } else { // we cannot check for bug if not run with cached arg checkforbug = 0; } // change namespace switch_ns("netns_other"); // access /proc/net again, if all works we should see // new info because of net namespace change printf("\n\nAfter net namespace change :\n"); bugcheck += dump_file("/proc/net/dev", 1); bugcheck += dump_dir("/proc/net/dev_snmp6", 1); if (!checkforbug) return 0; switch (bugcheck) { case 1 : printf("This kernel is fixed for file inode bug but suffers dir inode bug\n"); break; case 3 : printf("This kernel is fixed for both file and dir inode bug\n"); break; default : printf("This kernel suffers both file and dir inode bug\n"); } return bugcheck != 3; }
netns_procfs_test.sh
Description: netns_procfs_test.sh

