Re: RFC: ipfw nat VIMAGE improvements
On Thu, Aug 15, 2013 at 12:25:34AM +0200, Marko Zec wrote: Anyhow, this looks fine to me... Thanks, committed as r254776. -- Mikolaj Golub ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org
Re: RFC: carp(4): improved VIMAGE support
On Tue, Aug 13, 2013 at 11:26:31AM +0400, Gleb Smirnoff wrote: On Mon, Aug 12, 2013 at 10:45:17PM +0300, Mikolaj Golub wrote: M On Mon, Aug 12, 2013 at 02:16:15PM +0400, Gleb Smirnoff wrote: M On Sun, Aug 11, 2013 at 11:08:35PM +0300, Mikolaj Golub wrote: M M Hi, M M M M I would like to virtualize carp(4) variables to have per vnet control. M M M M Any comments, objections? M M No objections! M M Andrey pointed out (on irc) that carpstats was missing virtualization M too. Here is an updated patch. Thanks. As before, no objections from me. Thanks. Committed as r254292. -- Mikolaj Golub ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org
Re: RFC: carp(4): improved VIMAGE support
On Mon, Aug 12, 2013 at 02:16:15PM +0400, Gleb Smirnoff wrote: On Sun, Aug 11, 2013 at 11:08:35PM +0300, Mikolaj Golub wrote: M Hi, M M I would like to virtualize carp(4) variables to have per vnet control. M M Any comments, objections? No objections! Andrey pointed out (on irc) that carpstats was missing virtualization too. Here is an updated patch. -- Mikolaj Golub commit ac71429a70862ca75a614bb798dcfb62e67faca3 Author: Mikolaj Golub troc...@freebsd.org Date: Tue Aug 6 23:47:31 2013 +0300 Virtualize carp(4) variables to have per vnet control. diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 9228a8f..436385d 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -187,48 +187,59 @@ static int proto_reg[] = {-1, -1}; * dereferencing our function pointers. */ -static int carp_allow = 1; /* Accept incoming CARP packets. */ -static int carp_preempt = 0; /* Preempt slower nodes. */ -static int carp_log = 1; /* Log level. */ -static int carp_demotion = 0; /* Global advskew demotion. */ -static int carp_senderr_adj = CARP_MAXSKEW; /* Send error demotion factor */ -static int carp_ifdown_adj = CARP_MAXSKEW; /* Iface down demotion factor */ +/* Accept incoming CARP packets. */ +static VNET_DEFINE(int, carp_allow) = 1; +#define V_carp_allow VNET(carp_allow) + +/* Preempt slower nodes. */ +static VNET_DEFINE(int, carp_preempt) = 0; +#define V_carp_preempt VNET(carp_preempt) + +/* Log level. */ +static VNET_DEFINE(int, carp_log) = 1; +#define V_carp_log VNET(carp_log) + +/* Global advskew demotion. */ +static VNET_DEFINE(int, carp_demotion) = 0; +#define V_carp_demotion VNET(carp_demotion) + +/* Send error demotion factor. */ +static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW; +#define V_carp_senderr_adj VNET(carp_senderr_adj) + +/* Iface down demotion factor. 
*/ +static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW; +#define V_carp_ifdown_adj VNET(carp_ifdown_adj) + static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, CARP); -SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, carp_allow, 0, -Accept incoming CARP packets); -SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, carp_preempt, 0, -High-priority backup preemption mode); -SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, carp_log, 0, -CARP log level); -SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW, +SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, +VNET_NAME(carp_allow), 0, Accept incoming CARP packets); +SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, +VNET_NAME(carp_preempt), 0, High-priority backup preemption mode); +SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, +VNET_NAME(carp_log), 0, CARP log level); +SYSCTL_VNET_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW, 0, 0, carp_demote_adj_sysctl, I, Adjust demotion factor (skew of advskew)); -SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW, -carp_senderr_adj, 0, Send error demotion factor adjustment); -SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW, -carp_ifdown_adj, 0, Interface down demotion factor adjustment); +SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW, +VNET_NAME(carp_senderr_adj), 0, Send error demotion factor adjustment); +SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW, +VNET_NAME(carp_ifdown_adj), 0, +Interface down demotion factor adjustment); + +VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); +VNET_PCPUSTAT_SYSINIT(carpstats); +VNET_PCPUSTAT_SYSUNINIT(carpstats); -static counter_u64_t carpstats[sizeof(struct carpstats) / sizeof(uint64_t)]; #define CARPSTATS_ADD(name, val) \ -counter_u64_add(carpstats[offsetof(struct carpstats, 
name) / \ +counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ sizeof(uint64_t)], (val)) #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) -static int -carpstats_sysctl(SYSCTL_HANDLER_ARGS) -{ - struct carpstats s; - - COUNTER_ARRAY_COPY(carpstats, s, sizeof(s) / sizeof(uint64_t)); - if (req-newptr) - COUNTER_ARRAY_ZERO(carpstats, sizeof(s) / sizeof(uint64_t)); - return (SYSCTL_OUT(req, s, sizeof(s))); -} -SYSCTL_PROC(_net_inet_carp, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RW, -NULL, 0, carpstats_sysctl, I, -CARP statistics (struct carpstats, netinet/ip_carp.h)); +SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, +carpstats, CARP statistics (struct carpstats, netinet/ip_carp.h)); #define CARP_LOCK_INIT(sc) mtx_init((sc)-sc_mtx, carp_softc, \ NULL, MTX_DEF) @@ -251,12 +262,12 @@ SYSCTL_PROC(_net_inet_carp, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RW, } while (0) #define CARP_LOG(...) do {\ - if (carp_log 0)\ + if (V_carp_log 0)\ log(LOG_INFO, carp: __VA_ARGS__); \ } while (0
RFC: carp(4): improved VIMAGE support
Hi, I would like to virtualize carp(4) variables to have per vnet control. Any comments, objections? -- Mikolaj Golub commit 646e0b161ae7782259335cec652e178652ed7af3 Author: Mikolaj Golub troc...@freebsd.org Date: Tue Aug 6 23:47:31 2013 +0300 Virtualize carp(4) variables for per vnet control. diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 9228a8f..2ac39b7 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -187,28 +187,47 @@ static int proto_reg[] = {-1, -1}; * dereferencing our function pointers. */ -static int carp_allow = 1; /* Accept incoming CARP packets. */ -static int carp_preempt = 0; /* Preempt slower nodes. */ -static int carp_log = 1; /* Log level. */ -static int carp_demotion = 0; /* Global advskew demotion. */ -static int carp_senderr_adj = CARP_MAXSKEW; /* Send error demotion factor */ -static int carp_ifdown_adj = CARP_MAXSKEW; /* Iface down demotion factor */ +/* Accept incoming CARP packets. */ +static VNET_DEFINE(int, carp_allow) = 1; +#define V_carp_allow VNET(carp_allow) + +/* Preempt slower nodes. */ +static VNET_DEFINE(int, carp_preempt) = 0; +#define V_carp_preempt VNET(carp_preempt) + +/* Log level. */ +static VNET_DEFINE(int, carp_log) = 1; +#define V_carp_log VNET(carp_log) + +/* Global advskew demotion. */ +static VNET_DEFINE(int, carp_demotion) = 0; +#define V_carp_demotion VNET(carp_demotion) + +/* Send error demotion factor. */ +static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW; +#define V_carp_senderr_adj VNET(carp_senderr_adj) + +/* Iface down demotion factor. 
*/ +static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW; +#define V_carp_ifdown_adj VNET(carp_ifdown_adj) + static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, CARP); -SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, carp_allow, 0, -Accept incoming CARP packets); -SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, carp_preempt, 0, -High-priority backup preemption mode); -SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, carp_log, 0, -CARP log level); -SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW, +SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, +VNET_NAME(carp_allow), 0, Accept incoming CARP packets); +SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, +VNET_NAME(carp_preempt), 0, High-priority backup preemption mode); +SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, +VNET_NAME(carp_log), 0, CARP log level); +SYSCTL_VNET_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW, 0, 0, carp_demote_adj_sysctl, I, Adjust demotion factor (skew of advskew)); -SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW, -carp_senderr_adj, 0, Send error demotion factor adjustment); -SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW, -carp_ifdown_adj, 0, Interface down demotion factor adjustment); +SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW, +VNET_NAME(carp_senderr_adj), 0, Send error demotion factor adjustment); +SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW, +VNET_NAME(carp_ifdown_adj), 0, +Interface down demotion factor adjustment); static counter_u64_t carpstats[sizeof(struct carpstats) / sizeof(uint64_t)]; #define CARPSTATS_ADD(name, val) \ @@ -251,12 +270,12 @@ SYSCTL_PROC(_net_inet_carp, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RW, } while (0) #define CARP_LOG(...) 
do {\ - if (carp_log 0)\ + if (V_carp_log 0)\ log(LOG_INFO, carp: __VA_ARGS__); \ } while (0) #define CARP_DEBUG(...) do {\ - if (carp_log 1)\ + if (V_carp_log 1)\ log(LOG_DEBUG, __VA_ARGS__); \ } while (0) @@ -277,8 +296,8 @@ SYSCTL_PROC(_net_inet_carp, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RW, TAILQ_FOREACH((sc), (ifp)-if_carp-cif_vrs, sc_list) #define DEMOTE_ADVSKEW(sc) \ -(((sc)-sc_advskew + carp_demotion CARP_MAXSKEW) ? \ -CARP_MAXSKEW : ((sc)-sc_advskew + carp_demotion)) +(((sc)-sc_advskew + V_carp_demotion CARP_MAXSKEW) ? \ +CARP_MAXSKEW : ((sc)-sc_advskew + V_carp_demotion)) static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); static struct carp_softc @@ -430,7 +449,7 @@ carp_input(struct mbuf *m, int hlen) CARPSTATS_INC(carps_ipackets); - if (!carp_allow) { + if (!V_carp_allow) { m_freem(m); return; } @@ -513,7 +532,7 @@ carp6_input(struct mbuf **mp, int *offp, int proto) CARPSTATS_INC(carps_ipackets6); - if (!carp_allow) { + if (!V_carp_allow) { m_freem(m); return (IPPROTO_DONE); } @@ -647,7 +666,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) * If we're pre-empting masters who advertise slower than us, * and this one claims to be slower, treat him as down. */ - if (carp_preempt timevalcmp(sc_tv, ch_tv
Re: lagg panic
On Mon, Feb 11, 2013 at 02:36:17PM +0200, Nikos Vassiliadis wrote: Hi, I just noticed this is not committed. Could somebody commit it? http://lists.freebsd.org/pipermail/freebsd-net/2011-November/030526.html Committed (r251490). Thanks. -- Mikolaj Golub ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org
Re: pf + vimage patch
On Thu, Jun 06, 2013 at 12:35:33PM +0200, Nikos Vassiliadis wrote: -VNET_DEFINE(u_long, pf_srchashsize); -#define V_pf_srchashsizeVNET(pf_srchashsize) -SYSCTL_VNET_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN, -VNET_NAME(pf_srchashsize), 0, Size of pf(4) source nodes hashtable); +u_longpf_srchashsize; +SYSCTL_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN, +pf_srchashsize, 0, Size of pf(4) source nodes hashtable); Why do you have to devirtualize these variables? Are per-vnet hashtable sizes not useful, or do they cause issues? Per VNET variables are not useful for pf_hashsize and pf_srchashsize since these values are RO and cannot be modified at runtime. Indeed. I missed the RDTUN flag. module unload is broken:( Maybe it can be fixed at a (bit) later date? I don't think Gleb will be happy with this. Some time ago he removed some vimage related stuff to prevent crashing on module unload (see r229849). Actually your patch looks like a partial revert of that commit. So I think you need to think about this issue from the start. At the least, it should not crash a non-VIMAGE kernel, and there should be an understanding of how to fix it for a VIMAGE kernel. Your approach of keeping only one purge thread might make it simpler. -- Mikolaj Golub ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org
Re: GPF when doing jail -r, possibly an use-after-free
On Thu, 05 Jul 2012 12:18:20 -0700 Xin Li wrote: XL Hi, Mikolaj, XL On 07/04/12 00:00, Mikolaj Golub wrote: Is this observed after destroying epair? There is an issue with epair: on destroy, when epair_clone_destroy() calls ether_ifdetach() for its second half it does not switch to its vnet and if_detach_internal() can't find the interface and just returns. As a result, the V_ifnet list is left with a dead reference. XL Yes. http://lists.freebsd.org/pipermail/freebsd-virtualization/2011-January/000628.html Here is an updated patch against CURRENT: http://people.freebsd.org/~trociny/if_epair.c.epair_clone_destroy.1.patch XL Your XL patch did fix the problem, thanks! Are you going to commit it XL against -HEAD and then MFC after a while? I would like Bjoern to review it before I commit it, or at least to say that he does not mind, if he does not have time to review -) -- Mikolaj Golub ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org
Re: GPF when doing jail -r, possibly an use-after-free
On Thu, 5 Jul 2012 20:21:53 +0000 Bjoern A. Zeeb wrote: BAZ On 5. Jul 2012, at 19:53 , Mikolaj Golub wrote: On Thu, 05 Jul 2012 12:18:20 -0700 Xin Li wrote: XL Hi, Mikolaj, XL On 07/04/12 00:00, Mikolaj Golub wrote: Is this observed after destroying epair? There is an issue with epair: on destroy, when epair_clone_destroy() calls ether_ifdetach() for its second half it does not switch to its vnet and if_detach_internal() can't find the interface and just returns. As a result, the V_ifnet list is left with a dead reference. XL Yes. http://lists.freebsd.org/pipermail/freebsd-virtualization/2011-January/000628.html Here is an updated patch against CURRENT: http://people.freebsd.org/~trociny/if_epair.c.epair_clone_destroy.1.patch XL Your XL patch did fix the problem, thanks! Are you going to commit it XL against -HEAD and then MFC after a while? I would like Bjoern to review it before I commit it, or at least to say that he does not mind, if he does not have time to review -) BAZ To me the patch looks wrong; I am wondering if someone broke some other central BAZ assumptions but given I cannot currently spend time on this and if it fixes things BAZ feel free to go ahead. If you told me what looks wrong, I could try to dig in that direction and might come back with a better solution, instead of committing a presumably wrong fix. -- Mikolaj Golub ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org
Re: kern/158686: [vimage] [tap] [patch] Add VIMAGE support to if_tap
The following reply was made to PR kern/158686; it has been noted by GNATS. From: Mikolaj Golub troc...@freebsd.org To: Daan Vreeken \[PA4DAN\] pa4...@bliksem.vehosting.nl Cc: bug-follo...@freebsd.org, Alexander V. Chernikov melif...@freebsd.org Subject: Re: kern/158686: [vimage] [tap] [patch] Add VIMAGE support to if_tap Date: Sat, 16 Jun 2012 10:25:26 +0300 Hi Daan, The patch is committed. Thank you! Note that I have not committed the FIB part of the patch, though, because I am not sure it was supposed to be done this way. Your change looks reasonable to me, as it looks like without it the mbufs of all incoming packets are not tagged on the tap interface. On the other hand, I don't see examples in the kernel where the tag is set before the if_input() call (only before netisr_queue() or netisr_dispatch()). if_input() is actually ether_input(). Maybe the tag is supposed to be added in ether_input() instead? I am cc'ing Alexander Chernikov, who is the author of the original patch that added mbuf tagging of incoming packets (r223741), and closing this PR, as the VIMAGE part is fixed. -- Mikolaj Golub ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org
[patch] VirtualBox-4.0.14 + FreeBSD/CURRENT + VIMAGE: crash on vm shutdown
Hi, Here is a patch that fixes the issue I have been observing recently: running on VIMAGE enabled kernel VirtualBox causes the kernel crash when vm is powered off: ng_unref_node(8b156880,0,1,101,0,...) at ng_unref_node+0x74 ng_snd_item(8d060d40,0,8b0e18f0,0,0,...) at ng_snd_item+0x2a5 ng_send_fn(8b156880,0,8b0e18f0,0,0,...) at ng_send_fn+0x35 ng_rmnode_self(8b156880,89f840e0,8b0dbcc0,deec88cc,8b0d9ffa,...) at ng_rmnode_self+0x4a vboxNetFltOsDeleteInstance(89f84010,89f84014,89f84010,deec88ec,8b0da0f4,...) at vboxNetFltOsDeleteInstance+0x63 vboxNetFltRelease(89f84010,0,0,0,89f84014,...) at vboxNetFltRelease+0x6a vboxNetFltPortDisconnectAndRelease(89f84014,2710,87748bd4,87748bd4,deec8930,...) at vboxNetFltPortDisconnectAndRelease+0x64 _end(8b15ed10,89f84210,877456b0,0,deec8990,...) at 0x8cffe057 SUPR0ObjRelease(8b15ed10,89f4b810,deec89c8,deec89a0,0,...) at SUPR0ObjRelease+0x133 _end(8b15eb90,898a0e10,877456b0,0,89f4b810,...) at 0x8cffe4bb supdrvCleanupSession(8162bac0,89f4b810,89f4b810,deec89f4,8160e52f,...) at supdrvCleanupSession+0xef supdrvCloseSession(8162bac0,89f4b810,8ceecb18,deec8a18,8ceecaa0,...) at supdrvCloseSession+0x19 VBoxDrvFreeBSDClose(89ee6c00,3,2000,8adcc5c0,8adcc5c0,...) at VBoxDrvFreeBSDClose+0x2f devfs_close(deec8a78,8ceecaa0,80400,80f8caac,133,...) at devfs_close+0x2ca VOP_CLOSE_APV(81095960,deec8a78,80f8caac,133,2,...) at VOP_CLOSE_APV+0xda vn_close(8ceecaa0,3,8b24b380,8adcc5c0,80f792e0,...) at vn_close+0x190 vn_closefile(89f8cd58,8adcc5c0,0,89f8cd58,0,...) at vn_closefile+0xe4 devfs_close_f(89f8cd58,8adcc5c0,0,0,89f8cd58,...) at devfs_close_f+0x35 _fdrop(89f8cd58,8adcc5c0,0,deec8b80,0,8adcc670,81273398,810a7620,89e50c2c,79d,80f7401d,deec8b90,80a2f81e,89e50c2c,8,80f7401d,79d,0,89f8cd58) at _fdrop+0x43 closef(89f8cd58,8adcc5c0,79d,79a,8adcc670,...) at closef+0x2b0 fdfree(8adcc5c0,0,80f74dc6,107,deec8c18,...) at fdfree+0x3ea exit1(8adcc5c0,0,deec8d1c,80db7aee,8adcc5c0,...) 
at exit1+0x57a sys_sys_exit(8adcc5c0,deec8cec,80fd0bf8,80f53607,8adbf5c0,...) at sys_sys_exit+0x1d syscall(deec8d28) at syscall+0x2de Xint0x80_syscall() at Xint0x80_syscall+0x21 -- Mikolaj Golub --- VirtualBox-4.0.14_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c.orig 2012-03-18 00:21:42.0 +0200 +++ VirtualBox-4.0.14_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c 2012-03-18 00:22:18.0 +0200 @@ -651,13 +651,13 @@ bool vboxNetFltOsMaybeRediscovered(PVBOX ng_rmnode_self(pThis-u.s.node); pThis-u.s.node = NULL; } +VBOXCURVNET_RESTORE(); if (ifp0 != NULL) { vboxNetFltOsDeleteInstance(pThis); vboxNetFltOsInitInstance(pThis, NULL); } -VBOXCURVNET_RESTORE(); return !ASMAtomicUoReadBool(pThis-fDisconnectedFromHost); } @@ -671,8 +671,10 @@ void vboxNetFltOsDeleteInstance(PVBOXNET mtx_destroy(pThis-u.s.inq.ifq_mtx); mtx_destroy(pThis-u.s.outq.ifq_mtx); +VBOXCURVNET_SET_FROM_UCRED(); if (pThis-u.s.node != NULL) ng_rmnode_self(pThis-u.s.node); +VBOXCURVNET_RESTORE(); pThis-u.s.node = NULL; } ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org
FIX: accessing module's virtualized global variables from another module
Hi, Some time ago I reported this problem (the thread vnet: accessing module's virtualized global variables from another module). I got a reply only from Marko but failed to convince him that it was not an issue with the curvnet context not being set properly. I will try to explain the issue again, as this time I come with a patch that might be a solution. Below are some things that may be obvious to people who are involved in the subject. Please skip to the last paragraphs then :-). So, the kernel and modules each have a 'set_vnet' linker set, and the virtualized global variables (defined via the VNET_DEFINE() macro) are placed in this set. This serves two purposes: 1) When a new virtual network stack instance is allocated, it is initialized by copying the globals from the kernel 'set_vnet' linker set. 2) The variable name acts as a unique global name by which the variable can be referred to. The location of a per-virtual-instance variable is calculated at run-time using accessor macros (VNET_VNET, VNET, ...), as in the example below for the ifnet variable in vnet0: vnet0->vnet_data_base + (uintptr_t) &vnet_entry_ifnet When a vnet is created its vnet_data_base is set accordingly to make this work. This works for virtualized variables from the kernel 'set_vnet' linker set, but modules need additional work. When a module is loaded, its global variables from its 'set_vnet' linker set are copied to the kernel 'set_vnet', which has additional space ('modspace') reserved for this purpose. This makes (1) work. And to make the formula in (2) valid for the module's virtual globals, the linker relocates all references to them according to this formula: (x - ef->vnet_start) + ef->vnet_base (see kern/link_elf.c:elf_relocaddr()). Here is an example. In the ipfw module the layer3_chain virtual global is defined: kopusha:~% nm /boot/kernel/ipfw.ko |grep -i 'set_vnet\|layer3_chain' 00010820 A __start_set_vnet 00010b0c A __stop_set_vnet 00010840 d vnet_entry_layer3_chain So it has relative address 0x00010840. 
The module is loaded at 0x9337f000: kopusha:~% kldstat|grep ipfw.ko 222 0x9337f000 11000ipfw.ko and vnet_entry_layer3_chain is at 0x9337f000 + 0x00010840 = 0x9338f840: (kgdb) p &vnet_entry_layer3_chain $1 = (struct ip_fw_chain *) 0x9338f840 But inside ipfw.ko, the linker relocated the 0x9338f840 references to 0x86dac580; otherwise the VNET macros would access the wrong location: (kgdb) p *(struct ip_fw_chain *)(vnet0->vnet_data_base + (uintptr_t) &vnet_entry_layer3_chain) Error accessing memory address 0x9908b2c0: Bad address. (kgdb) p *(struct ip_fw_chain *)(vnet0->vnet_data_base + (uintptr_t) 0x86dac580) $2 = {rules = 0x0, reap = 0x872d296c, default_rule = 0x937bdbd0, n_rules = -1823040548, ... So far so good. But what if the global defined in one module is accessed from another module (as is the case for layer3_chain, which is accessed from ipfw_nat)? The linker relocates references only for local globals (defined in the module itself), so inside ipfw_nat the global is accessed using the unmodified vnet_entry_layer3_chain location, and thus at the wrong address. The fix could be to make the linker, on module load, recognize external virtual globals defined in previously loaded modules and relocate them accordingly. For this we need a set_vnet list, in which the addresses of the modules' 'set_vnet' linker sets are stored. The attached patch implements this and works for me. Also, could someone explain what kern/link_elf_obj.c is for? The patch does not deal with this file, but I see relocations there similar to those in link_elf.c. So I might need to make similar modifications in link_elf_obj.c too, but I don't know where this code is used (it looks like it is not executed on my box, so I don't know how to test it). 
-- Mikolaj Golub Index: sys/net/vnet.c === --- sys/net/vnet.c (revision 224286) +++ sys/net/vnet.c (working copy) @@ -152,6 +152,11 @@ struct vnet *vnet0; * initialized, in order to propagate the new static initializations to all * existing virtual network stack instances so that the soon-to-be executing * module will find every network stack instance with proper default values. + * + * The addresses of modules 'set_vnet' linker sets are stored in + * vnet_set list. The list is used by the kernel module linker to bind + * references of virtualized global variables defined in another + * modules. */ /* @@ -302,6 +307,102 @@ vnet_destroy(struct vnet *vnet) } /* + * set_vnet list. + */ +MALLOC_DEFINE(M_SET_VNET, set_vnet, VNET data); + +struct set_vnet { + uintptr_t vs_start; + uintptr_t vs_stop; + uintptr_t vs_base; + TAILQ_ENTRY(set_vnet) vs_link; +}; + +static TAILQ_HEAD(set_vnet_head, set_vnet) set_vnet_list; + +static void +set_vnet_insert(struct set_vnet *set) +{ + struct set_vnet *iter; + + TAILQ_FOREACH(iter, set_vnet_list, vs_link) { + + KASSERT((set-vs_start iter-vs_start set-vs_stop iter-vs_stop) || + (set-vs_start iter
Re: vnet: accessing module's virtualized global variables from another module
On Mon, 9 May 2011 16:21:15 +0200 Marko Zec wrote: MZ On Monday 09 May 2011 14:48:25 Mikolaj Golub wrote: Hi, Trying ipfw_nat under VIMAGE kernel I got this panic on the module load: MZ Hi, MZ I think the problem here is that curvnet context is not set properly on entry MZ to ipfw_nat_modevent(). The canonical way to initialize VNET-enabled MZ subsystems is to trigger them using VNET_SYSINIT() macros (instead of using MZ modevent mechanisms), which in turn ensure that: MZ a) that the initializer function gets invoked for each existing vnet MZ b) curvnet context is set properly on entry to initializer functions and hm, sorry, but I don't see how curvnet context might help here. For me this does not look like curvnet context problem or my understanding how it works completely wrong. Below is kgdb session on live VIMAGE system with ipfw.ko loaded. Let's look at some kernel virtualized variable: (kgdb) p vnet_entry_ifnet $1 = {tqh_first = 0x0, tqh_last = 0x0} (kgdb) p vnet_entry_ifnet $2 = (struct ifnethead *) 0x8102d488 As expected the address is in kernel 'set_vnet': kopusha:/usr/src/sys% kldstat |grep kernel 1 69 0x8040 1092700 kernel kopusha:/usr/src/sys% nm /boot/kernel/kernel |grep __start_set_vnet 8102d480 A __start_set_vnet default vnet: (kgdb) p vnet0 $3 = (struct vnet *) 0x86d9b000 Calculate ifnet location on vnet0 (a la VNET_VNET(vnet0, ifnet)): (kgdb) printf 0x%x\n, vnet0-vnet_data_base + (uintptr_t) vnet_entry_ifnet 0x86d9c008 Access it: (kgdb) p *((struct ifnethead *)0x86d9c008) $4 = {tqh_first = 0x86da5c00, tqh_last = 0x89489c0c} (kgdb) p (*((struct ifnethead *)0x86d9c008)).tqh_first-if_dname $7 = 0x80e8b480 usbus (kgdb) p (*((struct ifnethead *)0x86d9c008)).tqh_first-if_vnet $8 = (struct vnet *) 0x86d9b000 Everything looks good. 
Now try the same with virtualized variable layer3_chain from ipfw module: (kgdb) p vnet_entry_layer3_chain $9 = {rules = 0x0, reap = 0x0, default_rule = 0x0, n_rules = 0, static_len = 0, map = 0x0, nat = {lh_first = 0x0}, tables = {0x0 repeats 128 times}, rwmtx = {lock_object = { lo_name = 0x0, lo_flags = 0, lo_data = 0, lo_witness = 0x0}, rw_lock = 0}, uh_lock = { lock_object = {lo_name = 0x0, lo_flags = 0, lo_data = 0, lo_witness = 0x0}, rw_lock = 0}, id = 0, gencnt = 0} master variable looks good (initialized to zeros), what about its address? (kgdb) p vnet_entry_layer3_chain $10 = (struct ip_fw_chain *) 0x894a5c00 It points to 'set_vnet' of the ipfw.ko: kopusha# kldstat |grep ipfw.ko 132 0x89495000 11000ipfw.ko kopusha:/usr/src/sys% nm /boot/kernel/ipfw.ko |grep __start_set_vnet 00010be0 A __start_set_vnet kopusha:/usr/src/sys% printf 0x%x\n $((0x89495000 + 0x00010be0)) 0x894a5be0 Calculate layer3_chain location on vnet0 (a la VNET_VNET(vnet0, layer3_chain)): (kgdb) printf 0x%x\n, vnet0-vnet_data_base + (uintptr_t) vnet_entry_layer3_chain 0x8f214780 Try to read it: (kgdb) p ((struct ip_fw_chain *)0x8f214780)-rwmtx $13 = {lock_object = {lo_name = 0x0, lo_flags = 0, lo_data = 0, lo_witness = 0x0}, rw_lock = 0} (kgdb) p ((struct ip_fw_chain *)0x8f214780)-rules $14 = (struct ip_fw *) 0x6 Data looks wrong. But this is the way how this variable is acessed by ipfw_nat. I see the same in the crash image: (kgdb) where ... #11 0xc09a4882 in _rw_wlock (rw=0xc6d5e91c, file=0xca0ac2e3 /usr/src/sys/modules/ipfw_nat/../../netinet/ipfw/ip_fw_nat.c, line=547) at /usr/src/sys/kern/kern_rwlock.c:238 #12 0xca0ab841 in ipfw_nat_modevent (mod=0xc98a48c0, type=0, unused=0x0) at /usr/src/sys/modules/ipfw_nat/../../netinet/ipfw/ip_fw_nat.c:547 note, rw=0xc6d5e91c (it crashed on it). 
And I get the same address doing like I did above: (kgdb) VNET_VNET vnet0 vnet_entry_layer3_chain at 0xc6d5e700 of type = struct ip_fw_chain (kgdb) p ((struct ip_fw_chain *)0xc6d5e700)-rwmtx $8 = (struct rwlock *) 0xc6d5e91c Thus ipfw_nat was in vnet0 context then. I saw crashes (in other modules) when the context was not initialised and they looked differently. Right location was 0x86d9c160 (found adding print to ipfw module, I don't know easier way): (kgdb) p ((struct ip_fw_chain *)0x86d9c160)-rwmtx $1 = {lock_object = {lo_name = 0x932ba4b3 IPFW static rules, lo_flags = 69402624, lo_data = 0, lo_witness = 0x86d6ab30}, rw_lock = 1} (kgdb) p ((struct ip_fw_chain *)0x86d9c160)-rules $2 = (struct ip_fw *) 0x8f2d1c80 So I don't see a way how to reach module's virtualized variable from outside the module even if you are in the right vnet context. The linker, when loading the module and allocating the variable on vnet stacks in 'modspace' possesses this information and it reallocates addresses in the module and they are accessible from inside the module, but not from outside. MZ Cheers, MZ Marko Fatal trap 12: page fault while in kernel mode cpuid = 1; apic id = 01 fault virtual address = 0x4 fault code = supervisor read, page not present instruction pointer
Re: HEADS UP: tentative plan to merge VIMAGE parts
On Sun, 13 Feb 2011 17:01:28 +0000 (UTC) Bjoern A. Zeeb wrote: BAZ On Thu, 3 Feb 2011, Bjoern A. Zeeb wrote: BAZ Hi, next week I plan to extract the initial parts from perforce and merge them to SVN. This will include: 1) vnet socket pushdown. BAZ As some might have noticed I committed a bit of the noise from that BAZ already. It's proven kind of hard to extract the things from perforce BAZ with an additional several months of development and changes on top, BAZ but it'll be easier with every commit to SVN;) BAZ I will continue to directly commit small changes to SVN while I will BAZ post larger ones here for review/testing. If you are tracking VIMAGE BAZ in HEAD and anything breaks for you please yell at me! BAZ Here's the vnet socket push back patch: BAZ http://people.freebsd.org/~bz/20110213-01-vnet-socket-pushdown.diff BAZ I have done some but not thorough testing on the patch. Given it BAZ should be a complete extract from p4 I have been using it for more BAZ than half a year though;) BAZ I plan to commit this early-mid-weekish, so if you want to test let me BAZ know by then. I did not do much testing but the patch works for me. BAZ If you are on stable/8 you might want to apply the combined upcoming BAZ merge patch before the above: BAZ http://people.freebsd.org/~bz/20110213-02-vnet-mfc01.diff I am running my notebook (stable/8) with these patches. BAZ I'd suggest we'll check an updated smbfs patch after that and get it BAZ in (if still needed). Previously I patched smb_trantcp.c in order to prevent crashes when mounting smbfs. Now it looks like I don't need it any more -- smb works without that patch. -- Mikolaj Golub ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org
Re: VirtualBox + VIMAGE
On Thu, 27 Jan 2011 00:19:57 +0100 Bernhard Froehlich wrote: BF http://home.bluelife.at/patches/virtualbox-ose-kmod-devel-VIMAGE.diff BF I've integrated it a bit better into the VirtualBox build system, added BF the ports stuff and updated the patch for VirtualBox 4.0.2. Thanks! BF It is currently unclear to me why you add VIMAGE to CFLAGS but nowhere BF check for VIMAGE in VBoxNetFlt-freebsd.c. Shouldn't we add a check for BF VIMAGE in the #if defined line or is this already done somewhere deep in BF the included headers? All magic is in net/vnet.h :-) -- Mikolaj Golub ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org
Re: VirtualBox + VIMAGE
On Wed, 26 Jan 2011 10:22:40 +0100 Bernhard Froehlich wrote: BF Sounds like it's my turn now. Which FreeBSD version is required to be BF able to use it? As Bjoern noted it is for __FreeBSD_version >= 800500. BF Is VIMAGE enabled per default and what happens if VIMAGE is disabled - BF does it at least build fine with that patch? We have VIMAGE disabled by default. I have added to src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile: .if defined(VIMAGE) CFLAGS += -DVIMAGE .endif So to build the driver for VIMAGE enabled kernel one should run VIMAGE=1 make If VIMAGE variable is not defined the module for VIMAGE disabled kernel will be built. I am not familiar with VBox build process so there might be a better solution. I suppose we should have an option in ports, something: .if ${OSVERSION} > 800500 OPTIONS+=VIMAGE "Build for VIMAGE kernel" off .endif and if it is on set VIMAGE make environment variable. Or maybe you have a better solution. BF For Mikolaj Golub to get this patch upstream we need that you either BF agree that his patch is under the MIT License or sign a Sun Contributors BF Agreement. Then we can submit that patch upstream or you can do it BF yourself if you prefer. Read here: BF http://www.virtualbox.org/wiki/Contributor_information MIT License is ok. It would be very nice of you if you submitted the patch upstream :-). If nobody does this then I will try to submit it myself. -- Mikolaj Golub ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org
Re: VirtualBox + VIMAGE
On Sun, 23 Jan 2011 23:44:41 -0800 Julian Elischer wrote: JE On 1/23/11 6:33 AM, Mikolaj Golub wrote: As a quick fix I added in VBoxNetFlt-freebsd.c in all problem functions CURVNET_SET_QUIET(vnet0) macro, which sets td->td_vnet to default vnet (see the attached patch). This has fixed the issue for me and now I am happily running both VNET jails and VirtualBox machines on my desktop. But the way I fixed the issue looks hackish for me. Could someone suggest a better solution? JE this is probably at least partially the correct solution. JE Usually you should also set the value back to its previous value when JE you leave that code as well. Thank you for the comments. Actually the patch contained CURVNET_RESTORE() too. I just did not mention it :-). So if it looks like the right solution here is an improved version of the patch: vnet to switch is taken from thread's ucred (on interface initialization) or from ifp->if_vnet. In case someone is interested in trying this patch, below are the instructions how to build VIMAGE safe vbox driver from ports: cd /usr/ports/emulators/virtualbox-ose-kmod make patch cd work patch -p0 < /path/to/VirtualBox-3.2.12.VIMAGE.patch cd .. 
VIMAGE=1 make -- Mikolaj Golub diff -rpu VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile --- VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile 2010-12-01 19:09:33.0 +0200 +++ VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile 2011-01-25 22:48:26.0 +0200 @@ -26,6 +26,10 @@ CFLAGS += -DRT_OS_FREEBSD -DIN_RING0 -DI CFLAGS += -DRT_ARCH_AMD64 .endif +.if defined(VIMAGE) + CFLAGS += -DVIMAGE +.endif + SRCS = \ VBoxNetFlt-freebsd.c \ VBoxNetFlt.c \ diff -rpu VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c --- VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c 2010-12-01 19:09:33.0 +0200 +++ VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c 2011-01-25 22:48:26.0 +0200 @@ -37,6 +37,7 @@ #include sys/module.h #include sys/systm.h #include sys/errno.h +#include sys/jail.h #include sys/kernel.h #include sys/fcntl.h #include sys/conf.h @@ -51,6 +52,7 @@ #include net/if_dl.h #include net/if_types.h #include net/ethernet.h +#include net/vnet.h #include netgraph/ng_message.h #include netgraph/netgraph.h @@ -322,6 +324,7 @@ static int ng_vboxnetflt_rcvdata(hook_p struct m_tag *mtag; bool fActive; +CURVNET_SET_QUIET(ifp-if_vnet); fActive = vboxNetFltTryRetainBusyActive(pThis); NGI_GET_M(item, m); @@ -346,6 +349,7 @@ static int ng_vboxnetflt_rcvdata(hook_p ether_demux(ifp, m); if (fActive) vboxNetFltRelease(pThis, true /*fBusy*/); +CURVNET_RESTORE(); return (0); } mtx_lock_spin(pThis-u.s.inq.ifq_mtx); @@ -363,6 +367,7 @@ static int ng_vboxnetflt_rcvdata(hook_p int rc = ether_output_frame(ifp, m); if (fActive) vboxNetFltRelease(pThis, true /*fBusy*/); +CURVNET_RESTORE(); return rc; } mtx_lock_spin(pThis-u.s.outq.ifq_mtx); @@ -377,6 +382,7 @@ static int 
ng_vboxnetflt_rcvdata(hook_p if (fActive) vboxNetFltRelease(pThis, true /*fBusy*/); +CURVNET_RESTORE(); return (0); } @@ -409,6 +415,7 @@ static void vboxNetFltFreeBSDinput(void bool fDropIt = false, fActive; PINTNETSG pSG; +CURVNET_SET_QUIET(ifp-if_vnet); vboxNetFltRetain(pThis, true /* fBusy */); for (;;) { @@ -438,6 +445,7 @@ static void vboxNetFltFreeBSDinput(void ether_demux(ifp, m); } vboxNetFltRelease(pThis, true /* fBusy */); +CURVNET_RESTORE(); } /** @@ -452,6 +460,7 @@ static void vboxNetFltFreeBSDoutput(void bool fDropIt = false, fActive; PINTNETSG pSG; +CURVNET_SET_QUIET(ifp-if_vnet); vboxNetFltRetain(pThis, true /* fBusy */); for (;;) { @@ -481,6 +490,7 @@ static void vboxNetFltFreeBSDoutput(void ether_output_frame(ifp, m); } vboxNetFltRelease(pThis, true /* fBusy */); +CURVNET_RESTORE(); } /** @@ -498,6 +508,7 @@ int vboxNetFltPortOsXmit(PVBOXNETFLTINS int error; ifp = (void *)ASMAtomicUoReadPtr((void * volatile *)pThis-u.s.ifp); +CURVNET_SET_QUIET(ifp-if_vnet); if (fDst INTNETTRUNKDIR_WIRE) { @@ -539,6 +550,7 @@ int vboxNetFltPortOsXmit(PVBOXNETFLTINS m-m_pkthdr.rcvif = ifp; ifp-if_input(ifp, m); } +CURVNET_RESTORE(); return VINF_SUCCESS; } @@ -556,6 +568,7 @@ int vboxNetFltOsInitInstance(PVBOXNETFLT node_p node; RTSPINLOCKTMP Tmp
VirtualBox + VIMAGE
Hi, Sorry for cross posting, but this question concerns both VirtualBox and VIMAGE :-). I would like to be able to run VirtualBox on my desktop, with kernel compiled with VIMAGE, so I would use light vnet jails for FreeBSD VMs and VirtualBox for other OSes. Currently the system will crash if you try to use VirtualBox bridged networking under VIMAGE kernel. The crash may look like below: db> bt Tracing pid 2349 tid 100239 td 0xc984b2d0 ifunit(c7e3b9ac,6f,c129e970,f8089984,c08d6dc0,...) at ifunit+0x33 vboxNetFltOsInitInstance(c7e3b810,0,8,7,c9aa6ae0,...) at vboxNetFltOsInitInstance+0x31 vboxNetFltFactoryCreateAndConnect(c9aa8cc8,c96636d4,c9664010,0,c966403c,...) at vboxNetFltFactoryCreateAndConnect+0x1f7 _end(c7b11810,c962c238,3,c962c2b8,0,...) at 0xc9c0114f _end(c7b11810,c962c22c,f20,c0d31268,f8089a90,...) at 0xc9c01265 _end(c962c22c,0,0,c7b11810,c0cfe2fe,...) at 0xc9bbc775 _end(f808a000,fffd,28,c962c22c,0,...) at 0xc9bbd086 supdrvIOCtl(c10c5607,c12a0a40,c7b11810,c962c200,c0e81df8,...) at supdrvIOCtl+0x1af1 VBoxDrvFreeBSDIOCtl(c94efb00,c10c5607,c962c200,3,c984b2d0,...) at VBoxDrvFreeBSDIOCtl+0x1ea devfs_ioctl_f(c8e70118,c10c5607,c962c200,c9a82900,c984b2d0,...) at devfs_ioctl_f+0x10b kern_ioctl(c984b2d0,e,c10c5607,c962c200,1089cec,...) at kern_ioctl+0x1fd ioctl(c984b2d0,f8089cec,2b077000,1,c94ed2a8,...) at ioctl+0x134 syscallenter(c984b2d0,f8089ce4,c0c20e2d,c0e83290,0,...) at syscallenter+0x2a6 syscall(f8089d28) at syscall+0x4f Xint0x80_syscall() at Xint0x80_syscall+0x21 --- syscall (54, FreeBSD ELF32, ioctl), eip = 0x282ae093, esp = 0xbf8b980c, ebp = 0xbf8b9828 --- So it crashes when a function (ifunit in this case) tries to access virtualized variable (V_ifnet in this case). The macro uses td->td_vnet for this, which should be set to current vnet, but for VBox driver threads it is NULL. As a quick fix I added in VBoxNetFlt-freebsd.c in all problem functions CURVNET_SET_QUIET(vnet0) macro, which sets td->td_vnet to default vnet (see the attached patch). 
This has fixed the issue for me and now I am happily running both VNET jails and VirtualBox machines on my desktop. But the way I fixed the issue looks hackish for me. Could someone suggest a better solution? In case someone is interested in trying this patch, below are the instructions how to build VIMAGE safe vbox driver from ports: cd /usr/ports/emulators/virtualbox-ose-kmod make patch cd work patch -p0 /path/to/VirtualBox-3.2.12.VIMAGE.patch cd .. VIMAGE=1 make -- Mikolaj Golub diff -rpu VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile --- VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile 2010-12-01 19:09:33.0 +0200 +++ VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile 2011-01-23 12:44:24.0 +0200 @@ -26,6 +26,10 @@ CFLAGS += -DRT_OS_FREEBSD -DIN_RING0 -DI CFLAGS += -DRT_ARCH_AMD64 .endif +.if defined(VIMAGE) + CFLAGS += -DVIMAGE +.endif + SRCS = \ VBoxNetFlt-freebsd.c \ VBoxNetFlt.c \ diff -rpu VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c --- VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c 2010-12-01 19:09:33.0 +0200 +++ VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c 2011-01-23 12:14:19.0 +0200 @@ -51,6 +51,7 @@ #include net/if_dl.h #include net/if_types.h #include net/ethernet.h +#include net/vnet.h #include netgraph/ng_message.h #include netgraph/netgraph.h @@ -322,6 +323,7 @@ static int ng_vboxnetflt_rcvdata(hook_p struct m_tag *mtag; bool fActive; +CURVNET_SET_QUIET(vnet0); fActive = vboxNetFltTryRetainBusyActive(pThis); NGI_GET_M(item, m); @@ -346,6 +348,7 @@ static int ng_vboxnetflt_rcvdata(hook_p ether_demux(ifp, m); if (fActive) vboxNetFltRelease(pThis, true /*fBusy*/); +CURVNET_RESTORE(); return (0); } 
mtx_lock_spin(pThis-u.s.inq.ifq_mtx); @@ -363,6 +366,7 @@ static int ng_vboxnetflt_rcvdata(hook_p int rc = ether_output_frame(ifp, m); if (fActive) vboxNetFltRelease(pThis, true /*fBusy*/); +CURVNET_RESTORE(); return rc; } mtx_lock_spin(pThis-u.s.outq.ifq_mtx); @@ -377,6 +381,7 @@ static int ng_vboxnetflt_rcvdata(hook_p if (fActive) vboxNetFltRelease(pThis, true /*fBusy*/); +CURVNET_RESTORE(); return (0); } @@ -409,6 +414,7 @@ static void vboxNetFltFreeBSDinput(void bool fDropIt = false, fActive; PINTNETSG pSG; +CURVNET_SET_QUIET(vnet0); vboxNetFltRetain(pThis, true /* fBusy */); for (;;) { @@ -438,6 +444,7 @@ static void vboxNetFltFreeBSDinput(void ether_demux
epair: panic after destroying
Hi, On CURRENT when doing something like below: jail -c name=test vnet persist ifconfig epair0 create ifconfig epair0b vnet test ifconfig epair0a destroy jexec test ifconfig The system panics on the last command: #8 0xc0c32f7c in calltrap () at /usr/src/sys/i386/i386/exception.s:168 #9 0xc0994618 in strlen (str=0xdeadc0de Address 0xdeadc0de out of bounds) at /usr/src/sys/libkern/strlen.c:41 #10 0xc09216c7 in kvprintf (fmt=0xc0d32ec7 @ %s:%d, func=0xc0920950 snprintf_func, arg=0xeb5dfa30, radix=10, ap=0xeb5dfa74 \202lD4C0C4\005) at /usr/src/sys/kern/subr_prf.c:728 #11 0xc0921f4b in vsnprintf (str=0xc0ebd700 mtx_lock() of spin mutex , size=256, format=0xc0d32eac mtx_lock() of spin mutex %s @ %s:%d, ap=0xeb5dfa70 DEC0ADDE\202lD4C0C4\005) at /usr/src/sys/kern/subr_prf.c:461 #12 0xc08e914b in panic (fmt=0xc0d32eac mtx_lock() of spin mutex %s @ %s:%d) at /usr/src/sys/kern/kern_shutdown.c:557 #13 0xc08d882a in _mtx_lock_flags (m=0xc2ec7628, opts=0, file=0xc0d46c82 /usr/src/sys/net/rtsock.c, line=1476) at /usr/src/sys/kern/kern_mutex.c:197 #14 0xc09b8389 in sysctl_rtsock (oidp=0xc0e412a0, arg1=0xeb5dfbe4, arg2=4, req=0xeb5dfb70) at /usr/src/sys/net/rtsock.c:1476 #15 0xc08f3eed in sysctl_root (oidp=Variable oidp is not available. ) at /usr/src/sys/kern/kern_sysctl.c:1456 #16 0xc08f4145 in userland_sysctl (td=0xc3ab9870, name=0xeb5dfbdc, namelen=6, old=0x0, oldlenp=0xbfbfe538, inkernel=0, new=0x0, newlen=0, retval=0xeb5dfc3c, flags=0) at /usr/src/sys/kern/kern_sysctl.c:1566 #17 0xc08f4514 in __sysctl (td=0xc3ab9870, uap=0xeb5dfcec) at /usr/src/sys/kern/kern_sysctl.c:1492 #18 0xc092b793 in syscallenter (td=0xc3ab9870, sa=0xeb5dfce4) at /usr/src/sys/kern/subr_trap.c:318 #19 0xc0c4975f in syscall (frame=0xeb5dfd28) at /usr/src/sys/i386/i386/trap.c:1094 #20 0xc0c33011 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:266 #21 0x0033 in ?? () Previous frame inner to this frame (corrupt stack?) 
It crashes in sysctl_iflist when traversing V_ifnet list because it contains dead ifp. This is because when epair_clone_destroy() calls ether_ifdetach() for its second half it does not switch to its vnet and if_detach_internal() can't find the interface and just returns. This can be checked by applying the first of the attached patches: 5epair20a: link state changed to DOWN 5epair20b: link state changed to DOWN panic: if_detach_internal: ifp=0xc35b8800 not on the ifnet tailq 0xc3e43038 #11 0xc08e91f4 in panic ( fmt=0xc0d44def %s: ifp=%p not on the ifnet tailq %p) at /usr/src/sys/kern/kern_shutdown.c:574 #12 0xc09a19b1 in if_detach_internal (ifp=0xc35b8800, vmove=0) at /usr/src/sys/net/if.c:831 #13 0xc09a4190 in if_detach (ifp=0xc35b8800) at /usr/src/sys/net/if.c:805 #14 0xc09a5fdd in ether_ifdetach (ifp=0xc35b8800) at /usr/src/sys/net/if_ethersubr.c:989 #15 0xc3e63ec8 in epair_clone_destroy (ifc=0xc3e661c0, ifp=0xc2ec7400) at /usr/src/sys/modules/if_epair/../../net/if_epair.c:871 #16 0xc09a4b17 in if_clone_destroyif (ifc=0xc3e661c0, ifp=0xc2ec7400) at /usr/src/sys/net/if_clone.c:269 #17 0xc09a53c7 in if_clone_destroy (name=0xc303e940 epair20a) at /usr/src/sys/net/if_clone.c:230 #18 0xc09a2a21 in ifioctl (so=0xc3aa3680, cmd=2149607801, data=0xc303e940 epair20a, td=0xc3bcb5a0) at /usr/src/sys/net/if.c:2464 #19 0xc093c977 in soo_ioctl (fp=0xc3aa5000, cmd=2149607801, data=0xc303e940, active_cred=0xc2d8dc80, td=0xc3bcb5a0) at /usr/src/sys/kern/sys_socket.c:212 #20 0xc0936aed in kern_ioctl (td=0xc3bcb5a0, fd=3, com=2149607801, data=0xc303e940 epair20a) at file.h:254 #21 0xc0936c74 in ioctl (td=0xc3bcb5a0, uap=0xeb5fdcec) at /usr/src/sys/kern/sys_generic.c:679 #22 0xc092b793 in syscallenter (td=0xc3bcb5a0, sa=0xeb5fdce4) at /usr/src/sys/kern/subr_trap.c:318 #23 0xc0c4974f in syscall (frame=0xeb5fdd28) at /usr/src/sys/i386/i386/trap.c:1094 #24 0xc0c33001 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:266 Adding vnet switch to epair_clone_destroy() like below 
fixes the issue: - ether_ifdetach(oifp); + { + CURVNET_SET_QUIET(oifp-if_vnet); + ether_ifdetach(oifp); + CURVNET_RESTORE(); + } ether_ifdetach(ifp); Although it looks a bit ugly :-). What do you think about some reordering like in the second patch attached? Also it looks wrong that if_detach_internal() silently returned in that case. I think it should panic or at least warns (like in the third patch). -- Mikolaj Golub Index: sys/net/if.c === --- sys/net/if.c (revision 217617) +++ sys/net/if.c (working copy) @@ -828,11 +828,8 @@ if_detach_internal(struct ifnet *ifp, int vmove) #endif IFNET_WUNLOCK(); if (!found) { - if (vmove) - panic(%s: ifp=%p not on the ifnet tailq %p, - __func__, ifp, V_ifnet); - else - return; /* XXX this should panic as well? */ + panic(%s
smbfs on VIMAGE kernel
Hi, With the attached patch I can mount and use samba fs on current built with VIMAGE option. -- Mikolaj Golub Index: sys/netsmb/smb_trantcp.c === --- sys/netsmb/smb_trantcp.c (revision 217275) +++ sys/netsmb/smb_trantcp.c (working copy) @@ -46,6 +46,7 @@ __FBSDID($FreeBSD$); #include net/if.h #include net/route.h +#include net/vnet.h #include netinet/in.h #include netinet/tcp.h @@ -79,13 +80,17 @@ static int nb_setsockopt_int(struct socket *so, int level, int name, int val) { struct sockopt sopt; + int error; bzero(sopt, sizeof(sopt)); sopt.sopt_level = level; sopt.sopt_name = name; sopt.sopt_val = val; sopt.sopt_valsize = sizeof(val); - return sosetopt(so, sopt); + CURVNET_SET(so-so_vnet); + error = sosetopt(so, sopt); + CURVNET_RESTORE(); + return error; } static int @@ -286,8 +291,10 @@ nbssn_recvhdr(struct nbpcb *nbp, int *lenp, auio.uio_offset = 0; auio.uio_resid = sizeof(len); auio.uio_td = td; + CURVNET_SET(so-so_vnet); error = soreceive(so, (struct sockaddr **)NULL, auio, (struct mbuf **)NULL, (struct mbuf **)NULL, flags); + CURVNET_RESTORE(); if (error) return error; if (auio.uio_resid 0) { @@ -371,8 +378,10 @@ nbssn_recv(struct nbpcb *nbp, struct mbuf **mpp, i */ do { rcvflg = MSG_WAITALL; +CURVNET_SET(so-so_vnet); error = soreceive(so, (struct sockaddr **)NULL, auio, tm, (struct mbuf **)NULL, rcvflg); +CURVNET_RESTORE(); } while (error == EWOULDBLOCK || error == EINTR || error == ERESTART); if (error) ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org
Re: smbfs on VIMAGE kernel
On Tue, 11 Jan 2011 16:20:11 -0800 Julian Elischer wrote: JE On 1/11/11 11:42 AM, Mikolaj Golub wrote: Hi, With the attached patch I can mount and use samba fs on current built with VIMAGE option. JE but what does that actually MEAN? Without the patch when you try to mount samba fs on a system compiled with VIMAGE option it will just panic. I didn't need samba inside jails, I just wanted to mount samba fs on host. -- Mikolaj Golub ___ freebsd-virtualization@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization To unsubscribe, send any mail to freebsd-virtualization-unsubscr...@freebsd.org