Re: RFC: ipfw nat VIMAGE improvements

2013-08-25 Thread Mikolaj Golub
On Thu, Aug 15, 2013 at 12:25:34AM +0200, Marko Zec wrote:

 Anyhow, this looks fine to me...

Thanks, committed as r254776.

-- 
Mikolaj Golub
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org


Re: RFC: carp(4): improved VIMAGE support

2013-08-14 Thread Mikolaj Golub
On Tue, Aug 13, 2013 at 11:26:31AM +0400, Gleb Smirnoff wrote:
 On Mon, Aug 12, 2013 at 10:45:17PM +0300, Mikolaj Golub wrote:
 M On Mon, Aug 12, 2013 at 02:16:15PM +0400, Gleb Smirnoff wrote:
 M  On Sun, Aug 11, 2013 at 11:08:35PM +0300, Mikolaj Golub wrote:
 M  M Hi,
 M  M 
 M  M I would like to virtualize carp(4) variables to have per vnet control.
 M  M 
 M  M Any comments, objections?
 M  
 M  No objections!
 M 
 M Andrey pointed out (on irc) that carpstats was missing virtualization
 M too. Here is an updated patch.
 
 Thanks. As before, no objections from me.

Thanks. Committed as r254292.

-- 
Mikolaj Golub
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org


Re: RFC: carp(4): improved VIMAGE support

2013-08-12 Thread Mikolaj Golub
On Mon, Aug 12, 2013 at 02:16:15PM +0400, Gleb Smirnoff wrote:
 On Sun, Aug 11, 2013 at 11:08:35PM +0300, Mikolaj Golub wrote:
 M Hi,
 M 
 M I would like to virtualize carp(4) variables to have per vnet control.
 M 
 M Any comments, objections?
 
 No objections!

Andrey pointed out (on irc) that carpstats was missing virtualization
too. Here is an updated patch.

-- 
Mikolaj Golub
commit ac71429a70862ca75a614bb798dcfb62e67faca3
Author: Mikolaj Golub troc...@freebsd.org
Date:   Tue Aug 6 23:47:31 2013 +0300

Virtualize carp(4) variables to have per vnet control.

diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
index 9228a8f..436385d 100644
--- a/sys/netinet/ip_carp.c
+++ b/sys/netinet/ip_carp.c
@@ -187,48 +187,59 @@ static int proto_reg[] = {-1, -1};
  *   dereferencing our function pointers.
  */
 
-static int carp_allow = 1;		/* Accept incoming CARP packets. */
-static int carp_preempt = 0;		/* Preempt slower nodes. */
-static int carp_log = 1;		/* Log level. */
-static int carp_demotion = 0;		/* Global advskew demotion. */
-static int carp_senderr_adj = CARP_MAXSKEW;	/* Send error demotion factor */
-static int carp_ifdown_adj = CARP_MAXSKEW;	/* Iface down demotion factor */
+/* Accept incoming CARP packets. */
+static VNET_DEFINE(int, carp_allow) = 1;
+#define	V_carp_allow	VNET(carp_allow)
+
+/* Preempt slower nodes. */
+static VNET_DEFINE(int, carp_preempt) = 0;
+#define	V_carp_preempt	VNET(carp_preempt)
+
+/* Log level. */
+static VNET_DEFINE(int, carp_log) = 1;
+#define	V_carp_log	VNET(carp_log)
+
+/* Global advskew demotion. */
+static VNET_DEFINE(int, carp_demotion) = 0;
+#define	V_carp_demotion	VNET(carp_demotion)
+
+/* Send error demotion factor. */
+static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW;
+#define	V_carp_senderr_adj	VNET(carp_senderr_adj)
+
+/* Iface down demotion factor. */
+static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW;
+#define	V_carp_ifdown_adj	VNET(carp_ifdown_adj)
+
 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
 
 SYSCTL_NODE(_net_inet, IPPROTO_CARP,	carp,	CTLFLAG_RW, 0,	CARP);
-SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, carp_allow, 0,
-Accept incoming CARP packets);
-SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, carp_preempt, 0,
-High-priority backup preemption mode);
-SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, carp_log, 0,
-CARP log level);
-SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW,
+VNET_NAME(carp_allow), 0, Accept incoming CARP packets);
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW,
+VNET_NAME(carp_preempt), 0, High-priority backup preemption mode);
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW,
+VNET_NAME(carp_log), 0, CARP log level);
+SYSCTL_VNET_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW,
 0, 0, carp_demote_adj_sysctl, I,
 Adjust demotion factor (skew of advskew));
-SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW,
-carp_senderr_adj, 0, Send error demotion factor adjustment);
-SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW,
-carp_ifdown_adj, 0, Interface down demotion factor adjustment);
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW,
+VNET_NAME(carp_senderr_adj), 0, Send error demotion factor adjustment);
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW,
+VNET_NAME(carp_ifdown_adj), 0,
+Interface down demotion factor adjustment);
+
+VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
+VNET_PCPUSTAT_SYSINIT(carpstats);
+VNET_PCPUSTAT_SYSUNINIT(carpstats);
 
-static counter_u64_t carpstats[sizeof(struct carpstats) / sizeof(uint64_t)];
 #define	CARPSTATS_ADD(name, val)	\
-counter_u64_add(carpstats[offsetof(struct carpstats, name) / \
+counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
 	sizeof(uint64_t)], (val))
 #define	CARPSTATS_INC(name)		CARPSTATS_ADD(name, 1)
 
-static int
-carpstats_sysctl(SYSCTL_HANDLER_ARGS)
-{
-	struct carpstats s;
-
-	COUNTER_ARRAY_COPY(carpstats, s, sizeof(s) / sizeof(uint64_t));
-	if (req-newptr)
-		COUNTER_ARRAY_ZERO(carpstats, sizeof(s) / sizeof(uint64_t));
-	return (SYSCTL_OUT(req, s, sizeof(s)));
-}
-SYSCTL_PROC(_net_inet_carp, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RW,
-NULL, 0, carpstats_sysctl, I,
-CARP statistics (struct carpstats, netinet/ip_carp.h));
+SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
+carpstats, CARP statistics (struct carpstats, netinet/ip_carp.h));
 
 #define	CARP_LOCK_INIT(sc)	mtx_init((sc)-sc_mtx, carp_softc,   \
 	NULL, MTX_DEF)
@@ -251,12 +262,12 @@ SYSCTL_PROC(_net_inet_carp, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RW,
 } while (0)
 
 #define	CARP_LOG(...)	do {\
-	if (carp_log  0)\
+	if (V_carp_log  0)\
 		log(LOG_INFO, carp:  __VA_ARGS__);	\
 } while (0

RFC: carp(4): improved VIMAGE support

2013-08-11 Thread Mikolaj Golub
Hi,

I would like to virtualize carp(4) variables to have per vnet control.

Any comments, objections?

-- 
Mikolaj Golub
commit 646e0b161ae7782259335cec652e178652ed7af3
Author: Mikolaj Golub troc...@freebsd.org
Date:   Tue Aug 6 23:47:31 2013 +0300

Virtualize carp(4) variables for per vnet control.

diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
index 9228a8f..2ac39b7 100644
--- a/sys/netinet/ip_carp.c
+++ b/sys/netinet/ip_carp.c
@@ -187,28 +187,47 @@ static int proto_reg[] = {-1, -1};
  *   dereferencing our function pointers.
  */
 
-static int carp_allow = 1;		/* Accept incoming CARP packets. */
-static int carp_preempt = 0;		/* Preempt slower nodes. */
-static int carp_log = 1;		/* Log level. */
-static int carp_demotion = 0;		/* Global advskew demotion. */
-static int carp_senderr_adj = CARP_MAXSKEW;	/* Send error demotion factor */
-static int carp_ifdown_adj = CARP_MAXSKEW;	/* Iface down demotion factor */
+/* Accept incoming CARP packets. */
+static VNET_DEFINE(int, carp_allow) = 1;
+#define	V_carp_allow	VNET(carp_allow)
+
+/* Preempt slower nodes. */
+static VNET_DEFINE(int, carp_preempt) = 0;
+#define	V_carp_preempt	VNET(carp_preempt)
+
+/* Log level. */
+static VNET_DEFINE(int, carp_log) = 1;
+#define	V_carp_log	VNET(carp_log)
+
+/* Global advskew demotion. */
+static VNET_DEFINE(int, carp_demotion) = 0;
+#define	V_carp_demotion	VNET(carp_demotion)
+
+/* Send error demotion factor. */
+static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW;
+#define	V_carp_senderr_adj	VNET(carp_senderr_adj)
+
+/* Iface down demotion factor. */
+static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW;
+#define	V_carp_ifdown_adj	VNET(carp_ifdown_adj)
+
 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
 
 SYSCTL_NODE(_net_inet, IPPROTO_CARP,	carp,	CTLFLAG_RW, 0,	CARP);
-SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, carp_allow, 0,
-Accept incoming CARP packets);
-SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, carp_preempt, 0,
-High-priority backup preemption mode);
-SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, carp_log, 0,
-CARP log level);
-SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW,
+VNET_NAME(carp_allow), 0, Accept incoming CARP packets);
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW,
+VNET_NAME(carp_preempt), 0, High-priority backup preemption mode);
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW,
+VNET_NAME(carp_log), 0, CARP log level);
+SYSCTL_VNET_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW,
 0, 0, carp_demote_adj_sysctl, I,
 Adjust demotion factor (skew of advskew));
-SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW,
-carp_senderr_adj, 0, Send error demotion factor adjustment);
-SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW,
-carp_ifdown_adj, 0, Interface down demotion factor adjustment);
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW,
+VNET_NAME(carp_senderr_adj), 0, Send error demotion factor adjustment);
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW,
+VNET_NAME(carp_ifdown_adj), 0,
+Interface down demotion factor adjustment);
 
 static counter_u64_t carpstats[sizeof(struct carpstats) / sizeof(uint64_t)];
 #define	CARPSTATS_ADD(name, val)	\
@@ -251,12 +270,12 @@ SYSCTL_PROC(_net_inet_carp, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RW,
 } while (0)
 
 #define	CARP_LOG(...)	do {\
-	if (carp_log  0)\
+	if (V_carp_log  0)\
 		log(LOG_INFO, carp:  __VA_ARGS__);	\
 } while (0)
 
 #define	CARP_DEBUG(...)	do {\
-	if (carp_log  1)\
+	if (V_carp_log  1)\
 		log(LOG_DEBUG, __VA_ARGS__);		\
 } while (0)
 
@@ -277,8 +296,8 @@ SYSCTL_PROC(_net_inet_carp, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RW,
 	TAILQ_FOREACH((sc), (ifp)-if_carp-cif_vrs, sc_list)
 
 #define	DEMOTE_ADVSKEW(sc)	\
-(((sc)-sc_advskew + carp_demotion  CARP_MAXSKEW) ?	\
-CARP_MAXSKEW : ((sc)-sc_advskew + carp_demotion))
+(((sc)-sc_advskew + V_carp_demotion  CARP_MAXSKEW) ?	\
+CARP_MAXSKEW : ((sc)-sc_advskew + V_carp_demotion))
 
 static void	carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
 static struct carp_softc
@@ -430,7 +449,7 @@ carp_input(struct mbuf *m, int hlen)
 
 	CARPSTATS_INC(carps_ipackets);
 
-	if (!carp_allow) {
+	if (!V_carp_allow) {
 		m_freem(m);
 		return;
 	}
@@ -513,7 +532,7 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
 
 	CARPSTATS_INC(carps_ipackets6);
 
-	if (!carp_allow) {
+	if (!V_carp_allow) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
@@ -647,7 +666,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
 		 * If we're pre-empting masters who advertise slower than us,
 		 * and this one claims to be slower, treat him as down.
 		 */
-		if (carp_preempt  timevalcmp(sc_tv, ch_tv

Re: lagg panic

2013-06-07 Thread Mikolaj Golub
On Mon, Feb 11, 2013 at 02:36:17PM +0200, Nikos Vassiliadis wrote:
 Hi,
 
 I just noticed this is not committed. Could somebody commit it?
 
 http://lists.freebsd.org/pipermail/freebsd-net/2011-November/030526.html
 

Committed (r251490). Thanks.

-- 
Mikolaj Golub
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org


Re: pf + vimage patch

2013-06-06 Thread Mikolaj Golub
On Thu, Jun 06, 2013 at 12:35:33PM +0200, Nikos Vassiliadis wrote:

  -VNET_DEFINE(u_long, pf_srchashsize);
  -#define   V_pf_srchashsizeVNET(pf_srchashsize)
  -SYSCTL_VNET_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN,
  -VNET_NAME(pf_srchashsize), 0, Size of pf(4) source nodes 
  hashtable);
  +u_longpf_srchashsize;
  +SYSCTL_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN,
  +pf_srchashsize, 0, Size of pf(4) source nodes hashtable);
 
 
  Why do you have to devirtualize these variables? Are per vnet
  hashtables sizes not useful or do they cause issues?
 
 Per VNET variables are not useful for pf_hashsize and pf_srchashsize
 since these values are RO and cannot be modified at runtime.

Indeed. I missed RDTUN flag.
 
 module unload is broken:( Maybe it can be fixed at a (bit) later date?

I don't think Gleb will be happy with this. Some time ago he removed
some vimage related stuff to prevent crashing on module unload (see
r229849). Actually your patch looks like a partial revert of that
commit. So I think you need to think about this issue from start. At
least it should not crash non-vimage kernel and there should be
understanding how to fix it for vimage kernel. Your approach with
keeping only one purge thread might make it simpler.

-- 
Mikolaj Golub
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org


Re: GPF when doing jail -r, possibly an use-after-free

2012-07-05 Thread Mikolaj Golub

On Thu, 05 Jul 2012 12:18:20 -0700 Xin Li wrote:

 XL Hi, Mikolaj,

 XL On 07/04/12 00:00, Mikolaj Golub wrote:
  Is this observed after destroying epair? There is an issue with
  epair: on destroy, when epair_clone_destroy() calls
  ether_ifdetach() for its second half it does not switch to its vnet
  and if_detach_internal() can't find the interface and just returns.
  As a result V_ifnet list is left with dead reference.

 XL Yes.

  http://lists.freebsd.org/pipermail/freebsd-virtualization/2011-January/000628.html
 
   Here is an updated patch against CURRENT:
  
  http://people.freebsd.org/~trociny/if_epair.c.epair_clone_destroy.1.patch

 XL Your
  
 XL patch did fixed the problem, thanks!  Are you going to commit it
 XL against -HEAD and then MFC after a while?

I would like Bjoern to review it before I commit it, or at least to say that he
does not mind, if he does not have time to review it :-)

-- 
Mikolaj Golub
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org


Re: GPF when doing jail -r, possibly an use-after-free

2012-07-05 Thread Mikolaj Golub

On Thu, 5 Jul 2012 20:21:53 + Bjoern A. Zeeb wrote:

 BAZ On 5. Jul 2012, at 19:53 , Mikolaj Golub wrote:

  
  On Thu, 05 Jul 2012 12:18:20 -0700 Xin Li wrote:
  
  XL Hi, Mikolaj,
  
  XL On 07/04/12 00:00, Mikolaj Golub wrote:
  Is this observed after destroying epair? There is an issue with
  epair: on destroy, when epair_clone_destroy() calls
  ether_ifdetach() for its second half it does not switch to its vnet
  and if_detach_internal() can't find the interface and just returns.
  As a result V_ifnet list is left with dead reference.
  
  XL Yes.
  
  http://lists.freebsd.org/pipermail/freebsd-virtualization/2011-January/000628.html
  
  Here is an updated patch against CURRENT:
  
  http://people.freebsd.org/~trociny/if_epair.c.epair_clone_destroy.1.patch
  
  XL Your
  
  XL patch did fixed the problem, thanks!  Are you going to commit it
  XL against -HEAD and then MFC after a while?
  
  I would like Bjoern review it before me committing, or at least tell he does
  not mind, if he does not have time to review -)

 BAZ To me the patch looks wrong; I am wondering if someone broke some other 
central
 BAZ assumptions but given I cannot currently spend time on this and if it 
fixes things
 BAZ feel free to go ahead.

If you told me what looks wrong, I could try to dig in that direction and might
come back with a better solution, instead of committing a presumably wrong fix.

-- 
Mikolaj Golub
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org


Re: kern/158686: [vimage] [tap] [patch] Add VIMAGE support to if_tap

2012-06-16 Thread Mikolaj Golub
The following reply was made to PR kern/158686; it has been noted by GNATS.

From: Mikolaj Golub troc...@freebsd.org
To: Daan Vreeken \[PA4DAN\] pa4...@bliksem.vehosting.nl
Cc: bug-follo...@freebsd.org, Alexander V. Chernikov melif...@freebsd.org
Subject: Re: kern/158686: [vimage] [tap] [patch] Add VIMAGE support to if_tap
Date: Sat, 16 Jun 2012 10:25:26 +0300

 Hi Daan,
 
 The patch is committed. Thank you!
 
 Note, I have not committed the FIB part of the patch though because I am not
 sure it was supposed to be done in this way. Your change looks reasonable for
 me, as it looks like without it mbufs of all incoming packets are not tagged
 on the tap interface. On the other hand, I don't see examples in the kernel
 where the tag is set before the if_input() call (only before netisr_queue() or
 netisr_dispatch()). if_input() is actually ether_input(). Maybe the tag is
 supposed to be added in ether_input() instead?
 
 I cc Alexander Chernikov, who is the author of the original patch that added
 mbuf tagging of incoming packets (r223741), and close this pr as the vimage
 part is fixed.
 
 -- 
 Mikolaj Golub
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org


[patch] VirtualBox-4.0.14 + FreeBSD/CURRENT + VIMAGE: crash on vm shutdown

2012-03-19 Thread Mikolaj Golub
Hi,

Here is a patch that fixes the issue I have been observing recently: running
VirtualBox on a VIMAGE-enabled kernel causes a kernel crash when the VM is
powered off:

ng_unref_node(8b156880,0,1,101,0,...) at ng_unref_node+0x74
ng_snd_item(8d060d40,0,8b0e18f0,0,0,...) at ng_snd_item+0x2a5
ng_send_fn(8b156880,0,8b0e18f0,0,0,...) at ng_send_fn+0x35
ng_rmnode_self(8b156880,89f840e0,8b0dbcc0,deec88cc,8b0d9ffa,...) at 
ng_rmnode_self+0x4a
vboxNetFltOsDeleteInstance(89f84010,89f84014,89f84010,deec88ec,8b0da0f4,...) at 
vboxNetFltOsDeleteInstance+0x63
vboxNetFltRelease(89f84010,0,0,0,89f84014,...) at vboxNetFltRelease+0x6a
vboxNetFltPortDisconnectAndRelease(89f84014,2710,87748bd4,87748bd4,deec8930,...)
 at vboxNetFltPortDisconnectAndRelease+0x64
_end(8b15ed10,89f84210,877456b0,0,deec8990,...) at 0x8cffe057
SUPR0ObjRelease(8b15ed10,89f4b810,deec89c8,deec89a0,0,...) at 
SUPR0ObjRelease+0x133
_end(8b15eb90,898a0e10,877456b0,0,89f4b810,...) at 0x8cffe4bb
supdrvCleanupSession(8162bac0,89f4b810,89f4b810,deec89f4,8160e52f,...) at 
supdrvCleanupSession+0xef
supdrvCloseSession(8162bac0,89f4b810,8ceecb18,deec8a18,8ceecaa0,...) at 
supdrvCloseSession+0x19
VBoxDrvFreeBSDClose(89ee6c00,3,2000,8adcc5c0,8adcc5c0,...) at 
VBoxDrvFreeBSDClose+0x2f
devfs_close(deec8a78,8ceecaa0,80400,80f8caac,133,...) at devfs_close+0x2ca
VOP_CLOSE_APV(81095960,deec8a78,80f8caac,133,2,...) at VOP_CLOSE_APV+0xda
vn_close(8ceecaa0,3,8b24b380,8adcc5c0,80f792e0,...) at vn_close+0x190
vn_closefile(89f8cd58,8adcc5c0,0,89f8cd58,0,...) at vn_closefile+0xe4
devfs_close_f(89f8cd58,8adcc5c0,0,0,89f8cd58,...) at devfs_close_f+0x35
_fdrop(89f8cd58,8adcc5c0,0,deec8b80,0,8adcc670,81273398,810a7620,89e50c2c,79d,80f7401d,deec8b90,80a2f81e,89e50c2c,8,80f7401d,79d,0,89f8cd58)
 at _fdrop+0x43
closef(89f8cd58,8adcc5c0,79d,79a,8adcc670,...) at closef+0x2b0
fdfree(8adcc5c0,0,80f74dc6,107,deec8c18,...) at fdfree+0x3ea
exit1(8adcc5c0,0,deec8d1c,80db7aee,8adcc5c0,...) at exit1+0x57a
sys_sys_exit(8adcc5c0,deec8cec,80fd0bf8,80f53607,8adbf5c0,...) at 
sys_sys_exit+0x1d
syscall(deec8d28) at syscall+0x2de
Xint0x80_syscall() at Xint0x80_syscall+0x21

-- 
Mikolaj Golub

--- VirtualBox-4.0.14_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c.orig	2012-03-18 00:21:42.0 +0200
+++ VirtualBox-4.0.14_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c	2012-03-18 00:22:18.0 +0200
@@ -651,13 +651,13 @@ bool vboxNetFltOsMaybeRediscovered(PVBOX
 ng_rmnode_self(pThis-u.s.node);
 pThis-u.s.node = NULL;
 }
+VBOXCURVNET_RESTORE();
 
 if (ifp0 != NULL)
 {
 vboxNetFltOsDeleteInstance(pThis);
 vboxNetFltOsInitInstance(pThis, NULL);
 }
-VBOXCURVNET_RESTORE();
 
 return !ASMAtomicUoReadBool(pThis-fDisconnectedFromHost);
 }
@@ -671,8 +671,10 @@ void vboxNetFltOsDeleteInstance(PVBOXNET
 mtx_destroy(pThis-u.s.inq.ifq_mtx);
 mtx_destroy(pThis-u.s.outq.ifq_mtx);
 
+VBOXCURVNET_SET_FROM_UCRED();
 if (pThis-u.s.node != NULL)
 ng_rmnode_self(pThis-u.s.node);
+VBOXCURVNET_RESTORE();
 pThis-u.s.node = NULL;
 }
 
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org


FIX: accessing module's virtualized global variables from another module

2011-07-24 Thread Mikolaj Golub
Hi,

Some time ago I reported this problem (the thread "vnet: acessing
module's virtualized global variables from another module"). I got a reply
only from Marko but failed to convince him that it was not an issue with the
curvnet context not being set properly.

I will try to explain the issue again, as this time I come with a patch that
might be a solution. Below are some things that may be obvious for people who
are involved in the subject. Please skip to the last paragraphs then :-).

So, kernel and modules have 'set_vnet' linker set and the virtualized global
variables (defined via VNET_DEFINE() macro) are placed in this set. This
serves two purposes:

1) When a new virtual network stack instance is allocated it is initialized
copying the globals from kernel 'set_vnet' linker set.

2) The variable name acts as unique global name by which the variable can be
referred to. The location of a per-virtual instance variable is calculated at
run-time using accessor macros (VNET_VNET, VNET, ...), like in the example
below for ifnet variable in the vnet0:

vnet0->vnet_data_base + (uintptr_t) &vnet_entry_ifnet

When a vnet is created its vnet_data_base is set accordingly to make this
work.

This works for virtualized variables from kernel 'set_vnet' linker set, but
modules need additional work. When a module is loaded its global variables
from 'set_vnet' linker set are copied to the kernel 'set_vnet', which has
additional space ('modspace') reserved for this purpose. This makes (1) work.
And to make formula (2) valid for the module's virtualized globals, the linker
relocates all references to them according to this formula:

(x - ef->vnet_start) + ef->vnet_base

(see kern/link_elf.c:elf_relocaddr()).

Here is an example. In the ipfw module layer3_chain virtual global is defined:

kopusha:~% nm /boot/kernel/ipfw.ko |grep -i 'set_vnet\|layer3_chain'
00010820 A __start_set_vnet
00010b0c A __stop_set_vnet
00010840 d vnet_entry_layer3_chain

So it has relative address 0x00010840. The module is loaded at 0x9337f000:

kopusha:~% kldstat|grep ipfw.ko
222 0x9337f000 11000ipfw.ko

and vnet_entry_layer3_chain is at 0x9337f000 + 0x00010840 = 0x9338f840:

(kgdb) p &vnet_entry_layer3_chain
$1 = (struct ip_fw_chain *) 0x9338f840

But inside ipfw.ko, the linker relocated the 0x9338f840 references to
0x86dac580, otherwise VNET macros will access wrong location:

(kgdb) p *(struct ip_fw_chain *)(vnet0->vnet_data_base + (uintptr_t) &vnet_entry_layer3_chain)
Error accessing memory address 0x9908b2c0: Bad address.
(kgdb) p *(struct ip_fw_chain *)(vnet0->vnet_data_base + (uintptr_t) 0x86dac580)
$2 = {rules = 0x0, reap = 0x872d296c, default_rule = 0x937bdbd0, n_rules = 
-1823040548, 
  ...

So far so good. But what if the global defined in one module is accessed from
another module (as it is in the case of layer3_chain, which is accessed from
ipfw_nat)? The linker relocates references only for local globals (defined
in its own module), so inside ipfw_nat the global is accessed using unmodified
vnet_entry_layer3_chain location, thus by wrong address.

The fix could be to make the linker, on module load, recognize external
virtual globals defined in previously loaded modules and relocate them
accordingly. For this we need a set_vnet list, in which the addresses of the
modules' 'set_vnet' linker sets are stored. The attached patch implements this
and works for me.

Also, could someone explain what kern/link_elf_obj.c is for? The patch does
not deal with this file, but I see relocations there similar to those in
link_elf.c. So I might need to make similar modifications in link_elf_obj.c too,
but I don't know where this code is used (it looks like it is not executed on
my box, so I don't know how to test it).

-- 
Mikolaj Golub

Index: sys/net/vnet.c
===
--- sys/net/vnet.c	(revision 224286)
+++ sys/net/vnet.c	(working copy)
@@ -152,6 +152,11 @@ struct vnet *vnet0;
  * initialized, in order to propagate the new static initializations to all
  * existing virtual network stack instances so that the soon-to-be executing
  * module will find every network stack instance with proper default values.
+ *
+ * The addresses of modules 'set_vnet' linker sets are stored in
+ * vnet_set list. The list is used by the kernel module linker to bind
+ * references of virtualized global variables defined in another
+ * modules.
  */
 
 /*
@@ -302,6 +307,102 @@ vnet_destroy(struct vnet *vnet)
 }
 
 /*
+ * set_vnet list.
+ */
+MALLOC_DEFINE(M_SET_VNET, set_vnet, VNET data);
+
+struct set_vnet {
+	uintptr_t   vs_start;
+	uintptr_t   vs_stop;
+	uintptr_t	vs_base;
+	TAILQ_ENTRY(set_vnet)	vs_link;
+};
+
+static TAILQ_HEAD(set_vnet_head, set_vnet) set_vnet_list;
+
+static void
+set_vnet_insert(struct set_vnet *set)
+{
+	struct set_vnet *iter;
+
+	TAILQ_FOREACH(iter, set_vnet_list, vs_link) {
+
+		KASSERT((set-vs_start  iter-vs_start  set-vs_stop  iter-vs_stop) ||
+		(set-vs_start  iter

Re: vnet: acessing module's virtualized global variables from another module

2011-05-09 Thread Mikolaj Golub

On Mon, 9 May 2011 16:21:15 +0200 Marko Zec wrote:

 MZ On Monday 09 May 2011 14:48:25 Mikolaj Golub wrote:
  Hi,
 
  Trying ipfw_nat under VIMAGE kernel I got this panic on the module load:

 MZ Hi,

 MZ I think the problem here is that curvnet context is not set properly on 
entry 
 MZ to ipfw_nat_modevent().  The canonical way to initialize VNET-enabled 
 MZ subsystems is to trigger them using VNET_SYSINIT() macros (instead of 
using 
 MZ modevent mechanisms), which in turn ensure that:

 MZ a) that the initializer function gets invoked for each existing vnet
 MZ b) curvnet context is set properly on entry to initializer functions and

Hm, sorry, but I don't see how the curvnet context might help here. To me this
does not look like a curvnet context problem, or else my understanding of how
it works is completely wrong.

Below is kgdb session on live VIMAGE system with ipfw.ko loaded.

Let's look at some kernel virtualized variable:

(kgdb) p vnet_entry_ifnet
$1 = {tqh_first = 0x0, tqh_last = 0x0}
(kgdb) p &vnet_entry_ifnet
$2 = (struct ifnethead *) 0x8102d488

As expected the address is in kernel 'set_vnet':

kopusha:/usr/src/sys% kldstat |grep kernel
 1   69 0x8040 1092700  kernel
kopusha:/usr/src/sys% nm /boot/kernel/kernel |grep  __start_set_vnet 
8102d480 A __start_set_vnet

default vnet:

(kgdb) p vnet0
$3 = (struct vnet *) 0x86d9b000

Calculate ifnet location on vnet0 (a la VNET_VNET(vnet0, ifnet)):

(kgdb) printf "0x%x\n", vnet0->vnet_data_base + (uintptr_t) &vnet_entry_ifnet
0x86d9c008

Access it:

(kgdb) p *((struct ifnethead *)0x86d9c008)
$4 = {tqh_first = 0x86da5c00, tqh_last = 0x89489c0c}
(kgdb) p (*((struct ifnethead *)0x86d9c008)).tqh_first->if_dname
$7 = 0x80e8b480 "usbus"
(kgdb) p (*((struct ifnethead *)0x86d9c008)).tqh_first->if_vnet
$8 = (struct vnet *) 0x86d9b000

Everything looks good. Now try the same with virtualized variable layer3_chain
from ipfw module:

(kgdb) p vnet_entry_layer3_chain
$9 = {rules = 0x0, reap = 0x0, default_rule = 0x0, n_rules = 0, static_len = 0, 
map = 0x0, 
  nat = {lh_first = 0x0}, tables = {0x0 repeats 128 times}, rwmtx = 
{lock_object = {
  lo_name = 0x0, lo_flags = 0, lo_data = 0, lo_witness = 0x0}, rw_lock = 
0}, uh_lock = {
lock_object = {lo_name = 0x0, lo_flags = 0, lo_data = 0, lo_witness = 0x0}, 
rw_lock = 0}, 
  id = 0, gencnt = 0}

master variable looks good (initialized to zeros), what about its address?

(kgdb) p &vnet_entry_layer3_chain
$10 = (struct ip_fw_chain *) 0x894a5c00

It points to 'set_vnet' of the ipfw.ko:

kopusha# kldstat |grep ipfw.ko
132 0x89495000 11000ipfw.ko
kopusha:/usr/src/sys% nm /boot/kernel/ipfw.ko |grep  __start_set_vnet  
00010be0 A __start_set_vnet
kopusha:/usr/src/sys% printf 0x%x\n $((0x89495000 + 0x00010be0))
0x894a5be0

Calculate layer3_chain location on vnet0 (a la VNET_VNET(vnet0, layer3_chain)):

(kgdb) printf "0x%x\n", vnet0->vnet_data_base + (uintptr_t) &vnet_entry_layer3_chain
0x8f214780

Try to read it:

(kgdb) p ((struct ip_fw_chain *)0x8f214780)->rwmtx
$13 = {lock_object = {lo_name = 0x0, lo_flags = 0, lo_data = 0, lo_witness = 
0x0}, rw_lock = 0}
(kgdb) p ((struct ip_fw_chain *)0x8f214780)->rules
$14 = (struct ip_fw *) 0x6

The data looks wrong. But this is how the variable is accessed by
ipfw_nat. I see the same in the crash image:

(kgdb) where
...
#11 0xc09a4882 in _rw_wlock (rw=0xc6d5e91c, 
file=0xca0ac2e3 
/usr/src/sys/modules/ipfw_nat/../../netinet/ipfw/ip_fw_nat.c, line=547)
at /usr/src/sys/kern/kern_rwlock.c:238
#12 0xca0ab841 in ipfw_nat_modevent (mod=0xc98a48c0, type=0, unused=0x0)
at /usr/src/sys/modules/ipfw_nat/../../netinet/ipfw/ip_fw_nat.c:547

Note rw=0xc6d5e91c (this is what it crashed on). I get the same address by
doing what I did above:

(kgdb) VNET_VNET vnet0 vnet_entry_layer3_chain
at 0xc6d5e700 of type = struct ip_fw_chain
(kgdb) p ((struct ip_fw_chain *)0xc6d5e700)-rwmtx
$8 = (struct rwlock *) 0xc6d5e91c

Thus ipfw_nat was in the vnet0 context at that point. I have seen crashes (in
other modules) when the context was not initialised, and they looked different.

The right location was 0x86d9c160 (found by adding a print to the ipfw module;
I don't know an easier way):

(kgdb) p ((struct ip_fw_chain *)0x86d9c160)-rwmtx
$1 = {lock_object = {lo_name = 0x932ba4b3 IPFW static rules, lo_flags = 
69402624, 
lo_data = 0, lo_witness = 0x86d6ab30}, rw_lock = 1}
(kgdb) p ((struct ip_fw_chain *)0x86d9c160)-rules
$2 = (struct ip_fw *) 0x8f2d1c80

So I don't see a way to reach a module's virtualized variable from outside
the module, even if you are in the right vnet context. The linker, when loading
the module and allocating the variable on the vnet stacks in 'modspace', possesses
this information and relocates the addresses in the module, so they are
accessible from inside the module, but not from outside.

 MZ Cheers,

 MZ Marko


  Fatal trap 12: page fault while in kernel mode
  cpuid = 1; apic id = 01
  fault virtual address   = 0x4
  fault code  = supervisor read, page not present
  instruction pointer

Re: HEADS UP: tenetative plan to merge VIMAGE parts

2011-02-15 Thread Mikolaj Golub

On Sun, 13 Feb 2011 17:01:28 + (UTC) Bjoern A. Zeeb wrote:

 BAZ On Thu, 3 Feb 2011, Bjoern A. Zeeb wrote:

 BAZ Hi,

  next week I plan to extract the initial parts from perforce and merge
  them to SVN. This will include:
 
  1) vnet socket pushdown.

 BAZ As some might have noticed I committed a bit of the noise from that
 BAZ already.  It's proven kind of hard to extract the things from perforce
 BAZ with an additional several months of development and changes on top,
 BAZ but it'll be easier with every commit to SVN;)

 BAZ I will continue to directly commit small changes to SVN while I will
 BAZ post larger ones here for review/testing.  If you are tracking VIMAGE
 BAZ in HEAD and anything breaks for you please yell at me!

 BAZ Here's the vnet socket push back patch:
 BAZ http://people.freebsd.org/~bz/20110213-01-vnet-socket-pushdown.diff

 BAZ I have done some but not thorough testing on the patch.  Given it
 BAZ should be a complete extract from p4 I have been using it for more
 BAZ than half a year though;)

 BAZ I plan to commit this early-mid-weekish, so if you want to test let me
 BAZ know by then.

I did not do much testing but the patch works for me.

 BAZ If you are on stable/8 you might want to apply the combined upcoming
 BAZ merge patch before the above:
 BAZ http://people.freebsd.org/~bz/20110213-02-vnet-mfc01.diff

Running my notebook (stable/8) with these patches.

 BAZ I'd suggest we'll check an updated smbfs patch after that and get it
 BAZ in (if still needed).

Previously I patched smb_trantcp.c in order to prevent crashes when mounting
smbfs. Now it looks like I don't need it any more -- smb works without that
patch.

-- 
Mikolaj Golub
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org


Re: VirtualBox + VIMAGE

2011-01-27 Thread Mikolaj Golub

On Thu, 27 Jan 2011 00:19:57 +0100 Bernhard Froehlich wrote:

 BF http://home.bluelife.at/patches/virtualbox-ose-kmod-devel-VIMAGE.diff

 BF I've integrated it a bit better into the VirtualBox build system, added
 BF the ports stuff and updated the patch for VirtualBox 4.0.2.

Thanks!

 BF It is currently unclear to me why you add VIMAGE to CFLAGS but nowhere
 BF check for VIMAGE in VBoxNetFlt-freebsd.c. Shouldn't we add a check for
 BF VIMAGE in the #if defined line or is this already done somewhere deep in
 BF the included headers?

All magic is in net/vnet.h :-)

-- 
Mikolaj Golub
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org


Re: VirtualBox + VIMAGE

2011-01-26 Thread Mikolaj Golub

On Wed, 26 Jan 2011 10:22:40 +0100 Bernhard Froehlich wrote:

 BF Sounds like it's my turn now. Which FreeBSD version is required to be
 BF able to use it?

As Bjoern noted, it is for __FreeBSD_version >= 800500.

 BF Is VIMAGE enabled per default and what happens if VIMAGE is disabled -
 BF does it at least build fine with that patch?

We have VIMAGE disabled by default.

I have added to src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile:

.if defined(VIMAGE)
 CFLAGS += -DVIMAGE
.endif

So to build the driver for VIMAGE enabled kernel one should run

VIMAGE=1 make

If VIMAGE variable is not defined the module for VIMAGE disabled kernel will
be built.

I am not familiar with VBox build process so there might be a better solution.

I suppose we should have an option in ports, something like:

.if ${OSVERSION} > 800500
OPTIONS+=VIMAGE Build for VIMAGE kernel off
.endif

and, if it is on, set the VIMAGE make environment variable. Or maybe you have
a better solution.

 BF For Mikolaj Golub to get this patch upstream we need that you either
 BF agree that his patch is under the MIT License or sign a Sun Contributors
 BF Agreement. Then we can submit that patch upstream or you can do it
 BF yourself if you prefer. Read here:
 BF http://www.virtualbox.org/wiki/Contributor_information

MIT License is ok.

It would be very nice of you if you submitted the patch upstream :-). If
nobody does this then I will try to submit it myself.

-- 
Mikolaj Golub
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org


Re: VirtualBox + VIMAGE

2011-01-25 Thread Mikolaj Golub

On Sun, 23 Jan 2011 23:44:41 -0800 Julian Elischer wrote:

 JE On 1/23/11 6:33 AM, Mikolaj Golub wrote:

  As a quick fix I added in VBoxNetFlt-freebsd.c in all problem functions
  CURVNET_SET_QUIET(vnet0) macro, which sets td-td_vnet to default vnet (see 
  the
  attached patch). This has fixed the issue for me and now I am happily 
  running
  both VNET jails and VirtualBox machines on my desktop.
 
  But the way I fixed the issue looks hackish for me. Could someone suggest a
  better solution?
 JE this is probably at least partially the correct solution.
 JE Usually you should also set the value back to its previous value when
 JE you leave that code as well.

Thank you for the comments. Actually the patch contained CURVNET_RESTORE()
too. I just did not mention it :-). So if it looks like the right solution, here
is an improved version of the patch: the vnet to switch to is taken from the
thread's ucred (on interface initialization) or from ifp->if_vnet.

  In case someone is interested in trying this patch, below are the 
  instructions
  how to build VIMAGE safe vbox driver from ports:
 
  cd /usr/ports/emulators/virtualbox-ose-kmod
  make patch
  cd work
  patch -p0  /path/to/VirtualBox-3.2.12.VIMAGE.patch
  cd ..
  VIMAGE=1 make
 

-- 
Mikolaj Golub
diff -rpu VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile
--- VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile	2010-12-01 19:09:33.0 +0200
+++ VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile	2011-01-25 22:48:26.0 +0200
@@ -26,6 +26,10 @@ CFLAGS += -DRT_OS_FREEBSD -DIN_RING0 -DI
  CFLAGS += -DRT_ARCH_AMD64
 .endif
 
+.if defined(VIMAGE)
+ CFLAGS += -DVIMAGE
+.endif
+
 SRCS = \
 	VBoxNetFlt-freebsd.c \
 	VBoxNetFlt.c \
diff -rpu VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c
--- VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c	2010-12-01 19:09:33.0 +0200
+++ VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c	2011-01-25 22:48:26.0 +0200
@@ -37,6 +37,7 @@
 #include sys/module.h
 #include sys/systm.h
 #include sys/errno.h
+#include sys/jail.h
 #include sys/kernel.h
 #include sys/fcntl.h
 #include sys/conf.h
@@ -51,6 +52,7 @@
 #include net/if_dl.h
 #include net/if_types.h
 #include net/ethernet.h
+#include net/vnet.h
 
 #include netgraph/ng_message.h
 #include netgraph/netgraph.h
@@ -322,6 +324,7 @@ static int ng_vboxnetflt_rcvdata(hook_p 
 struct m_tag *mtag;
 bool fActive;
 
+CURVNET_SET_QUIET(ifp-if_vnet);
 fActive = vboxNetFltTryRetainBusyActive(pThis);
 
 NGI_GET_M(item, m);
@@ -346,6 +349,7 @@ static int ng_vboxnetflt_rcvdata(hook_p 
 ether_demux(ifp, m);
 if (fActive)
 vboxNetFltRelease(pThis, true /*fBusy*/);
+CURVNET_RESTORE();
 return (0);
 }
 mtx_lock_spin(pThis-u.s.inq.ifq_mtx);
@@ -363,6 +367,7 @@ static int ng_vboxnetflt_rcvdata(hook_p 
 int rc = ether_output_frame(ifp, m);
 if (fActive)
 vboxNetFltRelease(pThis, true /*fBusy*/);
+CURVNET_RESTORE();
 return rc;
 }
 mtx_lock_spin(pThis-u.s.outq.ifq_mtx);
@@ -377,6 +382,7 @@ static int ng_vboxnetflt_rcvdata(hook_p 
 
 if (fActive)
 vboxNetFltRelease(pThis, true /*fBusy*/);
+CURVNET_RESTORE();
 return (0);
 }
 
@@ -409,6 +415,7 @@ static void vboxNetFltFreeBSDinput(void 
 bool fDropIt = false, fActive;
 PINTNETSG pSG;
 
+CURVNET_SET_QUIET(ifp-if_vnet);
 vboxNetFltRetain(pThis, true /* fBusy */);
 for (;;)
 {
@@ -438,6 +445,7 @@ static void vboxNetFltFreeBSDinput(void 
 ether_demux(ifp, m);
 }
 vboxNetFltRelease(pThis, true /* fBusy */);
+CURVNET_RESTORE();
 }
 
 /**
@@ -452,6 +460,7 @@ static void vboxNetFltFreeBSDoutput(void
 bool fDropIt = false, fActive;
 PINTNETSG pSG;
 
+CURVNET_SET_QUIET(ifp-if_vnet);
 vboxNetFltRetain(pThis, true /* fBusy */);
 for (;;)
 {
@@ -481,6 +490,7 @@ static void vboxNetFltFreeBSDoutput(void
 ether_output_frame(ifp, m);
 }
 vboxNetFltRelease(pThis, true /* fBusy */);
+CURVNET_RESTORE();
 }
 
 /**
@@ -498,6 +508,7 @@ int vboxNetFltPortOsXmit(PVBOXNETFLTINS 
 int error;
 
 ifp = (void *)ASMAtomicUoReadPtr((void * volatile *)pThis-u.s.ifp);
+CURVNET_SET_QUIET(ifp-if_vnet);
 
 if (fDst  INTNETTRUNKDIR_WIRE)
 {
@@ -539,6 +550,7 @@ int vboxNetFltPortOsXmit(PVBOXNETFLTINS 
 m-m_pkthdr.rcvif = ifp;
 ifp-if_input(ifp, m);
 }
+CURVNET_RESTORE();
 return VINF_SUCCESS;
 }
 
@@ -556,6 +568,7 @@ int vboxNetFltOsInitInstance(PVBOXNETFLT
 node_p node;
 RTSPINLOCKTMP Tmp

VirtualBox + VIMAGE

2011-01-23 Thread Mikolaj Golub
Hi,

Sorry for cross posting, but this question concerns both VirtualBox and VIMAGE 
:-).

I would like to be able to run VirtualBox on my desktop, with a kernel compiled
with VIMAGE, so I would use light vnet jails for FreeBSD VMs and VirtualBox
for other OSes.

Currently the system will crash if you try to use VirtualBox bridged
networking under VIMAGE kernel.

The crash may look like below:

db bt

Tracing pid 2349 tid 100239 td 0xc984b2d0
ifunit(c7e3b9ac,6f,c129e970,f8089984,c08d6dc0,...) at ifunit+0x33
vboxNetFltOsInitInstance(c7e3b810,0,8,7,c9aa6ae0,...) at 
vboxNetFltOsInitInstance+0x31
vboxNetFltFactoryCreateAndConnect(c9aa8cc8,c96636d4,c9664010,0,c966403c,...) at 
vboxNetFltFactory
CreateAndConnect+0x1f7
_end(c7b11810,c962c238,3,c962c2b8,0,...) at 0xc9c0114f
_end(c7b11810,c962c22c,f20,c0d31268,f8089a90,...) at 0xc9c01265
_end(c962c22c,0,0,c7b11810,c0cfe2fe,...) at 0xc9bbc775
_end(f808a000,fffd,28,c962c22c,0,...) at 0xc9bbd086
supdrvIOCtl(c10c5607,c12a0a40,c7b11810,c962c200,c0e81df8,...) at 
supdrvIOCtl+0x1af1
VBoxDrvFreeBSDIOCtl(c94efb00,c10c5607,c962c200,3,c984b2d0,...) at 
VBoxDrvFreeBSDIOCtl+0x1ea
devfs_ioctl_f(c8e70118,c10c5607,c962c200,c9a82900,c984b2d0,...) at 
devfs_ioctl_f+0x10b
kern_ioctl(c984b2d0,e,c10c5607,c962c200,1089cec,...) at kern_ioctl+0x1fd
ioctl(c984b2d0,f8089cec,2b077000,1,c94ed2a8,...) at ioctl+0x134
syscallenter(c984b2d0,f8089ce4,c0c20e2d,c0e83290,0,...) at syscallenter+0x2a6
syscall(f8089d28) at syscall+0x4f
Xint0x80_syscall() at Xint0x80_syscall+0x21
--- syscall (54, FreeBSD ELF32, ioctl), eip = 0x282ae093, esp = 0xbf8b980c, ebp 
= 0xbf8b9828 ---

So it crashes when a function (ifunit in this case) tries to access a
virtualized variable (V_ifnet in this case). The macro uses td->td_vnet for
this, which should be set to the current vnet, but for VBox driver threads it is
NULL.

As a quick fix I added, in all the problem functions in VBoxNetFlt-freebsd.c, the
CURVNET_SET_QUIET(vnet0) macro, which sets td->td_vnet to the default vnet (see the
attached patch). This has fixed the issue for me and now I am happily running
both VNET jails and VirtualBox machines on my desktop.

But the way I fixed the issue looks hackish to me. Could someone suggest a
better solution?

In case someone is interested in trying this patch, below are the instructions
how to build VIMAGE safe vbox driver from ports:

cd /usr/ports/emulators/virtualbox-ose-kmod
make patch
cd work
patch -p0  /path/to/VirtualBox-3.2.12.VIMAGE.patch
cd ..
VIMAGE=1 make

-- 
Mikolaj Golub

diff -rpu VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile
--- VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile	2010-12-01 19:09:33.0 +0200
+++ VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/Makefile	2011-01-23 12:44:24.0 +0200
@@ -26,6 +26,10 @@ CFLAGS += -DRT_OS_FREEBSD -DIN_RING0 -DI
  CFLAGS += -DRT_ARCH_AMD64
 .endif
 
+.if defined(VIMAGE)
+ CFLAGS += -DVIMAGE
+.endif
+
 SRCS = \
 	VBoxNetFlt-freebsd.c \
 	VBoxNetFlt.c \
diff -rpu VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c
--- VirtualBox-3.2.12_OSE.orig/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c	2010-12-01 19:09:33.0 +0200
+++ VirtualBox-3.2.12_OSE/src/VBox/HostDrivers/VBoxNetFlt/freebsd/VBoxNetFlt-freebsd.c	2011-01-23 12:14:19.0 +0200
@@ -51,6 +51,7 @@
 #include net/if_dl.h
 #include net/if_types.h
 #include net/ethernet.h
+#include net/vnet.h
 
 #include netgraph/ng_message.h
 #include netgraph/netgraph.h
@@ -322,6 +323,7 @@ static int ng_vboxnetflt_rcvdata(hook_p 
 struct m_tag *mtag;
 bool fActive;
 
+CURVNET_SET_QUIET(vnet0);
 fActive = vboxNetFltTryRetainBusyActive(pThis);
 
 NGI_GET_M(item, m);
@@ -346,6 +348,7 @@ static int ng_vboxnetflt_rcvdata(hook_p 
 ether_demux(ifp, m);
 if (fActive)
 vboxNetFltRelease(pThis, true /*fBusy*/);
+CURVNET_RESTORE();
 return (0);
 }
 mtx_lock_spin(pThis-u.s.inq.ifq_mtx);
@@ -363,6 +366,7 @@ static int ng_vboxnetflt_rcvdata(hook_p 
 int rc = ether_output_frame(ifp, m);
 if (fActive)
 vboxNetFltRelease(pThis, true /*fBusy*/);
+CURVNET_RESTORE();
 return rc;
 }
 mtx_lock_spin(pThis-u.s.outq.ifq_mtx);
@@ -377,6 +381,7 @@ static int ng_vboxnetflt_rcvdata(hook_p 
 
 if (fActive)
 vboxNetFltRelease(pThis, true /*fBusy*/);
+CURVNET_RESTORE();
 return (0);
 }
 
@@ -409,6 +414,7 @@ static void vboxNetFltFreeBSDinput(void 
 bool fDropIt = false, fActive;
 PINTNETSG pSG;
 
+CURVNET_SET_QUIET(vnet0);
 vboxNetFltRetain(pThis, true /* fBusy */);
 for (;;)
 {
@@ -438,6 +444,7 @@ static void vboxNetFltFreeBSDinput(void 
 ether_demux

epair: panic after destroying

2011-01-20 Thread Mikolaj Golub
Hi,

On CURRENT when doing something like below:

jail -c name=test vnet persist
ifconfig epair0 create
ifconfig epair0b vnet test
ifconfig epair0a destroy
jexec test ifconfig

The system panics on the last command:

#8  0xc0c32f7c in calltrap () at /usr/src/sys/i386/i386/exception.s:168
#9  0xc0994618 in strlen (str=0xdeadc0de Address 0xdeadc0de out of bounds)
at /usr/src/sys/libkern/strlen.c:41
#10 0xc09216c7 in kvprintf (fmt=0xc0d32ec7  @ %s:%d, 
func=0xc0920950 snprintf_func, arg=0xeb5dfa30, radix=10, 
ap=0xeb5dfa74 \202lD4C0C4\005) at /usr/src/sys/kern/subr_prf.c:728
#11 0xc0921f4b in vsnprintf (str=0xc0ebd700 mtx_lock() of spin mutex , 
size=256, format=0xc0d32eac mtx_lock() of spin mutex %s @ %s:%d, 
ap=0xeb5dfa70 DEC0ADDE\202lD4C0C4\005) at 
/usr/src/sys/kern/subr_prf.c:461
#12 0xc08e914b in panic (fmt=0xc0d32eac mtx_lock() of spin mutex %s @ %s:%d)
at /usr/src/sys/kern/kern_shutdown.c:557
#13 0xc08d882a in _mtx_lock_flags (m=0xc2ec7628, opts=0, 
file=0xc0d46c82 /usr/src/sys/net/rtsock.c, line=1476)
at /usr/src/sys/kern/kern_mutex.c:197
#14 0xc09b8389 in sysctl_rtsock (oidp=0xc0e412a0, arg1=0xeb5dfbe4, arg2=4, 
req=0xeb5dfb70) at /usr/src/sys/net/rtsock.c:1476
#15 0xc08f3eed in sysctl_root (oidp=Variable oidp is not available.
) at /usr/src/sys/kern/kern_sysctl.c:1456
#16 0xc08f4145 in userland_sysctl (td=0xc3ab9870, name=0xeb5dfbdc, namelen=6, 
old=0x0, oldlenp=0xbfbfe538, inkernel=0, new=0x0, newlen=0, 
retval=0xeb5dfc3c, flags=0) at /usr/src/sys/kern/kern_sysctl.c:1566
#17 0xc08f4514 in __sysctl (td=0xc3ab9870, uap=0xeb5dfcec)
at /usr/src/sys/kern/kern_sysctl.c:1492
#18 0xc092b793 in syscallenter (td=0xc3ab9870, sa=0xeb5dfce4)
at /usr/src/sys/kern/subr_trap.c:318
#19 0xc0c4975f in syscall (frame=0xeb5dfd28)
at /usr/src/sys/i386/i386/trap.c:1094
#20 0xc0c33011 in Xint0x80_syscall ()
at /usr/src/sys/i386/i386/exception.s:266
#21 0x0033 in ?? ()
Previous frame inner to this frame (corrupt stack?)

It crashes in sysctl_iflist when traversing V_ifnet list because it contains
dead ifp.

This is because when epair_clone_destroy() calls ether_ifdetach() for its
second half it does not switch to that half's vnet, so if_detach_internal() can't
find the interface and just returns. This can be checked by applying the first
of the attached patches:

5epair20a: link state changed to DOWN
5epair20b: link state changed to DOWN
panic: if_detach_internal: ifp=0xc35b8800 not on the ifnet tailq 0xc3e43038

#11 0xc08e91f4 in panic (
fmt=0xc0d44def %s: ifp=%p not on the ifnet tailq %p)
at /usr/src/sys/kern/kern_shutdown.c:574
#12 0xc09a19b1 in if_detach_internal (ifp=0xc35b8800, vmove=0)
at /usr/src/sys/net/if.c:831
#13 0xc09a4190 in if_detach (ifp=0xc35b8800) at /usr/src/sys/net/if.c:805
#14 0xc09a5fdd in ether_ifdetach (ifp=0xc35b8800)
at /usr/src/sys/net/if_ethersubr.c:989
#15 0xc3e63ec8 in epair_clone_destroy (ifc=0xc3e661c0, ifp=0xc2ec7400)
at /usr/src/sys/modules/if_epair/../../net/if_epair.c:871
#16 0xc09a4b17 in if_clone_destroyif (ifc=0xc3e661c0, ifp=0xc2ec7400)
at /usr/src/sys/net/if_clone.c:269
#17 0xc09a53c7 in if_clone_destroy (name=0xc303e940 epair20a)
at /usr/src/sys/net/if_clone.c:230
#18 0xc09a2a21 in ifioctl (so=0xc3aa3680, cmd=2149607801, 
data=0xc303e940 epair20a, td=0xc3bcb5a0) at /usr/src/sys/net/if.c:2464
#19 0xc093c977 in soo_ioctl (fp=0xc3aa5000, cmd=2149607801, data=0xc303e940, 
active_cred=0xc2d8dc80, td=0xc3bcb5a0)
at /usr/src/sys/kern/sys_socket.c:212
#20 0xc0936aed in kern_ioctl (td=0xc3bcb5a0, fd=3, com=2149607801, 
data=0xc303e940 epair20a) at file.h:254
#21 0xc0936c74 in ioctl (td=0xc3bcb5a0, uap=0xeb5fdcec)
at /usr/src/sys/kern/sys_generic.c:679
#22 0xc092b793 in syscallenter (td=0xc3bcb5a0, sa=0xeb5fdce4)
at /usr/src/sys/kern/subr_trap.c:318
#23 0xc0c4974f in syscall (frame=0xeb5fdd28)
at /usr/src/sys/i386/i386/trap.c:1094
#24 0xc0c33001 in Xint0x80_syscall ()
at /usr/src/sys/i386/i386/exception.s:266

Adding vnet switch to epair_clone_destroy() like below fixes the issue:

-   ether_ifdetach(oifp);
+   {
+   CURVNET_SET_QUIET(oifp-if_vnet);
+   ether_ifdetach(oifp);
+   CURVNET_RESTORE();
+   }
ether_ifdetach(ifp);

Although it looks a bit ugly :-). What do you think about some reordering like
in the second patch attached?

Also it looks wrong that if_detach_internal() silently returned in that case.
I think it should panic or at least warn (as in the third patch).

-- 
Mikolaj Golub

Index: sys/net/if.c
===
--- sys/net/if.c	(revision 217617)
+++ sys/net/if.c	(working copy)
@@ -828,11 +828,8 @@ if_detach_internal(struct ifnet *ifp, int vmove)
 #endif
 	IFNET_WUNLOCK();
 	if (!found) {
-		if (vmove)
-			panic(%s: ifp=%p not on the ifnet tailq %p,
-			__func__, ifp, V_ifnet);
-		else
-			return; /* XXX this should panic as well? */
+		panic(%s

smbfs on VIMAGE kernel

2011-01-11 Thread Mikolaj Golub
Hi,

With the attached patch I can mount and use samba fs on current built with
VIMAGE option.

-- 
Mikolaj Golub

Index: sys/netsmb/smb_trantcp.c
===
--- sys/netsmb/smb_trantcp.c	(revision 217275)
+++ sys/netsmb/smb_trantcp.c	(working copy)
@@ -46,6 +46,7 @@ __FBSDID($FreeBSD$);
 
 #include net/if.h
 #include net/route.h
+#include net/vnet.h
 
 #include netinet/in.h
 #include netinet/tcp.h
@@ -79,13 +80,17 @@ static int
 nb_setsockopt_int(struct socket *so, int level, int name, int val)
 {
 	struct sockopt sopt;
+	int error;
 
 	bzero(sopt, sizeof(sopt));
 	sopt.sopt_level = level;
 	sopt.sopt_name = name;
 	sopt.sopt_val = val;
 	sopt.sopt_valsize = sizeof(val);
-	return sosetopt(so, sopt);
+	CURVNET_SET(so-so_vnet);
+	error = sosetopt(so, sopt);
+	CURVNET_RESTORE();
+	return error;
 }
 
 static int
@@ -286,8 +291,10 @@ nbssn_recvhdr(struct nbpcb *nbp, int *lenp,
 	auio.uio_offset = 0;
 	auio.uio_resid = sizeof(len);
 	auio.uio_td = td;
+	CURVNET_SET(so-so_vnet);
 	error = soreceive(so, (struct sockaddr **)NULL, auio,
 	(struct mbuf **)NULL, (struct mbuf **)NULL, flags);
+	CURVNET_RESTORE();
 	if (error)
 		return error;
 	if (auio.uio_resid  0) {
@@ -371,8 +378,10 @@ nbssn_recv(struct nbpcb *nbp, struct mbuf **mpp, i
 			 */
 			do {
 rcvflg = MSG_WAITALL;
+CURVNET_SET(so-so_vnet);
 error = soreceive(so, (struct sockaddr **)NULL,
 auio, tm, (struct mbuf **)NULL, rcvflg);
+CURVNET_RESTORE();
 			} while (error == EWOULDBLOCK || error == EINTR ||
  error == ERESTART);
 			if (error)
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org


Re: smbfs on VIMAGE kernel

2011-01-11 Thread Mikolaj Golub

On Tue, 11 Jan 2011 16:20:11 -0800 Julian Elischer wrote:

 JE On 1/11/11 11:42 AM, Mikolaj Golub wrote:
  Hi,
 
  With the attached patch I can mount and use samba fs on current built with
  VIMAGE option.
 
 JE but what does that actually MEAN?

Without the patch when you try to mount samba fs on a system compiled with
VIMAGE option it will just panic. I didn't need samba inside jails, I just
wanted to mount samba fs on host.

-- 
Mikolaj Golub
___
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
freebsd-virtualization-unsubscr...@freebsd.org