Module Name:    src
Committed By:   riz
Date:           Sun Nov 21 23:55:58 UTC 2010

Modified Files:
        src/sys/arch/xen/xen [netbsd-5]: if_xennet_xenbus.c

Log Message:
Pull up following revision(s) (requested by jym in ticket #1473):
        sys/arch/xen/xen/if_xennet_xenbus.c: revision 1.42
        sys/arch/xen/xen/if_xennet_xenbus.c: revision 1.43
        sys/arch/xen/xen/if_xennet_xenbus.c: revision 1.45
Features (like feature-rx-copy) are set by the backend during
XenbusStateInitWait, so delay xennet_xenbus_resume() until the frontend
is notified of the backend's state change.
This avoids a race seen during dynamic attach/detach of network
interfaces with xm(1), where the frontend queries xenstore for features
the backend has not yet reported. This does not happen during a normal
domU boot, as the backend has enough time to fill in these entries
before the frontend asks for them.
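A minimal sketch of the shape of the fix, condensed from the diff
below (names are as in the driver; the other cases of the handler's
switch are omitted here, and the softc retrieval is simplified):

    static void
    xennet_backend_changed(void *arg, XenbusState new_state)
    {
            struct xennet_xenbus_softc *sc = arg; /* simplified */

            switch (new_state) {
            case XenbusStateInitWait:
                    /*
                     * The backend has reached InitWait, so its
                     * feature-* keys (e.g. feature-rx-copy) are now
                     * in xenstore.  Resume here, and only announce
                     * Connected if the resume succeeded.
                     */
                    if (xennet_xenbus_resume(sc) == 0)
                            xenbus_switch_state(sc->sc_xbusd, NULL,
                                XenbusStateConnected);
                    break;
            default:
                    break;
            }
    }
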
The issue was reported by sborrill@: due to the race, detaching xennet
interfaces with RX copy mode enabled made them come back in RX flip
mode on re-attach.
feature-rx-copy support is part of another patch.
Handle the error case (avoid switching to XenbusStateConnected when
resume fails).
Implement feature-rx-copy support in xennet (the domU network
frontend). Instead of flipping pages back and forth between dom0 and
domU for the network RX queue, feature-rx-copy makes the frontend copy
packet contents.
This is the only mode supported by the dom0 Linux pv_ops backend;
NetBSD domU and dom0 can still fall back to flipping when needed.
Copying is expected to be faster than flipping, as it requires no MMU
manipulation or TLB shootdowns.
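The negotiation itself is two xenstore operations, condensed here from
the diff below (all names and calls appear verbatim in the diff; xbt
is the open xenbus transaction):

    unsigned long rx_copy;

    /* Read the backend's advertisement; an absent key means the
     * backend can only flip. */
    if (xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
        "feature-rx-copy", &rx_copy, 10) != 0)
            rx_copy = 0;
    sc->sc_rx_feature = (rx_copy == 1) ?
        FEATURE_RX_COPY : FEATURE_RX_FLIP;

    /* Echo the choice back so the backend knows which mode to use. */
    error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
        "request-rx-copy", "%lu", rx_copy);
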
Based on a patch provided by Hideki ONO. Thanks!
See also http://mail-index.netbsd.org/port-xen/2010/09/24/msg006265.html
and http://mail-index.netbsd.org/port-xen/2010/10/16/msg006312.html
ok bou...@.
XXX will ask for a pull-up after 5.1 is branched.


To generate a diff of this commit:
cvs rdiff -u -r1.29.2.3 -r1.29.2.4 src/sys/arch/xen/xen/if_xennet_xenbus.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/xen/xen/if_xennet_xenbus.c
diff -u src/sys/arch/xen/xen/if_xennet_xenbus.c:1.29.2.3 src/sys/arch/xen/xen/if_xennet_xenbus.c:1.29.2.4
--- src/sys/arch/xen/xen/if_xennet_xenbus.c:1.29.2.3	Mon Sep 28 01:31:46 2009
+++ src/sys/arch/xen/xen/if_xennet_xenbus.c	Sun Nov 21 23:55:58 2010
@@ -1,4 +1,4 @@
-/*      $NetBSD: if_xennet_xenbus.c,v 1.29.2.3 2009/09/28 01:31:46 snj Exp $      */
+/*      $NetBSD: if_xennet_xenbus.c,v 1.29.2.4 2010/11/21 23:55:58 riz Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.29.2.3 2009/09/28 01:31:46 snj Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.29.2.4 2010/11/21 23:55:58 riz Exp $");
 
 #include "opt_xen.h"
 #include "opt_nfs_boot.h"
@@ -178,6 +178,9 @@
 #define BEST_DISCONNECTED	1
 #define BEST_CONNECTED		2
 #define BEST_SUSPENDED		3
+	unsigned long sc_rx_feature;
+#define FEATURE_RX_FLIP		0
+#define FEATURE_RX_COPY		1
 #if NRND > 0
 	rndsource_element_t     sc_rnd_source;
 #endif
@@ -347,9 +350,6 @@
 	rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
 	    RND_TYPE_NET, 0);
 #endif
-
-	/* initialise shared structures and tell backend that we are ready */
-	xennet_xenbus_resume(sc);
 }
 
 static int
@@ -411,6 +411,7 @@
 {
 	struct xennet_xenbus_softc *sc = p;
 	struct xenbus_transaction *xbt;
+	unsigned long rx_copy;
 	int error;
 	netif_tx_sring_t *tx_ring;
 	netif_rx_sring_t *rx_ring;
@@ -420,7 +421,6 @@
 	sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
 	sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
 
-
 	/* setup device: alloc event channel and shared rings */
 	tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
 	     UVM_KMF_WIRED | UVM_KMF_ZERO);
@@ -450,6 +450,19 @@
 	event_set_handler(sc->sc_evtchn, &xennet_handler, sc,
 	    IPL_NET, device_xname(sc->sc_dev));
 
+	error = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
+	    "feature-rx-copy", &rx_copy, 10);
+	if (error)
+		rx_copy = 0; /* default value if key is absent */
+
+	if (rx_copy == 1) {
+		aprint_normal_dev(sc->sc_dev, "using RX copy mode\n");
+		sc->sc_rx_feature = FEATURE_RX_COPY;
+	} else {
+		aprint_normal_dev(sc->sc_dev, "using RX flip mode\n");
+		sc->sc_rx_feature = FEATURE_RX_FLIP;
+	}
+
 again:
 	xbt = xenbus_transaction_start();
 	if (xbt == NULL)
@@ -467,21 +480,21 @@
 		goto abort_transaction;
 	}
 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
-	    "feature-rx-notify", "%u", 1);
+	    "request-rx-copy", "%lu", rx_copy);
 	if (error) {
-		errmsg = "writing feature-rx-notify";
+		errmsg = "writing request-rx-copy";
 		goto abort_transaction;
 	}
 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
-	    "event-channel", "%u", sc->sc_evtchn);
+	    "feature-rx-notify", "%u", 1);
 	if (error) {
-		errmsg = "writing event channel";
+		errmsg = "writing feature-rx-notify";
 		goto abort_transaction;
 	}
 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
-	    "state", "%d", XenbusStateConnected);
+	    "event-channel", "%u", sc->sc_evtchn);
 	if (error) {
-		errmsg = "writing frontend XenbusStateConnected";
+		errmsg = "writing event channel";
 		goto abort_transaction;
 	}
 	error = xenbus_transaction_end(xbt, 0);
@@ -508,14 +521,17 @@
 
 	switch (new_state) {
 	case XenbusStateInitialising:
-	case XenbusStateInitWait:
 	case XenbusStateInitialised:
+	case XenbusStateConnected:
 		break;
 	case XenbusStateClosing:
 		sc->sc_backend_status = BEST_CLOSED;
 		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
 		break;
-	case XenbusStateConnected:
+	case XenbusStateInitWait:
+		if (xennet_xenbus_resume(sc) == 0)
+			xenbus_switch_state(sc->sc_xbusd, NULL,
+			    XenbusStateConnected);
 		break;
 	case XenbusStateUnknown:
 	default:
@@ -530,9 +546,11 @@
 	RING_IDX i;
 	struct xennet_rxreq *req;
 	struct xen_memory_reservation reservation;
-	int s1, s2;
+	int s1, s2, otherend_id;
 	paddr_t pfn;
 
+	otherend_id = sc->sc_xbusd->xbusd_otherend_id;
+
 	s1 = splnet();
 	for (i = 0; sc->sc_free_rxreql != 0; i++) {
 		req  = SLIST_FIRST(&sc->sc_rxreq_head);
@@ -540,53 +558,80 @@
 		KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->id =
 		    req->rxreq_id;
-		if (xengnt_grant_transfer(sc->sc_xbusd->xbusd_otherend_id,
-		    &req->rxreq_gntref) != 0) {
+
+		switch (sc->sc_rx_feature) {
+		case FEATURE_RX_COPY:
+			if (xengnt_grant_access(otherend_id,
+			    xpmap_ptom_masked(req->rxreq_pa),
+			    0, &req->rxreq_gntref) != 0) {
+				goto out_loop;
+			}
 			break;
+		case FEATURE_RX_FLIP:
+			if (xengnt_grant_transfer(otherend_id,
+			    &req->rxreq_gntref) != 0) {
+				goto out_loop;
+			}
+			break;
+		default:
+			panic("%s: unsupported RX feature mode: %ld\n",
+			    __func__, sc->sc_rx_feature);
 		}
+
 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->gref =
 		    req->rxreq_gntref;
 
 		SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
 		sc->sc_free_rxreql--;
 
-		/* unmap the page */
-		MULTI_update_va_mapping(&rx_mcl[i], req->rxreq_va, 0, 0);
-		/*
-		 * Remove this page from pseudo phys map before
-		 * passing back to Xen.
-		 */
-		pfn = (req->rxreq_pa - XPMAP_OFFSET) >> PAGE_SHIFT;
-		xennet_pages[i] = xpmap_phys_to_machine_mapping[pfn];
-		xpmap_phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
+			/* unmap the page */
+			MULTI_update_va_mapping(&rx_mcl[i],
+			    req->rxreq_va, 0, 0);
+			/*
+			 * Remove this page from pseudo phys map before
+			 * passing back to Xen.
+			 */
+			pfn = (req->rxreq_pa - XPMAP_OFFSET) >> PAGE_SHIFT;
+			xennet_pages[i] = xpmap_phys_to_machine_mapping[pfn];
+			xpmap_phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+		}
 	}
+
+out_loop:
 	if (i == 0) {
 		splx(s1);
 		return;
 	}
-	/* also make sure to flush all TLB entries */
-	rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-	/*
-	 * We may have allocated buffers which have entries
-	 * outstanding in the page update queue -- make sure we flush
-	 * those first!
-	 */
-	s2 = splvm();
-	xpq_flush_queue();
-	splx(s2);
-	/* now decrease reservation */
-	reservation.extent_start = xennet_pages;
-	reservation.nr_extents = i;
-	reservation.extent_order = 0;
-	reservation.address_bits = 0;
-	reservation.domid = DOMID_SELF;
-	rx_mcl[i].op = __HYPERVISOR_memory_op;
-	rx_mcl[i].args[0] = XENMEM_decrease_reservation;
-	rx_mcl[i].args[1] = (unsigned long)&reservation;
-	HYPERVISOR_multicall(rx_mcl, i+1);
-	if (__predict_false(rx_mcl[i].result != i)) {
-		panic("xennet_alloc_rx_buffer: XENMEM_decrease_reservation");
+
+	if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
+		/* also make sure to flush all TLB entries */
+		rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
+		    UVMF_TLB_FLUSH|UVMF_ALL;
+		/*
+		 * We may have allocated buffers which have entries
+		 * outstanding in the page update queue -- make sure we flush
+		 * those first!
+		 */
+		s2 = splvm();
+		xpq_flush_queue();
+		splx(s2);
+		/* now decrease reservation */
+		reservation.extent_start = xennet_pages;
+		reservation.nr_extents = i;
+		reservation.extent_order = 0;
+		reservation.address_bits = 0;
+		reservation.domid = DOMID_SELF;
+		rx_mcl[i].op = __HYPERVISOR_memory_op;
+		rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+		rx_mcl[i].args[1] = (unsigned long)&reservation;
+		HYPERVISOR_multicall(rx_mcl, i+1);
+		if (__predict_false(rx_mcl[i].result != i)) {
+			panic("xennet_alloc_rx_buffer: "
+			    "XENMEM_decrease_reservation");
+		}
 	}
+
 	sc->sc_rx_ring.req_prod_pvt = req_prod + i;
 	RING_PUSH_REQUESTS(&sc->sc_rx_ring);
 
@@ -626,44 +671,57 @@
 			SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
 			    rxreq_next);
 			sc->sc_free_rxreql++;
-			ma = xengnt_revoke_transfer(rxreq->rxreq_gntref);
-			rxreq->rxreq_gntref = GRANT_INVALID_REF;
-			if (ma == 0) {
-				u_long pfn;
-				struct xen_memory_reservation xenres;
-				/*
-				 * transfer not complete, we lost the page.
-				 * Get one from hypervisor
-				 */
-				xenres.extent_start = &pfn;
-				xenres.nr_extents = 1;
-				xenres.extent_order = 0;
-				xenres.address_bits = 31;
-				xenres.domid = DOMID_SELF;
-				if (HYPERVISOR_memory_op(
-				    XENMEM_increase_reservation, &xenres) < 0) {
-					panic("xennet_free_rx_buffer: "
-					    "can't get memory back");
+
+			switch (sc->sc_rx_feature) {
+			case FEATURE_RX_COPY:
+				xengnt_revoke_access(rxreq->rxreq_gntref);
+				rxreq->rxreq_gntref = GRANT_INVALID_REF;
+				break;
+			case FEATURE_RX_FLIP:
+				ma = xengnt_revoke_transfer(
+				    rxreq->rxreq_gntref);
+				rxreq->rxreq_gntref = GRANT_INVALID_REF;
+				if (ma == 0) {
+					u_long pfn;
+					struct xen_memory_reservation xenres;
+					/*
+					 * transfer not complete, we lost the page.
+					 * Get one from hypervisor
+					 */
+					xenres.extent_start = &pfn;
+					xenres.nr_extents = 1;
+					xenres.extent_order = 0;
+					xenres.address_bits = 31;
+					xenres.domid = DOMID_SELF;
+					if (HYPERVISOR_memory_op(
+					    XENMEM_increase_reservation, &xenres) < 0) {
+						panic("xennet_free_rx_buffer: "
+						    "can't get memory back");
+					}
+					ma = pfn;
+					KASSERT(ma != 0);
 				}
-				ma = pfn;
-				KASSERT(ma != 0);
+				pa = rxreq->rxreq_pa;
+				va = rxreq->rxreq_va;
+				/* remap the page */
+				mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+				mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
+				MULTI_update_va_mapping(&mcl[0], va, 
+				    (ma << PAGE_SHIFT) | PG_V | PG_KW,
+				    UVMF_TLB_FLUSH|UVMF_ALL);
+				xpmap_phys_to_machine_mapping[
+				    (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
+				mcl[1].op = __HYPERVISOR_mmu_update;
+				mcl[1].args[0] = (unsigned long)mmu;
+				mcl[1].args[1] = 1;
+				mcl[1].args[2] = 0;
+				mcl[1].args[3] = DOMID_SELF;
+				HYPERVISOR_multicall(mcl, 2);
+				break;
+			default:
+				panic("%s: unsupported RX feature mode: %ld\n",
+				    __func__, sc->sc_rx_feature);
 			}
-			pa = rxreq->rxreq_pa;
-			va = rxreq->rxreq_va;
-			/* remap the page */
-			mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
-			mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
-			MULTI_update_va_mapping(&mcl[0], va, 
-			    (ma << PAGE_SHIFT) | PG_V | PG_KW,
-			    UVMF_TLB_FLUSH|UVMF_ALL);
-			xpmap_phys_to_machine_mapping[
-			    (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
-			mcl[1].op = __HYPERVISOR_mmu_update;
-			mcl[1].args[0] = (unsigned long)mmu;
-			mcl[1].args[1] = 1;
-			mcl[1].args[2] = 0;
-			mcl[1].args[3] = DOMID_SELF;
-			HYPERVISOR_multicall(mcl, 2);
 		}
 
 	}
@@ -776,41 +834,58 @@
 		req = &sc->sc_rxreqs[rx->id];
 		KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
 		KASSERT(req->rxreq_id == rx->id);
-		ma = xengnt_revoke_transfer(req->rxreq_gntref);
-		if (ma == 0) {
-			DPRINTFN(XEDB_EVENT, ("xennet_handler ma == 0\n"));
-			/*
-			 * the remote could't send us a packet.
-			 * we can't free this rxreq as no page will be mapped
-			 * here. Instead give it back immediatly to backend.
-			 */
-			ifp->if_ierrors++;
-			RING_GET_REQUEST(&sc->sc_rx_ring,
-			    sc->sc_rx_ring.req_prod_pvt)->id = req->rxreq_id;
-			RING_GET_REQUEST(&sc->sc_rx_ring,
-			    sc->sc_rx_ring.req_prod_pvt)->gref =
-				req->rxreq_gntref;
-			sc->sc_rx_ring.req_prod_pvt++;
-			RING_PUSH_REQUESTS(&sc->sc_rx_ring);
-			continue;
+
+		ma = 0;
+		switch (sc->sc_rx_feature) {
+		case FEATURE_RX_COPY:
+			xengnt_revoke_access(req->rxreq_gntref);
+			break;
+		case FEATURE_RX_FLIP:
+			ma = xengnt_revoke_transfer(req->rxreq_gntref);
+			if (ma == 0) {
+				DPRINTFN(XEDB_EVENT, ("xennet_handler ma == 0\n"));
+				/*
+				 * the remote could't send us a packet.
+				 * we can't free this rxreq as no page will be mapped
+				 * here. Instead give it back immediatly to backend.
+				 */
+				ifp->if_ierrors++;
+				RING_GET_REQUEST(&sc->sc_rx_ring,
+				    sc->sc_rx_ring.req_prod_pvt)->id = req->rxreq_id;
+				RING_GET_REQUEST(&sc->sc_rx_ring,
+				    sc->sc_rx_ring.req_prod_pvt)->gref =
+					req->rxreq_gntref;
+				sc->sc_rx_ring.req_prod_pvt++;
+				RING_PUSH_REQUESTS(&sc->sc_rx_ring);
+				continue;
+			}
+			break;
+		default:
+			panic("%s: unsupported RX feature mode: %ld\n",
+			    __func__, sc->sc_rx_feature);			
 		}
+
 		req->rxreq_gntref = GRANT_INVALID_REF;
 
 		pa = req->rxreq_pa;
 		va = req->rxreq_va;
-		/* remap the page */
-		mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
-		mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
-		MULTI_update_va_mapping(&mcl[0], va, 
-		    (ma << PAGE_SHIFT) | PG_V | PG_KW, UVMF_TLB_FLUSH|UVMF_ALL);
-		xpmap_phys_to_machine_mapping[
-		    (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
-		mcl[1].op = __HYPERVISOR_mmu_update;
-		mcl[1].args[0] = (unsigned long)mmu;
-		mcl[1].args[1] = 1;
-		mcl[1].args[2] = 0;
-		mcl[1].args[3] = DOMID_SELF;
-		HYPERVISOR_multicall(mcl, 2);
+
+		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
+			/* remap the page */
+			mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+			mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
+			MULTI_update_va_mapping(&mcl[0], va, 
+			    (ma << PAGE_SHIFT) | PG_V | PG_KW, UVMF_TLB_FLUSH|UVMF_ALL);
+			xpmap_phys_to_machine_mapping[
+			    (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
+			mcl[1].op = __HYPERVISOR_mmu_update;
+			mcl[1].args[0] = (unsigned long)mmu;
+			mcl[1].args[1] = 1;
+			mcl[1].args[2] = 0;
+			mcl[1].args[3] = DOMID_SELF;
+			HYPERVISOR_multicall(mcl, 2);
+		}
+
 		pktp = (void *)(va + rx->offset);
 #ifdef XENNET_DEBUG_DUMP
 		xennet_hex_dump(pktp, rx->status, "r", rx->id);
@@ -1226,7 +1301,7 @@
 {
 	size_t i, j;
 
-	printf("pkt %p len %d/%x type %s id %d\n", pkt, len, len, type, id);
+	printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id);
 	printf("00000000  ");
 	for(i=0; i<len; i++) {
 		printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i]));
