Module Name: src
Committed By: cherry
Date: Tue Jul 6 15:00:10 UTC 2010
Modified Files:
src/sys/arch/amd64/conf: XEN3_DOM0 XEN3_DOMU
src/sys/arch/i386/conf: XEN3_DOM0 XEN3_DOMU
src/sys/arch/xen/conf: files.xen
src/sys/arch/xen/xenbus: xenbus_probe.c
Added Files:
src/sys/arch/xen/include: balloon.h
src/sys/arch/xen/xen: balloon.c
Log Message:
The Xen balloon driver enables growing and shrinking of
PV domains on the fly, by collaborating with UVM and the hypervisor.
To generate a diff of this commit:
cvs rdiff -u -r1.55 -r1.56 src/sys/arch/amd64/conf/XEN3_DOM0
cvs rdiff -u -r1.23 -r1.24 src/sys/arch/amd64/conf/XEN3_DOMU
cvs rdiff -u -r1.35 -r1.36 src/sys/arch/i386/conf/XEN3_DOM0
cvs rdiff -u -r1.24 -r1.25 src/sys/arch/i386/conf/XEN3_DOMU
cvs rdiff -u -r1.107 -r1.108 src/sys/arch/xen/conf/files.xen
cvs rdiff -u -r0 -r1.1 src/sys/arch/xen/include/balloon.h
cvs rdiff -u -r0 -r1.1 src/sys/arch/xen/xen/balloon.c
cvs rdiff -u -r1.27 -r1.28 src/sys/arch/xen/xenbus/xenbus_probe.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/amd64/conf/XEN3_DOM0
diff -u src/sys/arch/amd64/conf/XEN3_DOM0:1.55 src/sys/arch/amd64/conf/XEN3_DOM0:1.56
--- src/sys/arch/amd64/conf/XEN3_DOM0:1.55 Sat Jun 26 13:08:37 2010
+++ src/sys/arch/amd64/conf/XEN3_DOM0 Tue Jul 6 15:00:09 2010
@@ -1,4 +1,4 @@
-# $NetBSD: XEN3_DOM0,v 1.55 2010/06/26 13:08:37 bouyer Exp $
+# $NetBSD: XEN3_DOM0,v 1.56 2010/07/06 15:00:09 cherry Exp $
include "arch/amd64/conf/std.xen"
@@ -12,6 +12,8 @@
#
options DOM0OPS
+#options XEN_BALLOON # Xen memory ballooning - Experimental
+
# boot messages with MPBIOS, acpi and ioapic can be quite large
options MSGBUFSIZE=24576
Index: src/sys/arch/amd64/conf/XEN3_DOMU
diff -u src/sys/arch/amd64/conf/XEN3_DOMU:1.23 src/sys/arch/amd64/conf/XEN3_DOMU:1.24
--- src/sys/arch/amd64/conf/XEN3_DOMU:1.23 Sat May 8 22:16:25 2010
+++ src/sys/arch/amd64/conf/XEN3_DOMU Tue Jul 6 15:00:09 2010
@@ -1,4 +1,4 @@
-# $NetBSD: XEN3_DOMU,v 1.23 2010/05/08 22:16:25 mrg Exp $
+# $NetBSD: XEN3_DOMU,v 1.24 2010/07/06 15:00:09 cherry Exp $
include "arch/amd64/conf/std.xen"
@@ -13,6 +13,7 @@
#
options MAXPHYS=32768 #xbd doesn't handle 64k transfers
#options DOM0OPS
+#options XEN_BALLOON # Xen memory ballooning - Experimental
#options VM86 # virtual 8086 emulation
#options USER_LDT # user-settable LDT; used by WINE
Index: src/sys/arch/i386/conf/XEN3_DOM0
diff -u src/sys/arch/i386/conf/XEN3_DOM0:1.35 src/sys/arch/i386/conf/XEN3_DOM0:1.36
--- src/sys/arch/i386/conf/XEN3_DOM0:1.35 Sat Jun 26 13:08:37 2010
+++ src/sys/arch/i386/conf/XEN3_DOM0 Tue Jul 6 15:00:09 2010
@@ -1,4 +1,4 @@
-# $NetBSD: XEN3_DOM0,v 1.35 2010/06/26 13:08:37 bouyer Exp $
+# $NetBSD: XEN3_DOM0,v 1.36 2010/07/06 15:00:09 cherry Exp $
#
# XEN3_0: Xen 3.0 domain0 kernel
@@ -23,6 +23,7 @@
# making MCLBYTES = PAGE_SIZE avoids a copy when a mbuf cluster is sent
# to a domU, at the expense of a higher memory usage by the network stack.
#options MCLSHIFT=12
+#options XEN_BALLOON # Xen memory ballooning - Experimental
makeoptions CPUFLAGS="-march=i686"
Index: src/sys/arch/i386/conf/XEN3_DOMU
diff -u src/sys/arch/i386/conf/XEN3_DOMU:1.24 src/sys/arch/i386/conf/XEN3_DOMU:1.25
--- src/sys/arch/i386/conf/XEN3_DOMU:1.24 Sat May 8 22:16:28 2010
+++ src/sys/arch/i386/conf/XEN3_DOMU Tue Jul 6 15:00:09 2010
@@ -1,4 +1,4 @@
-# $NetBSD: XEN3_DOMU,v 1.24 2010/05/08 22:16:28 mrg Exp $
+# $NetBSD: XEN3_DOMU,v 1.25 2010/07/06 15:00:09 cherry Exp $
include "arch/xen/conf/std.xen"
@@ -13,6 +13,7 @@
#
options XEN
#options DOM0OPS
+#options XEN_BALLOON # Xen memory ballooning - Experimental
makeoptions CPUFLAGS="-march=i686"
Index: src/sys/arch/xen/conf/files.xen
diff -u src/sys/arch/xen/conf/files.xen:1.107 src/sys/arch/xen/conf/files.xen:1.108
--- src/sys/arch/xen/conf/files.xen:1.107 Mon May 10 18:46:58 2010
+++ src/sys/arch/xen/conf/files.xen Tue Jul 6 15:00:09 2010
@@ -1,4 +1,4 @@
-# $NetBSD: files.xen,v 1.107 2010/05/10 18:46:58 dyoung Exp $
+# $NetBSD: files.xen,v 1.108 2010/07/06 15:00:09 cherry Exp $
# NetBSD: files.x86,v 1.10 2003/10/08 17:30:00 bouyer Exp
# NetBSD: files.i386,v 1.254 2004/03/25 23:32:10 jmc Exp
@@ -396,7 +396,7 @@
include "dev/pcmcia/files.pcmcia"
# Domain-0 operations
-defflag opt_xen.h DOM0OPS XEN_COMPAT_030001
+defflag opt_xen.h DOM0OPS XEN_COMPAT_030001 XEN_BALLOON
file arch/xen/xen/privcmd.c dom0ops
file arch/xen/x86/xen_shm_machdep.c dom0ops
file arch/x86/pci/pci_machdep.c hypervisor & pci & dom0ops
@@ -406,6 +406,7 @@
file arch/xen/xen/xennetback_xenbus.c xvif
file arch/xen/xen/xennet_checksum.c xvif | xennet
file arch/xen/xen/xbdback_xenbus.c xbdback
+file arch/xen/xen/balloon.c hypervisor
ifdef i386
include "arch/i386/conf/majors.i386"
Index: src/sys/arch/xen/xenbus/xenbus_probe.c
diff -u src/sys/arch/xen/xenbus/xenbus_probe.c:1.27 src/sys/arch/xen/xenbus/xenbus_probe.c:1.28
--- src/sys/arch/xen/xenbus/xenbus_probe.c:1.27 Fri Jan 9 22:26:25 2009
+++ src/sys/arch/xen/xenbus/xenbus_probe.c Tue Jul 6 15:00:10 2010
@@ -1,4 +1,4 @@
-/* $NetBSD: xenbus_probe.c,v 1.27 2009/01/09 22:26:25 jym Exp $ */
+/* $NetBSD: xenbus_probe.c,v 1.28 2010/07/06 15:00:10 cherry Exp $ */
/******************************************************************************
* Talks to Xen Store to figure out what devices we have.
*
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xenbus_probe.c,v 1.27 2009/01/09 22:26:25 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xenbus_probe.c,v 1.28 2010/07/06 15:00:10 cherry Exp $");
#if 0
#define DPRINTK(fmt, args...) \
@@ -55,6 +55,10 @@
#include <xen/evtchn.h>
#include <xen/shutdown_xenbus.h>
+#ifdef XEN_BALLOON
+#include <xen/balloon.h>
+#endif
+
#include "xenbus_comms.h"
extern struct semaphore xenwatch_mutex;
@@ -514,6 +518,10 @@
register_xenbus_watch(&be_watch);
shutdown_xenbus_setup();
+#ifdef XEN_BALLOON
+ balloon_xenbus_setup();
+#endif
+
/* Notify others that xenstore is up */
//notifier_call_chain(&xenstore_chain, 0, NULL);
}
Added files:
Index: src/sys/arch/xen/include/balloon.h
diff -u /dev/null src/sys/arch/xen/include/balloon.h:1.1
--- /dev/null Tue Jul 6 15:00:10 2010
+++ src/sys/arch/xen/include/balloon.h Tue Jul 6 15:00:09 2010
@@ -0,0 +1,37 @@
+/* $NetBSD: balloon.h,v 1.1 2010/07/06 15:00:09 cherry Exp $ */
+
+/*-
+ * Copyright (c) 2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Cherry G. Mathew <[email protected]>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _XEN_BALLOON_H
+#define _XEN_BALLOON_H
+
+void balloon_xenbus_setup(void);
+
+#endif /* _XEN_BALLOON_H */
Index: src/sys/arch/xen/xen/balloon.c
diff -u /dev/null src/sys/arch/xen/xen/balloon.c:1.1
--- /dev/null Tue Jul 6 15:00:10 2010
+++ src/sys/arch/xen/xen/balloon.c Tue Jul 6 15:00:09 2010
@@ -0,0 +1,877 @@
+/* $NetBSD: balloon.c,v 1.1 2010/07/06 15:00:09 cherry Exp $ */
+
+/*-
+ * Copyright (c) 2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Cherry G. Mathew <[email protected]> and
+ * Jean-Yves Migeon <[email protected]>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * The Xen balloon driver enables growing and shrinking PV
+ * domains on the fly, by allocating and freeing memory directly.
+ */
+
+#define BALLOONDEBUG 1
+
+/*
+ * sysctl TODOs:
+ * xen.balloon
+ * xen.balloon.current: DONE
+ * xen.balloon.target: DONE
+ * xen.balloon.low-balloon: In Progress
+ * xen.balloon.high-balloon: In Progress
+ * xen.balloon.limit: XXX
+ *
+ * sysctl labels = { 'current' : 'Current allocation',
+ * 'target' : 'Requested target',
+ * 'low-balloon' : 'Low-mem balloon',
+ * 'high-balloon' : 'High-mem balloon',
+ * 'limit' : 'Xen hard limit' }
+ *
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: balloon.c,v 1.1 2010/07/06 15:00:09 cherry Exp $");
+
+#include <sys/inttypes.h>
+#include <sys/param.h>
+
+#include <sys/condvar.h>
+#include <sys/kernel.h>
+#include <sys/kmem.h>
+#include <sys/kthread.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+#include <xen/balloon.h>
+
+#include <uvm/uvm.h>
+#include <uvm/uvm_extern.h>
+#include <xen/xenpmap.h>
+
+#define BALLOONINTERVALMS 100 /* base poll interval of the balloon thread, in milliseconds */
+
+#define BALLOON_DELTA 1024 /* The maximum number of pages moved by a
+ * single call of balloon_inflate() or
+ * balloon_deflate()
+ */
+#define BALLOON_RETRIES 4 /* Number of times each (in|de)flate of
+ * BALLOON_DELTA or less is attempted
+ */
+
+/* XXX: fix limits */
+#define BALLOON_BALLAST 256 /* In pages */
+#define XEN_RESERVATION_MIN (uvmexp.freemin + BALLOON_BALLAST) /* In pages */
+#define XEN_RESERVATION_MAX nkmempages /* In pages */
+
+/* Forward declaration */
+static void xenbus_balloon_watcher(struct xenbus_watch *, const char **,
+ unsigned int);
+
+struct balloon_page_entry {
+ struct vm_page *pg; /* UVM page currently held by the balloon */
+ SLIST_ENTRY(balloon_page_entry) entry; /* Link in balloon_conf.balloon_page_entries */
+};
+
+static struct balloon_conf {
+ kmutex_t flaglock; /* Protects condvar (below) */
+ kcondvar_t cv_memchanged; /* Signalled when a new target has been stored */
+
+ kmutex_t tgtlock; /* Spin lock, protects .target, below */
+ size_t target; /* Target VM reservation size, in pages. */
+
+ /* The following are not protected by above locks */
+ SLIST_HEAD(, balloon_page_entry) balloon_page_entries; /* Pages held by the balloon */
+ size_t balloon_num_page_entries; /* Number of entries on the list above */
+
+ /* Balloon limits */
+ size_t xen_res_min; /* Set from XEN_RESERVATION_MIN in balloon_xenbus_setup() */
+ size_t xen_res_max; /* Set from XEN_RESERVATION_MAX in balloon_xenbus_setup() */
+} balloon_conf;
+
+static struct xenbus_watch xenbus_balloon_watch = {
+ .node = __UNCONST("memory/target"), /* XenStore node to watch */
+ .xbw_callback = xenbus_balloon_watcher, /* Called with this watch as first argument */
+};
+
+static uint64_t sysctl_current; /* Data pointer of the "current" sysctl node */
+static uint64_t sysctl_target; /* Data pointer of the "target" sysctl node */
+
+/* List of MFNs for inflating/deflating balloon; BALLOON_DELTA entries */
+static xen_pfn_t *mfn_lista;
+
+/* XXX: stub - returns nkmempages until the hypercall path below is fixed */
+static size_t
+xenmem_get_maxreservation(void)
+{
+#if 0 /* XXX: Fix this call */
+ int s, ret;
+
+ s = splvm();
+ ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation,
+ & (domid_t) { DOMID_SELF });
+
+ splx(s);
+
+ if (ret < 0) {
+ panic("Could not obtain hypervisor max reservation for VM\n");
+ return 0;
+ }
+
+ return ret;
+#else
+ return nkmempages; /* stand-in while the hypercall path is disabled */
+#endif
+}
+
+/* Current reservation, in pages; panics if the hypervisor call fails. */
+static size_t
+xenmem_get_currentreservation(void)
+{
+	domid_t self = DOMID_SELF;
+	int spl, npages;
+
+	spl = splvm();
+	npages = HYPERVISOR_memory_op(XENMEM_current_reservation, &self);
+	splx(spl);
+
+	if (npages >= 0)
+		return npages;
+
+	/* A negative value is an error from the hypercall. */
+	panic("Could not obtain hypervisor current "
+	    "reservation for VM\n");
+	return 0;
+}
+
+/*
+ * The target value is managed in 3 variables:
+ * a) Incoming xenbus copy, maintained by the hypervisor.
+ * b) sysctl_target: This is an incoming target value via the
+ * sysctl(9) interface.
+ * c) balloon_conf.target
+ * This is the canonical current target that the driver tries to
+ * attain.
+ *
+ */
+
+
+/* Read XenStore memory/target (KB) as a page count; 0 if unreadable. */
+static size_t
+xenbus_balloon_read_target(void)
+{
+	unsigned long long target_kb;
+	int err;
+
+	err = xenbus_read_ull(NULL, "memory", "target", &target_kb, 0);
+	if (err != 0) {
+		printf("error, couldn't read xenbus target node\n");
+		return 0;
+	}
+	return target_kb * 1024 / PAGE_SIZE;
+}
+
+/* Publish a target, given in pages, to XenStore memory/target (in KB). */
+static void
+xenbus_balloon_write_target(unsigned long long new_target)
+{
+	unsigned long long target_kb = new_target * PAGE_SIZE / 1024;
+	int err;
+
+	err = xenbus_printf(NULL, "memory", "target", "%llu", target_kb);
+	/* Failure is only logged; the caller is not told. */
+	if (err != 0) {
+		printf("error, couldn't write xenbus target node\n");
+	}
+}
+
+/* Snapshot balloon_conf.target (in pages) under tgtlock. */
+static size_t
+balloon_get_target(void)
+{
+	kmutex_t *lock = &balloon_conf.tgtlock;
+	size_t snapshot;
+
+	mutex_spin_enter(lock);
+	snapshot = balloon_conf.target;
+	mutex_spin_exit(lock);
+	return snapshot;
+}
+
+/* Store a new target (in pages) into balloon_conf under tgtlock. */
+static void
+balloon_set_target(size_t target)
+{
+	kmutex_t *lock = &balloon_conf.tgtlock;
+
+	/* Only the stored value changes; no condvar is signalled here. */
+	mutex_spin_enter(lock);
+	balloon_conf.target = target;
+	mutex_spin_exit(lock);
+}
+
+/*
+ * Used when the driver gives up on reaching balloon_conf.target:
+ * the caller picks a replacement value (in pages) and this routine
+ * pushes it to XenStore, to the sysctl copy and to balloon_conf,
+ * in that order.
+ */
+static void
+balloon_feedback_target(size_t newtarget)
+{
+ /* Notify XenStore. */
+ xenbus_balloon_write_target(newtarget);
+ /* Update sysctl value XXX: Locking ? */
+ sysctl_target = newtarget;
+
+ /* Finally update our private copy */
+ balloon_set_target(newtarget);
+}
+
+
+/* Size of the balloon: how many pages it is holding right now. */
+static size_t
+balloon_reserve(void)
+{
+	return (balloon_conf.balloon_num_page_entries);
+}
+
+/*
+ * Pull up to npages pages out of UVM and park them in the balloon.
+ *
+ * For every page taken, its machine frame number is stored in the next
+ * slot of mfn_list[] (the caller provides room for npages entries)
+ * and its P2M entry is invalidated.  The tracking entry is allocated
+ * before the P2M is touched, so a page with an invalid P2M entry is
+ * never handed back to UVM on an allocation failure.
+ *
+ * Returns the number of pages reserved; this is less than npages if
+ * an allocation fails part-way.
+ */
+static size_t
+reserve_pages(size_t npages, xen_pfn_t *mfn_list)
+{
+	int s;
+	struct vm_page *pg;
+	struct balloon_page_entry *bpg_entry;
+	size_t rpages;
+	paddr_t pa;
+
+	for (rpages = 0; rpages < npages; rpages++) {
+		/* Bookkeeping first: nothing to undo if this fails. */
+		bpg_entry = kmem_alloc(sizeof *bpg_entry, KM_SLEEP);
+		if (bpg_entry == NULL) {
+			break;
+		}
+
+		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+		if (pg == NULL) {
+			kmem_free(bpg_entry, sizeof *bpg_entry);
+			break;
+		}
+
+		pa = VM_PAGE_TO_PHYS(pg);
+		mfn_list[rpages] = xpmap_ptom(pa) >> PAGE_SHIFT;
+		/* Invalidate pg's P2M entry. */
+		s = splvm();
+		xpmap_phys_to_machine_mapping[
+		    (pa - XPMAP_OFFSET) >> PAGE_SHIFT
+		] = INVALID_P2M_ENTRY;
+		splx(s);
+
+		/*
+		 * XXX: We don't keep a copy, but just save a pointer
+		 * to the uvm pg handle. Is this ok ?
+		 */
+		bpg_entry->pg = pg;
+		SLIST_INSERT_HEAD(&balloon_conf.balloon_page_entries,
+		    bpg_entry, entry);
+		balloon_conf.balloon_num_page_entries++;
+	}
+
+	return rpages;
+}
+
+/*
+ * Return up to ret balloon pages to UVM, backing them with the frames
+ * in mfn_list[]; P2M and machphys are updated before a page is freed.
+ * Returns the count released (short if the balloon runs empty).
+ */
+static size_t
+unreserve_pages(size_t ret, xen_pfn_t *mfn_list)
+{
+	int s;
+	size_t npages;
+	paddr_t pa;
+	struct vm_page *pg;
+	struct balloon_page_entry *bpg_entry;
+
+	for (npages = 0; npages < ret; npages++) {
+		if (SLIST_EMPTY(&balloon_conf.balloon_page_entries)) {
+			/*
+			 * XXX: This is the case where extra "hot-plug"
+			 * mem w.r.t boot comes in
+			 */
+			printf("Balloon is empty. can't be collapsed "
+			    "further!\n");
+			break;
+		}
+
+		bpg_entry = SLIST_FIRST(&balloon_conf.balloon_page_entries);
+		SLIST_REMOVE_HEAD(&balloon_conf.balloon_page_entries, entry);
+		balloon_conf.balloon_num_page_entries--;
+
+		pg = bpg_entry->pg;
+		kmem_free(bpg_entry, sizeof *bpg_entry);
+
+		s = splvm();
+		/* Update P->M */
+		pa = VM_PAGE_TO_PHYS(pg);
+		xpmap_phys_to_machine_mapping[
+		    (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = mfn_list[npages];
+
+		xpq_queue_machphys_update(
+		    ((paddr_t) (mfn_list[npages])) << PAGE_SHIFT, pa);
+		xpq_flush_queue();
+
+		/* Free it to UVM */
+		uvm_pagefree(pg);
+		splx(s);
+	}
+
+	return npages;
+}
+
+/*
+ * Shrink the domain to tpages pages: take the surplus from UVM and
+ * offer it to Xen via XENMEM_decrease_reservation.  Pages Xen did not
+ * take - all of them if the hypercall fails - are handed back to UVM,
+ * so the balloon only ever tracks frames that were really released.
+ */
+static void
+balloon_inflate(size_t tpages)
+{
+	int s, ret;
+	size_t npages, respgcnt, released;
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid = DOMID_SELF
+	};
+
+	npages = xenmem_get_currentreservation();
+	KASSERT (npages > tpages);
+	npages -= tpages;
+
+	KASSERT(npages > 0);
+	KASSERT(npages <= BALLOON_DELTA);
+
+	memset(mfn_lista, 0, BALLOON_DELTA * sizeof *mfn_lista);
+	/*
+	 * There's a risk that npages might overflow ret.
+	 * Do this in smaller steps then.
+	 * See: HYPERVISOR_memory_op(...) below....
+	 */
+	if (npages > XEN_RESERVATION_MAX) {
+		return;
+	}
+
+	respgcnt = reserve_pages(npages, mfn_lista);
+	if (respgcnt == 0) {
+		return;
+	}
+
+	/* Hand over pages to Hypervisor */
+	xenguest_handle(reservation.extent_start) = mfn_lista;
+	reservation.nr_extents = respgcnt;
+
+	s = splvm();
+	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+	splx(s);
+
+	/* A non-positive return means no frame was released. */
+	released = (ret > 0) ? (size_t)ret : 0;
+	if (released < respgcnt) {
+#if BALLOONDEBUG
+		printf("decrease reservation incomplete\n");
+#endif
+		/* Unroll loop and release page frames back to the OS. */
+		if ((respgcnt - released) !=
+		    unreserve_pages(respgcnt - released,
+		    mfn_lista + released)) {
+			panic("Could not unreserve balloon pages in "
+			    "inflate incomplete path!");
+		}
+		return;
+	}
+
+#if BALLOONDEBUG
+	printf("inflated by %d\n", ret);
+#endif
+}
+
+/*
+ * Grow the domain to tpages pages by deflating the balloon.
+ *
+ * Frames are requested with XENMEM_increase_reservation and attached
+ * to pages currently parked in the balloon, which then go back to
+ * UVM.  Growth is clipped to the number of pages the balloon holds;
+ * nothing is done if the request exceeds the computed headroom.
+ */
+static void
+balloon_deflate(size_t tpages)
+{
+	int s, ret;
+	size_t npages, pgmax, pgcur;
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid = DOMID_SELF
+	};
+
+	pgmax = xenmem_get_maxreservation();
+	KASSERT(pgmax > 0);
+
+	pgcur = xenmem_get_currentreservation();
+	KASSERT(pgcur > 0);
+
+	/*
+	 * Room left below the maximum.  NOTE(review): this wraps around
+	 * if pgcur exceeds pgmax - confirm that cannot happen.
+	 */
+	pgmax -= pgcur;
+
+	KASSERT(tpages > pgcur);
+	npages = tpages - pgcur;
+
+	/*
+	 * There's a risk that npages might overflow ret.
+	 * Do this in smaller steps then.
+	 * See: HYPERVISOR_memory_op(...) below....
+	 */
+
+	KASSERT(npages > 0);
+	KASSERT(npages <= BALLOON_DELTA);
+
+	memset(mfn_lista, 0, BALLOON_DELTA * sizeof *mfn_lista);
+
+	if (npages > XEN_RESERVATION_MAX) {
+		return;
+	}
+
+	/* Requests beyond the headroom are dropped, not trimmed. */
+	if (npages > pgmax) {
+		return;
+	}
+
+	/*
+	 * Check to see if we're deflating beyond empty.
+	 * This is currently unsupported. XXX: See if we can
+	 * "hot-plug" these extra pages into uvm(9)
+	 */
+	if (npages > balloon_reserve()) {
+		npages = balloon_reserve();
+
+#if BALLOONDEBUG
+		printf("\"hot-plug\" memory unsupported - clipping "
+		    "reservation to %zu pages.\n", pgcur + npages);
+#endif
+		if (!npages) { /* Nothing to do */
+			return;
+		}
+	}
+
+	xenguest_handle(reservation.extent_start) = mfn_lista;
+	reservation.nr_extents = npages;
+
+	s = splvm();
+	ret = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
+	splx(s);
+
+	if (ret <= 0) {
+		printf("%s: Increase reservation failed.\n",
+		    __FILE__);
+		return;
+	}
+
+	/* Back balloon pages with the new frames and free them to UVM. */
+	npages = unreserve_pages(ret, mfn_lista);
+#if BALLOONDEBUG
+	printf("deflated by %zu\n", npages);
+#endif
+}
+
+/*
+ * Move the reservation towards targetpages, synchronously.
+ *
+ * A larger target deflates the balloon, a smaller one inflates it,
+ * and an equal one is a no-op.
+ */
+static void
+balloon_resize(size_t targetpages)
+{
+	size_t currentpages;
+
+	currentpages = xenmem_get_currentreservation();
+	KASSERT(currentpages > 0);
+
+	/* Already there: nothing to do. */
+	if (currentpages == targetpages)
+		return;
+
+#if BALLOONDEBUG
+	printf("Current pages == %zu\n", currentpages);
+#endif
+
+	if (currentpages < targetpages) {
+		/* Domain must grow: let pages out of the balloon. */
+		balloon_deflate(targetpages);
+		return;
+	}
+
+	/* Domain must shrink: put pages into the balloon. */
+	balloon_inflate(targetpages);
+}
+
+static void
+balloon_thread(void *ignore)
+{
+ /* Balloon kernel thread body: loops forever; the argument is unused. */
+ int i = 0, deltachunk = 0, pollticks;
+ size_t current, tgtcache;
+ ssize_t delta = 0; /* The balloon increment size */
+
+ pollticks = mstohz(BALLOONINTERVALMS);
+
+ /*
+ * Each pass: sleep on cv_memchanged (with a growing timeout)
+ * until target and current reservation differ, then move
+ * towards the target by at most BALLOON_DELTA pages, giving
+ * up once i has reached BALLOON_RETRIES.
+ */
+
+ for/*ever*/ (;;) {
+ /* flaglock is held for the whole pass, including the resize. */
+ mutex_enter(&balloon_conf.flaglock);
+
+ while (!(delta = balloon_get_target() -
+ (current = xenmem_get_currentreservation()))) {
+
+ if (EWOULDBLOCK ==
+ cv_timedwait(&balloon_conf.cv_memchanged,
+ &balloon_conf.flaglock,
+ pollticks)) {
+ /*
+ * Get a bit more lethargic. Rollover
+ * is ok.
+ */
+ pollticks += mstohz(BALLOONINTERVALMS);
+
+ } else { /* activity! Poll fast! */
+ pollticks = mstohz(BALLOONINTERVALMS);
+ }
+ }
+
+ KASSERT(delta <= INT_MAX && delta >= INT_MIN); /* int abs(int); */
+ KASSERT(abs(delta) < XEN_RESERVATION_MAX);
+
+ if (delta >= 0) {
+ deltachunk = MIN(BALLOON_DELTA, delta);
+ } else {
+ deltachunk = MAX(-BALLOON_DELTA, delta);
+ }
+
+ tgtcache = current + deltachunk; /* intermediate target for this pass */
+
+ if (deltachunk && i >= BALLOON_RETRIES) { /* out of retries */
+ tgtcache = xenmem_get_currentreservation(); /* settle for what we have */
+ balloon_feedback_target(tgtcache);
+ if (i > BALLOON_RETRIES) {
+ /* Perhaps the "feedback" failed ? */
+ panic("Multiple Balloon retry resets.\n");
+ }
+
+#if BALLOONDEBUG
+ printf("Aborted new target at %d tries\n", i);
+ printf("Fed back new target value %zu\n", tgtcache);
+ printf("delta == %zd\n", delta);
+ printf("deltachunk == %d\n", deltachunk);
+#endif
+
+ } else {
+
+#if BALLOONDEBUG
+ printf("new target ==> %zu\n", tgtcache);
+#endif
+ balloon_resize(tgtcache);
+ }
+
+ current = xenmem_get_currentreservation();
+
+ /*
+ * Every deltachunk gets a fresh set of
+ * BALLOON_RETRIES
+ */
+ i = (current != tgtcache) ? i + 1 : 0; /* count consecutive misses */
+
+ mutex_exit(&balloon_conf.flaglock);
+
+ }
+
+}
+
+/*
+ * XenStore watch callback for memory/target: validate the new target
+ * against [xen_res_min, xen_res_max], store it, wake the balloon thread.
+ */
+static void
+xenbus_balloon_watcher(struct xenbus_watch *watch, const char **vec,
+    unsigned int len)
+{
+	size_t new_target;
+
+	if (0 == (new_target = (size_t) xenbus_balloon_read_target())) {
+		/* Don't update target value */
+		return;
+	}
+
+	/* The range check must not depend on debugging being enabled. */
+	if (new_target < balloon_conf.xen_res_min ||
+	    new_target > balloon_conf.xen_res_max) {
+#if BALLOONDEBUG
+		printf("Requested target is unacceptable.\n");
+#endif
+		return;
+	}
+
+	balloon_set_target(new_target);
+
+#if BALLOONDEBUG
+	printf("Setting target to %zu\n", new_target);
+	printf("Current reservation is %zu\n", xenmem_get_currentreservation());
+#endif
+
+	/* Notify balloon thread, if we can. */
+	if (mutex_tryenter(&balloon_conf.flaglock)) {
+		cv_signal(&balloon_conf.cv_memchanged);
+		mutex_exit(&balloon_conf.flaglock);
+	}
+}
+
+/*
+ * Bring up the balloon driver: allocate the MFN list, init locks and
+ * limits, seed the target, watch memory/target and start the balloon
+ * thread.  Any failure undoes all of the above, mfn_lista included.
+ */
+void
+balloon_xenbus_setup(void)
+{
+	size_t currentpages;
+
+	/* Allocate list of MFNs for inflating/deflating balloon */
+	mfn_lista = kmem_alloc(BALLOON_DELTA * sizeof *mfn_lista, KM_NOSLEEP);
+	if (mfn_lista == NULL) {
+		aprint_error("%s: could not allocate mfn_lista\n", __func__);
+		return;
+	}
+
+	/* Setup flaglocks, condvars et. al */
+	mutex_init(&balloon_conf.flaglock, MUTEX_DEFAULT, IPL_NONE);
+	mutex_init(&balloon_conf.tgtlock, MUTEX_DEFAULT, IPL_HIGH);
+	cv_init(&balloon_conf.cv_memchanged, "ballooning");
+
+	SLIST_INIT(&balloon_conf.balloon_page_entries);
+	balloon_conf.balloon_num_page_entries = 0;
+
+	/* Deliberately not-constified for future extensibility */
+	balloon_conf.xen_res_min = XEN_RESERVATION_MIN;
+	balloon_conf.xen_res_max = XEN_RESERVATION_MAX;
+
+#if BALLOONDEBUG
+	printf("uvmexp.freemin == %d\n", uvmexp.freemin);
+	printf("xen_res_min == %zu\n", balloon_conf.xen_res_min);
+	printf("xen_res_max == %zu\n", balloon_conf.xen_res_max);
+#endif
+	currentpages = xenmem_get_currentreservation();
+	KASSERT(currentpages > 0);
+
+	/* Update initial target value */
+	balloon_set_target(currentpages);
+
+	/*
+	 * Initialise the sysctl_xxx copies of target and current
+	 * as above, because sysctl inits before balloon_xenbus_setup()
+	 */
+	sysctl_target = sysctl_current = currentpages;
+
+	/* Setup xenbus node watch callback */
+	if (register_xenbus_watch(&xenbus_balloon_watch)) {
+		aprint_error("%s: unable to watch memory/target\n", __func__);
+		goto fail;
+	}
+
+	/* Setup kernel thread to asynchronously (in/de)-flate the balloon */
+	if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, balloon_thread,
+	    NULL /* arg */, NULL, "balloon")) {
+		aprint_error("%s: unable to create balloon thread\n", __func__);
+		unregister_xenbus_watch(&xenbus_balloon_watch);
+		goto fail;
+	}
+
+	return;
+
+fail:
+	cv_destroy(&balloon_conf.cv_memchanged);
+	mutex_destroy(&balloon_conf.tgtlock);
+	mutex_destroy(&balloon_conf.flaglock);
+	kmem_free(mfn_lista, BALLOON_DELTA * sizeof *mfn_lista);
+	mfn_lista = NULL;
+}
+
+
+/*
+ * sysctl(9) stuff
+ */
+
+/*
+ * sysctl helper for the balloon nodes.  "current" reports the live
+ * reservation; a write to "target" within the reservation limits
+ * updates the driver target, wakes the thread and informs XenStore.
+ */
+static int
+sysctl_kern_xen_balloon(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node;
+	/*
+	 * Assumes SIZE_T_MAX <= ((uint64_t) -1) see createv() in
+	 * SYSCTL_SETUP(...) below
+	 */
+	int error;
+	int64_t node_val;
+
+	KASSERT(rnode != NULL);
+	node = *rnode;
+
+	if (strcmp(node.sysctl_name, "current") == 0) {
+		node_val = xenmem_get_currentreservation();
+		node.sysctl_data = &node_val;
+		return sysctl_lookup(SYSCTLFN_CALL(&node));
+	} else if (strcmp(node.sysctl_name, "target") == 0) {
+		node_val = * (int64_t *) rnode->sysctl_data;
+		node.sysctl_data = &node_val;
+		error = sysctl_lookup(SYSCTLFN_CALL(&node));
+		/* A plain read carries no new value to validate. */
+		if (error != 0 || newp == NULL) {
+			return error;
+		}
+
+		/* Sanity check new size */
+		if (node_val < XEN_RESERVATION_MIN ||
+		    node_val > XEN_RESERVATION_MAX ) {
+#if BALLOONDEBUG
+			printf("node_val out of range.\n");
+			printf("node_val = %"PRId64"\n", node_val);
+#endif
+			return EINVAL;
+		}
+
+#if BALLOONDEBUG
+		printf("node_val = %"PRId64"\n", node_val);
+#endif
+
+		if (node_val != balloon_get_target()) {
+			* (int64_t *) rnode->sysctl_data = node_val;
+
+#if BALLOONDEBUG
+			printf("setting to %" PRId64"\n", node_val);
+#endif
+			balloon_set_target(node_val);
+
+			/* Notify balloon thread, if we can. */
+			if (mutex_tryenter(&balloon_conf.flaglock)) {
+				cv_signal(&balloon_conf.cv_memchanged);
+				mutex_exit(&balloon_conf.flaglock);
+			}
+
+			/* Notify XenStore. */
+			xenbus_balloon_write_target(node_val);
+		}
+
+		return 0;
+	}
+
+	return EINVAL;
+}
+
+/*
+ * Setup nodes: kern.xen.balloon.current and kern.xen.balloon.target.
+ *
+ * Creation stops at the first parent node that cannot be set up, so
+ * that children never end up attached to the wrong parent.
+ */
+SYSCTL_SETUP(sysctl_kern_xen_balloon_setup, "sysctl kern.xen.balloon setup")
+{
+	const struct sysctlnode *node = NULL;
+
+	if (sysctl_createv(clog, 0, NULL, &node, CTLFLAG_PERMANENT,
+	    CTLTYPE_NODE, "kern", NULL, NULL, 0, NULL, 0,
+	    CTL_KERN, CTL_EOL) != 0 || node == NULL) {
+		printf("sysctl create failed\n");
+		return;
+	}
+
+	if (sysctl_createv(clog, 0, &node, &node, CTLFLAG_PERMANENT,
+	    CTLTYPE_NODE, "xen", SYSCTL_DESCR("Xen top level node"),
+	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0) {
+		return;
+	}
+
+	if (sysctl_createv(clog, 0, &node, &node, CTLFLAG_PERMANENT,
+	    CTLTYPE_NODE, "balloon", SYSCTL_DESCR("Balloon details"),
+	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0) {
+		return;
+	}
+
+	sysctl_createv(clog, 0, &node, NULL,
+	    CTLFLAG_PERMANENT,
+	    CTLTYPE_QUAD, "current",
+	    SYSCTL_DESCR("current memory reservation from "
+	    "hypervisor, in pages."),
+	    sysctl_kern_xen_balloon, 0, &sysctl_current, 0,
+	    CTL_CREATE, CTL_EOL);
+
+	sysctl_createv(clog, 0, &node, NULL,
+	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
+	    CTLTYPE_QUAD, "target",
+	    SYSCTL_DESCR("Target memory reservation to adjust "
+	    "balloon size to, in pages"),
+	    sysctl_kern_xen_balloon, 0, &sysctl_target, 0,
+	    CTL_CREATE, CTL_EOL);
+}