Author: gibbs
Date: Fri Nov 15 16:35:28 2013
New Revision: 258178
URL: http://svnweb.freebsd.org/changeset/base/258178

Log:
  Improve robustness of the Xen balloon driver.
  
  sys/dev/xen/balloon/balloon.c:
        Remove unused and commented out code.
  
        Fix deadlock caused by performing a sleepable malloc
        while holding the balloon mutex (a simplified sketch of the
        non-sleeping allocation pattern follows the log below).
  
        Perform proper accounting of the memory used by the domain.
  
  Submitted by: Roger Pau Monné
  Sponsored by: Citrix Systems R&D
  Reviewed by:  gibbs
  MFC after:    2 days
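
The deadlock fix above boils down to one rule: malloc(9) with M_WAITOK may
sleep, and sleeping while a default (MTX_DEF) mutex is held is not allowed and
can deadlock, so an allocation made with the balloon mutex held must use
M_NOWAIT and be prepared for it to fail.  A minimal stand-alone sketch of that
pattern follows; the names used here (M_EXAMPLE, example_mtx,
example_append_locked) are hypothetical and only illustrate the idiom; they
are not part of the driver.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>

/* Hypothetical allocation type, mirroring the driver's M_BALLOON. */
static MALLOC_DEFINE(M_EXAMPLE, "example", "hypothetical allocation type");

/* Hypothetical MTX_DEF mutex; mtx_init() is assumed to run at attach time. */
static struct mtx example_mtx;

struct example_entry {
        int     payload;
};

/*
 * Called with example_mtx held.  M_NOWAIT never sleeps, so the allocation
 * is safe under the mutex; the price is that the caller must handle ENOMEM.
 */
static int
example_append_locked(int payload)
{
        struct example_entry *entry;

        mtx_assert(&example_mtx, MA_OWNED);

        entry = malloc(sizeof(*entry), M_EXAMPLE, M_NOWAIT);
        if (entry == NULL)
                return (ENOMEM);
        entry->payload = payload;
        /* ... link entry into a queue protected by example_mtx ... */
        return (0);
}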

Modified:
  head/sys/dev/xen/balloon/balloon.c

Modified: head/sys/dev/xen/balloon/balloon.c
==============================================================================
--- head/sys/dev/xen/balloon/balloon.c  Fri Nov 15 16:16:55 2013        (r258177)
+++ head/sys/dev/xen/balloon/balloon.c  Fri Nov 15 16:35:28 2013        (r258178)
@@ -52,18 +52,13 @@ __FBSDID("$FreeBSD$");
 
 static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver");
 
-struct mtx balloon_mutex;
+/* Convert from KB (as fetched from xenstore) to number of PAGES */
+#define KB_TO_PAGE_SHIFT       (PAGE_SHIFT - 10)
 
-/*
- * Protects atomic reservation decrease/increase against concurrent increases.
- * Also protects non-atomic updates of current_pages and driver_pages, and
- * balloon lists.
- */
-struct mtx balloon_lock;
+struct mtx balloon_mutex;
 
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
-#define ARRAY_SIZE(A)  (sizeof(A) / sizeof(A[0]))
 
 struct balloon_stats {
        /* We aim for 'current allocation' == 'target allocation'. */
@@ -116,15 +111,21 @@ static void balloon_process(void *unused
        printk(KERN_WARNING "xen_mem: " fmt, ##args)
 
 /* balloon_append: add the given page to the balloon. */
-static void 
+static int
 balloon_append(vm_page_t page)
 {
        struct balloon_entry *entry;
 
-       entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK);
+       mtx_assert(&balloon_mutex, MA_OWNED);
+
+       entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_NOWAIT);
+       if (!entry)
+               return (ENOMEM);
        entry->page = page;
        STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
        bs.balloon_low++;
+
+       return (0);
 }
 
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
@@ -134,8 +135,10 @@ balloon_retrieve(void)
        vm_page_t page;
        struct balloon_entry *entry;
 
+       mtx_assert(&balloon_mutex, MA_OWNED);
+
        if (STAILQ_EMPTY(&ballooned_pages))
-               return NULL;
+               return (NULL);
 
        entry = STAILQ_FIRST(&ballooned_pages);
        STAILQ_REMOVE_HEAD(&ballooned_pages, list);
@@ -145,7 +148,7 @@ balloon_retrieve(void)
        
        bs.balloon_low--;
 
-       return page;
+       return (page);
 }
 
 static unsigned long 
@@ -154,21 +157,22 @@ current_target(void)
        unsigned long target = min(bs.target_pages, bs.hard_limit);
        if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
                target = bs.current_pages + bs.balloon_low + bs.balloon_high;
-       return target;
+       return (target);
 }
 
 static unsigned long
 minimum_target(void)
 {
 #ifdef XENHVM
-#define max_pfn physmem
+#define max_pfn realmem
 #else
 #define max_pfn HYPERVISOR_shared_info->arch.max_pfn
 #endif
        unsigned long min_pages, curr_pages = current_target();
 
 #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
-       /* Simple continuous piecewiese linear function:
+       /*
+        * Simple continuous piecewiese linear function:
         *  max MiB -> min MiB  gradient
         *       0         0
         *      16        16
@@ -189,12 +193,10 @@ minimum_target(void)
        else
                min_pages = MB2PAGES(296) + (max_pfn >> 5);
 #undef MB2PAGES
+#undef max_pfn
 
        /* Don't enforce growth */
-       return min(min_pages, curr_pages);
-#ifndef CONFIG_XEN
-#undef max_pfn
-#endif
+       return (min(min_pages, curr_pages));
 }
 
 static int 
@@ -210,10 +212,10 @@ increase_reservation(unsigned long nr_pa
                .domid        = DOMID_SELF
        };
 
-       if (nr_pages > ARRAY_SIZE(frame_list))
-               nr_pages = ARRAY_SIZE(frame_list);
+       mtx_assert(&balloon_mutex, MA_OWNED);
 
-       mtx_lock(&balloon_lock);
+       if (nr_pages > nitems(frame_list))
+               nr_pages = nitems(frame_list);
 
        for (entry = STAILQ_FIRST(&ballooned_pages), i = 0;
             i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) {
@@ -253,33 +255,14 @@ increase_reservation(unsigned long nr_pa
 
                set_phys_to_machine(pfn, frame_list[i]);
 
-#if 0
-#ifndef XENHVM
-               /* Link back into the page tables if not highmem. */
-               if (pfn < max_low_pfn) {
-                       int ret;
-                       ret = HYPERVISOR_update_va_mapping(
-                               (unsigned long)__va(pfn << PAGE_SHIFT),
-                               pfn_pte_ma(frame_list[i], PAGE_KERNEL),
-                               0);
-                       PASSING(ret == 0,
-                           ("HYPERVISOR_update_va_mapping failed"));
-               }
-#endif
-#endif
-
-               /* Relinquish the page back to the allocator. */
                vm_page_unwire(page, 0);
                vm_page_free(page);
        }
 
        bs.current_pages += nr_pages;
-       //totalram_pages = bs.current_pages;
 
  out:
-       mtx_unlock(&balloon_lock);
-
-       return 0;
+       return (0);
 }
 
 static int
@@ -295,8 +278,10 @@ decrease_reservation(unsigned long nr_pa
                .domid        = DOMID_SELF
        };
 
-       if (nr_pages > ARRAY_SIZE(frame_list))
-               nr_pages = ARRAY_SIZE(frame_list);
+       mtx_assert(&balloon_mutex, MA_OWNED);
+
+       if (nr_pages > nitems(frame_list))
+               nr_pages = nitems(frame_list);
 
        for (i = 0; i < nr_pages; i++) {
                if ((page = vm_page_alloc(NULL, 0, 
@@ -310,39 +295,15 @@ decrease_reservation(unsigned long nr_pa
                pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
                frame_list[i] = PFNTOMFN(pfn);
 
-#if 0
-               if (!PageHighMem(page)) {
-                       v = phys_to_virt(pfn << PAGE_SHIFT);
-                       scrub_pages(v, 1);
-#ifdef CONFIG_XEN
-                       ret = HYPERVISOR_update_va_mapping(
-                               (unsigned long)v, __pte_ma(0), 0);
-                       BUG_ON(ret);
-#endif
-               }
-#endif
-#ifdef CONFIG_XEN_SCRUB_PAGES
-               else {
-                       v = kmap(page);
-                       scrub_pages(v, 1);
-                       kunmap(page);
-               }
-#endif
-       }
-
-#ifdef CONFIG_XEN
-       /* Ensure that ballooned highmem pages don't have kmaps. */
-       kmap_flush_unused();
-       flush_tlb_all();
-#endif
-
-       mtx_lock(&balloon_lock);
-
-       /* No more mappings: invalidate P2M and add to balloon. */
-       for (i = 0; i < nr_pages; i++) {
-               pfn = MFNTOPFN(frame_list[i]);
                set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-               balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
+               if (balloon_append(page) != 0) {
+                       vm_page_unwire(page, 0);
+                       vm_page_free(page);
+
+                       nr_pages = i;
+                       need_sleep = 1;
+                       break;
+               }
        }
 
        set_xen_guest_handle(reservation.extent_start, frame_list);
@@ -351,9 +312,6 @@ decrease_reservation(unsigned long nr_pa
        KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed"));
 
        bs.current_pages -= nr_pages;
-       //totalram_pages = bs.current_pages;
-
-       mtx_unlock(&balloon_lock);
 
        return (need_sleep);
 }
@@ -425,11 +383,11 @@ watch_target(struct xs_watch *watch,
                return;
        } 
         
-       /* The given memory/target value is in KiB, so it needs converting to
-          pages.  PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
-       */
-       set_new_target(new_target >> (PAGE_SHIFT - 10));
-    
+       /*
+        * The given memory/target value is in KiB, so it needs converting to
+        * pages.  PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
+        */
+       set_new_target(new_target >> KB_TO_PAGE_SHIFT);
 }
 
 static void 
@@ -461,13 +419,12 @@ balloon_init(void *arg)
        if (!is_running_on_xen())
                return;
 
-       mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF);
        mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);
 
 #ifndef XENHVM
        bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
 #else
-       bs.current_pages = physmem;
+       bs.current_pages = realmem;
 #endif
        bs.target_pages  = bs.current_pages;
        bs.balloon_low   = 0;
@@ -497,76 +454,7 @@ void balloon_update_driver_allowance(lon
 void 
 balloon_update_driver_allowance(long delta)
 {
-       mtx_lock(&balloon_lock);
+       mtx_lock(&balloon_mutex);
        bs.driver_pages += delta;
-       mtx_unlock(&balloon_lock);
-}
-
-#if 0
-static int dealloc_pte_fn(
-       pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
-{
-       unsigned long mfn = pte_mfn(*pte);
-       int ret;
-       struct xen_memory_reservation reservation = {
-               .extent_start = &mfn,
-               .nr_extents   = 1,
-               .extent_order = 0,
-               .domid        = DOMID_SELF
-       };
-       set_pte_at(&init_mm, addr, pte, __pte_ma(0));
-       set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
-       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
-       KASSERT(ret == 1, ("HYPERVISOR_memory_op failed"));
-       return 0;
-}
-
-#endif
-
-#if 0
-vm_page_t
-balloon_alloc_empty_page_range(unsigned long nr_pages)
-{
-       vm_page_t pages;
-       int i, rc;
-       unsigned long *mfn_list;
-       struct xen_memory_reservation reservation = {
-               .address_bits = 0,
-               .extent_order = 0,
-               .domid        = DOMID_SELF
-       };
-
-       pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4)
-       if (pages == NULL)
-               return NULL;
-       
-       mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK);
-       
-       for (i = 0; i < nr_pages; i++) {
-               mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT);
-               PFNTOMFN(i) = INVALID_P2M_ENTRY;
-               reservation.extent_start = mfn_list;
-               reservation.nr_extents = nr_pages;
-               rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-                   &reservation);
-               KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed"));
-       }
-
-       current_pages -= nr_pages;
-
-       wakeup(balloon_process);
-
-       return pages;
-}
-
-void 
-balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
-{
-       unsigned long i;
-
-       for (i = 0; i < nr_pages; i++)
-               balloon_append(page + i);
-
-       wakeup(balloon_process);
+       mtx_unlock(&balloon_mutex);
 }
-#endif
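
The KB_TO_PAGE_SHIFT macro added at the top of the file encodes the conversion
described in the watch_target() comment: a page is 2^PAGE_SHIFT bytes, i.e.
2^(PAGE_SHIFT - 10) KiB, so shifting the KiB value read from xenstore right by
PAGE_SHIFT - 10 yields a page count.  A small stand-alone illustration,
assuming the common 4 KiB page size (the program below is hypothetical and not
part of the tree):

#include <stdio.h>

#define PAGE_SHIFT              12                      /* assumed: 4 KiB pages */
#define KB_TO_PAGE_SHIFT        (PAGE_SHIFT - 10)       /* a page is 2^(PAGE_SHIFT-10) KiB */

int
main(void)
{
        unsigned long target_kib = 1048576UL;   /* 1 GiB expressed in KiB, as xenstore reports it */

        /* 1048576 KiB >> 2 == 262144 pages of 4 KiB each */
        printf("%lu KiB -> %lu pages\n", target_kib, target_kib >> KB_TO_PAGE_SHIFT);
        return (0);
}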