Hi Yisheng,

> On Tue, Sep 12, 2017 at 04:05:22PM +0800, Yisheng Xie wrote:
> > IMO, before a page is allocated, it is in the buddy system, which means
> > it is free and has no mapping other than the direct map. Then if the page
> > is allocated to user space, XPFO should unmap it from the direct map;
> > otherwise ret2dir may work in this window before the page is freed. Or
> > maybe I'm still missing something.
> 
> I agree that it seems broken. I'm just not sure why the test doesn't
> fail. It's certainly worth understanding.

Ok, so I think what's going on is that the page *is* mapped and unmapped by the
kernel as Juerg described, but only in certain cases. See prep_new_page(),
which has the following:

        if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO))
                for (i = 0; i < (1 << order); i++)
                        clear_highpage(page + i);

clear_highpage() maps and unmaps the pages, so that's why XPFO works with this
patch set.
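
For reference, clear_highpage() is roughly the following (from
include/linux/highmem.h; with this series, kmap_atomic() and kunmap_atomic()
end up calling xpfo_kmap() and xpfo_kunmap()):

/*
 * kmap_atomic() maps the page back into the direct map via xpfo_kmap();
 * kunmap_atomic() then unmaps it again via xpfo_kunmap(), which is the
 * unmap that has been protecting user pages in the tests so far.
 */
static inline void clear_highpage(struct page *page)
{
        void *kaddr = kmap_atomic(page);

        clear_page(kaddr);
        kunmap_atomic(kaddr);
}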

I tried with CONFIG_PAGE_POISONING_ZERO=y and page_poison=y, and the
XPFO_READ_USER test does fail, i.e. the read succeeds: in that configuration
free_pages_prezeroed() is true, so the zeroing loop above is skipped and the
pages are never mapped and unmapped. So, I think we need to include this
zeroing condition in xpfo_alloc_pages(), something like the patch below.
Unfortunately, it fails to boot for me, probably for an unrelated reason that
I'll look into.
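
For context, the check that makes this configuration-dependent is
free_pages_prezeroed(), which in this kernel is roughly:

/*
 * When zero-poisoning is enabled, free pages are already zeroed, so
 * prep_new_page() skips the clear_highpage() loop above -- and with it
 * the kmap/kunmap pair that was doing XPFO's unmap for us.
 */
static inline bool free_pages_prezeroed(void)
{
        return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
                page_poisoning_enabled();
}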

Thanks a lot!

Tycho


From bfc21a6438cf8c56741af94cac939f1b0f63752c Mon Sep 17 00:00:00 2001
From: Tycho Andersen <ty...@docker.com>
Date: Tue, 12 Sep 2017 12:06:41 -0600
Subject: [PATCH] draft of unmapping patch

Signed-off-by: Tycho Andersen <ty...@docker.com>
---
 include/linux/xpfo.h |  5 +++--
 mm/compaction.c      |  2 +-
 mm/internal.h        |  2 +-
 mm/page_alloc.c      | 10 ++++++----
 mm/xpfo.c            | 10 ++++++++--
 5 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/include/linux/xpfo.h b/include/linux/xpfo.h
index b24be9ac4a2d..c991bf7f051d 100644
--- a/include/linux/xpfo.h
+++ b/include/linux/xpfo.h
@@ -29,7 +29,7 @@ void xpfo_flush_kernel_tlb(struct page *page, int order);
 
 void xpfo_kmap(void *kaddr, struct page *page);
 void xpfo_kunmap(void *kaddr, struct page *page);
-void xpfo_alloc_pages(struct page *page, int order, gfp_t gfp);
+void xpfo_alloc_pages(struct page *page, int order, gfp_t gfp, bool will_map);
 void xpfo_free_pages(struct page *page, int order);
 
 bool xpfo_page_is_unmapped(struct page *page);
@@ -49,7 +49,8 @@ void xpfo_temp_unmap(const void *addr, size_t size, void **mapping,
 
 static inline void xpfo_kmap(void *kaddr, struct page *page) { }
 static inline void xpfo_kunmap(void *kaddr, struct page *page) { }
-static inline void xpfo_alloc_pages(struct page *page, int order, gfp_t gfp) { }
+static inline void xpfo_alloc_pages(struct page *page, int order, gfp_t gfp,
+                                   bool will_map) { }
 static inline void xpfo_free_pages(struct page *page, int order) { }
 
 static inline bool xpfo_page_is_unmapped(struct page *page) { return false; }
diff --git a/mm/compaction.c b/mm/compaction.c
index fb548e4c7bd4..9a222258e65c 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -76,7 +76,7 @@ static void map_pages(struct list_head *list)
                order = page_private(page);
                nr_pages = 1 << order;
 
-               post_alloc_hook(page, order, __GFP_MOVABLE);
+               post_alloc_hook(page, order, __GFP_MOVABLE, false);
                if (order)
                        split_page(page, order);
 
diff --git a/mm/internal.h b/mm/internal.h
index 4ef49fc55e58..1a0331ec2b2d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -165,7 +165,7 @@ extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
                                        unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned int order);
 extern void post_alloc_hook(struct page *page, unsigned int order,
-                                       gfp_t gfp_flags);
+                                       gfp_t gfp_flags, bool will_map);
 extern int user_min_free_kbytes;
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 09fdf1bad21f..f73809847c58 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1750,7 +1750,7 @@ static bool check_new_pages(struct page *page, unsigned int order)
 }
 
 inline void post_alloc_hook(struct page *page, unsigned int order,
-                               gfp_t gfp_flags)
+                               gfp_t gfp_flags, bool will_map)
 {
        set_page_private(page, 0);
        set_page_refcounted(page);
@@ -1759,18 +1759,20 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
        kernel_map_pages(page, 1 << order, 1);
        kernel_poison_pages(page, 1 << order, 1);
        kasan_alloc_pages(page, order);
-       xpfo_alloc_pages(page, order, gfp_flags);
+       xpfo_alloc_pages(page, order, gfp_flags, will_map);
        set_page_owner(page, order, gfp_flags);
 }
 
+extern bool xpfo_test;
 static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
                                                        unsigned int alloc_flags)
 {
        int i;
+       bool needs_zero = !free_pages_prezeroed() && (gfp_flags & __GFP_ZERO);
 
-       post_alloc_hook(page, order, gfp_flags);
+       post_alloc_hook(page, order, gfp_flags, needs_zero);
 
-       if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO))
+       if (needs_zero)
                for (i = 0; i < (1 << order); i++)
                        clear_highpage(page + i);
 
diff --git a/mm/xpfo.c b/mm/xpfo.c
index ca5d4d1838f9..dd25e24213fe 100644
--- a/mm/xpfo.c
+++ b/mm/xpfo.c
@@ -86,7 +86,7 @@ static inline struct xpfo *lookup_xpfo(struct page *page)
        return (void *)page_ext + page_xpfo_ops.offset;
 }
 
-void xpfo_alloc_pages(struct page *page, int order, gfp_t gfp)
+void xpfo_alloc_pages(struct page *page, int order, gfp_t gfp, bool will_map)
 {
        int i, flush_tlb = 0;
        struct xpfo *xpfo;
@@ -116,8 +116,14 @@ void xpfo_alloc_pages(struct page *page, int order, gfp_t gfp)
                         * Tag the page as a user page and flush the TLB if it
                         * was previously allocated to the kernel.
                         */
-                       if (!test_and_set_bit(XPFO_PAGE_USER, &xpfo->flags))
+                       bool was_user = !test_and_set_bit(XPFO_PAGE_USER,
+                                                         &xpfo->flags);
+
+                       if (was_user || !will_map) {
+                               set_kpte(page_address(page + i), page + i,
+                                        __pgprot(0));
                                flush_tlb = 1;
+                       }
                } else {
                        /* Tag the page as a non-user (kernel) page */
                        clear_bit(XPFO_PAGE_USER, &xpfo->flags);
-- 
2.11.0
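
To spell out the intended semantics, here is a condensed sketch of the
allocation flow after the patch above (illustration only, not the exact
kernel code; alloc_flow_sketch() is a made-up name):

/*
 * If the zeroing loop will run, clear_highpage()'s kunmap_atomic()
 * performs the unmap; otherwise xpfo_alloc_pages() must eagerly unmap
 * user pages from the direct map itself via set_kpte().
 */
static void alloc_flow_sketch(struct page *page, unsigned int order,
                              gfp_t gfp_flags)
{
        int i;
        bool will_map = !free_pages_prezeroed() && (gfp_flags & __GFP_ZERO);

        /* For user pages, xpfo_alloc_pages() now calls
         * set_kpte(..., __pgprot(0)) when !will_map (or when the page
         * previously belonged to the kernel) and flushes the TLB.
         */
        post_alloc_hook(page, order, gfp_flags, will_map);

        if (will_map)
                for (i = 0; i < (1 << order); i++)
                        clear_highpage(page + i);
}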
