> On Jul 2, 2018, at 8:42 AM, Alexandru Isaila <aisa...@bitdefender.com> wrote:
>
> From: Isaila Alexandru <aisa...@bitdefender.com>
>
> This patch adds access rights for the NPT pages. The access rights are
> saved in a radix tree with the root saved in p2m_domain. The rights are
> manipulated through p2m_set_access()
> and p2m_get_access() functions.
> The patch follows the ept logic.
This description needs to be much more complete. Something like this: --- This patch adds access control for NPT mode. There aren’t enough extra bits to store the access rights in the NPT p2m table, so we add a radix tree to store the rights. For efficiency, remove entries which match the default permissions rather than continuing to store them. Modify p2m-pt.c:p2m_type_to_flags() to mirror the ept version: taking an access, and removing / adding RW or NX flags as appropriate. --- > > Note: It was tested with xen-access write > > Signed-off-by: Alexandru Isaila <aisa...@bitdefender.com> > > --- > Changes since V2: > - Delete blak line > - Add return if p2m_access_rwx = a > - Delete the comment from p2m_pt_get_entry() > - Moved radix_tree_init() to arch_monitor_init_domain(). > --- > xen/arch/x86/mm/mem_access.c | 3 ++ > xen/arch/x86/mm/p2m-pt.c | 109 ++++++++++++++++++++++++++++++++++----- > xen/arch/x86/mm/p2m.c | 6 +++ > xen/arch/x86/monitor.c | 13 +++++ > xen/include/asm-x86/mem_access.h | 2 +- > xen/include/asm-x86/p2m.h | 6 +++ > 6 files changed, 124 insertions(+), 15 deletions(-) > > diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c > index c0cd017..d78c82c 100644 > --- a/xen/arch/x86/mm/mem_access.c > +++ b/xen/arch/x86/mm/mem_access.c > @@ -221,7 +221,10 @@ bool p2m_mem_access_check(paddr_t gpa, unsigned long gla, > { > req->u.mem_access.flags |= MEM_ACCESS_GLA_VALID; > req->u.mem_access.gla = gla; > + } > > + if ( npfec.gla_valid || cpu_has_svm ) > + { I can’t immediately tell what this is about, which means it needs a comment. It may also be (depending on why this is here) that “cpu_has_svm” makes this more fragile — if this is really about having a radix tree, for instance, then you should probably check for a radix tree. 
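For instance, if the condition is really "this domain has a radix tree of access settings", something along these lines would be both clearer and less fragile (only a sketch; it assumes the host p2m pointer is already to hand in this function, and the comment is a placeholder for whatever the real reason turns out to be):

    if ( npfec.gla_valid || p2m->mem_access_settings )
    {
        /*
         * Explain here why the fault kind is still worth reporting
         * even when no valid gla has been provided.
         */
        if ( npfec.kind == npfec_kind_with_gla )
            req->u.mem_access.flags |= MEM_ACCESS_FAULT_WITH_GLA;
        else if ( npfec.kind == npfec_kind_in_gpt )
            req->u.mem_access.flags |= MEM_ACCESS_FAULT_IN_GPT;
    }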
> if ( npfec.kind == npfec_kind_with_gla ) > req->u.mem_access.flags |= MEM_ACCESS_FAULT_WITH_GLA; > else if ( npfec.kind == npfec_kind_in_gpt ) > diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c > index b8c5d2e..4330d1f 100644 > --- a/xen/arch/x86/mm/p2m-pt.c > +++ b/xen/arch/x86/mm/p2m-pt.c > @@ -68,7 +68,8 @@ > static unsigned long p2m_type_to_flags(const struct p2m_domain *p2m, > p2m_type_t t, > mfn_t mfn, > - unsigned int level) > + unsigned int level, > + p2m_access_t access) > { > unsigned long flags; > /* > @@ -87,23 +88,27 @@ static unsigned long p2m_type_to_flags(const struct > p2m_domain *p2m, > case p2m_ram_paged: > case p2m_ram_paging_in: > default: > - return flags | _PAGE_NX_BIT; > + flags |= P2M_BASE_FLAGS | _PAGE_NX_BIT; > + break; > case p2m_grant_map_ro: > return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT; > case p2m_ioreq_server: > flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT; > if ( p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE ) > - return flags & ~_PAGE_RW; > - return flags; > + flags &= ~_PAGE_RW; > + break; > case p2m_ram_ro: > case p2m_ram_logdirty: > case p2m_ram_shared: > - return flags | P2M_BASE_FLAGS; > + flags |= P2M_BASE_FLAGS; > + break; > case p2m_ram_rw: > - return flags | P2M_BASE_FLAGS | _PAGE_RW; > + flags |= P2M_BASE_FLAGS | _PAGE_RW; > + break; > case p2m_grant_map_rw: > case p2m_map_foreign: > - return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT; > + flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT; > + break; > case p2m_mmio_direct: > if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) ) > flags |= _PAGE_RW; > @@ -112,8 +117,37 @@ static unsigned long p2m_type_to_flags(const struct > p2m_domain *p2m, > flags |= _PAGE_PWT; > ASSERT(!level); > } > - return flags | P2M_BASE_FLAGS | _PAGE_PCD; > + flags |= P2M_BASE_FLAGS | _PAGE_PCD; > + break; > + } I think you want a blank line here. > + switch ( access ) > + { > + case p2m_access_r: > + case p2m_access_w: > + flags |= _PAGE_NX_BIT; > + flags &= ~_PAGE_RW; > + break; > + case p2m_access_rw: > + flags |= _PAGE_NX_BIT; > + break; > + case p2m_access_n: > + case p2m_access_n2rwx: > + flags |= _PAGE_NX_BIT; > + flags &= ~_PAGE_RW; > + break; > + case p2m_access_rx: > + case p2m_access_wx: > + case p2m_access_rx2rw: > + flags &= ~(_PAGE_NX_BIT | _PAGE_RW); This looks like a mistake — this will unconditionally enable execution, even if the underlying p2m type forbids it. ept_p2m_type_to_flags() doesn’t do that. > + break; > + case p2m_access_x: > + flags &= ~_PAGE_RW; > + break; > + case p2m_access_rwx: > + default: > + break; > } I think you want another blank line here too. Also, this doesn’t seem to capture the ‘r’ part of the equation — shouldn’t p2m_access_n end up with a not-present p2m entry? 
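To be concrete: I'd expect this switch only ever to take permissions away relative to what the type-based flags above grant, the way ept_p2m_type_to_flags() does, never to hand any back. Something roughly like this (only a sketch, not even compile-tested):

    switch ( access )
    {
    case p2m_access_n:
    case p2m_access_n2rwx:
        /* Removing 'r' on NPT means making the entry not present. */
        flags &= ~(_PAGE_PRESENT | _PAGE_RW);
        flags |= _PAGE_NX_BIT;
        break;
    case p2m_access_r:
    case p2m_access_w:          /* Write-only is not expressible; clamp. */
        flags &= ~_PAGE_RW;
        flags |= _PAGE_NX_BIT;
        break;
    case p2m_access_rw:
        flags |= _PAGE_NX_BIT;
        break;
    case p2m_access_rx:
    case p2m_access_rx2rw:
        /* Drop write, but leave NX exactly as the type decided it. */
        flags &= ~_PAGE_RW;
        break;
    case p2m_access_rwx:
    default:
        break;
    }

The access types NPT simply can't express (p2m_access_x, p2m_access_wx, and so on) need a decision of their own; but whatever that decision is, it shouldn't involve clearing an _PAGE_NX_BIT that the type put there.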
> + return flags; > } > > > @@ -174,6 +208,44 @@ static void p2m_add_iommu_flags(l1_pgentry_t *p2m_entry, > l1e_add_flags(*p2m_entry, iommu_nlevel_to_flags(nlevel, flags)); > } > > +static p2m_access_t p2m_get_access(struct p2m_domain *p2m, unsigned long gfn) > +{ > + void *ptr; > + > + if ( !p2m->mem_access_settings ) > + return p2m_access_rwx; > + > + ptr = radix_tree_lookup(p2m->mem_access_settings, gfn); > + if ( !ptr ) > + return p2m_access_rwx; > + else > + return radix_tree_ptr_to_int(ptr); > +} > + > +static void p2m_set_access(struct p2m_domain *p2m, unsigned long gfn, > + p2m_access_t a) > +{ > + int rc; > + > + if ( !p2m->mem_access_settings ) > + return; > + > + if ( p2m_access_rwx == a ) > + { > + radix_tree_delete(p2m->mem_access_settings, gfn); > + return; > + } > + > + rc = radix_tree_insert(p2m->mem_access_settings, gfn, > + radix_tree_int_to_ptr(a)); > + if ( rc == -EEXIST ) > + /* If a setting already exists, change it to the new one. */ > + radix_tree_replace_slot( > + radix_tree_lookup_slot( > + p2m->mem_access_settings, gfn), > + radix_tree_int_to_ptr(a)); > +} > + > /* Returns: 0 for success, -errno for failure */ > static int > p2m_next_level(struct p2m_domain *p2m, void **table, > @@ -201,6 +273,7 @@ p2m_next_level(struct p2m_domain *p2m, void **table, > new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW); > > p2m_add_iommu_flags(&new_entry, level, > IOMMUF_readable|IOMMUF_writable); > + p2m_set_access(p2m, gfn, p2m->default_access); > p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1); > } > else if ( flags & _PAGE_PSE ) > @@ -249,6 +322,7 @@ p2m_next_level(struct p2m_domain *p2m, void **table, > { > new_entry = l1e_from_pfn(pfn | (i << ((level - 1) * > PAGETABLE_ORDER)), > flags); > + p2m_set_access(p2m, gfn, p2m->default_access); > p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, level); > } > > @@ -256,6 +330,7 @@ p2m_next_level(struct p2m_domain *p2m, void **table, > > new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW); > p2m_add_iommu_flags(&new_entry, level, > IOMMUF_readable|IOMMUF_writable); > + p2m_set_access(p2m, gfn, p2m->default_access); > p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1); > } > else > @@ -420,8 +495,9 @@ static int do_recalc(struct p2m_domain *p2m, unsigned > long gfn) > if ( nt != ot ) > { > unsigned long mfn = l1e_get_pfn(e); > + p2m_access_t a = p2m_get_access(p2m, gfn); > unsigned long flags = p2m_type_to_flags(p2m, nt, > - _mfn(mfn), level); > + _mfn(mfn), level, a); > > if ( level ) > { > @@ -569,13 +645,14 @@ p2m_pt_set_entry(struct p2m_domain *p2m, gfn_t gfn_, > mfn_t mfn, > ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); > l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) > ? 
p2m_l3e_from_pfn(mfn_x(mfn), > - p2m_type_to_flags(p2m, p2mt, mfn, 2)) > + p2m_type_to_flags(p2m, p2mt, mfn, 2, p2ma)) > : l3e_empty(); > entry_content.l1 = l3e_content.l3; > > if ( entry_content.l1 != 0 ) > p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags); > > + p2m_set_access(p2m, gfn, p2ma); > p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 3); > /* NB: paging_write_p2m_entry() handles tlb flushes properly */ > } > @@ -608,7 +685,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, gfn_t gfn_, > mfn_t mfn, > > if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ) > entry_content = p2m_l1e_from_pfn(mfn_x(mfn), > - p2m_type_to_flags(p2m, p2mt, mfn, > 0)); > + p2m_type_to_flags(p2m, p2mt, mfn, > 0, p2ma)); > else > entry_content = l1e_empty(); > > @@ -630,6 +707,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, gfn_t gfn_, > mfn_t mfn, > p2m->ioreq.entry_count--; > } > > + p2m_set_access(p2m, gfn, p2ma); > /* level 1 entry */ > p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 1); > /* NB: paging_write_p2m_entry() handles tlb flushes properly */ > @@ -661,13 +739,14 @@ p2m_pt_set_entry(struct p2m_domain *p2m, gfn_t gfn_, > mfn_t mfn, > ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); > l2e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) > ? p2m_l2e_from_pfn(mfn_x(mfn), > - p2m_type_to_flags(p2m, p2mt, mfn, 1)) > + p2m_type_to_flags(p2m, p2mt, mfn, 1, p2ma)) > : l2e_empty(); > entry_content.l1 = l2e_content.l2; > > if ( entry_content.l1 != 0 ) > p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags); > > + p2m_set_access(p2m, gfn, p2ma); > p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 2); > /* NB: paging_write_p2m_entry() handles tlb flushes properly */ > } > @@ -749,8 +828,7 @@ p2m_pt_get_entry(struct p2m_domain *p2m, gfn_t gfn_, > * XXX Once we start explicitly registering MMIO regions in the p2m > * XXX we will return p2m_invalid for unmapped gfns */ > *t = p2m_mmio_dm; > - /* Not implemented except with EPT */ > - *a = p2m_access_rwx; > + *a = p2m_access_n; > > if ( gfn > p2m->max_mapped_pfn ) > { > @@ -813,6 +891,7 @@ pod_retry_l3: > l1_table_offset(addr)); > *t = p2m_recalc_type(recalc || _needs_recalc(flags), > p2m_flags_to_type(flags), p2m, gfn); > + *a = p2m_get_access(p2m, gfn); > unmap_domain_page(l3e); > > ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t)); > @@ -852,6 +931,7 @@ pod_retry_l2: > mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr)); > *t = p2m_recalc_type(recalc || _needs_recalc(flags), > p2m_flags_to_type(flags), p2m, gfn); > + *a = p2m_get_access(p2m, gfn); > unmap_domain_page(l2e); > > ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t)); > @@ -888,6 +968,7 @@ pod_retry_l1: > } > mfn = l1e_get_mfn(*l1e); > *t = p2m_recalc_type(recalc || _needs_recalc(flags), l1t, p2m, gfn); > + *a = p2m_get_access(p2m, gfn); > unmap_domain_page(l1e); > > ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t) || p2m_is_paging(*t)); > diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c > index c53cab4..12e2d24 100644 > --- a/xen/arch/x86/mm/p2m.c > +++ b/xen/arch/x86/mm/p2m.c > @@ -675,6 +675,12 @@ void p2m_teardown(struct p2m_domain *p2m) > > d = p2m->domain; > > + if ( p2m->mem_access_settings ) > + { > + radix_tree_destroy(p2m->mem_access_settings, NULL); > + xfree(p2m->mem_access_settings); > + } > + > p2m_lock(p2m); > ASSERT(atomic_read(&d->shr_pages) == 0); > p2m->phys_table = pagetable_null(); > diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c > index 3fb6531..18b88a1 100644 > --- a/xen/arch/x86/monitor.c > +++ b/xen/arch/x86/monitor.c > @@ -20,10 
+20,13 @@ > */ > > #include <asm/monitor.h> > +#include <asm/p2m.h> > #include <public/vm_event.h> > > int arch_monitor_init_domain(struct domain *d) > { > + struct p2m_domain *p2m = p2m_get_hostp2m(d); > + > if ( !d->arch.monitor.msr_bitmap ) > d->arch.monitor.msr_bitmap = xzalloc_array(struct monitor_msr_bitmap, > 2); > @@ -31,6 +34,16 @@ int arch_monitor_init_domain(struct domain *d) > if ( !d->arch.monitor.msr_bitmap ) > return -ENOMEM; > > + if ( cpu_has_svm && !p2m->mem_access_settings ) > + { > + p2m->mem_access_settings = xmalloc(struct radix_tree_root); > + > + if( !p2m->mem_access_settings ) > + return -ENOMEM; This will leak d->arch.monitor.msr_bitmap. You need to use the `goto out_free:` pattern. Everything else looks OK. -George _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
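For reference, the goto-based error path referred to above would look roughly like this (again only a sketch, not compile-tested; it assumes radix_tree_init() is also called here, as the changelog says it was moved into this function, and that it is correct to tear the bitmap down again on this path):

int arch_monitor_init_domain(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    if ( !d->arch.monitor.msr_bitmap )
        d->arch.monitor.msr_bitmap = xzalloc_array(struct monitor_msr_bitmap,
                                                   2);

    if ( !d->arch.monitor.msr_bitmap )
        return -ENOMEM;

    if ( cpu_has_svm && !p2m->mem_access_settings )
    {
        p2m->mem_access_settings = xmalloc(struct radix_tree_root);
        if ( !p2m->mem_access_settings )
            goto out_free;
        radix_tree_init(p2m->mem_access_settings);
    }

    return 0;

 out_free:
    /* Don't leak the bitmap allocated above if the radix tree allocation fails. */
    xfree(d->arch.monitor.msr_bitmap);
    d->arch.monitor.msr_bitmap = NULL;
    return -ENOMEM;
}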