For those users who want to use the virtual addresses that are in the hypervisor's virtual address space - these three new functions allow that, along with providing the underlying MFNs for the user's own purposes (such as changing page table permissions).
Implementation-wise, the vmap API now keeps track of two virtual address regions: a) VMAP_VIRT_START b) Any provided virtual address space (needs a start and an end). The a) one is the default one and the existing behavior for users of vmalloc, vmap, etc. is the same. If however one wishes to use the b) one, one only has to use vm_init_type to initialize it and vmalloc_xen (a wrapper around the internal vmalloc_type) to utilize it. This allows users (such as xSplice) to provide their own mechanism to change the page flags, and also use virtual addresses closer to the hypervisor virtual addresses (at least on x86) while not having to deal with the allocation of pages. For an example of such users, see the patch titled "xsplice: Implement payload loading", where we parse the payload's ELF relocations - which are defined to be signed 32-bit (so the max displacement is 2GB of virtual space). The displacement of the hypervisor virtual addresses to the vmalloc area (on x86) is more than 32 bits - which means that ELF relocations would truncate the 33rd and 34th bits. Hence this alternate API. We also add extra checks in case the b) range has not been initialized. Signed-off-by: Konrad Rzeszutek Wilk <konrad.w...@oracle.com> Suggested-by: Jan Beulich <jbeul...@suse.com> Acked-by: Julien Grall <julien.gr...@arm.com> [ARM] --- Cc: Ian Jackson <ian.jack...@eu.citrix.com> Cc: Jan Beulich <jbeul...@suse.com> Cc: Keir Fraser <k...@xen.org> Cc: Tim Deegan <t...@xen.org> Cc: Stefano Stabellini <sstabell...@kernel.org> Cc: Julien Grall <julien.gr...@arm.com> v4: New patch. v5: Update per Jan's comments. v6: Drop the stray parentheses on typedefs. Ditch the vunmap callback. Stash away the virtual addresses in lists. Ditch the vmap callback. Just provide virtual address. Ditch the vmalloc_range. Require users of alternative virtual address to call vmap_init_type first. v7: Don't expose the vmalloc_type and such. Instead provide a wrapper called vmalloc_xen for those. Rename the enum, change one of the names. 
Moved the vunmap_type around in c file so we don't have to declare it in the header. --- --- xen/arch/arm/kernel.c | 2 +- xen/arch/arm/mm.c | 2 +- xen/arch/x86/mm.c | 2 +- xen/common/vmap.c | 202 ++++++++++++++++++++++++++++++------------------- xen/drivers/acpi/osl.c | 2 +- xen/include/xen/vmap.h | 21 ++++- 6 files changed, 148 insertions(+), 83 deletions(-) diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c index 61808ac..9871bd9 100644 --- a/xen/arch/arm/kernel.c +++ b/xen/arch/arm/kernel.c @@ -299,7 +299,7 @@ static __init int kernel_decompress(struct bootmodule *mod) return -ENOMEM; } mfn = _mfn(page_to_mfn(pages)); - output = __vmap(&mfn, 1 << kernel_order_out, 1, 1, PAGE_HYPERVISOR); + output = __vmap(&mfn, 1 << kernel_order_out, 1, 1, PAGE_HYPERVISOR, VMAP_DEFAULT); rc = perform_gunzip(output, input, size); clean_dcache_va_range(output, output_size); diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c index 7065c3e..94ea054 100644 --- a/xen/arch/arm/mm.c +++ b/xen/arch/arm/mm.c @@ -807,7 +807,7 @@ void *ioremap_attr(paddr_t pa, size_t len, unsigned int attributes) mfn_t mfn = _mfn(PFN_DOWN(pa)); unsigned int offs = pa & (PAGE_SIZE - 1); unsigned int nr = PFN_UP(offs + len); - void *ptr = __vmap(&mfn, nr, 1, 1, attributes); + void *ptr = __vmap(&mfn, nr, 1, 1, attributes, VMAP_DEFAULT); if ( ptr == NULL ) return NULL; diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index bca7532..ca2d0bb 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -6124,7 +6124,7 @@ void __iomem *ioremap(paddr_t pa, size_t len) unsigned int offs = pa & (PAGE_SIZE - 1); unsigned int nr = PFN_UP(offs + len); - va = __vmap(&mfn, nr, 1, 1, PAGE_HYPERVISOR_NOCACHE) + offs; + va = __vmap(&mfn, nr, 1, 1, PAGE_HYPERVISOR_NOCACHE, VMAP_DEFAULT) + offs; } return (void __force __iomem *)va; diff --git a/xen/common/vmap.c b/xen/common/vmap.c index 134eda0..eb477a4 100644 --- a/xen/common/vmap.c +++ b/xen/common/vmap.c @@ -10,40 +10,43 @@ #include <asm/page.h> static 
DEFINE_SPINLOCK(vm_lock); -static void *__read_mostly vm_base; -#define vm_bitmap ((unsigned long *)vm_base) +static void *__read_mostly vm_base[VMAP_REGION_NR]; +#define vm_bitmap(x) ((unsigned long *)vm_base[x]) /* highest allocated bit in the bitmap */ -static unsigned int __read_mostly vm_top; +static unsigned int __read_mostly vm_top[VMAP_REGION_NR]; /* total number of bits in the bitmap */ -static unsigned int __read_mostly vm_end; +static unsigned int __read_mostly vm_end[VMAP_REGION_NR]; /* lowest known clear bit in the bitmap */ -static unsigned int vm_low; +static unsigned int vm_low[VMAP_REGION_NR]; -void __init vm_init(void) +void __init vm_init_type(enum vmap_region type, void *start, void *end) { unsigned int i, nr; unsigned long va; - vm_base = (void *)VMAP_VIRT_START; - vm_end = PFN_DOWN(arch_vmap_virt_end() - vm_base); - vm_low = PFN_UP((vm_end + 7) / 8); - nr = PFN_UP((vm_low + 7) / 8); - vm_top = nr * PAGE_SIZE * 8; + ASSERT(!vm_base[type]); - for ( i = 0, va = (unsigned long)vm_bitmap; i < nr; ++i, va += PAGE_SIZE ) + vm_base[type] = start; + vm_end[type] = PFN_DOWN(end - start); + vm_low[type]= PFN_UP((vm_end[type] + 7) / 8); + nr = PFN_UP((vm_low[type] + 7) / 8); + vm_top[type] = nr * PAGE_SIZE * 8; + + for ( i = 0, va = (unsigned long)vm_bitmap(type); i < nr; ++i, va += PAGE_SIZE ) { struct page_info *pg = alloc_domheap_page(NULL, 0); map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR); clear_page((void *)va); } - bitmap_fill(vm_bitmap, vm_low); + bitmap_fill(vm_bitmap(type), vm_low[type]); /* Populate page tables for the bitmap if necessary. 
*/ - populate_pt_range(va, 0, vm_low - nr); + populate_pt_range(va, 0, vm_low[type] - nr); } -void *vm_alloc(unsigned int nr, unsigned int align) +static void *vm_alloc_type(unsigned int nr, unsigned int align, + enum vmap_region t) { unsigned int start, bit; @@ -52,27 +55,30 @@ void *vm_alloc(unsigned int nr, unsigned int align) else if ( align & (align - 1) ) align &= -align; + if ( !vm_base[t] ) + return NULL; + spin_lock(&vm_lock); for ( ; ; ) { struct page_info *pg; - ASSERT(vm_low == vm_top || !test_bit(vm_low, vm_bitmap)); - for ( start = vm_low; start < vm_top; ) + ASSERT(vm_low[t] == vm_top[t] || !test_bit(vm_low[t], vm_bitmap(t))); + for ( start = vm_low[t]; start < vm_top[t]; ) { - bit = find_next_bit(vm_bitmap, vm_top, start + 1); - if ( bit > vm_top ) - bit = vm_top; + bit = find_next_bit(vm_bitmap(t), vm_top[t], start + 1); + if ( bit > vm_top[t] ) + bit = vm_top[t]; /* * Note that this skips the first bit, making the * corresponding page a guard one. */ start = (start + align) & ~(align - 1); - if ( bit < vm_top ) + if ( bit < vm_top[t] ) { if ( start + nr < bit ) break; - start = find_next_zero_bit(vm_bitmap, vm_top, bit + 1); + start = find_next_zero_bit(vm_bitmap(t), vm_top[t], bit + 1); } else { @@ -82,12 +88,12 @@ void *vm_alloc(unsigned int nr, unsigned int align) } } - if ( start < vm_top ) + if ( start < vm_top[t] ) break; spin_unlock(&vm_lock); - if ( vm_top >= vm_end ) + if ( vm_top[t] >= vm_end[t] ) return NULL; pg = alloc_domheap_page(NULL, 0); @@ -96,23 +102,23 @@ void *vm_alloc(unsigned int nr, unsigned int align) spin_lock(&vm_lock); - if ( start >= vm_top ) + if ( start >= vm_top[t] ) { - unsigned long va = (unsigned long)vm_bitmap + vm_top / 8; + unsigned long va = (unsigned long)vm_bitmap(t) + vm_top[t] / 8; if ( !map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR) ) { clear_page((void *)va); - vm_top += PAGE_SIZE * 8; - if ( vm_top > vm_end ) - vm_top = vm_end; + vm_top[t] += PAGE_SIZE * 8; + if ( vm_top[t] > vm_end[t] ) + 
vm_top[t] = vm_end[t]; continue; } } free_domheap_page(pg); - if ( start >= vm_top ) + if ( start >= vm_top[t] ) { spin_unlock(&vm_lock); return NULL; @@ -120,47 +126,56 @@ void *vm_alloc(unsigned int nr, unsigned int align) } for ( bit = start; bit < start + nr; ++bit ) - __set_bit(bit, vm_bitmap); - if ( bit < vm_top ) - ASSERT(!test_bit(bit, vm_bitmap)); + __set_bit(bit, vm_bitmap(t)); + if ( bit < vm_top[t] ) + ASSERT(!test_bit(bit, vm_bitmap(t))); else - ASSERT(bit == vm_top); - if ( start <= vm_low + 2 ) - vm_low = bit; + ASSERT(bit == vm_top[t]); + if ( start <= vm_low[t] + 2 ) + vm_low[t] = bit; spin_unlock(&vm_lock); - return vm_base + start * PAGE_SIZE; + return vm_base[t] + start * PAGE_SIZE; +} + +void *vm_alloc(unsigned int nr, unsigned int align) +{ + return vm_alloc_type(nr, align, VMAP_DEFAULT); } -static unsigned int vm_index(const void *va) +static unsigned int vm_index(const void *va, enum vmap_region type) { unsigned long addr = (unsigned long)va & ~(PAGE_SIZE - 1); unsigned int idx; + unsigned long start = (unsigned long)vm_base[type]; + + if ( !start) + return 0; - if ( addr < VMAP_VIRT_START + (vm_end / 8) || - addr >= VMAP_VIRT_START + vm_top * PAGE_SIZE ) + if ( addr < start + (vm_end[type] / 8) || + addr >= start + vm_top[type] * PAGE_SIZE ) return 0; - idx = PFN_DOWN(va - vm_base); - return !test_bit(idx - 1, vm_bitmap) && - test_bit(idx, vm_bitmap) ? idx : 0; + idx = PFN_DOWN(va - vm_base[type]); + return !test_bit(idx - 1, vm_bitmap(type)) && + test_bit(idx, vm_bitmap(type)) ? 
idx : 0; } -static unsigned int vm_size(const void *va) +static unsigned int vm_size(const void *va, enum vmap_region type) { - unsigned int start = vm_index(va), end; + unsigned int start = vm_index(va, type), end; if ( !start ) return 0; - end = find_next_zero_bit(vm_bitmap, vm_top, start + 1); + end = find_next_zero_bit(vm_bitmap(type), vm_top[type], start + 1); - return min(end, vm_top) - start; + return min(end, vm_top[type]) - start; } -void vm_free(const void *va) +static void vm_free_type(const void *va, enum vmap_region type) { - unsigned int bit = vm_index(va); + unsigned int bit = vm_index(va, type); if ( !bit ) { @@ -169,29 +184,54 @@ void vm_free(const void *va) } spin_lock(&vm_lock); - if ( bit < vm_low ) + if ( bit < vm_low[type] ) { - vm_low = bit - 1; - while ( !test_bit(vm_low - 1, vm_bitmap) ) - --vm_low; + vm_low[type] = bit - 1; + while ( !test_bit(vm_low[type] - 1, vm_bitmap(type)) ) + --vm_low[type]; } - while ( __test_and_clear_bit(bit, vm_bitmap) ) - if ( ++bit == vm_top ) + while ( __test_and_clear_bit(bit, vm_bitmap(type)) ) + if ( ++bit == vm_top[type] ) break; spin_unlock(&vm_lock); } +void vm_free(const void *va) +{ + vm_free_type(va, VMAP_DEFAULT); +} + +static void vunmap_type(const void *va, enum vmap_region type) +{ + unsigned int size = vm_size(va, type); +#ifndef _PAGE_NONE + unsigned long addr = (unsigned long)va; + + destroy_xen_mappings(addr, addr + PAGE_SIZE * size); +#else /* Avoid tearing down intermediate page tables. 
*/ + map_pages_to_xen((unsigned long)va, 0, size, _PAGE_NONE); +#endif + vm_free_type(va, type); +} + +void vunmap(const void *va) +{ + vunmap_type(va, VMAP_DEFAULT); +} + + void *__vmap(const mfn_t *mfn, unsigned int granularity, - unsigned int nr, unsigned int align, unsigned int flags) + unsigned int nr, unsigned int align, unsigned int flags, + enum vmap_region type) { - void *va = vm_alloc(nr * granularity, align); + void *va = vm_alloc_type(nr * granularity, align, type); unsigned long cur = (unsigned long)va; for ( ; va && nr--; ++mfn, cur += PAGE_SIZE * granularity ) { if ( map_pages_to_xen(cur, mfn_x(*mfn), granularity, flags) ) { - vunmap(va); + vunmap_type(va, type); va = NULL; } } @@ -201,22 +241,10 @@ void *__vmap(const mfn_t *mfn, unsigned int granularity, void *vmap(const mfn_t *mfn, unsigned int nr) { - return __vmap(mfn, 1, nr, 1, PAGE_HYPERVISOR); + return __vmap(mfn, 1, nr, 1, PAGE_HYPERVISOR, VMAP_DEFAULT); } -void vunmap(const void *va) -{ -#ifndef _PAGE_NONE - unsigned long addr = (unsigned long)va; - - destroy_xen_mappings(addr, addr + PAGE_SIZE * vm_size(va)); -#else /* Avoid tearing down intermediate page tables. 
*/ - map_pages_to_xen((unsigned long)va, 0, vm_size(va), _PAGE_NONE); -#endif - vm_free(va); -} - -void *vmalloc(size_t size) +static void *vmalloc_type(size_t size, enum vmap_region type) { mfn_t *mfn; size_t pages, i; @@ -238,11 +266,12 @@ void *vmalloc(size_t size) mfn[i] = _mfn(page_to_mfn(pg)); } - va = vmap(mfn, pages); + va = __vmap(mfn, 1, pages, 1, PAGE_HYPERVISOR, type); if ( va == NULL ) goto error; xfree(mfn); + return va; error: @@ -252,6 +281,16 @@ void *vmalloc(size_t size) return NULL; } +void *vmalloc(size_t size) +{ + return vmalloc_type(size, VMAP_DEFAULT); +} + +void *vmalloc_xen(size_t size) +{ + return vmalloc_type(size, VMAP_XEN); +} + void *vzalloc(size_t size) { void *p = vmalloc(size); @@ -266,7 +305,7 @@ void *vzalloc(size_t size) return p; } -void vfree(void *va) +void vfree_type(void *va, enum vmap_region type) { unsigned int i, pages; struct page_info *pg; @@ -275,7 +314,8 @@ void vfree(void *va) if ( !va ) return; - pages = vm_size(va); + pages = vm_size(va, type); + ASSERT(pages); for ( i = 0; i < pages; i++ ) @@ -285,9 +325,19 @@ void vfree(void *va) ASSERT(page); page_list_add(page, &pg_list); } - vunmap(va); + vunmap_type(va, type); while ( (pg = page_list_remove_head(&pg_list)) != NULL ) free_domheap_page(pg); } + +void vfree(void *va) +{ + vfree_type(va, VMAP_DEFAULT); +} + +void vfree_xen(void *va) +{ + vfree_type(va, VMAP_XEN); +} #endif diff --git a/xen/drivers/acpi/osl.c b/xen/drivers/acpi/osl.c index 8a28d87..9a49029 100644 --- a/xen/drivers/acpi/osl.c +++ b/xen/drivers/acpi/osl.c @@ -97,7 +97,7 @@ acpi_os_map_memory(acpi_physical_address phys, acpi_size size) if (IS_ENABLED(CONFIG_X86) && !((phys + size - 1) >> 20)) return __va(phys); return __vmap(&mfn, PFN_UP(offs + size), 1, 1, - ACPI_MAP_MEM_ATTR) + offs; + ACPI_MAP_MEM_ATTR, VMAP_DEFAULT) + offs; } return __acpi_map_table(phys, size); } diff --git a/xen/include/xen/vmap.h b/xen/include/xen/vmap.h index 5671ac8..0fb78a2 100644 --- a/xen/include/xen/vmap.h +++ 
b/xen/include/xen/vmap.h @@ -4,16 +4,28 @@ #include <xen/mm.h> #include <asm/page.h> +enum vmap_region { + VMAP_DEFAULT, + VMAP_XEN, + VMAP_REGION_NR, +}; + +void vm_init_type(enum vmap_region type, void *start, void *end); + void *vm_alloc(unsigned int nr, unsigned int align); void vm_free(const void *); -void *__vmap(const mfn_t *mfn, unsigned int granularity, - unsigned int nr, unsigned int align, unsigned int flags); +void *__vmap(const mfn_t *mfn, unsigned int granularity, unsigned int nr, + unsigned int align, unsigned int flags, enum vmap_region); void *vmap(const mfn_t *mfn, unsigned int nr); void vunmap(const void *); + void *vmalloc(size_t size); +void *vmalloc_xen(size_t size); + void *vzalloc(size_t size); void vfree(void *va); +void vfree_xen(void *va); void __iomem *ioremap(paddr_t, size_t); @@ -24,7 +36,10 @@ static inline void iounmap(void __iomem *va) vunmap((void *)(addr & PAGE_MASK)); } -void vm_init(void); void *arch_vmap_virt_end(void); +static inline void vm_init(void) +{ + vm_init_type(VMAP_DEFAULT, (void *)VMAP_VIRT_START, arch_vmap_virt_end()); +} #endif /* __XEN_VMAP_H__ */ -- 2.5.0 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel