On 11/23/15, Aneesh Kumar K.V wrote:
> We support THP only with book3s_64 and 64K page size. Move
> THP details to hash64-64k.h to clarify the same.
>
> Acked-by: Scott Wood
> Signed-off-by: Aneesh Kumar K.V
> ---
> arch/powerpc/include/asm/book3s/64/hash-64k.h | 126 +
> arch/powerpc/include/asm/book3s/64/hash.h | 223 +--
> arch/powerpc/include/asm/nohash/64/pgtable.h | 253 +-
> arch/powerpc/mm/hash_native_64.c | 10 +
> arch/powerpc/mm/pgtable_64.c | 2 +-
> arch/powerpc/platforms/pseries/lpar.c | 10 +
> 6 files changed, 201 insertions(+), 423 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> index 20865ca7a179..34eab4542b85 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> @@ -170,6 +170,132 @@ static inline int hugepd_ok(hugepd_t hpd)
>
> #endif /* CONFIG_HUGETLB_PAGE */
>
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
> + unsigned long addr,
> + pmd_t *pmdp,
> + unsigned long clr,
> + unsigned long set);
> +static inline char *get_hpte_slot_array(pmd_t *pmdp)
> +{
> + /*
> + * The hpte hindex is stored in the pgtable whose address is in the
> + * second half of the PMD
> + *
> + * Order this load with the test for pmd_trans_huge in the caller
> + */
> + smp_rmb();
> + return *(char **)(pmdp + PTRS_PER_PMD);
> +
> +
> +}
> +/*
> + * The linux hugepage PMD now includes the pmd entries followed by the
> + * address to the stashed pgtable_t. The stashed pgtable_t contains the
> + * hpte bits. [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use
> + * one byte per each HPTE entry. With 16MB hugepage and 64K HPTE we need
> + * 256 entries and with 4K HPTE we need 4096 entries. Both will fit in a
> + * 4K pgtable_t.
> + *
> + * The last three bits are intentionally left to zero. These memory
> + * locations are also used as normal page PTE pointers. So if we have any
> + * pointers left around while we collapse a hugepage, we need to make sure
> + * _PAGE_PRESENT bit of that is zero when we look at them
> + */
> +static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
> +{
> + return (hpte_slot_array[index] >> 3) & 0x1;
> +}
> +
> +static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
> +int index)
> +{
> + return hpte_slot_array[index] >> 4;
> +}
> +
> +static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
> + unsigned int index, unsigned int hidx)
> +{
> + hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
> +}
> +
> +/*
> + *
> + * For core kernel code by design pmd_trans_huge is never run on any
> + * hugetlbfs page. The hugetlbfs page table walking and mangling paths
> + * are totally separated from the core VM paths and they're differentiated
> + * by VM_HUGETLB being set on vm_flags well before any pmd_trans_huge
> + * could run.
> + *
> + * pmd_trans_huge() is defined as false at build time if
> + * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
> + * time in such case.
> + *
> + * For ppc64 we need to differentiate explicit hugepages from THP, because
> + * for THP we also track the subpage details at the pmd level. We don't do
> + * that for explicit huge pages.
> + *
> + */
> +static inline int pmd_trans_huge(pmd_t pmd)
> +{
> + /*
> + * leaf pte for huge page, bottom two bits != 00
> + */
> + return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
> +}
> +
> +static inline int pmd_trans_splitting(pmd_t pmd)
> +{
> + if (pmd_trans_huge(pmd))
> + return pmd_val(pmd) & _PAGE_SPLITTING;
> + return 0;
> +}
> +
> +static inline int pmd_large(pmd_t pmd)
> +{
> + /*
> + * leaf pte for huge page, bottom two bits != 00
> + */
> + return ((pmd_val(pmd) & 0x3) != 0x0);
> +}
> +
> +static inline pmd_t pmd_mknotpresent(pmd_t pmd)
> +{
> + return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
> +}
> +
> +static inline pmd_t pmd_mksplitting(pmd_t pmd)
> +{
> + return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
> +}
> +
> +#define __HAVE_ARCH_PMD_SAME
> +static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
> +{
> + return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
> +}
> +
> +static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
> + unsigned long addr, pmd_t *pmdp)
> +{
> + unsigned long old;
> +
> + if