Re: [PATCH] powerpc/pseries: Move CMO code from plapr_wrappers.h to platforms/pseries
Hi Michael, [auto build test ERROR on powerpc/next] [also build test ERROR on v4.9-rc5 next-2016] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Michael-Ellerman/powerpc-pseries-Move-CMO-code-from-plapr_wrappers-h-to-platforms-pseries/20161114-145812 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next config: powerpc-allmodconfig (attached as .config) compiler: powerpc64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705 reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree make.cross ARCH=powerpc All errors (new ones prefixed by >>): arch/powerpc/platforms/pseries/cmm.c: In function 'plpar_page_set_loaned': >> arch/powerpc/platforms/pseries/cmm.c:114:30: error: implicit declaration of >> function 'cmo_get_page_size' [-Werror=implicit-function-declaration] unsigned long cmo_page_sz = cmo_get_page_size(); ^ cc1: some warnings being treated as errors vim +/cmo_get_page_size +114 arch/powerpc/platforms/pseries/cmm.c 108 static int hotplug_occurred; /* protected by the hotplug mutex */ 109 110 static struct task_struct *cmm_thread_ptr; 111 112 static long plpar_page_set_loaned(unsigned long vpa) 113 { > 114 unsigned long cmo_page_sz = cmo_get_page_size(); 115 long rc = 0; 116 int i; 117 --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip
[PATCH] powerpc/pseries: Move CMO code from plapr_wrappers.h to platforms/pseries
Currently there's some CMO (Cooperative Memory Overcommit) code, in plpar_wrappers.h. Some of it is #ifdef CONFIG_PSERIES and some of it isn't. The end result being if a file includes plpar_wrappers.h it won't build with CONFIG_PSERIES=n. Fix it by moving the CMO code into platforms/pseries. The two hcall wrappers can just be moved into their only caller, cmm.c, and the accessors can go in pseries.h. Note we need the accessors because cmm.c can be built as a module, so there needs to be a split between the built-in code vs the module, and that's achieved by using those accessors. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvcall.h | 21 arch/powerpc/include/asm/plpar_wrappers.h | 32 --- arch/powerpc/platforms/pseries/cmm.c | 32 +++ arch/powerpc/platforms/pseries/pseries.h | 19 ++ 4 files changed, 51 insertions(+), 53 deletions(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 708edebcf147..1acdcad5f773 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -412,27 +412,6 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc) } } -#ifdef CONFIG_PPC_PSERIES -extern int CMO_PrPSP; -extern int CMO_SecPSP; -extern unsigned long CMO_PageSize; - -static inline int cmo_get_primary_psp(void) -{ - return CMO_PrPSP; -} - -static inline int cmo_get_secondary_psp(void) -{ - return CMO_SecPSP; -} - -static inline unsigned long cmo_get_page_size(void) -{ - return CMO_PageSize; -} -#endif /* CONFIG_PPC_PSERIES */ - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 1b394247afc2..034a588b122c 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -93,38 +93,6 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa) return vpa_call(H_VPA_REG_DTL, cpu, vpa); } -static inline 
long plpar_page_set_loaned(unsigned long vpa) -{ - unsigned long cmo_page_sz = cmo_get_page_size(); - long rc = 0; - int i; - - for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) - rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); - - for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) - plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, - vpa + i - cmo_page_sz, 0); - - return rc; -} - -static inline long plpar_page_set_active(unsigned long vpa) -{ - unsigned long cmo_page_sz = cmo_get_page_size(); - long rc = 0; - int i; - - for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) - rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); - - for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) - plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, - vpa + i - cmo_page_sz, 0); - - return rc; -} - extern void vpa_init(int cpu); static inline long plpar_pte_enter(unsigned long flags, diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 66e7227469b8..4412f12374d3 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -109,6 +109,38 @@ static int hotplug_occurred; /* protected by the hotplug mutex */ static struct task_struct *cmm_thread_ptr; +static long plpar_page_set_loaned(unsigned long vpa) +{ + unsigned long cmo_page_sz = cmo_get_page_size(); + long rc = 0; + int i; + + for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) + rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); + + for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) + plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, + vpa + i - cmo_page_sz, 0); + + return rc; +} + +static long plpar_page_set_active(unsigned long vpa) +{ + unsigned long cmo_page_sz = cmo_get_page_size(); + long rc = 0; + int i; + + for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) + rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); + + for (i -= cmo_page_sz; rc && i != 0; i -= 
cmo_page_sz) + plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, + vpa + i - cmo_page_sz, 0); + + return rc; +} + /** * cmm_alloc_pages - Allocate pages and mark them as loaned * @nr:number of pages to allocate diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index b1be7b713fe6..1361a9db534b 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/p
Re: [PATCH V3 1/2] mm: move vma_is_anonymous check within pmd_move_must_withdraw
On 14/11/16 02:00, Aneesh Kumar K.V wrote: > Architectures like ppc64 want to use page table deposit/withdraw > even with huge pmd dax entries. Allow arch to override the > vma_is_anonymous check by moving that to pmd_move_must_withdraw > function > I think the changelog can be reworded a bit Independent of whether the vma is for anonymous memory, some arches like ppc64 would like to override pmd_move_must_withdraw(). One option is to encapsulate the vma_is_anonymous() check for general architectures inside pmd_move_must_withdraw() so that it is always called and architectures that need unconditional overriding can override this function. ppc64 needs to override the function when the MMU is configured to use hash PTE's. What do you think? Balbir Singh.
Re: [PATCH 1/3] crash: move crashkernel parsing and vmcore related code under CONFIG_CRASH_CORE
On 11/10/16 at 05:27pm, Hari Bathini wrote: > Traditionally, kdump is used to save vmcore in case of a crash. Some > architectures like powerpc can save vmcore using architecture specific > support instead of kexec/kdump mechanism. Such architecture specific > support also needs to reserve memory, to be used by dump capture kernel. > crashkernel parameter can be reused, for memory reservation, by such > architecture specific infrastructure. > > But currently, code related to vmcoreinfo and parsing of crashkernel > parameter is built under CONFIG_KEXEC_CORE. This patch introduces > CONFIG_CRASH_CORE and moves the above mentioned code under this config, > allowing code reuse without dependency on CONFIG_KEXEC. While here, > removing the multiple definitions of append_elf_note() and final_note() > for one defined under CONFIG_CRASH_CORE. There is no functional change > with this patch. Can't think of a reason to object. Could you do the moving from kexec_core.c to crash_core.c only, then do the arch specific clean up in another patch? Besides there's already a file crash_dump.h, can we reuse that? 
> > Signed-off-by: Hari Bathini > --- > arch/Kconfig |4 > arch/ia64/kernel/crash.c | 22 -- > arch/powerpc/Kconfig | 10 - > arch/powerpc/include/asm/fadump.h |2 > arch/powerpc/kernel/crash.c|2 > arch/powerpc/kernel/fadump.c | 34 --- > arch/powerpc/kernel/setup-common.c |5 > include/linux/crash_core.h | 75 ++ > include/linux/kexec.h | 63 - > kernel/Makefile|1 > kernel/crash_core.c| 450 > > kernel/kexec_core.c| 435 --- > 12 files changed, 550 insertions(+), 553 deletions(-) > create mode 100644 include/linux/crash_core.h > create mode 100644 kernel/crash_core.c > > diff --git a/arch/Kconfig b/arch/Kconfig > index 659bdd0..4ad34b9 100644 > --- a/arch/Kconfig > +++ b/arch/Kconfig > @@ -2,7 +2,11 @@ > # General architecture dependent options > # > > +config CRASH_CORE > + bool > + > config KEXEC_CORE > + select CRASH_CORE > bool > > config OPROFILE > diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c > index 2955f35..75859a0 100644 > --- a/arch/ia64/kernel/crash.c > +++ b/arch/ia64/kernel/crash.c > @@ -27,28 +27,6 @@ static int kdump_freeze_monarch; > static int kdump_on_init = 1; > static int kdump_on_fatal_mca = 1; > > -static inline Elf64_Word > -*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data, > - size_t data_len) > -{ > - struct elf_note *note = (struct elf_note *)buf; > - note->n_namesz = strlen(name) + 1; > - note->n_descsz = data_len; > - note->n_type = type; > - buf += (sizeof(*note) + 3)/4; > - memcpy(buf, name, note->n_namesz); > - buf += (note->n_namesz + 3)/4; > - memcpy(buf, data, data_len); > - buf += (data_len + 3)/4; > - return buf; > -} > - > -static void > -final_note(void *buf) > -{ > - memset(buf, 0, sizeof(struct elf_note)); > -} > - > extern void ia64_dump_cpu_regs(void *); > > static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus); > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 65fba4c..644703f 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -479,21 +479,23 @@ config 
RELOCATABLE > load address of the kernel (eg. u-boot/mkimage). > > config CRASH_DUMP > - bool "Build a kdump crash kernel" > + bool "Build a dump capture kernel" > depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) > select RELOCATABLE if (PPC64 && !COMPILE_TEST) || 44x || FSL_BOOKE > help > - Build a kernel suitable for use as a kdump capture kernel. > + Build a kernel suitable for use as a dump capture kernel. > The same kernel binary can be used as production kernel and dump > capture kernel. > > config FA_DUMP > bool "Firmware-assisted dump" > - depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC > + depends on PPC64 && PPC_RTAS > + select CRASH_CORE > + select CRASH_DUMP > help > A robust mechanism to get reliable kernel crash dump with > assistance from firmware. This approach does not use kexec, > - instead firmware assists in booting the kdump kernel > + instead firmware assists in booting the capture kernel > while preserving memory contents. Firmware-assisted dump > is meant to be a kdump replacement offering robustness and > speed not possible without system firmware assistance. > diff --git a/arch/powerpc/include/asm/fadump.h > b/arch/powerpc/include/asm/fadump.h > index 0031806..60b9108 100644 > --- a/arch/powerpc/include/asm/fadump.h > +++ b/arch/powerpc/include/asm/fadump.h > @@ -73,6 +73,8 @@ > reg_entry++;
[powerpc v4 3/3] Enable storage keys for radix - user mode execution
ISA 3 defines new encoded access authority that allows instruction access prevention in privileged mode and allows normal access to problem state. This patch just enables IAMR (Instruction Authority Mask Register), enabling AMR would require more work. I've tested this with a buggy driver and a simple payload. The payload is specific to the build I've tested. Signed-off-by: Balbir Singh --- arch/powerpc/mm/pgtable-radix.c | 20 1 file changed, 20 insertions(+) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 7343573..5c90bcd 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -339,6 +339,24 @@ static void __init radix_init_amor(void) mtspr(SPRN_AMOR, amor); } +/* + * For radix page tables we setup, the IAMR values as follows + * IMAR = 0100...00 (key 0 is set to 1) + * AMR, UAMR, UAMOR are not affected + */ +static void __init radix_init_iamr(void) +{ + unsigned long iamr_mask = 0x4000; + unsigned long iamr = mfspr(SPRN_IAMR); + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + return; + + iamr = iamr_mask; + + mtspr(SPRN_IAMR, iamr); +} + void __init radix__early_init_mmu(void) { unsigned long lpcr; @@ -398,6 +416,7 @@ void __init radix__early_init_mmu(void) radix_init_amor(); } + radix_init_iamr(); radix_init_pgtable(); } @@ -415,6 +434,7 @@ void radix__early_init_mmu_secondary(void) __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); radix_init_amor(); } + radix_init_iamr(); } void radix__mmu_cleanup_all(void) -- 2.5.5
[powerpc v4 2/3] Detect instruction fetch denied and report
ISA 3 allows for prevention of instruction fetch and execution of user mode pages. If such an error occurs, SRR1 bit 35 reports the error. We catch and report the error in do_page_fault() Signed-off-by: Balbir Singh --- arch/powerpc/mm/fault.c | 4 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index d0b137d..1e7ff7b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -404,6 +404,10 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, (cpu_has_feature(CPU_FTR_NOEXECUTE) || !(vma->vm_flags & (VM_READ | VM_WRITE goto bad_area; + + if (regs->msr & SRR1_ISI_N_OR_G) + goto bad_area; + #ifdef CONFIG_PPC_STD_MMU /* * protfault should only happen due to us -- 2.5.5
[powerpc v4 1/3] Setup AMOR in HV mode
AMOR should be setup in HV mode, we set it up once and let the generic kernel handle IAMR. This patch is used to enable storage keys in a following patch as defined in ISA 3 Reported-by: Aneesh Kumar K.V Signed-off-by: Balbir Singh --- arch/powerpc/mm/pgtable-radix.c | 21 + 1 file changed, 21 insertions(+) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index ed7bddc..7343573 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -320,6 +320,25 @@ static void update_hid_for_radix(void) cpu_relax(); } +/* + * In HV mode, we init AMOR so that the hypervisor + * and guest can setup IMAR, enable key 0 and set + * it to 1 + * AMOR = 110000 (Mask for key 0 is 11) + */ +static void __init radix_init_amor(void) +{ + unsigned long amor_mask = 0xc000; + unsigned long amor = mfspr(SPRN_AMOR); + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + return; + + amor = amor_mask; + + mtspr(SPRN_AMOR, amor); +} + void __init radix__early_init_mmu(void) { unsigned long lpcr; @@ -376,6 +395,7 @@ void __init radix__early_init_mmu(void) lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); + radix_init_amor(); } radix_init_pgtable(); @@ -393,6 +413,7 @@ void radix__early_init_mmu_secondary(void) mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + radix_init_amor(); } } -- 2.5.5
[powerpc v4 0/3] Enable IAMR storage keys for radix
The first patch sets up AMOR in hypervisor mode. AMOR needs to be setup before IAMR (details of AMOR/IAMR in each patch). The second patch enables detection of exceptions generated due to instruction fetch violations caused and OOPSs' the task. The third patch enables IAMR for both hypervisor and guest kernels. I've tested with patch series with a sample hack and payload. Chris Smart helped with the series, reviewing and providing valuable feedback Cc: Chris Smart Cc: Benjamin Herrenschmidt Cc: Michael Neuling Cc: Aneesh Kumar K.V Cc: Paul Mackerras Changelog Enable both primary and secondary MMU's (BUG FIX) Make the check for instruction violations common (SRR1_ISI_N_OR_G) Balbir Singh (3): Setup AMOR in HV mode Detect instruction fetch denied and report Enable storage keys for radix - user mode execution arch/powerpc/mm/fault.c | 4 arch/powerpc/mm/pgtable-radix.c | 41 + 2 files changed, 45 insertions(+) -- 2.5.5
Re: [PATCH V3 2/2] mm: THP page cache support for ppc64
On Sun, Nov 13, 2016 at 08:30:25PM +0530, Aneesh Kumar K.V wrote: > Add arch specific callback in the generic THP page cache code that will > deposit and withdraw preallocated page table. Archs like ppc64 use > this preallocated table to store the hash pte slot information. > > Testing: > kernel build of the patch series on tmpfs mounted with option huge=always > > The related thp stat: > thp_fault_alloc 72939 > thp_fault_fallback 60547 > thp_collapse_alloc 603 > thp_collapse_alloc_failed 0 > thp_file_alloc 253763 > thp_file_mapped 4251 > thp_split_page 51518 > thp_split_page_failed 1 > thp_deferred_split_page 73566 > thp_split_pmd 665 > thp_zero_page_alloc 3 > thp_zero_page_alloc_failed 0 > > Signed-off-by: Aneesh Kumar K.V One nit-pick below, but otherwise Acked-by: Kirill A. Shutemov > @@ -2975,6 +3004,13 @@ static int do_set_pmd(struct fault_env *fe, struct > page *page) > ret = 0; > count_vm_event(THP_FILE_MAPPED); > out: > + /* > + * If we are going to fallback to pte mapping, do a > + * withdraw with pmd lock held. > + */ > + if (arch_needs_pgtable_deposit() && (ret == VM_FAULT_FALLBACK)) Parentheses are redundant around the ret check. > + fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm, > +fe->pmd); > spin_unlock(fe->ptl); > return ret; > } -- Kirill A. Shutemov
[PATCH] Rename early_init_mmu to early_init_mmu_primary
It helps clarify that the action taken is just for the primary CPU and more action might be required for in the secondaries in early_init_mmu_secondary. This patch does not introduce a functional change Signed-off-by: Balbir Singh --- arch/powerpc/include/asm/book3s/64/mmu.h | 10 +- arch/powerpc/include/asm/mmu.h | 2 +- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/pgtable-radix.c | 2 +- arch/powerpc/mm/tlb_hash32.c | 2 +- arch/powerpc/mm/tlb_nohash.c | 4 ++-- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 8afb0e0..c60a629 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -104,13 +104,13 @@ void mmu_early_init_devtree(void); void hash__early_init_devtree(void); void radix__early_init_devtree(void); extern void radix_init_native(void); -extern void hash__early_init_mmu(void); -extern void radix__early_init_mmu(void); -static inline void early_init_mmu(void) +extern void hash__early_init_mmu_primary(void); +extern void radix__early_init_mmu_primary(void); +static inline void early_init_mmu_primary(void) { if (radix_enabled()) - return radix__early_init_mmu(); - return hash__early_init_mmu(); + return radix__early_init_mmu_primary(); + return hash__early_init_mmu_primary(); } extern void hash__early_init_mmu_secondary(void); extern void radix__early_init_mmu_secondary(void); diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e883683..3f8226c 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -284,7 +284,7 @@ static inline bool early_radix_enabled(void) #ifndef __ASSEMBLY__ /* MMU initialization */ -extern void early_init_mmu(void); +extern void early_init_mmu_primary(void); extern void early_init_mmu_secondary(void); extern void 
setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 5fe7918..e1ee6d6 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -118,7 +118,7 @@ notrace void __init machine_init(u64 dt_ptr) /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); - early_init_mmu(); + early_init_mmu_primary(); setup_kdump_trampoline(); } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index c3e1290..5d1ba51a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -303,7 +303,7 @@ void __init early_setup(unsigned long dt_ptr) setup_feature_keys(); /* Initialize the hash table or TLB handling */ - early_init_mmu(); + early_init_mmu_primary(); /* * At this point, we can let interrupts switch to virtual mode diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 44d3c3a..e0acd6d 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -964,7 +964,7 @@ void __init hash__early_init_devtree(void) htab_scan_page_sizes(); } -void __init hash__early_init_mmu(void) +void __init hash__early_init_mmu_primary(void) { htab_init_page_sizes(); diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index ed7bddc..968e29c 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -320,7 +320,7 @@ static void update_hid_for_radix(void) cpu_relax(); } -void __init radix__early_init_mmu(void) +void __init radix__early_init_mmu_primary(void) { unsigned long lpcr; diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c index 702d768..c006f04 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/tlb_hash32.c @@ -168,6 +168,6 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } 
EXPORT_SYMBOL(flush_tlb_range); -void __init early_init_mmu(void) +void __init early_init_mmu_primary(void) { } diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 050badc..e704b33 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -718,7 +718,7 @@ static void __init early_mmu_set_memory_limit(void) } /* boot cpu only */ -void __init early_init_mmu(void) +void __init early_init_mmu_primary(void) { early_init_mmu_global(); early_init_this_mmu(); @@ -770,7 +770,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, memblock_set_current_limit(first_memblock_base + ppc64_rma_size); } #else /* ! CONFIG_PPC64
[PATCH V4 2/2] powerpc/kvm: Update kvmppc_set_arch_compat() for ISA v3.00
The function kvmppc_set_arch_compat() is used to determine the value of the processor compatibility register (PCR) for a guest running in a given compatibility mode. There is currently no support for v3.00 of the ISA. Add support for v3.00 of the ISA which adds an ISA v2.07 compatilibity mode to the PCR. We also add a check to ensure the processor we are running on is capable of emulating the chosen processor (for example a POWER7 cannot emulate a POWER8, similarly with a POWER8 and a POWER9). Based on work by: Paul Mackerras Signed-off-by: Suraj Jitindar Singh --- arch/powerpc/kvm/book3s_hv.c | 38 +++--- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3686471..5d83ecb 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -301,39 +301,47 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { - unsigned long pcr = 0; + unsigned long host_pcr_bit = 0, guest_pcr_bit = 0; struct kvmppc_vcore *vc = vcpu->arch.vcore; + /* We can (emulate) our own architecture version and anything older */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + host_pcr_bit = PCR_ARCH_300; + else if (cpu_has_feature(CPU_FTR_ARCH_207S)) + host_pcr_bit = PCR_ARCH_207; + else if (cpu_has_feature(CPU_FTR_ARCH_206)) + host_pcr_bit = PCR_ARCH_206; + else + host_pcr_bit = PCR_ARCH_205; + + /* Determine lowest PCR bit needed to run guest in given PVR level */ if (arch_compat) { switch (arch_compat) { case PVR_ARCH_205: - /* -* If an arch bit is set in PCR, all the defined -* higher-order arch bits also have to be set. 
-*/ - pcr = PCR_ARCH_206 | PCR_ARCH_205; + guest_pcr_bit = PCR_ARCH_205; break; case PVR_ARCH_206: case PVR_ARCH_206p: - pcr = PCR_ARCH_206; + guest_pcr_bit = PCR_ARCH_206; break; case PVR_ARCH_207: + guest_pcr_bit = PCR_ARCH_207; + break; + case PVR_ARCH_300: + guest_pcr_bit = PCR_ARCH_300; break; default: return -EINVAL; } - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) { - /* POWER7 can't emulate POWER8 */ - if (!(pcr & PCR_ARCH_206)) - return -EINVAL; - pcr &= ~PCR_ARCH_206; - } } + /* Check requested PCR bits don't exceed our capabilities */ + if (guest_pcr_bit > host_pcr_bit) + return -EINVAL; + spin_lock(&vc->lock); vc->arch_compat = arch_compat; - vc->pcr = pcr; + vc->pcr = host_pcr_bit - guest_pcr_bit; spin_unlock(&vc->lock); return 0; -- 2.5.5
[PATCH V4 1/2] powerpc: Define new ISA v3.00 logical PVR value and PCR register value
ISA 3.00 adds the logical PVR value 0x0f05, so add a definition for this. Define PCR_ARCH_207 to reflect ISA 2.07 compatibility mode in the processor compatibility register (PCR). The next patch changes the algorithm used to determine the required PCR value in the function kvmppc_set_arch_compat(). We use the PCR_ARCH_XXX bits to specify and determine the compatibility level which we want to emulate as well as the compatibility levels which the host is capable of emulating. To show that we can emulate a v3.00 guest (which is actually a v3.00 host with no compatility bits set, at the moment) we need a PCR_ARCH_300 bit to represent this, however currently there is no such bit defined by the ISA. Thus we define a 'dummy' v3.00 compat bit to be used. Signed-off-by: Suraj Jitindar Singh --- arch/powerpc/include/asm/reg.h | 11 +++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 9cd4e8c..30d897a 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -377,6 +377,16 @@ #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ #define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */ +/* + * These bits are used in the function kvmppc_set_arch_compat() to specify and + * determine both the compatibility level which we want to emulate and the + * compatibility level which the host is capable of emulating. Thus we need a + * bit to show that we are capable of emulating an ISA v3.00 guest however as + * yet no such bit has been defined in the PCR register. Thus we have to define + * a 'dummy' value to be used. 
+ */ +#define PCR_ARCH_300 0x10/* Dummy Architecture 3.00 */ +#define PCR_ARCH_207 0x8 /* Architecture 2.07 */ #define PCR_ARCH_206 0x4 /* Architecture 2.06 */ #define PCR_ARCH_205 0x2 /* Architecture 2.05 */ #defineSPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ @@ -1218,6 +1228,7 @@ #define PVR_ARCH_206 0x0f03 #define PVR_ARCH_206p 0x0f13 #define PVR_ARCH_207 0x0f04 +#define PVR_ARCH_300 0x0f05 /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ -- 2.5.5
[PATCH V4 0/2] powerpc: add support for ISA v2.07 compat level
Version v3.00 of the ISA added a new compat level to the processor compatibility register (PCR), an ISA v2.07 compatibility mode. Upstream QEMU already supports this so it may as well go into the kernel now. Change Log: V1 -> V2: - Reworked logic to set and mask the PCR, no functional change V2 -> V3: - Reworked logic again, no functional change V3 -> V4: - Added a comment in the first patch to clarify why a 'dummy' PCR v3.00 value is needed Suraj Jitindar Singh (2): powerpc: Define new ISA v3.00 logical PVR value and PCR register value powerpc/kvm: Update kvmppc_set_arch_compat() for ISA v3.00 arch/powerpc/include/asm/reg.h | 11 +++ arch/powerpc/kvm/book3s_hv.c | 38 +++--- 2 files changed, 34 insertions(+), 15 deletions(-) -- 2.5.5
Re: [PATCH] powerpc/64: Simplify adaptation to new ISA v3.00 HPTE format
On 11/11/16 16:55, Paul Mackerras wrote: > This changes the way that we support the new ISA v3.00 HPTE format. > Instead of adapting everything that uses HPTE values to handle either > the old format or the new format, depending on which CPU we are on, > we now convert explicitly between old and new formats if necessary > in the low-level routines that actually access HPTEs in memory. > This limits the amount of code that needs to know about the new > format and makes the conversions explicit. This is OK because the > old format contains all the information that is in the new format. > > This also fixes operation under a hypervisor, because the H_ENTER > hypercall (and other hypercalls that deal with HPTEs) will continue > to require the HPTE value to be supplied in the old format. At > present the kernel will not boot in HPT mode on POWER9 under a > hypervisor. > > This fixes and partially reverts commit 50de596de8be > ("powerpc/mm/hash: Add support for Power9 Hash", 2016-04-29). > > Fixes: 50de596de8be > Signed-off-by: Paul Mackerras > --- > arch/powerpc/include/asm/book3s/64/mmu-hash.h | 47 > ++- > arch/powerpc/mm/hash_native_64.c | 30 + > arch/powerpc/platforms/ps3/htab.c | 2 +- > arch/powerpc/platforms/pseries/lpar.c | 2 +- > 4 files changed, 65 insertions(+), 16 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h > b/arch/powerpc/include/asm/book3s/64/mmu-hash.h > index e407af2..2e6a823 100644 > --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h > +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h > @@ -70,7 +70,9 @@ > > #define HPTE_V_SSIZE_SHIFT 62 > #define HPTE_V_AVPN_SHIFT7 > +#define HPTE_V_COMMON_BITS ASM_CONST(0x000f) > #define HPTE_V_AVPN ASM_CONST(0x3f80) > +#define HPTE_V_AVPN_3_0 ASM_CONST(0x000fff80) > #define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) > #define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xff80UL)) > #define HPTE_V_BOLTEDASM_CONST(0x0010) > @@ -80,14 +82,16 @@ > #define HPTE_V_VALID 
ASM_CONST(0x0001) > > /* > - * ISA 3.0 have a different HPTE format. > + * ISA 3.0 has a different HPTE format. > */ > #define HPTE_R_3_0_SSIZE_SHIFT 58 > +#define HPTE_R_3_0_SSIZE_MASK(3ull << HPTE_R_3_0_SSIZE_SHIFT) > #define HPTE_R_PP0 ASM_CONST(0x8000) > #define HPTE_R_TSASM_CONST(0x4000) > #define HPTE_R_KEY_HIASM_CONST(0x3000) > #define HPTE_R_RPN_SHIFT 12 > #define HPTE_R_RPN ASM_CONST(0x0000) > +#define HPTE_R_RPN_3_0 ASM_CONST(0x01fff000) > #define HPTE_R_PPASM_CONST(0x0003) > #define HPTE_R_PPP ASM_CONST(0x8003) > #define HPTE_R_N ASM_CONST(0x0004) > @@ -316,12 +320,43 @@ static inline unsigned long hpte_encode_avpn(unsigned > long vpn, int psize, >*/ > v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); > v <<= HPTE_V_AVPN_SHIFT; > - if (!cpu_has_feature(CPU_FTR_ARCH_300)) > - v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; > + v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; > return v; > } > > /* > + * ISA v3.0 defines a new HPTE format, which differs from the old > + * format in having smaller AVPN and ARPN fields, and the B field > + * in the second dword instead of the first. 
> + */ > +static inline unsigned long hpte_old_to_new_v(unsigned long v) > +{ > + /* trim AVPN, drop B */ > + return v & HPTE_V_COMMON_BITS; > +} > + > +static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long > r) > +{ > + /* move B field from 1st to 2nd dword, trim ARPN */ > + return (r & ~HPTE_R_3_0_SSIZE_MASK) | > + (((v) >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT); > +} > + > +static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long > r) > +{ > + /* insert B field */ > + return (v & HPTE_V_COMMON_BITS) | > + ((r & HPTE_R_3_0_SSIZE_MASK) << > + (HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT)); > +} > + > +static inline unsigned long hpte_new_to_old_r(unsigned long r) > +{ > + /* clear out B field */ > + return r & ~HPTE_R_3_0_SSIZE_MASK; > +} > + I wonder if we can encapsulate the name and ISA version check inside the helpers and like Aneesh suggested call them as newv3 as opposed to new_? > +/* > * This function sets the AVPN and L fields of the HPTE appropriately > * using the base page size and actual page size. > */ > @@ -341,12 +376,8 @@ static inline unsigned long hpte_encode_v(unsigned long > vpn, int base_psize, > * aligned for the requested page size > */ > static inline unsigned long hpte_encode_r(unsigned long pa, int
[PATCH 0/3] soc: avoid module usage in non-modular code
This series of commits is a part of a larger project to ensure people don't reference modular support functions in non-modular code. Overall there was roughly 5k lines of dead code in the kernel due to this. So far we've fixed several areas, like tty, x86, net, gpio ... and we continue to work on other areas. There are several reasons to not use module support for code that can never be built as a module, but the big ones are: (1) it is easy to accidentally code up unused module_exit and remove code (2) it can be misleading when reading the source, thinking it can be modular when the Makefile and/or Kconfig prohibit it (3) it requires the include of the module.h header file which in turn includes nearly everything else. Two of the changes are essentially source only -- the resulting module will be binary equivalent. Only the FSL driver has unused code in addition to the use of modular macros that get converted. Note the FSL SOC driver just appeared in linux-next and so this series won't apply on Linus' master branch. These commits were applied to linux-next and build tested there. Paul. --- Cc: Alexandre Courbot Cc: Arnd Bergmann Cc: Maxime Ripard Cc: Scott Wood Cc: Stephen Warren Cc: Thierry Reding Cc: Ulf Hansson Cc: Yangbo Lu Cc: linux-arm-ker...@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-te...@vger.kernel.org Paul Gortmaker (3): soc: sunxi: make sunxi_sram explicitly non-modular soc: tegra: make fuse-tegra explicitly non-modular soc: fsl: make guts driver explicitly non-modular drivers/soc/fsl/guts.c | 17 ++--- drivers/soc/sunxi/sunxi_sram.c | 9 ++--- drivers/soc/tegra/fuse/fuse-tegra.c | 4 ++-- 3 files changed, 6 insertions(+), 24 deletions(-) -- 2.10.1
[PATCH 3/3] soc: fsl: make guts driver explicitly non-modular
The Kconfig currently controlling compilation of this code is: drivers/soc/fsl/Kconfig:config FSL_GUTS drivers/soc/fsl/Kconfig:bool ...meaning that it currently is not being built as a module by anyone. Lets remove the modular code that is essentially orphaned, so that when reading the driver there is no doubt it is builtin-only. We explicitly disallow a driver unbind, since that doesn't have a sensible use case anyway, and it allows us to drop the ".remove" code for non-modular drivers. Since the code was already not using module_init, the init ordering remains unchanged with this commit. Also note that MODULE_DEVICE_TABLE is a no-op for non-modular code. Cc: Scott Wood Cc: Yangbo Lu Cc: Arnd Bergmann Cc: Ulf Hansson Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-arm-ker...@lists.infradead.org Signed-off-by: Paul Gortmaker --- drivers/soc/fsl/guts.c | 17 ++--- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/soc/fsl/guts.c b/drivers/soc/fsl/guts.c index 0ac88263c2d7..b4d2fd9263b2 100644 --- a/drivers/soc/fsl/guts.c +++ b/drivers/soc/fsl/guts.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include @@ -180,12 +180,6 @@ static int fsl_guts_probe(struct platform_device *pdev) return 0; } -static int fsl_guts_remove(struct platform_device *dev) -{ - soc_device_unregister(soc_dev); - return 0; -} - /* * Table for matching compatible strings, for device tree * guts node, for Freescale QorIQ SOCs. 
@@ -212,15 +206,14 @@ static const struct of_device_id fsl_guts_of_match[] = { { .compatible = "fsl,ls2080a-dcfg", }, {} }; -MODULE_DEVICE_TABLE(of, fsl_guts_of_match); static struct platform_driver fsl_guts_driver = { .driver = { .name = "fsl-guts", + .suppress_bind_attrs = true, .of_match_table = fsl_guts_of_match, }, .probe = fsl_guts_probe, - .remove = fsl_guts_remove, }; static int __init fsl_guts_init(void) @@ -228,9 +221,3 @@ static int __init fsl_guts_init(void) return platform_driver_register(&fsl_guts_driver); } core_initcall(fsl_guts_init); - -static void __exit fsl_guts_exit(void) -{ - platform_driver_unregister(&fsl_guts_driver); -} -module_exit(fsl_guts_exit); -- 2.10.1
Re: [PATCH net-next v7 03/10] dpaa_eth: add option to use one buffer pool set
From: Madalin Bucur Date: Fri, 11 Nov 2016 10:20:00 +0200 > @@ -8,3 +8,12 @@ menuconfig FSL_DPAA_ETH > supporting the Freescale QorIQ chips. > Depends on Freescale Buffer Manager and Queue Manager > driver and Frame Manager Driver. > + > +if FSL_DPAA_ETH > +config FSL_DPAA_ETH_COMMON_BPOOL > + bool "Use a common buffer pool set for all the interfaces" > + ---help--- > + The DPAA Ethernet netdevices require buffer pools for storing the > buffers > + used by the FMan hardware for reception. One can use a single buffer > pool > + set for all interfaces or a dedicated buffer pool set for each > interface. > +endif # FSL_DPAA_ETH This in no way belongs in Kconfig. If you want to support this, support it with a run time configuration choice via ethtool flags or similar. Do not use debugfs, do not use sysfs, do not use module options. If you put it in Kconfig, distributions will have to pick one way or another which means that users who want the other choice lose. This never works.
[PATCH V3 2/2] mm: THP page cache support for ppc64
Add arch specific callback in the generic THP page cache code that will deposit and withdraw preallocated page table. Archs like ppc64 use this preallocated table to store the hash pte slot information. Testing: kernel build of the patch series on tmpfs mounted with option huge=always The related thp stat: thp_fault_alloc 72939 thp_fault_fallback 60547 thp_collapse_alloc 603 thp_collapse_alloc_failed 0 thp_file_alloc 253763 thp_file_mapped 4251 thp_split_page 51518 thp_split_page_failed 1 thp_deferred_split_page 73566 thp_split_pmd 665 thp_zero_page_alloc 3 thp_zero_page_alloc_failed 0 Signed-off-by: Aneesh Kumar K.V --- Changes from V2: * Handle page table allocation failures. arch/powerpc/include/asm/book3s/64/pgtable.h | 10 + include/asm-generic/pgtable.h| 3 ++ mm/Kconfig | 6 +-- mm/huge_memory.c | 17 mm/khugepaged.c | 21 +- mm/memory.c | 60 +++- 6 files changed, 100 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 700301bc5190..0ebfbc8f0449 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1021,6 +1021,16 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, */ return true; } + + +#define arch_needs_pgtable_deposit arch_needs_pgtable_deposit +static inline bool arch_needs_pgtable_deposit(void) +{ + if (radix_enabled()) + return false; + return true; +} + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 324990273ad2..e00e3b7cf6a8 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -653,6 +653,9 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) } #endif +#ifndef arch_needs_pgtable_deposit +#define arch_needs_pgtable_deposit() (false) +#endif /* * This function is meant to be used by sites walking pagetables with * 
the mmap_sem hold in read mode to protect against MADV_DONTNEED and diff --git a/mm/Kconfig b/mm/Kconfig index be0ee11fa0d9..0a279d399722 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -447,13 +447,9 @@ choice benefit. endchoice -# -# We don't deposit page tables on file THP mapping, -# but Power makes use of them to address MMU quirk. -# config TRANSPARENT_HUGE_PAGECACHE def_bool y - depends on TRANSPARENT_HUGEPAGE && !PPC + depends on TRANSPARENT_HUGEPAGE # # UP and nommu archs use km based percpu allocator diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 54f265ec902e..a6f1e4443adc 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1377,6 +1377,15 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, return ret; } +static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd) +{ + pgtable_t pgtable; + + pgtable = pgtable_trans_huge_withdraw(mm, pmd); + pte_free(mm, pgtable); + atomic_long_dec(&mm->nr_ptes); +} + int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr) { @@ -1416,6 +1425,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, atomic_long_dec(&tlb->mm->nr_ptes); add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); } else { + if (arch_needs_pgtable_deposit()) + zap_deposited_table(tlb->mm, pmd); add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR); } spin_unlock(ptl); @@ -1595,6 +1606,12 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, if (!vma_is_anonymous(vma)) { _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); + /* +* We are going to unmap this huge page. 
So +* just go ahead and zap it +*/ + if (arch_needs_pgtable_deposit()) + zap_deposited_table(mm, pmd); if (vma_is_dax(vma)) return; page = pmd_page(_pmd); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 728d7790dc2d..9fb7b275cb63 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1240,6 +1240,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) struct vm_area_struct *vma; unsigned long addr; pmd_t *pmd, _pmd; + bool deposited = false; i_mmap_lock_write(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { @@ -1264,10 +1265,26 @@ static
[PATCH V3 1/2] mm: move vma_is_anonymous check within pmd_move_must_withdraw
Architectures like ppc64 want to use page table deposit/withdraw even with huge pmd dax entries. Allow arch to override the vma_is_anonymous check by moving that to pmd_move_must_withdraw function Acked-by: Kirill A. Shutemov Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/include/asm/book3s/64/pgtable.h | 3 ++- include/asm-generic/pgtable.h| 12 mm/huge_memory.c | 18 -- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 9fd77f8794a0..700301bc5190 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1009,7 +1009,8 @@ static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, #define pmd_move_must_withdraw pmd_move_must_withdraw struct spinlock; static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, -struct spinlock *old_pmd_ptl) +struct spinlock *old_pmd_ptl, +struct vm_area_struct *vma) { if (radix_enabled()) return false; diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index c4f8fd2fd384..324990273ad2 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -653,18 +653,6 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) } #endif -#ifndef pmd_move_must_withdraw -static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, -spinlock_t *old_pmd_ptl) -{ - /* -* With split pmd lock we also need to move preallocated -* PTE page table if new_pmd is on different PMD page table. 
-*/ - return new_pmd_ptl != old_pmd_ptl; -} -#endif - /* * This function is meant to be used by sites walking pagetables with * the mmap_sem hold in read mode to protect against MADV_DONTNEED and diff --git a/mm/huge_memory.c b/mm/huge_memory.c index cdcd25cb30fe..54f265ec902e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1424,6 +1424,21 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, return 1; } +#ifndef pmd_move_must_withdraw +static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, +spinlock_t *old_pmd_ptl, +struct vm_area_struct *vma) +{ + /* +* With split pmd lock we also need to move preallocated +* PTE page table if new_pmd is on different PMD page table. +* +* We also don't deposit and withdraw tables for file pages. +*/ + return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma); +} +#endif + bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, pmd_t *old_pmd, pmd_t *new_pmd) @@ -1458,8 +1473,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); - if (pmd_move_must_withdraw(new_ptl, old_ptl) && - vma_is_anonymous(vma)) { + if (pmd_move_must_withdraw(new_ptl, old_ptl, vma)) { pgtable_t pgtable; pgtable = pgtable_trans_huge_withdraw(mm, old_pmd); pgtable_trans_huge_deposit(mm, new_pmd, pgtable); -- 2.10.2