[PATCH] include/kcore: Remove left-over instances of 'kclist_add_remap()'
Commit bf904d2762ee ("x86/pti/64: Remove the SYSCALL64 entry trampoline") removed the sole usage of 'kclist_add_remap()' from 'arch/x86/mm/cpu_entry_area.c', but it did not remove the left-over definition from the include file. Fix the same. Cc: James Morse Cc: Boris Petkov Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Michael Ellerman Cc: Dave Anderson Cc: Dave Young Cc: x...@kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-arm-ker...@lists.infradead.org Cc: linux-ker...@vger.kernel.org Cc: ke...@lists.infradead.org Signed-off-by: Bhupesh Sharma --- include/linux/kcore.h | 11 --- 1 file changed, 11 deletions(-) diff --git a/include/linux/kcore.h b/include/linux/kcore.h index c843f4a9c512..da676cdbd727 100644 --- a/include/linux/kcore.h +++ b/include/linux/kcore.h @@ -38,12 +38,6 @@ struct vmcoredd_node { #ifdef CONFIG_PROC_KCORE void __init kclist_add(struct kcore_list *, void *, size_t, int type); -static inline -void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz) -{ - m->vaddr = (unsigned long)vaddr; - kclist_add(m, addr, sz, KCORE_REMAP); -} extern int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn)); #else @@ -51,11 +45,6 @@ static inline void kclist_add(struct kcore_list *new, void *addr, size_t size, int type) { } - -static inline -void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz) -{ -} #endif #endif /* _LINUX_KCORE_H */ -- 2.7.4
[PATCH v2 3/3] powerpc/mm: print hash info in a helper
Reduce #ifdef mess by defining a helper to print hash info at startup. In the meantime, remove the display of hash table address to reduce leak of non necessary information. Signed-off-by: Christophe Leroy --- v2: added header to avoid sparse warning arch/powerpc/kernel/setup-common.c | 19 +-- arch/powerpc/mm/hash_utils_64.c| 10 ++ arch/powerpc/mm/mmu_decl.h | 5 - arch/powerpc/mm/ppc_mmu_32.c | 9 - 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2e5dfb6e0823..f24a74f7912d 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -799,12 +799,6 @@ void arch_setup_pdev_archdata(struct platform_device *pdev) static __init void print_system_info(void) { pr_info("-\n"); -#ifdef CONFIG_PPC_BOOK3S_64 - pr_info("ppc64_pft_size= 0x%llx\n", ppc64_pft_size); -#endif -#ifdef CONFIG_PPC_BOOK3S_32 - pr_info("Hash_size = 0x%lx\n", Hash_size); -#endif pr_info("phys_mem_size = 0x%llx\n", (unsigned long long)memblock_phys_mem_size()); @@ -826,18 +820,7 @@ static __init void print_system_info(void) pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); #endif -#ifdef CONFIG_PPC_BOOK3S_64 - if (htab_address) - pr_info("htab_address = 0x%p\n", htab_address); - if (htab_hash_mask) - pr_info("htab_hash_mask= 0x%lx\n", htab_hash_mask); -#endif -#ifdef CONFIG_PPC_BOOK3S_32 - if (Hash) - pr_info("Hash = 0x%p\n", Hash); - if (Hash_mask) - pr_info("Hash_mask = 0x%lx\n", Hash_mask); -#endif + print_system_hash_info(); if (PHYSICAL_START > 0) pr_info("physical_start= 0x%llx\n", diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 0a4f939a8161..cef2d05fdb9b 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -65,6 +65,8 @@ #include #include +#include "mmu_decl.h" + #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) #else @@ -1909,3 +1911,11 @@ static int __init hash64_debugfs(void) } machine_device_initcall(pseries, hash64_debugfs); #endif /* CONFIG_DEBUG_FS */ + +void __init print_system_hash_info(void) +{ + pr_info("ppc64_pft_size= 0x%llx\n", ppc64_pft_size); + + if (htab_hash_mask) + pr_info("htab_hash_mask= 0x%lx\n", htab_hash_mask); +} diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index f7f1374ba3ee..dc617ade83ab 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -83,6 +83,8 @@ static inline void _tlbivax_bcast(unsigned long address, unsigned int pid, } #endif +static inline void print_system_hash_info(void) {} + #else /* CONFIG_PPC_MMU_NOHASH */ extern void hash_preload(struct mm_struct *mm, unsigned long ea, @@ -92,6 +94,8 @@ extern void hash_preload(struct mm_struct *mm, unsigned long ea, extern void _tlbie(unsigned long address); extern void _tlbia(void); +void print_system_hash_info(void); + #endif /* CONFIG_PPC_MMU_NOHASH */ #ifdef CONFIG_PPC32 @@ -105,7 +109,6 @@ extern unsigned int rtas_data, rtas_size; struct hash_pte; extern struct hash_pte *Hash; -extern unsigned long Hash_size, Hash_mask; #endif /* CONFIG_PPC32 */ diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 088f14d57cce..864096489b6d 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -37,7 +37,7 @@ #include "mmu_decl.h" struct hash_pte *Hash; -unsigned long Hash_size, Hash_mask; +static unsigned long Hash_size, Hash_mask; unsigned long _SDR1; struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */ @@ -392,3 +392,10 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, else /* Anything else has 256M mapped */ memblock_set_current_limit(min_t(u64, first_memblock_size, 0x1000)); } + +void __init print_system_hash_info(void) +{ + pr_info("Hash_size = 0x%lx\n", Hash_size); + if (Hash_mask) + pr_info("Hash_mask = 0x%lx\n", Hash_mask); +} -- 2.13.3
[PATCH v2 2/3] powerpc/32s: don't try to print hash table address.
Due to %p, (ptrval) is printed in lieu of the hash table address. showing the hash table address isn't an operationnal need so just don't print it. Signed-off-by: Christophe Leroy --- v2: no change arch/powerpc/mm/ppc_mmu_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 302409fcfbc7..088f14d57cce 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -345,8 +345,8 @@ void __init MMU_init_hw(void) __func__, Hash_size, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; - printk("Total memory = %lldMB; using %ldkB for hash table (at %p)\n", - (unsigned long long)(total_memory >> 20), Hash_size >> 10, Hash); + pr_info("Total memory = %lldMB; using %ldkB for hash table\n", + (unsigned long long)(total_memory >> 20), Hash_size >> 10); /* -- 2.13.3
[PATCH v2 1/3] powerpc/32s: drop Hash_end
Hash_end has never been used, drop it. Signed-off-by: Christophe Leroy --- v2: no change arch/powerpc/mm/mmu_decl.h | 2 +- arch/powerpc/mm/ppc_mmu_32.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 74ff61dabcb1..f7f1374ba3ee 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -104,7 +104,7 @@ extern int __map_without_bats; extern unsigned int rtas_data, rtas_size; struct hash_pte; -extern struct hash_pte *Hash, *Hash_end; +extern struct hash_pte *Hash; extern unsigned long Hash_size, Hash_mask; #endif /* CONFIG_PPC32 */ diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index f29d2f118b44..302409fcfbc7 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -36,7 +36,7 @@ #include "mmu_decl.h" -struct hash_pte *Hash, *Hash_end; +struct hash_pte *Hash; unsigned long Hash_size, Hash_mask; unsigned long _SDR1; @@ -345,8 +345,6 @@ void __init MMU_init_hw(void) __func__, Hash_size, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; - Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size); - printk("Total memory = %lldMB; using %ldkB for hash table (at %p)\n", (unsigned long long)(total_memory >> 20), Hash_size >> 10, Hash); -- 2.13.3
Re: [PATCH 3/7] powerpc/setup: define cpu_pvr at all time
Le 22/03/2019 à 09:08, Christophe Leroy a écrit : To avoid ifdefs, define cpu_pvr at all time. Signed-off-by: Christophe Leroy This patch introduces a sparse warning. I guess we can skip it for now and rework more deeply the use of cpu_pvr versus SPRN_PVR which is re-read in many places in the code. Christophe --- arch/powerpc/kernel/setup-common.c | 12 +--- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index a90e8367ccde..a4ed9301e815 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -190,9 +190,7 @@ void machine_halt(void) machine_hang(); } -#ifdef CONFIG_SMP DEFINE_PER_CPU(unsigned int, cpu_pvr); -#endif static void show_cpuinfo_summary(struct seq_file *m) { @@ -234,11 +232,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short maj; unsigned short min; -#ifdef CONFIG_SMP - pvr = per_cpu(cpu_pvr, cpu_id); -#else - pvr = mfspr(SPRN_PVR); -#endif + if (IS_ENABLED(CONFIG_SMP)) + pvr = per_cpu(cpu_pvr, cpu_id); + else + pvr = mfspr(SPRN_PVR); + maj = (pvr >> 8) & 0xFF; min = pvr & 0xFF;
Re: [PATCH v3] powerpc/8xx: fix possible object reference leak
Le 26/03/2019 à 11:29, Peng Hao a écrit : Could you fix your clock or clock setup ? This emails appears to have been sent today at 11:29 (Paris Time ie GMT+1) allthough it is only 7am at the time being. Christophe From: Wen Yang The call to of_find_compatible_node returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. irq_domain_add_linear also calls of_node_get to increase refcount, so irq_domain will not be affected when it is released. Detected by coccinelle with the following warnings: ./arch/powerpc/platforms/8xx/pic.c:158:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 136, but without a corresponding object release within this function. Fixes: a8db8cf0d894 ("irq_domain: Replace irq_alloc_host() with revmap-specific initializers") Signed-off-by: Wen Yang Suggested-by: Christophe Leroy Suggested-by: Michael Ellerman Reviewed-by: Peng Hao Reviewed-by: Christophe Leroy Cc: Vitaly Bordug Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-ker...@vger.kernel.org --- v3->v2: set ret to zero explicitly. v2->v1: add a Fixes tag. arch/powerpc/platforms/8xx/pic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index 8d5a25d..9993998 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,9 +153,9 @@ int mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; - goto out; } - return 0; + + ret = 0; out: of_node_put(np);
Re: [PATCH] compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING
Hi Masahiro, Le 25/03/2019 à 07:44, Masahiro Yamada a écrit : Hi Christophe, On Sat, Mar 23, 2019 at 5:27 PM LEROY Christophe wrote: Arnd Bergmann a écrit : On Wed, Mar 20, 2019 at 10:41 AM Arnd Bergmann wrote: I've added your patch to my randconfig test setup and will let you know if I see anything noticeable. I'm currently testing clang-arm32, clang-arm64 and gcc-x86. This is the only additional bug that has come up so far: `.exit.text' referenced in section `.alt.smp.init' of drivers/char/ipmi/ipmi_msghandler.o: defined in discarded section `exit.text' of drivers/char/ipmi/ipmi_msghandler.o Wouldn't it be useful to activate -Winline gcc warning to ease finding out problematic usage of inline keyword ? Yes, it is useful to find out which function is causing the error. Thanks for the tip. I did a mass build on kisskb. Almost ok, see results at http://kisskb.ellerman.id.au/kisskb/head/203d912edf8dde291977c71aa64024065e197f79/ ps3_defconfig with GCC 5 fails (http://kisskb.ellerman.id.au/kisskb/buildresult/13742711/) with: arch/powerpc/mm/tlb-radix.c:148:2: error: asm operand 3 probably doesn't match constraints [-Werror] arch/powerpc/mm/tlb-radix.c:148:2: error: impossible constraint in 'asm' arch/powerpc/mm/tlb-radix.c:118:2: error: asm operand 3 probably doesn't match constraints [-Werror] arch/powerpc/mm/tlb-radix.c:104:2: error: asm operand 3 probably doesn't match constraints [-Werror] ps3_defconfig with GCC 4.6 fails (http://kisskb.ellerman.id.au/kisskb/buildresult/13742591/) with: arch/powerpc/mm/tlb-radix.c:148:2: error: asm operand 3 probably doesn't match constraints [-Werror] arch/powerpc/mm/tlb-radix.c:118:2: error: asm operand 3 probably doesn't match constraints [-Werror] arch/powerpc/mm/tlb-radix.c:104:2: error: asm operand 3 probably doesn't match constraints [-Werror] randconfig with GCC 4.6 fails (http://kisskb.ellerman.id.au/kisskb/buildresult/13742698/) with: arch/powerpc/mm/tlb-radix.c:104:2: error: impossible constraint in 'asm' Christophe
Re: [PATCH v2] arch/powerpc: Rework local_paca to avoid LTO warnings
Alastair D'Silva's on March 14, 2019 12:31 pm: > From: Alastair D'Silva > > When building an LTO kernel, the existing code generates warnings: > ./arch/powerpc/include/asm/paca.h:37:30: warning: register of > ‘local_paca’ used for multiple global register variables > register struct paca_struct *local_paca asm("r13"); > ^ > ./arch/powerpc/include/asm/paca.h:37:30: note: conflicts with > ‘local_paca’ Isn't this a bogus warning? It doesn't look like there's a way to define it any other way. > > This patch reworks local_paca into an inline getter & setter function, > which addresses the warning. > > Changelog: > V2 > - Address whitespace issues > - keep new implementation close to where the old implementation was > > Signed-off-by: Alastair D'Silva > --- > arch/powerpc/include/asm/paca.h | 37 + > arch/powerpc/kernel/paca.c | 2 +- > 2 files changed, 29 insertions(+), 10 deletions(-) > > diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h > index e843bc5d1a0f..2fa0b43357c9 100644 > --- a/arch/powerpc/include/asm/paca.h > +++ b/arch/powerpc/include/asm/paca.h > @@ -34,19 +34,38 @@ > #include > #include > > -register struct paca_struct *local_paca asm("r13"); > - > #if defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_SMP) > extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */ > -/* > - * Add standard checks that preemption cannot occur when using get_paca(): > - * otherwise the paca_struct it points to may be the wrong one just after. > - */ > -#define get_paca() ((void) debug_smp_processor_id(), local_paca) > -#else > -#define get_paca() local_paca > #endif > > +static inline struct paca_struct *get_paca_no_preempt_check(void) > +{ > + register struct paca_struct *paca asm("r13"); > + > + return paca; > +} Problem is it now changes the global register variable to a local register variable. The compiler would presumably be within its rights to "cache" that return value or use another register for it, which is not really what we want. Thanks, Nick
[PATCH] powerpc: Add force enable of DAWR on P9 option
This adds a flag so that the DAWR can be enabled on P9 via: echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous The DAWR was previously force disabled on POWER9 in: 9654153158 powerpc: Disable DAWR in the base POWER9 CPU features Also see Documentation/powerpc/DAWR-POWER9.txt This is a dangerous setting, USE AT YOUR OWN RISK. Some users may not care about a bad user crashing their box (ie. single user/desktop systems) and really want the DAWR. This allows them to force enable DAWR. This flag can also be used to disable DAWR access. Once this is cleared, all DAWR access should be cleared immediately and your machine once again safe from crashing. Userspace may get confused by toggling this. If DAWR is force enabled/disabled between getting the number of breakpoints (via PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an inconsistent view of what's available. Similarly for guests. For the DAWR to be enabled in a KVM guest, the DAWR needs to be force enabled in the host AND the guest. For this reason, this won't work on POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the dawr_enable_dangerous file will fail if the hypervisor doesn't support writing the DAWR. To double check the DAWR is working, run this kernel selftest: tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c Any errors/failures/skips mean something is wrong. Signed-off-by: Michael Neuling --- Documentation/powerpc/DAWR-POWER9.txt| 32 + arch/powerpc/include/asm/hw_breakpoint.h | 7 +++ arch/powerpc/kernel/hw_breakpoint.c | 60 +++- arch/powerpc/kernel/process.c| 9 ++-- arch/powerpc/kernel/ptrace.c | 3 +- arch/powerpc/kvm/book3s_hv.c | 3 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 23 + 7 files changed, 120 insertions(+), 17 deletions(-) diff --git a/Documentation/powerpc/DAWR-POWER9.txt b/Documentation/powerpc/DAWR-POWER9.txt index 2feaa66196..bdec036509 100644 --- a/Documentation/powerpc/DAWR-POWER9.txt +++ b/Documentation/powerpc/DAWR-POWER9.txt @@ -56,3 +56,35 @@ POWER9. Loads and stores to the watchpoint locations will not be trapped in GDB. The watchpoint is remembered, so if the guest is migrated back to the POWER8 host, it will start working again. +Force enabling the DAWR += +Kernels (since ~v5.2) have an option to force enable the DAWR via: + + echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous + +This enables the DAWR even on POWER9. + +This is a dangerous setting, USE AT YOUR OWN RISK. + +Some users may not care about a bad user crashing their box +(ie. single user/desktop systems) and really want the DAWR. This +allows them to force enable DAWR. + +This flag can also be used to disable DAWR access. Once this is +cleared, all DAWR access should be cleared immediately and your +machine once again safe from crashing. + +Userspace may get confused by toggling this. If DAWR is force +enabled/disabled between getting the number of breakpoints (via +PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an +inconsistent view of what's available. Similarly for guests. + +For the DAWR to be enabled in a KVM guest, the DAWR needs to be force +enabled in the host AND the guest. For this reason, this won't work on +POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the +dawr_enable_dangerous file will fail if the hypervisor doesn't support +writing the DAWR. + +To double check the DAWR is working, run this kernel selftest: + tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +Any errors/failures/skips mean something is wrong. diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index ece4dc89c9..87c2a74f64 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -90,6 +90,13 @@ static inline void hw_breakpoint_disable(void) extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); int hw_breakpoint_handler(struct die_args *args); +extern int set_dawr(struct arch_hw_breakpoint *brk); +extern bool dawr_force_enable; +static inline bool dawr_enabled(void) +{ + return dawr_force_enable; +} + #else /* CONFIG_HAVE_HW_BREAKPOINT */ static inline void hw_breakpoint_disable(void) { } static inline void thread_change_pc(struct task_struct *tsk, diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index fec8a67731..0b2461e1d9 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -29,11 +29,14 @@ #include #include #include +#include +#include #include #include #include #include +#include #include /* @@ -174,7 +177,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, if (!ppc_breakpoint_available()) return -ENODEV; length_max = 8; /* DABR */ - if (cpu_has_feature(CPU_FTR_
Re: [PATCH] kbuild: strip whitespace in cmd_record_mcount findstring
On Tue, Mar 26, 2019 at 1:05 AM Joe Lawrence wrote: > > CC_FLAGS_FTRACE may contain trailing whitespace that interferes with > findstring. > > For example, commit 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on > GCC 4.9 and newer") introduced a change such that on my ppc64le box, > CC_FLAGS_FTRACE="-pg -mprofile-kernel ". (Note the trailing space.) > When cmd_record_mcount is now invoked, findstring fails as the ftrace > flags were found at very end of _c_flags, without the trailing space. > > _c_flags=" ... -pg -mprofile-kernel" > CC_FLAGS_FTRACE="-pg -mprofile-kernel " >^ > findstring is looking for this extra space > > Remove the redundant whitespaces from CC_FLAGS_FTRACE in > cmd_record_mcount to avoid this problem. > > Fixes: 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on GCC 4.9 and newer"). > Signed-off-by: Joe Lawrence > --- > > Standard disclaimer: I'm not a kbuild expert, but this works around the > problem I reported where ftrace and livepatch self-tests were failing as > specified object files were not run through the recordmcount.pl script: > > ppc64le: ftrace self-tests and $(CC_FLAGS_FTRACE) broken? > https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-March/187298.html > > scripts/Makefile.build | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/scripts/Makefile.build b/scripts/Makefile.build > index 2554a15ecf2b..74d402b5aa3c 100644 > --- a/scripts/Makefile.build > +++ b/scripts/Makefile.build > @@ -199,10 +199,10 @@ sub_cmd_record_mcount = perl > $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ > "$(if $(part-of-module),1,0)" "$(@)"; > recordmcount_source := $(srctree)/scripts/recordmcount.pl > endif # BUILD_C_RECORDMCOUNT > -cmd_record_mcount =\ > - if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" = \ > -"$(CC_FLAGS_FTRACE)" ]; then \ > - $(sub_cmd_record_mcount)\ > +cmd_record_mcount =\ > + if [ "$(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags))" = \ > +"$(strip $(CC_FLAGS_FTRACE))" ]; then \ > + $(sub_cmd_record_mcount)\ > fi > endif # CC_USING_RECORD_MCOUNT > endif # CONFIG_FTRACE_MCOUNT_RECORD > -- > 2.20.1 > I do not see a point in using the shell command here in the first place. Instead of adding crappy workarounds, I guess the following simple code should work: index 2554a15..5f13021 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -199,11 +199,8 @@ sub_cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(part-of-module),1,0)" "$(@)"; recordmcount_source := $(srctree)/scripts/recordmcount.pl endif # BUILD_C_RECORDMCOUNT -cmd_record_mcount =\ - if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" = \ -"$(CC_FLAGS_FTRACE)" ]; then \ - $(sub_cmd_record_mcount)\ - fi +cmd_record_mcount = $(if $(findstring $(CC_FLAGS_FTRACE),$(_c_flags)),\ + $(sub_cmd_record_mcount)) endif # CC_USING_RECORD_MCOUNT endif # CONFIG_FTRACE_MCOUNT_RECORD -- Best Regards Masahiro Yamada
[PATCH v3] powerpc/8xx: fix possible object reference leak
From: Wen Yang The call to of_find_compatible_node returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. irq_domain_add_linear also calls of_node_get to increase refcount, so irq_domain will not be affected when it is released. Detected by coccinelle with the following warnings: ./arch/powerpc/platforms/8xx/pic.c:158:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 136, but without a corresponding object release within this function. Fixes: a8db8cf0d894 ("irq_domain: Replace irq_alloc_host() with revmap-specific initializers") Signed-off-by: Wen Yang Suggested-by: Christophe Leroy Suggested-by: Michael Ellerman Reviewed-by: Peng Hao Reviewed-by: Christophe Leroy Cc: Vitaly Bordug Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-ker...@vger.kernel.org --- v3->v2: set ret to zero explicitly. v2->v1: add a Fixes tag. arch/powerpc/platforms/8xx/pic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index 8d5a25d..9993998 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,9 +153,9 @@ int mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; - goto out; } - return 0; + + ret = 0; out: of_node_put(np); -- 2.9.5
答复: Re: [PATCH v2] powerpc/8xx: fix possible object reference leak
>> The call to of_find_compatible_node returns a node pointer with refcount >> incremented thus it must be explicitly decremented after the last >> usage. >> irq_domain_add_linear also calls of_node_get to increase refcount, >> so irq_domain will not be affected when it is released. >> >> Detected by coccinelle with the following warnings: >> ./arch/powerpc/platforms/8xx/pic.c:158:1-7: ERROR: missing of_node_put; >> acquired a node pointer with refcount incremented on line 136, but without a >> corresponding object release within this function. >> >> Fixes: a8db8cf0d894 ("irq_domain: Replace irq_alloc_host() with >> revmap-specific initializers") >> Signed-off-by: Wen Yang >> Suggested-by: Christophe Leroy >> Reviewed-by: Peng Hao >> Cc: Vitaly Bordug >> Cc: Benjamin Herrenschmidt >> Cc: Paul Mackerras >> Cc: Michael Ellerman >> Cc: linuxppc-dev@lists.ozlabs.org >> Cc: linux-ker...@vger.kernel.org >> --- >> arch/powerpc/platforms/8xx/pic.c | 2 -- >> 1 file changed, 2 deletions(-) >> >> diff --git a/arch/powerpc/platforms/8xx/pic.c >> b/arch/powerpc/platforms/8xx/pic.c >> index 8d5a25d..4453df6 100644 >> --- a/arch/powerpc/platforms/8xx/pic.c >> +++ b/arch/powerpc/platforms/8xx/pic.c >> @@ -153,9 +153,7 @@ int mpc8xx_pic_init(void) >> if (mpc8xx_pic_host == NULL) { >> printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); >> ret = -ENOMEM; >> -goto out; >> } >> -return 0; > > It's fragile to rely on ret being equal to zero here. If the code > further up is changed it's easy enough to miss that ret is expected to > be zero. > > Better to set it to zero here explicitly, this is the success path after > all, eg: > > ret = 0; > Thank you for your comments. I'll fix it soon. Thanks and regards, Wen
Re: [PATCH] powerpc: vmlinux.lds: Drop Binutils 2.18 workarounds
Joel Stanley writes: > Segher added some workarounds for GCC 4.2 and bintuils 2.18. We now set > 4.6 and 2.20 as the minimum, so they can be dropped. > > This is mostly a revert of c6995fe4 ("powerpc: Fix build bug with > binutils < 2.18 and GCC < 4.2"). > > Signed-off-by: Joel Stanley > --- > arch/powerpc/kernel/vmlinux.lds.S | 35 --- > 1 file changed, 4 insertions(+), 31 deletions(-) Seems this breaks some toolchains, at least the one from kernel.org: /opt/cross/kisskb/korg/gcc-8.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: .tmp_vmlinux1: Not enough room for program headers, try linking with -N /opt/cross/kisskb/korg/gcc-8.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: final link failed: Bad value make[1]: *** [/kisskb/src/Makefile:1024: vmlinux] Error 1 http://kisskb.ellerman.id.au/kisskb/buildresult/13743374/ Not sure why. That's binutils 2.30. cheers
Re: Bad file pattern in MAINTAINERS section 'IBM Power Virtual Accelerator Switchboard'
On Mon, 2019-03-25 at 16:35 -0700, Joe Perches wrote: > A file pattern line in this section of the MAINTAINERS file in linux-next > does not have a match in the linux source files. > > This could occur because a matching filename was never added, was deleted > or renamed in some other commit. > > The commits that added and if found renamed or removed the file pattern > are shown below. > > Please fix this defect appropriately. > > 1: --- > > linux-next MAINTAINERS section: > > 7396IBM Power Virtual Accelerator Switchboard > 7397M: Sukadev Bhattiprolu btw: Your name needs an email address here too. This should be: M: Sukadev Bhattiprolu
Bad file pattern in MAINTAINERS section 'IBM Power Virtual Accelerator Switchboard'
A file pattern line in this section of the MAINTAINERS file in linux-next does not have a match in the linux source files. This could occur because a matching filename was never added, was deleted or renamed in some other commit. The commits that added and if found renamed or removed the file pattern are shown below. Please fix this defect appropriately. 1: --- linux-next MAINTAINERS section: 7396IBM Power Virtual Accelerator Switchboard 7397M: Sukadev Bhattiprolu 7398L: linuxppc-dev@lists.ozlabs.org 7399S: Supported 7400F: arch/powerpc/platforms/powernv/vas* 7401F: arch/powerpc/platforms/powernv/copy-paste.h 7402F: arch/powerpc/include/asm/vas.h --> 7403F: arch/powerpc/include/uapi/asm/vas.h 2: --- The most recent commit that added or modified file pattern 'arch/powerpc/include/uapi/asm/vas.h': commit 4dea2d1a927c61114a168d4509b56329ea6effb7 Author: Sukadev Bhattiprolu Date: Mon Aug 28 23:23:33 2017 -0700 powerpc/powernv/vas: Define vas_init() and vas_exit() Implement vas_init() and vas_exit() functions for a new VAS module. This VAS module is essentially a library for other device drivers and kernel users of the NX coprocessors like NX-842 and NX-GZIP. In the future this will be extended to add support for user space to access the NX coprocessors. VAS is currently only supported with 64K page size. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman .../devicetree/bindings/powerpc/ibm,vas.txt| 22 +++ MAINTAINERS| 8 ++ arch/powerpc/platforms/powernv/Kconfig | 14 ++ arch/powerpc/platforms/powernv/Makefile| 1 + arch/powerpc/platforms/powernv/vas-window.c| 19 +++ arch/powerpc/platforms/powernv/vas.c | 151 + arch/powerpc/platforms/powernv/vas.h | 2 + 7 files changed, 217 insertions(+) 3: --- No commit with file pattern 'arch/powerpc/include/uapi/asm/vas.h' was found
Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast
On Mon, Mar 25, 2019 at 03:36:28PM -0700, Dan Williams wrote: > On Mon, Mar 25, 2019 at 3:22 PM Ira Weiny wrote: > [..] > > FWIW this thread is making me think my original patch which simply > > implemented > > get_user_pages_fast_longterm() would be more clear. There is some evidence > > that the GUP API was trending that way (see get_user_pages_remote). That > > seems > > wrong but I don't know how to ensure users don't specify the wrong flag. > > What about just making the existing get_user_pages_longterm() have a > fast path option? That would work but was not the direction we agreed upon before.[1] At this point I would rather see this patch set applied, focus on fixing the filesystem issues, and once that is done determine if FOLL_LONGTERM is needed in any GUP calls. Ira [1] https://lkml.org/lkml/2019/2/11/2038
Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast
On Mon, Mar 25, 2019 at 3:22 PM Ira Weiny wrote: [..] > FWIW this thread is making me think my original patch which simply implemented > get_user_pages_fast_longterm() would be more clear. There is some evidence > that the GUP API was trending that way (see get_user_pages_remote). That > seems > wrong but I don't know how to ensure users don't specify the wrong flag. What about just making the existing get_user_pages_longterm() have a fast path option?
Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast
On Mon, Mar 25, 2019 at 02:51:50PM -0300, Jason Gunthorpe wrote: > On Mon, Mar 25, 2019 at 02:23:15AM -0700, Ira Weiny wrote: > > > > Unfortunately holding the lock is required to support FOLL_LONGTERM (to > > > > check > > > > the VMAs) but we don't want to hold the lock to be optimal > > > > (specifically allow > > > > FAULT_FOLL_ALLOW_RETRY). So I'm maintaining the optimization for > > > > *_fast users > > > > who do not specify FOLL_LONGTERM. > > > > > > > > Another way to do this would have been to define > > > > __gup_longterm_unlocked with > > > > the above logic, but that seemed overkill at this point. > > > > > > get_user_pages_unlocked() is an exported symbol, shouldn't it work > > > with the FOLL_LONGTERM flag? > > > > > > I think it should even though we have no user.. > > > > > > Otherwise the GUP API just gets more confusing. > > > > I agree WRT to the API. But I think callers of get_user_pages_unlocked() > > are > > not going to get the behavior they want if they specify FOLL_LONGTERM. > > Oh? Isn't the only thing FOLL_LONGTERM does is block the call on DAX? >From an API yes. > Why does the locking mode matter to this test? DAX checks for VMA's being Filesystem DAX. Therefore, it requires collection of VMA's as the GUP code executes. The unlocked version can drop the lock and therefore the VMAs may become invalid. Therefore, the 2 code paths are incompatible. Users of GUP unlocked are going to want the benefit of FAULT_FOLL_ALLOW_RETRY. So I don't anticipate anyone using FOLL_LONGTERM with get_user_pages_unlocked(). FWIW this thread is making me think my original patch which simply implemented get_user_pages_fast_longterm() would be more clear. There is some evidence that the GUP API was trending that way (see get_user_pages_remote). That seems wrong but I don't know how to ensure users don't specify the wrong flag. > > > What I could do is BUG_ON (or just WARN_ON) if unlocked is called with > > FOLL_LONGTERM similar to the code in get_user_pages_locked() which does not > > allow locked and vmas to be passed together: > > The GUP call should fail if you are doing something like this. But I'd > rather not see confusing specialc cases in code without a clear > comment explaining why it has to be there. Code comment would be necessary, sure. Was just throwing ideas out there. Ira
RE: [PATCH v4 0/4] ocxl: OpenCAPI Cleanup
> -Original Message- > From: Frederic Barrat > Sent: Tuesday, 26 March 2019 4:34 AM > To: Greg Kurz ; Alastair D'Silva > Cc: alast...@d-silva.org; Arnd Bergmann ; Greg Kroah- > Hartman ; linux-ker...@vger.kernel.org; > Andrew Donnellan ; linuxppc- > d...@lists.ozlabs.org > Subject: Re: [PATCH v4 0/4] ocxl: OpenCAPI Cleanup > > > > Le 25/03/2019 à 17:49, Greg Kurz a écrit : > > Hi Alastair, > > > > I forgot to mention it during v3 but please don't link new version of > > a patchset to the previous one with --in-reply-to. This is to ensure I > > can see them in my email client without having to scroll back many > > days in the past (which likely means a fair number of e-mails on > > linuxppc-dev). > > > I'm also seeing the other series (ocxl refactoring) somehow under the same > thread. I haven't checked why, and there may be some mail client bug in the > way, I just mention it in case you see a reason for it in the way you submit > the patch set. > I probably grabbed the wrong message-id. -- Alastair D'Silva mob: 0423 762 819 skype: alastair_dsilva msn: alast...@d-silva.org blog: http://alastair.d-silva.orgTwitter: @EvilDeece
Re: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations
On Mär 25 2019, Michael Ellerman wrote: > So I'm inclined to just switch to always using SPARSEMEM on 64-bit > Book3S, because that's what's well tested and we hardly need more code > paths to test. Unless anyone has a strong objection, I haven't actually > benchmarked FLATMEM vs SPARSEMEM on a G5. Configuring with SPARSEMEM saves about 32Mb of memory. Andreas. -- Andreas Schwab, sch...@linux-m68k.org GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1 "And now for something completely different."
Re: [RESEND 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM
On Fri, Mar 22, 2019 at 02:24:40PM -0700, Dan Williams wrote: > On Sun, Mar 17, 2019 at 7:36 PM wrote: [snip] > > + * __gup_longterm_locked() is a wrapper for __get_uer_pages_locked which > > s/uer/user/ > > > + * allows us to process the FOLL_LONGTERM flag if present. > > + * > > + * FOLL_LONGTERM Checks for either DAX VMAs or PPC CMA regions and either > > fails > > + * the pin or attempts to migrate the page as appropriate. > > + * > > + * In the filesystem-dax case mappings are subject to the lifetime > > enforced by > > + * the filesystem and we need guarantees that longterm users like RDMA and > > V4L2 > > + * only establish mappings that have a kernel enforced revocation > > mechanism. > > + * > > + * In the CMA case pages can't be pinned in a CMA region as this would > > + * unnecessarily fragment that region. So CMA attempts to migrate the page > > + * before pinning. > > * > > * "longterm" == userspace controlled elevated page count lifetime. > > * Contrast this to iov_iter_get_pages() usages which are transient. > > Ah, here's the longterm documentation, but if I was a developer > considering whether to use FOLL_LONGTERM or not I would expect to find > the documentation at the flag definition site. > > I think it has become more clear since get_user_pages_longterm() was > initially merged that we need to warn people not to use it, or at > least seriously reconsider whether they want an interface to support > indefinite pins. I will move the comment to the flag definition but... In reviewing this comment it occurs to me that the addition of special casing CMA regions via FOLL_LONGTERM has made it less experimental/temporary and now simply implies intent to the GUP code as to the use of the pages. As I'm not super familiar with the CMA use case I can't say for certain but it seems that it is not a temporary solution. So I'm not going to refrain from a FIXME WRT removing the flag. New suggested text below. diff --git a/include/linux/mm.h b/include/linux/mm.h index 6831077d126c..5db9d8e894aa 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2596,7 +2596,28 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_REMOTE0x2000 /* we are working on non-current tsk/mm */ #define FOLL_COW 0x4000 /* internal GUP flag */ #define FOLL_ANON 0x8000 /* don't do file mappings */ -#define FOLL_LONGTERM 0x1 /* mapping is intended for a long term pin */ +#define FOLL_LONGTERM 0x1 /* mapping lifetime is indefinite: see below */ + +/* + * NOTE on FOLL_LONGTERM: + * + * FOLL_LONGTERM indicates that the page will be held for an indefinite time + * period _often_ under userspace control. This is contrasted with + * iov_iter_get_pages() where usages which are transient. + * + * FIXME: For pages which are part of a filesystem, mappings are subject to the + * lifetime enforced by the filesystem and we need guarantees that longterm + * users like RDMA and V4L2 only establish mappings which coordinate usage with + * the filesystem. Ideas for this coordination include revoking the longterm + * pin, delaying writeback, bounce buffer page writeback, etc. As FS DAX was + * added after the problem with filesystems was found FS DAX VMAs are + * specifically failed. Filesystem pages are still subject to bugs and use of + * FOLL_LONGTERM should be avoided on those pages. + * + * In the CMA case: longterm pins in a CMA region would unnecessarily fragment + * that region. And so CMA attempts to migrate the page before pinning when + * FOLL_LONGTERM is specified. + */ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) {
Re: [PATCH v3] powerpc/64: Fix memcmp reading past the end of src/dest
On Mon, Mar 25, 2019 at 11:33:56PM +1100, Michael Ellerman wrote: > Segher Boessenkool writes: > > On Fri, Mar 22, 2019 at 11:37:24PM +1100, Michael Ellerman wrote: > >> + clrldi r6,r4,(64-12) // r6 = r4 & 0xfff > > > > You can just write > > rlwinm r6,r4,0,0x0fff > > > if that is clearer? Or do you still want a comment with that :-) > > I don't think it's clearer doing a rotate of zero bits :) > > And yeah I'd probably still leave the comment, so I'm inclined to stick > with the clrldi? I always have to think what the clrldi etc. do exactly, while with rlwinm it is obvious. But yeah this may be different for other people who are used to different idiom. > Would be nice if the assembler could support: > > andir6, r4, 0x0fff > > And turn it into the rlwinm, or rldicl :) The extended mnemonics are *simple*, *one-to-one* mappings. Having "andi. 6,4,0x0f0f" a valid insn, but an extended mnemonic "andi 6,4,0x0f0f" that is not (and the other way around for say 0xffff) would violate that. You could do some assembler macro, that can also expand to multiple insns where that is useful. Also one for loading constants, etc. The downside to that is you often do care how many insns are generated. Instead you could do a macro for only those cases that can be done with *one* insn. But that then is pretty restricted in use, and people have to learn what values are valid. I don't see a perfect solution. Segher
Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast
On Mon, Mar 25, 2019 at 02:23:15AM -0700, Ira Weiny wrote: > > > Unfortunately holding the lock is required to support FOLL_LONGTERM (to > > > check > > > the VMAs) but we don't want to hold the lock to be optimal (specifically > > > allow > > > FAULT_FOLL_ALLOW_RETRY). So I'm maintaining the optimization for *_fast > > > users > > > who do not specify FOLL_LONGTERM. > > > > > > Another way to do this would have been to define __gup_longterm_unlocked > > > with > > > the above logic, but that seemed overkill at this point. > > > > get_user_pages_unlocked() is an exported symbol, shouldn't it work > > with the FOLL_LONGTERM flag? > > > > I think it should even though we have no user.. > > > > Otherwise the GUP API just gets more confusing. > > I agree WRT to the API. But I think callers of get_user_pages_unlocked() are > not going to get the behavior they want if they specify FOLL_LONGTERM. Oh? Isn't the only thing FOLL_LONGTERM does is block the call on DAX? Why does the locking mode matter to this test? > What I could do is BUG_ON (or just WARN_ON) if unlocked is called with > FOLL_LONGTERM similar to the code in get_user_pages_locked() which does not > allow locked and vmas to be passed together: The GUP call should fail if you are doing something like this. But I'd rather not see confusing specialc cases in code without a clear comment explaining why it has to be there. Jason
Re: [PATCH 2/2] arch: add pidfd and io_uring syscalls everywhere
Hi Arnd, On Mon, Mar 25, 2019 at 03:47:37PM +0100, Arnd Bergmann wrote: > Add the io_uring and pidfd_send_signal system calls to all architectures. > > These system calls are designed to handle both native and compat tasks, > so all entries are the same across architectures, only arm-compat and > the generic tale still use an old format. > > Signed-off-by: Arnd Bergmann > --- >% > diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl > b/arch/mips/kernel/syscalls/syscall_n64.tbl > index c85502e67b44..c4a49f7d57bb 100644 > --- a/arch/mips/kernel/syscalls/syscall_n64.tbl > +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl > @@ -338,3 +338,7 @@ > 327 n64 rseqsys_rseq > 328 n64 io_pgetevents sys_io_pgetevents > # 329 through 423 are reserved to sync up with other architectures > +424 common pidfd_send_signal sys_pidfd_send_signal > +425 common io_uring_setup sys_io_uring_setup > +426 common io_uring_enter sys_io_uring_enter > +427 common io_uring_register sys_io_uring_register Shouldn't these declare the ABI as "n64"? I don't see anywhere that it would actually change the generated code, but a comment at the top of the file says that every entry should use "n64" and so far they all do. Did you have something else in mind here? Thanks, Paul
Re: [PATCH v4 0/4] ocxl: OpenCAPI Cleanup
Le 25/03/2019 à 17:49, Greg Kurz a écrit : Hi Alastair, I forgot to mention it during v3 but please don't link new version of a patchset to the previous one with --in-reply-to. This is to ensure I can see them in my email client without having to scroll back many days in the past (which likely means a fair number of e-mails on linuxppc-dev). I'm also seeing the other series (ocxl refactoring) somehow under the same thread. I haven't checked why, and there may be some mail client bug in the way, I just mention it in case you see a reason for it in the way you submit the patch set. Fred Cheers, -- Greg On Mon, 25 Mar 2019 16:34:51 +1100 "Alastair D'Silva" wrote: From: Alastair D'Silva Some minor cleanups for the OpenCAPI driver as a prerequisite for an ocxl driver refactoring to allow the driver core to be utilised by external drivers. Changelog: V4: - Drop printf format changes as they omit the format indicator for '0' V3: - Add missed header in 'ocxl: Remove some unused exported symbols'. This addresses the introduced sparse warnings V2: - remove intermediate assignment of 'link' var in 'Rename struct link to ocxl_link' - Don't shift definition of ocxl_context_attach in 'Remove some unused exported symbols' Alastair D'Silva (4): ocxl: Rename struct link to ocxl_link ocxl: read_pasid never returns an error, so make it void ocxl: Remove superfluous 'extern' from headers ocxl: Remove some unused exported symbols drivers/misc/ocxl/config.c| 13 ++--- drivers/misc/ocxl/file.c | 5 +- drivers/misc/ocxl/link.c | 36 ++--- drivers/misc/ocxl/ocxl_internal.h | 85 +++ include/misc/ocxl.h | 53 ++- 5 files changed, 91 insertions(+), 101 deletions(-)
Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast
On Mon, Mar 25, 2019 at 01:47:13PM -0300, Jason Gunthorpe wrote: > On Mon, Mar 25, 2019 at 01:42:26AM -0700, Ira Weiny wrote: > > On Fri, Mar 22, 2019 at 03:12:55PM -0700, Dan Williams wrote: > > > On Sun, Mar 17, 2019 at 7:36 PM wrote: > > > > > > > > From: Ira Weiny > > > > > > > > DAX pages were previously unprotected from longterm pins when users > > > > called get_user_pages_fast(). > > > > > > > > Use the new FOLL_LONGTERM flag to check for DEVMAP pages and fall > > > > back to regular GUP processing if a DEVMAP page is encountered. > > > > > > > > Signed-off-by: Ira Weiny > > > > mm/gup.c | 29 + > > > > 1 file changed, 25 insertions(+), 4 deletions(-) > > > > > > > > diff --git a/mm/gup.c b/mm/gup.c > > > > index 0684a9536207..173db0c44678 100644 > > > > +++ b/mm/gup.c > > > > @@ -1600,6 +1600,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long > > > > addr, unsigned long end, > > > > goto pte_unmap; > > > > > > > > if (pte_devmap(pte)) { > > > > + if (unlikely(flags & FOLL_LONGTERM)) > > > > + goto pte_unmap; > > > > + > > > > pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); > > > > if (unlikely(!pgmap)) { > > > > undo_dev_pagemap(nr, nr_start, pages); > > > > @@ -1739,8 +1742,11 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, > > > > unsigned long addr, > > > > if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) > > > > return 0; > > > > > > > > - if (pmd_devmap(orig)) > > > > + if (pmd_devmap(orig)) { > > > > + if (unlikely(flags & FOLL_LONGTERM)) > > > > + return 0; > > > > return __gup_device_huge_pmd(orig, pmdp, addr, end, > > > > pages, nr); > > > > + } > > > > > > > > refs = 0; > > > > page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); > > > > @@ -1777,8 +1783,11 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, > > > > unsigned long addr, > > > > if (!pud_access_permitted(orig, flags & FOLL_WRITE)) > > > > return 0; > > > > > > > > - if (pud_devmap(orig)) > > > > + if (pud_devmap(orig)) { > > > > + if (unlikely(flags & FOLL_LONGTERM)) > > > > + return 0; > > > > return __gup_device_huge_pud(orig, pudp, addr, end, > > > > pages, nr); > > > > + } > > > > > > > > refs = 0; > > > > page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); > > > > @@ -2066,8 +2075,20 @@ int get_user_pages_fast(unsigned long start, int > > > > nr_pages, > > > > start += nr << PAGE_SHIFT; > > > > pages += nr; > > > > > > > > - ret = get_user_pages_unlocked(start, nr_pages - nr, > > > > pages, > > > > - gup_flags); > > > > + if (gup_flags & FOLL_LONGTERM) { > > > > + down_read(¤t->mm->mmap_sem); > > > > + ret = __gup_longterm_locked(current, > > > > current->mm, > > > > + start, nr_pages - > > > > nr, > > > > + pages, NULL, > > > > gup_flags); > > > > + up_read(¤t->mm->mmap_sem); > > > > + } else { > > > > + /* > > > > +* retain FAULT_FOLL_ALLOW_RETRY optimization if > > > > +* possible > > > > +*/ > > > > + ret = get_user_pages_unlocked(start, nr_pages - > > > > nr, > > > > + pages, gup_flags); > > > > > > I couldn't immediately grok why this path needs to branch on > > > FOLL_LONGTERM? Won't get_user_pages_unlocked(..., FOLL_LONGTERM) do > > > the right thing? > > > > Unfortunately holding the lock is required to support FOLL_LONGTERM (to > > check > > the VMAs) but we don't want to hold the lock to be optimal (specifically > > allow > > FAULT_FOLL_ALLOW_RETRY). So I'm maintaining the optimization for *_fast > > users > > who do not specify FOLL_LONGTERM. > > > > Another way to do this would have been to define __gup_longterm_unlocked > > with > > the above logic, but that seemed overkill at this point. > > get_user_pages_unlocked() is an exported symbol, shouldn't it work > with the FOLL_LONGTERM flag? > > I think it should even though we have no user.. > > Otherwise the GUP API just gets more confusing. I agree WRT to the API. But I think callers of get_user_pages_unlocked() are not going to get the behavior they want if they specify FOLL_LONGTERM. What I could do is BUG_ON (or just WARN_ON) if unlocked is called with FOLL_LONGTERM similar to the code in get_user_pages_locked() which does not allow locked and
Re: [PATCH v4 4/4] ocxl: Remove some unused exported symbols
On Mon, 25 Mar 2019 16:34:55 +1100 "Alastair D'Silva" wrote: > From: Alastair D'Silva > > Remove some unused exported symbols. > > Signed-off-by: Alastair D'Silva > --- Reviewed-by: Greg Kurz > drivers/misc/ocxl/config.c| 4 +--- > drivers/misc/ocxl/ocxl_internal.h | 23 +++ > include/misc/ocxl.h | 23 --- > 3 files changed, 24 insertions(+), 26 deletions(-) > > diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c > index 4dc11897237d..5e65acb8e134 100644 > --- a/drivers/misc/ocxl/config.c > +++ b/drivers/misc/ocxl/config.c > @@ -2,8 +2,8 @@ > // Copyright 2017 IBM Corp. > #include > #include > -#include > #include > +#include "ocxl_internal.h" > > #define EXTRACT_BIT(val, bit) (!!(val & BIT(bit))) > #define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s) > @@ -299,7 +299,6 @@ int ocxl_config_check_afu_index(struct pci_dev *dev, > } > return 1; > } > -EXPORT_SYMBOL_GPL(ocxl_config_check_afu_index); > > static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn, > struct ocxl_afu_config *afu) > @@ -535,7 +534,6 @@ int ocxl_config_get_pasid_info(struct pci_dev *dev, int > *count) > { > return pnv_ocxl_get_pasid_count(dev, count); > } > -EXPORT_SYMBOL_GPL(ocxl_config_get_pasid_info); > > void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base, > u32 pasid_count_log) > diff --git a/drivers/misc/ocxl/ocxl_internal.h > b/drivers/misc/ocxl/ocxl_internal.h > index 321b29e77f45..06fd98c989c8 100644 > --- a/drivers/misc/ocxl/ocxl_internal.h > +++ b/drivers/misc/ocxl/ocxl_internal.h > @@ -107,6 +107,29 @@ void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, > u32 size); > int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size); > void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size); > > +/* > + * Get the max PASID value that can be used by the function > + */ > +int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count); > + > +/* > + * Check if an AFU index is valid for the given function. > + * > + * AFU indexes can be sparse, so a driver should check all indexes up > + * to the maximum found in the function description > + */ > +int ocxl_config_check_afu_index(struct pci_dev *dev, > + struct ocxl_fn_config *fn, int afu_idx); > + > +/** > + * Update values within a Process Element > + * > + * link_handle: the link handle associated with the process element > + * pasid: the PASID for the AFU context > + * tid: the new thread id for the process element > + */ > +int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid); > + > struct ocxl_context *ocxl_context_alloc(void); > int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, > struct address_space *mapping); > diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h > index 4544573cc93c..9530d3be1b30 100644 > --- a/include/misc/ocxl.h > +++ b/include/misc/ocxl.h > @@ -56,15 +56,6 @@ struct ocxl_fn_config { > int ocxl_config_read_function(struct pci_dev *dev, > struct ocxl_fn_config *fn); > > -/* > - * Check if an AFU index is valid for the given function. > - * > - * AFU indexes can be sparse, so a driver should check all indexes up > - * to the maximum found in the function description > - */ > -int ocxl_config_check_afu_index(struct pci_dev *dev, > - struct ocxl_fn_config *fn, int afu_idx); > - > /* > * Read the configuration space of a function for the AFU specified by > * the index 'afu_idx'. Fills in a ocxl_afu_config structure > @@ -74,11 +65,6 @@ int ocxl_config_read_afu(struct pci_dev *dev, > struct ocxl_afu_config *afu, > u8 afu_idx); > > -/* > - * Get the max PASID value that can be used by the function > - */ > -int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count); > - > /* > * Tell an AFU, by writing in the configuration space, the PASIDs that > * it can use. Range starts at 'pasid_base' and its size is a multiple > @@ -188,15 +174,6 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 > pidr, u32 tidr, > void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), > void *xsl_err_data); > > -/** > - * Update values within a Process Element > - * > - * link_handle: the link handle associated with the process element > - * pasid: the PASID for the AFU context > - * tid: the new thread id for the process element > - */ > -int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid); > - > /* > * Remove a Process Element from the Shared Process Area for a link > */
Re: [PATCH v4 0/4] ocxl: OpenCAPI Cleanup
Hi Alastair, I forgot to mention it during v3 but please don't link new version of a patchset to the previous one with --in-reply-to. This is to ensure I can see them in my email client without having to scroll back many days in the past (which likely means a fair number of e-mails on linuxppc-dev). Cheers, -- Greg On Mon, 25 Mar 2019 16:34:51 +1100 "Alastair D'Silva" wrote: > From: Alastair D'Silva > > Some minor cleanups for the OpenCAPI driver as a prerequisite > for an ocxl driver refactoring to allow the driver core to > be utilised by external drivers. > > Changelog: > V4: > - Drop printf format changes as they omit the format indicator for '0' > V3: > - Add missed header in 'ocxl: Remove some unused exported symbols'. > This addresses the introduced sparse warnings > V2: > - remove intermediate assignment of 'link' var in > 'Rename struct link to ocxl_link' > - Don't shift definition of ocxl_context_attach in > 'Remove some unused exported symbols' > > > Alastair D'Silva (4): > ocxl: Rename struct link to ocxl_link > ocxl: read_pasid never returns an error, so make it void > ocxl: Remove superfluous 'extern' from headers > ocxl: Remove some unused exported symbols > > drivers/misc/ocxl/config.c| 13 ++--- > drivers/misc/ocxl/file.c | 5 +- > drivers/misc/ocxl/link.c | 36 ++--- > drivers/misc/ocxl/ocxl_internal.h | 85 +++ > include/misc/ocxl.h | 53 ++- > 5 files changed, 91 insertions(+), 101 deletions(-) >
Re: [PATCH v4 3/4] ocxl: Remove superfluous 'extern' from headers
On Mon, 25 Mar 2019 16:34:54 +1100 "Alastair D'Silva" wrote: > From: Alastair D'Silva > > The 'extern' keyword adds no value here. > > Signed-off-by: Alastair D'Silva > --- Reviewed-by: Greg Kurz > drivers/misc/ocxl/ocxl_internal.h | 54 +++ > include/misc/ocxl.h | 36 ++--- > 2 files changed, 44 insertions(+), 46 deletions(-) > > diff --git a/drivers/misc/ocxl/ocxl_internal.h > b/drivers/misc/ocxl/ocxl_internal.h > index a32f2151029f..321b29e77f45 100644 > --- a/drivers/misc/ocxl/ocxl_internal.h > +++ b/drivers/misc/ocxl/ocxl_internal.h > @@ -16,7 +16,6 @@ > > extern struct pci_driver ocxl_pci_driver; > > - > struct ocxl_fn { > struct device dev; > int bar_used[3]; > @@ -92,41 +91,40 @@ struct ocxl_process_element { > __be32 software_state; > }; > > +struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu); > +void ocxl_afu_put(struct ocxl_afu *afu); > > -extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu); > -extern void ocxl_afu_put(struct ocxl_afu *afu); > - > -extern int ocxl_create_cdev(struct ocxl_afu *afu); > -extern void ocxl_destroy_cdev(struct ocxl_afu *afu); > -extern int ocxl_register_afu(struct ocxl_afu *afu); > -extern void ocxl_unregister_afu(struct ocxl_afu *afu); > +int ocxl_create_cdev(struct ocxl_afu *afu); > +void ocxl_destroy_cdev(struct ocxl_afu *afu); > +int ocxl_register_afu(struct ocxl_afu *afu); > +void ocxl_unregister_afu(struct ocxl_afu *afu); > > -extern int ocxl_file_init(void); > -extern void ocxl_file_exit(void); > +int ocxl_file_init(void); > +void ocxl_file_exit(void); > > -extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size); > -extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size); > -extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size); > -extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size); > +int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size); > +void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size); > +int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size); > +void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size); > > -extern struct ocxl_context *ocxl_context_alloc(void); > -extern int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, > +struct ocxl_context *ocxl_context_alloc(void); > +int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, > struct address_space *mapping); > -extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr); > -extern int ocxl_context_mmap(struct ocxl_context *ctx, > +int ocxl_context_attach(struct ocxl_context *ctx, u64 amr); > +int ocxl_context_mmap(struct ocxl_context *ctx, > struct vm_area_struct *vma); > -extern int ocxl_context_detach(struct ocxl_context *ctx); > -extern void ocxl_context_detach_all(struct ocxl_afu *afu); > -extern void ocxl_context_free(struct ocxl_context *ctx); > +int ocxl_context_detach(struct ocxl_context *ctx); > +void ocxl_context_detach_all(struct ocxl_afu *afu); > +void ocxl_context_free(struct ocxl_context *ctx); > > -extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu); > -extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu); > +int ocxl_sysfs_add_afu(struct ocxl_afu *afu); > +void ocxl_sysfs_remove_afu(struct ocxl_afu *afu); > > -extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset); > -extern int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset); > -extern void ocxl_afu_irq_free_all(struct ocxl_context *ctx); > -extern int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, > +int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset); > +int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset); > +void ocxl_afu_irq_free_all(struct ocxl_context *ctx); > +int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, > int eventfd); > -extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset); > +u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset); > > #endif /* _OCXL_INTERNAL_H_ */ > diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h > index 9ff6ddc28e22..4544573cc93c 100644 > --- a/include/misc/ocxl.h > +++ b/include/misc/ocxl.h > @@ -53,7 +53,7 @@ struct ocxl_fn_config { > * Read the configuration space of a function and fill in a > * ocxl_fn_config structure with all the function details > */ > -extern int ocxl_config_read_function(struct pci_dev *dev, > +int ocxl_config_read_function(struct pci_dev *dev, > struct ocxl_fn_config *fn); > > /* > @@ -62,14 +62,14 @@ extern int ocxl_config_read_function(struct pci_dev *dev, > * AFU indexes can be sparse, so a driver should check all indexes up > * to the maximum found in the function description > */ > -extern int ocxl_config_check_afu_index(struct pci_dev *dev, > +int ocxl_config_check_afu_index(struct pci_dev *dev, >
Re: [RESEND 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM
On Mon, Mar 25, 2019 at 09:45:12AM -0700, Dan Williams wrote: > On Mon, Mar 25, 2019 at 7:21 AM Ira Weiny wrote: > [..] > > > > @@ -1268,10 +1246,14 @@ static long > > > > check_and_migrate_cma_pages(unsigned long start, long nr_pages, > > > > putback_movable_pages(&cma_page_list); > > > > } > > > > /* > > > > -* We did migrate all the pages, Try to get the page > > > > references again > > > > -* migrating any new CMA pages which we failed to > > > > isolate earlier. > > > > +* We did migrate all the pages, Try to get the page > > > > references > > > > +* again migrating any new CMA pages which we failed to > > > > isolate > > > > +* earlier. > > > > */ > > > > - nr_pages = get_user_pages(start, nr_pages, gup_flags, > > > > pages, vmas); > > > > + nr_pages = __get_user_pages_locked(tsk, mm, start, > > > > nr_pages, > > > > + pages, vmas, NULL, > > > > + gup_flags); > > > > + > > > > > > Why did this need to change to __get_user_pages_locked? > > > > __get_uer_pages_locked() is now the "internal call" for get_user_pages. > > > > Technically it did not _have_ to change but there is no need to call > > get_user_pages() again because the FOLL_TOUCH flags is already set. Also > > this > > call then matches the __get_user_pages_locked() which was called on the > > pages > > we migrated from. Mostly this keeps the code "symmetrical" in that we > > called > > __get_user_pages_locked() on the pages we are migrating from and the same > > call > > on the pages we migrated to. > > > > While the change here looks funny I think the final code is better. > > Agree, but I think that either needs to be noted in the changelog so > it's not a surprise, or moved to a follow-on cleanup patch. > Can do... Thanks! Ira
Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast
On Mon, Mar 25, 2019 at 01:42:26AM -0700, Ira Weiny wrote: > On Fri, Mar 22, 2019 at 03:12:55PM -0700, Dan Williams wrote: > > On Sun, Mar 17, 2019 at 7:36 PM wrote: > > > > > > From: Ira Weiny > > > > > > DAX pages were previously unprotected from longterm pins when users > > > called get_user_pages_fast(). > > > > > > Use the new FOLL_LONGTERM flag to check for DEVMAP pages and fall > > > back to regular GUP processing if a DEVMAP page is encountered. > > > > > > Signed-off-by: Ira Weiny > > > mm/gup.c | 29 + > > > 1 file changed, 25 insertions(+), 4 deletions(-) > > > > > > diff --git a/mm/gup.c b/mm/gup.c > > > index 0684a9536207..173db0c44678 100644 > > > +++ b/mm/gup.c > > > @@ -1600,6 +1600,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long > > > addr, unsigned long end, > > > goto pte_unmap; > > > > > > if (pte_devmap(pte)) { > > > + if (unlikely(flags & FOLL_LONGTERM)) > > > + goto pte_unmap; > > > + > > > pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); > > > if (unlikely(!pgmap)) { > > > undo_dev_pagemap(nr, nr_start, pages); > > > @@ -1739,8 +1742,11 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, > > > unsigned long addr, > > > if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) > > > return 0; > > > > > > - if (pmd_devmap(orig)) > > > + if (pmd_devmap(orig)) { > > > + if (unlikely(flags & FOLL_LONGTERM)) > > > + return 0; > > > return __gup_device_huge_pmd(orig, pmdp, addr, end, > > > pages, nr); > > > + } > > > > > > refs = 0; > > > page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); > > > @@ -1777,8 +1783,11 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, > > > unsigned long addr, > > > if (!pud_access_permitted(orig, flags & FOLL_WRITE)) > > > return 0; > > > > > > - if (pud_devmap(orig)) > > > + if (pud_devmap(orig)) { > > > + if (unlikely(flags & FOLL_LONGTERM)) > > > + return 0; > > > return __gup_device_huge_pud(orig, pudp, addr, end, > > > pages, nr); > > > + } > > > > > > refs = 0; > > > page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); > > > @@ -2066,8 +2075,20 @@ int get_user_pages_fast(unsigned long start, int > > > nr_pages, > > > start += nr << PAGE_SHIFT; > > > pages += nr; > > > > > > - ret = get_user_pages_unlocked(start, nr_pages - nr, pages, > > > - gup_flags); > > > + if (gup_flags & FOLL_LONGTERM) { > > > + down_read(¤t->mm->mmap_sem); > > > + ret = __gup_longterm_locked(current, current->mm, > > > + start, nr_pages - nr, > > > + pages, NULL, > > > gup_flags); > > > + up_read(¤t->mm->mmap_sem); > > > + } else { > > > + /* > > > +* retain FAULT_FOLL_ALLOW_RETRY optimization if > > > +* possible > > > +*/ > > > + ret = get_user_pages_unlocked(start, nr_pages - > > > nr, > > > + pages, gup_flags); > > > > I couldn't immediately grok why this path needs to branch on > > FOLL_LONGTERM? Won't get_user_pages_unlocked(..., FOLL_LONGTERM) do > > the right thing? > > Unfortunately holding the lock is required to support FOLL_LONGTERM (to check > the VMAs) but we don't want to hold the lock to be optimal (specifically allow > FAULT_FOLL_ALLOW_RETRY). So I'm maintaining the optimization for *_fast users > who do not specify FOLL_LONGTERM. > > Another way to do this would have been to define __gup_longterm_unlocked with > the above logic, but that seemed overkill at this point. get_user_pages_unlocked() is an exported symbol, shouldn't it work with the FOLL_LONGTERM flag? I think it should even though we have no user.. Otherwise the GUP API just gets more confusing. Jason
Re: [RESEND 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM
On Mon, Mar 25, 2019 at 7:21 AM Ira Weiny wrote: [..] > > > @@ -1268,10 +1246,14 @@ static long check_and_migrate_cma_pages(unsigned > > > long start, long nr_pages, > > > putback_movable_pages(&cma_page_list); > > > } > > > /* > > > -* We did migrate all the pages, Try to get the page > > > references again > > > -* migrating any new CMA pages which we failed to isolate > > > earlier. > > > +* We did migrate all the pages, Try to get the page > > > references > > > +* again migrating any new CMA pages which we failed to > > > isolate > > > +* earlier. > > > */ > > > - nr_pages = get_user_pages(start, nr_pages, gup_flags, > > > pages, vmas); > > > + nr_pages = __get_user_pages_locked(tsk, mm, start, > > > nr_pages, > > > + pages, vmas, NULL, > > > + gup_flags); > > > + > > > > Why did this need to change to __get_user_pages_locked? > > __get_uer_pages_locked() is now the "internal call" for get_user_pages. > > Technically it did not _have_ to change but there is no need to call > get_user_pages() again because the FOLL_TOUCH flags is already set. Also this > call then matches the __get_user_pages_locked() which was called on the pages > we migrated from. Mostly this keeps the code "symmetrical" in that we called > __get_user_pages_locked() on the pages we are migrating from and the same call > on the pages we migrated to. > > While the change here looks funny I think the final code is better. Agree, but I think that either needs to be noted in the changelog so it's not a surprise, or moved to a follow-on cleanup patch.
Re: [RESEND 5/7] IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
On Fri, Mar 22, 2019 at 03:14:26PM -0700, Dan Williams wrote: > On Sun, Mar 17, 2019 at 7:36 PM wrote: > > > > From: Ira Weiny > > > > Use the new FOLL_LONGTERM to get_user_pages_fast() to protect against > > FS DAX pages being mapped. > > > > Signed-off-by: Ira Weiny > > --- > > drivers/infiniband/hw/hfi1/user_pages.c | 6 -- > > 1 file changed, 4 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/infiniband/hw/hfi1/user_pages.c > > b/drivers/infiniband/hw/hfi1/user_pages.c > > index 78ccacaf97d0..6a7f9cd5a94e 100644 > > --- a/drivers/infiniband/hw/hfi1/user_pages.c > > +++ b/drivers/infiniband/hw/hfi1/user_pages.c > > @@ -104,9 +104,11 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, > > unsigned long vaddr, size_t np > > bool writable, struct page **pages) > > { > > int ret; > > + unsigned int gup_flags = writable ? FOLL_WRITE : 0; > > Maybe: > > unsigned int gup_flags = FOLL_LONGTERM | (writable ? FOLL_WRITE : 0); > > ? Sure looks good. Ira > > > > > - ret = get_user_pages_fast(vaddr, npages, writable ? FOLL_WRITE : 0, > > - pages); > > + gup_flags |= FOLL_LONGTERM; > > + > > + ret = get_user_pages_fast(vaddr, npages, gup_flags, pages); > > if (ret < 0) > > return ret; > > > > -- > > 2.20.1 > > >
Re: [RESEND 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast
On Fri, Mar 22, 2019 at 03:12:55PM -0700, Dan Williams wrote: > On Sun, Mar 17, 2019 at 7:36 PM wrote: > > > > From: Ira Weiny > > > > DAX pages were previously unprotected from longterm pins when users > > called get_user_pages_fast(). > > > > Use the new FOLL_LONGTERM flag to check for DEVMAP pages and fall > > back to regular GUP processing if a DEVMAP page is encountered. > > > > Signed-off-by: Ira Weiny > > --- > > mm/gup.c | 29 + > > 1 file changed, 25 insertions(+), 4 deletions(-) > > > > diff --git a/mm/gup.c b/mm/gup.c > > index 0684a9536207..173db0c44678 100644 > > --- a/mm/gup.c > > +++ b/mm/gup.c > > @@ -1600,6 +1600,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long > > addr, unsigned long end, > > goto pte_unmap; > > > > if (pte_devmap(pte)) { > > + if (unlikely(flags & FOLL_LONGTERM)) > > + goto pte_unmap; > > + > > pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); > > if (unlikely(!pgmap)) { > > undo_dev_pagemap(nr, nr_start, pages); > > @@ -1739,8 +1742,11 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, > > unsigned long addr, > > if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) > > return 0; > > > > - if (pmd_devmap(orig)) > > + if (pmd_devmap(orig)) { > > + if (unlikely(flags & FOLL_LONGTERM)) > > + return 0; > > return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, > > nr); > > + } > > > > refs = 0; > > page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); > > @@ -1777,8 +1783,11 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, > > unsigned long addr, > > if (!pud_access_permitted(orig, flags & FOLL_WRITE)) > > return 0; > > > > - if (pud_devmap(orig)) > > + if (pud_devmap(orig)) { > > + if (unlikely(flags & FOLL_LONGTERM)) > > + return 0; > > return __gup_device_huge_pud(orig, pudp, addr, end, pages, > > nr); > > + } > > > > refs = 0; > > page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); > > @@ -2066,8 +2075,20 @@ int get_user_pages_fast(unsigned long start, int > > nr_pages, > > start += nr << PAGE_SHIFT; > > pages += nr; > > > > - ret = get_user_pages_unlocked(start, nr_pages - nr, pages, > > - gup_flags); > > + if (gup_flags & FOLL_LONGTERM) { > > + down_read(¤t->mm->mmap_sem); > > + ret = __gup_longterm_locked(current, current->mm, > > + start, nr_pages - nr, > > + pages, NULL, gup_flags); > > + up_read(¤t->mm->mmap_sem); > > + } else { > > + /* > > +* retain FAULT_FOLL_ALLOW_RETRY optimization if > > +* possible > > +*/ > > + ret = get_user_pages_unlocked(start, nr_pages - nr, > > + pages, gup_flags); > > I couldn't immediately grok why this path needs to branch on > FOLL_LONGTERM? Won't get_user_pages_unlocked(..., FOLL_LONGTERM) do > the right thing? Unfortunately holding the lock is required to support FOLL_LONGTERM (to check the VMAs) but we don't want to hold the lock to be optimal (specifically allow FAULT_FOLL_ALLOW_RETRY). So I'm maintaining the optimization for *_fast users who do not specify FOLL_LONGTERM. Another way to do this would have been to define __gup_longterm_unlocked with the above logic, but that seemed overkill at this point. Ira
Re: [RESEND 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool'
On Fri, Mar 22, 2019 at 03:05:53PM -0700, Dan Williams wrote: > On Sun, Mar 17, 2019 at 7:36 PM wrote: > > > > From: Ira Weiny > > > > To facilitate additional options to get_user_pages_fast() change the > > singular write parameter to be gup_flags. > > > > This patch does not change any functionality. New functionality will > > follow in subsequent patches. > > > > Some of the get_user_pages_fast() call sites were unchanged because they > > already passed FOLL_WRITE or 0 for the write parameter. > > > > Signed-off-by: Ira Weiny > > > > --- > > Changes from V1: > > Rebase to current merge tree > > arch/powerpc/mm/mmu_context_iommu.c no longer calls gup_fast > > The gup_longterm was converted in patch 1 > > > > arch/mips/mm/gup.c | 11 ++- > > arch/powerpc/kvm/book3s_64_mmu_hv.c| 4 ++-- > > arch/powerpc/kvm/e500_mmu.c| 2 +- > > arch/s390/kvm/interrupt.c | 2 +- > > arch/s390/mm/gup.c | 12 ++-- > > arch/sh/mm/gup.c | 11 ++- > > arch/sparc/mm/gup.c| 9 + > > arch/x86/kvm/paging_tmpl.h | 2 +- > > arch/x86/kvm/svm.c | 2 +- > > drivers/fpga/dfl-afu-dma-region.c | 2 +- > > drivers/gpu/drm/via/via_dmablit.c | 3 ++- > > drivers/infiniband/hw/hfi1/user_pages.c| 3 ++- > > drivers/misc/genwqe/card_utils.c | 2 +- > > drivers/misc/vmw_vmci/vmci_host.c | 2 +- > > drivers/misc/vmw_vmci/vmci_queue_pair.c| 6 -- > > drivers/platform/goldfish/goldfish_pipe.c | 3 ++- > > drivers/rapidio/devices/rio_mport_cdev.c | 4 +++- > > drivers/sbus/char/oradax.c | 2 +- > > drivers/scsi/st.c | 3 ++- > > drivers/staging/gasket/gasket_page_table.c | 4 ++-- > > drivers/tee/tee_shm.c | 2 +- > > drivers/vfio/vfio_iommu_spapr_tce.c| 3 ++- > > drivers/vhost/vhost.c | 2 +- > > drivers/video/fbdev/pvr2fb.c | 2 +- > > drivers/virt/fsl_hypervisor.c | 2 +- > > drivers/xen/gntdev.c | 2 +- > > fs/orangefs/orangefs-bufmap.c | 2 +- > > include/linux/mm.h | 4 ++-- > > kernel/futex.c | 2 +- > > lib/iov_iter.c | 7 +-- > > mm/gup.c | 10 +- > > mm/util.c | 8 > > net/ceph/pagevec.c | 2 +- > > net/rds/info.c | 2 +- > > net/rds/rdma.c | 3 ++- > > 35 files changed, 79 insertions(+), 63 deletions(-) > > > > > > diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c > > index 0d14e0d8eacf..4c2b4483683c 100644 > > --- a/arch/mips/mm/gup.c > > +++ b/arch/mips/mm/gup.c > > @@ -235,7 +235,7 @@ int __get_user_pages_fast(unsigned long start, int > > nr_pages, int write, > > * get_user_pages_fast() - pin user pages in memory > > * @start: starting user address > > * @nr_pages: number of pages from start to pin > > - * @write: whether pages will be written to > > + * @gup_flags: flags modifying pin behaviour > > * @pages: array that receives pointers to the pages pinned. > > * Should be at least nr_pages long. > > * > > @@ -247,8 +247,8 @@ int __get_user_pages_fast(unsigned long start, int > > nr_pages, int write, > > * requested. If nr_pages is 0 or negative, returns 0. If no pages > > * were pinned, returns -errno. > > */ > > -int get_user_pages_fast(unsigned long start, int nr_pages, int write, > > - struct page **pages) > > +int get_user_pages_fast(unsigned long start, int nr_pages, > > + unsigned int gup_flags, struct page **pages) > > This looks a tad scary given all related thrash especially when it's > only 1 user that wants to do get_user_page_fast_longterm, right? Agreed but the discussion back in Feb agreed that it would be better to make get_user_pages_fast() use flags rather than add another *_longterm call, and I agree. > Maybe > something like the following. Note I explicitly moved the flags to the > end so that someone half paying attention that calls > __get_user_pages_fast will get a compile error if they specify the > args in the same order. I did this to remain consistent with the other public calls. They put the "return" pages parameter at the end. For example get_user_pages() is defined this way with pages and vmas at the end. long get_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) I'm pretty sure I got all the callers. Is this worth making the signature "non-standard" WRT the other calls? Ir
Re: [PATCH] kbuild: strip whitespace in cmd_record_mcount findstring
On Mon, 25 Mar 2019 12:04:38 -0400 Joe Lawrence wrote: > CC_FLAGS_FTRACE may contain trailing whitespace that interferes with > findstring. > > For example, commit 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on > GCC 4.9 and newer") introduced a change such that on my ppc64le box, > CC_FLAGS_FTRACE="-pg -mprofile-kernel ". (Note the trailing space.) > When cmd_record_mcount is now invoked, findstring fails as the ftrace > flags were found at very end of _c_flags, without the trailing space. > > _c_flags=" ... -pg -mprofile-kernel" > CC_FLAGS_FTRACE="-pg -mprofile-kernel " >^ > findstring is looking for this extra space Wow, what a bug. > > Remove the redundant whitespaces from CC_FLAGS_FTRACE in > cmd_record_mcount to avoid this problem. > > Fixes: 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on GCC 4.9 and newer"). > Signed-off-by: Joe Lawrence Acked-by: Steven Rostedt (VMware) -- Steve > --- > > Standard disclaimer: I'm not a kbuild expert, but this works around the > problem I reported where ftrace and livepatch self-tests were failing as > specified object files were not run through the recordmcount.pl script: > > ppc64le: ftrace self-tests and $(CC_FLAGS_FTRACE) broken? > https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-March/187298.html > > scripts/Makefile.build | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/scripts/Makefile.build b/scripts/Makefile.build > index 2554a15ecf2b..74d402b5aa3c 100644 > --- a/scripts/Makefile.build > +++ b/scripts/Makefile.build > @@ -199,10 +199,10 @@ sub_cmd_record_mcount = perl > $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ > "$(if $(part-of-module),1,0)" "$(@)"; > recordmcount_source := $(srctree)/scripts/recordmcount.pl > endif # BUILD_C_RECORDMCOUNT > -cmd_record_mcount = \ > - if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" = \ > - "$(CC_FLAGS_FTRACE)" ]; then \ > - $(sub_cmd_record_mcount)\ > +cmd_record_mcount = \ > + if [ "$(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags))" = \ > + "$(strip $(CC_FLAGS_FTRACE))" ]; then \ > + $(sub_cmd_record_mcount)\ > fi > endif # CC_USING_RECORD_MCOUNT > endif # CONFIG_FTRACE_MCOUNT_RECORD
Re: Regression in 5.1.0-rc2: PowerBook G4 Aluminum fails to boot - bisected to commit 0df977eafc792
On 3/25/19 3:46 AM, Christophe Leroy wrote: Le 25/03/2019 à 01:49, Larry Finger a écrit : A build of kernel 5.1.0-rc2 resulted in a failure to boot on my PowerBook G4 Aluminum. The bootstrap loads the initial kernel and issues the appropriate start, but the machine hangs at that point. The problem does not depend on the choice of PPC32 processor type. This machine has a 7447A according to /proc/cpuinfo. The problem was bisected to the following: commit 0df977eafc792a5365a7f81d8d5920132e03afad Author: Christophe Leroy Date: Thu Feb 21 10:37:54 2019 + powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while in RTAS When calling RTAS, the stack pointer is stored in SPRN_SPRG2 in order to be able to restore it in case of machine check in RTAS. As machine check is not a perfomance critical path, this patch frees SPRN_SPRG2 by using a field in thread struct instead. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman I reverted this patch and found that the system began execution, and then failed, likely due to the reassignment of SPRN_SPRG2. I had found this problem with 5.1.0-rc1, but -rc2 was out by the time I finished the bisection. Unfortunately, none of the changes in -rc2 fixed the problem. I think I identified the issue, see https://patchwork.ozlabs.org/patch/1063962/ It is now booting properly under QEMU with your .config Could you please test it on your target ? Thanks. That patch fixes the issue. Larry
Re: [PATCH] powerpc/rtas: fix early boot failure.
On 3/25/19 3:43 AM, Christophe Leroy wrote: Commit 0df977eafc79 ("powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while in RTAS") changes the code to use a field in thread struct to store the stack pointer while in RTAS instead of using SPRN_SPRG2. It therefore converts all places which were manipulating SPRN_SPRG2 to use that field. During early startup, the zeroing of SPRN_SPRG2 has been replaced by a zeroing of that field in thread struct. But at least in start_here, that's done wrongly because it used the physical address of the fields while MMU is on at that time. So the virtual address of the field should be used instead, but in the meantime, thread struct has already been zeroised and initialised so we can just drop this initialisation. Reported-by: Larry Finger Fixes: 0df977eafc79 ("powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while in RTAS") Signed-off-by: Christophe Leroy Tested-by: Larry Finger My PPC box now boots OK. Larry
Re: ppc64le: ftrace self-tests and $(CC_FLAGS_FTRACE) broken?
On 3/23/19 1:27 PM, Joe Lawrence wrote: On 03/23/2019 12:17 PM, Steven Rostedt wrote: On Sat, 23 Mar 2019 09:19:50 -0400 Joe Lawrence wrote: Perhaps this is gcc version specific? I didn't see any other reports, so maybe its specific to my config. If I run make V=1, I can see that gcc is called with '-pg -mprofile-kernel', but then the record_mcount script is skipped. Any ideas? But you see it running the script if you remove that commit? Do you happen to see -mrecord-mcount at all in a V=1 make? See entire make output from the two runs here: http://people.redhat.com/~jolawren/ppc64le-mprofile/ Mystery solved! That other commit inadvertently added a trailing space to CC_FLAGS_FTRACE, which then confused the findstring call in the cmd_record_mcount Makefile function. Workaround patch just posted ... kbuild experts can suggest better fixes if that one is sub optimal. Thanks, -- Joe
[PATCH] kbuild: strip whitespace in cmd_record_mcount findstring
CC_FLAGS_FTRACE may contain trailing whitespace that interferes with findstring. For example, commit 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on GCC 4.9 and newer") introduced a change such that on my ppc64le box, CC_FLAGS_FTRACE="-pg -mprofile-kernel ". (Note the trailing space.) When cmd_record_mcount is now invoked, findstring fails as the ftrace flags were found at very end of _c_flags, without the trailing space. _c_flags=" ... -pg -mprofile-kernel" CC_FLAGS_FTRACE="-pg -mprofile-kernel " ^ findstring is looking for this extra space Remove the redundant whitespaces from CC_FLAGS_FTRACE in cmd_record_mcount to avoid this problem. Fixes: 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on GCC 4.9 and newer"). Signed-off-by: Joe Lawrence --- Standard disclaimer: I'm not a kbuild expert, but this works around the problem I reported where ftrace and livepatch self-tests were failing as specified object files were not run through the recordmcount.pl script: ppc64le: ftrace self-tests and $(CC_FLAGS_FTRACE) broken? https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-March/187298.html scripts/Makefile.build | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 2554a15ecf2b..74d402b5aa3c 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -199,10 +199,10 @@ sub_cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(part-of-module),1,0)" "$(@)"; recordmcount_source := $(srctree)/scripts/recordmcount.pl endif # BUILD_C_RECORDMCOUNT -cmd_record_mcount =\ - if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" = \ -"$(CC_FLAGS_FTRACE)" ]; then \ - $(sub_cmd_record_mcount)\ +cmd_record_mcount =\ + if [ "$(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags))" = \ +"$(strip $(CC_FLAGS_FTRACE))" ]; then \ + $(sub_cmd_record_mcount)\ fi endif # CC_USING_RECORD_MCOUNT endif # CONFIG_FTRACE_MCOUNT_RECORD -- 2.20.1
Re: Regression in 5.1.0-rc2: PowerBook G4 Aluminum fails to boot - bisected to commit 0df977eafc792
On 3/25/19 1:53 AM, Christophe Leroy wrote: Le 25/03/2019 à 01:49, Larry Finger a écrit : A build of kernel 5.1.0-rc2 resulted in a failure to boot on my PowerBook G4 Aluminum. The bootstrap loads the initial kernel and issues the appropriate start, but the machine hangs at that point. Can you please be more explicit ? What do you mean by "issues the appropriate start" ? What is "that point" ? Any messages on the console ? Sorry to be unclear. The bootstrap code prints a line saying "Booting Linux via __start() @ 0x00c0" and then hangs. Your idea of confusion between physical and virtual address sounds right. I am building a kernel with that patch applied now. As it seems to be going quickly, I should have the answer fairly quickly. Thanks, Larry
Re: [PATCH v3 7/7] ocxl: Provide global MMIO accessors for external drivers
Le 25/03/2019 à 06:44, Alastair D'Silva a écrit : From: Alastair D'Silva External drivers that communicate via OpenCAPI will need to make MMIO calls to interact with the devices. Signed-off-by: Alastair D'Silva Reviewed-by: Greg Kurz --- Acked-by: Frederic Barrat drivers/misc/ocxl/Makefile | 2 +- drivers/misc/ocxl/mmio.c | 234 + include/misc/ocxl.h| 110 + 3 files changed, 345 insertions(+), 1 deletion(-) create mode 100644 drivers/misc/ocxl/mmio.c diff --git a/drivers/misc/ocxl/Makefile b/drivers/misc/ocxl/Makefile index bc4e39bfda7b..d07d1bb8e8d4 100644 --- a/drivers/misc/ocxl/Makefile +++ b/drivers/misc/ocxl/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0+ ccflags-$(CONFIG_PPC_WERROR) += -Werror -ocxl-y+= main.o pci.o config.o file.o pasid.o +ocxl-y += main.o pci.o config.o file.o pasid.o mmio.o ocxl-y+= link.o context.o afu_irq.o sysfs.o trace.o ocxl-y+= core.o obj-$(CONFIG_OCXL)+= ocxl.o diff --git a/drivers/misc/ocxl/mmio.c b/drivers/misc/ocxl/mmio.c new file mode 100644 index ..aae713db4ebe --- /dev/null +++ b/drivers/misc/ocxl/mmio.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2019 IBM Corp. +#include +#include "trace.h" +#include "ocxl_internal.h" + +int ocxl_global_mmio_read32(struct ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u32 *val) +{ + if (offset > afu->config.global_mmio_size - 4) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + *val = readl_be((char *)afu->global_mmio_ptr + offset); + break; + + default: + *val = readl((char *)afu->global_mmio_ptr + offset); + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_global_mmio_read32); + +int ocxl_global_mmio_read64(struct ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u64 *val) +{ + if (offset > afu->config.global_mmio_size - 8) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + *val = readq_be((char *)afu->global_mmio_ptr + offset); + break; + + default: + *val = readq((char *)afu->global_mmio_ptr + offset); + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_global_mmio_read64); + +int ocxl_global_mmio_write32(struct ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u32 val) +{ + if (offset > afu->config.global_mmio_size - 4) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + writel_be(val, (char *)afu->global_mmio_ptr + offset); + break; + + default: + writel(val, (char *)afu->global_mmio_ptr + offset); + break; + } + + + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_global_mmio_write32); + +int ocxl_global_mmio_write64(struct ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u64 val) +{ + if (offset > afu->config.global_mmio_size - 8) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + writeq_be(val, (char *)afu->global_mmio_ptr + offset); + break; + + default: + writeq(val, (char *)afu->global_mmio_ptr + offset); + break; + } + + + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_global_mmio_write64); + +int ocxl_global_mmio_set32(struct ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u32 mask) +{ + u32 tmp; + + if (offset > afu->config.global_mmio_size - 4) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + tmp = readl_be((char *)afu->global_mmio_ptr + offset); + tmp |= mask; + writel_be(tmp, (char *)afu->global_mmio_ptr + offset); + break; + + default: + tmp = readl((char *)afu->global_mmio_ptr + offset); + tmp |= mask; + writel(tmp, (char *)afu->global_mmio_ptr + offset); + break; + } + +
Re: [PATCH v3 6/7] ocxl: move event_fd handling to frontend
Le 25/03/2019 à 06:44, Alastair D'Silva a écrit : From: Alastair D'Silva Event_fd is only used in the driver frontend, so it does not need to exist in the backend code. Relocate it to the frontend and provide an opaque mechanism for consumers instead. Signed-off-by: Alastair D'Silva --- drivers/misc/ocxl/afu_irq.c | 76 ++- drivers/misc/ocxl/file.c | 22 - drivers/misc/ocxl/ocxl_internal.h | 5 -- include/misc/ocxl.h | 46 +++ 4 files changed, 111 insertions(+), 38 deletions(-) diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c index 2d410cd6f817..d71e62df7d31 100644 --- a/drivers/misc/ocxl/afu_irq.c +++ b/drivers/misc/ocxl/afu_irq.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ // Copyright 2017 IBM Corp. #include -#include +#include #include "ocxl_internal.h" #include "trace.h" @@ -11,7 +11,9 @@ struct afu_irq { unsigned int virq; char *name; u64 trigger_page; - struct eventfd_ctx *ev_ctx; + irqreturn_t (*handler)(void *private); + void (*free_private)(void *private); + void *private; }; int ocxl_irq_offset_to_id(struct ocxl_context *ctx, u64 offset) @@ -24,14 +26,43 @@ u64 ocxl_irq_id_to_offset(struct ocxl_context *ctx, int irq_id) return ctx->afu->irq_base_offset + (irq_id << PAGE_SHIFT); } +int ocxl_irq_set_handler(struct ocxl_context *ctx, int irq_id, + irqreturn_t (*handler)(void *private), + void (*free_private)(void *private), + void *private) +{ + struct afu_irq *irq; + int rc; + + mutex_lock(&ctx->irq_lock); + irq = idr_find(&ctx->irq_idr, irq_id); + if (!irq) { + rc = -EINVAL; + goto unlock; + } + + irq->handler = handler; + irq->private = private; + + rc = 0; + goto unlock; That goto is too much. + +unlock: + mutex_unlock(&ctx->irq_lock); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_irq_set_handler); + static irqreturn_t afu_irq_handler(int virq, void *data) { struct afu_irq *irq = (struct afu_irq *) data; trace_ocxl_afu_irq_receive(virq); - if (irq->ev_ctx) - eventfd_signal(irq->ev_ctx, 1); - return IRQ_HANDLED; + + if (irq->handler) + return irq->handler(irq->private); + + return IRQ_HANDLED; // Just drop it on the ground } static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq) @@ -118,6 +149,8 @@ int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id) return rc; } +EXPORT_SYMBOL_GPL(ocxl_afu_irq_alloc); nit: checkpatch doesn't like the empty line between the function and its symbol export. Also true for a few other symbols in that file. diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h index a8fe0ce4ea67..1b48e9d63abb 100644 --- a/include/misc/ocxl.h +++ b/include/misc/ocxl.h @@ -161,6 +161,52 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, */ int ocxl_context_detach(struct ocxl_context *ctx); +// AFU IRQs + +/** + * Allocate an IRQ associated with an AFU context + * @ctx: the AFU context + * @irq_id: out, the IRQ ID + * + * Returns 0 on success, negative on failure + */ +extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id); + +/** + * Frees an IRQ associated with an AFU context + * @ctx: the AFU context + * @irq_id: the IRQ ID + * + * Returns 0 on success, negative on failure + */ +extern int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id); + +/** + * Gets the address of the trigger page for an IRQ + * This can then be provided to an AFU which will write to that + * page to trigger the IRQ. + * @ctx: The AFU context that the IRQ is associated with + * @irq_id: The IRQ ID + * + * returns the trigger page address, or 0 if the IRQ is not valid + */ +extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id); + +/** + * Provide a callback to be called when an IRQ is triggered + * @ctx: The AFU context that the IRQ is associated with + * @irq_id: The IRQ ID + * @handler: the callback to be called when the IRQ is triggered + * @free_private: the callback to be called when the IRQ is freed We should mention free_private can be NULL, in which case it is ignored. Fred
Re: [PATCH v5 3/3] locking/rwsem: Optimize down_read_trylock()
Hi, Could you share the microbenchmark you are using ? I'd like to test the series on powerpc. Thanks Christophe Le 22/03/2019 à 15:30, Waiman Long a écrit : Modify __down_read_trylock() to optimize for an unlocked rwsem and make it generate slightly better code. Before this patch, down_read_trylock: 0x <+0>: callq 0x5 0x0005 <+5>: jmp0x18 0x0007 <+7>: lea0x1(%rdx),%rcx 0x000b <+11>:mov%rdx,%rax 0x000e <+14>:lock cmpxchg %rcx,(%rdi) 0x0013 <+19>:cmp%rax,%rdx 0x0016 <+22>:je 0x23 0x0018 <+24>:mov(%rdi),%rdx 0x001b <+27>:test %rdx,%rdx 0x001e <+30>:jns0x7 0x0020 <+32>:xor%eax,%eax 0x0022 <+34>:retq 0x0023 <+35>:mov%gs:0x0,%rax 0x002c <+44>:or $0x3,%rax 0x0030 <+48>:mov%rax,0x20(%rdi) 0x0034 <+52>:mov$0x1,%eax 0x0039 <+57>:retq After patch, down_read_trylock: 0x <+0>: callq 0x5 0x0005 <+5>: xor%eax,%eax 0x0007 <+7>: lea0x1(%rax),%rdx 0x000b <+11>: lock cmpxchg %rdx,(%rdi) 0x0010 <+16>: jne0x29 0x0012 <+18>: mov%gs:0x0,%rax 0x001b <+27>: or $0x3,%rax 0x001f <+31>: mov%rax,0x20(%rdi) 0x0023 <+35>: mov$0x1,%eax 0x0028 <+40>: retq 0x0029 <+41>: test %rax,%rax 0x002c <+44>: jns0x7 0x002e <+46>: xor%eax,%eax 0x0030 <+48>: retq By using a rwsem microbenchmark, the down_read_trylock() rate (with a load of 10 to lengthen the lock critical section) on a x86-64 system before and after the patch were: Before PatchAfter Patch # of Threads rlock rlock - - 1 14,496 14,716 28,644 8,453 46,799 6,983 85,664 7,190 On a ARM64 system, the performance results were: Before PatchAfter Patch # of Threads rlock rlock - - 1 23,676 24,488 27,697 9,502 44,945 3,440 82,641 1,603 For the uncontended case (1 thread), the new down_read_trylock() is a little bit faster. For the contended cases, the new down_read_trylock() perform pretty well in x86-64, but performance degrades at high contention level on ARM64. Suggested-by: Linus Torvalds Signed-off-by: Waiman Long --- kernel/locking/rwsem.h | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 45ee00236e03..1f5775aa6a1d 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -174,14 +174,17 @@ static inline int __down_read_killable(struct rw_semaphore *sem) static inline int __down_read_trylock(struct rw_semaphore *sem) { - long tmp; + /* +* Optimize for the case when the rwsem is not locked at all. +*/ + long tmp = RWSEM_UNLOCKED_VALUE; - while ((tmp = atomic_long_read(&sem->count)) >= 0) { - if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, - tmp + RWSEM_ACTIVE_READ_BIAS)) { + do { + if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, + tmp + RWSEM_ACTIVE_READ_BIAS)) { return 1; } - } + } while (tmp >= 0); return 0; }
Re: [PATCH v3 5/7] ocxl: afu_irq only deals with IRQ IDs, not offsets
Le 25/03/2019 à 06:44, Alastair D'Silva a écrit : From: Alastair D'Silva The use of offsets is required only in the frontend, so alter the IRQ API to only work with IRQ IDs in the backend. Signed-off-by: Alastair D'Silva --- Acked-by: Frederic Barrat drivers/misc/ocxl/afu_irq.c | 34 +++ drivers/misc/ocxl/context.c | 7 +-- drivers/misc/ocxl/file.c | 13 +++- drivers/misc/ocxl/ocxl_internal.h | 10 + drivers/misc/ocxl/trace.h | 12 --- 5 files changed, 39 insertions(+), 37 deletions(-) diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c index 11ab996657a2..2d410cd6f817 100644 --- a/drivers/misc/ocxl/afu_irq.c +++ b/drivers/misc/ocxl/afu_irq.c @@ -14,14 +14,14 @@ struct afu_irq { struct eventfd_ctx *ev_ctx; }; -static int irq_offset_to_id(struct ocxl_context *ctx, u64 offset) +int ocxl_irq_offset_to_id(struct ocxl_context *ctx, u64 offset) { return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT; } -static u64 irq_id_to_offset(struct ocxl_context *ctx, int id) +u64 ocxl_irq_id_to_offset(struct ocxl_context *ctx, int irq_id) { - return ctx->afu->irq_base_offset + (id << PAGE_SHIFT); + return ctx->afu->irq_base_offset + (irq_id << PAGE_SHIFT); } static irqreturn_t afu_irq_handler(int virq, void *data) @@ -69,7 +69,7 @@ static void release_afu_irq(struct afu_irq *irq) kfree(irq->name); } -int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset) +int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id) { struct afu_irq *irq; int rc; @@ -101,11 +101,11 @@ int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset) if (rc) goto err_alloc; - *irq_offset = irq_id_to_offset(ctx, irq->id); - - trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq, - *irq_offset); + trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq); mutex_unlock(&ctx->irq_lock); + + *irq_id = irq->id; + return 0; err_alloc: @@ -123,7 +123,7 @@ static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx) trace_ocxl_afu_irq_free(ctx->pasid, irq->id); if (ctx->mapping) unmap_mapping_range(ctx->mapping, - irq_id_to_offset(ctx, irq->id), + ocxl_irq_id_to_offset(ctx, irq->id), 1 << PAGE_SHIFT, 1); release_afu_irq(irq); if (irq->ev_ctx) @@ -132,14 +132,13 @@ static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx) kfree(irq); } -int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset) +int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id) { struct afu_irq *irq; - int id = irq_offset_to_id(ctx, irq_offset); mutex_lock(&ctx->irq_lock); - irq = idr_find(&ctx->irq_idr, id); + irq = idr_find(&ctx->irq_idr, irq_id); if (!irq) { mutex_unlock(&ctx->irq_lock); return -EINVAL; @@ -161,14 +160,14 @@ void ocxl_afu_irq_free_all(struct ocxl_context *ctx) mutex_unlock(&ctx->irq_lock); } -int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd) +int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, int irq_id, int eventfd) { struct afu_irq *irq; struct eventfd_ctx *ev_ctx; - int rc = 0, id = irq_offset_to_id(ctx, irq_offset); + int rc = 0; mutex_lock(&ctx->irq_lock); - irq = idr_find(&ctx->irq_idr, id); + irq = idr_find(&ctx->irq_idr, irq_id); if (!irq) { rc = -EINVAL; goto unlock; @@ -186,14 +185,13 @@ int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd) return rc; } -u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset) +u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id) { struct afu_irq *irq; - int id = irq_offset_to_id(ctx, irq_offset); u64 addr = 0; mutex_lock(&ctx->irq_lock); - irq = idr_find(&ctx->irq_idr, id); + irq = idr_find(&ctx->irq_idr, irq_id); if (irq) addr = irq->trigger_page; mutex_unlock(&ctx->irq_lock); diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c index 8b97b0f19db8..d6056883b85d 100644 --- a/drivers/misc/ocxl/context.c +++ b/drivers/misc/ocxl/context.c @@ -93,8 +93,9 @@ static vm_fault_t map_afu_irq(struct vm_area_struct *vma, unsigned long address, u64 offset, struct ocxl_context *ctx) { u64 trigger_addr; + int irq_id = ocxl_irq_offset_to_id(ctx, offset); - trigger_addr = ocxl_afu_irq_get_addr(ctx, offset); + trigger_addr = ocxl_afu_irq_get_addr(ctx, irq_id); if (!trigger_addr)
Re: [PATCH v3 4/7] ocxl: Allow external drivers to use OpenCAPI contexts
Le 25/03/2019 à 06:44, Alastair D'Silva a écrit : From: Alastair D'Silva Most OpenCAPI operations require a valid context, so exposing these functions to external drivers is necessary. Signed-off-by: Alastair D'Silva Reviewed-by: Greg Kurz --- See comment on previous patch regarding merging ocxl_context_alloc() and ocxl_context_init(), it would impact the exported symbols. Fred drivers/misc/ocxl/context.c | 9 +-- drivers/misc/ocxl/file.c | 2 +- drivers/misc/ocxl/ocxl_internal.h | 6 - include/misc/ocxl.h | 45 +++ 4 files changed, 53 insertions(+), 9 deletions(-) diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c index c73a859d2224..8b97b0f19db8 100644 --- a/drivers/misc/ocxl/context.c +++ b/drivers/misc/ocxl/context.c @@ -8,6 +8,7 @@ struct ocxl_context *ocxl_context_alloc(void) { return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL); } +EXPORT_SYMBOL_GPL(ocxl_context_alloc); int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, struct address_space *mapping) @@ -43,6 +44,7 @@ int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, ocxl_afu_get(afu); return 0; } +EXPORT_SYMBOL_GPL(ocxl_context_init); /* * Callback for when a translation fault triggers an error @@ -63,7 +65,7 @@ static void xsl_fault_error(void *data, u64 addr, u64 dsisr) wake_up_all(&ctx->events_wq); } -int ocxl_context_attach(struct ocxl_context *ctx, u64 amr) +int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm) { int rc; @@ -75,7 +77,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr) } rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, - current->mm->context.id, ctx->tidr, amr, current->mm, + mm->context.id, ctx->tidr, amr, mm, xsl_fault_error, ctx); if (rc) goto out; @@ -85,6 +87,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr) mutex_unlock(&ctx->status_mutex); return rc; } +EXPORT_SYMBOL_GPL(ocxl_context_attach); static vm_fault_t map_afu_irq(struct vm_area_struct *vma, unsigned long address, u64 offset, struct ocxl_context *ctx) @@ -243,6 +246,7 @@ int ocxl_context_detach(struct ocxl_context *ctx) } return 0; } +EXPORT_SYMBOL_GPL(ocxl_context_detach); void ocxl_context_detach_all(struct ocxl_afu *afu) { @@ -280,3 +284,4 @@ void ocxl_context_free(struct ocxl_context *ctx) ocxl_afu_put(ctx->afu); kfree(ctx); } +EXPORT_SYMBOL_GPL(ocxl_context_free); diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index e6e6121cd9a3..e51578186fd4 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -94,7 +94,7 @@ static long afu_ioctl_attach(struct ocxl_context *ctx, return -EINVAL; amr = arg.amr & mfspr(SPRN_UAMOR); - rc = ocxl_context_attach(ctx, amr); + rc = ocxl_context_attach(ctx, amr, current->mm); return rc; } diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h index e04e547df29e..cda1e7667fc8 100644 --- a/drivers/misc/ocxl/ocxl_internal.h +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -130,15 +130,9 @@ int ocxl_config_check_afu_index(struct pci_dev *dev, */ int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid); -struct ocxl_context *ocxl_context_alloc(void); -int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, - struct address_space *mapping); -int ocxl_context_attach(struct ocxl_context *ctx, u64 amr); int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma); -int ocxl_context_detach(struct ocxl_context *ctx); void ocxl_context_detach_all(struct ocxl_afu *afu); -void ocxl_context_free(struct ocxl_context *ctx); int ocxl_sysfs_register_afu(struct ocxl_afu *afu); void ocxl_sysfs_unregister_afu(struct ocxl_afu *afu); diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h index 8bafd748e380..a8fe0ce4ea67 100644 --- a/include/misc/ocxl.h +++ b/include/misc/ocxl.h @@ -116,6 +116,51 @@ const struct ocxl_fn_config *ocxl_function_config(struct ocxl_fn *fn); */ void ocxl_function_close(struct ocxl_fn *fn); +// Context allocation + +/** + * Allocate space for a new OpenCAPI context + * + * Returns NULL on failure + */ +struct ocxl_context *ocxl_context_alloc(void); + +/** + * Initialize an OpenCAPI context + * + * @ctx: The OpenCAPI context to initialize + * @afu: The AFU the context belongs to + * @mapping: The mapping to unmap when the context is closed (may be NULL) + */ +int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, + struct address_space *mapping); + +/** + * Free an OpenCAPI context + * + *
Re: [PATCH v3 3/7] ocxl: Create a clear delineation between ocxl backend & frontend
This is a huge patch, there are probably ways to split it in smaller pieces to make the review easier. However, considering how much time we've already invested discussing and reviewing it, it's with by me to keep it as is. The ref-counting and device management look good to me now. A few details below. --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -17,12 +17,10 @@ static struct class *ocxl_class; static struct mutex minors_idr_lock; static struct idr minors_idr; -static struct ocxl_afu *find_and_get_afu(dev_t devno) +static struct ocxl_file_info *find_file_info(dev_t devno) { - struct ocxl_afu *afu; - int afu_minor; + struct ocxl_file_info *info; - afu_minor = MINOR(devno); /* * We don't declare an RCU critical section here, as our AFU * is protected by a reference counter on the device. By the time the @@ -30,56 +28,52 @@ static struct ocxl_afu *find_and_get_afu(dev_t devno) * the device is already at 0, so no user API will access that AFU and * this function can't return it. */ The comment is still true, but needs tuning. Something like: "We don't declare an RCU critical section here, as our info structure is protected by a reference counter on the device. By the time the info reference is removed from the idr, the ref count of the device is already at 0, so no user API will access the corresponding AFU and this function can't return it." - afu = idr_find(&minors_idr, afu_minor); - if (afu) - ocxl_afu_get(afu); - return afu; + info = idr_find(&minors_idr, MINOR(devno)); + return info; } -static int allocate_afu_minor(struct ocxl_afu *afu) +static int allocate_minor(struct ocxl_file_info *info) { int minor; mutex_lock(&minors_idr_lock); - minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL); + minor = idr_alloc(&minors_idr, info, 0, OCXL_NUM_MINORS, GFP_KERNEL); mutex_unlock(&minors_idr_lock); return minor; } -static void free_afu_minor(struct ocxl_afu *afu) +static void free_minor(struct ocxl_file_info *info) { mutex_lock(&minors_idr_lock); - idr_remove(&minors_idr, MINOR(afu->dev.devt)); + idr_remove(&minors_idr, MINOR(info->dev.devt)); mutex_unlock(&minors_idr_lock); } static int afu_open(struct inode *inode, struct file *file) { - struct ocxl_afu *afu; + struct ocxl_file_info *info; struct ocxl_context *ctx; int rc; pr_debug("%s for device %x\n", __func__, inode->i_rdev); - afu = find_and_get_afu(inode->i_rdev); - if (!afu) + info = find_file_info(inode->i_rdev); + if (!info) return -ENODEV; ctx = ocxl_context_alloc(); if (!ctx) { rc = -ENOMEM; - goto put_afu; + goto err; } - rc = ocxl_context_init(ctx, afu, inode->i_mapping); + rc = ocxl_context_init(ctx, info->afu, inode->i_mapping); if (rc) - goto put_afu; + goto err; file->private_data = ctx; - ocxl_afu_put(afu); return 0; -put_afu: - ocxl_afu_put(afu); +err: return rc; The error path with goto is here useless. However, if ocxl_context_init() fails, the memory for the context is never released. You may consider either getting rid of ocxl_context_alloc(), which is just a simple wrapper around kzalloc(), or merging the allocation in ocxl_context_init(). It would impact the external API, but having 2 calls (alloc and init) feels like there's one too many. +static int ocxl_file_make_visible(struct ocxl_afu *afu) { int rc; + struct ocxl_file_info *info = ocxl_afu_get_private(afu); - cdev_init(&afu->cdev, &ocxl_afu_fops); - rc = cdev_add(&afu->cdev, afu->dev.devt, 1); + cdev_init(&info->cdev, &ocxl_afu_fops); + rc = cdev_add(&info->cdev, info->dev.devt, 1); if (rc) { - dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc); + dev_err(&info->dev, "Unable to add afu char device: %d\n", rc); return rc; } + return 0; } -void ocxl_destroy_cdev(struct ocxl_afu *afu) +void ocxl_file_make_invisible(struct ocxl_afu *afu) This function is not called anywhere? -void ocxl_unregister_afu(struct ocxl_afu *afu) +void ocxl_file_unregister_afu(struct ocxl_afu *afu) { - free_afu_minor(afu); + struct ocxl_file_info *info = ocxl_afu_get_private(afu); + + if (!info) + return; + So that's likely where we miss the "make invisible" call. However, is it enough to rely on the private data to be set on the AFU? diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h index 81086534dab5..e04e547df29e 100644 --- a/drivers/misc/ocxl/ocxl_internal.h +++ b/drivers/misc/ocxl/ocxl_intern
[PATCH 2/2] arch: add pidfd and io_uring syscalls everywhere
Add the io_uring and pidfd_send_signal system calls to all architectures. These system calls are designed to handle both native and compat tasks, so all entries are the same across architectures, only arm-compat and the generic tale still use an old format. Signed-off-by: Arnd Bergmann --- arch/alpha/kernel/syscalls/syscall.tbl | 4 arch/arm/tools/syscall.tbl | 4 arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 8 arch/ia64/kernel/syscalls/syscall.tbl | 4 arch/m68k/kernel/syscalls/syscall.tbl | 4 arch/microblaze/kernel/syscalls/syscall.tbl | 4 arch/mips/kernel/syscalls/syscall_n32.tbl | 4 arch/mips/kernel/syscalls/syscall_n64.tbl | 4 arch/mips/kernel/syscalls/syscall_o32.tbl | 4 arch/parisc/kernel/syscalls/syscall.tbl | 4 arch/powerpc/kernel/syscalls/syscall.tbl| 4 arch/s390/kernel/syscalls/syscall.tbl | 4 arch/sh/kernel/syscalls/syscall.tbl | 4 arch/sparc/kernel/syscalls/syscall.tbl | 4 arch/xtensa/kernel/syscalls/syscall.tbl | 4 16 files changed, 65 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 63ed39cbd3bd..165f268beafc 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -463,3 +463,7 @@ 532common getppid sys_getppid # all other architectures have common numbers for new syscall, alpha # is the exception. +534common pidfd_send_signal sys_pidfd_send_signal +535common io_uring_setup sys_io_uring_setup +536common io_uring_enter sys_io_uring_enter +537common io_uring_register sys_io_uring_register diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 9016f4081bb9..0393917eaa57 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -437,3 +437,7 @@ 421common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422common futex_time64sys_futex 423common sched_rr_get_interval_time64sys_sched_rr_get_interval +424common pidfd_send_signal sys_pidfd_send_signal +425common io_uring_setup sys_io_uring_setup +426common io_uring_enter sys_io_uring_enter +427common io_uring_register sys_io_uring_register diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 310d8f1cae7a..c6946fe640e6 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -49,7 +49,7 @@ #define __ARM_NR_compat_set_tls(__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END(__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 424 +#define __NR_compat_syscalls 428 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 5590f2623690..23f1a44acada 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64) __SYSCALL(__NR_futex_time64, sys_futex) #define __NR_sched_rr_get_interval_time64 423 __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval) +#define __NR_pidfd_send_signal 424 +__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal) +#define __NR_io_uring_setup 425 +__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) +#define __NR_io_uring_enter 426 +__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) +#define __NR_io_uring_register 427 +__SYSCALL(__NR_io_uring_register, sys_io_uring_register) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index ab9cda5f6136..56e3d0b685e1 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -344,3 +344,7 @@ 332common pkey_free sys_pkey_free 333common rseqsys_rseq # 334 through 423 are reserved to sync up with other architectures +424common pidfd_send_signal sys_pidfd_send_signal +425common io_uring_setup sys_io_uring_setup +426common io_uring_enter sys_io_uring_enter +427common io_uring_register sys_io_uring_register diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 125c14178979..df4ec3ec71d1 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -423,3 +423,7 @@ 421common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422common futex_time64sys_futex 4
Re: [RESEND 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM
On Fri, Mar 22, 2019 at 02:24:40PM -0700, Dan Williams wrote: > On Sun, Mar 17, 2019 at 7:36 PM wrote: > > > > From: Ira Weiny > > > > Rather than have a separate get_user_pages_longterm() call, > > introduce FOLL_LONGTERM and change the longterm callers to use > > it. > > > > This patch does not change any functionality. > > > > FOLL_LONGTERM can only be supported with get_user_pages() as it > > requires vmas to determine if DAX is in use. > > > > CC: Aneesh Kumar K.V > > CC: Andrew Morton > > CC: Michal Hocko > > Signed-off-by: Ira Weiny > [..] > > diff --git a/include/linux/mm.h b/include/linux/mm.h > > index 2d483dbdffc0..6831077d126c 100644 > > --- a/include/linux/mm.h > > +++ b/include/linux/mm.h > [..] > > @@ -2609,6 +2596,7 @@ struct page *follow_page(struct vm_area_struct *vma, > > unsigned long address, > > #define FOLL_REMOTE0x2000 /* we are working on non-current tsk/mm */ > > #define FOLL_COW 0x4000 /* internal GUP flag */ > > #define FOLL_ANON 0x8000 /* don't do file mappings */ > > +#define FOLL_LONGTERM 0x1 /* mapping is intended for a long term pin > > */ > > Let's change this comment to say something like /* mapping lifetime is > indefinite / at the discretion of userspace */, since "longterm is not > well defined. > > I think it should also include a /* FIXME: */ to say something about > the havoc a long term pin might wreak on fs and mm code paths. Will do. > > > static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) > > { > > diff --git a/mm/gup.c b/mm/gup.c > > index f84e22685aaa..8cb4cff067bc 100644 > > --- a/mm/gup.c > > +++ b/mm/gup.c > > @@ -1112,26 +1112,7 @@ long get_user_pages_remote(struct task_struct *tsk, > > struct mm_struct *mm, > > } > > EXPORT_SYMBOL(get_user_pages_remote); > > > > -/* > > - * This is the same as get_user_pages_remote(), just with a > > - * less-flexible calling convention where we assume that the task > > - * and mm being operated on are the current task's and don't allow > > - * passing of a locked parameter. We also obviously don't pass > > - * FOLL_REMOTE in here. > > - */ > > -long get_user_pages(unsigned long start, unsigned long nr_pages, > > - unsigned int gup_flags, struct page **pages, > > - struct vm_area_struct **vmas) > > -{ > > - return __get_user_pages_locked(current, current->mm, start, > > nr_pages, > > - pages, vmas, NULL, > > - gup_flags | FOLL_TOUCH); > > -} > > -EXPORT_SYMBOL(get_user_pages); > > - > > #if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA) > > - > > -#ifdef CONFIG_FS_DAX > > static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages) > > { > > long i; > > @@ -1150,12 +1131,6 @@ static bool check_dax_vmas(struct vm_area_struct > > **vmas, long nr_pages) > > } > > return false; > > } > > -#else > > -static inline bool check_dax_vmas(struct vm_area_struct **vmas, long > > nr_pages) > > -{ > > - return false; > > -} > > -#endif > > > > #ifdef CONFIG_CMA > > static struct page *new_non_cma_page(struct page *page, unsigned long > > private) > > @@ -1209,10 +1184,13 @@ static struct page *new_non_cma_page(struct page > > *page, unsigned long private) > > return __alloc_pages_node(nid, gfp_mask, 0); > > } > > > > -static long check_and_migrate_cma_pages(unsigned long start, long nr_pages, > > - unsigned int gup_flags, > > +static long check_and_migrate_cma_pages(struct task_struct *tsk, > > + struct mm_struct *mm, > > + unsigned long start, > > + unsigned long nr_pages, > > struct page **pages, > > - struct vm_area_struct **vmas) > > + struct vm_area_struct **vmas, > > + unsigned int gup_flags) > > { > > long i; > > bool drain_allow = true; > > @@ -1268,10 +1246,14 @@ static long check_and_migrate_cma_pages(unsigned > > long start, long nr_pages, > > putback_movable_pages(&cma_page_list); > > } > > /* > > -* We did migrate all the pages, Try to get the page > > references again > > -* migrating any new CMA pages which we failed to isolate > > earlier. > > +* We did migrate all the pages, Try to get the page > > references > > +* again migrating any new CMA pages which we failed to > > isolate > > +* earlier. > > */ > > - nr_pages = get_user_pages(start, nr_pages, gup_flags, > > pages, vmas); > > + nr_pages = __get_user_pages_locked(tsk, mm, start, nr_pages, > > +
Re: [PATCH v3] powerpc/64: Fix memcmp reading past the end of src/dest
Segher Boessenkool writes: > On Fri, Mar 22, 2019 at 11:37:24PM +1100, Michael Ellerman wrote: >> .Lcmp_rest_lt8bytes: >> -/* Here we have only less than 8 bytes to compare with. at least s1 >> - * Address is aligned with 8 bytes. >> - * The next double words are load and shift right with appropriate >> - * bits. >> +/* >> + * Here we have less than 8 bytes to compare. At least s1 is aligned to >> + * 8 bytes, but s2 may not be. We must make sure s2 + 8 doesn't cross a > > "s2 + 7"? The code is fine though (bgt, not bge). Duh, thanks for catching it. >> + * page boundary, otherwise we might read past the end of the buffer and >> + * trigger a page fault. We use 4K as the conservative minimum page >> + * size. If we detect that case we go to the byte-by-byte loop. >> + * >> + * Otherwise the next double word is loaded from s1 and s2, and shifted >> + * right to compare the appropriate bits. >> */ >> +clrldi r6,r4,(64-12) // r6 = r4 & 0xfff > > You can just write > rlwinm r6,r4,0,0x0fff > if that is clearer? Or do you still want a comment with that :-) I don't think it's clearer doing a rotate of zero bits :) And yeah I'd probably still leave the comment, so I'm inclined to stick with the clrldi? Would be nice if the assembler could support: andir6, r4, 0x0fff And turn it into the rlwinm, or rldicl :) >> +cmpdi r6,0xff8 >> +bgt .Lshort > > Reviewed-by: Segher Boessenkool I'll fixup the comment. Thanks. cheers
PowerPC fixes 5.1-3: CONFIG_SPARSEMEM doesn't exist in the kernel source code
I was able to activate CONFIG_SPARSEMEM in the kernel configuration. But does the P.A. Semi Nemo board need this option? -- Christian On 25 March 2019 at 12:00PM, Christian Zigotzky wrote: Hi All, I wasn't able to compile the RC2 today because of the following error messages: CC arch/powerpc/mm/slb.o In file included from ./arch/powerpc/include/asm/book3s/64/mmu.h:39:0, from ./arch/powerpc/include/asm/mmu.h:360, from ./arch/powerpc/include/asm/lppaca.h:36, from ./arch/powerpc/include/asm/paca.h:21, from ./arch/powerpc/include/asm/current.h:16, from ./include/linux/thread_info.h:21, from ./include/asm-generic/preempt.h:5, from ./arch/powerpc/include/generated/asm/preempt.h:1, from ./include/linux/preempt.h:78, from ./include/linux/spinlock.h:51, from ./include/linux/mmzone.h:8, from ./include/linux/gfp.h:6, from ./include/linux/mm.h:10, from ./arch/powerpc/include/asm/cacheflush.h:12, from ./arch/powerpc/include/asm/asm-prototypes.h:16, from arch/powerpc/mm/slb.c:17: ./arch/powerpc/include/asm/book3s/64/mmu-hash.h:584:6: warning: "MAX_PHYSMEM_BITS" is not defined [-Wundef] #if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT) ^ arch/powerpc/mm/slb.c: In function 'slb_allocate_kernel': arch/powerpc/mm/slb.c:697:37: error: 'MAX_PHYSMEM_BITS' undeclared (first use in this function) if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS)) ^ arch/powerpc/mm/slb.c:697:37: note: each undeclared identifier is reported only once for each function it appears in scripts/Makefile.build:278: recipe for target 'arch/powerpc/mm/slb.o' failed make[3]: *** [arch/powerpc/mm/slb.o] Error 1 scripts/Makefile.build:489: recipe for target 'arch/powerpc/mm' failed make[2]: *** [arch/powerpc/mm] Error 2 /home/christian/Downloads/a/Makefile:1046: recipe for target 'arch/powerpc' failed make[1]: *** [arch/powerpc] Error 2 Makefile:170: recipe for target 'sub-make' failed make: *** [sub-make] Error 2 - The variable MAX_PHYSMEM_BITS isn't defined. The problem is in the last PowerPC fixes 5.1-3: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h=v5.1-rc2&id=a5ed1e96cafde5ba48638f486bfca0685dc6ddc9 Commit log: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations Problematic fix: diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index d34ad1657d7b..598cdcdd1355 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -352,7 +352,7 @@ static inline bool strict_kernel_rwx_enabled(void) #if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ defined (CONFIG_PPC_64K_PAGES) #define MAX_PHYSMEM_BITS 51 -#else +#elif defined(CONFIG_SPARSEMEM) #define MAX_PHYSMEM_BITS 46 #endif The first if statement isn't successfull because the variables CONFIG_SPARSEMEM_EXTREME and CONFIG_PPC_64K_PAGES aren't activated in our kernel configuration. Therefore we need MAX_PHYSMEM_BITS 46 for our Nemo board. Unfortunately CONFIG_SPARSEMEM doesn't exist in the kernel source code so we can't activate it in the kernel config. I replaced '#elif defined(CONFIG_SPARSEMEM)' with '#else' and after that the compiling of the RC2 works again. Please check if CONFIG_SPARSEMEM exists. Thanks, Christian
Re: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations
Le 25/03/2019 à 12:35, Michael Ellerman a écrit : Ben Hutchings writes: On Mon, 2019-03-25 at 01:03 +0100, Andreas Schwab wrote: On Mär 24 2019, Ben Hutchings wrote: Presumably you have CONFIG_PPC_BOOK3S_64 enabled and CONFIG_SPARSEMEM disabled? Was this configuration actually usable? Why not? I assume that CONFIG_SPARSEMEM is the default for a good reason. What I don't know is how strong that reason is (I am not a Power expert at all). Looking a bit further, it seems to be related to CONFIG_NUMA in that you can enable CONFIG_FLATMEM if and only if that's disabled. So I suppose the configuration you used works for non-NUMA systems. Aneesh pointed out this fix would break FLATMEM after I'd merged it, but it didn't break any of our defconfigs so I wondered if anyone would notice. I checked today and a G5 will boot with FLATMEM, which I assume is what Andreas is using. I guess we should fix this build break for now. Even some G5's have discontiguous memory, so FLATMEM is not clearly a good choice even for all G5's, and actually a fresh g5_defconfig uses SPARSEMEM. So I'm inclined to just switch to always using SPARSEMEM on 64-bit Book3S, because that's what's well tested and we hardly need more code paths to test. Unless anyone has a strong objection, I haven't actually benchmarked FLATMEM vs SPARSEMEM on a G5. Christian Zigotzky reported a build failure with this patch. Christophe
Re: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations
Ben Hutchings writes: > On Mon, 2019-03-25 at 01:03 +0100, Andreas Schwab wrote: >> On Mär 24 2019, Ben Hutchings wrote: >> >> > Presumably you have CONFIG_PPC_BOOK3S_64 enabled and >> > CONFIG_SPARSEMEM >> > disabled? Was this configuration actually usable? >> >> Why not? > > I assume that CONFIG_SPARSEMEM is the default for a good reason. > What I don't know is how strong that reason is (I am not a Power expert > at all). Looking a bit further, it seems to be related to CONFIG_NUMA > in that you can enable CONFIG_FLATMEM if and only if that's disabled. > So I suppose the configuration you used works for non-NUMA systems. Aneesh pointed out this fix would break FLATMEM after I'd merged it, but it didn't break any of our defconfigs so I wondered if anyone would notice. I checked today and a G5 will boot with FLATMEM, which I assume is what Andreas is using. I guess we should fix this build break for now. Even some G5's have discontiguous memory, so FLATMEM is not clearly a good choice even for all G5's, and actually a fresh g5_defconfig uses SPARSEMEM. So I'm inclined to just switch to always using SPARSEMEM on 64-bit Book3S, because that's what's well tested and we hardly need more code paths to test. Unless anyone has a strong objection, I haven't actually benchmarked FLATMEM vs SPARSEMEM on a G5. cheers
PowerPC fixes 5.1-3: CONFIG_SPARSEMEM doesn't exist in the kernel source code
Hi All, I wasn't able to compile the RC2 today because of the following error messages: CC arch/powerpc/mm/slb.o In file included from ./arch/powerpc/include/asm/book3s/64/mmu.h:39:0, from ./arch/powerpc/include/asm/mmu.h:360, from ./arch/powerpc/include/asm/lppaca.h:36, from ./arch/powerpc/include/asm/paca.h:21, from ./arch/powerpc/include/asm/current.h:16, from ./include/linux/thread_info.h:21, from ./include/asm-generic/preempt.h:5, from ./arch/powerpc/include/generated/asm/preempt.h:1, from ./include/linux/preempt.h:78, from ./include/linux/spinlock.h:51, from ./include/linux/mmzone.h:8, from ./include/linux/gfp.h:6, from ./include/linux/mm.h:10, from ./arch/powerpc/include/asm/cacheflush.h:12, from ./arch/powerpc/include/asm/asm-prototypes.h:16, from arch/powerpc/mm/slb.c:17: ./arch/powerpc/include/asm/book3s/64/mmu-hash.h:584:6: warning: "MAX_PHYSMEM_BITS" is not defined [-Wundef] #if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT) ^ arch/powerpc/mm/slb.c: In function 'slb_allocate_kernel': arch/powerpc/mm/slb.c:697:37: error: 'MAX_PHYSMEM_BITS' undeclared (first use in this function) if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS)) ^ arch/powerpc/mm/slb.c:697:37: note: each undeclared identifier is reported only once for each function it appears in scripts/Makefile.build:278: recipe for target 'arch/powerpc/mm/slb.o' failed make[3]: *** [arch/powerpc/mm/slb.o] Error 1 scripts/Makefile.build:489: recipe for target 'arch/powerpc/mm' failed make[2]: *** [arch/powerpc/mm] Error 2 /home/christian/Downloads/a/Makefile:1046: recipe for target 'arch/powerpc' failed make[1]: *** [arch/powerpc] Error 2 Makefile:170: recipe for target 'sub-make' failed make: *** [sub-make] Error 2 - The variable MAX_PHYSMEM_BITS isn't defined. The problem is in the last PowerPC fixes 5.1-3: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h=v5.1-rc2&id=a5ed1e96cafde5ba48638f486bfca0685dc6ddc9 Commit log: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations Problematic fix: diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index d34ad1657d7b..598cdcdd1355 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -352,7 +352,7 @@ static inline bool strict_kernel_rwx_enabled(void) #if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ defined (CONFIG_PPC_64K_PAGES) #define MAX_PHYSMEM_BITS 51 -#else +#elif defined(CONFIG_SPARSEMEM) #define MAX_PHYSMEM_BITS 46 #endif The first if statement isn't successfull because the variables CONFIG_SPARSEMEM_EXTREME and CONFIG_PPC_64K_PAGES aren't activated in our kernel configuration. Therefore we need MAX_PHYSMEM_BITS 46 for our Nemo board. Unfortunately CONFIG_SPARSEMEM doesn't exist in the kernel source code so we can't activate it in the kernel config. I replaced '#elif defined(CONFIG_SPARSEMEM)' with '#else' and after that the compiling of the RC2 works again. Please check if CONFIG_SPARSEMEM exists. Thanks, Christian
Re: [PATCH v2] powerpc/8xx: fix possible object reference leak
Peng Hao writes: > From: Wen Yang > > The call to of_find_compatible_node returns a node pointer with refcount > incremented thus it must be explicitly decremented after the last > usage. > irq_domain_add_linear also calls of_node_get to increase refcount, > so irq_domain will not be affected when it is released. > > Detected by coccinelle with the following warnings: > ./arch/powerpc/platforms/8xx/pic.c:158:1-7: ERROR: missing of_node_put; > acquired a node pointer with refcount incremented on line 136, but without a > corresponding object release within this function. > > Fixes: a8db8cf0d894 ("irq_domain: Replace irq_alloc_host() with > revmap-specific initializers") > Signed-off-by: Wen Yang > Suggested-by: Christophe Leroy > Reviewed-by: Peng Hao > Cc: Vitaly Bordug > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Michael Ellerman > Cc: linuxppc-dev@lists.ozlabs.org > Cc: linux-ker...@vger.kernel.org > --- > arch/powerpc/platforms/8xx/pic.c | 2 -- > 1 file changed, 2 deletions(-) > > diff --git a/arch/powerpc/platforms/8xx/pic.c > b/arch/powerpc/platforms/8xx/pic.c > index 8d5a25d..4453df6 100644 > --- a/arch/powerpc/platforms/8xx/pic.c > +++ b/arch/powerpc/platforms/8xx/pic.c > @@ -153,9 +153,7 @@ int mpc8xx_pic_init(void) > if (mpc8xx_pic_host == NULL) { > printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); > ret = -ENOMEM; > - goto out; > } > - return 0; It's fragile to rely on ret being equal to zero here. If the code further up is changed it's easy enough to miss that ret is expected to be zero. Better to set it to zero here explicitly, this is the success path after all, eg: ret = 0; > out: > of_node_put(np); cheers
Re: [PATCH] compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING
On Mon, Mar 25, 2019 at 8:55 AM Masahiro Yamada wrote: > > On Mon, Mar 25, 2019 at 4:33 PM Arnd Bergmann wrote: > > > > On Mon, Mar 25, 2019 at 7:11 AM Masahiro Yamada > > wrote: > > > I do not know why to reproduce it, > > > but is "__init __noreturn" more sensible than > > > "__always_inline" here? > > > > It's in a header file, so it has to be 'inline'. We could make it > > static inline __init __noreturn, > > Yes, I like 'static inline __init __noreturn' > > > but I don't see an advantage over > > __always_inline there. > > __always_inline takes away the compiler's freedom. > > I'd like to leave it up to the compiler where possible. Ok, fair enough. Arnd
Re: [PATCH] compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING
On Wed, Mar 20, 2019 at 2:34 PM Arnd Bergmann wrote: > > On Wed, Mar 20, 2019 at 10:41 AM Arnd Bergmann wrote: > > > > I've added your patch to my randconfig test setup and will let you > > know if I see anything noticeable. I'm currently testing clang-arm32, > > clang-arm64 and gcc-x86. > > This is the only additional bug that has come up so far: > > `.exit.text' referenced in section `.alt.smp.init' of > drivers/char/ipmi/ipmi_msghandler.o: defined in discarded section > `exit.text' of drivers/char/ipmi/ipmi_msghandler.o FWIW, the message up there does not match the patch anyway, that was an unrelated bug. Arnd
Re: [PATCH v3 2/7] ocxl: Don't pass pci_dev around
Le 25/03/2019 à 06:44, Alastair D'Silva a écrit : From: Alastair D'Silva This data is already available in a struct Signed-off-by: Alastair D'Silva --- Acked-by: Frederic Barrat drivers/misc/ocxl/core.c | 38 +- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/misc/ocxl/core.c b/drivers/misc/ocxl/core.c index 1a4411b72d35..2f2fe12eac1e 100644 --- a/drivers/misc/ocxl/core.c +++ b/drivers/misc/ocxl/core.c @@ -66,10 +66,11 @@ static int set_afu_device(struct ocxl_afu *afu, const char *location) return rc; } -static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev) +static int assign_afu_actag(struct ocxl_afu *afu) { struct ocxl_fn *fn = afu->fn; int actag_count, actag_offset; + struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent); /* * if there were not enough actags for the function, each afu @@ -79,16 +80,16 @@ static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev) fn->actag_enabled / fn->actag_supported; actag_offset = ocxl_actag_afu_alloc(fn, actag_count); if (actag_offset < 0) { - dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n", + dev_err(&pci_dev->dev, "Can't allocate %d actags for AFU: %d\n", actag_count, actag_offset); return actag_offset; } afu->actag_base = fn->actag_base + actag_offset; afu->actag_enabled = actag_count; - ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos, + ocxl_config_set_afu_actag(pci_dev, afu->config.dvsec_afu_control_pos, afu->actag_base, afu->actag_enabled); - dev_dbg(&afu->dev, "actag base=%d enabled=%d\n", + dev_dbg(&pci_dev->dev, "actag base=%d enabled=%d\n", afu->actag_base, afu->actag_enabled); return 0; } @@ -103,10 +104,11 @@ static void reclaim_afu_actag(struct ocxl_afu *afu) ocxl_actag_afu_free(afu->fn, start_offset, size); } -static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev) +static int assign_afu_pasid(struct ocxl_afu *afu) { struct ocxl_fn *fn = afu->fn; int pasid_count, pasid_offset; + struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent); /* * We only support the case where the function configuration @@ -115,7 +117,7 @@ static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev) pasid_count = 1 << afu->config.pasid_supported_log; pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count); if (pasid_offset < 0) { - dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n", + dev_err(&pci_dev->dev, "Can't allocate %d PASIDs for AFU: %d\n", pasid_count, pasid_offset); return pasid_offset; } @@ -123,10 +125,10 @@ static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev) afu->pasid_count = 0; afu->pasid_max = pasid_count; - ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos, + ocxl_config_set_afu_pasid(pci_dev, afu->config.dvsec_afu_control_pos, afu->pasid_base, afu->config.pasid_supported_log); - dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n", + dev_dbg(&pci_dev->dev, "PASID base=%d, enabled=%d\n", afu->pasid_base, pasid_count); return 0; } @@ -172,9 +174,10 @@ static void release_fn_bar(struct ocxl_fn *fn, int bar) WARN_ON(fn->bar_used[idx] < 0); } -static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev) +static int map_mmio_areas(struct ocxl_afu *afu) { int rc; + struct pci_dev *pci_dev = to_pci_dev(afu->fn->dev.parent); rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar); if (rc) @@ -187,10 +190,10 @@ static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev) } afu->global_mmio_start = - pci_resource_start(dev, afu->config.global_mmio_bar) + + pci_resource_start(pci_dev, afu->config.global_mmio_bar) + afu->config.global_mmio_offset; afu->pp_mmio_start = - pci_resource_start(dev, afu->config.pp_mmio_bar) + + pci_resource_start(pci_dev, afu->config.pp_mmio_bar) + afu->config.pp_mmio_offset; afu->global_mmio_ptr = ioremap(afu->global_mmio_start, @@ -198,7 +201,7 @@ static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev) if (!afu->global_mmio_ptr) { release_fn_bar(afu->fn, afu->config.pp_mmio_bar); release_fn_bar(afu->fn, afu->config.global_mmio_bar); - dev_err(&dev->dev, "Error mapping global mmio area\n"); + dev_err(&pci_dev->dev, "Error mapping global mmio area\n");
Re: [PATCH v3 1/7] ocxl: Split pci.c
Le 25/03/2019 à 06:44, Alastair D'Silva a écrit : From: Alastair D'Silva In preparation for making core code available for external drivers, move the core code out of pci.c and into core.c Signed-off-by: Alastair D'Silva --- Acked-by: Frederic Barrat drivers/misc/ocxl/Makefile| 1 + drivers/misc/ocxl/core.c | 517 + drivers/misc/ocxl/ocxl_internal.h | 5 + drivers/misc/ocxl/pci.c | 519 +- 4 files changed, 524 insertions(+), 518 deletions(-) create mode 100644 drivers/misc/ocxl/core.c diff --git a/drivers/misc/ocxl/Makefile b/drivers/misc/ocxl/Makefile index 5229dcda8297..bc4e39bfda7b 100644 --- a/drivers/misc/ocxl/Makefile +++ b/drivers/misc/ocxl/Makefile @@ -3,6 +3,7 @@ ccflags-$(CONFIG_PPC_WERROR)+= -Werror ocxl-y+= main.o pci.o config.o file.o pasid.o ocxl-y+= link.o context.o afu_irq.o sysfs.o trace.o +ocxl-y += core.o obj-$(CONFIG_OCXL)+= ocxl.o # For tracepoints to include our trace.h from tracepoint infrastructure: diff --git a/drivers/misc/ocxl/core.c b/drivers/misc/ocxl/core.c new file mode 100644 index ..1a4411b72d35 --- /dev/null +++ b/drivers/misc/ocxl/core.c @@ -0,0 +1,517 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2019 IBM Corp. +#include +#include "ocxl_internal.h" + +static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn) +{ + return (get_device(&fn->dev) == NULL) ? NULL : fn; +} + +static void ocxl_fn_put(struct ocxl_fn *fn) +{ + put_device(&fn->dev); +} + +struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu) +{ + return (get_device(&afu->dev) == NULL) ? NULL : afu; +} + +void ocxl_afu_put(struct ocxl_afu *afu) +{ + put_device(&afu->dev); +} + +static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn) +{ + struct ocxl_afu *afu; + + afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL); + if (!afu) + return NULL; + + mutex_init(&afu->contexts_lock); + mutex_init(&afu->afu_control_lock); + idr_init(&afu->contexts_idr); + afu->fn = fn; + ocxl_fn_get(fn); + return afu; +} + +static void free_afu(struct ocxl_afu *afu) +{ + idr_destroy(&afu->contexts_idr); + ocxl_fn_put(afu->fn); + kfree(afu); +} + +static void free_afu_dev(struct device *dev) +{ + struct ocxl_afu *afu = to_ocxl_afu(dev); + + ocxl_unregister_afu(afu); + free_afu(afu); +} + +static int set_afu_device(struct ocxl_afu *afu, const char *location) +{ + struct ocxl_fn *fn = afu->fn; + int rc; + + afu->dev.parent = &fn->dev; + afu->dev.release = free_afu_dev; + rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location, + afu->config.idx); + return rc; +} + +static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev) +{ + struct ocxl_fn *fn = afu->fn; + int actag_count, actag_offset; + + /* +* if there were not enough actags for the function, each afu +* reduces its count as well +*/ + actag_count = afu->config.actag_supported * + fn->actag_enabled / fn->actag_supported; + actag_offset = ocxl_actag_afu_alloc(fn, actag_count); + if (actag_offset < 0) { + dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n", + actag_count, actag_offset); + return actag_offset; + } + afu->actag_base = fn->actag_base + actag_offset; + afu->actag_enabled = actag_count; + + ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos, + afu->actag_base, afu->actag_enabled); + dev_dbg(&afu->dev, "actag base=%d enabled=%d\n", + afu->actag_base, afu->actag_enabled); + return 0; +} + +static void reclaim_afu_actag(struct ocxl_afu *afu) +{ + struct ocxl_fn *fn = afu->fn; + int start_offset, size; + + start_offset = afu->actag_base - fn->actag_base; + size = afu->actag_enabled; + ocxl_actag_afu_free(afu->fn, start_offset, size); +} + +static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev) +{ + struct ocxl_fn *fn = afu->fn; + int pasid_count, pasid_offset; + + /* +* We only support the case where the function configuration +* requested enough PASIDs to cover all AFUs. +*/ + pasid_count = 1 << afu->config.pasid_supported_log; + pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count); + if (pasid_offset < 0) { + dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n", + pasid_count, pasid_offset); + return pasid_offset; + } + afu->pasid_base = fn->pasid_base + pasid_offset; + afu->pasid_count = 0; + afu->pasid_max = pasid_count; + + ocxl_config_set_afu_pasid(d
[PATCH v2] powerpc/8xx: fix possible object reference leak
From: Wen Yang The call to of_find_compatible_node returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. irq_domain_add_linear also calls of_node_get to increase refcount, so irq_domain will not be affected when it is released. Detected by coccinelle with the following warnings: ./arch/powerpc/platforms/8xx/pic.c:158:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 136, but without a corresponding object release within this function. Fixes: a8db8cf0d894 ("irq_domain: Replace irq_alloc_host() with revmap-specific initializers") Signed-off-by: Wen Yang Suggested-by: Christophe Leroy Reviewed-by: Peng Hao Cc: Vitaly Bordug Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-ker...@vger.kernel.org --- arch/powerpc/platforms/8xx/pic.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index 8d5a25d..4453df6 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,9 +153,7 @@ int mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; - goto out; } - return 0; out: of_node_put(np); -- 2.9.5
Re: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations
Hi, Ben > Presumably you have CONFIG_PPC_BOOK3S_64 enabled and CONFIG_SPARSEMEM > disabled? > Was this configuration actually usable? I just noticed these build warnings too. FWIW, I've been using CONFIG_FLATMEM on my Power Mac G5 for about three years, so I guess the answer is "yes" (for this specific machine, at least).
Re: [PATCH v2] powerpc/8xx: fix possible object reference leak
Le 25/03/2019 à 18:11, Peng Hao a écrit : From: Wen Yang The call to of_find_compatible_node returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. irq_domain_add_linear also calls of_node_get to increase refcount, so irq_domain will not be affected when it is released. Detected by coccinelle with the following warnings: ./arch/powerpc/platforms/8xx/pic.c:158:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 136, but without a corresponding object release within this function. Fixes: a8db8cf0d894 ("irq_domain: Replace irq_alloc_host() with revmap-specific initializers") Signed-off-by: Wen Yang Suggested-by: Christophe Leroy Reviewed-by: Peng Hao Reviewed-by: Christophe Leroy Cc: Vitaly Bordug Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-ker...@vger.kernel.org --- arch/powerpc/platforms/8xx/pic.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index 8d5a25d..4453df6 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,9 +153,7 @@ int mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; - goto out; } - return 0; out: of_node_put(np);
Re: Regression in 5.1.0-rc2: PowerBook G4 Aluminum fails to boot - bisected to commit 0df977eafc792
Le 25/03/2019 à 01:49, Larry Finger a écrit : A build of kernel 5.1.0-rc2 resulted in a failure to boot on my PowerBook G4 Aluminum. The bootstrap loads the initial kernel and issues the appropriate start, but the machine hangs at that point. The problem does not depend on the choice of PPC32 processor type. This machine has a 7447A according to /proc/cpuinfo. The problem was bisected to the following: commit 0df977eafc792a5365a7f81d8d5920132e03afad Author: Christophe Leroy Date: Thu Feb 21 10:37:54 2019 + powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while in RTAS When calling RTAS, the stack pointer is stored in SPRN_SPRG2 in order to be able to restore it in case of machine check in RTAS. As machine check is not a perfomance critical path, this patch frees SPRN_SPRG2 by using a field in thread struct instead. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman I reverted this patch and found that the system began execution, and then failed, likely due to the reassignment of SPRN_SPRG2. I had found this problem with 5.1.0-rc1, but -rc2 was out by the time I finished the bisection. Unfortunately, none of the changes in -rc2 fixed the problem. I think I identified the issue, see https://patchwork.ozlabs.org/patch/1063962/ It is now booting properly under QEMU with your .config Could you please test it on your target ? Thanks Christophe Attached is the .config that I used. Thanks, Larry
[PATCH] powerpc/rtas: fix early boot failure.
Commit 0df977eafc79 ("powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while in RTAS") changes the code to use a field in thread struct to store the stack pointer while in RTAS instead of using SPRN_SPRG2. It therefore converts all places which were manipulating SPRN_SPRG2 to use that field. During early startup, the zeroing of SPRN_SPRG2 has been replaced by a zeroing of that field in thread struct. But at least in start_here, that's done wrongly because it used the physical address of the fields while MMU is on at that time. So the virtual address of the field should be used instead, but in the meantime, thread struct has already been zeroised and initialised so we can just drop this initialisation. Reported-by: Larry Finger Fixes: 0df977eafc79 ("powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while in RTAS") Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.S | 8 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 48051c8977c5..e25b615e9f9e 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -851,10 +851,6 @@ __secondary_start: tophys(r4,r2) addir4,r4,THREAD/* phys address of our thread_struct */ mtspr SPRN_SPRG_THREAD,r4 -#ifdef CONFIG_PPC_RTAS - li r3,0 - stw r3, RTAS_SP(r4) /* 0 => not in RTAS */ -#endif lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l mtspr SPRN_SPRG_PGDIR, r4 @@ -941,10 +937,6 @@ start_here: tophys(r4,r2) addir4,r4,THREAD/* init task's THREAD */ mtspr SPRN_SPRG_THREAD,r4 -#ifdef CONFIG_PPC_RTAS - li r3,0 - stw r3, RTAS_SP(r4) /* 0 => not in RTAS */ -#endif lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l mtspr SPRN_SPRG_PGDIR, r4 -- 2.13.3
Re: [PATCH] compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING
On Mon, Mar 25, 2019 at 4:33 PM Arnd Bergmann wrote: > > On Mon, Mar 25, 2019 at 7:11 AM Masahiro Yamada > wrote: > > On Wed, Mar 20, 2019 at 10:34 PM Arnd Bergmann wrote: > > > > > > On Wed, Mar 20, 2019 at 10:41 AM Arnd Bergmann wrote: > > > > > > > > I've added your patch to my randconfig test setup and will let you > > > > know if I see anything noticeable. I'm currently testing clang-arm32, > > > > clang-arm64 and gcc-x86. > > > > > > This is the only additional bug that has come up so far: > > > > > > `.exit.text' referenced in section `.alt.smp.init' of > > > drivers/char/ipmi/ipmi_msghandler.o: defined in discarded section > > > `exit.text' of drivers/char/ipmi/ipmi_msghandler.o > > > > > > diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h > > > index 201100226301..84b12e33104d 100644 > > > --- a/arch/arm/kernel/atags.h > > > +++ b/arch/arm/kernel/atags.h > > > @@ -5,7 +5,7 @@ void convert_to_tag_list(struct tag *tags); > > > const struct machine_desc *setup_machine_tags(phys_addr_t > > > __atags_pointer, > > > unsigned int machine_nr); > > > #else > > > -static inline const struct machine_desc * > > > +static __always_inline const struct machine_desc * > > > setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) > > > { > > > early_print("no ATAGS support: can't continue\n"); > > > > > > > > > I do not know why to reproduce it, > > but is "__init __noreturn" more sensible than > > "__always_inline" here? > > It's in a header file, so it has to be 'inline'. We could make it > static inline __init __noreturn, Yes, I like 'static inline __init __noreturn' > but I don't see an advantage over > __always_inline there. __always_inline takes away the compiler's freedom. I'd like to leave it up to the compiler where possible. The inlining decision may change depending on -O2, -Os, -Og(which was rejected) or whatever optimization strategy. > > Arnd > > __ > Linux MTD discussion mailing list > http://lists.infradead.org/mailman/listinfo/linux-mtd/ -- Best Regards Masahiro Yamada
Re: [PATCH] compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING
On Mon, Mar 25, 2019 at 7:11 AM Masahiro Yamada wrote: > On Wed, Mar 20, 2019 at 10:34 PM Arnd Bergmann wrote: > > > > On Wed, Mar 20, 2019 at 10:41 AM Arnd Bergmann wrote: > > > > > > I've added your patch to my randconfig test setup and will let you > > > know if I see anything noticeable. I'm currently testing clang-arm32, > > > clang-arm64 and gcc-x86. > > > > This is the only additional bug that has come up so far: > > > > `.exit.text' referenced in section `.alt.smp.init' of > > drivers/char/ipmi/ipmi_msghandler.o: defined in discarded section > > `exit.text' of drivers/char/ipmi/ipmi_msghandler.o > > > > diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h > > index 201100226301..84b12e33104d 100644 > > --- a/arch/arm/kernel/atags.h > > +++ b/arch/arm/kernel/atags.h > > @@ -5,7 +5,7 @@ void convert_to_tag_list(struct tag *tags); > > const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer, > > unsigned int machine_nr); > > #else > > -static inline const struct machine_desc * > > +static __always_inline const struct machine_desc * > > setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) > > { > > early_print("no ATAGS support: can't continue\n"); > > > > > I do not know why to reproduce it, > but is "__init __noreturn" more sensible than > "__always_inline" here? It's in a header file, so it has to be 'inline'. We could make it static inline __init __noreturn, but I don't see an advantage over __always_inline there. Arnd