Re: [RFC FIX v1 1/2] powerpc: Discover radix availability before scanning the memory nodes
Bharata B Rao writes: > Currently device tree nodes for memory are scanned before the > radix feature is discovered in mmu_early_init_devtree(). Move this > routine ahead of scanning memory nodes so that we know if the > guest is radix or not when scanning ibm,dynamic-reconfiguration-memory. Sorry this doesn't work. > diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c > index b15bae2..079d893 100644 > --- a/arch/powerpc/kernel/prom.c > +++ b/arch/powerpc/kernel/prom.c > @@ -722,6 +722,8 @@ void __init early_init_devtree(void *params) >*/ > of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); > > + mmu_early_init_devtree(); > + You've moved this above parse_early_param(), but mmu_early_init_devtree() uses disable_radix, which is an early param. So this will break disable_radix handling. It will probably break other things too because the ordering of this init code is very fragile - bootstrapping is hard :) > /* Scan memory nodes and rebuild MEMBLOCKs */ > of_scan_flat_dt(early_init_dt_scan_root, NULL); > of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); > @@ -783,8 +785,6 @@ void __init early_init_devtree(void *params) > spinning_secondaries = boot_cpu_count - 1; > #endif > > - mmu_early_init_devtree(); > - > #ifdef CONFIG_PPC_POWERNV > /* Scan and build the list of machine check recoverable ranges */ > of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); cheers
Re: [PATCH 09/67] arc: remove CONFIG_ARC_PLAT_NEEDS_PHYS_TO_DMA
On 12/29/2017 12:25 AM, Christoph Hellwig wrote: We always use the stub definitions, so remove the unused other code. Signed-off-by: Christoph HellwigAcked-by: Vineet Gupta FWIW, it was removed and reintroduced as one of the customers wanted it, which is not relevant now ! Thx, -Vineet --- arch/arc/Kconfig | 3 --- arch/arc/include/asm/dma-mapping.h | 7 --- arch/arc/mm/dma.c | 14 +++--- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 9d5fd00d9e91..f3a80cf164cc 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -463,9 +463,6 @@ config ARCH_PHYS_ADDR_T_64BIT config ARCH_DMA_ADDR_T_64BIT bool -config ARC_PLAT_NEEDS_PHYS_TO_DMA - bool - config ARC_KVADDR_SIZE int "Kernel Virtual Address Space size (MB)" range 0 512 diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h index 94285031c4fb..7a16824bfe98 100644 --- a/arch/arc/include/asm/dma-mapping.h +++ b/arch/arc/include/asm/dma-mapping.h @@ -11,13 +11,6 @@ #ifndef ASM_ARC_DMA_MAPPING_H #define ASM_ARC_DMA_MAPPING_H -#ifndef CONFIG_ARC_PLAT_NEEDS_PHYS_TO_DMA -#define plat_dma_to_phys(dev, dma_handle) ((phys_addr_t)(dma_handle)) -#define plat_phys_to_dma(dev, paddr) ((dma_addr_t)(paddr)) -#else -#include -#endif - extern const struct dma_map_ops arc_dma_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index fad18261ef6a..1d405b86250c 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -60,7 +60,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size, /* This is linear addr (0x8000_ based) */ paddr = page_to_phys(page); - *dma_handle = plat_phys_to_dma(dev, paddr); + *dma_handle = paddr; /* This is kernel Virtual address (0x7000_ based) */ if (need_kvaddr) { @@ -92,7 +92,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size, static void arc_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, 
unsigned long attrs) { - phys_addr_t paddr = plat_dma_to_phys(dev, dma_handle); + phys_addr_t paddr = dma_handle; struct page *page = virt_to_page(paddr); int is_non_coh = 1; @@ -111,7 +111,7 @@ static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma, { unsigned long user_count = vma_pages(vma); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long pfn = __phys_to_pfn(plat_dma_to_phys(dev, dma_addr)); + unsigned long pfn = __phys_to_pfn(dma_addr); unsigned long off = vma->vm_pgoff; int ret = -ENXIO; @@ -175,7 +175,7 @@ static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page, if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) _dma_cache_sync(paddr, size, dir); - return plat_phys_to_dma(dev, paddr); + return paddr; } /* @@ -190,7 +190,7 @@ static void arc_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { - phys_addr_t paddr = plat_dma_to_phys(dev, handle); + phys_addr_t paddr = handle; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) _dma_cache_sync(paddr, size, dir); @@ -224,13 +224,13 @@ static void arc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, static void arc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) { - _dma_cache_sync(plat_dma_to_phys(dev, dma_handle), size, DMA_FROM_DEVICE); + _dma_cache_sync(dma_handle, size, DMA_FROM_DEVICE); } static void arc_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) { - _dma_cache_sync(plat_dma_to_phys(dev, dma_handle), size, DMA_TO_DEVICE); + _dma_cache_sync(dma_handle, size, DMA_TO_DEVICE); } static void arc_dma_sync_sg_for_cpu(struct device *dev,
Re: [PATCH v7 07/10] kernel/jump_label: abstract jump_entry member accessors
On 5 January 2018 at 18:22, Catalin Marinaswrote: > On Fri, Jan 05, 2018 at 06:01:33PM +, Ard Biesheuvel wrote: >> On 5 January 2018 at 17:58, Catalin Marinas wrote: >> > On Tue, Jan 02, 2018 at 08:05:46PM +, Ard Biesheuvel wrote: >> >> diff --git a/arch/arm/include/asm/jump_label.h >> >> b/arch/arm/include/asm/jump_label.h >> >> index e12d7d096fc0..7b05b404063a 100644 >> >> --- a/arch/arm/include/asm/jump_label.h >> >> +++ b/arch/arm/include/asm/jump_label.h >> >> @@ -45,5 +45,32 @@ struct jump_entry { >> >> jump_label_t key; >> >> }; >> >> >> >> +static inline jump_label_t jump_entry_code(const struct jump_entry >> >> *entry) >> >> +{ >> >> + return entry->code; >> >> +} >> >> + >> >> +static inline struct static_key *jump_entry_key(const struct jump_entry >> >> *entry) >> >> +{ >> >> + return (struct static_key *)((unsigned long)entry->key & ~1UL); >> >> +} >> >> + >> >> +static inline bool jump_entry_is_branch(const struct jump_entry *entry) >> >> +{ >> >> + return (unsigned long)entry->key & 1UL; >> >> +} >> >> + >> >> +static inline bool jump_entry_is_module_init(const struct jump_entry >> >> *entry) >> >> +{ >> >> + return entry->code == 0; >> >> +} >> >> + >> >> +static inline void jump_entry_set_module_init(struct jump_entry *entry) >> >> +{ >> >> + entry->code = 0; >> >> +} >> >> + >> >> +#define jump_label_swap NULL >> > >> > Is there any difference between these functions on any of the >> > architectures touched? Even with the relative offset, arm64 and x86 >> > looked the same to me (well, I may have missed some detail). >> >> No, the latter two are identical everywhere, and the others are the >> same modulo absolute vs relative. >> >> The issue is that the struct definition is per-arch so the accessors >> should be as well. > > Up to this patch, even the jump_entry structure is the same on all > architectures (the jump_label_t type differs). > > With relative offset, can you not just define jump_label_t to s32? 
At a > quick grep in mainline, it doesn't seem to be used outside the structure > definition. > I think we can just remove jump_label_t entirely, and replace it with unsigned long for absolute, and s32 for relative. Maybe I am missing something, but things like #ifdef CONFIG_X86_64 typedef u64 jump_label_t; #else typedef u32 jump_label_t; #endif seem a bit pointless to me anyway. >> Perhaps I should introduce two variants two asm-generic, similar to >> how we have different flavors of unaligned accessors. > > You could as well define them directly in kernel/jump_label.h or, if > used outside this file, include/linux/jump_label.h. > Perhaps I should define a Kconfig symbol after all for relative jump labels, and just keep everything in the same file. The question is whether I should use CONFIG_HAVE_ARCH_PREL32_RELOCATIONS for this as well.
Re: [PATCH v7 07/10] kernel/jump_label: abstract jump_entry member accessors
On Fri, Jan 05, 2018 at 06:01:33PM +, Ard Biesheuvel wrote: > On 5 January 2018 at 17:58, Catalin Marinaswrote: > > On Tue, Jan 02, 2018 at 08:05:46PM +, Ard Biesheuvel wrote: > >> diff --git a/arch/arm/include/asm/jump_label.h > >> b/arch/arm/include/asm/jump_label.h > >> index e12d7d096fc0..7b05b404063a 100644 > >> --- a/arch/arm/include/asm/jump_label.h > >> +++ b/arch/arm/include/asm/jump_label.h > >> @@ -45,5 +45,32 @@ struct jump_entry { > >> jump_label_t key; > >> }; > >> > >> +static inline jump_label_t jump_entry_code(const struct jump_entry *entry) > >> +{ > >> + return entry->code; > >> +} > >> + > >> +static inline struct static_key *jump_entry_key(const struct jump_entry > >> *entry) > >> +{ > >> + return (struct static_key *)((unsigned long)entry->key & ~1UL); > >> +} > >> + > >> +static inline bool jump_entry_is_branch(const struct jump_entry *entry) > >> +{ > >> + return (unsigned long)entry->key & 1UL; > >> +} > >> + > >> +static inline bool jump_entry_is_module_init(const struct jump_entry > >> *entry) > >> +{ > >> + return entry->code == 0; > >> +} > >> + > >> +static inline void jump_entry_set_module_init(struct jump_entry *entry) > >> +{ > >> + entry->code = 0; > >> +} > >> + > >> +#define jump_label_swap NULL > > > > Is there any difference between these functions on any of the > > architectures touched? Even with the relative offset, arm64 and x86 > > looked the same to me (well, I may have missed some detail). > > No, the latter two are identical everywhere, and the others are the > same modulo absolute vs relative. > > The issue is that the struct definition is per-arch so the accessors > should be as well. Up to this patch, even the jump_entry structure is the same on all architectures (the jump_label_t type differs). With relative offset, can you not just define jump_label_t to s32? At a quick grep in mainline, it doesn't seem to be used outside the structure definition. 
> Perhaps I should introduce two variants to asm-generic, similar to > how we have different flavors of unaligned accessors. You could as well define them directly in kernel/jump_label.h or, if used outside this file, include/linux/jump_label.h. -- Catalin
Re: [PATCH v7 07/10] kernel/jump_label: abstract jump_entry member accessors
On 5 January 2018 at 17:58, Catalin Marinaswrote: > On Tue, Jan 02, 2018 at 08:05:46PM +, Ard Biesheuvel wrote: >> diff --git a/arch/arm/include/asm/jump_label.h >> b/arch/arm/include/asm/jump_label.h >> index e12d7d096fc0..7b05b404063a 100644 >> --- a/arch/arm/include/asm/jump_label.h >> +++ b/arch/arm/include/asm/jump_label.h >> @@ -45,5 +45,32 @@ struct jump_entry { >> jump_label_t key; >> }; >> >> +static inline jump_label_t jump_entry_code(const struct jump_entry *entry) >> +{ >> + return entry->code; >> +} >> + >> +static inline struct static_key *jump_entry_key(const struct jump_entry >> *entry) >> +{ >> + return (struct static_key *)((unsigned long)entry->key & ~1UL); >> +} >> + >> +static inline bool jump_entry_is_branch(const struct jump_entry *entry) >> +{ >> + return (unsigned long)entry->key & 1UL; >> +} >> + >> +static inline bool jump_entry_is_module_init(const struct jump_entry *entry) >> +{ >> + return entry->code == 0; >> +} >> + >> +static inline void jump_entry_set_module_init(struct jump_entry *entry) >> +{ >> + entry->code = 0; >> +} >> + >> +#define jump_label_swap NULL > > Is there any difference between these functions on any of the > architectures touched? Even with the relative offset, arm64 and x86 > looked the same to me (well, I may have missed some detail). > No, the latter two are identical everywhere, and the others are the same modulo absolute vs relative. The issue is that the struct definition is per-arch so the accessors should be as well. Perhaps I should introduce two variants two asm-generic, similar to how we have different flavors of unaligned accessors.
Re: [PATCH v7 07/10] kernel/jump_label: abstract jump_entry member accessors
On Tue, Jan 02, 2018 at 08:05:46PM +, Ard Biesheuvel wrote: > diff --git a/arch/arm/include/asm/jump_label.h > b/arch/arm/include/asm/jump_label.h > index e12d7d096fc0..7b05b404063a 100644 > --- a/arch/arm/include/asm/jump_label.h > +++ b/arch/arm/include/asm/jump_label.h > @@ -45,5 +45,32 @@ struct jump_entry { > jump_label_t key; > }; > > +static inline jump_label_t jump_entry_code(const struct jump_entry *entry) > +{ > + return entry->code; > +} > + > +static inline struct static_key *jump_entry_key(const struct jump_entry > *entry) > +{ > + return (struct static_key *)((unsigned long)entry->key & ~1UL); > +} > + > +static inline bool jump_entry_is_branch(const struct jump_entry *entry) > +{ > + return (unsigned long)entry->key & 1UL; > +} > + > +static inline bool jump_entry_is_module_init(const struct jump_entry *entry) > +{ > + return entry->code == 0; > +} > + > +static inline void jump_entry_set_module_init(struct jump_entry *entry) > +{ > + entry->code = 0; > +} > + > +#define jump_label_swap NULL Is there any difference between these functions on any of the architectures touched? Even with the relative offset, arm64 and x86 looked the same to me (well, I may have missed some detail). -- Catalin
Re: [PATCH v7 05/10] PCI: Add support for relative addressing in quirk tables
On 5 January 2018 at 17:41, Catalin Marinaswrote: > On Tue, Jan 02, 2018 at 08:05:44PM +, Ard Biesheuvel wrote: >> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c >> index 10684b17d0bd..b6d51b4d5ce1 100644 >> --- a/drivers/pci/quirks.c >> +++ b/drivers/pci/quirks.c >> @@ -3556,9 +3556,16 @@ static void pci_do_fixups(struct pci_dev *dev, struct >> pci_fixup *f, >>f->vendor == (u16) PCI_ANY_ID) && >> (f->device == dev->device || >>f->device == (u16) PCI_ANY_ID)) { >> - calltime = fixup_debug_start(dev, f->hook); >> - f->hook(dev); >> - fixup_debug_report(dev, calltime, f->hook); >> + void (*hook)(struct pci_dev *dev); >> +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS >> + hook = (void *)((unsigned long)>hook_offset + >> + f->hook_offset); >> +#else >> + hook = f->hook; >> +#endif > > More of a nitpick but I've seen this pattern in several places in your > code, maybe worth defining a macro (couldn't come up with a better > name): > > #define offset_to_ptr(off) \ > ((void *)((unsigned long)&(off) + (off))) > Yeah, good point. Or even static inline void *offset_to_ptr(const s32 *off) { return (void *)((unsigned long)off + *off); }
Re: [PATCH v7 05/10] PCI: Add support for relative addressing in quirk tables
On Tue, Jan 02, 2018 at 08:05:44PM +, Ard Biesheuvel wrote: > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c > index 10684b17d0bd..b6d51b4d5ce1 100644 > --- a/drivers/pci/quirks.c > +++ b/drivers/pci/quirks.c > @@ -3556,9 +3556,16 @@ static void pci_do_fixups(struct pci_dev *dev, struct > pci_fixup *f, >f->vendor == (u16) PCI_ANY_ID) && > (f->device == dev->device || >f->device == (u16) PCI_ANY_ID)) { > - calltime = fixup_debug_start(dev, f->hook); > - f->hook(dev); > - fixup_debug_report(dev, calltime, f->hook); > + void (*hook)(struct pci_dev *dev); > +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS > + hook = (void *)((unsigned long)>hook_offset + > + f->hook_offset); > +#else > + hook = f->hook; > +#endif More of a nitpick but I've seen this pattern in several places in your code, maybe worth defining a macro (couldn't come up with a better name): #define offset_to_ptr(off) \ ((void *)((unsigned long)&(off) + (off))) -- Catalin
[PATCH 1/3] powerpc/32: Fix hugepage allocation on 8xx at hint address
When an app has some regular pages allocated (e.g. see below) and tries to mmap() a huge page at a hint address covered by the same PMD entry, the kernel accepts the hint allthough the 8xx cannot handle different page sizes in the same PMD entry. 1000-10001000 r-xp 00:0f 2597 /root/malloc 1001-10011000 rwxp 00:0f 2597 /root/malloc mmap(0x1008, 524288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x4, -1, 0) = 0x1008 This results in the following warning, and the app remains forever in do_page_fault()/hugetlb_fault() [162980.035629] WARNING: CPU: 0 PID: 2777 at arch/powerpc/mm/hugetlbpage.c:354 hugetlb_free_pgd_range+0xc8/0x1e4 [162980.035699] CPU: 0 PID: 2777 Comm: malloc Tainted: G W 4.14.6 #85 [162980.035744] task: c67e2c00 task.stack: c668e000 [162980.035783] NIP: c000fe18 LR: c00e1eec CTR: c00f90c0 [162980.035830] REGS: c668fc20 TRAP: 0700 Tainted: G W(4.14.6) [162980.035854] MSR: 00029032CR: 24044224 XER: 2000 [162980.036003] [162980.036003] GPR00: c00e1eec c668fcd0 c67e2c00 0010 c6869410 1008 77fb4000 [162980.036003] GPR08: 0001 0683c001 ff80 44028228 10018a34 4008 418004fc [162980.036003] GPR16: c668e000 00040100 c668e000 c06c c668fe78 c668e000 c6835ba0 c668fd48 [162980.036003] GPR24: 73ff 7400 0001 77fb4000 100f 1010 1010 [162980.036743] NIP [c000fe18] hugetlb_free_pgd_range+0xc8/0x1e4 [162980.036839] LR [c00e1eec] free_pgtables+0x12c/0x150 [162980.036861] Call Trace: [162980.036939] [c668fcd0] [c00f0774] unlink_anon_vmas+0x1c4/0x214 (unreliable) [162980.037040] [c668fd10] [c00e1eec] free_pgtables+0x12c/0x150 [162980.037118] [c668fd40] [c00eabac] exit_mmap+0xe8/0x1b4 [162980.037210] [c668fda0] [c0019710] mmput.part.9+0x20/0xd8 [162980.037301] [c668fdb0] [c001ecb0] do_exit+0x1f0/0x93c [162980.037386] [c668fe00] [c001f478] do_group_exit+0x40/0xcc [162980.037479] [c668fe10] [c002a76c] get_signal+0x47c/0x614 [162980.037570] [c668fe70] [c0007840] do_signal+0x54/0x244 [162980.037654] [c668ff30] [c0007ae8] do_notify_resume+0x34/0x88 [162980.037744] [c668ff40] 
[c000dae8] do_user_signal+0x74/0xc4 [162980.037781] Instruction dump: [162980.037821] 7fdff378 8137 54a3463a 80890020 7d24182e 7c841a14 712a0004 4082ff94 [162980.038014] 2f89 419e0010 712a0ff0 408200e0 <0fe0> 54a9000a 7f984840 419d0094 [162980.038216] ---[ end trace c0ceeca8e7a5800a ]--- [162980.038754] BUG: non-zero nr_ptes on freeing mm: 1 [162985.363322] BUG: non-zero nr_ptes on freeing mm: -1 In order to fix this, the address space "slices" implemented for BOOK3S/64 is reused. This patch: 1/ Modifies the "slices" implementation to support 32 bits CPUs, based on using only the low slices. 2/ Moves "slices" functions prototypes from page64.h to page.h 3/ Modifies the context.id on the 8xx to be in the range [1:16] instead of [0:15] in order to identify context.id == 0 as not initialised contexts 4/ Activates CONFIG_PPC_MM_SLICES when CONFIG_HUGETLB_PAGE is selected for the 8xx Although we could in theory have as many slices as PMD entries, the current slices implementation limits the number of low slices to 16. 
Fixes: 4b91428699477 ("powerpc/8xx: Implement support of hugepages") Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/mmu-8xx.h | 6 arch/powerpc/include/asm/page.h| 14 arch/powerpc/include/asm/page_32.h | 19 +++ arch/powerpc/include/asm/page_64.h | 21 ++-- arch/powerpc/kernel/setup-common.c | 2 +- arch/powerpc/mm/8xx_mmu.c | 2 +- arch/powerpc/mm/hash_utils_64.c| 2 +- arch/powerpc/mm/hugetlbpage.c | 2 ++ arch/powerpc/mm/mmu_context_nohash.c | 11 +-- arch/powerpc/mm/slice.c| 58 +++--- arch/powerpc/platforms/Kconfig.cputype | 1 + 11 files changed, 95 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 5bb3dbede41a..5f89b6010453 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -169,6 +169,12 @@ typedef struct { unsigned int id; unsigned int active; unsigned long vdso_base; +#ifdef CONFIG_PPC_MM_SLICES + u16 user_psize; /* page size index */ + u64 low_slices_psize; /* page size encodings */ + unsigned char high_slices_psize[0]; + unsigned long slb_addr_limit; +#endif } mm_context_t; #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff8) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 8da5d4c1cab2..d0384f9db9eb 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -342,6 +342,20 @@ typedef struct page *pgtable_t; #endif #endif +#ifdef CONFIG_PPC_MM_SLICES +struct mm_struct; + +unsigned long slice_get_unmapped_area(unsigned long
[PATCH 3/3] powerpc/8xx: Increase the number of mm slices
On the 8xx, we can have as many slices as PMD entries. This means we could have 1024 slices in 4k size pages mode and 64 slices in 16k size pages. However, due to a stack overflow in slice_get_unmapped_area(), we limit to 512 slices. Signed-off-by: Christophe Leroy--- arch/powerpc/include/asm/mmu-8xx.h | 6 +- arch/powerpc/include/asm/page_32.h | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index d669d0062da4..40aa7b0cd0dc 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -171,7 +171,11 @@ typedef struct { unsigned long vdso_base; #ifdef CONFIG_PPC_MM_SLICES u16 user_psize; /* page size index */ - unsigned char low_slices_psize[8]; /* 16 slices */ +#if defined(CONFIG_PPC_16K_PAGES) + unsigned char low_slices_psize[32]; /* 64 slices */ +#else + unsigned char low_slices_psize[256]; /* 512 slices */ +#endif unsigned char high_slices_psize[0]; unsigned long slb_addr_limit; #endif diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index f7d1bd1183c8..43695ce7ee07 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -62,7 +62,8 @@ extern void copy_page(void *to, void *from); #ifdef CONFIG_PPC_MM_SLICES -#define SLICE_LOW_SHIFT28 +/* SLICE_LOW_SHIFT >= 23 to avoid stack overflow in slice_get_unmapped_area() */ +#define SLICE_LOW_SHIFT(PMD_SHIFT > 23 ? PMD_SHIFT : 23) #define SLICE_HIGH_SHIFT 0 #define SLICE_LOW_TOP (0xul) -- 2.13.3
[PATCH v4 1/7] platform/pseries: Update VF config space after EEH
Add EEH platform operations for pseries to update VF config space. With this change after EEH, the VF will have updated config space for pseries platform. Signed-off-by: Bryant G. LySigned-off-by: Juan J. Alvarez --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh.c| 59 + arch/powerpc/platforms/powernv/eeh-powernv.c | 65 ++-- arch/powerpc/platforms/pseries/eeh_pseries.c | 26 ++- 4 files changed, 88 insertions(+), 63 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 5161c37dd039..82829c65f31a 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -297,6 +297,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option); int eeh_pe_configure(struct eeh_pe *pe); int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, unsigned long addr, unsigned long mask); +int eeh_restore_vf_config(struct pci_dn *pdn); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index cbca0a667682..cc649809885e 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -740,6 +740,65 @@ static void *eeh_restore_dev_state(void *data, void *userdata) return NULL; } +int eeh_restore_vf_config(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 devctl, cmd, cap2, aer_capctl; + int old_mps; + + if (edev->pcie_cap) { + /* Restore MPS */ + old_mps = (ffs(pdn->mps) - 8) << 5; + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, +2, ); + devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; + devctl |= old_mps; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + + /* Disable Completion Timeout */ + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, +4, ); + if (cap2 & 0x10) { + eeh_ops->read_config(pdn, +edev->pcie_cap + PCI_EXP_DEVCTL2, +4, ); + cap2 |= 0x10; + eeh_ops->write_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, cap2); + } + } + + /* Enable SERR and parity checking 
*/ + eeh_ops->read_config(pdn, PCI_COMMAND, 2, ); + cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); + eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); + + /* Enable report various errors */ + if (edev->pcie_cap) { + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, +2, ); + devctl &= ~PCI_EXP_DEVCTL_CERE; + devctl |= (PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + } + + /* Enable ECRC generation and check */ + if (edev->pcie_cap && edev->aer_cap) { + eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, +4, _capctl); + aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); + eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, + 4, aer_capctl); + } + + return 0; +} + /** * pcibios_set_pcie_reset_state - Set PCI-E reset state * @dev: pci device struct diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 961e64115d92..0665b6d03cb3 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1655,70 +1655,11 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) return ret; } -static int pnv_eeh_restore_vf_config(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - u32 devctl, cmd, cap2, aer_capctl; - int old_mps; - - if (edev->pcie_cap) { - /* Restore MPS */ - old_mps = (ffs(pdn->mps) - 8) << 5; - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, -2, ); - devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; - devctl |= old_mps; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - - /* Disable Completion Timeout */ -
[PATCH v4 3/7] platforms/pseries: Set eeh_pe of EEH_PE_VF type
To correctly use EEH code one has to make sure that the EEH_PE_VF is set for dynamic created VFs. Therefore this patch allocates an eeh_pe of eeh type EEH_PE_VF and associates PE with parent. Signed-off-by: Bryant G. LySigned-off-by: Juan J. Alvarez --- arch/powerpc/include/asm/pci-bridge.h| 5 - arch/powerpc/platforms/pseries/eeh_pseries.c | 17 + 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 9f66ddebb799..16d70740a76f 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -211,7 +211,10 @@ struct pci_dn { unsigned int *pe_num_map; /* PE# for the first VF PE or array */ boolm64_single_mode;/* Use M64 BAR in Single Mode */ #define IODA_INVALID_M64(-1) - int (*m64_map)[PCI_SRIOV_NUM_BARS]; + union { + int (*m64_map)[PCI_SRIOV_NUM_BARS]; /*Only used in powernv */ + int last_allow_rc; /* Only used in pSeries */ + }; #endif /* CONFIG_PCI_IOV */ int mps;/* Maximum Payload Size */ struct list_head child_list; diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index a671ef4f57f5..ca6bbfd83701 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -55,9 +55,12 @@ static int ibm_get_config_addr_info; static int ibm_get_config_addr_info2; static int ibm_configure_pe; +#ifdef CONFIG_PCI_IOV void pseries_pcibios_bus_add_device(struct pci_dev *pdev) { struct pci_dn *pdn = pci_get_pdn(pdev); + struct pci_dn *physfn_pdn; + struct eeh_dev *edev; if (!pdev->is_virtfn) return; @@ -65,6 +68,14 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) pdn->device_id = pdev->device; pdn->vendor_id = pdev->vendor; pdn->class_code = pdev->class; + /* Last allow unfreeze return code used for retrieval +* by user space in eeh-sysfs to show the last command +* completion from platform +*/ + pdn->last_allow_rc = 0; + physfn_pdn = 
pci_get_pdn(pdev->physfn); + pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index]; + edev = pdn_to_eeh_dev(pdn); /* * The following operations will fail if VF's sysfs files @@ -72,9 +83,13 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) */ eeh_add_device_early(pdn); eeh_add_device_late(pdev); + edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8); + eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */ + eeh_add_to_parent_pe(edev); /* Add as VF PE type */ eeh_sysfs_add_device(pdev); } +#endif /* * Buffer for reporting slot-error-detail rtas calls. Its here @@ -141,8 +156,10 @@ static int pseries_eeh_init(void) /* Set EEH probe mode */ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); +#ifdef CONFIG_PCI_IOV /* Set EEH machine dependent code */ ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; +#endif return 0; } -- 2.14.3 (Apple Git-98)
[PATCH v4 6/7] pseries/pci: Associate PEs to VFs in configure SR-IOV
After initial validation of SR-IOV resources, firmware will associate PEs to the dynamic VFs created within this call. This patch adds the association of PEs to the PF array of PE numbers indexed by VF. Signed-off-by: Bryant G. LySigned-off-by: Juan J. Alvarez --- arch/powerpc/platforms/pseries/pci.c | 150 ++- 1 file changed, 148 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 48d3af026f90..eab96637d6cf 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -59,16 +59,162 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); #endif #ifdef CONFIG_PCI_IOV +#define MAX_VFS_FOR_MAP_PE 256 +struct pe_map_bar_entry { + __be64 bar; /* Input: Virtual Function BAR */ + __be16 rid; /* Input: Virtual Function Router ID */ + __be16 pe_num;/* Output: Virtual Function PE Number */ + __be32 reserved; /* Reserved Space */ +}; + +int pseries_send_map_pe(struct pci_dev *pdev, + u16 num_vfs, + struct pe_map_bar_entry *vf_pe_array) +{ + struct pci_dn *pdn; + int rc; + unsigned long buid, addr; + int ibm_map_pes = rtas_token("ibm,open-sriov-map-pe-number"); + + if (ibm_map_pes == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + pdn = pci_get_pdn(pdev); + addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + buid = pdn->phb->buid; + spin_lock(_data_buf_lock); + memcpy(rtas_data_buf, vf_pe_array, + RTAS_DATA_BUF_SIZE); + rc = rtas_call(ibm_map_pes, 5, 1, NULL, addr, + BUID_HI(buid), BUID_LO(buid), + rtas_data_buf, + num_vfs * sizeof(struct pe_map_bar_entry)); + memcpy(vf_pe_array, rtas_data_buf, RTAS_DATA_BUF_SIZE); + spin_unlock(_data_buf_lock); + + if (rc) + dev_err(>dev, + "%s: Failed to associate pes PE#%lx, rc=%x\n", + __func__, addr, rc); + + return rc; +} + +void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) +{ + struct pci_dn *pdn; + + pdn = pci_get_pdn(pdev); + pdn->pe_num_map[vf_index] = be16_to_cpu(pe_num); + dev_dbg(>dev, 
"VF %04x:%02x:%02x.%x associated with PE#%x\n", + pci_domain_nr(pdev->bus), + pdev->bus->number, + PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), + PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), + pdn->pe_num_map[vf_index]); +} + +int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_dn *pdn; + int i, rc, vf_index; + struct pe_map_bar_entry *vf_pe_array; + struct resource *res; + u64 size; + + vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!vf_pe_array) + return -ENOMEM; + + pdn = pci_get_pdn(pdev); + /* create firmware structure to associate pes */ + for (vf_index = 0; vf_index < num_vfs; vf_index++) { + pdn->pe_num_map[vf_index] = IODA_INVALID_PE; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = >resource[i + PCI_IOV_RESOURCES]; + if (!res->parent) + continue; + size = pcibios_iov_resource_alignment(pdev, i + + PCI_IOV_RESOURCES); + vf_pe_array[vf_index].bar = + cpu_to_be64(res->start + size * vf_index); + vf_pe_array[vf_index].rid = + cpu_to_be16((pci_iov_virtfn_bus(pdev, vf_index) + << 8) | pci_iov_virtfn_devfn(pdev, + vf_index)); + vf_pe_array[vf_index].pe_num = + cpu_to_be16(IODA_INVALID_PE); + } + } + + rc = pseries_send_map_pe(pdev, num_vfs, vf_pe_array); + /* Only zero is success */ + if (!rc) + for (vf_index = 0; vf_index < num_vfs; vf_index++) + pseries_set_pe_num(pdev, vf_index, + vf_pe_array[vf_index].pe_num); + + kfree(vf_pe_array); + return rc; +} + +int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_dn *pdn; + intrc; + const int *max_vfs; + int max_config_vfs; + struct device_node *dn = pci_device_to_OF_node(pdev); + + max_vfs = of_get_property(dn, "ibm,number-of-configurable-vfs", NULL); + + if (!max_vfs) + return -EINVAL; + + /* First integer
[PATCH v4 4/7] powerpc/kernel: Add EEH operations to notify resume
When pseries SR-IOV is enabled and after a PF driver has resumed from EEH, platform has to be notified of the event so the child VFs can be allowed to resume their normal recovery path. This patch makes the EEH operation allow unfreeze platform dependent code and adds the call to pseries EEH code. Signed-off-by: Bryant G. LySigned-off-by: Juan J. Alvarez --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/platforms/powernv/eeh-powernv.c | 3 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 96 +++- 3 files changed, 98 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 82829c65f31a..fd37cc101f4f 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -214,6 +214,7 @@ struct eeh_ops { int (*write_config)(struct pci_dn *pdn, int where, int size, u32 val); int (*next_error)(struct eeh_pe **pe); int (*restore_config)(struct pci_dn *pdn); + int (*notify_resume)(struct pci_dn *pdn); }; extern int eeh_subsystem_flags; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 0665b6d03cb3..33c86c1a1720 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1704,7 +1704,8 @@ static struct eeh_ops pnv_eeh_ops = { .read_config= pnv_eeh_read_config, .write_config = pnv_eeh_write_config, .next_error = pnv_eeh_next_error, - .restore_config = pnv_eeh_restore_config + .restore_config = pnv_eeh_restore_config, + .notify_resume = NULL }; #ifdef CONFIG_PCI_IOV diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index ca6bbfd83701..898bb055cb19 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -749,6 +749,97 @@ static int pseries_eeh_restore_config(struct pci_dn *pdn) return ret; } +#ifdef CONFIG_PCI_IOV +int pseries_send_allow_unfreeze(struct pci_dn *pdn, + u16 
*vf_pe_array, int cur_vfs) +{ + int rc; + int ibm_allow_unfreeze = rtas_token("ibm,open-sriov-allow-unfreeze"); + unsigned long buid, addr; + + addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + buid = pdn->phb->buid; + spin_lock(_data_buf_lock); + memcpy(rtas_data_buf, vf_pe_array, RTAS_DATA_BUF_SIZE); + rc = rtas_call(ibm_allow_unfreeze, 5, 1, NULL, + addr, + BUID_HI(buid), + BUID_LO(buid), + rtas_data_buf, cur_vfs * sizeof(u16)); + spin_unlock(_data_buf_lock); + if (rc) + pr_warn("%s: Failed to allow unfreeze for PHB#%x-PE#%lx, rc=%x\n", + __func__, + pdn->phb->global_number, addr, rc); + return rc; +} + +static int pseries_call_allow_unfreeze(struct eeh_dev *edev) +{ + struct pci_dn *pdn, *tmp, *parent, *physfn_pdn; + int cur_vfs = 0, rc = 0, vf_index, bus, devfn; + u16 *vf_pe_array; + + vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!vf_pe_array) + return -ENOMEM; + if (pci_num_vf(edev->physfn ? edev->physfn : edev->pdev)) { + if (edev->pdev->is_physfn) { + cur_vfs = pci_num_vf(edev->pdev); + pdn = eeh_dev_to_pdn(edev); + parent = pdn->parent; + for (vf_index = 0; vf_index < cur_vfs; vf_index++) + vf_pe_array[vf_index] = + cpu_to_be16(pdn->pe_num_map[vf_index]); + rc = pseries_send_allow_unfreeze(pdn, vf_pe_array, +cur_vfs); + pdn->last_allow_rc = rc; + for (vf_index = 0; vf_index < cur_vfs; vf_index++) { + list_for_each_entry_safe(pdn, tmp, +>child_list, +list) { + bus = pci_iov_virtfn_bus(edev->pdev, +vf_index); + devfn = pci_iov_virtfn_devfn(edev->pdev, +vf_index); + if (pdn->busno != bus || + pdn->devfn != devfn) + continue; + pdn->last_allow_rc = rc; + } +
[PATCH 2/3] powerpc/mm: Allow more than 16 low slices
While the implementation of the "slices" address space allows a significant amount of high slices, it limits the number of low slices to 16 due to the use of a single u64 low_slices element in struct slice_mask. In order to override this limitation, this patch switches the handling of low_slices to BITMAPs as done already for high_slices. Signed-off-by: Christophe Leroy--- arch/powerpc/include/asm/book3s/64/mmu.h | 2 +- arch/powerpc/include/asm/mmu-8xx.h | 2 +- arch/powerpc/include/asm/paca.h | 2 +- arch/powerpc/kernel/paca.c | 3 +- arch/powerpc/mm/hash_utils_64.c | 13 ++-- arch/powerpc/mm/slb_low.S| 8 ++- arch/powerpc/mm/slice.c | 102 +-- 7 files changed, 73 insertions(+), 59 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index c9448e19847a..27e7e9732ea1 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -91,7 +91,7 @@ typedef struct { struct npu_context *npu_context; #ifdef CONFIG_PPC_MM_SLICES - u64 low_slices_psize; /* SLB page size encodings */ + unsigned char low_slices_psize[8]; /* SLB page size encodings */ unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; unsigned long slb_addr_limit; #else diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 5f89b6010453..d669d0062da4 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -171,7 +171,7 @@ typedef struct { unsigned long vdso_base; #ifdef CONFIG_PPC_MM_SLICES u16 user_psize; /* page size index */ - u64 low_slices_psize; /* page size encodings */ + unsigned char low_slices_psize[8]; /* 16 slices */ unsigned char high_slices_psize[0]; unsigned long slb_addr_limit; #endif diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 3892db93b837..612017054825 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -141,7 +141,7 @@ struct paca_struct { #ifdef 
CONFIG_PPC_BOOK3S mm_context_id_t mm_ctx_id; #ifdef CONFIG_PPC_MM_SLICES - u64 mm_ctx_low_slices_psize; + unsigned char mm_ctx_low_slices_psize[8]; unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; unsigned long mm_ctx_slb_addr_limit; #else diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d6597038931d..8e1566bf82b8 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -264,7 +264,8 @@ void copy_mm_to_paca(struct mm_struct *mm) #ifdef CONFIG_PPC_MM_SLICES VM_BUG_ON(!mm->context.slb_addr_limit); get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit; - get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize; + memcpy(_paca()->mm_ctx_low_slices_psize, + >low_slices_psize, sizeof(context->low_slices_psize)); memcpy(_paca()->mm_ctx_high_slices_psize, >high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); #else /* CONFIG_PPC_MM_SLICES */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 3266b3326088..2f0c6b527a83 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1097,19 +1097,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) #ifdef CONFIG_PPC_MM_SLICES static unsigned int get_paca_psize(unsigned long addr) { - u64 lpsizes; - unsigned char *hpsizes; + unsigned char *psizes; unsigned long index, mask_index; if (addr <= SLICE_LOW_TOP) { - lpsizes = get_paca()->mm_ctx_low_slices_psize; + psizes = get_paca()->mm_ctx_low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); - return (lpsizes >> (index * 4)) & 0xF; + } else { + psizes = get_paca()->mm_ctx_high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr); } - hpsizes = get_paca()->mm_ctx_high_slices_psize; - index = GET_HIGH_SLICE_INDEX(addr); mask_index = index & 0x1; - return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF; + return (psizes[index >> 1] >> (mask_index * 4)) & 0xF; } #else diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 
2cf5ef3fc50d..2c7c717fd2ea 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -200,10 +200,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) 5: /* * Handle lpsizes -* r9 is get_paca()->context.low_slices_psize, r11 is index +* r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index */ - ld r9,PACALOWSLICESPSIZE(r13) - mr r11,r10 + srdir11,r10,1 /* index */ +
[PATCH v4 2/7] linux/pci: Add uevents in AER and EEH error/resume
Devices can go offline when errors are reported. This patch adds a change to the kernel object and lets udev know of error. When device resumes, a change is also set reporting device as online. Therefore, EEH and AER events are better propagated to user space for PCI devices in all arches. Signed-off-by: Bryant G. LySigned-off-by: Juan J. Alvarez Acked-by: Bjorn Helgaas --- arch/powerpc/kernel/eeh_driver.c | 6 ++ drivers/pci/pcie/aer/aerdrv_core.c | 3 +++ include/linux/pci.h| 36 3 files changed, 45 insertions(+) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 3c0fa99c5533..beea2182d754 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -228,6 +228,7 @@ static void *eeh_report_error(void *data, void *userdata) edev->in_error = true; eeh_pcid_put(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); return NULL; } @@ -381,6 +382,10 @@ static void *eeh_report_resume(void *data, void *userdata) driver->err_handler->resume(dev); eeh_pcid_put(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); +#ifdef CONFIG_PCI_IOV + eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); +#endif return NULL; } @@ -416,6 +421,7 @@ static void *eeh_report_failure(void *data, void *userdata) driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); eeh_pcid_put(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); return NULL; } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 744805232155..8d7448063fd1 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -278,6 +278,7 @@ static int report_error_detected(struct pci_dev *dev, void *data) } else { err_handler = dev->driver->err_handler; vote = err_handler->error_detected(dev, result_data->state); + pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); } result_data->result = merge_result(result_data->result, vote); @@ -341,6 +342,7 @@ static int report_resume(struct pci_dev *dev, void *data) 
err_handler = dev->driver->err_handler; err_handler->resume(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); out: device_unlock(>dev); return 0; @@ -541,6 +543,7 @@ static void do_recovery(struct pci_dev *dev, int severity) return; failed: + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ dev_info(>dev, "AER: Device recovery failed\n"); } diff --git a/include/linux/pci.h b/include/linux/pci.h index e3e94467687a..405630441b74 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2277,6 +2277,42 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) return false; } +/** + * pci_uevent_ers - emit a uevent during recovery path of pci device + * @pdev: pci device to check + * @err_type: type of error event + * + */ +static inline void pci_uevent_ers(struct pci_dev *pdev, + enum pci_ers_result err_type) +{ + int idx = 0; + char *envp[3]; + + switch (err_type) { + case PCI_ERS_RESULT_NONE: + case PCI_ERS_RESULT_CAN_RECOVER: + envp[idx++] = "ERROR_EVENT=BEGIN_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=0"; + break; + case PCI_ERS_RESULT_RECOVERED: + envp[idx++] = "ERROR_EVENT=SUCCESSFUL_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=1"; + break; + case PCI_ERS_RESULT_DISCONNECT: + envp[idx++] = "ERROR_EVENT=FAILED_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=0"; + break; + default: + break; + } + + if (idx > 0) { + envp[idx++] = NULL; + kobject_uevent_env(>dev.kobj, KOBJ_CHANGE, envp); + } +} + /* provide the legacy pci_dma_* API */ #include -- 2.14.3 (Apple Git-98)
[PATCH v4 0/7] SR-IOV Enablement on PowerVM
This patch series will enable SR-IOV on PowerVM. A specific set of lids for PFW/PHYP is required. They are planned to release with 920 at the moment. For IBM internal testers let me know of a system you want to test on and we can put on the lids required or we can provide a system to run the tests. This patch depends on the three patches: 988fc3ba5653278a8c14d6ccf687371775930d2b dae7253f9f78a731755ca20c66b2d2c40b86baea 608c0d8804ef3ca4cda8ec6ad914e47deb283d7b v1 - Initial Patch v2 - Addressed Alexey and Russell's comments v3 - Unify the call of .error_detected() v4 - Fixed subject and change log per Bjorn's comments and fixed Alexey's comments Bryant G. Ly (7): platform/pseries: Update VF config space after EEH linux/pci: Add uevents in AER and EEH error/resume platforms/pseries: Set eeh_pe of EEH_PE_VF type powerpc/kernel Add EEH operations to notify resume powerpc/kernel: Add EEH notify resume sysfs pseries/pci: Associate PEs to VFs in configure SR-IOV pseries/setup: Add Initialization of VF Bars arch/powerpc/include/asm/eeh.h | 2 + arch/powerpc/include/asm/pci-bridge.h| 5 +- arch/powerpc/include/asm/pci.h | 2 + arch/powerpc/kernel/eeh.c| 59 ++ arch/powerpc/kernel/eeh_driver.c | 6 + arch/powerpc/kernel/eeh_sysfs.c | 45 arch/powerpc/kernel/pci_of_scan.c| 2 +- arch/powerpc/platforms/powernv/eeh-powernv.c | 68 +-- arch/powerpc/platforms/pseries/eeh_pseries.c | 137 +- arch/powerpc/platforms/pseries/pci.c | 150 +++- arch/powerpc/platforms/pseries/setup.c | 164 +++ drivers/pci/pcie/aer/aerdrv_core.c | 3 + include/linux/pci.h | 36 ++ 13 files changed, 611 insertions(+), 68 deletions(-) -- 2.14.3 (Apple Git-98)
[PATCH v4 7/7] pseries/setup: Add Initialization of VF Bars
When enabling SR-IOV in pseries platform, the VF bar properties for a PF are reported on the device node in the device tree. This patch adds the IOV Bar resources to Linux structures from the device tree for later use when configuring SR-IOV by PF driver. Signed-off-by: Bryant G. LySigned-off-by: Juan J. Alvarez --- arch/powerpc/include/asm/pci.h | 2 + arch/powerpc/kernel/pci_of_scan.c | 2 +- arch/powerpc/platforms/pseries/setup.c | 164 + 3 files changed, 167 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 8dc32eacc97c..d82802ff5088 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -121,6 +121,8 @@ extern int remove_phb_dynamic(struct pci_controller *phb); extern struct pci_dev *of_create_pci_dev(struct device_node *node, struct pci_bus *bus, int devfn); +extern unsigned int pci_parse_of_flags(u32 addr0, int bridge); + extern void of_scan_pci_bridge(struct pci_dev *dev); extern void of_scan_bus(struct device_node *node, struct pci_bus *bus); diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 0d790f8432d2..20ceec4a5f5e 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -38,7 +38,7 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def) * @addr0: value of 1st cell of a device tree PCI address. 
* @bridge: Set this flag if the address is from a bridge 'ranges' property */ -static unsigned int pci_parse_of_flags(u32 addr0, int bridge) +unsigned int pci_parse_of_flags(u32 addr0, int bridge) { unsigned int flags = 0; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 1d6e2de2445c..e8f523cb5526 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -459,6 +459,162 @@ static void __init find_and_init_phbs(void) of_pci_check_probe_only(); } +#ifdef CONFIG_PCI_IOV +enum rtas_iov_fw_value_map { + NUM_RES_PROPERTY = 0, /* Number of Resources */ + LOW_INT = 1, /* Lowest 32 bits of Address */ + START_OF_ENTRIES = 2, /* Always start of entry */ + APERTURE_PROPERTY = 2, /* Start of entry+ to Aperture Size */ + WDW_SIZE_PROPERTY = 4, /* Start of entry+ to Window Size */ + NEXT_ENTRY= 7 /* Go to next entry on array */ +}; + +enum get_iov_fw_value_index { + BAR_ADDRS = 1,/* Get Bar Address */ + APERTURE_SIZE = 2,/* Get Aperture Size */ + WDW_SIZE = 3 /* Get Window Size */ +}; + +resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, +enum get_iov_fw_value_index value) +{ + const int *indexes; + struct device_node *dn = pci_device_to_OF_node(dev); + int i, num_res, ret = 0; + + indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); + if (!indexes) + return 0; + + /* +* First element in the array is the number of Bars +* returned. 
Search through the list to find the matching +* bar +*/ + num_res = of_read_number([NUM_RES_PROPERTY], 1); + if (resno >= num_res) + return 0; /* or an errror */ + + i = START_OF_ENTRIES + NEXT_ENTRY * resno; + switch (value) { + case BAR_ADDRS: + ret = of_read_number([i], 2); + break; + case APERTURE_SIZE: + ret = of_read_number([i + APERTURE_PROPERTY], 2); + break; + case WDW_SIZE: + ret = of_read_number([i + WDW_SIZE_PROPERTY], 2); + break; + } + + return ret; +} + +void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) +{ + struct resource *res; + resource_size_t base, size; + int i, r, num_res; + + num_res = of_read_number([NUM_RES_PROPERTY], 1); + num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS); + for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS; +i += NEXT_ENTRY, r++) { + res = >resource[r + PCI_IOV_RESOURCES]; + base = of_read_number([i], 2); + size = of_read_number([i + APERTURE_PROPERTY], 2); + res->flags = pci_parse_of_flags(of_read_number + ([i + LOW_INT], 1), 0); + res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED); + res->name = pci_name(dev); + res->start = base; + res->end = base + size - 1; + } +} + +void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) +{ + struct resource *res, *root, *conflict; + resource_size_t base, size; +
[PATCH v4 5/7] powerpc/kernel: Add EEH notify resume sysfs
Introduce a method for notify resume to be called from sysfs. In this patch one can now call notify resume from sysfs when is supported by platform. Signed-off-by: Bryant G. LySigned-off-by: Juan J. Alvarez --- arch/powerpc/kernel/eeh_sysfs.c | 45 + 1 file changed, 45 insertions(+) diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 797549289798..9c513abc102c 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -90,6 +90,38 @@ static ssize_t eeh_pe_state_store(struct device *dev, static DEVICE_ATTR_RW(eeh_pe_state); +#ifdef CONFIG_PCI_IOV +static ssize_t eeh_notify_resume_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!edev || !edev->pe) + return -ENODEV; + + pdn = pci_get_pdn(pdev); + return sprintf(buf, "%d\n", pdn->last_allow_rc); +} + +static ssize_t eeh_notify_resume_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + + if (!edev || !edev->pe) + return -ENODEV; + + if (eeh_ops->notify_resume(pci_get_pdn(pdev))) + return -EIO; + return count; +} +static DEVICE_ATTR_RW(eeh_notify_resume); +#endif + void eeh_sysfs_add_device(struct pci_dev *pdev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); @@ -105,6 +137,13 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) rc += device_create_file(>dev, _attr_eeh_pe_config_addr); rc += device_create_file(>dev, _attr_eeh_pe_state); +#ifdef CONFIG_PCI_IOV + if (of_get_property(pci_device_to_OF_node + ((pdev->is_physfn ? 
pdev : pdev->physfn)), + "ibm,is-open-sriov-pf", NULL)) + rc += device_create_file(>dev, +_attr_eeh_notify_resume); +#endif if (rc) pr_warn("EEH: Unable to create sysfs entries\n"); else if (edev) @@ -128,6 +167,12 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) device_remove_file(>dev, _attr_eeh_mode); device_remove_file(>dev, _attr_eeh_pe_config_addr); device_remove_file(>dev, _attr_eeh_pe_state); +#ifdef CONFIG_PCI_IOV + if (of_get_property(pci_device_to_OF_node + ((pdev->is_physfn ? pdev : pdev->physfn)), + "ibm,is-open-sriov-pf", NULL)) + device_remove_file(>dev, _attr_eeh_notify_resume); +#endif if (edev) edev->mode &= ~EEH_DEV_SYSFS; -- 2.14.3 (Apple Git-98)
Re: [RFC FIX v1 1/2] powerpc: Discover radix availability before scanning the memory nodes
On 01/05/2018 04:35 PM, Bharata B Rao wrote: Currently device tree nodes for memory are scanned before the radix feature is discovered in mmu_early_init_devtree(). Move this routine ahead of scanning memory nodes so that we know if the guest is radix or not when scanning ibm,dynamic-reconfiguration-memory. Signed-off-by: Bharata B Rao--- arch/powerpc/kernel/prom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index b15bae2..079d893 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -722,6 +722,8 @@ void __init early_init_devtree(void *params) */ of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); + mmu_early_init_devtree(); + /* Scan memory nodes and rebuild MEMBLOCKs */ of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); @@ -783,8 +785,6 @@ void __init early_init_devtree(void *params) spinning_secondaries = boot_cpu_count - 1; #endif - mmu_early_init_devtree(); - #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); I guess that will cause issues with pa-feature scanning. I don't think we finalize cpu features/mmu features that early. -aneesh
[RFC FIX v1 2/2] powerpc: Fix memory unplug failure on radix guest
For a PowerKVM guest, it is possible to explicitly specify a DIMM device in addition to the system RAM at boot time. When such a cold plugged DIMM device is removed from a radix guest, we hit the following warning in the guest kernel resulting in the eventual failure of memory unplug: remove_pud_table: unaligned range WARNING: CPU: 3 PID: 164 at arch/powerpc/mm/pgtable-radix.c:597 remove_pagetable+0x468/0xca0 Call Trace: remove_pagetable+0x464/0xca0 (unreliable) radix__remove_section_mapping+0x24/0x40 remove_section_mapping+0x28/0x60 arch_remove_memory+0xcc/0x120 remove_memory+0x1ac/0x270 dlpar_remove_lmb+0x1ac/0x210 dlpar_memory+0xbc4/0xeb0 pseries_hp_work_fn+0x1a4/0x230 process_one_work+0x1cc/0x660 worker_thread+0xac/0x6d0 kthread+0x16c/0x1b0 ret_from_kernel_thread+0x5c/0x74 The DIMM memory that is cold plugged gets merged to the same memblock region as RAM and hence gets mapped at 1G alignment. However since the removal is done for one LMB (lmb size 256MB) at a time, the address of the LMB (which is 256MB aligned) would get flagged as unaligned in remove_pud_table() resulting in the above failure. This problem is not seen for hot plugged memory because for the hot plugged memory, the mappings are created separately for each LMB and hence they all get aligned at 256MB. To fix this problem for the cold plugged memory, let us mark the cold plugged memblock region explicitly as hotplugged so that the region doesn't get merged with RAM. All the memory that is discovered via ibm,dynamic-reconfiguration-memory is marked so(1). Next identify such regions in radix_init_pgtable() and create separate mappings within that region for each LMB so that they get don't get aligned like RAM region at 1G (2). (1) The effect of marking the memory as hotplugged is that the marked memory falls into ZONE_MOVABLE if movable_node kernel command line option is enabled. This means no kernel allocations can occur from this memory. 
This should be reasonable to expect for hotplugged memory but has an undesirable effect on PowerVM. On PowerVM, all the memory except RMA is represented via ibm,dynamic-reconfiguration-memory and hence we can't mark that entire memory as hotpluggable and movable. However since radix isn't supported on PowerVM, we make this marking conditional to radix so that PowerVM isn't affected. For PowerKVM guests, all boot time memory is represented via memory@ nodes and hot plugged/pluggable memory is represented via ibm,dynamic-reconfiguration-memory property. We are marking all the memory that is in ASSIGNED state during boot as hotplugged. With this only cold plugged memory gets marked for PowerKVM. (2) To create separate mappings for every LMB in the hot plugged region, we need lmb-size. I am currently using memory_block_size_bytes() API to get the lmb-size. Since this is early init time code, the machine type isn't probed yet and hence memory_block_size_bytes() would return the default LMB size as 16MB. Hence we end up creating separate mappings at much lower granularity than what we can ideally do for pseries machine. 
Signed-off-by: Bharata B Rao--- arch/powerpc/kernel/prom.c | 2 ++ arch/powerpc/mm/pgtable-radix.c | 17 ++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 079d893..2ad8fb1 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -525,6 +525,8 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) size = 0x8000ul - base; } memblock_add(base, size); + if (early_radix_enabled()) + memblock_mark_hotplug(base, size); } while (--rngs); } memblock_dump_all(); diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index cfbbee9..10ceced 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -278,15 +279,25 @@ static void __init radix_init_pgtable(void) { unsigned long rts_field; struct memblock_region *reg; + phys_addr_t addr; + u64 lmb_size = memory_block_size_bytes(); /* We don't support slb for radix */ mmu_slb_size = 0; /* * Create the linear mapping, using standard page size for now */ - for_each_memblock(memory, reg) - WARN_ON(create_physical_mapping(reg->base, - reg->base + reg->size)); + for_each_memblock(memory, reg) { + if (memblock_is_hotpluggable(reg)) { + for (addr = reg->base; addr < (reg->base + reg->size); + addr += lmb_size) +
[RFC FIX v1 1/2] powerpc: Discover radix availability before scanning the memory nodes
Currently device tree nodes for memory are scanned before the radix feature is discovered in mmu_early_init_devtree(). Move this routine ahead of scanning memory nodes so that we know if the guest is radix or not when scanning ibm,dynamic-reconfiguration-memory. Signed-off-by: Bharata B Rao--- arch/powerpc/kernel/prom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index b15bae2..079d893 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -722,6 +722,8 @@ void __init early_init_devtree(void *params) */ of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); + mmu_early_init_devtree(); + /* Scan memory nodes and rebuild MEMBLOCKs */ of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); @@ -783,8 +785,6 @@ void __init early_init_devtree(void *params) spinning_secondaries = boot_cpu_count - 1; #endif - mmu_early_init_devtree(); - #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); -- 2.7.4
[RFC FIX v1 0/2] powerpc: Fix memory unplug failure for radix guests
This is an RFC patchset to fix the call trace observed during memory unplug for radix guests. The problem and the fix is described in patch 2/2. The ideal fix for this is to break the bigger radix mapping into smaller mappings during memory hot removal, but in the meanwhile I am just posting an alternate and easier solution which of course, has its own limitations (mentioned in 2/2) Changes in v1: -- - Move mmu_early_init_devtree() ahead of scanning memory DT nodes so that we know if the guest is radix or not when scanning memory nodes. - Make the marking of hotpluggable only if the guest is radix. v0: http://patchwork.ozlabs.org/patch/800142/ Bharata B Rao (2): powerpc: Discover radix availability before scanning the memory nodes powerpc: Fix memory unplug failure on radix guest arch/powerpc/kernel/prom.c | 6 -- arch/powerpc/mm/pgtable-radix.c | 17 ++--- 2 files changed, 18 insertions(+), 5 deletions(-) -- 2.7.4