Re: powerpc: use mm zones more sensibly

2018-12-22 Thread Michael Ellerman
On Sun, 2018-12-16 at 16:53:49 UTC, Christoph Hellwig wrote:
> Powerpc has somewhat odd usage where ZONE_DMA is used for all memory on
> common 64-bit configs, and ZONE_DMA32 is used for 31-bit schemes.
> 
> Move to a scheme closer to what other architectures use (and I dare to
> say the intent of the system):
> 
>  - ZONE_DMA: optionally for memory < 31-bit (64-bit embedded only)
>  - ZONE_NORMAL: everything addressable by the kernel
>  - ZONE_HIGHMEM: memory > 32-bit for 32-bit kernels
> 
> Also provide information on how ZONE_DMA is used by defining
> ARCH_ZONE_DMA_BITS.
> 
> Contains various fixes from Benjamin Herrenschmidt.
> 
> Signed-off-by: Christoph Hellwig 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/25078dc1f74be16b858e914f52cc8f

cheers


Re: [PATCH 01/33] powerpc: use mm zones more sensibly

2018-12-21 Thread Benjamin Herrenschmidt
On Tue, 2018-10-09 at 15:24 +0200, Christoph Hellwig wrote:
>   * Find the least restrictive zone that is entirely below the
> @@ -324,11 +305,14 @@ void __init paging_init(void)
> printk(KERN_DEBUG "Memory hole size: %ldMB\n",
>(long int)((top_of_ram - total_ram) >> 20));
>  
> +#ifdef CONFIG_ZONE_DMA
> +   max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffffffUL >> 
> PAGE_SHIFT);
> +#endif
> +   max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
>  #ifdef CONFIG_HIGHMEM
> -   limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT);
> +   max_zone_pfns[ZONE_HIGHMEM] = max_pfn
   ^
Missing a  ";" here  --|

Sorry ... works with that fix on an old laptop with highmem.

>  #endif
> -   limit_zone_pfn(TOP_ZONE, top_of_ram >> PAGE_SHIFT);
> -   zone_limits_final = true;
> +
> free_area_init_nodes(max_zone_pfns);
>  



[PATCH] powerpc: use mm zones more sensibly

2018-12-16 Thread Christoph Hellwig
Powerpc has somewhat odd usage where ZONE_DMA is used for all memory on
common 64-bit configs, and ZONE_DMA32 is used for 31-bit schemes.

Move to a scheme closer to what other architectures use (and I dare to
say the intent of the system):

 - ZONE_DMA: optionally for memory < 31-bit (64-bit embedded only)
 - ZONE_NORMAL: everything addressable by the kernel
 - ZONE_HIGHMEM: memory > 32-bit for 32-bit kernels

Also provide information on how ZONE_DMA is used by defining
ARCH_ZONE_DMA_BITS.

Contains various fixes from Benjamin Herrenschmidt.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/Kconfig  |  8 +---
 arch/powerpc/include/asm/page.h   |  2 +
 arch/powerpc/include/asm/pgtable.h|  1 -
 arch/powerpc/kernel/dma-swiotlb.c |  6 +--
 arch/powerpc/kernel/dma.c |  7 +--
 arch/powerpc/mm/mem.c | 47 +++
 arch/powerpc/platforms/85xx/corenet_generic.c | 10 
 arch/powerpc/platforms/85xx/qemu_e500.c   |  9 
 include/linux/mmzone.h|  2 +-
 9 files changed, 25 insertions(+), 67 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4bc8edd83cee..964e22e3b8d7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -373,9 +373,9 @@ config PPC_ADV_DEBUG_DAC_RANGE
depends on PPC_ADV_DEBUG_REGS && 44x
default y
 
-config ZONE_DMA32
+config ZONE_DMA
bool
-   default y if PPC64
+   default y if PPC_BOOK3E_64
 
 config PGTABLE_LEVELS
int
@@ -868,10 +868,6 @@ config ISA
  have an IBM RS/6000 or pSeries machine, say Y.  If you have an
  embedded board, consult your board documentation.
 
-config ZONE_DMA
-   bool
-   default y
-
 config GENERIC_ISA_DMA
bool
depends on ISA_DMA_API
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index f6a1265face2..fc8c9ac0c6be 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -354,4 +354,6 @@ typedef struct page *pgtable_t;
 #endif /* __ASSEMBLY__ */
 #include 
 
+#define ARCH_ZONE_DMA_BITS 31
+
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 9679b7519a35..8af32ce93c7f 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -66,7 +66,6 @@ extern unsigned long empty_zero_page[];
 
 extern pgd_t swapper_pg_dir[];
 
-void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn);
 int dma_pfn_limit_to_zone(u64 pfn_limit);
 extern void paging_init(void);
 
diff --git a/arch/powerpc/kernel/dma-swiotlb.c 
b/arch/powerpc/kernel/dma-swiotlb.c
index 430a7d0aa2cb..7d5fc9751622 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -107,12 +107,8 @@ int __init swiotlb_setup_bus_notifier(void)
 
 void __init swiotlb_detect_4g(void)
 {
-   if ((memblock_end_of_DRAM() - 1) > 0xffffffff) {
+   if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
ppc_swiotlb_enable = 1;
-#ifdef CONFIG_ZONE_DMA32
-   limit_zone_pfn(ZONE_DMA32, (1ULL << 32) >> PAGE_SHIFT);
-#endif
-   }
 }
 
 static int __init check_swiotlb_enabled(void)
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index d442d23e182b..f55bc60274b9 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -50,7 +50,7 @@ static int dma_nommu_dma_supported(struct device *dev, u64 
mask)
return 1;
 
 #ifdef CONFIG_FSL_SOC
-   /* Freescale gets another chance via ZONE_DMA/ZONE_DMA32, however
+   /* Freescale gets another chance via ZONE_DMA, however
 * that will have to be refined if/when they support iommus
 */
return 1;
@@ -94,13 +94,10 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t 
size,
}
 
switch (zone) {
+#ifdef CONFIG_ZONE_DMA
case ZONE_DMA:
flag |= GFP_DMA;
break;
-#ifdef CONFIG_ZONE_DMA32
-   case ZONE_DMA32:
-   flag |= GFP_DMA32;
-   break;
 #endif
};
 #endif /* CONFIG_FSL_SOC */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 0a64fffabee1..c0b676c3a5ba 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -246,35 +246,19 @@ static int __init mark_nonram_nosave(void)
 }
 #endif
 
-static bool zone_limits_final;
-
 /*
- * The memory zones past TOP_ZONE are managed by generic mm code.
- * These should be set to zero since that's what every other
- * architecture does.
+ * Zones usage:
+ *
+ * We setup ZONE_DMA to be 31-bits on all platforms and ZONE_NORMAL to be
+ * everything else. GFP_DMA32 page allocations automatically fall back to
+ * ZONE_DMA.
+ *
+ * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
+ * inform the generic DMA mapping code.  32-bit only devices (if not handled
+ * by an IOMMU anywa

Re: [PATCH 01/34] powerpc: use mm zones more sensibly

2018-12-07 Thread Christian Zigotzky
I will work at the weekend to figure out where the problematic commit is.

— Christian

Sent from my iPhone

> On 7. Dec 2018, at 15:09, Christoph Hellwig  wrote:
> 
>> On Fri, Dec 07, 2018 at 11:18:18PM +1100, Michael Ellerman wrote:
>> Christoph Hellwig  writes:
>> 
>>> Ben / Michael,
>>> 
>>> can we get this one queued up for 4.21 to prepare for the DMA work later
>>> on?
>> 
>> I was hoping the PASEMI / NXP regressions could be solved before
>> merging.
>> 
>> My p5020ds is booting fine with this series, so I'm not sure why it's
>> causing problems on Christian's machine.
>> 
>> The last time I turned on my PASEMI board it tripped some breakers, so I
>> need to investigate that before I can help test that.
>> 
>> I'll see how things look on Monday and either merge the commits you
>> identified or the whole series depending on if there's any more info
>> from Christian.
> 
> Christian just confirmed everything up to at least
> "powerpc/dma: stop overriding dma_get_required_mask" works for his
> setups.


Re: [PATCH 01/34] powerpc: use mm zones more sensibly

2018-12-07 Thread Christoph Hellwig
On Fri, Dec 07, 2018 at 11:18:18PM +1100, Michael Ellerman wrote:
> Christoph Hellwig  writes:
> 
> > Ben / Michael,
> >
> > can we get this one queued up for 4.21 to prepare for the DMA work later
> > on?
> 
> I was hoping the PASEMI / NXP regressions could be solved before
> merging.
> 
> My p5020ds is booting fine with this series, so I'm not sure why it's
> causing problems on Christian's machine.
> 
> The last time I turned on my PASEMI board it tripped some breakers, so I
> need to investigate that before I can help test that.
> 
> I'll see how things look on Monday and either merge the commits you
> identified or the whole series depending on if there's any more info
> from Christian.

Christian just confirmed everything up to at least
"powerpc/dma: stop overriding dma_get_required_mask" works for his
setups.


Re: [PATCH 01/34] powerpc: use mm zones more sensibly

2018-12-07 Thread Michael Ellerman
Christoph Hellwig  writes:

> Ben / Michael,
>
> can we get this one queued up for 4.21 to prepare for the DMA work later
> on?

I was hoping the PASEMI / NXP regressions could be solved before
merging.

My p5020ds is booting fine with this series, so I'm not sure why it's
causing problems on Christian's machine.

The last time I turned on my PASEMI board it tripped some breakers, so I
need to investigate that before I can help test that.

I'll see how things look on Monday and either merge the commits you
identified or the whole series depending on if there's any more info
from Christian.

cheers

> On Wed, Nov 14, 2018 at 09:22:41AM +0100, Christoph Hellwig wrote:
>> Powerpc has somewhat odd usage where ZONE_DMA is used for all memory on
>> common 64-bit configs, and ZONE_DMA32 is used for 31-bit schemes.
>> 
>> Move to a scheme closer to what other architectures use (and I dare to
>> say the intent of the system):
>> 
>>  - ZONE_DMA: optionally for memory < 31-bit (64-bit embedded only)
>>  - ZONE_NORMAL: everything addressable by the kernel
>>  - ZONE_HIGHMEM: memory > 32-bit for 32-bit kernels
>> 
>> Also provide information on how ZONE_DMA is used by defining
>> ARCH_ZONE_DMA_BITS.
>> 
>> Contains various fixes from Benjamin Herrenschmidt.
>> 
>> Signed-off-by: Christoph Hellwig 
>> ---
>>  arch/powerpc/Kconfig  |  8 +---
>>  arch/powerpc/include/asm/page.h   |  2 +
>>  arch/powerpc/include/asm/pgtable.h|  1 -
>>  arch/powerpc/kernel/dma-swiotlb.c |  6 +--
>>  arch/powerpc/kernel/dma.c |  7 +--
>>  arch/powerpc/mm/mem.c | 47 +++
>>  arch/powerpc/platforms/85xx/corenet_generic.c | 10 
>>  arch/powerpc/platforms/85xx/qemu_e500.c   |  9 
>>  include/linux/mmzone.h|  2 +-
>>  9 files changed, 25 insertions(+), 67 deletions(-)
>> 
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index 8be31261aec8..c3613bc1 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -374,9 +374,9 @@ config PPC_ADV_DEBUG_DAC_RANGE
>>  depends on PPC_ADV_DEBUG_REGS && 44x
>>  default y
>>  
>> -config ZONE_DMA32
>> +config ZONE_DMA
>>  bool
>> -default y if PPC64
>> +default y if PPC_BOOK3E_64
>>  
>>  config PGTABLE_LEVELS
>>  int
>> @@ -869,10 +869,6 @@ config ISA
>>have an IBM RS/6000 or pSeries machine, say Y.  If you have an
>>embedded board, consult your board documentation.
>>  
>> -config ZONE_DMA
>> -bool
>> -default y
>> -
>>  config GENERIC_ISA_DMA
>>  bool
>>  depends on ISA_DMA_API
>> diff --git a/arch/powerpc/include/asm/page.h 
>> b/arch/powerpc/include/asm/page.h
>> index f6a1265face2..fc8c9ac0c6be 100644
>> --- a/arch/powerpc/include/asm/page.h
>> +++ b/arch/powerpc/include/asm/page.h
>> @@ -354,4 +354,6 @@ typedef struct page *pgtable_t;
>>  #endif /* __ASSEMBLY__ */
>>  #include 
>>  
>> +#define ARCH_ZONE_DMA_BITS 31
>> +
>>  #endif /* _ASM_POWERPC_PAGE_H */
>> diff --git a/arch/powerpc/include/asm/pgtable.h 
>> b/arch/powerpc/include/asm/pgtable.h
>> index 9679b7519a35..8af32ce93c7f 100644
>> --- a/arch/powerpc/include/asm/pgtable.h
>> +++ b/arch/powerpc/include/asm/pgtable.h
>> @@ -66,7 +66,6 @@ extern unsigned long empty_zero_page[];
>>  
>>  extern pgd_t swapper_pg_dir[];
>>  
>> -void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn);
>>  int dma_pfn_limit_to_zone(u64 pfn_limit);
>>  extern void paging_init(void);
>>  
>> diff --git a/arch/powerpc/kernel/dma-swiotlb.c 
>> b/arch/powerpc/kernel/dma-swiotlb.c
>> index 5fc335f4d9cd..678811abccfc 100644
>> --- a/arch/powerpc/kernel/dma-swiotlb.c
>> +++ b/arch/powerpc/kernel/dma-swiotlb.c
>> @@ -108,12 +108,8 @@ int __init swiotlb_setup_bus_notifier(void)
>>  
>>  void __init swiotlb_detect_4g(void)
>>  {
>> -if ((memblock_end_of_DRAM() - 1) > 0xffffffff) {
>> +if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
>>  ppc_swiotlb_enable = 1;
>> -#ifdef CONFIG_ZONE_DMA32
>> -limit_zone_pfn(ZONE_DMA32, (1ULL << 32) >> PAGE_SHIFT);
>> -#endif
>> -}
>>  }
>>  
>>  static int __init check_swiotlb_enabled(void)
>> diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
>> index dbfc7056d7df..6551685a4ed0 100644
>> --- a/arch/powerpc/kernel/dma.c
>> +++ b/arch/powerpc/kernel/dma.c
>> @@ -50,7 +50,7 @@ static int dma_nommu_dma_supported(struct device *dev, u64 
>> mask)
>>  return 1;
>>  
>>  #ifdef CONFIG_FSL_SOC
>> -/* Freescale gets another chance via ZONE_DMA/ZONE_DMA32, however
>> +/* Freescale gets another chance via ZONE_DMA, however
>>   * that will have to be refined if/when they support iommus
>>   */
>>  return 1;
>> @@ -94,13 +94,10 @@ void *__dma_nommu_alloc_coherent(struct device *dev, 
>> size_t size,
>>  }
>>  
>>  switch (zone) {
>> +#ifdef CONFIG_ZONE_DMA
>>  case ZONE_DMA:
>>  flag |= GFP_DMA;
>> 

Re: [PATCH 01/34] powerpc: use mm zones more sensibly

2018-12-06 Thread Christoph Hellwig
Ben / Michael,

can we get this one queued up for 4.21 to prepare for the DMA work later
on?

On Wed, Nov 14, 2018 at 09:22:41AM +0100, Christoph Hellwig wrote:
> Powerpc has somewhat odd usage where ZONE_DMA is used for all memory on
> common 64-bit configs, and ZONE_DMA32 is used for 31-bit schemes.
> 
> Move to a scheme closer to what other architectures use (and I dare to
> say the intent of the system):
> 
>  - ZONE_DMA: optionally for memory < 31-bit (64-bit embedded only)
>  - ZONE_NORMAL: everything addressable by the kernel
>  - ZONE_HIGHMEM: memory > 32-bit for 32-bit kernels
> 
> Also provide information on how ZONE_DMA is used by defining
> ARCH_ZONE_DMA_BITS.
> 
> Contains various fixes from Benjamin Herrenschmidt.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/powerpc/Kconfig  |  8 +---
>  arch/powerpc/include/asm/page.h   |  2 +
>  arch/powerpc/include/asm/pgtable.h|  1 -
>  arch/powerpc/kernel/dma-swiotlb.c |  6 +--
>  arch/powerpc/kernel/dma.c |  7 +--
>  arch/powerpc/mm/mem.c | 47 +++
>  arch/powerpc/platforms/85xx/corenet_generic.c | 10 
>  arch/powerpc/platforms/85xx/qemu_e500.c   |  9 
>  include/linux/mmzone.h|  2 +-
>  9 files changed, 25 insertions(+), 67 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 8be31261aec8..c3613bc1 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -374,9 +374,9 @@ config PPC_ADV_DEBUG_DAC_RANGE
>   depends on PPC_ADV_DEBUG_REGS && 44x
>   default y
>  
> -config ZONE_DMA32
> +config ZONE_DMA
>   bool
> - default y if PPC64
> + default y if PPC_BOOK3E_64
>  
>  config PGTABLE_LEVELS
>   int
> @@ -869,10 +869,6 @@ config ISA
> have an IBM RS/6000 or pSeries machine, say Y.  If you have an
> embedded board, consult your board documentation.
>  
> -config ZONE_DMA
> - bool
> - default y
> -
>  config GENERIC_ISA_DMA
>   bool
>   depends on ISA_DMA_API
> diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
> index f6a1265face2..fc8c9ac0c6be 100644
> --- a/arch/powerpc/include/asm/page.h
> +++ b/arch/powerpc/include/asm/page.h
> @@ -354,4 +354,6 @@ typedef struct page *pgtable_t;
>  #endif /* __ASSEMBLY__ */
>  #include 
>  
> +#define ARCH_ZONE_DMA_BITS 31
> +
>  #endif /* _ASM_POWERPC_PAGE_H */
> diff --git a/arch/powerpc/include/asm/pgtable.h 
> b/arch/powerpc/include/asm/pgtable.h
> index 9679b7519a35..8af32ce93c7f 100644
> --- a/arch/powerpc/include/asm/pgtable.h
> +++ b/arch/powerpc/include/asm/pgtable.h
> @@ -66,7 +66,6 @@ extern unsigned long empty_zero_page[];
>  
>  extern pgd_t swapper_pg_dir[];
>  
> -void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn);
>  int dma_pfn_limit_to_zone(u64 pfn_limit);
>  extern void paging_init(void);
>  
> diff --git a/arch/powerpc/kernel/dma-swiotlb.c 
> b/arch/powerpc/kernel/dma-swiotlb.c
> index 5fc335f4d9cd..678811abccfc 100644
> --- a/arch/powerpc/kernel/dma-swiotlb.c
> +++ b/arch/powerpc/kernel/dma-swiotlb.c
> @@ -108,12 +108,8 @@ int __init swiotlb_setup_bus_notifier(void)
>  
>  void __init swiotlb_detect_4g(void)
>  {
> - if ((memblock_end_of_DRAM() - 1) > 0xffffffff) {
> + if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
>   ppc_swiotlb_enable = 1;
> -#ifdef CONFIG_ZONE_DMA32
> - limit_zone_pfn(ZONE_DMA32, (1ULL << 32) >> PAGE_SHIFT);
> -#endif
> - }
>  }
>  
>  static int __init check_swiotlb_enabled(void)
> diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
> index dbfc7056d7df..6551685a4ed0 100644
> --- a/arch/powerpc/kernel/dma.c
> +++ b/arch/powerpc/kernel/dma.c
> @@ -50,7 +50,7 @@ static int dma_nommu_dma_supported(struct device *dev, u64 
> mask)
>   return 1;
>  
>  #ifdef CONFIG_FSL_SOC
> - /* Freescale gets another chance via ZONE_DMA/ZONE_DMA32, however
> + /* Freescale gets another chance via ZONE_DMA, however
>* that will have to be refined if/when they support iommus
>*/
>   return 1;
> @@ -94,13 +94,10 @@ void *__dma_nommu_alloc_coherent(struct device *dev, 
> size_t size,
>   }
>  
>   switch (zone) {
> +#ifdef CONFIG_ZONE_DMA
>   case ZONE_DMA:
>   flag |= GFP_DMA;
>   break;
> -#ifdef CONFIG_ZONE_DMA32
> - case ZONE_DMA32:
> - flag |= GFP_DMA32;
> - break;
>  #endif
>   };
>  #endif /* CONFIG_FSL_SOC */
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index 0a64fffabee1..c0b676c3a5ba 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -246,35 +246,19 @@ static int __init mark_nonram_nosave(void)
>  }
>  #endif
>  
> -static bool zone_limits_final;
> -
>  /*
> - * The memory zones past TOP_ZONE are managed by generic mm code.
> - * These should be set to zero since that's what every other
> - * 

[PATCH 01/34] powerpc: use mm zones more sensibly

2018-11-14 Thread Christoph Hellwig
Powerpc has somewhat odd usage where ZONE_DMA is used for all memory on
common 64-bit configs, and ZONE_DMA32 is used for 31-bit schemes.

Move to a scheme closer to what other architectures use (and I dare to
say the intent of the system):

 - ZONE_DMA: optionally for memory < 31-bit (64-bit embedded only)
 - ZONE_NORMAL: everything addressable by the kernel
 - ZONE_HIGHMEM: memory > 32-bit for 32-bit kernels

Also provide information on how ZONE_DMA is used by defining
ARCH_ZONE_DMA_BITS.

Contains various fixes from Benjamin Herrenschmidt.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/Kconfig  |  8 +---
 arch/powerpc/include/asm/page.h   |  2 +
 arch/powerpc/include/asm/pgtable.h|  1 -
 arch/powerpc/kernel/dma-swiotlb.c |  6 +--
 arch/powerpc/kernel/dma.c |  7 +--
 arch/powerpc/mm/mem.c | 47 +++
 arch/powerpc/platforms/85xx/corenet_generic.c | 10 
 arch/powerpc/platforms/85xx/qemu_e500.c   |  9 
 include/linux/mmzone.h|  2 +-
 9 files changed, 25 insertions(+), 67 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8be31261aec8..c3613bc1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -374,9 +374,9 @@ config PPC_ADV_DEBUG_DAC_RANGE
depends on PPC_ADV_DEBUG_REGS && 44x
default y
 
-config ZONE_DMA32
+config ZONE_DMA
bool
-   default y if PPC64
+   default y if PPC_BOOK3E_64
 
 config PGTABLE_LEVELS
int
@@ -869,10 +869,6 @@ config ISA
  have an IBM RS/6000 or pSeries machine, say Y.  If you have an
  embedded board, consult your board documentation.
 
-config ZONE_DMA
-   bool
-   default y
-
 config GENERIC_ISA_DMA
bool
depends on ISA_DMA_API
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index f6a1265face2..fc8c9ac0c6be 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -354,4 +354,6 @@ typedef struct page *pgtable_t;
 #endif /* __ASSEMBLY__ */
 #include 
 
+#define ARCH_ZONE_DMA_BITS 31
+
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 9679b7519a35..8af32ce93c7f 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -66,7 +66,6 @@ extern unsigned long empty_zero_page[];
 
 extern pgd_t swapper_pg_dir[];
 
-void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn);
 int dma_pfn_limit_to_zone(u64 pfn_limit);
 extern void paging_init(void);
 
diff --git a/arch/powerpc/kernel/dma-swiotlb.c 
b/arch/powerpc/kernel/dma-swiotlb.c
index 5fc335f4d9cd..678811abccfc 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -108,12 +108,8 @@ int __init swiotlb_setup_bus_notifier(void)
 
 void __init swiotlb_detect_4g(void)
 {
-   if ((memblock_end_of_DRAM() - 1) > 0xffffffff) {
+   if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
ppc_swiotlb_enable = 1;
-#ifdef CONFIG_ZONE_DMA32
-   limit_zone_pfn(ZONE_DMA32, (1ULL << 32) >> PAGE_SHIFT);
-#endif
-   }
 }
 
 static int __init check_swiotlb_enabled(void)
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index dbfc7056d7df..6551685a4ed0 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -50,7 +50,7 @@ static int dma_nommu_dma_supported(struct device *dev, u64 
mask)
return 1;
 
 #ifdef CONFIG_FSL_SOC
-   /* Freescale gets another chance via ZONE_DMA/ZONE_DMA32, however
+   /* Freescale gets another chance via ZONE_DMA, however
 * that will have to be refined if/when they support iommus
 */
return 1;
@@ -94,13 +94,10 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t 
size,
}
 
switch (zone) {
+#ifdef CONFIG_ZONE_DMA
case ZONE_DMA:
flag |= GFP_DMA;
break;
-#ifdef CONFIG_ZONE_DMA32
-   case ZONE_DMA32:
-   flag |= GFP_DMA32;
-   break;
 #endif
};
 #endif /* CONFIG_FSL_SOC */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 0a64fffabee1..c0b676c3a5ba 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -246,35 +246,19 @@ static int __init mark_nonram_nosave(void)
 }
 #endif
 
-static bool zone_limits_final;
-
 /*
- * The memory zones past TOP_ZONE are managed by generic mm code.
- * These should be set to zero since that's what every other
- * architecture does.
+ * Zones usage:
+ *
+ * We setup ZONE_DMA to be 31-bits on all platforms and ZONE_NORMAL to be
+ * everything else. GFP_DMA32 page allocations automatically fall back to
+ * ZONE_DMA.
+ *
+ * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
+ * inform the generic DMA mapping code.  32-bit only devices (if not handled
+ * by an IOMMU anywa

[PATCH 01/33] powerpc: use mm zones more sensibly

2018-10-09 Thread Christoph Hellwig
Powerpc has somewhat odd usage where ZONE_DMA is used for all memory on
common 64-bit configs, and ZONE_DMA32 is used for 31-bit schemes.

Move to a scheme closer to what other architectures use (and I dare to
say the intent of the system):

 - ZONE_DMA: optionally for memory < 31-bit
 - ZONE_NORMAL: everything addressable by the kernel
 - ZONE_HIGHMEM: memory > 32-bit for 32-bit kernels

Also provide information on how ZONE_DMA is used by defining
ARCH_ZONE_DMA_BITS.

Contains various fixes from Benjamin Herrenschmidt.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/Kconfig  |  6 +--
 arch/powerpc/include/asm/page.h   |  2 +
 arch/powerpc/include/asm/pgtable.h|  1 -
 arch/powerpc/kernel/dma-swiotlb.c |  6 +--
 arch/powerpc/kernel/dma.c |  7 +--
 arch/powerpc/mm/mem.c | 50 +++
 arch/powerpc/platforms/85xx/corenet_generic.c | 10 
 arch/powerpc/platforms/85xx/qemu_e500.c   |  9 
 include/linux/mmzone.h|  2 +-
 9 files changed, 24 insertions(+), 69 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a80669209155..06996df07cad 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -380,7 +380,7 @@ config PPC_ADV_DEBUG_DAC_RANGE
depends on PPC_ADV_DEBUG_REGS && 44x
default y
 
-config ZONE_DMA32
+config ZONE_DMA
bool
default y if PPC64
 
@@ -879,10 +879,6 @@ config ISA
  have an IBM RS/6000 or pSeries machine, say Y.  If you have an
  embedded board, consult your board documentation.
 
-config ZONE_DMA
-   bool
-   default y
-
 config GENERIC_ISA_DMA
bool
depends on ISA_DMA_API
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index f6a1265face2..fc8c9ac0c6be 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -354,4 +354,6 @@ typedef struct page *pgtable_t;
 #endif /* __ASSEMBLY__ */
 #include 
 
+#define ARCH_ZONE_DMA_BITS 31
+
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 14c79a7dc855..9bafb38e959e 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -37,7 +37,6 @@ extern unsigned long empty_zero_page[];
 
 extern pgd_t swapper_pg_dir[];
 
-void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn);
 int dma_pfn_limit_to_zone(u64 pfn_limit);
 extern void paging_init(void);
 
diff --git a/arch/powerpc/kernel/dma-swiotlb.c 
b/arch/powerpc/kernel/dma-swiotlb.c
index 88f3963ca30f..93a4622563c6 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -108,12 +108,8 @@ int __init swiotlb_setup_bus_notifier(void)
 
 void __init swiotlb_detect_4g(void)
 {
-   if ((memblock_end_of_DRAM() - 1) > 0xffffffff) {
+   if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
ppc_swiotlb_enable = 1;
-#ifdef CONFIG_ZONE_DMA32
-   limit_zone_pfn(ZONE_DMA32, (1ULL << 32) >> PAGE_SHIFT);
-#endif
-   }
 }
 
 static int __init check_swiotlb_enabled(void)
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index dbfc7056d7df..6551685a4ed0 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -50,7 +50,7 @@ static int dma_nommu_dma_supported(struct device *dev, u64 
mask)
return 1;
 
 #ifdef CONFIG_FSL_SOC
-   /* Freescale gets another chance via ZONE_DMA/ZONE_DMA32, however
+   /* Freescale gets another chance via ZONE_DMA, however
 * that will have to be refined if/when they support iommus
 */
return 1;
@@ -94,13 +94,10 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t 
size,
}
 
switch (zone) {
+#ifdef CONFIG_ZONE_DMA
case ZONE_DMA:
flag |= GFP_DMA;
break;
-#ifdef CONFIG_ZONE_DMA32
-   case ZONE_DMA32:
-   flag |= GFP_DMA32;
-   break;
 #endif
};
 #endif /* CONFIG_FSL_SOC */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5c8530d0c611..8bff7e893bde 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -69,15 +69,12 @@ pte_t *kmap_pte;
 EXPORT_SYMBOL(kmap_pte);
 pgprot_t kmap_prot;
 EXPORT_SYMBOL(kmap_prot);
-#define TOP_ZONE ZONE_HIGHMEM
 
 static inline pte_t *virt_to_kpte(unsigned long vaddr)
 {
return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
vaddr), vaddr), vaddr);
 }
-#else
-#define TOP_ZONE ZONE_NORMAL
 #endif
 
 int page_is_ram(unsigned long pfn)
@@ -246,35 +243,19 @@ static int __init mark_nonram_nosave(void)
 }
 #endif
 
-static bool zone_limits_final;
-
-/*
- * The memory zones past TOP_ZONE are managed by generic mm code.
- * These should be set to zero since that's what every other
- * architecture does.
- */
-static unsigned long max_zone_pfns[MA