Re: [PATCH v6 3/4] arm64: use both ZONE_DMA and ZONE_DMA32

2019-10-23 Thread Matthias Brugger



On 22/10/2019 13:23, Nicolas Saenz Julienne wrote:
> On Mon, 2019-10-21 at 16:36 -0400, Qian Cai wrote:
>> I managed to get more information here,
>>
>> [0.00] cma: dma_contiguous_reserve(limit c000)
>> [0.00] cma: dma_contiguous_reserve: reserving 64 MiB for global area
>> [0.00] cma: cma_declare_contiguous(size 0x0400, base
>> 0x, limit 0xc000 alignment 0x)
>> [0.00] cma: Failed to reserve 512 MiB
>>
>> Full dmesg:
>>
>> https://cailca.github.io/files/dmesg.txt
> 
> OK I got it, reproduced it too.
> 
> Here are the relevant logs:
> 
>   [0.00]   DMA  [mem 0x802f-0xbfff]
>   [0.00]   DMA32[mem 0xc000-0x]
>   [0.00]   Normal   [mem 0x0001-0x0097fcff]
> 
> As you can see ZONE_DMA spans from 0x802f-0xbfff which
> is slightly smaller than 1GB.
> 
>   [0.00] crashkernel reserved: 0x9fe0 - 
> 0xbfe0 (512 MB)
> 
> Here crashkernel reserved 512M in ZONE_DMA.
> 
>   [0.00] cma: Failed to reserve 512 MiB
> 
> CMA tried to allocate 512M in ZONE_DMA, which fails as there is not enough
> space.
> Makes sense.
> 
> A fix could be moving crashkernel reservations after CMA and then if unable to
> fit in ZONE_DMA try ZONE_DMA32 before bailing out. Maybe it's a little over 
> the
> top, yet although most devices will be fine with ZONE_DMA32, the RPi4 needs
> crashkernel to be reserved in ZONE_DMA.
> 
> My knowledge of Kdump is limited, so I'd love to see what Catalin has to say.
> Here's a tested patch of what I'm proposing:
> 
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 120c26af916b..49f3c3a34ae2 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -76,6 +76,7 @@ phys_addr_t arm64_dma32_phys_limit __ro_after_init;
>  static void __init reserve_crashkernel(void)
>  {
> unsigned long long crash_base, crash_size;
> +   phys_addr_t limit = arm64_dma_phys_limit;
> int ret;
> 
> ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
> @@ -86,11 +87,14 @@ static void __init reserve_crashkernel(void)
> 
> crash_size = PAGE_ALIGN(crash_size);
> 
> +again:
> if (crash_base == 0) {
> /* Current arm64 boot protocol requires 2MB alignment */
> -   crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
> -   crash_size, SZ_2M);
> -   if (crash_base == 0) {
> +   crash_base = memblock_find_in_range(0, limit, crash_size,
> SZ_2M);
> +   if (!crash_base && limit == arm64_dma_phys_limit) {
> +   limit = arm64_dma32_phys_limit;
> +   goto again;

I'd try to avoid the goto.
Apart from that, we should print an informational message stating that the
crashkernel got reserved in arm64_dma_phys_limit. Otherwise RPi4 might break
silently, and this will at least give the user a hint about what happened.

Regards,
Matthias

> +   } else if (!crash_base && limit == arm64_dma32_phys_limit) {
> pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
> crash_size);
> return;
> @@ -448,13 +452,13 @@ void __init arm64_memblock_init(void)
> else
> arm64_dma32_phys_limit = PHYS_MASK + 1;
> 
> -   reserve_crashkernel();
> -
> reserve_elfcorehdr();
> 
> high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
> 
> dma_contiguous_reserve(arm64_dma_phys_limit ? : 
> arm64_dma32_phys_limit);
> +
> +   reserve_crashkernel();
>  }
> 
>  void __init bootmem_init(void)
> 
> 
> Regards,
> Nicolas
> 


Re: [PATCH v6 3/4] arm64: use both ZONE_DMA and ZONE_DMA32

2019-10-22 Thread Nicolas Saenz Julienne
On Mon, 2019-10-21 at 16:36 -0400, Qian Cai wrote:
> I managed to get more information here,
> 
> [0.00] cma: dma_contiguous_reserve(limit c000)
> [0.00] cma: dma_contiguous_reserve: reserving 64 MiB for global area
> [0.00] cma: cma_declare_contiguous(size 0x0400, base
> 0x, limit 0xc000 alignment 0x)
> [0.00] cma: Failed to reserve 512 MiB
> 
> Full dmesg:
> 
> https://cailca.github.io/files/dmesg.txt

OK I got it, reproduced it too.

Here are the relevant logs:

[0.00]   DMA  [mem 0x802f-0xbfff]
[0.00]   DMA32[mem 0xc000-0x]
[0.00]   Normal   [mem 0x0001-0x0097fcff]

As you can see ZONE_DMA spans from 0x802f-0xbfff which
is slightly smaller than 1GB.

[0.00] crashkernel reserved: 0x9fe0 - 
0xbfe0 (512 MB)

Here crashkernel reserved 512M in ZONE_DMA.

[0.00] cma: Failed to reserve 512 MiB

CMA tried to allocate 512M in ZONE_DMA, which fails as there is not enough space.
Makes sense.

A fix could be moving the crashkernel reservation after CMA and then, if it is
unable to fit in ZONE_DMA, trying ZONE_DMA32 before bailing out. Maybe it's a
little over the top, but although most devices will be fine with ZONE_DMA32, the
RPi4 needs crashkernel to be reserved in ZONE_DMA.

My knowledge of Kdump is limited, so I'd love to see what Catalin has to say.
Here's a tested patch of what I'm proposing:

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 120c26af916b..49f3c3a34ae2 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -76,6 +76,7 @@ phys_addr_t arm64_dma32_phys_limit __ro_after_init;
 static void __init reserve_crashkernel(void)
 {
unsigned long long crash_base, crash_size;
+   phys_addr_t limit = arm64_dma_phys_limit;
int ret;

ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
@@ -86,11 +87,14 @@ static void __init reserve_crashkernel(void)

crash_size = PAGE_ALIGN(crash_size);

+again:
if (crash_base == 0) {
/* Current arm64 boot protocol requires 2MB alignment */
-   crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
-   crash_size, SZ_2M);
-   if (crash_base == 0) {
+   crash_base = memblock_find_in_range(0, limit, crash_size,
SZ_2M);
+   if (!crash_base && limit == arm64_dma_phys_limit) {
+   limit = arm64_dma32_phys_limit;
+   goto again;
+   } else if (!crash_base && limit == arm64_dma32_phys_limit) {
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
crash_size);
return;
@@ -448,13 +452,13 @@ void __init arm64_memblock_init(void)
else
arm64_dma32_phys_limit = PHYS_MASK + 1;

-   reserve_crashkernel();
-
reserve_elfcorehdr();

high_memory = __va(memblock_end_of_DRAM() - 1) + 1;

dma_contiguous_reserve(arm64_dma_phys_limit ? : arm64_dma32_phys_limit);
+
+   reserve_crashkernel();
 }

 void __init bootmem_init(void)


Regards,
Nicolas



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v6 3/4] arm64: use both ZONE_DMA and ZONE_DMA32

2019-10-21 Thread Qian Cai



> On Oct 21, 2019, at 1:55 PM, Nicolas Saenz Julienne  
> wrote:
> 
> On Mon, 2019-10-21 at 13:25 -0400, Qian Cai wrote:
>>> On Oct 21, 2019, at 1:01 PM, Nicolas Saenz Julienne 
>>> wrote:
>>> 
>>> Could you enable CMA debugging to see if anything interesting comes out of
>>> it.
>> 
>> I did but nothing interesting came out. Did you use the same config I gave?
> 
> Yes, aside from enabling ZONE_DMA.
> 
>> Also, it has those cmdline.
>> 
>> page_poison=on page_owner=on numa_balancing=enable \
>> systemd.unified_cgroup_hierarchy=1 debug_guardpage_minorder=1 \
>> page_alloc.shuffle=1
> 
> No luck, still works for me even after adding those extra flags. IIRC most of
> them (if not all) are not even parsed by the time CMA is configured.
> 
> So, can you confirm the zones setup you're seeing is similar to this one:
> 
> [0.00][T0] Zone ranges:
> [0.00][T0]   DMA  [mem 0x802f-0xbfff]
> [0.00][T0]   DMA32[mem 0xc000-0x]
> [0.00][T0]   Normal   [mem 0x0001-0x0093fcff]
> 
> Maybe your memory starts between 0xe000-0x. That would be
> problematic (although somewhat unwarranted).

I managed to get more information here,

[0.00] cma: dma_contiguous_reserve(limit c000)
[0.00] cma: dma_contiguous_reserve: reserving 64 MiB for global area
[0.00] cma: cma_declare_contiguous(size 0x0400, base 
0x, limit 0xc000 alignment 0x)
[0.00] cma: Failed to reserve 512 MiB

Full dmesg:

https://cailca.github.io/files/dmesg.txt

Re: [PATCH v6 3/4] arm64: use both ZONE_DMA and ZONE_DMA32

2019-10-21 Thread Nicolas Saenz Julienne
On Mon, 2019-10-21 at 13:25 -0400, Qian Cai wrote:
> > On Oct 21, 2019, at 1:01 PM, Nicolas Saenz Julienne 
> > wrote:
> > 
> > Could you enable CMA debugging to see if anything interesting comes out of
> > it.
> 
> I did but nothing interesting came out. Did you use the same config I gave?

Yes, aside from enabling ZONE_DMA.

> Also, it has those cmdline.
>
> page_poison=on page_owner=on numa_balancing=enable \
> systemd.unified_cgroup_hierarchy=1 debug_guardpage_minorder=1 \
> page_alloc.shuffle=1

No luck, still works for me even after adding those extra flags. IIRC most of
them (if not all) are not even parsed by the time CMA is configured.

So, can you confirm the zones setup you're seeing is similar to this one:

[0.00][T0] Zone ranges:
[0.00][T0]   DMA  [mem 0x802f-0xbfff]
[0.00][T0]   DMA32[mem 0xc000-0x]
[0.00][T0]   Normal   [mem 0x0001-0x0093fcff]

Maybe your memory starts between 0xe000-0x. That would be
problematic (although somewhat unwarranted).

Regards,
Nicolas



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v6 3/4] arm64: use both ZONE_DMA and ZONE_DMA32

2019-10-21 Thread Qian Cai



> On Oct 21, 2019, at 1:01 PM, Nicolas Saenz Julienne  
> wrote:
> 
> Could you enable CMA debugging to see if anything interesting comes out of it.

I did, but nothing interesting came out. Did you use the same config I gave?
Also, it has these cmdline options:

page_poison=on page_owner=on numa_balancing=enable \
systemd.unified_cgroup_hierarchy=1 debug_guardpage_minorder=1 \
page_alloc.shuffle=1

Re: [PATCH v6 3/4] arm64: use both ZONE_DMA and ZONE_DMA32

2019-10-21 Thread Nicolas Saenz Julienne
On Mon, 2019-10-21 at 10:46 -0400, Qian Cai wrote:
> > On Oct 21, 2019, at 10:34 AM, Nicolas Saenz Julienne  > > wrote:
> > 
> > On Mon, 2019-10-21 at 10:15 -0400, Qian Cai wrote:
> > > > On Sep 11, 2019, at 2:25 PM, Nicolas Saenz Julienne <
> > > > nsaenzjulie...@suse.de>
> > > > wrote:
> > > > 
> > > > So far all arm64 devices have supported 32 bit DMA masks for their
> > > > peripherals. This is not true anymore for the Raspberry Pi 4 as most of
> > > > it's peripherals can only address the first GB of memory on a total of
> > > > up to 4 GB.
> > > > 
> > > > This goes against ZONE_DMA32's intent, as it's expected for ZONE_DMA32
> > > > to be addressable with a 32 bit mask. So it was decided to re-introduce
> > > > ZONE_DMA in arm64.
> > > > 
> > > > ZONE_DMA will contain the lower 1G of memory, which is currently the
> > > > memory area addressable by any peripheral on an arm64 device.
> > > > ZONE_DMA32 will contain the rest of the 32 bit addressable memory.
> > > > 
> > > > Signed-off-by: Nicolas Saenz Julienne 
> > > > Reviewed-by: Catalin Marinas 
> > > > 
> > > > ---
> > > 
> > > With ZONE_DMA=y, this config will fail to reserve 512M CMA on a server,
> > > 
> > > https://raw.githubusercontent.com/cailca/linux-mm/master/arm64.config
> > > 
> > > CONFIG_DMA_CMA=y
> > > CONFIG_CMA_SIZE_MBYTES=64
> > > CONFIG_CMA_SIZE_SEL_MBYTES=y
> > > CONFIG_CMA_ALIGNMENT=8
> > > CONFIG_CMA=y
> > > CONFIG_CMA_DEBUGFS=y
> > > CONFIG_CMA_AREAS=7
> > > 
> > > Is this expected?
> > 
> > Not really, just tested cma=512M on a Raspberry Pi4, and it went well. The
> > only
> > thing on my build that differs from your config is CONFIG_CMA_DEBUGFS.
> > 
> > Could you post more information on the device you're experiencing this on?
> > Also
> > some logs.
> 
> With the above config, it does not even need "cma=512M" kernel cmdline.
> 
> [0.00] Booting Linux on physical CPU 0x00 [0x431f0af1]
> [0.00] Linux version 5.4.0-rc4-next-20191021+ (clang version 8.0.1
> (Red Hat 8.0.1-1.module+el8.1.0+3866+6be7f4d8)) #1 SMP Mon Oct 21 10:03:03 EDT
> 2019
> [0.00] Setting debug_guardpage_minorder to 1
> [0.00] efi: Getting EFI parameters from FDT:
> [0.00] efi: EFI v2.70 by American Megatrends
> [0.00] efi:  ESRT=0xf935ed98  SMBIOS=0xfcc9  SMBIOS
> 3.0=0xfcc8  ACPI 2.0=0xfac8  MEMRESERVE=0xfacd1018 
> [0.00] esrt: Reserving ESRT space from 0xf935ed98 to
> 0xf935edd0.
> [0.00] crashkernel reserved: 0x0097db40 - 0x0097fb40
> (512 MB)
> [0.00] cma: Reserved 512 MiB at 0xa000
> 
> With ZONE_DMA=y, it will say,
> 
> cma: Failed to reserve 512 MiB
> 
> The machine is a ThunderX2 server.
> 
> 
https://buy.hpe.com/us/en/servers/apollo-systems/apollo-70-system/apollo-70-system/hpe-apollo-70-system/p/1010742472
> 
> # lscpu
> Architecture:aarch64
> Byte Order:  Little Endian
> CPU(s):  256
> On-line CPU(s) list: 0-255
> Thread(s) per core:  4
> Core(s) per socket:  32
> Socket(s):   2
> NUMA node(s):2
> Vendor ID:   Cavium
> Model:   1
> Model name:  ThunderX2 99xx
> Stepping:0x1
> BogoMIPS:400.00
> L1d cache:   32K
> L1i cache:   32K
> L2 cache:256K
> L3 cache:32768K
> NUMA node0 CPU(s):   0-127
> NUMA node1 CPU(s):   128-255
> Flags:   fp asimd aes pmull sha1 sha2 crc32 atomics cpuid asimdrdm

Hi Qian,
I happen to have access to a very similar machine:

thunderx25:~ # lscpu 
Architecture:aarch64
Byte Order:  Little Endian
CPU(s):  224
On-line CPU(s) list: 0-223
Thread(s) per core:  4
Core(s) per socket:  28
Socket(s):   2
NUMA node(s):2
Vendor ID:   Cavium
Model:   1
Model name:  ThunderX2 99xx
Stepping:0x1
CPU max MHz: 2500.
CPU min MHz: 1000.
BogoMIPS:400.00
L1d cache:   32K
L1i cache:   32K
L2 cache:256K
L3 cache:32768K
NUMA node0 CPU(s):   0-111
NUMA node1 CPU(s):   112-223
Flags:   fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics cpuid
asimdrdm

I tested a kernel with your configuration plus CONFIG_ZONE_DMA=y yet I'm unable
to reproduce the error. The CMA allocation is successful.

[0.00][T0] Booting Linux on physical CPU 0x00 [0x431f0af1]
[0.00][T0] Linux version 5.4.0-rc4-next-20191021 (nico@linux-9qgx) 
(gcc version 9.2.1 20190903 [gcc-9-branch revision 275330] (SUSE Linux)) #60 
SMP Mon Oct 21 18:48:51 CEST 2019
[0.00][T0] printk: debug: ignoring loglevel setting.
[0.00][T0] efi: Getting EFI parameters from FDT:
[0.00][T0] efi: EFI v2.70 by American Megatrends
[0.00][T0] efi:  ESRT=0xf10b4198  SMBIOS=0xfcc9  SMBIOS 
3.0=0xfcc8  ACPI 2.0=0xf967  MEMRESERVE=0xf1117018
[0.00][T0] esrt: Res

Re: [PATCH v6 3/4] arm64: use both ZONE_DMA and ZONE_DMA32

2019-10-21 Thread Qian Cai



> On Oct 21, 2019, at 10:34 AM, Nicolas Saenz Julienne  
> wrote:
> 
> On Mon, 2019-10-21 at 10:15 -0400, Qian Cai wrote:
>>> On Sep 11, 2019, at 2:25 PM, Nicolas Saenz Julienne 
>>> wrote:
>>> 
>>> So far all arm64 devices have supported 32 bit DMA masks for their
>>> peripherals. This is not true anymore for the Raspberry Pi 4 as most of
>>> it's peripherals can only address the first GB of memory on a total of
>>> up to 4 GB.
>>> 
>>> This goes against ZONE_DMA32's intent, as it's expected for ZONE_DMA32
>>> to be addressable with a 32 bit mask. So it was decided to re-introduce
>>> ZONE_DMA in arm64.
>>> 
>>> ZONE_DMA will contain the lower 1G of memory, which is currently the
>>> memory area addressable by any peripheral on an arm64 device.
>>> ZONE_DMA32 will contain the rest of the 32 bit addressable memory.
>>> 
>>> Signed-off-by: Nicolas Saenz Julienne 
>>> Reviewed-by: Catalin Marinas 
>>> 
>>> ---
>> 
>> With ZONE_DMA=y, this config will fail to reserve 512M CMA on a server,
>> 
>> https://raw.githubusercontent.com/cailca/linux-mm/master/arm64.config
>> 
>> CONFIG_DMA_CMA=y
>> CONFIG_CMA_SIZE_MBYTES=64
>> CONFIG_CMA_SIZE_SEL_MBYTES=y
>> CONFIG_CMA_ALIGNMENT=8
>> CONFIG_CMA=y
>> CONFIG_CMA_DEBUGFS=y
>> CONFIG_CMA_AREAS=7
>> 
>> Is this expected?
> 
> Not really, just tested cma=512M on a Raspberry Pi4, and it went well. The 
> only
> thing on my build that differs from your config is CONFIG_CMA_DEBUGFS.
> 
> Could you post more information on the device you're experiencing this on? 
> Also
> some logs.

With the above config, it does not even need "cma=512M" kernel cmdline.

[0.00] Booting Linux on physical CPU 0x00 [0x431f0af1]
[0.00] Linux version 5.4.0-rc4-next-20191021+ (clang version 8.0.1 (Red 
Hat 8.0.1-1.module+el8.1.0+3866+6be7f4d8)) #1 SMP Mon Oct 21 10:03:03 EDT 2019
[0.00] Setting debug_guardpage_minorder to 1
[0.00] efi: Getting EFI parameters from FDT:
[0.00] efi: EFI v2.70 by American Megatrends
[0.00] efi:  ESRT=0xf935ed98  SMBIOS=0xfcc9  SMBIOS 3.0=0xfcc8  
ACPI 2.0=0xfac8  MEMRESERVE=0xfacd1018 
[0.00] esrt: Reserving ESRT space from 0xf935ed98 to 
0xf935edd0.
[0.00] crashkernel reserved: 0x0097db40 - 0x0097fb40 
(512 MB)
[0.00] cma: Reserved 512 MiB at 0xa000

With ZONE_DMA=y, it will say,

cma: Failed to reserve 512 MiB

The machine is a ThunderX2 server.

https://buy.hpe.com/us/en/servers/apollo-systems/apollo-70-system/apollo-70-system/hpe-apollo-70-system/p/1010742472

# lscpu
Architecture:aarch64
Byte Order:  Little Endian
CPU(s):  256
On-line CPU(s) list: 0-255
Thread(s) per core:  4
Core(s) per socket:  32
Socket(s):   2
NUMA node(s):2
Vendor ID:   Cavium
Model:   1
Model name:  ThunderX2 99xx
Stepping:0x1
BogoMIPS:400.00
L1d cache:   32K
L1i cache:   32K
L2 cache:256K
L3 cache:32768K
NUMA node0 CPU(s):   0-127
NUMA node1 CPU(s):   128-255
Flags:   fp asimd aes pmull sha1 sha2 crc32 atomics cpuid asimdrdm

Re: [PATCH v6 3/4] arm64: use both ZONE_DMA and ZONE_DMA32

2019-10-21 Thread Nicolas Saenz Julienne
On Mon, 2019-10-21 at 10:15 -0400, Qian Cai wrote:
> > On Sep 11, 2019, at 2:25 PM, Nicolas Saenz Julienne 
> > wrote:
> > 
> > So far all arm64 devices have supported 32 bit DMA masks for their
> > peripherals. This is not true anymore for the Raspberry Pi 4 as most of
> > it's peripherals can only address the first GB of memory on a total of
> > up to 4 GB.
> > 
> > This goes against ZONE_DMA32's intent, as it's expected for ZONE_DMA32
> > to be addressable with a 32 bit mask. So it was decided to re-introduce
> > ZONE_DMA in arm64.
> > 
> > ZONE_DMA will contain the lower 1G of memory, which is currently the
> > memory area addressable by any peripheral on an arm64 device.
> > ZONE_DMA32 will contain the rest of the 32 bit addressable memory.
> > 
> > Signed-off-by: Nicolas Saenz Julienne 
> > Reviewed-by: Catalin Marinas 
> > 
> > ---
> 
> With ZONE_DMA=y, this config will fail to reserve 512M CMA on a server,
> 
> https://raw.githubusercontent.com/cailca/linux-mm/master/arm64.config
> 
> CONFIG_DMA_CMA=y
> CONFIG_CMA_SIZE_MBYTES=64
> CONFIG_CMA_SIZE_SEL_MBYTES=y
> CONFIG_CMA_ALIGNMENT=8
> CONFIG_CMA=y
> CONFIG_CMA_DEBUGFS=y
> CONFIG_CMA_AREAS=7
> 
> Is this expected?

Not really, just tested cma=512M on a Raspberry Pi4, and it went well. The only
thing on my build that differs from your config is CONFIG_CMA_DEBUGFS.

Could you post more information on the device you're experiencing this on? Also
some logs.

Regards,
Nicolas



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v6 3/4] arm64: use both ZONE_DMA and ZONE_DMA32

2019-10-21 Thread Qian Cai



> On Sep 11, 2019, at 2:25 PM, Nicolas Saenz Julienne  
> wrote:
> 
> So far all arm64 devices have supported 32 bit DMA masks for their
> peripherals. This is not true anymore for the Raspberry Pi 4 as most of
> it's peripherals can only address the first GB of memory on a total of
> up to 4 GB.
> 
> This goes against ZONE_DMA32's intent, as it's expected for ZONE_DMA32
> to be addressable with a 32 bit mask. So it was decided to re-introduce
> ZONE_DMA in arm64.
> 
> ZONE_DMA will contain the lower 1G of memory, which is currently the
> memory area addressable by any peripheral on an arm64 device.
> ZONE_DMA32 will contain the rest of the 32 bit addressable memory.
> 
> Signed-off-by: Nicolas Saenz Julienne 
> Reviewed-by: Catalin Marinas 
> 
> ---
> 
> I kept the Reviewed-by as the last bug solution was proposed by Catalin
> 
> Changes in v6:
> - Fixed bug in max_zone_phys
> 
> Changes in v5:
> - Fixed swiotlb initialization
> 
> Changes in v4:
> - Fixed issue when NUMA=n and ZONE_DMA=n
> - Merged two max_zone_dma*_phys() functions
> 
> Changes in v3:
> - Used fixed size ZONE_DMA
> - Fix check befor swiotlb_init()
> 
> Changes in v2:
> - Update comment to reflect new zones split
> - ZONE_DMA will never be left empty
> 
> arch/arm64/Kconfig|  4 +++
> arch/arm64/include/asm/page.h |  2 ++
> arch/arm64/mm/init.c  | 54 +--
> 3 files changed, 45 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 6b6362b83004..2dbe0165bd15 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -267,6 +267,10 @@ config GENERIC_CSUM
> config GENERIC_CALIBRATE_DELAY
>   def_bool y
> 
> +config ZONE_DMA
> + bool "Support DMA zone" if EXPERT
> + default y
> +
> config ZONE_DMA32
>   bool "Support DMA32 zone" if EXPERT
>   default y
> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
> index d39ddb258a04..7b8c98830101 100644
> --- a/arch/arm64/include/asm/page.h
> +++ b/arch/arm64/include/asm/page.h
> @@ -38,4 +38,6 @@ extern int pfn_valid(unsigned long);
> 
> #include 
> 
> +#define ARCH_ZONE_DMA_BITS 30
> +
> #endif
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 8e9bc64c5878..44f07fdf7a59 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -56,6 +56,13 @@ EXPORT_SYMBOL(physvirt_offset);
> struct page *vmemmap __ro_after_init;
> EXPORT_SYMBOL(vmemmap);
> 
> +/*
> + * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
> + * memory as some devices, namely the Raspberry Pi 4, have peripherals with
> + * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32
> + * bit addressable memory area.
> + */
> +phys_addr_t arm64_dma_phys_limit __ro_after_init;
> phys_addr_t arm64_dma32_phys_limit __ro_after_init;
> 
> #ifdef CONFIG_KEXEC_CORE
> @@ -169,15 +176,16 @@ static void __init reserve_elfcorehdr(void)
> {
> }
> #endif /* CONFIG_CRASH_DUMP */
> +
> /*
> - * Return the maximum physical address for ZONE_DMA32 (DMA_BIT_MASK(32)). It
> - * currently assumes that for memory starting above 4G, 32-bit devices will
> - * use a DMA offset.
> + * Return the maximum physical address for a zone with a given address size
> + * limit. It currently assumes that for memory starting above 4G, 32-bit
> + * devices will use a DMA offset.
>  */
> -static phys_addr_t __init max_zone_dma32_phys(void)
> +static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
> {
> - phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32);
> - return min(offset + (1ULL << 32), memblock_end_of_DRAM());
> + phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 
> zone_bits);
> + return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM());
> }
> 
> #ifdef CONFIG_NUMA
> @@ -186,6 +194,9 @@ static void __init zone_sizes_init(unsigned long min, 
> unsigned long max)
> {
>   unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
> 
> +#ifdef CONFIG_ZONE_DMA
> + max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
> +#endif
> #ifdef CONFIG_ZONE_DMA32
>   max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit);
> #endif
> @@ -201,13 +212,18 @@ static void __init zone_sizes_init(unsigned long min, 
> unsigned long max)
>   struct memblock_region *reg;
>   unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
>   unsigned long max_dma32 = min;
> + unsigned long max_dma = min;
> 
>   memset(zone_size, 0, sizeof(zone_size));
> 
> - /* 4GB maximum for 32-bit only capable devices */
> +#ifdef CONFIG_ZONE_DMA
> + max_dma = PFN_DOWN(arm64_dma_phys_limit);
> + zone_size[ZONE_DMA] = max_dma - min;
> + max_dma32 = max_dma;
> +#endif
> #ifdef CONFIG_ZONE_DMA32
>   max_dma32 = PFN_DOWN(arm64_dma32_phys_limit);
> - zone_size[ZONE_DMA32] = max_dma32 - min;
> + zone_size[ZONE_DMA32] = max_dma32 - max_dma;
> #endif
>   zone_size[ZONE_NORMAL] 

[PATCH v6 3/4] arm64: use both ZONE_DMA and ZONE_DMA32

2019-09-11 Thread Nicolas Saenz Julienne
So far all arm64 devices have supported 32 bit DMA masks for their
peripherals. This is not true anymore for the Raspberry Pi 4 as most of
its peripherals can only address the first GB of memory out of a total of
up to 4 GB.

This goes against ZONE_DMA32's intent, as it's expected for ZONE_DMA32
to be addressable with a 32 bit mask. So it was decided to re-introduce
ZONE_DMA in arm64.

ZONE_DMA will contain the lower 1G of memory, which is currently the
memory area addressable by any peripheral on an arm64 device.
ZONE_DMA32 will contain the rest of the 32 bit addressable memory.

Signed-off-by: Nicolas Saenz Julienne 
Reviewed-by: Catalin Marinas 

---

I kept the Reviewed-by as the last bug solution was proposed by Catalin

Changes in v6:
- Fixed bug in max_zone_phys

Changes in v5:
- Fixed swiotlb initialization

Changes in v4:
- Fixed issue when NUMA=n and ZONE_DMA=n
- Merged two max_zone_dma*_phys() functions

Changes in v3:
- Used fixed size ZONE_DMA
- Fix check before swiotlb_init()

Changes in v2:
- Update comment to reflect new zones split
- ZONE_DMA will never be left empty

 arch/arm64/Kconfig|  4 +++
 arch/arm64/include/asm/page.h |  2 ++
 arch/arm64/mm/init.c  | 54 +--
 3 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6b6362b83004..2dbe0165bd15 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -267,6 +267,10 @@ config GENERIC_CSUM
 config GENERIC_CALIBRATE_DELAY
def_bool y
 
+config ZONE_DMA
+   bool "Support DMA zone" if EXPERT
+   default y
+
 config ZONE_DMA32
bool "Support DMA32 zone" if EXPERT
default y
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index d39ddb258a04..7b8c98830101 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -38,4 +38,6 @@ extern int pfn_valid(unsigned long);
 
 #include 
 
+#define ARCH_ZONE_DMA_BITS 30
+
 #endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 8e9bc64c5878..44f07fdf7a59 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -56,6 +56,13 @@ EXPORT_SYMBOL(physvirt_offset);
 struct page *vmemmap __ro_after_init;
 EXPORT_SYMBOL(vmemmap);
 
+/*
+ * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
+ * memory as some devices, namely the Raspberry Pi 4, have peripherals with
+ * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32
+ * bit addressable memory area.
+ */
+phys_addr_t arm64_dma_phys_limit __ro_after_init;
 phys_addr_t arm64_dma32_phys_limit __ro_after_init;
 
 #ifdef CONFIG_KEXEC_CORE
@@ -169,15 +176,16 @@ static void __init reserve_elfcorehdr(void)
 {
 }
 #endif /* CONFIG_CRASH_DUMP */
+
 /*
- * Return the maximum physical address for ZONE_DMA32 (DMA_BIT_MASK(32)). It
- * currently assumes that for memory starting above 4G, 32-bit devices will
- * use a DMA offset.
+ * Return the maximum physical address for a zone with a given address size
+ * limit. It currently assumes that for memory starting above 4G, 32-bit
+ * devices will use a DMA offset.
  */
-static phys_addr_t __init max_zone_dma32_phys(void)
+static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
 {
-   phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32);
-   return min(offset + (1ULL << 32), memblock_end_of_DRAM());
+   phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 
zone_bits);
+   return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM());
 }
 
 #ifdef CONFIG_NUMA
@@ -186,6 +194,9 @@ static void __init zone_sizes_init(unsigned long min, 
unsigned long max)
 {
unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
 
+#ifdef CONFIG_ZONE_DMA
+   max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
+#endif
 #ifdef CONFIG_ZONE_DMA32
max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit);
 #endif
@@ -201,13 +212,18 @@ static void __init zone_sizes_init(unsigned long min, 
unsigned long max)
struct memblock_region *reg;
unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
unsigned long max_dma32 = min;
+   unsigned long max_dma = min;
 
memset(zone_size, 0, sizeof(zone_size));
 
-   /* 4GB maximum for 32-bit only capable devices */
+#ifdef CONFIG_ZONE_DMA
+   max_dma = PFN_DOWN(arm64_dma_phys_limit);
+   zone_size[ZONE_DMA] = max_dma - min;
+   max_dma32 = max_dma;
+#endif
 #ifdef CONFIG_ZONE_DMA32
max_dma32 = PFN_DOWN(arm64_dma32_phys_limit);
-   zone_size[ZONE_DMA32] = max_dma32 - min;
+   zone_size[ZONE_DMA32] = max_dma32 - max_dma;
 #endif
zone_size[ZONE_NORMAL] = max - max_dma32;
 
@@ -219,11 +235,17 @@ static void __init zone_sizes_init(unsigned long min, 
unsigned long max)
 
if (start >= max)
continue;
-
+#ifdef CONFIG_ZONE_DMA
+   if (start < max_dma) {
+