Re: [Xen-devel] [PATCH for-4.12 v3] xen/arm: Stop relocating Xen

2018-12-18 Thread Stefano Stabellini
On Tue, 18 Dec 2018, Stefano Stabellini wrote:
> On Tue, 18 Dec 2018, Julien Grall wrote:
> > At the moment, Xen is relocated towards the end of the memory. While
> > this has the advantage to free space in low memory, the code is not
> > compliant with the break-before-make because it requires to switch
> > between two sets of page-table. This is not entirely trivial to fix as
> > it would require us to go through an identity mapping and disabling MMU.
> > 
> > Furthermore, it looks like that some platform (such as the Hikey960)
> > may not be able to bring-up secondary CPUs if the entry is too high.
> > 
> > While Xen should be quite tiny (< 2MB), the current algorigthm to
> > allocate Dom0 memory will allocate memory chunk of at least 128MB.
> > Those memory chunks will always be 128MB. This means that depending on
> > where the modules are loaded, an extra 128MB may disappear.
> > 
> > As there are up to 4 modules (initramfs, XSM, kernel, DTB) loaded in
> > low memory. The problem is not entirely new as you could already waste
> > 512MB of low-memory. The right solution would be to fix the allocation
> > algorightm. But this is independent from this patch.
> > 
> > For user in control of the memory (such as in U-boot), all modules
> > should be loaded as much as possible together or outside low-memory (i.e
> > above 4GB). For other users (i.e Grub/UEFI), I believe the bootloader is
> > already keeping everything together.
> > 
> > Based on the above, it would be fine to stop relocating Xen. This has
> > the advantage to simplify the code and should speed-up the boot as
> > relocation is not necessary anymore.
> > 
> > Note that the break-before-make issue is not fixed by this patch.
> > 
> > Signed-off-by: Julien Grall 
> > Reported-by: Matthew Daley 
> > Tested-by: Matthew Daley 
> 
> Reviewed and committed

I forgot to add that I fixed a couple of grammar errors in the commit
message as I committed.

 
> > ---
> > Changes in v3:
> > - Update the commit message
> > 
> > Changes in v2:
> > - Add Matthew's tested-by
> > ---
> >  xen/arch/arm/arm32/head.S | 54 +++
> >  xen/arch/arm/arm64/head.S | 50 +---
> >  xen/arch/arm/mm.c | 18 +++--
> >  xen/arch/arm/setup.c  | 65 
> > +++
> >  xen/include/asm-arm/mm.h  |  2 +-
> >  5 files changed, 17 insertions(+), 172 deletions(-)
> > 
> > diff --git a/xen/arch/arm/arm32/head.S b/xen/arch/arm/arm32/head.S
> > index 93b51e9ef2..390a505e05 100644
> > --- a/xen/arch/arm/arm32/head.S
> > +++ b/xen/arch/arm/arm32/head.S
> > @@ -469,58 +469,12 @@ fail:   PRINT("- Boot failed -\r\n")
> >  GLOBAL(_end_boot)
> >  
> >  /*
> > - * Copy Xen to new location and switch TTBR
> > + * Switch TTBR
> >   * r1:r0   ttbr
> > - * r2  source address
> > - * r3  destination address
> > - * [sp]=>r4length
> >   *
> > - * Source and destination must be word aligned, length is rounded up
> > - * to a 16 byte boundary.
> > - *
> > - * MUST BE VERY CAREFUL when saving things to RAM over the copy
> > + * TODO: This code does not comply with break-before-make.
> >   */
> > -ENTRY(relocate_xen)
> > -push {r4,r5,r6,r7,r8,r9,r10,r11}
> > -
> > -ldr   r4, [sp, #8*4]/* Get 4th argument from stack 
> > */
> > -
> > -/* Copy 16 bytes at a time using:
> > - * r5:  counter
> > - * r6:  data
> > - * r7:  data
> > - * r8:  data
> > - * r9:  data
> > - * r10: source
> > - * r11: destination
> > - */
> > -mov   r5, r4
> > -mov   r10, r2
> > -mov   r11, r3
> > -1:  ldmia r10!, {r6, r7, r8, r9}
> > -stmia r11!, {r6, r7, r8, r9}
> > -
> > -subs  r5, r5, #16
> > -bgt   1b
> > -
> > -/* Flush destination from dcache using:
> > - * r5: counter
> > - * r6: step
> > - * r7: vaddr
> > - */
> > -dsb/* So the CPU issues all writes to the range */
> > -
> > -mov   r5, r4
> > -ldr   r6, =dcache_line_bytes /* r6 := step */
> > -ldr   r6, [r6]
> > -mov   r7, r3
> > -
> > -1:  mcr   CP32(r7, DCCMVAC)
> > -
> > -add   r7, r7, r6
> > -subs  r5, r5, r6
> > -bgt   1b
> > -
> > +ENTRY(switch_ttbr)
> >  dsb/* Ensure the flushes happen before
> >  * continuing */
> >  isb/* Ensure synchronization with 
> > previous
> > @@ -543,8 +497,6 @@ ENTRY(relocate_xen)
> >  dsb/* Ensure completion of TLB+BP 
> > flush */
> >  isb
> >  
> > -pop {r4, r5,r6,r7,r8,r9,r10,r11}
> > -
> >  mov pc, lr
> >  
> >  #ifdef CONFIG_EARLY_PRINTK
> > diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> > index 

Re: [Xen-devel] [PATCH for-4.12 v3] xen/arm: Stop relocating Xen

2018-12-18 Thread Stefano Stabellini
On Tue, 18 Dec 2018, Julien Grall wrote:
> At the moment, Xen is relocated towards the end of the memory. While
> this has the advantage to free space in low memory, the code is not
> compliant with the break-before-make because it requires to switch
> between two sets of page-table. This is not entirely trivial to fix as
> it would require us to go through an identity mapping and disabling MMU.
> 
> Furthermore, it looks like that some platform (such as the Hikey960)
> may not be able to bring-up secondary CPUs if the entry is too high.
> 
> While Xen should be quite tiny (< 2MB), the current algorigthm to
> allocate Dom0 memory will allocate memory chunk of at least 128MB.
> Those memory chunks will always be 128MB. This means that depending on
> where the modules are loaded, an extra 128MB may disappear.
> 
> As there are up to 4 modules (initramfs, XSM, kernel, DTB) loaded in
> low memory. The problem is not entirely new as you could already waste
> 512MB of low-memory. The right solution would be to fix the allocation
> algorightm. But this is independent from this patch.
> 
> For user in control of the memory (such as in U-boot), all modules
> should be loaded as much as possible together or outside low-memory (i.e
> above 4GB). For other users (i.e Grub/UEFI), I believe the bootloader is
> already keeping everything together.
> 
> Based on the above, it would be fine to stop relocating Xen. This has
> the advantage to simplify the code and should speed-up the boot as
> relocation is not necessary anymore.
> 
> Note that the break-before-make issue is not fixed by this patch.
> 
> Signed-off-by: Julien Grall 
> Reported-by: Matthew Daley 
> Tested-by: Matthew Daley 

Reviewed and committed


> ---
> Changes in v3:
> - Update the commit message
> 
> Changes in v2:
> - Add Matthew's tested-by
> ---
>  xen/arch/arm/arm32/head.S | 54 +++
>  xen/arch/arm/arm64/head.S | 50 +---
>  xen/arch/arm/mm.c | 18 +++--
>  xen/arch/arm/setup.c  | 65 
> +++
>  xen/include/asm-arm/mm.h  |  2 +-
>  5 files changed, 17 insertions(+), 172 deletions(-)
> 
> diff --git a/xen/arch/arm/arm32/head.S b/xen/arch/arm/arm32/head.S
> index 93b51e9ef2..390a505e05 100644
> --- a/xen/arch/arm/arm32/head.S
> +++ b/xen/arch/arm/arm32/head.S
> @@ -469,58 +469,12 @@ fail:   PRINT("- Boot failed -\r\n")
>  GLOBAL(_end_boot)
>  
>  /*
> - * Copy Xen to new location and switch TTBR
> + * Switch TTBR
>   * r1:r0   ttbr
> - * r2  source address
> - * r3  destination address
> - * [sp]=>r4length
>   *
> - * Source and destination must be word aligned, length is rounded up
> - * to a 16 byte boundary.
> - *
> - * MUST BE VERY CAREFUL when saving things to RAM over the copy
> + * TODO: This code does not comply with break-before-make.
>   */
> -ENTRY(relocate_xen)
> -push {r4,r5,r6,r7,r8,r9,r10,r11}
> -
> -ldr   r4, [sp, #8*4]/* Get 4th argument from stack */
> -
> -/* Copy 16 bytes at a time using:
> - * r5:  counter
> - * r6:  data
> - * r7:  data
> - * r8:  data
> - * r9:  data
> - * r10: source
> - * r11: destination
> - */
> -mov   r5, r4
> -mov   r10, r2
> -mov   r11, r3
> -1:  ldmia r10!, {r6, r7, r8, r9}
> -stmia r11!, {r6, r7, r8, r9}
> -
> -subs  r5, r5, #16
> -bgt   1b
> -
> -/* Flush destination from dcache using:
> - * r5: counter
> - * r6: step
> - * r7: vaddr
> - */
> -dsb/* So the CPU issues all writes to the range */
> -
> -mov   r5, r4
> -ldr   r6, =dcache_line_bytes /* r6 := step */
> -ldr   r6, [r6]
> -mov   r7, r3
> -
> -1:  mcr   CP32(r7, DCCMVAC)
> -
> -add   r7, r7, r6
> -subs  r5, r5, r6
> -bgt   1b
> -
> +ENTRY(switch_ttbr)
>  dsb/* Ensure the flushes happen before
>  * continuing */
>  isb/* Ensure synchronization with 
> previous
> @@ -543,8 +497,6 @@ ENTRY(relocate_xen)
>  dsb/* Ensure completion of TLB+BP flush 
> */
>  isb
>  
> -pop {r4, r5,r6,r7,r8,r9,r10,r11}
> -
>  mov pc, lr
>  
>  #ifdef CONFIG_EARLY_PRINTK
> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> index ef87b5c254..0b7f6e7f92 100644
> --- a/xen/arch/arm/arm64/head.S
> +++ b/xen/arch/arm/arm64/head.S
> @@ -609,52 +609,14 @@ fail:   PRINT("- Boot failed -\r\n")
>  
>  GLOBAL(_end_boot)
>  
> -/* Copy Xen to new location and switch TTBR
> - * x0ttbr
> - * x1source address
> - * x2destination address
> - * x3length
> +/*
> + * Switch TTBR
>   *
> - * Source and destination must be word aligned, length is 

Re: [Xen-devel] [PATCH for-4.12 v3] xen/arm: Stop relocating Xen

2018-12-18 Thread Andrii Anisov



On 18.12.18 15:07, Julien Grall wrote:

At the moment, Xen is relocated towards the end of the memory. While
this has the advantage to free space in low memory, the code is not
compliant with the break-before-make because it requires to switch
between two sets of page-table. This is not entirely trivial to fix as
it would require us to go through an identity mapping and disabling MMU.

Furthermore, it looks like that some platform (such as the Hikey960)
may not be able to bring-up secondary CPUs if the entry is too high.

While Xen should be quite tiny (< 2MB), the current algorigthm to
allocate Dom0 memory will allocate memory chunk of at least 128MB.
Those memory chunks will always be 128MB. This means that depending on
where the modules are loaded, an extra 128MB may disappear.

As there are up to 4 modules (initramfs, XSM, kernel, DTB) loaded in
low memory. The problem is not entirely new as you could already waste
512MB of low-memory. The right solution would be to fix the allocation
algorightm. But this is independent from this patch.

For user in control of the memory (such as in U-boot), all modules
should be loaded as much as possible together or outside low-memory (i.e
above 4GB). For other users (i.e Grub/UEFI), I believe the bootloader is
already keeping everything together.

Based on the above, it would be fine to stop relocating Xen. This has
the advantage to simplify the code and should speed-up the boot as
relocation is not necessary anymore.

Note that the break-before-make issue is not fixed by this patch.

Signed-off-by: Julien Grall 
Reported-by: Matthew Daley 
Tested-by: Matthew Daley 


As long as the code did not change:

Tested-by: Andrii Anisov 
Reviewed-by: Andrii Anisov 

--
Sincerely,
Andrii Anisov.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH for-4.12 v3] xen/arm: Stop relocating Xen

2018-12-18 Thread Julien Grall
At the moment, Xen is relocated towards the end of the memory. While
this has the advantage to free space in low memory, the code is not
compliant with the break-before-make because it requires to switch
between two sets of page-table. This is not entirely trivial to fix as
it would require us to go through an identity mapping and disabling MMU.

Furthermore, it looks like that some platform (such as the Hikey960)
may not be able to bring-up secondary CPUs if the entry is too high.

While Xen should be quite tiny (< 2MB), the current algorigthm to
allocate Dom0 memory will allocate memory chunk of at least 128MB.
Those memory chunks will always be 128MB. This means that depending on
where the modules are loaded, an extra 128MB may disappear.

As there are up to 4 modules (initramfs, XSM, kernel, DTB) loaded in
low memory. The problem is not entirely new as you could already waste
512MB of low-memory. The right solution would be to fix the allocation
algorightm. But this is independent from this patch.

For user in control of the memory (such as in U-boot), all modules
should be loaded as much as possible together or outside low-memory (i.e
above 4GB). For other users (i.e Grub/UEFI), I believe the bootloader is
already keeping everything together.

Based on the above, it would be fine to stop relocating Xen. This has
the advantage to simplify the code and should speed-up the boot as
relocation is not necessary anymore.

Note that the break-before-make issue is not fixed by this patch.

Signed-off-by: Julien Grall 
Reported-by: Matthew Daley 
Tested-by: Matthew Daley 

---
Changes in v3:
- Update the commit message

Changes in v2:
- Add Matthew's tested-by
---
 xen/arch/arm/arm32/head.S | 54 +++
 xen/arch/arm/arm64/head.S | 50 +---
 xen/arch/arm/mm.c | 18 +++--
 xen/arch/arm/setup.c  | 65 +++
 xen/include/asm-arm/mm.h  |  2 +-
 5 files changed, 17 insertions(+), 172 deletions(-)

diff --git a/xen/arch/arm/arm32/head.S b/xen/arch/arm/arm32/head.S
index 93b51e9ef2..390a505e05 100644
--- a/xen/arch/arm/arm32/head.S
+++ b/xen/arch/arm/arm32/head.S
@@ -469,58 +469,12 @@ fail:   PRINT("- Boot failed -\r\n")
 GLOBAL(_end_boot)
 
 /*
- * Copy Xen to new location and switch TTBR
+ * Switch TTBR
  * r1:r0   ttbr
- * r2  source address
- * r3  destination address
- * [sp]=>r4length
  *
- * Source and destination must be word aligned, length is rounded up
- * to a 16 byte boundary.
- *
- * MUST BE VERY CAREFUL when saving things to RAM over the copy
+ * TODO: This code does not comply with break-before-make.
  */
-ENTRY(relocate_xen)
-push {r4,r5,r6,r7,r8,r9,r10,r11}
-
-ldr   r4, [sp, #8*4]/* Get 4th argument from stack */
-
-/* Copy 16 bytes at a time using:
- * r5:  counter
- * r6:  data
- * r7:  data
- * r8:  data
- * r9:  data
- * r10: source
- * r11: destination
- */
-mov   r5, r4
-mov   r10, r2
-mov   r11, r3
-1:  ldmia r10!, {r6, r7, r8, r9}
-stmia r11!, {r6, r7, r8, r9}
-
-subs  r5, r5, #16
-bgt   1b
-
-/* Flush destination from dcache using:
- * r5: counter
- * r6: step
- * r7: vaddr
- */
-dsb/* So the CPU issues all writes to the range */
-
-mov   r5, r4
-ldr   r6, =dcache_line_bytes /* r6 := step */
-ldr   r6, [r6]
-mov   r7, r3
-
-1:  mcr   CP32(r7, DCCMVAC)
-
-add   r7, r7, r6
-subs  r5, r5, r6
-bgt   1b
-
+ENTRY(switch_ttbr)
 dsb/* Ensure the flushes happen before
 * continuing */
 isb/* Ensure synchronization with previous
@@ -543,8 +497,6 @@ ENTRY(relocate_xen)
 dsb/* Ensure completion of TLB+BP flush */
 isb
 
-pop {r4, r5,r6,r7,r8,r9,r10,r11}
-
 mov pc, lr
 
 #ifdef CONFIG_EARLY_PRINTK
diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
index ef87b5c254..0b7f6e7f92 100644
--- a/xen/arch/arm/arm64/head.S
+++ b/xen/arch/arm/arm64/head.S
@@ -609,52 +609,14 @@ fail:   PRINT("- Boot failed -\r\n")
 
 GLOBAL(_end_boot)
 
-/* Copy Xen to new location and switch TTBR
- * x0ttbr
- * x1source address
- * x2destination address
- * x3length
+/*
+ * Switch TTBR
  *
- * Source and destination must be word aligned, length is rounded up
- * to a 16 byte boundary.
+ * x0ttbr
  *
- * MUST BE VERY CAREFUL when saving things to RAM over the copy */
-ENTRY(relocate_xen)
-/* Copy 16 bytes at a time using:
- *   x9: counter
- *   x10: data
- *   x11: data
- *   x12: source
- *   x13: destination
- */
-mov x9,