Re: [PATCH 2/2] arm64: Reduce PT size estimation complexity

2023-03-07 Thread Tom Rini
On Tue, Feb 14, 2023 at 09:38:14PM +0800, Ying-Chun Liu (PaulLiu) wrote:

> From: Marc Zyngier 
> 
> count_required_pts()'s complexity is high if mappings are not using the
> largest possible block size (due to some other requirement such as tracking
> dirty pages, for example).
> 
> Let's switch to a method that follows the pattern established with
> the add_map() helper, and make it almost instantaneous instead of
> taking a large amount of time if 2MB mappings are in use instead of
> 1GB.
> 
> Signed-off-by: Marc Zyngier 
> Signed-off-by: Pierre-Clément Tosi 
> [ Paul: pick from the Android tree. Fixup Pierre's commit. Rebase to the
>   upstream ]
> Signed-off-by: Ying-Chun Liu (PaulLiu) 
> Cc: Tom Rini 
> Link: 
> https://android.googlesource.com/platform/external/u-boot/+/5d756d147e31a1cdaaa261a50e526404ca5968f5
> Link: 
> https://android.googlesource.com/platform/external/u-boot/+/6be9330601d81545c7c941e3609f35bf68a09059

Applied to u-boot/next, thanks!

-- 
Tom


signature.asc
Description: PGP signature


[PATCH 2/2] arm64: Reduce PT size estimation complexity

2023-02-14 Thread Ying-Chun Liu (PaulLiu)
From: Marc Zyngier 

count_required_pts()'s complexity is high if mappings are not using the
largest possible block size (due to some other requirement such as tracking
dirty pages, for example).

Let's switch to a method that follows the pattern established with
the add_map() helper, and make it almost instantaneous instead of
taking a large amount of time if 2MB mappings are in use instead of
1GB.

Signed-off-by: Marc Zyngier 
Signed-off-by: Pierre-Clément Tosi 
[ Paul: pick from the Android tree. Fixup Pierre's commit. Rebase to the
  upstream ]
Signed-off-by: Ying-Chun Liu (PaulLiu) 
Cc: Tom Rini 
Link: 
https://android.googlesource.com/platform/external/u-boot/+/5d756d147e31a1cdaaa261a50e526404ca5968f5
Link: 
https://android.googlesource.com/platform/external/u-boot/+/6be9330601d81545c7c941e3609f35bf68a09059
---
 arch/arm/cpu/armv8/cache_v8.c | 109 +++---
 1 file changed, 34 insertions(+), 75 deletions(-)

diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
index 876344e1b4..697334086f 100644
--- a/arch/arm/cpu/armv8/cache_v8.c
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -352,98 +352,57 @@ static void add_map(struct mm_region *map)
  (u64 *)gd->arch.tlb_addr, attrs);
 }
 
-enum pte_type {
-   PTE_INVAL,
-   PTE_BLOCK,
-   PTE_LEVEL,
-};
-
-/*
- * This is a recursively called function to count the number of
- * page tables we need to cover a particular PTE range. If you
- * call this with level = -1 you basically get the full 48 bit
- * coverage.
- */
-static int count_required_pts(u64 addr, int level, u64 maxaddr)
+static void count_range(u64 virt, u64 size, int level, int *cntp)
 {
-   int levelshift = level2shift(level);
-   u64 levelsize = 1ULL << levelshift;
-   u64 levelmask = levelsize - 1;
-   u64 levelend = addr + levelsize;
-   int r = 0;
-   int i;
-   enum pte_type pte_type = PTE_INVAL;
+   u64 map_size = BIT_ULL(level2shift(level));
+   int i, idx;
 
-   for (i = 0; mem_map[i].size || mem_map[i].attrs; i++) {
-   struct mm_region *map = _map[i];
-   u64 start = map->virt;
-   u64 end = start + map->size;
+   idx = (virt >> level2shift(level)) & (MAX_PTE_ENTRIES - 1);
+   for (i = idx; size; i++) {
+   u64 next_size;
 
-   /* Check if the PTE would overlap with the map */
-   if (max(addr, start) <= min(levelend, end)) {
-   start = max(addr, start);
-   end = min(levelend, end);
+   if (level >= 1 &&
+   size >= map_size && !(virt & (map_size - 1))) {
+   virt += map_size;
+   size -= map_size;
 
-   /* We need a sub-pt for this level */
-   if ((start & levelmask) || (end & levelmask)) {
-   pte_type = PTE_LEVEL;
-   break;
-   }
+   continue;
+   }
 
-   /* Lv0 can not do block PTEs, so do levels here too */
-   if (level <= 0) {
-   pte_type = PTE_LEVEL;
-   break;
-   }
+   /* Going one level down */
+   (*cntp)++;
+   next_size = min(map_size - (virt & (map_size - 1)), size);
 
-   /* PTE is active, but fits into a block */
-   pte_type = PTE_BLOCK;
-   }
-   }
+   count_range(virt, next_size, level + 1, cntp);
 
-   /*
-* Block PTEs at this level are already covered by the parent page
-* table, so we only need to count sub page tables.
-*/
-   if (pte_type == PTE_LEVEL) {
-   int sublevel = level + 1;
-   u64 sublevelsize = 1ULL << level2shift(sublevel);
-
-   /* Account for the new sub page table ... */
-   r = 1;
-
-   /* ... and for all child page tables that one might have */
-   for (i = 0; i < MAX_PTE_ENTRIES; i++) {
-   r += count_required_pts(addr, sublevel, maxaddr);
-   addr += sublevelsize;
-
-   if (addr >= maxaddr) {
-   /*
-* We reached the end of address space, no need
-* to look any further.
-*/
-   break;
-   }
-   }
+   virt += next_size;
+   size -= next_size;
}
-
-   return r;
 }
 
-/* Returns the estimated required size of all page tables */
-__weak u64 get_page_table_size(void)
+static int count_ranges(void)
 {
-   u64 one_pt = MAX_PTE_ENTRIES * sizeof(u64);
-   u64 size = 0;
+   int i, count = 0, level = 0;