Hi Anshuman,

On Wed, Oct 09, 2019 at 01:51:48PM +0530, Anshuman Khandual wrote:
> +static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
> +                                 unsigned long end, bool free_mapped)
> +{
> +     unsigned long next;
> +     pmd_t *pmdp, pmd;
> +
> +     do {
> +             next = pmd_addr_end(addr, end);
> +             pmdp = pmd_offset(pudp, addr);
> +             pmd = READ_ONCE(*pmdp);
> +             if (pmd_none(pmd))
> +                     continue;
> +
> +             WARN_ON(!pmd_present(pmd));
> +             if (pmd_sect(pmd)) {
> +                     pmd_clear(pmdp);
> +                     flush_tlb_kernel_range(addr, next);

The range here could be a whole PMD_SIZE. Since we are invalidating a
single block entry, one TLBI should be sufficient:

                        flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
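
Since a block mapping is covered by a single TLB entry, invalidating
any page within it is enough, i.e. something like the below (untested,
just reusing the helpers from your patch):

	if (pmd_sect(pmd)) {
		pmd_clear(pmdp);
		/* one block entry, one TLB entry */
		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
		if (free_mapped)
			free_hotplug_page_range(pmd_page(pmd), PMD_SIZE);
		continue;
	}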

> +                     if (free_mapped)
> +                             free_hotplug_page_range(pmd_page(pmd),
> +                                                     PMD_SIZE);
> +                     continue;
> +             }
> +             WARN_ON(!pmd_table(pmd));
> +             unmap_hotplug_pte_range(pmdp, addr, next, free_mapped);
> +     } while (addr = next, addr < end);
> +}
> +
> +static void unmap_hotplug_pud_range(pgd_t *pgdp, unsigned long addr,
> +                                 unsigned long end, bool free_mapped)
> +{
> +     unsigned long next;
> +     pud_t *pudp, pud;
> +
> +     do {
> +             next = pud_addr_end(addr, end);
> +             pudp = pud_offset(pgdp, addr);
> +             pud = READ_ONCE(*pudp);
> +             if (pud_none(pud))
> +                     continue;
> +
> +             WARN_ON(!pud_present(pud));
> +             if (pud_sect(pud)) {
> +                     pud_clear(pudp);
> +                     flush_tlb_kernel_range(addr, next);
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                        flush_tlb_kernel_range(addr, addr + PAGE_SIZE);

[...]
> +static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,
> +                              unsigned long end, unsigned long floor,
> +                              unsigned long ceiling)
> +{
> +     pte_t *ptep, pte;
> +     unsigned long i, start = addr;
> +
> +     do {
> +             ptep = pte_offset_kernel(pmdp, addr);
> +             pte = READ_ONCE(*ptep);
> +             WARN_ON(!pte_none(pte));
> +     } while (addr += PAGE_SIZE, addr < end);

So this loop is just a sanity check (the pte clearing has already been
done by the unmap loops). That's fine, but maybe add a comment for
future reference.
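
Something along these lines would do (wording is just a suggestion):

	/*
	 * The pte entries have already been cleared by the
	 * unmap_hotplug_*() loops above; just check that nothing
	 * was left behind.
	 */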

> +
> +     if (!pgtable_range_aligned(start, end, floor, ceiling, PMD_MASK))
> +             return;
> +
> +     ptep = pte_offset_kernel(pmdp, 0UL);
> +     for (i = 0; i < PTRS_PER_PTE; i++) {
> +             if (!pte_none(READ_ONCE(ptep[i])))
> +                     return;
> +     }

We could do with a comment for this loop along the lines of:

        Check whether we can free the pte page if the rest of the
        entries are empty. Any overlap with other regions has been
        handled by the floor/ceiling check.

Apart from the comments above, the rest of the patch looks fine. Once
fixed:

Reviewed-by: Catalin Marinas <catalin.mari...@arm.com>

Mark Rutland mentioned at some point that, as a preparatory patch to
this series, we'd need to make sure we don't hot-remove memory already
given to the kernel at boot. Any plans here?

Thanks.

-- 
Catalin
