On Thu, 2013-06-20 at 20:28 +0200, Joerg Roedel wrote:
> On Mon, Jun 17, 2013 at 07:52:14PM -0600, Alex Williamson wrote:
> >  static void free_pagetable(struct protection_domain *domain)
> >  {
> > -   int i, j;
> > -   u64 *p1, *p2, *p3;
> > +   int i, j, k, l, m, depth = domain->mode;
> > +   u64 *p1, *p2, *p3, *p4, *p5, *p6;
> >  
> >     p1 = domain->pt_root;
> >  
> >     if (!p1)
> >             return;
> >  
> > -   for (i = 0; i < 512; ++i) {
> > +   for (i = 0; depth > 1 && i < 512; ++i) {
> >             if (!IOMMU_PTE_PRESENT(p1[i]))
> >                     continue;
> >  
> >             p2 = IOMMU_PTE_PAGE(p1[i]);
> > -           for (j = 0; j < 512; ++j) {
> > +           for (j = 0; depth > 2 && j < 512; ++j) {
> >                     if (!IOMMU_PTE_PRESENT(p2[j]))
> >                             continue;
> > +
> >                     p3 = IOMMU_PTE_PAGE(p2[j]);
> > +                   for (k = 0; depth > 3 && k < 512; ++k) {
> > +                           if (!IOMMU_PTE_PRESENT(p3[k]))
> > +                                   continue;
> > +
> > +                           p4 = IOMMU_PTE_PAGE(p3[k]);
> > +                           for (l = 0; depth > 4 && l < 512; ++l) {
> > +                                   if (!IOMMU_PTE_PRESENT(p4[l]))
> > +                                           continue;
> > +
> > +                                   p5 = IOMMU_PTE_PAGE(p4[l]);
> > +                                   for (m = 0; depth > 5 && m < 512; ++m) {
> > +                                           if (!IOMMU_PTE_PRESENT(p5[m]))
> > +                                                   continue;
> > +                                           p6 = IOMMU_PTE_PAGE(p5[m]);
> > +                                           free_page((unsigned long)p6);
> > +                                   }
> > +
> > +                                   free_page((unsigned long)p5);
> > +                           }
> > +
> > +                           free_page((unsigned long)p4);
> > +                   }
> > +
> >                     free_page((unsigned long)p3);
> >             }
> 
> Hmm, actually a recursive version would make more sense here. But since
> recursion is a bad idea in the kernel, how about this approach instead:

It's a fixed maximum depth of recursion though, is it really that taboo?

> From d500d538ad1370679d05667663dcaf8603d529db Mon Sep 17 00:00:00 2001
> From: Joerg Roedel <j...@8bytes.org>
> Date: Thu, 20 Jun 2013 20:22:58 +0200
> Subject: [PATCH] iommu/amd: Fix memory leak in free_pagetable
> 
> The IOMMU pagetables can have up to 3 levels, but the code

s/3/6/ — the pagetables can have up to 6 levels, not 3.

> in free_pagetable() only releases the first 3 levels. Fix
> this leak by releasing all levels.
> 
> Reported-by: Alex Williamson <alex.william...@redhat.com>
> Signed-off-by: Joerg Roedel <j...@8bytes.org>
> ---
>  drivers/iommu/amd_iommu.c |   73 
> ++++++++++++++++++++++++++++++---------------
>  1 file changed, 49 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index 21d02b0..5cde682 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -1893,34 +1893,59 @@ static void domain_id_free(int id)
>       write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
>  }
>  
> +#define DEFINE_FREE_PT_FN(LVL, FN)                           \
> +static void free_pt_##LVL (unsigned long __pt)                       \
> +{                                                            \
> +     unsigned long p;                                        \
> +     u64 *pt;                                                \
> +     int i;                                                  \
> +                                                             \
> +     pt = (u64 *)__pt;                                       \
> +                                                             \
> +     for (i = 0; i < 512; ++i) {                             \
> +             if (!IOMMU_PTE_PRESENT(pt[i]))                  \
> +                     continue;                               \
> +                                                             \
> +             p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);       \
> +             FN(p);                                          \
> +     }                                                       \
> +     free_page((unsigned long)pt);                           \
> +}
> +
> +DEFINE_FREE_PT_FN(l2, free_page)
> +DEFINE_FREE_PT_FN(l3, free_pt_l2)
> +DEFINE_FREE_PT_FN(l4, free_pt_l3)
> +DEFINE_FREE_PT_FN(l5, free_pt_l4)
> +DEFINE_FREE_PT_FN(l6, free_pt_l5)
> +
>  static void free_pagetable(struct protection_domain *domain)
>  {
> -     int i, j;
> -     u64 *p1, *p2, *p3;
> -
> -     p1 = domain->pt_root;
> -
> -     if (!p1)
> -             return;
> -
> -     for (i = 0; i < 512; ++i) {
> -             if (!IOMMU_PTE_PRESENT(p1[i]))
> -                     continue;
> +     unsigned long root = (unsigned long)domain->pt_root;
>  
> -             p2 = IOMMU_PTE_PAGE(p1[i]);
> -             for (j = 0; j < 512; ++j) {
> -                     if (!IOMMU_PTE_PRESENT(p2[j]))
> -                             continue;
> -                     p3 = IOMMU_PTE_PAGE(p2[j]);
> -                     free_page((unsigned long)p3);
> -             }
> -
> -             free_page((unsigned long)p2);
> +     switch (domain->mode) {
> +     case PAGE_MODE_NONE:
> +             break;
> +     case PAGE_MODE_1_LEVEL:
> +             free_page(root);
> +             break;
> +     case PAGE_MODE_2_LEVEL:
> +             free_pt_l2(root);
> +             break;
> +     case PAGE_MODE_3_LEVEL:
> +             free_pt_l3(root);
> +             break;
> +     case PAGE_MODE_4_LEVEL:
> +             free_pt_l4(root);
> +             break;
> +     case PAGE_MODE_5_LEVEL:
> +             free_pt_l5(root);
> +             break;
> +     case PAGE_MODE_6_LEVEL:
> +             free_pt_l6(root);
> +             break;
> +     default:
> +             BUG();
>       }
> -
> -     free_page((unsigned long)p1);
> -
> -     domain->pt_root = NULL;
>  }
>  
>  static void free_gcr3_tbl_level1(u64 *tbl)

This version looks correct to me — every populated level is now released for all page-table depths.

Reviewed-by: Alex Williamson <alex.william...@redhat.com>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to