Re: [Intel-gfx] [PATCH v2 1/6] mm: introduce vma->vm_flags modifier functions

2023-02-02 Thread Mike Rapoport
On Thu, Jan 26, 2023 at 11:17:09AM +0200, Mike Rapoport wrote:
> On Wed, Jan 25, 2023 at 12:38:46AM -0800, Suren Baghdasaryan wrote:
> > vm_flags are among VMA attributes which affect decisions like VMA merging
> > and splitting. Therefore all vm_flags modifications are performed after
> > taking exclusive mmap_lock to prevent vm_flags updates racing with such
> > operations. Introduce modifier functions for vm_flags to be used whenever
> > flags are updated. This way we can better check and control correct
> > locking behavior during these updates.
> > 
> > Signed-off-by: Suren Baghdasaryan 
> > ---
> >  include/linux/mm.h   | 37 +
> >  include/linux/mm_types.h |  8 +++-
> >  2 files changed, 44 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index c2f62bdce134..b71f2809caac 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -627,6 +627,43 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
> > 	INIT_LIST_HEAD(&vma->anon_vma_chain);
> >  }
> >  
> > +/* Use when VMA is not part of the VMA tree and needs no locking */
> > +static inline void init_vm_flags(struct vm_area_struct *vma,
> > +unsigned long flags)
> 
> I'd suggest making it vm_flags_init() etc.

Thinking about it more, it would be even clearer to name these vma_flags_xyz().
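For example, the helpers from this patch would then look something like this
(just a sketch of the rename, not part of the patch; bodies as in the patch):

	static inline void vma_flags_init(struct vm_area_struct *vma,
					  unsigned long flags)
	{
		vma->vm_flags = flags;
	}

	static inline void vma_flags_set(struct vm_area_struct *vma,
					 unsigned long flags)
	{
		mmap_assert_write_locked(vma->vm_mm);
		vma->vm_flags |= flags;
	}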

> Except that
> 
> Acked-by: Mike Rapoport (IBM) 
> 

--
Sincerely yours,
Mike.


Re: [Intel-gfx] [PATCH v2 4/6] mm: replace vma->vm_flags indirect modification in ksm_madvise

2023-02-02 Thread Mike Rapoport
On Wed, Jan 25, 2023 at 12:38:49AM -0800, Suren Baghdasaryan wrote:
> Replace indirect modifications to vma->vm_flags with calls to modifier
> functions to be able to track flag changes and to keep vma locking
> correctness. Add a BUG_ON check in ksm_madvise() to catch indirect
> vm_flags modification attempts.
> 
> Signed-off-by: Suren Baghdasaryan 

Acked-by: Mike Rapoport (IBM) 
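Condensing the hunks below, the caller-side pattern this patch introduces is
(editor's sketch, error handling trimmed):

	unsigned long vm_flags = vma->vm_flags;	/* local copy for ksm_madvise() */
	int ret;

	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
			  MADV_UNMERGEABLE, &vm_flags);
	if (ret)
		return ret;
	reset_vm_flags(vma, vm_flags);	/* publish under the mmap write lock */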

> ---
>  arch/powerpc/kvm/book3s_hv_uvmem.c | 5 -
>  arch/s390/mm/gmap.c| 5 -
>  mm/khugepaged.c| 2 ++
>  mm/ksm.c   | 2 ++
>  4 files changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> index 1d67baa5557a..325a7a47d348 100644
> --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> @@ -393,6 +393,7 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm,
>  {
>   unsigned long gfn = memslot->base_gfn;
>   unsigned long end, start = gfn_to_hva(kvm, gfn);
> + unsigned long vm_flags;
>   int ret = 0;
>   struct vm_area_struct *vma;
>   int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
> @@ -409,12 +410,14 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm,
>   ret = H_STATE;
>   break;
>   }
> + vm_flags = vma->vm_flags;
>   ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
> -   merge_flag, &vma->vm_flags);
> +   merge_flag, &vm_flags);
>   if (ret) {
>   ret = H_STATE;
>   break;
>   }
> + reset_vm_flags(vma, vm_flags);
>   start = vma->vm_end;
>   } while (end > vma->vm_end);
>  
> diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
> index 3a695b8a1e3c..d5eb47dcdacb 100644
> --- a/arch/s390/mm/gmap.c
> +++ b/arch/s390/mm/gmap.c
> @@ -2587,14 +2587,17 @@ int gmap_mark_unmergeable(void)
>  {
>   struct mm_struct *mm = current->mm;
>   struct vm_area_struct *vma;
> + unsigned long vm_flags;
>   int ret;
>   VMA_ITERATOR(vmi, mm, 0);
>  
>   for_each_vma(vmi, vma) {
> + vm_flags = vma->vm_flags;
>   ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
> -   MADV_UNMERGEABLE, &vma->vm_flags);
> +   MADV_UNMERGEABLE, &vm_flags);
>   if (ret)
>   return ret;
> + reset_vm_flags(vma, vm_flags);
>   }
>   mm->def_flags &= ~VM_MERGEABLE;
>   return 0;
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 8abc59345bf2..76b24cd0c179 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -354,6 +354,8 @@ struct attribute_group khugepaged_attr_group = {
>  int hugepage_madvise(struct vm_area_struct *vma,
>unsigned long *vm_flags, int advice)
>  {
> + /* vma->vm_flags can be changed only using modifier functions */
> + BUG_ON(vm_flags == &vma->vm_flags);
>   switch (advice) {
>   case MADV_HUGEPAGE:
>  #ifdef CONFIG_S390
> diff --git a/mm/ksm.c b/mm/ksm.c
> index 04f1c8c2df11..992b2be9f5e6 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -2573,6 +2573,8 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
>   struct mm_struct *mm = vma->vm_mm;
>   int err;
>  
> + /* vma->vm_flags can be changed only using modifier functions */
> + BUG_ON(vm_flags == &vma->vm_flags);
>   switch (advice) {
>   case MADV_MERGEABLE:
>   /*
> -- 
> 2.39.1
> 
> 


Re: [Intel-gfx] [PATCH v2 6/6] mm: export dump_mm()

2023-02-02 Thread Mike Rapoport
On Wed, Jan 25, 2023 at 12:38:51AM -0800, Suren Baghdasaryan wrote:
> mmap_assert_write_locked() is used in vm_flags modifiers. Because
> mmap_assert_write_locked() uses dump_mm() and vm_flags are sometimes
> modified from inside a module, it's necessary to export the
> dump_mm() function.
> 
> Signed-off-by: Suren Baghdasaryan 

Acked-by: Mike Rapoport (IBM) 
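To see why the export is needed, consider a modular driver doing something
like the following (hypothetical code, names invented for illustration):

	/* example_mmap() is a made-up .mmap handler in a module */
	static int example_mmap(struct file *file, struct vm_area_struct *vma)
	{
		/*
		 * set_vm_flags() calls mmap_assert_write_locked(), whose
		 * failure path dumps the mm via dump_mm(); building this
		 * as a module therefore needs dump_mm() exported.
		 */
		set_vm_flags(vma, VM_DONTEXPAND | VM_DONTDUMP);
		return 0;
	}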

> ---
>  mm/debug.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/mm/debug.c b/mm/debug.c
> index 9d3d893dc7f4..96d594e16292 100644
> --- a/mm/debug.c
> +++ b/mm/debug.c
> @@ -215,6 +215,7 @@ void dump_mm(const struct mm_struct *mm)
>   mm->def_flags, &mm->def_flags
>   );
>  }
> +EXPORT_SYMBOL(dump_mm);
>  
>  static bool page_init_poisoning __read_mostly = true;
>  
> -- 
> 2.39.1
> 


Re: [Intel-gfx] [PATCH v2 2/6] mm: replace VM_LOCKED_CLEAR_MASK with VM_LOCKED_MASK

2023-02-02 Thread Mike Rapoport
On Wed, Jan 25, 2023 at 12:38:47AM -0800, Suren Baghdasaryan wrote:
> To simplify the usage of VM_LOCKED_CLEAR_MASK in clear_vm_flags(),
> replace it with VM_LOCKED_MASK bitmask and convert all users.
> 
> Signed-off-by: Suren Baghdasaryan 

Acked-by: Mike Rapoport (IBM) 

> ---
>  include/linux/mm.h | 4 ++--
>  kernel/fork.c  | 2 +-
>  mm/hugetlb.c   | 4 ++--
>  mm/mlock.c | 6 +++---
>  mm/mmap.c  | 6 +++---
>  mm/mremap.c| 2 +-
>  6 files changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index b71f2809caac..da62bdd627bf 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -421,8 +421,8 @@ extern unsigned int kobjsize(const void *objp);
>  /* This mask defines which mm->def_flags a process can inherit its parent */
>  #define VM_INIT_DEF_MASK VM_NOHUGEPAGE
>  
> -/* This mask is used to clear all the VMA flags used by mlock */
> -#define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT))
> +/* This mask represents all the VMA flag bits used by mlock */
> +#define VM_LOCKED_MASK   (VM_LOCKED | VM_LOCKONFAULT)
>  
> /* Arch-specific flags to clear when updating VM flags on protection change */
>  #ifndef VM_ARCH_CLEAR
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 6683c1b0f460..03d472051236 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -669,7 +669,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
>   tmp->anon_vma = NULL;
>   } else if (anon_vma_fork(tmp, mpnt))
>   goto fail_nomem_anon_vma_fork;
> - tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
> + clear_vm_flags(tmp, VM_LOCKED_MASK);
>   file = tmp->vm_file;
>   if (file) {
>   struct address_space *mapping = file->f_mapping;
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index d20c8b09890e..4ecdbad9a451 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -6973,8 +6973,8 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
>   unsigned long s_end = sbase + PUD_SIZE;
>  
>   /* Allow segments to share if only one is marked locked */
> - unsigned long vm_flags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
> - unsigned long svm_flags = svma->vm_flags & VM_LOCKED_CLEAR_MASK;
> + unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED_MASK;
> + unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED_MASK;
>  
>   /*
>* match the virtual addresses, permission and the alignment of the
> diff --git a/mm/mlock.c b/mm/mlock.c
> index 0336f52e03d7..5c4fff93cd6b 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -497,7 +497,7 @@ static int apply_vma_lock_flags(unsigned long start, size_t len,
>   if (vma->vm_start != tmp)
>   return -ENOMEM;
>  
> - newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
> + newflags = vma->vm_flags & ~VM_LOCKED_MASK;
>   newflags |= flags;
>   /* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
>   tmp = vma->vm_end;
> @@ -661,7 +661,7 @@ static int apply_mlockall_flags(int flags)
>   struct vm_area_struct *vma, *prev = NULL;
>   vm_flags_t to_add = 0;
>  
> - current->mm->def_flags &= VM_LOCKED_CLEAR_MASK;
> + current->mm->def_flags &= ~VM_LOCKED_MASK;
>   if (flags & MCL_FUTURE) {
>   current->mm->def_flags |= VM_LOCKED;
>  
> @@ -681,7 +681,7 @@ static int apply_mlockall_flags(int flags)
>   for_each_vma(vmi, vma) {
>   vm_flags_t newflags;
>  
> - newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
> + newflags = vma->vm_flags & ~VM_LOCKED_MASK;
>   newflags |= to_add;
>  
>   /* Ignore errors */
> diff --git a/mm/mmap.c b/mm/mmap.c
> index d4abc6feced1..323bd253b25a 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2671,7 +2671,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>   if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
>   is_vm_hugetlb_page(vma) ||
>   vma == get_gate_vma(current->mm))
> - vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
> + clear_vm_flags(vma, VM_LOCKED_MASK);
>   else
>   mm->locked_vm += (len >> PAGE_SHIFT);
>   }
> @@ -3340,8 +3340,8 @@ static struct vm_area_struct *__install_sp

Re: [Intel-gfx] [PATCH v2 1/6] mm: introduce vma->vm_flags modifier functions

2023-02-02 Thread Mike Rapoport
On Wed, Jan 25, 2023 at 12:38:46AM -0800, Suren Baghdasaryan wrote:
> vm_flags are among VMA attributes which affect decisions like VMA merging
> and splitting. Therefore all vm_flags modifications are performed after
> taking exclusive mmap_lock to prevent vm_flags updates racing with such
> operations. Introduce modifier functions for vm_flags to be used whenever
> flags are updated. This way we can better check and control correct
> locking behavior during these updates.
> 
> Signed-off-by: Suren Baghdasaryan 
> ---
>  include/linux/mm.h   | 37 +
>  include/linux/mm_types.h |  8 +++-
>  2 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index c2f62bdce134..b71f2809caac 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -627,6 +627,43 @@ static inline void vma_init(struct vm_area_struct *vma, 
> struct mm_struct *mm)
>   INIT_LIST_HEAD(>anon_vma_chain);
>  }
>  
> +/* Use when VMA is not part of the VMA tree and needs no locking */
> +static inline void init_vm_flags(struct vm_area_struct *vma,
> +  unsigned long flags)

I'd suggest making it vm_flags_init() etc.
Except that

Acked-by: Mike Rapoport (IBM) 

> +{
> + vma->vm_flags = flags;
> +}
> +
> +/* Use when VMA is part of the VMA tree and modifications need coordination */
> +static inline void reset_vm_flags(struct vm_area_struct *vma,
> +   unsigned long flags)
> +{
> + mmap_assert_write_locked(vma->vm_mm);
> + init_vm_flags(vma, flags);
> +}
> +
> +static inline void set_vm_flags(struct vm_area_struct *vma,
> + unsigned long flags)
> +{
> + mmap_assert_write_locked(vma->vm_mm);
> + vma->vm_flags |= flags;
> +}
> +
> +static inline void clear_vm_flags(struct vm_area_struct *vma,
> +   unsigned long flags)
> +{
> + mmap_assert_write_locked(vma->vm_mm);
> + vma->vm_flags &= ~flags;
> +}
> +
> +static inline void mod_vm_flags(struct vm_area_struct *vma,
> + unsigned long set, unsigned long clear)
> +{
> + mmap_assert_write_locked(vma->vm_mm);
> + vma->vm_flags |= set;
> + vma->vm_flags &= ~clear;
> +}
> +
>  static inline void vma_set_anonymous(struct vm_area_struct *vma)
>  {
>   vma->vm_ops = NULL;
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 2d6d790d9bed..6c7c70bf50dd 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -491,7 +491,13 @@ struct vm_area_struct {
>* See vmf_insert_mixed_prot() for discussion.
>*/
>   pgprot_t vm_page_prot;
> - unsigned long vm_flags; /* Flags, see mm.h. */
> +
> + /*
> +  * Flags, see mm.h.
> +  * WARNING! Do not modify directly.
> +  * Use {init|reset|set|clear|mod}_vm_flags() functions instead.
> +  */
> + unsigned long vm_flags;
>  
>   /*
>* For areas with an address space and backing store,
> -- 
> 2.39.1
> 
> 
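For the later patches in the series, the conversion boils down to replacing
open-coded updates with a single helper call, e.g. (editor's sketch):

	/* before: two direct, unchecked modifications */
	vma->vm_flags |= VM_DONTEXPAND;
	vma->vm_flags &= ~VM_MAYWRITE;

	/* after: one call that also asserts the mmap write lock is held */
	mod_vm_flags(vma, VM_DONTEXPAND, VM_MAYWRITE);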


Re: [Intel-gfx] [PATCH v2 5/6] mm: introduce mod_vm_flags_nolock and use it in untrack_pfn

2023-02-02 Thread Mike Rapoport
On Wed, Jan 25, 2023 at 12:38:50AM -0800, Suren Baghdasaryan wrote:
> In cases when VMA flags are modified after the VMA was isolated and
> mmap_lock was downgraded, flags modifications would trigger an assertion
> failure because the mmap write lock is not held.
> Introduce mod_vm_flags_nolock to be used in such situations.

vm_flags_mod_nolock?

> Pass a hint to untrack_pfn to conditionally use mod_vm_flags_nolock for
> flags modification and to avoid the assertion.
> 
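The scenario being targeted looks roughly like this (editor's sketch of an
exit/unmap-style path; not taken verbatim from the series):

	mmap_write_lock(mm);
	/* ... VMAs get detached from the tree ... */
	mmap_write_downgrade(mm);	/* only the read lock is held now */
	unmap_vmas(&tlb, &mm->mm_mt, vma, 0, ULONG_MAX, /* mm_wr_locked */ false);
	/* untrack_pfn() on this path must use mod_vm_flags_nolock() */
	mmap_read_unlock(mm);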
> Signed-off-by: Suren Baghdasaryan 
> ---
>  arch/x86/mm/pat/memtype.c | 10 +++---
>  include/linux/mm.h| 12 +---
>  include/linux/pgtable.h   |  5 +++--
>  mm/memory.c   | 13 +++--
>  mm/memremap.c |  4 ++--
>  mm/mmap.c | 16 ++--
>  6 files changed, 38 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
> index ae9645c900fa..d8adc0b42cf2 100644
> --- a/arch/x86/mm/pat/memtype.c
> +++ b/arch/x86/mm/pat/memtype.c
> @@ -1046,7 +1046,7 @@ void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn)
>   * can be for the entire vma (in which case pfn, size are zero).
>   */
>  void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
> -  unsigned long size)
> +  unsigned long size, bool mm_wr_locked)
>  {
>   resource_size_t paddr;
>   unsigned long prot;
> @@ -1065,8 +1065,12 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
>   size = vma->vm_end - vma->vm_start;
>   }
>   free_pfn_range(paddr, size);
> - if (vma)
> - clear_vm_flags(vma, VM_PAT);
> + if (vma) {
> + if (mm_wr_locked)
> + clear_vm_flags(vma, VM_PAT);
> + else
> + mod_vm_flags_nolock(vma, 0, VM_PAT);
> + }
>  }
>  
>  /*
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 55335edd1373..48d49930c411 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -656,12 +656,18 @@ static inline void clear_vm_flags(struct vm_area_struct *vma,
>   vma->vm_flags &= ~flags;
>  }
>  
> +static inline void mod_vm_flags_nolock(struct vm_area_struct *vma,
> +unsigned long set, unsigned long clear)
> +{
> + vma->vm_flags |= set;
> + vma->vm_flags &= ~clear;
> +}
> +
>  static inline void mod_vm_flags(struct vm_area_struct *vma,
>   unsigned long set, unsigned long clear)
>  {
>   mmap_assert_write_locked(vma->vm_mm);
> - vma->vm_flags |= set;
> - vma->vm_flags &= ~clear;
> + mod_vm_flags_nolock(vma, set, clear);
>  }
>  
>  static inline void vma_set_anonymous(struct vm_area_struct *vma)
> @@ -2087,7 +2093,7 @@ static inline void zap_vma_pages(struct vm_area_struct *vma)
>  }
>  void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
>   struct vm_area_struct *start_vma, unsigned long start,
> - unsigned long end);
> + unsigned long end, bool mm_wr_locked);
>  
>  struct mmu_notifier_range;
>  
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index 5fd45454c073..c63cd44777ec 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -1185,7 +1185,8 @@ static inline int track_pfn_copy(struct vm_area_struct *vma)
>   * can be for the entire vma (in which case pfn, size are zero).
>   */
>  static inline void untrack_pfn(struct vm_area_struct *vma,
> -unsigned long pfn, unsigned long size)
> +unsigned long pfn, unsigned long size,
> +bool mm_wr_locked)
>  {
>  }
>  
> @@ -1203,7 +1204,7 @@ extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
>pfn_t pfn);
>  extern int track_pfn_copy(struct vm_area_struct *vma);
>  extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
> - unsigned long size);
> + unsigned long size, bool mm_wr_locked);
>  extern void untrack_pfn_moved(struct vm_area_struct *vma);
>  #endif
>  
> diff --git a/mm/memory.c b/mm/memory.c
> index d6902065e558..5b11b50e2c4a 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1613,7 +1613,7 @@ void unmap_page_range(struct mmu_gather *tlb,
>  static void unmap_single_vma(struct mmu_gather *tlb,
>   struct vm_area_struct *vma, unsigned long start_addr,
>   unsigned long end_addr,
> - struct zap_details *details)
> + struct zap_details *details, bool mm_wr_locked)
>  {
>   unsigned long start = max(vma->vm_start, start_addr);
>   unsigned long end;
> @@ -1628,7 +1628,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
>   uprobe_munmap(vma, start, end);
>  
>   if (unlikely(vma->vm_flags & VM_PFNMAP))
> - untrack_pfn(vma, 0, 0);
> + untrack_pfn(vma, 0, 0, mm_wr_locked);

Re: [Intel-gfx] [PATCH v2 3/6] mm: replace vma->vm_flags direct modifications with modifier calls

2023-02-02 Thread Mike Rapoport
On Wed, Jan 25, 2023 at 12:38:48AM -0800, Suren Baghdasaryan wrote:
> Replace direct modifications to vma->vm_flags with calls to modifier
> functions to be able to track flag changes and to keep vma locking
> correctness.
> 
> Signed-off-by: Suren Baghdasaryan 

Acked-by: Mike Rapoport (IBM) 

> ---
>  arch/arm/kernel/process.c  |  2 +-
>  arch/ia64/mm/init.c|  8 
>  arch/loongarch/include/asm/tlb.h   |  2 +-
>  arch/powerpc/kvm/book3s_xive_native.c  |  2 +-
>  arch/powerpc/mm/book3s64/subpage_prot.c|  2 +-
>  arch/powerpc/platforms/book3s/vas-api.c|  2 +-
>  arch/powerpc/platforms/cell/spufs/file.c   | 14 +++---
>  arch/s390/mm/gmap.c|  3 +--
>  arch/x86/entry/vsyscall/vsyscall_64.c  |  2 +-
>  arch/x86/kernel/cpu/sgx/driver.c   |  2 +-
>  arch/x86/kernel/cpu/sgx/virt.c |  2 +-
>  arch/x86/mm/pat/memtype.c  |  6 +++---
>  arch/x86/um/mem_32.c   |  2 +-
>  drivers/acpi/pfr_telemetry.c   |  2 +-
>  drivers/android/binder.c   |  3 +--
>  drivers/char/mspec.c   |  2 +-
>  drivers/crypto/hisilicon/qm.c  |  2 +-
>  drivers/dax/device.c   |  2 +-
>  drivers/dma/idxd/cdev.c|  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c|  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c   |  4 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c  |  4 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_events.c|  4 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  4 ++--
>  drivers/gpu/drm/drm_gem.c  |  2 +-
>  drivers/gpu/drm/drm_gem_dma_helper.c   |  3 +--
>  drivers/gpu/drm/drm_gem_shmem_helper.c |  2 +-
>  drivers/gpu/drm/drm_vm.c   |  8 
>  drivers/gpu/drm/etnaviv/etnaviv_gem.c  |  2 +-
>  drivers/gpu/drm/exynos/exynos_drm_gem.c|  4 ++--
>  drivers/gpu/drm/gma500/framebuffer.c   |  2 +-
>  drivers/gpu/drm/i810/i810_dma.c|  2 +-
>  drivers/gpu/drm/i915/gem/i915_gem_mman.c   |  4 ++--
>  drivers/gpu/drm/mediatek/mtk_drm_gem.c |  2 +-
>  drivers/gpu/drm/msm/msm_gem.c  |  2 +-
>  drivers/gpu/drm/omapdrm/omap_gem.c |  3 +--
>  drivers/gpu/drm/rockchip/rockchip_drm_gem.c|  3 +--
>  drivers/gpu/drm/tegra/gem.c|  5 ++---
>  drivers/gpu/drm/ttm/ttm_bo_vm.c|  3 +--
>  drivers/gpu/drm/virtio/virtgpu_vram.c  |  2 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c   |  2 +-
>  drivers/gpu/drm/xen/xen_drm_front_gem.c|  3 +--
>  drivers/hsi/clients/cmt_speech.c   |  2 +-
>  drivers/hwtracing/intel_th/msu.c   |  2 +-
>  drivers/hwtracing/stm/core.c   |  2 +-
>  drivers/infiniband/hw/hfi1/file_ops.c  |  4 ++--
>  drivers/infiniband/hw/mlx5/main.c  |  4 ++--
>  drivers/infiniband/hw/qib/qib_file_ops.c   | 13 ++---
>  drivers/infiniband/hw/usnic/usnic_ib_verbs.c   |  2 +-
>  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c|  2 +-
>  .../media/common/videobuf2/videobuf2-dma-contig.c  |  2 +-
>  drivers/media/common/videobuf2/videobuf2-vmalloc.c |  2 +-
>  drivers/media/v4l2-core/videobuf-dma-contig.c  |  2 +-
>  drivers/media/v4l2-core/videobuf-dma-sg.c  |  4 ++--
>  drivers/media/v4l2-core/videobuf-vmalloc.c |  2 +-
>  drivers/misc/cxl/context.c |  2 +-
>  drivers/misc/habanalabs/common/memory.c|  2 +-
>  drivers/misc/habanalabs/gaudi/gaudi.c  |  4 ++--
>  drivers/misc/habanalabs/gaudi2/gaudi2.c|  8 
>  drivers/misc/habanalabs/goya/goya.c|  4 ++--
>  drivers/misc/ocxl/context.c|  4 ++--
>  drivers/misc/ocxl/sysfs.c  |  2 +-
>  drivers/misc/open-dice.c   |  4 ++--
>  drivers/misc/sgi-gru/grufile.c |  4 ++--
>  drivers/misc/uacce/uacce.c |  2 +-
>  drivers/sbus/char/oradax.c |  2 +-
>  drivers/scsi/cxlflash/ocxl_hw.c|  2 +-
>  drivers/scsi/sg.c  |  2 +-
>  drivers/staging/media/atomisp/pci/hmm/hmm_bo.c |  2 +-
>  drivers/staging/media/deprecated/meye/meye.c   |  4 ++--
>  .../media/deprecated/stkwebcam

Re: [Intel-gfx] [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr

2022-03-01 Thread Mike Rapoport



On February 28, 2022 10:42:53 PM GMT+02:00, James Bottomley wrote:
>On Mon, 2022-02-28 at 21:07 +0100, Christian König wrote:
>> Am 28.02.22 um 20:56 schrieb Linus Torvalds:
>> > On Mon, Feb 28, 2022 at 4:19 AM Christian König
>> >  wrote:
>> > > I don't think that using the extra variable makes the code in any
>> > > way
>> > > more reliable or easier to read.
>> > So I think the next step is to do the attached patch (which
>> > requires
>> > that "-std=gnu11" that was discussed in the original thread).
>> > 
>> > That will guarantee that the 'pos' parameter of
>> > list_for_each_entry()
>> > is only updated INSIDE the for_each_list_entry() loop, and can
>> > never
>> > point to the (wrongly typed) head entry.
>> > 
>> > And I would actually hope that it should actually cause compiler
>> > warnings about possibly uninitialized variables if people then use
>> > the
>> > 'pos' pointer outside the loop. Except
>> > 
>> >   (a) that code in sgx/encl.c currently initializes 'tmp' to NULL for
>> > inexplicable reasons - possibly because it already expected this behavior
>> > 
>> >   (b) when I remove that NULL initializer, I still don't get a warning,
>> > because we've disabled -Wno-maybe-uninitialized since it results in so
>> > many false positives.
>> > 
>> > Oh well.
>> > 
>> > Anyway, give this patch a look, and at least if it's expanded to do
>> > "(pos) = NULL" in the entry statement for the for-loop, it will
>> > avoid the HEAD type confusion that Jakob is working on. And I think
>> > in a cleaner way than the horrid games he plays.
>> > 
>> > (But it won't avoid possible CPU speculation of such type
>> > confusion. That, in my opinion, is a completely different issue)
>> 
>> Yes, completely agree.
>> 
>> > I do wish we could actually poison the 'pos' value after the loop
>> > somehow - but clearly the "might be uninitialized" I was hoping for
>> > isn't the way to do it.
>> > 
>> > Anybody have any ideas?
>> 
>> I think we should look at the use cases why code is touching (pos)
>> after the loop.
>> 
>> Just from skimming over the patches to change this and experience
>> with the drivers/subsystems I help to maintain I think the primary
>> pattern looks something like this:
>> 
>> list_for_each_entry(entry, head, member) {
>> 	if (some_condition_checking(entry))
>> 		break;
>> }
>> do_something_with(entry);
>
>
>Actually, we usually have a check to see if the loop found anything,
>but in that case it should be something like
>
>	if (list_entry_is_head(entry, head, member)) {
>		return with error;
>	}
>	do_something_with(entry);
>
>Suffice?  The list_entry_is_head() macro is designed to cope with the
>bogus entry on head problem.

Won't suffice, because the end goal of this work is to limit the scope of
entry to the loop only. Hence the need for an additional variable.

Besides, there are no guarantees that people won't call do_something_with(entry)
without the check, or won't compare entry to NULL to check whether the loop
finished with a break or not.
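Concretely, with a loop-scoped iterator the pattern would become (editor's
sketch, reusing the names from the quoted example):

	struct item *found = NULL;

	list_for_each_entry(entry, head, member) {	/* entry visible only here */
		if (some_condition_checking(entry)) {
			found = entry;
			break;
		}
	}
	if (!found)
		return -ENOENT;	/* no match; no bogus head-entry dereference */
	do_something_with(found);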

>James


-- 
Sincerely yours,
Mike


Re: [Intel-gfx] [lib/stackdepot] 1cd8ce52c5: BUG:unable_to_handle_page_fault_for_address

2021-10-15 Thread Mike Rapoport
On Fri, Oct 15, 2021 at 10:27:17AM +0200, Vlastimil Babka wrote:
> On 10/14/21 12:16, Mike Rapoport wrote:
> > On Thu, Oct 14, 2021 at 11:33:03AM +0200, Vlastimil Babka wrote:
> >> On 10/14/21 10:54, kernel test robot wrote:
> >> 
> >> In my local testing of the patch, when stackdepot was initialized through
> >> page owner init, it was using kvmalloc() so slab_is_available() was true.
> >> Looks like the exact order of slab vs page_owner alloc is non-deterministic,
> >> could be arch-dependent or just random ordering of init calls. A wrong order
> >> will exploit the apparent fact that slab_is_available() is not a good
> >> indicator of using memblock vs page allocator, and we would need a better one.
> >> Thoughts?
> > 
> > The order of slab vs page_owner is deterministic, but it is different for
> > FLATMEM and SPARSEMEM. And page_ext_init_flatmem_late() that initializes
> > page_ext for FLATMEM is called exactly between buddy and slab setup:
> 
> Oh, so it was due to FLATMEM, thanks for figuring that out!
> 
> > static void __init mm_init(void)
> > {
> > 	...
> > 	mem_init();
> > 	mem_init_print_info();
> > 	/* page_owner must be initialized after buddy is ready */
> > 	page_ext_init_flatmem_late();
> > 	kmem_cache_init();
> > 	...
> > }
> > 
> > I've stared at the page_ext init for a while and it seems that
> > page_ext_init_flatmem_late() can simply be dropped, because page_ext_init(),
> > which is called much later in the boot process, anyway calls
> > invoke_init_callbacks().
> 
> Yeah, but page_ext_init() only does something for SPARSEMEM, and is empty on
> FLATMEM. Otherwise it would be duplicating all the work. So I'll just move
> page_ext_init_flatmem_late() below kmem_cache_init() in mm_init().

I hope at some point we'll have a cleaner mm_init(), but for now simply
moving page_ext_init_flatmem_late() should work.
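With that move, the relevant part of mm_init() would read (sketch):

	static void __init mm_init(void)
	{
		...
		mem_init();
		mem_init_print_info();
		kmem_cache_init();
		/* page_ext/page_owner can now allocate via slab (kvmalloc) */
		page_ext_init_flatmem_late();
		...
	}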

> Thanks again!

Welcome :)
 

-- 
Sincerely yours,
Mike.


Re: [Intel-gfx] [lib/stackdepot] 1cd8ce52c5: BUG:unable_to_handle_page_fault_for_address

2021-10-14 Thread Mike Rapoport
On Thu, Oct 14, 2021 at 11:33:03AM +0200, Vlastimil Babka wrote:
> On 10/14/21 10:54, kernel test robot wrote:
> > 
> > 
> > Greeting,
> > 
> > FYI, we noticed the following commit (built with gcc-9):
> > 
> > commit: 1cd8ce52c520c26c513899fb5aee42b8e5f60d0d ("[PATCH v2] lib/stackdepot: allow optional init and stack_table allocation by kvmalloc()")
> > url: https://github.com/0day-ci/linux/commits/Vlastimil-Babka/lib-stackdepot-allow-optional-init-and-stack_table-allocation-by-kvmalloc/20211012-170816
> > base: git://anongit.freedesktop.org/drm-intel for-linux-next
> > 
> > in testcase: rcutorture
> > version: 
> > with following parameters:
> > 
> > runtime: 300s
> > test: cpuhotplug
> > torture_type: srcud
> > 
> > test-description: rcutorture is rcutorture kernel module load/unload test.
> > test-url: https://www.kernel.org/doc/Documentation/RCU/torture.txt
> > 
> > 
> > on test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 4G
> > 
> > caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> > 
> > 
> > +----------------------------------------------+------------+------------+
> > |                                              | a94a6d76c9 | 1cd8ce52c5 |
> > +----------------------------------------------+------------+------------+
> > | boot_successes                               | 30         | 0          |
> > | boot_failures                                | 0          | 7          |
> > | BUG:kernel_NULL_pointer_dereference,address  | 0          | 2          |
> > | Oops:#[##]                                   | 0          | 7          |
> > | EIP:stack_depot_save                         | 0          | 7          |
> > | Kernel_panic-not_syncing:Fatal_exception     | 0          | 7          |
> > | BUG:unable_to_handle_page_fault_for_address  | 0          | 5          |
> > +----------------------------------------------+------------+------------+
> > 
> > 
> > If you fix the issue, kindly add following tag
> > Reported-by: kernel test robot 
> > 
> > 
> > 
> > [  319.147926][  T259] BUG: unable to handle page fault for address: 0ec74110
> > [  319.149309][  T259] #PF: supervisor read access in kernel mode
> > [  319.150362][  T259] #PF: error_code(0x) - not-present page
> > [  319.151372][  T259] *pde = 
> > [  319.151964][  T259] Oops:  [#1] SMP
> > [  319.152617][  T259] CPU: 0 PID: 259 Comm: systemd-rc-loca Not tainted 5.15.0-rc1-00270-g1cd8ce52c520 #1
> > [  319.154514][  T259] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
> > [  319.156200][  T259] EIP: stack_depot_save+0x12a/0x4d0
> 
> 
> Cc Mike Rapoport, looks like:
> - memblock_alloc() should have failed (I think, because page allocator
>   already took over?), but didn't. So apparently we got some area that wasn't
>   fully mapped.
> - using slab_is_available() is not accurate enough to detect when to use
>   memblock or page allocator (kvmalloc in case of my patch). I have used it
>   because memblock_alloc_internal() checks the same condition to issue a warning.
> 
> Relevant part of dmesg.xz that was attached:
> [1.589075][T0] Dentry cache hash table entries: 524288 (order: 9, 2097152 bytes, linear)
> [1.592396][T0] Inode-cache hash table entries: 262144 (order: 8, 1048576 bytes, linear)
> [2.916844][T0] allocated 31496920 bytes of page_ext
> 
> - this means we were allocating from page allocator by alloc_pages_exact_nid() already
> 
> [2.918197][T0] mem auto-init: stack:off, heap alloc:off, heap free:on
> [2.919683][T0] mem auto-init: clearing system memory may take some time...
> [2.921239][T0] Initializing HighMem for node 0 (000b67fe:000bffe0)
> [   23.023619][T0] Initializing Movable for node 0 (:)
> [  245.194520][T0] Checking if this processor honours the WP bit even in supervisor mode...Ok.
> [  245.196847][T0] Memory: 2914460K/3145208K available (20645K kernel code, 5953K rwdata, 12624K rodata, 760K init, 8112K bss, 230748K reserved, 0K cma-reserved, 155528K highmem)
> [  245.200521][T0] Stack Depot allocating hash table with memblock_alloc
> 
> - initializing stack depot as part of initializing page_owner, uses
>   memblock_alloc() because slab_is_available() is still false
> 
> [  245.212005][T0] Node 0, zone   Normal: page owner found early allocated 0 pages
> [  245.213867][T0] Node 0,

Re: [Intel-gfx] [patch V2 28/29] stacktrace: Provide common infrastructure

2019-04-20 Thread Mike Rapoport
On Thu, Apr 18, 2019 at 10:41:47AM +0200, Thomas Gleixner wrote:
> All architectures which support stacktrace carry duplicated code and
> do the stack storage and filtering at the architecture side.
> 
> Provide a consolidated interface with a callback function for consuming the
> stack entries provided by the architecture specific stack walker. This
> removes lots of duplicated code and allows to implement better filtering
> than 'skip number of entries' in the future without touching any
> architecture specific code.
> 
> Signed-off-by: Thomas Gleixner 
> Cc: linux-a...@vger.kernel.org
> ---
>  include/linux/stacktrace.h |   38 +
>  kernel/stacktrace.c|  173 +
>  lib/Kconfig|4 +
>  3 files changed, 215 insertions(+)
> 
> --- a/include/linux/stacktrace.h
> +++ b/include/linux/stacktrace.h
> @@ -23,6 +23,43 @@ unsigned int stack_trace_save_regs(struc
>  unsigned int stack_trace_save_user(unsigned long *store, unsigned int size);
> 
>  /* Internal interfaces. Do not use in generic code */
> +#ifdef CONFIG_ARCH_STACKWALK
> +
> +/**
> + * stack_trace_consume_fn - Callback for arch_stack_walk()
> + * @cookie:  Caller supplied pointer handed back by arch_stack_walk()
> + * @addr:The stack entry address to consume
> + * @reliable:True when the stack entry is reliable. Required by
> + *   some printk based consumers.
> + *
> + * Returns:  True, if the entry was consumed or skipped
> + *   False, if there is no space left to store
> + */
> +typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr,
> +bool reliable);
> +/**
> + * arch_stack_walk - Architecture specific function to walk the stack
> +

Nit: the missing '*' at the beginning of the line makes kernel-doc unhappy.
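i.e. the continuation line should keep the leading asterisk:

	+ * arch_stack_walk - Architecture specific function to walk the stack
	+ *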

> + * @consume_entry:   Callback which is invoked by the architecture code for
> + *   each entry.
> + * @cookie:  Caller supplied pointer which is handed back to
> + *   @consume_entry
> + * @task:Pointer to a task struct, can be NULL
> + * @regs:Pointer to registers, can be NULL
> + *
> + * @task @regs:
> + * NULL  NULL    Stack trace from current
> + * task  NULL    Stack trace from task (can be current)
> + * NULL  regs    Stack trace starting on regs->stackpointer

This will render as a single line with 'make *docs'.
Adding line separators makes this actually a table in the generated docs:

 *  ============ ======= ============================================
 *  task         regs
 *  ============ ======= ============================================
 *  NULL         NULL    Stack trace from current
 *  task         NULL    Stack trace from task (can be current)
 *  NULL         regs    Stack trace starting on regs->stackpointer
 *  ============ ======= ============================================


> + */
> +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
> +  struct task_struct *task, struct pt_regs *regs);
> +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie,
> +  struct task_struct *task);
> +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
> +   const struct pt_regs *regs);
> +
> +#else /* CONFIG_ARCH_STACKWALK */
>  struct stack_trace {
>   unsigned int nr_entries, max_entries;
>   unsigned long *entries;
> @@ -37,6 +74,7 @@ extern void save_stack_trace_tsk(struct
>  extern int save_stack_trace_tsk_reliable(struct task_struct *tsk,
>struct stack_trace *trace);
>  extern void save_stack_trace_user(struct stack_trace *trace);
> +#endif /* !CONFIG_ARCH_STACKWALK */
>  #endif /* CONFIG_STACKTRACE */
> 
>  #if defined(CONFIG_STACKTRACE) && defined(CONFIG_HAVE_RELIABLE_STACKTRACE)
> --- a/kernel/stacktrace.c
> +++ b/kernel/stacktrace.c
> @@ -5,6 +5,8 @@
>   *
>   *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar 
>   */
> +#include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -64,6 +66,175 @@ int stack_trace_snprint(char *buf, size_
>  }
>  EXPORT_SYMBOL_GPL(stack_trace_snprint);
> 
> +#ifdef CONFIG_ARCH_STACKWALK
> +
> +struct stacktrace_cookie {
> + unsigned long   *store;
> + unsigned intsize;
> + unsigned intskip;
> + unsigned intlen;
> +};
> +
> +static bool stack_trace_consume_entry(void *cookie, unsigned long addr,
> +   bool reliable)
> +{
> + struct stacktrace_cookie *c = cookie;
> +
> + if (c->len >= c->size)
> + return false;
> +
> + if (c->skip > 0) {
> + c->skip--;
> + return true;
> + }
> + c->store[c->len++] = addr;
> + return c->len < c->size;
> +}
> +
> +static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr,
> +  

Re: [Intel-gfx] [patch V2 03/29] lib/stackdepot: Provide functions which operate on plain storage arrays

2019-04-20 Thread Mike Rapoport
On Thu, Apr 18, 2019 at 10:41:22AM +0200, Thomas Gleixner wrote:
> The struct stack_trace indirection in the stack depot functions is a truly
> pointless excercise which requires horrible code at the callsites.
> 
> Provide interfaces based on plain storage arrays.
> 
> Signed-off-by: Thomas Gleixner 
> Acked-by: Alexander Potapenko 
> ---
>  include/linux/stackdepot.h |4 ++
>  lib/stackdepot.c   |   66 -
>  2 files changed, 51 insertions(+), 19 deletions(-)
> 
> --- a/include/linux/stackdepot.h
> +++ b/include/linux/stackdepot.h
> @@ -26,7 +26,11 @@ typedef u32 depot_stack_handle_t;
>  struct stack_trace;
> 
> depot_stack_handle_t depot_save_stack(struct stack_trace *trace, gfp_t flags);
> +depot_stack_handle_t stack_depot_save(unsigned long *entries,
> +   unsigned int nr_entries, gfp_t gfp_flags);
> 
>  void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace);
> +unsigned int stack_depot_fetch(depot_stack_handle_t handle,
> +unsigned long **entries);
> 
>  #endif
> --- a/lib/stackdepot.c
> +++ b/lib/stackdepot.c
> @@ -194,40 +194,56 @@ static inline struct stack_record *find_
>   return NULL;
>  }
> 
> -void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace)
> +/**
> + * stack_depot_fetch - Fetch stack entries from a depot
> + *

Nit: kernel-doc will complain about missing description of @handle.
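e.g. something like:

	 * @handle:	Stack depot handle returned from stack_depot_save()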

> + * @entries: Pointer to store the entries address
> + */
> +unsigned int stack_depot_fetch(depot_stack_handle_t handle,
> +unsigned long **entries)
>  {
>   union handle_parts parts = { .handle = handle };
>   void *slab = stack_slabs[parts.slabindex];
>   size_t offset = parts.offset << STACK_ALLOC_ALIGN;
>   struct stack_record *stack = slab + offset;
> 
> - trace->nr_entries = trace->max_entries = stack->size;
> - trace->entries = stack->entries;
> - trace->skip = 0;
> + *entries = stack->entries;
> + return stack->size;
> +}
> +EXPORT_SYMBOL_GPL(stack_depot_fetch);
> +
> +void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace)
> +{
> + unsigned int nent = stack_depot_fetch(handle, &trace->entries);
> +
> + trace->max_entries = trace->nr_entries = nent;
>  }
>  EXPORT_SYMBOL_GPL(depot_fetch_stack);
> 
>  /**
> - * depot_save_stack - save stack in a stack depot.
> - * @trace - the stacktrace to save.
> - * @alloc_flags - flags for allocating additional memory if required.
> + * stack_depot_save - Save a stack trace from an array
>   *
> - * Returns the handle of the stack struct stored in depot.
> + * @entries: Pointer to storage array
> + * @nr_entries:  Size of the storage array
> + * @alloc_flags: Allocation gfp flags
> + *
> + * Returns the handle of the stack struct stored in depot

Can you please s/Returns/Return:/ so that kernel-doc will recognize this as
a return section?
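i.e.:

	 * Return: The handle of the stack struct stored in depot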

>   */
> -depot_stack_handle_t depot_save_stack(struct stack_trace *trace,
> - gfp_t alloc_flags)
> +depot_stack_handle_t stack_depot_save(unsigned long *entries,
> +   unsigned int nr_entries,
> +   gfp_t alloc_flags)
>  {
> - u32 hash;
> - depot_stack_handle_t retval = 0;
>   struct stack_record *found = NULL, **bucket;
> - unsigned long flags;
> + depot_stack_handle_t retval = 0;
>   struct page *page = NULL;
>   void *prealloc = NULL;
> + unsigned long flags;
> + u32 hash;
> 
> - if (unlikely(trace->nr_entries == 0))
> + if (unlikely(nr_entries == 0))
>   goto fast_exit;
> 
> - hash = hash_stack(trace->entries, trace->nr_entries);
> + hash = hash_stack(entries, nr_entries);
> + bucket = &stack_table[hash & STACK_HASH_MASK];
> 
>   /*
> @@ -235,8 +251,8 @@ depot_stack_handle_t depot_save_stack(st
>* The smp_load_acquire() here pairs with smp_store_release() to
>* |bucket| below.
>*/
> - found = find_stack(smp_load_acquire(bucket), trace->entries,
> -trace->nr_entries, hash);
> + found = find_stack(smp_load_acquire(bucket), entries,
> +nr_entries, hash);
>   if (found)
>   goto exit;
> 
> @@ -264,10 +280,10 @@ depot_stack_handle_t depot_save_stack(st
> 
>   spin_lock_irqsave(&depot_lock, flags);
> 
> - found = find_stack(*bucket, trace->entries, trace->nr_entries, hash);
> + found = find_stack(*bucket, entries, nr_entries, hash);
>   if (!found) {
>   struct stack_record *new =
> - depot_alloc_stack(trace->entries, trace->nr_entries,
> + depot_alloc_stack(entries, nr_entries,
> hash, &prealloc, alloc_flags);
>   if (new) {
>   new->next = *bucket;
> @@