Jan Kiszka <jan.kis...@siemens.com> writes:

> On 05.03.21 12:38, Jan Kiszka via Xenomai wrote:
>> From: Jan Kiszka <jan.kis...@siemens.com>
>> 
>> This is still needed to prevent a real-time parent from seeing minor
>> faults on shared pages after forking, until they are finally unshared.
>> 
>> This missing support became visible in Xenomai when running the complete
>> smokey test suite on certain architectures/config combinations.
>> 
>> Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range")
>> Signed-off-by: Jan Kiszka <jan.kis...@siemens.com>
>> ---
>> 
>> For discussion. If there is a better pattern within reach, I'm happy to
>> go that path as well.
>> 
>> Otherwise, this should go into our 4.19 branches (Greg, your arm64 isn't 
>> in sync with noarch, you will need manual merging). Possibly, it already 
>> applies to 5.4 as well; I didn't check yet.
>> 
>>  mm/memory.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>>  1 file changed, 69 insertions(+), 7 deletions(-)
>> 
>> diff --git a/mm/memory.c b/mm/memory.c
>> index 4cc7c72a0b7e..f102f4de2213 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
>>  
>>  unsigned long highest_memmap_pfn __read_mostly;
>>  
>> +static bool cow_user_page(struct page *dst, struct page *src,
>> +                      struct vm_fault *vmf);
>> +
>>  /*
>>   * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
>>   */
>> @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>>  
>>  static inline unsigned long
>>  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>> -            pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>> -            unsigned long addr, int *rss)
>> +         pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>> +         unsigned long addr, int *rss, pmd_t *src_pmd,
>> +         struct page *uncow_page)
>>  {
>>      unsigned long vm_flags = vma->vm_flags;
>>      pte_t pte = *src_pte;
>> @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>       * in the parent and the child
>>       */
>>      if (is_cow_mapping(vm_flags) && pte_write(pte)) {
>> +#ifdef CONFIG_IPIPE
>> +            if (uncow_page) {
>> +                    struct page *old_page = vm_normal_page(vma, addr, pte);
>> +                    struct vm_fault vmf;
>> +
>> +                    vmf.vma = vma;
>> +                    vmf.address = addr;
>> +                    vmf.orig_pte = pte;
>> +                    vmf.pmd = src_pmd;
>> +
>> +                    if (cow_user_page(uncow_page, old_page, &vmf)) {
>> +                            pte = mk_pte(uncow_page, vma->vm_page_prot);
>> +
>> +                            if (vm_flags & VM_SHARED)
>> +                                    pte = pte_mkclean(pte);
>> +                            pte = pte_mkold(pte);
>> +
>> +                            page_add_new_anon_rmap(uncow_page, vma, addr,
>> +                                                   false);
>> +                            rss[!!PageAnon(uncow_page)]++;
>> +                            goto out_set_pte;
>> +                    } else {
>> +                            /* unexpected: source page no longer present */
>> +                            WARN_ON_ONCE(1);
>> +                    }
>> +            }
>> +#endif /* CONFIG_IPIPE */
>>              ptep_set_wrprotect(src_mm, addr, src_pte);
>>              pte = pte_wrprotect(pte);
>>      }
>> @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>      int progress = 0;
>>      int rss[NR_MM_COUNTERS];
>>      swp_entry_t entry = (swp_entry_t){0};
>> -
>> +    struct page *uncow_page = NULL;
>> +#ifdef CONFIG_IPIPE
>> +    int do_cow_break = 0;
>> +again:
>> +    if (do_cow_break) {
>> +            uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
>> +            if (uncow_page == NULL)
>> +                    return -ENOMEM;
>> +            do_cow_break = 0;
>> +    }
>> +#else
>>  again:
>> +#endif
>>      init_rss_vec(rss);
>>  
>>      dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
>> -    if (!dst_pte)
>> +    if (!dst_pte) {
>> +            if (uncow_page)
>> +                    put_page(uncow_page);
>>              return -ENOMEM;
>> +    }
>>      src_pte = pte_offset_map(src_pmd, addr);
>>      src_ptl = pte_lockptr(src_mm, src_pmd);
>>      spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
>> @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>                      progress++;
>>                      continue;
>>              }
>> +#ifdef CONFIG_IPIPE
>> +            if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
>> +                    if (is_cow_mapping(vma->vm_flags) &&
>> +                        test_bit(MMF_VM_PINNED, &src_mm->flags) &&
>> +                        ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
>> +                            arch_leave_lazy_mmu_mode();
>> +                            spin_unlock(src_ptl);
>> +                            pte_unmap(src_pte);
>> +                            add_mm_rss_vec(dst_mm, rss);
>> +                            pte_unmap_unlock(dst_pte, dst_ptl);
>> +                            cond_resched();
>> +                            do_cow_break = 1;
>> +                            goto again;
>> +                    }
>> +            }
>> +#endif
>>              entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
>> -                                                    vma, addr, rss);
>> +                                     vma, addr, rss, src_pmd, uncow_page);
>> +            uncow_page = NULL;
>>              if (entry.val)
>>                      break;
>>              progress += 8;
>> @@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
>>      return same;
>>  }
>>  
>> -static inline bool cow_user_page(struct page *dst, struct page *src,
>> -                             struct vm_fault *vmf)
>> +static bool cow_user_page(struct page *dst, struct page *src,
>> +                      struct vm_fault *vmf)
>>  {
>>      bool ret;
>>      void *kaddr;
>> 
>
> With this applied to ipipe-4.19 and 5.4 queues, related issues are gone, see
>
> https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670
>
> I'm inclined to take this for now so that we can move forward with other
> topics. Any comments?
>

Fine with me. This code can be simplified; I'm working on it. I'll
channel this through Dovetail.
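
For context, the pattern this protects is roughly the one below. This
is a hypothetical plain-POSIX sketch, not one of the smokey tests; in
a real Xenomai process the Cobalt runtime additionally marks the mm
pinned, which is what the MMF_VM_PINNED test in the patch keys on. A
memory-locked parent forks, and without the un-COW at fork time its
next write to a still-shared page takes a COW minor fault, which under
I-pipe means a switch out of primary mode:

#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>

int main(void)
{
	/* Lock all current and future mappings, as RT applications do. */
	if (mlockall(MCL_CURRENT | MCL_FUTURE))
		return EXIT_FAILURE;

	size_t len = 1 << 20;
	char *buf = malloc(len);
	if (!buf)
		return EXIT_FAILURE;
	memset(buf, 0, len);	/* fault everything in up front */

	pid_t pid = fork();	/* PTEs become COW-shared with the child */
	if (pid < 0)
		return EXIT_FAILURE;
	if (pid == 0)
		_exit(0);

	/*
	 * Without the un-COW in copy_pte_range(), this write hits a
	 * write-protected PTE and takes a minor fault in the parent.
	 */
	buf[0] = 1;

	waitpid(pid, NULL, 0);
	return EXIT_SUCCESS;
}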

-- 
Philippe.
