Jan Kiszka <jan.kis...@siemens.com> writes:
> On 06.03.21 15:58, Philippe Gerum wrote: >> >> Jan Kiszka <jan.kis...@siemens.com> writes: >> >>> On 05.03.21 12:38, Jan Kiszka via Xenomai wrote: >>>> From: Jan Kiszka <jan.kis...@siemens.com> >>>> >>>> This is still needed to avoid that a real-time parent sees minor faults >>>> after forking for shared pages until they are finally unshared. >>>> >>>> This missing support became visible in Xenomai when running the complete >>>> smokey test suite on certain architectures/config combinations. >>>> >>>> Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range") >>>> Signed-off-by: Jan Kiszka <jan.kis...@siemens.com> >>>> --- >>>> >>>> For discussion. If there is a better pattern in reach, I'm happy to go >>>> that path as well. >>>> >>>> Otherwise, this should go into our 4.19 branches (Greg, your arm64 isn't >>>> in sync with noarch, you will need manual merging). Possibly, it already >>>> applies to 5.4 as well, didn't check yet. >>>> >>>> mm/memory.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++----- >>>> 1 file changed, 69 insertions(+), 7 deletions(-) >>>> >>>> diff --git a/mm/memory.c b/mm/memory.c >>>> index 4cc7c72a0b7e..f102f4de2213 100644 >>>> --- a/mm/memory.c >>>> +++ b/mm/memory.c >>>> @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn); >>>> >>>> unsigned long highest_memmap_pfn __read_mostly; >>>> >>>> +static bool cow_user_page(struct page *dst, struct page *src, >>>> + struct vm_fault *vmf); >>>> + >>>> /* >>>> * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() >>>> */ >>>> @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct vm_area_struct >>>> *vma, unsigned long addr, >>>> >>>> static inline unsigned long >>>> copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, >>>> - pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, >>>> - unsigned long addr, int *rss) >>>> + pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, >>>> + unsigned long addr, int *rss, pmd_t *src_pmd, >>>> + struct page 
*uncow_page) >>>> { >>>> unsigned long vm_flags = vma->vm_flags; >>>> pte_t pte = *src_pte; >>>> @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm, struct >>>> mm_struct *src_mm, >>>> * in the parent and the child >>>> */ >>>> if (is_cow_mapping(vm_flags) && pte_write(pte)) { >>>> +#ifdef CONFIG_IPIPE >>>> + if (uncow_page) { >>>> + struct page *old_page = vm_normal_page(vma, addr, pte); >>>> + struct vm_fault vmf; >>>> + >>>> + vmf.vma = vma; >>>> + vmf.address = addr; >>>> + vmf.orig_pte = pte; >>>> + vmf.pmd = src_pmd; >>>> + >>>> + if (cow_user_page(uncow_page, old_page, &vmf)) { >>>> + pte = mk_pte(uncow_page, vma->vm_page_prot); >>>> + >>>> + if (vm_flags & VM_SHARED) >>>> + pte = pte_mkclean(pte); >>>> + pte = pte_mkold(pte); >>>> + >>>> + page_add_new_anon_rmap(uncow_page, vma, addr, >> >>>> + false); >>>> + rss[!!PageAnon(uncow_page)]++; >>>> + goto out_set_pte; >>>> + } else { >>>> + /* unexpected: source page no longer present */ >>>> + WARN_ON_ONCE(1); >>>> + } >>>> + } >>>> +#endif /* CONFIG_IPIPE */ >>>> ptep_set_wrprotect(src_mm, addr, src_pte); >>>> pte = pte_wrprotect(pte); >>>> } >>>> @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct >>>> *dst_mm, struct mm_struct *src_mm, >>>> int progress = 0; >>>> int rss[NR_MM_COUNTERS]; >>>> swp_entry_t entry = (swp_entry_t){0}; >>>> - >>>> + struct page *uncow_page = NULL; >>>> +#ifdef CONFIG_IPIPE >>>> + int do_cow_break = 0; >>>> +again: >>>> + if (do_cow_break) { >>>> + uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr); >>>> + if (uncow_page == NULL) >>>> + return -ENOMEM; >>>> + do_cow_break = 0; >>>> + } >>>> +#else >>>> again: >>>> +#endif >>>> init_rss_vec(rss); >>>> >>>> dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); >>>> - if (!dst_pte) >>>> + if (!dst_pte) { >>>> + if (uncow_page) >>>> + put_page(uncow_page); >>>> return -ENOMEM; >>>> + } >>>> src_pte = pte_offset_map(src_pmd, addr); >>>> src_ptl = pte_lockptr(src_mm, src_pmd); >>>> 
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); >>>> @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct *dst_mm, >>>> struct mm_struct *src_mm, >>>> progress++; >>>> continue; >>>> } >>>> +#ifdef CONFIG_IPIPE >>>> + if (likely(uncow_page == NULL) && >>>> likely(pte_present(*src_pte))) { >>>> + if (is_cow_mapping(vma->vm_flags) && >>>> + test_bit(MMF_VM_PINNED, &src_mm->flags) && >>>> + ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) { >>>> + arch_leave_lazy_mmu_mode(); >>>> + spin_unlock(src_ptl); >>>> + pte_unmap(src_pte); >>>> + add_mm_rss_vec(dst_mm, rss); >>>> + pte_unmap_unlock(dst_pte, dst_ptl); >>>> + cond_resched(); >>>> + do_cow_break = 1; >>>> + goto again; >>>> + } >>>> + } >>>> +#endif >>>> entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, >>>> - vma, addr, rss); >>>> + vma, addr, rss, src_pmd, uncow_page); >>>> + uncow_page = NULL; >>>> if (entry.val) >>>> break; >>>> progress += 8; >>>> @@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct mm_struct >>>> *mm, pmd_t *pmd, >>>> return same; >>>> } >>>> >>>> -static inline bool cow_user_page(struct page *dst, struct page *src, >>>> - struct vm_fault *vmf) >>>> +static bool cow_user_page(struct page *dst, struct page *src, >>>> + struct vm_fault *vmf) >>>> { >>>> bool ret; >>>> void *kaddr; >>>> >>> >>> With this applied to ipipe-4.19 and 5.4 queues, related issues are gone, see >>> >>> https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670 >>> >>> I'm inclined to take this for now so that we can move forward with other >>> topics. Any comments? >>> >> >> Fine with me. This code can be simplified, working on it. I'll channel >> this through Dovetail. >> > > This didn't make into dovetail/v5.10 yet, did it? Asking because smokey > triggers tons of primary mode migrations over dovetail. > > Jan No, it did not. -- Philippe.