[RFC PATCH v2 2/2] Implement sharing/unsharing of PMDs for FS/DAX
This is based on (but somewhat different from) what hugetlbfs does to
share/unshare page tables.

Signed-off-by: Larry Bassel
---
 include/linux/hugetlb.h |   4 ++
 mm/huge_memory.c        |  37 +
 mm/hugetlb.c            |   8 ++--
 mm/memory.c             | 108 +++-
 4 files changed, 152 insertions(+), 5 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index edf476c..debff55 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -140,6 +140,10 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
 				unsigned long *start, unsigned long *end);
+unsigned long page_table_shareable(struct vm_area_struct *svma,
+				   struct vm_area_struct *vma,
+				   unsigned long addr, pgoff_t idx);
+bool vma_shareable(struct vm_area_struct *vma, unsigned long addr);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
 			      int write);
 struct page *follow_huge_pd(struct vm_area_struct *vma,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9f8bce9..935874c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1751,6 +1751,33 @@ static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
 	mm_dec_nr_ptes(mm);
 }
 
+#ifdef CONFIG_ARCH_HAS_HUGE_PMD_SHARE
+static int unshare_huge_pmd(struct mm_struct *mm, unsigned long addr,
+			    pmd_t *pmdp)
+{
+	pgd_t *pgd = pgd_offset(mm, addr);
+	p4d_t *p4d = p4d_offset(pgd, addr);
+	pud_t *pud = pud_offset(p4d, addr);
+
+	WARN_ON(page_count(virt_to_page(pmdp)) == 0);
+	if (page_count(virt_to_page(pmdp)) == 1)
+		return 0;
+
+	pud_clear(pud);
+	put_page(virt_to_page(pmdp));
+	mm_dec_nr_pmds(mm);
+	return 1;
+}
+
+#else
+static int unshare_huge_pmd(struct mm_struct *mm, unsigned long addr,
+			    pmd_t *pmdp)
+{
+	return 0;
+}
+
+#endif
+
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 pmd_t *pmd, unsigned long addr)
 {
@@ -1768,6 +1795,11 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	 * pgtable_trans_huge_withdraw after finishing pmdp related
 	 * operations.
 	 */
+	if (unshare_huge_pmd(vma->vm_mm, addr, pmd)) {
+		spin_unlock(ptl);
+		return 1;
+	}
+
 	orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
 						tlb->fullmm);
 	tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
@@ -1915,6 +1947,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	if (!ptl)
 		return 0;
 
+	if (unshare_huge_pmd(mm, addr, pmd)) {
+		spin_unlock(ptl);
+		return HPAGE_PMD_NR;
+	}
+
 	preserve_write = prot_numa && pmd_write(*pmd);
 	ret = 1;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3a54c9d..1c1ed4e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4653,9 +4653,9 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 }
 
 #ifdef CONFIG_ARCH_HAS_HUGE_PMD_SHARE
-static unsigned long page_table_shareable(struct vm_area_struct *svma,
-				struct vm_area_struct *vma,
-				unsigned long addr, pgoff_t idx)
+unsigned long page_table_shareable(struct vm_area_struct *svma,
+				struct vm_area_struct *vma,
+				unsigned long addr, pgoff_t idx)
 {
 	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
 				svma->vm_start;
@@ -4678,7 +4678,7 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
 	return saddr;
 }
 
-static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
 {
 	unsigned long base = addr & PUD_MASK;
 	unsigned long end = base + PUD_SIZE;
diff --git a/mm/memory.c b/mm/memory.c
index ddf20bd..1ca8f75 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3932,6 +3932,109 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	return 0;
 }
 
+#ifdef CONFIG_ARCH_HAS_HUGE_PMD_SHARE
+static pmd_t *huge_pmd_offset(struct mm_struct *mm,
+			      unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return NULL;
+	p4d = p4d_offset(pgd, addr);
+	if (!p4d_present(*p4d))
+		return NULL;
+
+	pud = pud_offset(p4d, addr);
+	if
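For orientation, helpers like huge_pmd_offset() above follow the standard
pgd -> p4d -> pud -> pmd descent, modeled on hugetlb's huge_pte_offset().
The following is a sketch of that walk pattern only, not the remainder of
the posted patch; the function name is made up here:

#include <linux/mm.h>

/*
 * Sketch of the standard four-level page table descent. Each level is
 * checked for presence before descending; the returned pointer is the
 * PMD entry covering addr (which, in the sharing scheme above, may live
 * in a page of PMDs referenced by more than one process's PUD).
 */
static pmd_t *pmd_walk_sketch(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset(mm, addr);	/* top level, per-mm */
	if (!pgd_present(*pgd))
		return NULL;
	p4d = p4d_offset(pgd, addr);	/* folded away on most configs */
	if (!p4d_present(*p4d))
		return NULL;
	pud = pud_offset(p4d, addr);	/* entry that may point at a shared PMD page */
	if (!pud_present(*pud))
		return NULL;
	return pmd_offset(pud, addr);	/* PMD entry covering addr */
}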
[RFC PATCH v2 1/2] Rename CONFIG_ARCH_WANT_HUGE_PMD_SHARE to CONFIG_ARCH_HAS_HUGE_PMD_SHARE
Signed-off-by: Larry Bassel
---
 arch/arm64/Kconfig          | 2 +-
 arch/arm64/mm/hugetlbpage.c | 2 +-
 arch/x86/Kconfig            | 2 +-
 mm/hugetlb.c                | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 697ea05..36d6189 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -901,7 +901,7 @@ config HW_PERF_EVENTS
 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
 
-config ARCH_WANT_HUGE_PMD_SHARE
+config ARCH_HAS_HUGE_PMD_SHARE
 	def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
 
 config ARCH_HAS_CACHE_LINE_SIZE
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index f475e54..4f3cb3f 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -241,7 +241,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 		 */
 		ptep = pte_alloc_map(mm, pmdp, addr);
 	} else if (sz == PMD_SIZE) {
-		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
+		if (IS_ENABLED(CONFIG_ARCH_HAS_HUGE_PMD_SHARE) &&
 		    pud_none(READ_ONCE(*pudp)))
 			ptep = huge_pmd_share(mm, addr, pudp);
 		else
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2bbbd4d..fdbddb9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -301,7 +301,7 @@ config ARCH_HIBERNATION_POSSIBLE
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 
-config ARCH_WANT_HUGE_PMD_SHARE
+config ARCH_HAS_HUGE_PMD_SHARE
 	def_bool y
 
 config ARCH_WANT_GENERAL_HUGETLB
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ac843d3..3a54c9d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4652,7 +4652,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 	return 0;
 }
 
-#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+#ifdef CONFIG_ARCH_HAS_HUGE_PMD_SHARE
 static unsigned long page_table_shareable(struct vm_area_struct *svma,
 				struct vm_area_struct *vma,
 				unsigned long addr, pgoff_t idx)
@@ -4807,7 +4807,7 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 1;
 }
 #define want_pmd_share()	(1)
-#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+#else /* !CONFIG_ARCH_HAS_HUGE_PMD_SHARE */
 pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 {
 	return NULL;
@@ -4823,7 +4823,7 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
 {
 }
 #define want_pmd_share()	(0)
-#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+#endif /* CONFIG_ARCH_HAS_HUGE_PMD_SHARE */
 
 #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
 pte_t *huge_pte_alloc(struct mm_struct *mm,
-- 
1.8.3.1
[RFC PATCH v2 0/2] Share PMDs for FS/DAX on x86
Changes from v1 to v2:

* Rebased on v5.2-rc3
* An incorrect reference to "page table entries" was fixed (pointed
  out by Kirill Shutemov)
* Renamed CONFIG_ARCH_WANT_HUGE_PMD_SHARE to
  CONFIG_ARCH_HAS_HUGE_PMD_SHARE instead of introducing a new config
  option (suggested by Dan Williams)
* Removed some unnecessary #ifdef stubs (suggested by Matt Wilcox)
* A previously overlooked case involving mprotect() is now handled
  properly (pointed out by Mike Kravetz)

---

This patchset implements sharing of page tables pointing to 2MiB pages
(PMDs) for FS/DAX on x86.

Only shared mmappings of files (i.e. neither private mmappings nor
anonymous pages) are eligible for PMD sharing. Due to the
characteristics of DAX, this code is simpler and less intrusive than
the general case would be.

In our use case (a high-end Oracle database using DAX/XFS/PMEM/2MiB
pages) there would be significant memory savings. A future system
might have 6 TiB of PMEM on it and there might be 10,000 processes
each mapping all of this 6 TiB. Here the savings would be
approximately (6 TiB / 2 MiB) * 8 bytes (page table entry size) *
10,000 = 240 GiB (and these page tables themselves would probably be
in non-PMEM (ordinary RAM)).

There would also be a reduction in page faults, because in some cases
the page fault has already been satisfied and the page table entry has
been filled in (so the processes after the first would not take a
fault).

The code for detecting whether PMDs can be shared and the
implementation of sharing and unsharing is based on, but somewhat
different from, that in mm/hugetlb.c, though some of the code from
that file could be reused and thus was made non-static.

Larry Bassel (2):
  Rename CONFIG_ARCH_WANT_HUGE_PMD_SHARE to CONFIG_ARCH_HAS_HUGE_PMD_SHARE
  Implement sharing/unsharing of PMDs for FS/DAX

 arch/arm64/Kconfig          |   2 +-
 arch/arm64/mm/hugetlbpage.c |   2 +-
 arch/x86/Kconfig            |   2 +-
 include/linux/hugetlb.h     |   4 ++
 mm/huge_memory.c            |  37 +++
 mm/hugetlb.c                |  14 +++---
 mm/memory.c                 | 108 +++-
 7 files changed, 158 insertions(+), 11 deletions(-)

-- 
1.8.3.1
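As a sanity check on the arithmetic above, a small standalone sketch
(the 6 TiB, 2 MiB and 10,000-process figures are the cover letter's;
note the cover letter's ~240 GiB counts all 10,000 copies, while this
counts the 9,999 copies that sharing actually eliminates):

#include <stdio.h>

/*
 * Back-of-the-envelope check of the claimed savings: one 8-byte PMD
 * entry per 2 MiB of mapped PMEM, shared across all processes instead
 * of duplicated per process.
 */
int main(void)
{
	unsigned long long pmem = 6ULL << 40;		/* 6 TiB of PMEM */
	unsigned long long pmd_span = 2ULL << 20;	/* each PMD entry maps 2 MiB */
	unsigned long long entry_size = 8;		/* bytes per page table entry */
	unsigned long long nproc = 10000;

	unsigned long long per_process = (pmem / pmd_span) * entry_size;
	unsigned long long saved = per_process * (nproc - 1);

	printf("PMD entries per mapping: %llu\n", pmem / pmd_span);
	printf("page tables per process: %llu MiB\n", per_process >> 20);
	printf("saved by sharing:        %llu GiB\n", saved >> 30);
	return 0;
}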
Re: [PATCH, RFC 2/2] Implement sharing/unsharing of PMDs for FS/DAX
On 14 May 19 16:01, Kirill A. Shutemov wrote:
> On Thu, May 09, 2019 at 09:05:33AM -0700, Larry Bassel wrote:
[trim]
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -1747,6 +1747,33 @@ static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
> >  	mm_dec_nr_ptes(mm);
> >  }
> >  
> > +#ifdef CONFIG_MAY_SHARE_FSDAX_PMD
> > +static int unshare_huge_pmd(struct mm_struct *mm, unsigned long addr,
> > +			    pmd_t *pmdp)
> > +{
> > +	pgd_t *pgd = pgd_offset(mm, addr);
> > +	p4d_t *p4d = p4d_offset(pgd, addr);
> > +	pud_t *pud = pud_offset(p4d, addr);
> > +
> > +	WARN_ON(page_count(virt_to_page(pmdp)) == 0);
> > +	if (page_count(virt_to_page(pmdp)) == 1)
> > +		return 0;
> > +
> > +	pud_clear(pud);
> 
> You don't have proper locking in place to do this.

This code is based on and very similar to the code in mm/hugetlb.c
(huge_pmd_unshare()). I asked Mike Kravetz why the locking in
huge_pmd_share() and huge_pmd_unshare() is correct.

The issue (as you point out later in your email) is whether in both of
those cases it is OK to take the PMD table lock and then modify the
PUD table. He responded with the following analysis:

-

I went back and looked at the locking in the hugetlb code. Here is
most of the code for huge_pmd_share().

	i_mmap_lock_write(mapping);
	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
		if (svma == vma)
			continue;

		saddr = page_table_shareable(svma, vma, addr, idx);
		if (saddr) {
			spte = huge_pte_offset(svma->vm_mm, saddr,
					       vma_mmu_pagesize(svma));
			if (spte) {
				get_page(virt_to_page(spte));
				break;
			}
		}
	}

	if (!spte)
		goto out;

	ptl = huge_pte_lock(hstate_vma(vma), mm, spte);

The primary reason the page table lock is taken here is for the
purpose of checking and possibly updating the PUD (pointer to PMD
page). Note that by the time we get here we already have found a PMD
page to share. Also note that the lock taken is the one associated
with the PMD page.

The synchronization question to ask is: Can anyone else modify the PUD
value while I am holding the PMD lock? In general, the answer is Yes.
However, we can infer something subtle about the shared PMD case.

Suppose someone else wanted to set the PUD value. The only value they
could set it to is the PMD page we found in this routine. They also
would need to go through this routine to set the value. They also
would need to get the lock on the same shared PMD. Actually, they
would hit the mapping->i_mmap_rwsem first. But, the bottom line is
that nobody else can set it.

What about clearing? In the hugetlb case, the only places where PUD
gets cleared are final page table tear down and huge_pmd_unshare().
The final page table tear down case is not interesting as the process
is exiting. All callers of huge_pmd_unshare must hold the (PMD) page
table lock. This is a requirement. Therefore, within a single process
this synchronizes two threads: one calling huge_pmd_share and another
huge_pmd_unshare.

-

I assert that the same analysis applies to pmd_share() and
unshare_huge_pmd() which are added in this patch.

> > +	put_page(virt_to_page(pmdp));
> > +	mm_dec_nr_pmds(mm);
> > +	return 1;
> > +}
> > +
> > +#else
> > +static int unshare_huge_pmd(struct mm_struct *mm, unsigned long addr,
> > +			    pmd_t *pmdp)
> > +{
> > +	return 0;
> > +}
> > +
> > +#endif
> > +
> >  int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
> >  		 pmd_t *pmd, unsigned long addr)
> >  {
> > @@ -1764,6 +1791,11 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
> >  	 * pgtable_trans_huge_withdraw after finishing pmdp related
> >  	 * operations.
> >  	 */
> > +	if (unshare_huge_pmd(vma->vm_mm, addr, pmd)) {
> > +		spin_unlock(ptl);
> > +		return 1;
> > +	}
> > +
> >  	orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
> >  						tlb->fullmm);
> >  	tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 641cedf..919a290 100644
> > --- a/mm/hugetlb.c
> > +++ b/
Re: [PATCH, RFC 0/2] Share PMDs for FS/DAX on x86
On 14 May 19 15:28, Kirill A. Shutemov wrote:
> On Thu, May 09, 2019 at 09:05:31AM -0700, Larry Bassel wrote:
> > This patchset implements sharing of page table entries pointing
> > to 2MiB pages (PMDs) for FS/DAX on x86.
>
> -EPARSE.
>
> How do you share entries? Entries do not take any space, page tables
> that contain these entries do.

Yes, I'll correct this in v2.

> Have you checked if the patch makes memory consumption any better. I
> have doubts in it.

Yes I have -- the following is debugging output I have from my testing.

The (admittedly simple) test case is two copies of a program that
mmaps 1GiB of a DAX/XFS file (with 2MiB page size), touches the first
page (physical 20040 in this case) and then sleeps forever.

sharing disabled:

(process A)
[ 420.369975] pgd_index = fe
[ 420.369975] pgd = e1ebf83b
[ 420.369975] pgd_val = 800405ca8067
[ 420.369976] pud_index = 100
[ 420.369976] pud = bd7a7df0
[ 420.369976] pud_val = 4058f9067
[ 420.369977] pmd_index = 0
[ 420.369977] pmd = 791e93d4
[ 420.369977] pmd_val = 8402004008e7
[ 420.369978] pmd huge
[ 420.369978] page_addr = 20040, page_offset = 0
[ 420.369979] vaddr = 7f40, paddr = 20040

(process B)
[ 420.370013] pgd_index = fe
[ 420.370014] pgd = a2bac60d
[ 420.370014] pgd_val = 800405a8f067
[ 420.370015] pud_index = 100
[ 420.370015] pud = dcc3ff1a
[ 420.370015] pud_val = 3fc713067
[ 420.370016] pmd_index = 0
[ 420.370016] pmd = 6b4679db
[ 420.370016] pmd_val = 8402004008e7
[ 420.370017] pmd huge
[ 420.370017] page_addr = 20040, page_offset = 0
[ 420.370018] vaddr = 7f40, paddr = 20040

sharing enabled:

(process A)
[ 696.992342] pgd_index = fe
[ 696.992342] pgd = 9612024b
[ 696.992343] pgd_val = 800404725067
[ 696.992343] pud_index = 100
[ 696.992343] pud = c98ab17c
[ 696.992344] pud_val = 4038e3067
[ 696.992344] pmd_index = 0
[ 696.992344] pmd = 2437681b
[ 696.992344] pmd_val = 8402004008e7
[ 696.992345] pmd huge
[ 696.992345] page_addr = 20040, page_offset = 0
[ 696.992345] vaddr = 7f40, paddr = 20040

(process B)
[ 696.992351] pgd_index = fe
[ 696.992351] pgd = 12326848
[ 696.992352] pgd_val = 80040a953067
[ 696.992352] pud_index = 100
[ 696.992352] pud = f989bcf6
[ 696.992352] pud_val = 4038e3067
[ 696.992353] pmd_index = 0
[ 696.992353] pmd = 2437681b
[ 696.992353] pmd_val = 8402004008e7
[ 696.992353] pmd huge
[ 696.992354] page_addr = 20040, page_offset = 0
[ 696.992354] vaddr = 7f40, paddr = 20040

Note that in the sharing enabled case, the pud_val and pmd are the
same for the two processes. In the disabled case we have two separate
pmds (and so more memory was allocated).

Also (though not visible from the output above), the second process
did not take a page fault, as the virtual->physical mapping was
already established thanks to the sharing.

Larry
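For reference, a minimal sketch of the kind of test program described
above (the DAX mount path and 1 GiB size are assumptions for
illustration; the page-table dumps themselves came from kernel-side
debug code, not from this program):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

/*
 * Map 1 GiB of a DAX/XFS file with a shared mapping, touch the first
 * page to fault it in, then sleep forever. Run two copies and compare
 * the two processes' page tables.
 */
int main(void)
{
	const char *path = "/mnt/dax/testfile";	/* assumed mount point */
	size_t len = 1UL << 30;				/* 1 GiB */
	int fd = open(path, O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	p[0] = 1;	/* touch the first (2 MiB) page: one fault */
	pause();	/* sleep forever */
	return 0;
}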
Re: question about page tables in DAX/FS/PMEM case
[adding linux-mm]

On 21 Feb 19 15:41, Jerome Glisse wrote:
> On Wed, Feb 20, 2019 at 03:06:22PM -0800, Larry Bassel wrote:
> > I'm working on sharing page tables in the DAX/XFS/PMEM/PMD case.
> >
> > If multiple processes would use the identical page of PMDs corresponding
> > to a 1 GiB address range of DAX/XFS/PMEM/PMDs, presumably one can instead
> > of populating a new PUD, just atomically increment a refcount and point
> > to the same PUD in the next level above.

Thanks for your feedback. Some comments/clarification below.

> I think page table sharing was discuss several time in the past and
> the complexity involve versus the benefit were not clear. For 1GB
> of virtual address you need:
>   #pte pages = 1G/(512 * 2^12) = 512 pte pages
>   #pmd pages = 1G/(512 * 512 * 2^12) = 1 pmd pages
>
> So if we were to share the pmd directory page we would be saving a
> total of 513 pages for every page table or ~2MB. This goes up with
> the number of process that map the same range ie if 10 process map
> the same range and share the same pmd than you are saving 9 * 2MB
> 18MB of memory. This seems relatively modest saving.

The file blocksize = page size in what I am working on would be 2 MiB
(sharing PUDs/pages of PMDs); I'm not trying to support sharing
PMDs/pages of PTEs. And yes, the savings in this case is actually even
less than in your example (but see my example below).

> AFAIK there is no hardware benefit from sharing the page table
> directory within different page table. So the only benefit is the
> amount of memory we save.

Yes, in our use case (a high-end Oracle database using
DAX/XFS/PMEM/PMD) the main benefit would be memory savings: a future
system might have 6 TiB of PMEM on it and there might be 10,000
processes each mapping all of this 6 TiB. Here the savings would be
approximately (6 TiB / 2 MiB) * 8 bytes (page table entry size) *
10,000 = 240 GiB (and these page tables themselves would be in
non-PMEM (ordinary RAM)).

> See below for comments on complexity to achieve this.
> [trim]
> > If I have a mmap of a DAX/FS/PMEM file and I take
> > a page (either pte or PMD sized) fault on access to this file,
> > the page table(s) are set up in dax_iomap_fault() in fs/dax.c (correct?).
>
> Not exactly the page table are allocated long before dax_iomap_fault()
> get calls. They are allocated by the handle_mm_fault() and its childs
> functions.

Yes, I misstated this: the fault is handled there, which may well
alter the PUD (in my case), but the original page tables are set up
earlier.

> > If the process later munmaps this file or exits but there are still
> > other users of the shared page of PMDs, I would need to
> > detect that this has happened and act accordingly (#3 above)
> >
> > Where will these page table entries be torn down?
> > In the same code where any other page table is torn down?
> > If this is the case, what would the cleanest way of telling that these
> > page tables (PMDs, etc.) correspond to a DAX/FS/PMEM mapping
> > (look at the physical address pointed to?) so that
> > I could do the right thing here.
> >
> > I understand that I may have missed something obvious here.
>
> They are many issues here are the one i can think of:
>   - finding a pmd/pud to share, you need to walk the reverse mapping
>     of the range you are mapping and to find if any process or other
>     virtual address already as a pud or pmd you can reuse. This can
>     take more time than allocating page directory pages.
>   - if one process munmap some portion of a share pud you need to
>     break the sharing this means that munmap (or mremap) would need
>     to handle this page table directory sharing case first
>   - many code path in the kernel might need update to understand this
>     share page table thing (mprotect, userfaultfd, ...)
>   - the locking rules is bound to be painfull
>   - this might not work on all architecture as some architecture do
>     associate information with page table directory and that can not
>     always be share (it would need to be enabled arch by arch)

Yes, some architectures don't support DAX at all (note again that I'm
not trying to share non-DAX page tables here).

> The nice thing:
>   - unmapping for migration, when you unmap a share pud/pmd you can
>     decrement mapcount by share pud/pmd count this could speedup
>     migration

A followup question: the kernel does sharing of page tables for
hugetlbfs (also 2 MiB pages), why aren't the above issues relevant
there as well (or are they, but we support it anyhow)?

> This is what i could think of on the top of my head but there m
question about page tables in DAX/FS/PMEM case
I'm working on sharing page tables in the DAX/XFS/PMEM/PMD case.

If multiple processes would use the identical page of PMDs
corresponding to a 1 GiB address range of DAX/XFS/PMEM/PMDs,
presumably one can, instead of populating a new PUD, just atomically
increment a refcount and point to the same PUD in the next level
above.

i.e.

OLD:
process 1: VA -> levels of page tables -> PUD1 -> page of PMDs1
process 2: VA -> levels of page tables -> PUD2 -> page of PMDs2

NEW:
process 1: VA -> levels of page tables -> PUD1 -> page of PMDs1
process 2: VA -> levels of page tables -> PUD1 -> page of PMDs1 (refcount 2)

There are several cases to consider:

1. New mapping
OLD: make a new PUD, populate the associated page of PMDs (at least
partially) with PMD entries.
NEW: same

2. Mapping by a process same (same VA->PA and size and protections,
etc.) as one that already exists
OLD: make a new PUD, populate the associated page of PMDs (at least
partially) with PMD entries.
NEW: use same PUD, increase refcount (potentially even if this mapping
is private, in which case there may eventually be a copy-on-write --
see #5 below)

3. Unmapping of a mapping which is the same as that from another process
OLD: destroy the process's copy of mapping, free PUD, etc.
NEW: decrease refcount; only if now 0 do we destroy mapping, etc.

4. Unmapping of a mapping which is unique (refcount 1)
OLD: destroy the process's copy of mapping, free PUD, etc.
NEW: same

5. Mapping was private (but same as another process), process writes
OLD: break the PMD into PTEs, destroy PMD mapping, free PUD, etc.
NEW: decrease refcount; only if now 0 do we destroy mapping, etc. We
still break the PMD into PTEs.

If I have a mmap of a DAX/FS/PMEM file and I take a page (either pte
or PMD sized) fault on access to this file, the page table(s) are set
up in dax_iomap_fault() in fs/dax.c (correct?).

If the process later munmaps this file or exits, but there are still
other users of the shared page of PMDs, I would need to detect that
this has happened and act accordingly (#3 above).

Where will these page table entries be torn down? In the same code
where any other page table is torn down? If this is the case, what
would be the cleanest way of telling that these page tables (PMDs,
etc.) correspond to a DAX/FS/PMEM mapping (look at the physical
address pointed to?) so that I could do the right thing here.

I understand that I may have missed something obvious here.

Thanks.

Larry
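To make cases 1-5 concrete, a toy standalone model of the proposed
refcounting (the names pmd_page_get()/pmd_page_put() are made up here;
in the kernel the count would live in the struct page backing the
shared page of PMDs, as in the later patches):

#include <assert.h>
#include <stdio.h>

/*
 * Toy model of the sharing protocol above: a second mapper bumps a
 * refcount instead of building a second page of PMDs, and only the
 * last unmapper actually tears the page down.
 */
struct pmd_page {
	int refcount;
};

/* Case 2: another process maps the same range -> share, don't rebuild. */
static void pmd_page_get(struct pmd_page *p)
{
	p->refcount++;
}

/*
 * Cases 3/4 (and the refcount part of case 5): unmap -> tear down only
 * when the last user goes away. Returns 1 if the caller must free the
 * page of PMDs.
 */
static int pmd_page_put(struct pmd_page *p)
{
	assert(p->refcount > 0);
	return --p->refcount == 0;
}

int main(void)
{
	struct pmd_page p = { .refcount = 1 };	/* case 1: first mapping */

	pmd_page_get(&p);	/* case 2: second process shares */
	printf("teardown after 1st unmap? %d\n", pmd_page_put(&p));	/* 0 */
	printf("teardown after 2nd unmap? %d\n", pmd_page_put(&p));	/* 1 */
	return 0;
}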
question about mmap MAP_PRIVATE on PMEM/DAX/fs files
Is mmapping a PMEM/DAX/fs file MAP_PRIVATE supported? Is it something
that people are likely to want to do?

If it is supported, suppose I open a file in PMEM/DAX/fs, mmap it
MAP_PRIVATE, read from the memory-mapped file (with memory accesses,
not the read syscall) and take a page fault, which the kernel
satisfies.

At this time, do my page tables for the private mmapped page(s) point
to the PMEM corresponding to the file, with the kernel waiting until
the page(s) is/are altered (either by me or someone else) to copy on
write and give me a different page/mapping? Or does the kernel avoid
this by always mapping a copy of the page(s) involved in the private
mmap in the first place?

In either case, is my private copy going to come from PMEM, or is it
an "ordinary" page, or is this "random"? Does the program have any
choice in this (i.e. suppose I want to make sure my copied page is
persistent)?

Thanks.

Larry
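A minimal sketch of the scenario being asked about (the file path is an
assumption; whether the read fault maps the PMEM page itself or an
anonymous copy is precisely the question posed above):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

/*
 * Read through a MAP_PRIVATE mapping first (no copy-on-write needed
 * yet), then write to it (a private copy must exist at the latest
 * here).
 */
int main(void)
{
	int fd = open("/mnt/dax/testfile", O_RDONLY);	/* assumed DAX file */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	char c = p[0];	/* read fault: PMEM page or a copy? (the question) */
	p[0] = c + 1;	/* write: by now the kernel must give a private copy */
	printf("read %d, wrote %d\n", c, p[0]);
	return 0;
}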
RFC: revisiting shared page tables
In August 2005, Dave McCracken sent out a patch which implemented
shared page tables
(http://lkml.iu.edu/hypermail/linux/kernel/0508.3/1623.html) based on
2.6.13. He also wrote two OLS papers about the topic
(https://landley.net/kdocs/ols/2003/ols2003-pages-315-320.pdf and
https://www.landley.net/kdocs/ols/2006/ols2006v2-pages-125-130.pdf),
the second of which was published after his patch submission.

This patch was discussed for a few days. It was not accepted. There
were several comments about technical issues (about a typo, some
questions about locking, how to search the vmas, whether one must
iterate through all of the vmas) which no doubt could be fixed, and in
fact Dave indicated that he would eventually provide a revised patch
which fixed these problems. AFAICT this never occurred.

However, there were also questions about whether sharing page tables
would provide any significant benefit. Specifically, there were
concerns about whether the patch would improve performance at all
(Dave indicated a 3% improvement on some "large benchmarks"),
especially once another change (the test at the beginning of
copy_page_range() which prevents page table copies in some cases) was
merged (d992895ba2, which has been in the kernel since 2.6.14). It was
also suggested that the use of randomize_va_space might make shared
page tables uninteresting, though that objection appeared to be
addressed.

Isn't Linux kernel archaeology fun :-)

13 years have elapsed. Given the many changes in the kernel since the
original patch submission, I'd appreciate your insight into the
following questions:

* Is there (still?) a need for shared page tables (and if not, why
  not?)
* If one were to resume work on this, is there any reason why one
  shouldn't start with Dave's 2.6.13 patch (plus fixes to the known
  bugs in it) and forward-port it to the tip, rather than starting
  from scratch?

Thanks.

Larry Bassel
Re: [RFC] mm, THP: Map read-only text segments using large THP pages
On 17 May 18 08:23, Matthew Wilcox wrote:
>
> I can't find any information on what page sizes SPARC supports.
> Maybe you could point me at a reference? All I've managed to find is
> the architecture manuals for SPARC which believe it is not their purpose
> to mandate an MMU.

Page sizes of 8K, 64K, 512K, 4M, 32M, 256M, 2G and 16G are allowed
architecturally -- some of these aren't present on some SPARC
machines. Generally 8K, 64K, 4M, 256M, 2G and 16G are present on
modern machines.

Also note that the SPARC THP page size is 8M (so that it is PMD
aligned).

Larry
[PATCH v8 1/2] arm64: adjust el0_sync so that a function can be called
To implement the context tracker properly on arm64, a function call needs to be made after debugging and interrupts are turned on, but before the lr is changed to point to ret_to_user(). If the function call is made after the lr is changed the function will not return to the correct place. For similar reasons, defer the setting of x0 so that it doesn't need to be saved around the function call (save far_el1 in x26 temporarily instead). Acked-by: Will Deacon Reviewed-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: Larry Bassel --- arch/arm64/kernel/entry.S | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e8b23a3..b0101b9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -354,7 +354,6 @@ el0_sync: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state b.eqel0_svc - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -383,7 +382,6 @@ el0_sync_compat: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state b.eqel0_svc_compat - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -426,22 +424,25 @@ el0_da: /* * Data abort handling */ - mrs x0, far_el1 - bic x0, x0, #(0xff << 56) + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_ia: /* * Instruction abort handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_fpsimd_acc: /* @@ -450,6 +451,7 @@ el0_fpsimd_acc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_acc el0_fpsimd_exc: /* @@ -458,16 +460,19 @@ el0_fpsimd_exc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_exc el0_sp_pc: /* * Stack or PC alignment exception handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_sp_pc_abort el0_undef: /* @@ -476,6 +481,7 @@ el0_undef: // enable interrupts before calling the main handler enable_dbg_and_irq mov x0, sp + adr lr, ret_to_user b do_undefinstr el0_dbg: /* @@ -493,6 +499,7 @@ el0_inv: mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 + adr lr, ret_to_user b bad_mode ENDPROC(el0_sync) -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v8 0/2] context tracker support for arm64
Implement and enable context tracking for arm64 (which is a prerequisite for FULL_NOHZ support). This patchset builds upon earlier work by Kevin Hilman and is based on Will Deacon's tree. Changes v7 to v8: * Fix bug where el1_irq was calling ct_user_exit rather than el0_irq Changes v6 to v7: * Rename parameter of ct_user_exit from restore to syscall Changes v5 to v6: * Don't save far_el1 in x26 in el0_dbg path (not needed) * TIF_NOHZ processes go through the slow path (so no register save/restore is needed in ct_user_enter) Changes v4 to v5: * Improvement to code restoring far_el1 (suggested by Christopher Covington) * Improvement to register save/restore in ct_user_enter Changes v3 to v4: * Rename parameter of ct_user_exit from save to restore * Rebased patch to Will Deacon's tree (branch remotes/origin/aarch64 of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git) Changes v2 to v3: * Save/restore necessary registers in ct_user_enter and ct_user_exit * Annotate "error paths" out of el0_sync with ct_user_exit Changes v1 to v2: * Save far_el1 in x26 temporarily Larry Bassel (2): arm64: adjust el0_sync so that a function can be called arm64: enable context tracking arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 4 +++ arch/arm64/kernel/entry.S| 58 +++- 3 files changed, 56 insertions(+), 7 deletions(-) -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v8 2/2] arm64: enable context tracking
Make calls to ct_user_enter when the kernel is exited and ct_user_exit when the kernel is entered (in el0_da, el0_ia, el0_svc, el0_irq and all of the "error" paths). These macros expand to function calls which will only work properly if el0_sync and related code has been rearranged (in a previous patch of this series). The calls to ct_user_exit are made after hw debugging has been enabled (enable_dbg_and_irq). The call to ct_user_enter is made at the beginning of the kernel_exit macro. This patch is based on earlier work by Kevin Hilman. Save/restore optimizations were also done by Kevin. Acked-by: Will Deacon Reviewed-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: Kevin Hilman Signed-off-by: Larry Bassel --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 4 arch/arm64/kernel/entry.S| 39 +++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e759af5..ef18ae5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -55,6 +55,7 @@ config ARM64 select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE + select HAVE_CONTEXT_TRACKING help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 720e70b..8363f34 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ +#define TIF_NOHZ7 #define TIF_SYSCALL_TRACE 8 #define TIF_POLLING_NRFLAG 16 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ @@ -113,9 +114,12 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_32BIT (1 << TIF_32BIT) +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME) +#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_NOHZ) #endif /* __KERNEL__ */ #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b0101b9..0c5844e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,6 +30,32 @@ #include /* + * Context tracking subsystem. Used to instrument transitions + * between user and kernel mode. + */ + .macro ct_user_exit, syscall = 0 +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_exit + .if \syscall == 1 + /* +* Save/restore needed during syscalls. Restore syscall arguments from +* the values already saved on stack during kernel_entry. 
+*/ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + .endif +#endif + .endm + + .macro ct_user_enter +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_enter +#endif + .endm + +/* * Bad Abort numbers *- */ @@ -91,6 +117,7 @@ .macro kernel_exit, el, ret = 0 ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 + ct_user_enter ldr x23, [sp, #S_SP]// load return stack pointer .endif .if \ret @@ -427,6 +454,7 @@ el0_da: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp @@ -439,6 +467,7 @@ el0_ia: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp @@ -449,6 +478,7 @@ el0_fpsimd_acc: * Floating Point or Advanced SIMD access */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -458,6 +488,7 @@ el0_fpsimd_exc: * Floating Point or Advanced SIMD exception */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -480,6 +511,7 @@ el0_undef: */ // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit
[PATCH v7 0/2] context tracker support for arm64
Implement and enable context tracking for arm64 (which is a prerequisite for FULL_NOHZ support). This patchset builds upon earlier work by Kevin Hilman and is based on Will Deacon's tree. Changes v6 to v7: * Rename parameter of ct_user_exit from restore to syscall Changes v5 to v6: * Don't save far_el1 in x26 in el0_dbg path (not needed) * TIF_NOHZ processes go through the slow path (so no register save/restore is needed in ct_user_enter) Changes v4 to v5: * Improvement to code restoring far_el1 (suggested by Christopher Covington) * Improvement to register save/restore in ct_user_enter Changes v3 to v4: * Rename parameter of ct_user_exit from save to restore * Rebased patch to Will Deacon's tree (branch remotes/origin/aarch64 of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git) Changes v2 to v3: * Save/restore necessary registers in ct_user_enter and ct_user_exit * Annotate "error paths" out of el0_sync with ct_user_exit Changes v1 to v2: * Save far_el1 in x26 temporarily Larry Bassel (2): arm64: adjust el0_sync so that a function can be called arm64: enable context tracking arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 4 +++ arch/arm64/kernel/entry.S| 58 +++- 3 files changed, 56 insertions(+), 7 deletions(-) -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v7 2/2] arm64: enable context tracking
Make calls to ct_user_enter when the kernel is exited and ct_user_exit when the kernel is entered (in el0_da, el0_ia, el0_svc, el0_irq and all of the "error" paths). These macros expand to function calls which will only work properly if el0_sync and related code has been rearranged (in a previous patch of this series). The calls to ct_user_exit are made after hw debugging has been enabled (enable_dbg_and_irq). The call to ct_user_enter is made at the beginning of the kernel_exit macro. This patch is based on earlier work by Kevin Hilman. Save/restore optimizations were also done by Kevin. Signed-off-by: Kevin Hilman Signed-off-by: Larry Bassel Acked-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 4 arch/arm64/kernel/entry.S| 39 +++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e759af5..ef18ae5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -55,6 +55,7 @@ config ARM64 select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE + select HAVE_CONTEXT_TRACKING help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 720e70b..8363f34 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ +#define TIF_NOHZ7 #define TIF_SYSCALL_TRACE 8 #define TIF_POLLING_NRFLAG 16 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ @@ -113,9 +114,12 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_32BIT (1 << TIF_32BIT) +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME) +#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_NOHZ) #endif /* __KERNEL__ */ #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b0101b9..39d4dc9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,6 +30,32 @@ #include /* + * Context tracking subsystem. Used to instrument transitions + * between user and kernel mode. + */ + .macro ct_user_exit, syscall = 0 +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_exit + .if \syscall == 1 + /* +* Save/restore needed during syscalls. Restore syscall arguments from +* the values already saved on stack during kernel_entry. 
+*/ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + .endif +#endif + .endm + + .macro ct_user_enter +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_enter +#endif + .endm + +/* * Bad Abort numbers *- */ @@ -91,6 +117,7 @@ .macro kernel_exit, el, ret = 0 ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 + ct_user_enter ldr x23, [sp, #S_SP]// load return stack pointer .endif .if \ret @@ -318,6 +345,7 @@ el1_irq: bl trace_hardirqs_off #endif + ct_user_exit irq_handler #ifdef CONFIG_PREEMPT @@ -427,6 +455,7 @@ el0_da: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp @@ -439,6 +468,7 @@ el0_ia: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp @@ -449,6 +479,7 @@ el0_fpsimd_acc: * Floating Point or Advanced SIMD access */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -458,6 +489,7 @@ el0_fpsimd_exc: * Floating Point or Advanced SIMD exception */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -480,6 +512,7 @@ el0_undef: */
[PATCH v7 1/2] arm64: adjust el0_sync so that a function can be called
To implement the context tracker properly on arm64, a function call needs to be made after debugging and interrupts are turned on, but before the lr is changed to point to ret_to_user(). If the function call is made after the lr is changed the function will not return to the correct place. For similar reasons, defer the setting of x0 so that it doesn't need to be saved around the function call (save far_el1 in x26 temporarily instead). Signed-off-by: Larry Bassel Acked-by: Will Deacon --- arch/arm64/kernel/entry.S | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e8b23a3..b0101b9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -354,7 +354,6 @@ el0_sync: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state b.eqel0_svc - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -383,7 +382,6 @@ el0_sync_compat: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state b.eqel0_svc_compat - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -426,22 +424,25 @@ el0_da: /* * Data abort handling */ - mrs x0, far_el1 - bic x0, x0, #(0xff << 56) + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_ia: /* * Instruction abort handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_fpsimd_acc: /* @@ -450,6 +451,7 @@ el0_fpsimd_acc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_acc el0_fpsimd_exc: /* @@ -458,16 +460,19 @@ el0_fpsimd_exc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_exc el0_sp_pc: /* * Stack or PC alignment exception handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_sp_pc_abort el0_undef: /* @@ -476,6 +481,7 @@ el0_undef: // enable interrupts before calling the main handler enable_dbg_and_irq mov x0, sp + adr lr, ret_to_user b do_undefinstr el0_dbg: /* @@ -493,6 +499,7 @@ el0_inv: mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 + adr lr, ret_to_user b bad_mode ENDPROC(el0_sync) -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v6 0/2] context tracker support for arm64
Implement and enable context tracking for arm64 (which is a prerequisite for FULL_NOHZ support). This patchset builds upon earlier work by Kevin Hilman and is based on Will Deacon's tree. Changes v5 to v6: * Don't save far_el1 in x26 in el0_dbg path (not needed) * TIF_NOHZ processes go through the slow path (so no register save/restore is needed in ct_user_enter) Changes v4 to v5: * Improvement to code restoring far_el1 (suggested by Christopher Covington) * Improvement to register save/restore in ct_user_enter Changes v3 to v4: * Rename parameter of ct_user_exit from save to restore * Rebased patch to Will Deacon's tree (branch remotes/origin/aarch64 of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git) Changes v2 to v3: * Save/restore necessary registers in ct_user_enter and ct_user_exit * Annotate "error paths" out of el0_sync with ct_user_exit Changes v1 to v2: * Save far_el1 in x26 temporarily Larry Bassel (2): arm64: adjust el0_sync so that a function can be called arm64: enable context tracking arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 4 +++ arch/arm64/kernel/entry.S| 58 +++- 3 files changed, 56 insertions(+), 7 deletions(-) -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v6 2/2] arm64: enable context tracking
Make calls to ct_user_enter when the kernel is exited and ct_user_exit when the kernel is entered (in el0_da, el0_ia, el0_svc, el0_irq and all of the "error" paths). These macros expand to function calls which will only work properly if el0_sync and related code has been rearranged (in a previous patch of this series). The calls to ct_user_exit are made after hw debugging has been enabled (enable_dbg_and_irq). The call to ct_user_enter is made at the beginning of the kernel_exit macro. This patch is based on earlier work by Kevin Hilman. Save/restore optimizations were also done by Kevin. Signed-off-by: Kevin Hilman Signed-off-by: Larry Bassel --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 4 arch/arm64/kernel/entry.S| 39 +++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e759af5..ef18ae5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -55,6 +55,7 @@ config ARM64 select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE + select HAVE_CONTEXT_TRACKING help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 720e70b..8363f34 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ +#define TIF_NOHZ7 #define TIF_SYSCALL_TRACE 8 #define TIF_POLLING_NRFLAG 16 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ @@ -113,9 +114,12 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_32BIT (1 << TIF_32BIT) +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME) +#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_NOHZ) #endif /* __KERNEL__ */ #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b0101b9..3c484e2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,6 +30,32 @@ #include /* + * Context tracking subsystem. Used to instrument transitions + * between user and kernel mode. + */ + .macro ct_user_exit, restore = 0 +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_exit + .if \restore == 1 + /* +* Save/restore needed during syscalls. Restore syscall arguments from +* the values already saved on stack during kernel_entry. 
+*/ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + .endif +#endif + .endm + + .macro ct_user_enter +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_enter +#endif + .endm + +/* * Bad Abort numbers *- */ @@ -91,6 +117,7 @@ .macro kernel_exit, el, ret = 0 ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 + ct_user_enter ldr x23, [sp, #S_SP]// load return stack pointer .endif .if \ret @@ -318,6 +345,7 @@ el1_irq: bl trace_hardirqs_off #endif + ct_user_exit irq_handler #ifdef CONFIG_PREEMPT @@ -427,6 +455,7 @@ el0_da: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp @@ -439,6 +468,7 @@ el0_ia: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp @@ -449,6 +479,7 @@ el0_fpsimd_acc: * Floating Point or Advanced SIMD access */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -458,6 +489,7 @@ el0_fpsimd_exc: * Floating Point or Advanced SIMD exception */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -480,6 +512,7 @@ el0_undef: */ // enable interrupts before
[PATCH v6 1/2] arm64: adjust el0_sync so that a function can be called
To implement the context tracker properly on arm64, a function call needs to be made after debugging and interrupts are turned on, but before the lr is changed to point to ret_to_user(). If the function call is made after the lr is changed the function will not return to the correct place. For similar reasons, defer the setting of x0 so that it doesn't need to be saved around the function call (save far_el1 in x26 temporarily instead). Signed-off-by: Larry Bassel --- arch/arm64/kernel/entry.S | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e8b23a3..b0101b9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -354,7 +354,6 @@ el0_sync: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state b.eqel0_svc - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -383,7 +382,6 @@ el0_sync_compat: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state b.eqel0_svc_compat - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -426,22 +424,25 @@ el0_da: /* * Data abort handling */ - mrs x0, far_el1 - bic x0, x0, #(0xff << 56) + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_ia: /* * Instruction abort handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_fpsimd_acc: /* @@ -450,6 +451,7 @@ el0_fpsimd_acc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_acc el0_fpsimd_exc: /* @@ -458,16 +460,19 @@ el0_fpsimd_exc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_exc el0_sp_pc: /* * Stack or PC alignment exception handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_sp_pc_abort el0_undef: /* @@ -476,6 +481,7 @@ el0_undef: // enable interrupts before calling the main handler enable_dbg_and_irq mov x0, sp + adr lr, ret_to_user b do_undefinstr el0_dbg: /* @@ -493,6 +499,7 @@ el0_inv: mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 + adr lr, ret_to_user b bad_mode ENDPROC(el0_sync) -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v6 1/2] arm64: adjust el0_sync so that a function can be called
To implement the context tracker properly on arm64, a function call needs to be made after debugging and interrupts are turned on, but before the lr is changed to point to ret_to_user(). If the function call is made after the lr is changed the function will not return to the correct place. For similar reasons, defer the setting of x0 so that it doesn't need to be saved around the function call (save far_el1 in x26 temporarily instead). Signed-off-by: Larry Bassel --- arch/arm64/kernel/entry.S | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e8b23a3..b0101b9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -354,7 +354,6 @@ el0_sync: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state b.eqel0_svc - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -383,7 +382,6 @@ el0_sync_compat: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state b.eqel0_svc_compat - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -426,22 +424,25 @@ el0_da: /* * Data abort handling */ - mrs x0, far_el1 - bic x0, x0, #(0xff << 56) + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_ia: /* * Instruction abort handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_fpsimd_acc: /* @@ -450,6 +451,7 @@ el0_fpsimd_acc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_acc el0_fpsimd_exc: /* @@ -458,16 +460,19 @@ el0_fpsimd_exc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_exc el0_sp_pc: /* * Stack or PC alignment exception handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_sp_pc_abort el0_undef: /* @@ -476,6 +481,7 @@ el0_undef: // enable interrupts before calling the main handler enable_dbg_and_irq mov x0, sp + adr lr, ret_to_user b do_undefinstr el0_dbg: /* @@ -493,6 +499,7 @@ el0_inv: mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 + adr lr, ret_to_user b bad_mode ENDPROC(el0_sync) -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v6 2/2] arm64: enable context tracking
Make calls to ct_user_enter when the kernel is exited and ct_user_exit when the kernel is entered (in el0_da, el0_ia, el0_svc, el0_irq and all of the "error" paths). These macros expand to function calls which will only work properly if el0_sync and related code has been rearranged (in a previous patch of this series). The calls to ct_user_exit are made after hw debugging has been enabled (enable_dbg_and_irq). The call to ct_user_enter is made at the beginning of the kernel_exit macro. This patch is based on earlier work by Kevin Hilman. Save/restore optimizations were also done by Kevin. Signed-off-by: Kevin Hilman Signed-off-by: Larry Bassel --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 4 arch/arm64/kernel/entry.S| 39 +++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e759af5..ef18ae5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -55,6 +55,7 @@ config ARM64 select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE + select HAVE_CONTEXT_TRACKING help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 720e70b..8363f34 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ +#define TIF_NOHZ7 #define TIF_SYSCALL_TRACE 8 #define TIF_POLLING_NRFLAG 16 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ @@ -113,9 +114,12 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_32BIT (1 << TIF_32BIT) +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME) +#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_NOHZ) #endif /* __KERNEL__ */ #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b0101b9..3c484e2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,6 +30,32 @@ #include /* + * Context tracking subsystem. Used to instrument transitions + * between user and kernel mode. + */ + .macro ct_user_exit, restore = 0 +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_exit + .if \restore == 1 + /* +* Save/restore needed during syscalls. Restore syscall arguments from +* the values already saved on stack during kernel_entry. 
+*/ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + .endif +#endif + .endm + + .macro ct_user_enter +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_enter +#endif + .endm + +/* * Bad Abort numbers *- */ @@ -91,6 +117,7 @@ .macro kernel_exit, el, ret = 0 ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 + ct_user_enter ldr x23, [sp, #S_SP]// load return stack pointer .endif .if \ret @@ -318,6 +345,7 @@ el1_irq: bl trace_hardirqs_off #endif + ct_user_exit irq_handler #ifdef CONFIG_PREEMPT @@ -427,6 +455,7 @@ el0_da: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp @@ -439,6 +468,7 @@ el0_ia: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp @@ -449,6 +479,7 @@ el0_fpsimd_acc: * Floating Point or Advanced SIMD access */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -458,6 +489,7 @@ el0_fpsimd_exc: * Floating Point or Advanced SIMD exception */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -480,6 +512,7 @@ el0_undef: */ // enable interrupts before
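A note on the two bl targets used by ct_user_exit and ct_user_enter above: context_tracking_user_exit() and context_tracking_user_enter() live in kernel/context_tracking.c. A heavily simplified C sketch of their effect follows; the real functions are additionally guarded by a per-CPU "active" flag, disable interrupts around the state change, and also drive vtime accounting, so treat this as an illustration rather than the kernel implementation:

	#include <linux/percpu.h>
	#include <linux/rcupdate.h>

	/* Illustrative sketch only -- not the actual kernel/context_tracking.c. */
	enum ctx_state { IN_KERNEL = 0, IN_USER };
	static DEFINE_PER_CPU(enum ctx_state, ctx_state);

	void context_tracking_user_exit(void)		/* user -> kernel */
	{
		if (__this_cpu_read(ctx_state) == IN_USER) {
			__this_cpu_write(ctx_state, IN_KERNEL);
			rcu_user_exit();	/* RCU must watch this CPU again */
		}
	}

	void context_tracking_user_enter(void)		/* kernel -> user */
	{
		if (__this_cpu_read(ctx_state) == IN_KERNEL) {
			__this_cpu_write(ctx_state, IN_USER);
			rcu_user_enter();	/* a FULL_NOHZ CPU may now stop its tick */
		}
	}

Both are ordinary C functions that clobber caller-saved registers and return through lr, which is exactly why patch 1/2 had to defer the adr lr, ret_to_user and the setting of x0.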
[PATCH v6 0/2] context tracker support for arm64
Implement and enable context tracking for arm64 (which is a prerequisite for FULL_NOHZ support). This patchset builds upon earlier work by Kevin Hilman and is based on Will Deacon's tree. Changes v5 to v6: * Don't save far_el1 in x26 in el0_dbg path (not needed) * TIF_NOHZ processes go through the slow path (so no register save/restore is needed in ct_user_enter) Changes v4 to v5: * Improvement to code restoring far_el1 (suggested by Christopher Covington) * Improvement to register save/restore in ct_user_enter Changes v3 to v4: * Rename parameter of ct_user_exit from save to restore * Rebased patch to Will Deacon's tree (branch remotes/origin/aarch64 of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git) Changes v2 to v3: * Save/restore necessary registers in ct_user_enter and ct_user_exit * Annotate "error paths" out of el0_sync with ct_user_exit Changes v1 to v2: * Save far_el1 in x26 temporarily Larry Bassel (2): arm64: adjust el0_sync so that a function can be called arm64: enable context tracking arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 4 +++ arch/arm64/kernel/entry.S| 58 +++- 3 files changed, 56 insertions(+), 7 deletions(-) -- 1.8.3.2
Re: [PATCH v5 1/2] arm64: adjust el0_sync so that a function can be called
On 28 May 14 12:27, Will Deacon wrote: > Hi Larry, > > On Mon, May 26, 2014 at 07:56:12PM +0100, Larry Bassel wrote: > > To implement the context tracker properly on arm64, > > a function call needs to be made after debugging and > > interrupts are turned on, but before the lr is changed > > to point to ret_to_user(). If the function call > > is made after the lr is changed the function will not > > return to the correct place. > > > > For similar reasons, defer the setting of x0 so that > > it doesn't need to be saved around the function call > > (save far_el1 in x26 temporarily instead). > > > > Signed-off-by: Larry Bassel > > [...] > > > Why have you added this mov instruction? I believe (please correct me if I'm wrong) that it is necessary. Here is why: > > @@ -476,23 +481,27 @@ el0_undef: > > // enable interrupts before calling the main handler > > enable_dbg_and_irq > > mov x0, sp > > + adr lr, ret_to_user > > b do_undefinstr > > el0_dbg: > > /* > > * Debug exception handling > > */ > > tbnzx24, #0, el0_inv// EL0 only > > - mrs x0, far_el1 > > + mrs x26, far_el1 needed because do_debug_exception may clobber x0, so save far_el1 in x26 (as other parts of this patch do) > > + mov x0, x26 needed because far_el1 is expected to be in x0 here > > mov x1, x25 > > mov x2, sp > > bl do_debug_exception > > enable_dbg [call to ct_user_exit will go here in the next patch, this may re-clobber x0] > > + mov x0, x26 needed because far_el1 is expected to be in x0 here Since the purpose of this patch is to make calling a function possible in this code path, the "extra" mov instruction above is necessary and IMHO should be added in this patch and not in the next one whose purpose is to define the ct_user_* macros and add calls to them in the proper places. > > b ret_to_user > > Will Larry
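For reference, the el0_dbg path as it stands in v5 with both patches applied, reassembled from the v5 1/2 and 2/2 diffs below (note that the v6 cover letter above later drops the x26 staging from this particular path as unnecessary):

	el0_dbg:
		tbnz	x24, #0, el0_inv	// EL0 only
		mrs	x26, far_el1		// stash FAR in callee-saved x26
		mov	x0, x26
		mov	x1, x25
		mov	x2, sp
		bl	do_debug_exception	// C call: may clobber x0-x18
		enable_dbg
		ct_user_exit			// expands to another C call
		mov	x0, x26			// x26 survived both calls
		b	ret_to_user

Under the AArch64 procedure call standard only x19-x28 are guaranteed to be preserved across a function call, hence parking far_el1 in x26 rather than leaving it in x0.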
[PATCH v5 0/2] context tracker support for arm64
Implement and enable context tracking for arm64 (which is a prerequisite for FULL_NOHZ support). This patchset builds upon earlier work by Kevin Hilman and is based on Will Deacon's tree. Changes v4 to v5: * Improvement to code restoring far_el1 (suggested by Christopher Covington) * Improvement to register save/restore in ct_user_enter Changes v3 to v4: * Rename parameter of ct_user_exit from save to restore * Rebased patch to Will Deacon's tree (branch remotes/origin/aarch64 of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git) Changes v2 to v3: * Save/restore necessary registers in ct_user_enter and ct_user_exit * Annotate "error paths" out of el0_sync with ct_user_exit Changes v1 to v2: * Save far_el1 in x26 temporarily Larry Bassel (2): arm64: adjust el0_sync so that a function can be called arm64: enable context tracking arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/entry.S| 69 3 files changed, 64 insertions(+), 7 deletions(-) -- 1.8.3.2
[PATCH v5 2/2] arm64: enable context tracking
Make calls to ct_user_enter when the kernel is exited and ct_user_exit when the kernel is entered (in el0_da, el0_ia, el0_svc, el0_irq and all of the "error" paths). These macros expand to function calls which will only work properly if el0_sync and related code has been rearranged (in a previous patch of this series). The calls to ct_user_exit are made after hw debugging has been enabled (enable_dbg_and_irq). The call to ct_user_enter is made at the beginning of the kernel_exit macro. This patch is based on earlier work by Kevin Hilman. Save/restore optimizations were also done by Kevin. Signed-off-by: Kevin Hilman Signed-off-by: Larry Bassel --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/entry.S| 46 3 files changed, 48 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e759af5..ef18ae5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -55,6 +55,7 @@ config ARM64 select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE + select HAVE_CONTEXT_TRACKING help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 720e70b..301ea6a 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -108,6 +108,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ #define TIF_SWITCH_MM 23 /* deferred switch_mm */ +#define TIF_NOHZ24 #define _TIF_SIGPENDING(1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c6bc1a3..0605963 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,6 +30,42 @@ #include /* + * Context tracking subsystem. Used to instrument transitions + * between user and kernel mode. + */ + .macro ct_user_exit, restore = 0 +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_exit + .if \restore == 1 + /* +* Save/restore needed during syscalls. Restore syscall arguments from +* the values already saved on stack during kernel_entry. +*/ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + .endif +#endif + .endm + + .macro ct_user_enter, save = 0 +#ifdef CONFIG_CONTEXT_TRACKING + .if \save == 1 + /* +* We only have to save/restore x0 on the fast syscall path where +* x0 contains the syscall return. 
+*/ + mov x19, x0 + .endif + bl context_tracking_user_enter + .if \save == 1 + mov x0, x19 + .endif +#endif + .endm + +/* * Bad Abort numbers *- */ @@ -91,6 +127,7 @@ .macro kernel_exit, el, ret = 0 ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 + ct_user_enter \ret ldr x23, [sp, #S_SP]// load return stack pointer .endif .if \ret @@ -318,6 +355,7 @@ el1_irq: bl trace_hardirqs_off #endif + ct_user_exit irq_handler #ifdef CONFIG_PREEMPT @@ -427,6 +465,7 @@ el0_da: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp @@ -439,6 +478,7 @@ el0_ia: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp @@ -449,6 +489,7 @@ el0_fpsimd_acc: * Floating Point or Advanced SIMD access */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -458,6 +499,7 @@ el0_fpsimd_exc: * Floating Point or Advanced SIMD exception */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -480,6 +522,7 @@ el0_undef: */ // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, sp adr lr, ret_to_user b do_undefinstr @@ -494,10 +537,12 @@ el0_dbg: mov x2, sp bl do_debug_exception enable_dbg + ct_user_exit mov x0, x26 b ret_to_user el0_inv: enable_dbg + ct_user_exit
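The mov x19, x0 save above is where v5 improves on v4's ct_user_enter: x19 is callee-saved under the AArch64 procedure call standard, so a register-to-register move each way replaces the stack traffic of the v4 version (which appears further down in this archive):

	/* v4: spill x0-x3 to the stack around the call */
	push	x2, x3
	push	x0, x1
	bl	context_tracking_user_enter
	pop	x0, x1
	pop	x2, x3

	/* v5: only x0 (the syscall return value) is live on the fast path */
	mov	x19, x0
	bl	context_tracking_user_enter
	mov	x0, x19

v6 then drops the save altogether by forcing TIF_NOHZ tasks through the slow syscall path (see the v6 cover letter above).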
[PATCH v5 1/2] arm64: adjust el0_sync so that a function can be called
To implement the context tracker properly on arm64, a function call needs to be made after debugging and interrupts are turned on, but before the lr is changed to point to ret_to_user(). If the function call is made after the lr is changed the function will not return to the correct place. For similar reasons, defer the setting of x0 so that it doesn't need to be saved around the function call (save far_el1 in x26 temporarily instead). Signed-off-by: Larry Bassel --- arch/arm64/kernel/entry.S | 23 --- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e8b23a3..c6bc1a3 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -354,7 +354,6 @@ el0_sync: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state b.eqel0_svc - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -383,7 +382,6 @@ el0_sync_compat: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state b.eqel0_svc_compat - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -426,22 +424,25 @@ el0_da: /* * Data abort handling */ - mrs x0, far_el1 - bic x0, x0, #(0xff << 56) + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_ia: /* * Instruction abort handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_fpsimd_acc: /* @@ -450,6 +451,7 @@ el0_fpsimd_acc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_acc el0_fpsimd_exc: /* @@ -458,16 +460,19 @@ el0_fpsimd_exc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_exc el0_sp_pc: /* * Stack or PC alignment exception handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_sp_pc_abort el0_undef: /* @@ -476,23 +481,27 @@ el0_undef: // enable interrupts before calling the main handler enable_dbg_and_irq mov x0, sp + adr lr, ret_to_user b do_undefinstr el0_dbg: /* * Debug exception handling */ tbnzx24, #0, el0_inv// EL0 only - mrs x0, far_el1 + mrs x26, far_el1 + mov x0, x26 mov x1, x25 mov x2, sp bl do_debug_exception enable_dbg + mov x0, x26 b ret_to_user el0_inv: enable_dbg mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 + adr lr, ret_to_user b bad_mode ENDPROC(el0_sync) -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v4 1/2] arm64: adjust el0_sync so that a function can be called
On 23 May 14 15:44, Catalin Marinas wrote: > On Thu, May 22, 2014 at 11:35:20PM +0100, Larry Bassel wrote: > > > On 05/22/2014 03:27 PM, Larry Bassel wrote: > > > > To implement the context tracker properly on arm64, > > > > a function call needs to be made after debugging and > > > > interrupts are turned on, but before the lr is changed > > > > to point to ret_to_user(). If the function call > > > > is made after the lr is changed the function will not > > > > return to the correct place. > > > > > > > > For similar reasons, defer the setting of x0 so that > > > > it doesn't need to be saved around the function call > > > > (save far_el1 in x26 temporarily instead). > > > > > > > > Signed-off-by: Larry Bassel > > > > --- > > > > arch/arm64/kernel/entry.S | 24 +--- > > > > 1 file changed, 17 insertions(+), 7 deletions(-) > > > > > > > > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S > > > > index e8b23a3..20b336e 100644 > > > > --- a/arch/arm64/kernel/entry.S > > > > +++ b/arch/arm64/kernel/entry.S > > > > @@ -354,7 +354,6 @@ el0_sync: > > > > lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class > > > > cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state > > > > b.eqel0_svc > > > > - adr lr, ret_to_user > > > > cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 > > > > b.eqel0_da > > > > cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in > > > > EL0 > > > > @@ -383,7 +382,6 @@ el0_sync_compat: > > > > lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class > > > > cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state > > > > b.eqel0_svc_compat > > > > - adr lr, ret_to_user > > > > cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 > > > > b.eqel0_da > > > > cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 > > > > @@ -426,22 +424,26 @@ el0_da: > > > > /* > > > > * Data abort handling > > > > */ > > > > - mrs x0, far_el1 > > > > - bic x0, x0, #(0xff << 56) > > > > + mrs x26, far_el1 > > > > // enable interrupts before calling the main handler > > > > enable_dbg_and_irq > > > > + mov x0, x26 > > > > + bic x0, x0, #(0xff << 56) > > > > > > Nit: I believe you can bit clear with x26 as the source register and omit > > > the > > > move instruction. > > > > Is that really an improvement (assuming it works)? Are we saving > > any cycles here? If so, does it matter? It is easy to see what > > the move instruction is doing. > > Even if it's not noticeable, I would still reduce the number of lines by > one. BIC with immediate is just an alias for AND and it supports > different source and destination. Ack. > > -- > Catalin Larry
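Concretely, the suggestion, which landed in v5 (see the el0_da hunks elsewhere in this thread), folds the move into the bit-clear:

	/* v4 */
	mov	x0, x26
	bic	x0, x0, #(0xff << 56)

	/* v5: BIC accepts distinct source and destination registers */
	bic	x0, x26, #(0xff << 56)

As Catalin notes, the assembler treats BIC with an immediate as AND with the complemented immediate, so no extra instruction is needed.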
Re: [PATCH v4 1/2] arm64: adjust el0_sync so that a function can be called
On 22 May 14 16:23, Christopher Covington wrote: > Hi Larry, > > On 05/22/2014 03:27 PM, Larry Bassel wrote: > > To implement the context tracker properly on arm64, > > a function call needs to be made after debugging and > > interrupts are turned on, but before the lr is changed > > to point to ret_to_user(). If the function call > > is made after the lr is changed the function will not > > return to the correct place. > > > > For similar reasons, defer the setting of x0 so that > > it doesn't need to be saved around the function call > > (save far_el1 in x26 temporarily instead). > > > > Signed-off-by: Larry Bassel > > --- > > arch/arm64/kernel/entry.S | 24 +--- > > 1 file changed, 17 insertions(+), 7 deletions(-) > > > > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S > > index e8b23a3..20b336e 100644 > > --- a/arch/arm64/kernel/entry.S > > +++ b/arch/arm64/kernel/entry.S > > @@ -354,7 +354,6 @@ el0_sync: > > lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class > > cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state > > b.eqel0_svc > > - adr lr, ret_to_user > > cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 > > b.eqel0_da > > cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 > > @@ -383,7 +382,6 @@ el0_sync_compat: > > lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class > > cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state > > b.eqel0_svc_compat > > - adr lr, ret_to_user > > cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 > > b.eqel0_da > > cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 > > @@ -426,22 +424,26 @@ el0_da: > > /* > > * Data abort handling > > */ > > - mrs x0, far_el1 > > - bic x0, x0, #(0xff << 56) > > + mrs x26, far_el1 > > // enable interrupts before calling the main handler > > enable_dbg_and_irq > > + mov x0, x26 > > + bic x0, x0, #(0xff << 56) > > Nit: I believe you can bit clear with x26 as the source register and omit the > move instruction. Is that really an improvement (assuming it works)? Are we saving any cycles here? If so, does it matter? It is easy to see what the move instruction is doing. > > Regards, > Christopher > > -- > Employee of Qualcomm Innovation Center, Inc. > Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, > hosted by the Linux Foundation. Larry
[PATCH v4 0/2] context tracker support for arm64
Implement and enable context tracking for arm64 (which is a prerequisite for FULL_NOHZ support). This patchset builds upon earlier work by Kevin Hilman and is based on Will Deacon's tree. Changes v3 to v4: * Rename parameter of ct_user_exit from save to restore * Rebased patch to Will Deacon's tree (branch remotes/origin/aarch64 of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git) Changes v2 to v3: * Save/restore necessary registers in ct_user_enter and ct_user_exit * Annotate "error paths" out of el0_sync with ct_user_exit Changes v1 to v2: * Save far_el1 in x26 temporarily Larry Bassel (2): arm64: adjust el0_sync so that a function can be called arm64: enable context tracking arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/entry.S| 72 3 files changed, 67 insertions(+), 7 deletions(-) -- 1.8.3.2
[PATCH v4 2/2] arm64: enable context tracking
Make calls to ct_user_enter when the kernel is exited and ct_user_exit when the kernel is entered (in el0_da, el0_ia, el0_svc, el0_irq and all of the "error" paths). These macros expand to function calls which will only work properly if el0_sync and related code has been rearranged (in a previous patch of this series). The calls to ct_user_exit are made after hw debugging has been enabled (enable_dbg_and_irq). The call to ct_user_enter is made at the beginning of the kernel_exit macro. This patch is based on earlier work by Kevin Hilman. Save/restore optimizations were also done by Kevin. Signed-off-by: Kevin Hilman Signed-off-by: Larry Bassel --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/entry.S| 48 3 files changed, 50 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e759af5..ef18ae5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -55,6 +55,7 @@ config ARM64 select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE + select HAVE_CONTEXT_TRACKING help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 720e70b..301ea6a 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -108,6 +108,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ #define TIF_SWITCH_MM 23 /* deferred switch_mm */ +#define TIF_NOHZ24 #define _TIF_SIGPENDING(1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 20b336e..520da4c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,6 +30,44 @@ #include /* + * Context tracking subsystem. Used to instrument transitions + * between user and kernel mode. + */ + .macro ct_user_exit, restore = 0 +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_exit + .if \restore == 1 + /* +* Save/restore needed during syscalls. Restore syscall arguments from +* the values already saved on stack during kernel_entry. +*/ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + .endif +#endif + .endm + + .macro ct_user_enter, save = 0 +#ifdef CONFIG_CONTEXT_TRACKING + .if \save == 1 + /* +* Save/restore only needed on syscall fastpath, which uses +* x0-x2. 
+*/ + pushx2, x3 + pushx0, x1 + .endif + bl context_tracking_user_enter + .if \save == 1 + pop x0, x1 + pop x2, x3 + .endif +#endif + .endm + +/* * Bad Abort numbers *- */ @@ -91,6 +129,7 @@ .macro kernel_exit, el, ret = 0 ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 + ct_user_enter \ret ldr x23, [sp, #S_SP]// load return stack pointer .endif .if \ret @@ -318,6 +357,7 @@ el1_irq: bl trace_hardirqs_off #endif + ct_user_exit irq_handler #ifdef CONFIG_PREEMPT @@ -427,6 +467,7 @@ el0_da: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, x26 bic x0, x0, #(0xff << 56) mov x1, x25 @@ -440,6 +481,7 @@ el0_ia: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp @@ -450,6 +492,7 @@ el0_fpsimd_acc: * Floating Point or Advanced SIMD access */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -459,6 +502,7 @@ el0_fpsimd_exc: * Floating Point or Advanced SIMD exception */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -481,6 +525,7 @@ el0_undef: */ // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, sp adr lr, ret_to_user b do_undefinstr @@ -495,10 +540,12 @@ el0_dbg: mov x2, sp bl do_debug_exception enable_dbg + ct_user_exit mov x0, x26 b ret_to_user el0_inv: enable_dbg + c
[PATCH v4 1/2] arm64: adjust el0_sync so that a function can be called
To implement the context tracker properly on arm64, a function call needs to be made after debugging and interrupts are turned on, but before the lr is changed to point to ret_to_user(). If the function call is made after the lr is changed the function will not return to the correct place. For similar reasons, defer the setting of x0 so that it doesn't need to be saved around the function call (save far_el1 in x26 temporarily instead). Signed-off-by: Larry Bassel --- arch/arm64/kernel/entry.S | 24 +--- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e8b23a3..20b336e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -354,7 +354,6 @@ el0_sync: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state b.eqel0_svc - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -383,7 +382,6 @@ el0_sync_compat: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state b.eqel0_svc_compat - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -426,22 +424,26 @@ el0_da: /* * Data abort handling */ - mrs x0, far_el1 - bic x0, x0, #(0xff << 56) + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 + bic x0, x0, #(0xff << 56) mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_ia: /* * Instruction abort handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_fpsimd_acc: /* @@ -450,6 +452,7 @@ el0_fpsimd_acc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_acc el0_fpsimd_exc: /* @@ -458,16 +461,19 @@ el0_fpsimd_exc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_exc el0_sp_pc: /* * Stack or PC alignment exception handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_sp_pc_abort el0_undef: /* @@ -476,23 +482,27 @@ el0_undef: // enable interrupts before calling the main handler enable_dbg_and_irq mov x0, sp + adr lr, ret_to_user b do_undefinstr el0_dbg: /* * Debug exception handling */ tbnzx24, #0, el0_inv// EL0 only - mrs x0, far_el1 + mrs x26, far_el1 + mov x0, x26 mov x1, x25 mov x2, sp bl do_debug_exception enable_dbg + mov x0, x26 b ret_to_user el0_inv: enable_dbg mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 + adr lr, ret_to_user b bad_mode ENDPROC(el0_sync) -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3] arm64: Support arch_irq_work_raise() via self IPIs
Exception stack(0xde05de80 to 0xde05dec8) de80: 0001 0001 de05b440 c082afac de057ac0 de057ac0 de0443c0 dea0: c082afbc de05dec8 c009f2a0 c051c778 dec0: 2113 [] (__irq_svc+0x44/0x5c) from [] (_raw_spin_unlock_irq+0x28/0x2c) [] (_raw_spin_unlock_irq+0x28/0x2c) from [] (proc_alloc_inum+0x30/0xa8) [] (proc_alloc_inum+0x30/0xa8) from [] (proc_register+0x18/0x130) [] (proc_register+0x18/0x130) from [] (proc_mkdir_data+0x44/0x6c) [] (proc_mkdir_data+0x44/0x6c) from [] (register_irq_proc+0x6c/0x128) [] (register_irq_proc+0x6c/0x128) from [] (init_irq_proc+0x74/0xb0) [] (init_irq_proc+0x74/0xb0) from [] (kernel_init_freeable+0x84/0x1c8) [] (kernel_init_freeable+0x84/0x1c8) from [] (kernel_init+0x8/0x150) [] (kernel_init+0x8/0x150) from [] (ret_from_fork+0x14/0x2c) Code: bad PC value Fixes: bf18525fd79 "ARM: 7872/1: Support arch_irq_work_raise() via self IPIs" Reported-by: Olof Johansson Signed-off-by: Stephen Boyd Tested-by: Olof Johansson Signed-off-by: Russell King Changes v2 to v3: * Do not call is_smp() as this is only defined on arm32 Changes v1 to v2: * Include ARM 7887/1 bugfix Signed-off-by: Larry Bassel Reviewed-by: Kevin Hilman --- arch/arm64/include/asm/hardirq.h | 2 +- arch/arm64/kernel/smp.c | 19 +++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index ae4801d..0be6782 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -20,7 +20,7 @@ #include #include -#define NR_IPI 5 +#define NR_IPI 6 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f0a141d..049aa8d 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -62,6 +63,7 @@ enum ipi_msg_type { IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, IPI_TIMER, + IPI_IRQ_WORK, }; /* @@ -455,6 +457,14 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ + if (smp_cross_call) + smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); +} +#endif + static const char *ipi_types[NR_IPI] = { #define S(x,s) [x - IPI_RESCHEDULE] = s S(IPI_RESCHEDULE, "Rescheduling interrupts"), @@ -462,6 +472,7 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_TIMER, "Timer broadcast interrupts"), + S(IPI_IRQ_WORK, "IRQ work interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -554,6 +565,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; #endif +#ifdef CONFIG_IRQ_WORK + case IPI_IRQ_WORK: + irq_enter(); + irq_work_run(); + irq_exit(); + break; +#endif + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; -- 1.8.3.2
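For context on what this enables: irq_work lets code running in restricted contexts (NMI-like contexts, or a CPU whose tick is stopped) defer a callback into a real hardirq, which on arm64 is now delivered through the IPI_IRQ_WORK self-IPI above. A minimal, hypothetical user of the generic API (the names my_work, my_deferred_fn, my_setup and my_raise are illustrative, not from this patch):

	#include <linux/irq_work.h>
	#include <linux/printk.h>

	static struct irq_work my_work;

	/* Runs in hardirq context once the self-IPI is handled. */
	static void my_deferred_fn(struct irq_work *work)
	{
		pr_info("deferred work ran\n");
	}

	static void my_setup(void)
	{
		init_irq_work(&my_work, my_deferred_fn);
	}

	/*
	 * Safe to call where most work is not, e.g. from NMI or hardirq
	 * context; queuing triggers arch_irq_work_raise() on the local CPU.
	 */
	static void my_raise(void)
	{
		irq_work_queue(&my_work);
	}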
Re: [PATCH v2] arm64: Support arch_irq_work_raise() via self IPIs
On 12 May 14 10:29, Will Deacon wrote: > On Sat, May 10, 2014 at 11:23:41PM +0100, Larry Bassel wrote: > > Support for arch_irq_work_raise() was missing from > > arm64 (a prerequisite for FULL_NOHZ). > > [...] > > > @@ -455,6 +457,14 @@ void arch_send_call_function_single_ipi(int cpu) > > smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); > > } > > > > +#ifdef CONFIG_IRQ_WORK > > +void arch_irq_work_raise(void) > > +{ > > + if (is_smp()) > > + smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); > > +} > > +#endif > > Does this even compile? We're probably better off just checking whether or > not smp_cross_call is NULL. No it doesn't (I incorrectly assumed that is_smp() was generic, not arm32 specific and so I didn't compile this before submitting). I've verified that your suggestion compiles and runs properly and will resubmit. Thanks for catching this. > > Will Larry
[PATCH v2] arm64: Support arch_irq_work_raise() via self IPIs
Exception stack(0xde05de80 to 0xde05dec8) de80: 0001 0001 de05b440 c082afac de057ac0 de057ac0 de0443c0 dea0: c082afbc de05dec8 c009f2a0 c051c778 dec0: 2113 [] (__irq_svc+0x44/0x5c) from [] (_raw_spin_unlock_irq+0x28/0x2c) [] (_raw_spin_unlock_irq+0x28/0x2c) from [] (proc_alloc_inum+0x30/0xa8) [] (proc_alloc_inum+0x30/0xa8) from [] (proc_register+0x18/0x130) [] (proc_register+0x18/0x130) from [] (proc_mkdir_data+0x44/0x6c) [] (proc_mkdir_data+0x44/0x6c) from [] (register_irq_proc+0x6c/0x128) [] (register_irq_proc+0x6c/0x128) from [] (init_irq_proc+0x74/0xb0) [] (init_irq_proc+0x74/0xb0) from [] (kernel_init_freeable+0x84/0x1c8) [] (kernel_init_freeable+0x84/0x1c8) from [] (kernel_init+0x8/0x150) [] (kernel_init+0x8/0x150) from [] (ret_from_fork+0x14/0x2c) Code: bad PC value Fixes: bf18525fd79 "ARM: 7872/1: Support arch_irq_work_raise() via self IPIs" Reported-by: Olof Johansson Signed-off-by: Stephen Boyd Tested-by: Olof Johansson Signed-off-by: Russell King Changes v1 to v2: * Include ARM 7887/1 bugfix Signed-off-by: Larry Bassel Reviewed-by: Kevin Hilman --- arch/arm64/include/asm/hardirq.h | 2 +- arch/arm64/kernel/smp.c | 19 +++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index ae4801d..0be6782 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -20,7 +20,7 @@ #include #include -#define NR_IPI 5 +#define NR_IPI 6 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f0a141d..78c3f97 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -62,6 +63,7 @@ enum ipi_msg_type { IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, IPI_TIMER, + IPI_IRQ_WORK, }; /* @@ -455,6 +457,14 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ + if (is_smp()) + smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); +} +#endif + static const char *ipi_types[NR_IPI] = { #define S(x,s) [x - IPI_RESCHEDULE] = s S(IPI_RESCHEDULE, "Rescheduling interrupts"), @@ -462,6 +472,7 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_TIMER, "Timer broadcast interrupts"), + S(IPI_IRQ_WORK, "IRQ work interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -554,6 +565,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; #endif +#ifdef CONFIG_IRQ_WORK + case IPI_IRQ_WORK: + irq_enter(); + irq_work_run(); + irq_exit(); + break; +#endif + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; -- 1.8.3.2
Re: [PATCH] arm64: Support arch_irq_work_raise() via self IPIs
On 09 May 14 16:57, Catalin Marinas wrote: > On Mon, May 05, 2014 at 09:48:27PM +0100, Larry Bassel wrote: > > Support for arch_irq_work_raise() was missing from > > arm64 (a prerequisite for FULL_NOHZ). > > > > This patch is based on the arm32 patch ARM 7872/1 > > which ports cleanly. > [...] > > +#ifdef CONFIG_IRQ_WORK > > +void arch_irq_work_raise(void) > > +{ > > + smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); > > +} > > +#endif > > There was a subsequent patch adding is_smp() check here (c682e51dbc98 > ARM: 7887/1: Don't smp_cross_call() on UP devices in > arch_irq_work_raise()). Don't we need it? I will look into this. Thanks. > > -- > Catalin Larry
[PATCH v3 1/2] arm64: adjust el0_sync so that a function can be called
To implement the context tracker properly on arm64, a function call needs to be made after debugging and interrupts are turned on, but before the lr is changed to point to ret_from_exception(). If the function call is made after the lr is changed the function will not return to the correct place. For similar reasons, defer the setting of x0 so that it doesn't need to be saved around the function call (save far_el1 in x26 temporarily instead). Signed-off-by: Larry Bassel Reviewed-by: Kevin Hilman --- arch/arm64/kernel/entry.S | 27 +++ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 39ac630..136bb7d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -349,7 +349,6 @@ el0_sync: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state b.eqel0_svc - adr lr, ret_from_exception cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -378,7 +377,6 @@ el0_sync_compat: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state b.eqel0_svc_compat - adr lr, ret_from_exception cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -421,28 +419,32 @@ el0_da: /* * Data abort handling */ - mrs x0, far_el1 - bic x0, x0, #(0xff << 56) + mrs x26, far_el1 disable_step x1 isb enable_dbg // enable interrupts before calling the main handler enable_irq + mov x0, x26 + bic x0, x0, #(0xff << 56) mov x1, x25 mov x2, sp + adr lr, ret_from_exception b do_mem_abort el0_ia: /* * Instruction abort handling */ - mrs x0, far_el1 + mrs x26, far_el1 disable_step x1 isb enable_dbg // enable interrupts before calling the main handler enable_irq + mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp + adr lr, ret_from_exception b do_mem_abort el0_fpsimd_acc: /* @@ -450,6 +452,7 @@ el0_fpsimd_acc: */ mov x0, x25 mov x1, sp + adr lr, ret_from_exception b do_fpsimd_acc el0_fpsimd_exc: /* @@ -457,42 +460,50 @@ el0_fpsimd_exc: */ mov x0, x25 mov x1, sp + adr lr, ret_from_exception b do_fpsimd_exc el0_sp_pc: /* * Stack or PC alignment exception handling */ - mrs x0, far_el1 + mrs x26, far_el1 disable_step x1 isb enable_dbg // enable interrupts before calling the main handler enable_irq + mov x0, x26 mov x1, x25 mov x2, sp + adr lr, ret_from_exception b do_sp_pc_abort el0_undef: /* * Undefined instruction */ - mov x0, sp + mov x26, sp // enable interrupts before calling the main handler enable_irq + mov x0, x26 + adr lr, ret_from_exception b do_undefinstr el0_dbg: /* * Debug exception handling */ tbnzx24, #0, el0_inv// EL0 only - mrs x0, far_el1 + mrs x26, far_el1 disable_step x1 + mov x0, x26 mov x1, x25 mov x2, sp + adr lr, ret_from_exception b do_debug_exception el0_inv: mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 + adr lr, ret_from_exception b bad_mode ENDPROC(el0_sync) -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 2/2] arm64: enable context tracking
Make calls to ct_user_enter when the kernel is exited and ct_user_exit when the kernel is entered (in el0_da, el0_ia, el0_svc, el0_irq and all of the "error" paths). These macros expand to function calls which will only work properly if el0_sync and related code has been rearranged (in a previous patch of this series). The calls to ct_user_exit are made after hw debugging has been enabled (enable_dbg). The call to ct_user_enter is made at the beginning of the kernel_exit macro. This patch is based on earlier work by Kevin Hilman. Signed-off-by: Kevin Hilman Signed-off-by: Larry Bassel --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/entry.S| 49 3 files changed, 51 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e6e4d37..152d92b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -55,6 +55,7 @@ config ARM64 select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE + select HAVE_CONTEXT_TRACKING help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 720e70b..301ea6a 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -108,6 +108,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ #define TIF_SWITCH_MM 23 /* deferred switch_mm */ +#define TIF_NOHZ24 #define _TIF_SIGPENDING(1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 136bb7d..c839bab 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,6 +30,44 @@ #include /* + * Context tracking subsystem. Used to instrument transitions + * between user and kernel mode. + */ + .macro ct_user_exit, save = 0 +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_exit + .if \save == 1 + /* +* save/restore needed during syscalls. 
Restore syscall arguments from +* the values already saved on stack during kernel_entry +*/ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + .endif +#endif + .endm + + .macro ct_user_enter, save = 0 +#ifdef CONFIG_CONTEXT_TRACKING + .if \save == 1 + /* +* save/restore only needed on syscall fastpath, which uses +* x0-x2 +*/ + pushx2, x3 + pushx0, x1 + .endif + bl context_tracking_user_enter + .if \save == 1 + pop x0, x1 + pop x2, x3 + .endif +#endif + .endm + +/* * Bad Abort numbers *- */ @@ -88,6 +126,7 @@ .macro kernel_exit, el, ret = 0 ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 + ct_user_enter \ret ldr x23, [sp, #S_SP]// load return stack pointer .endif .if \ret @@ -425,6 +464,7 @@ el0_da: enable_dbg // enable interrupts before calling the main handler enable_irq + ct_user_exit mov x0, x26 bic x0, x0, #(0xff << 56) mov x1, x25 @@ -441,6 +481,7 @@ el0_ia: enable_dbg // enable interrupts before calling the main handler enable_irq + ct_user_exit mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp @@ -450,6 +491,7 @@ el0_fpsimd_acc: /* * Floating Point or Advanced SIMD access */ + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_from_exception @@ -458,6 +500,7 @@ el0_fpsimd_exc: /* * Floating Point or Advanced SIMD exception */ + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_from_exception @@ -472,6 +515,7 @@ el0_sp_pc: enable_dbg // enable interrupts before calling the main handler enable_irq + ct_user_exit mov x0, x26 mov x1, x25 mov x2, sp @@ -484,6 +528,7 @@ el0_undef: mov x26, sp // enable interrupts before calling the main handler enable_irq + ct_user_exit mov x0, x26 adr lr, ret_from_exception b do_undefinstr @@ -494,12 +539,14 @@ el0_dbg: tbnzx24, #0, el0_inv// EL0 only mrs x26, far_el1 disable_step x1 + ct_user_exit mov x0, x26 mov x1, x25
[PATCH v3 0/2] context tracker support for arm64
Implement and enable context tracking for arm64 (which is a prerequisite for FULL_NOHZ support). This patchset builds upon earlier work by Kevin Hilman and is based on 3.15-rc2. Changes v2 to v3: * Save/restore necessary registers in ct_user_enter and ct_user_exit * Annotate "error paths" out of el0_sync with ct_user_exit Changes v1 to v2: * Save far_el1 in x26 temporarily Larry Bassel (2): arm64: adjust el0_sync so that a function can be called arm64: enable context tracking arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/entry.S| 76 3 files changed, 70 insertions(+), 8 deletions(-) -- 1.8.3.2
Re: [PATCH v2 2/2] arm64: enable context tracking
On 07 May 14 11:17, Will Deacon wrote: > On Wed, May 07, 2014 at 12:32:29AM +0100, Larry Bassel wrote: > > Make calls to ct_user_enter when the kernel is exited > > and ct_user_exit when the kernel is entered (in el0_da, > > el0_ia, el0_svc, el0_irq). > > Why only these entry points? I can reschedule after any exception from EL0, > so I'd expect all exceptions from userspace to need annotating, no? > > > These macros expand to function calls which will only work > > properly if el0_sync and related code has been rearranged > > (in a previous patch of this series). > > > > In order to avoid saving registers, the slow syscall path > > is forced (as x86 does). > > ... and if you decide to handle undef exceptions, I think you'll need > the register saving too, in case the kernel needs to perform emulation. These are excellent points, I will rework the patch and submit v3. Thanks for the feedback. > > Will Larry
[PATCH v2 0/2] context tracker support for arm64
Implement and enable context tracking for arm64 (which is a prerequisite for FULL_NOHZ support). This patchset builds upon earlier work by Kevin Hilman and is based on 3.15-rc2. Larry Bassel (2): arm64: adjust el0_sync so that a function can be called arm64: enable context tracking arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/entry.S| 36 +++- 3 files changed, 33 insertions(+), 5 deletions(-) -- 1.8.3.2
[PATCH v2 1/2] arm64: adjust el0_sync so that a function can be called
To implement the context tracker properly on arm64, a function call needs to be made after debugging and interrupts are turned on, but before the lr is changed to point to ret_from_exception(). If the function call is made after the lr is changed the function will not return to the correct place. For similar reasons, defer the setting of x0 so that it doesn't need to be saved around the function call (save far_el1 in x26 temporarily instead). Signed-off-by: Larry Bassel Reviewed-by: Kevin Hilman --- arch/arm64/kernel/entry.S | 14 +- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 39ac630..d920d7f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -349,11 +349,11 @@ el0_sync: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state b.eqel0_svc - adr lr, ret_from_exception cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 b.eqel0_ia + adr lr, ret_from_exception cmp x24, #ESR_EL1_EC_FP_ASIMD // FP/ASIMD access b.eqel0_fpsimd_acc cmp x24, #ESR_EL1_EC_FP_EXC64 // FP/ASIMD exception @@ -378,11 +378,11 @@ el0_sync_compat: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state b.eqel0_svc_compat - adr lr, ret_from_exception cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eqel0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 b.eqel0_ia + adr lr, ret_from_exception cmp x24, #ESR_EL1_EC_FP_ASIMD // FP/ASIMD access b.eqel0_fpsimd_acc cmp x24, #ESR_EL1_EC_FP_EXC32 // FP/ASIMD exception @@ -421,28 +421,32 @@ el0_da: /* * Data abort handling */ - mrs x0, far_el1 - bic x0, x0, #(0xff << 56) + mrs x26, far_el1 disable_step x1 isb enable_dbg // enable interrupts before calling the main handler enable_irq + mov x0, x26 + bic x0, x0, #(0xff << 56) mov x1, x25 mov x2, sp + adr lr, ret_from_exception b do_mem_abort el0_ia: /* * Instruction abort handling */ - mrs x0, far_el1 + mrs x26, far_el1 disable_step x1 isb enable_dbg // enable interrupts before calling the main handler enable_irq + mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp + adr lr, ret_from_exception b do_mem_abort el0_fpsimd_acc: /* -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 2/2] arm64: enable context tracking
Make calls to ct_user_enter when the kernel is exited and ct_user_exit when the kernel is entered (in el0_da, el0_ia, el0_svc, el0_irq). These macros expand to function calls which will only work properly if el0_sync and related code has been rearranged (in a previous patch of this series). In order to avoid saving registers, the slow syscall path is forced (as x86 does). The calls to ct_user_exit are made after hw debugging has been enabled (enable_dbg). The call to ct_user_enter is made at the beginning of the kernel_exit macro. This patch is based on earlier work by Kevin Hilman. Signed-off-by: Kevin Hilman Signed-off-by: Larry Bassel --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/entry.S| 22 ++ 3 files changed, 24 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e6e4d37..152d92b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -55,6 +55,7 @@ config ARM64 select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE + select HAVE_CONTEXT_TRACKING help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 720e70b..301ea6a 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -108,6 +108,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ #define TIF_SWITCH_MM 23 /* deferred switch_mm */ +#define TIF_NOHZ24 #define _TIF_SIGPENDING(1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index d920d7f..5fe447c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,6 +30,22 @@ #include /* + * Context tracking subsystem. Used to instrument transitions + * between user and kernel mode. + */ + .macro ct_user_exit +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_exit +#endif + .endm + + .macro ct_user_enter +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_enter +#endif + .endm + +/* * Bad Abort numbers *- */ @@ -88,6 +104,7 @@ .macro kernel_exit, el, ret = 0 ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 + ct_user_enter ldr x23, [sp, #S_SP]// load return stack pointer .endif .if \ret @@ -427,6 +444,7 @@ el0_da: enable_dbg // enable interrupts before calling the main handler enable_irq + ct_user_exit mov x0, x26 bic x0, x0, #(0xff << 56) mov x1, x25 @@ -443,6 +461,7 @@ el0_ia: enable_dbg // enable interrupts before calling the main handler enable_irq + ct_user_exit mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp @@ -511,6 +530,7 @@ el0_irq_naked: bl trace_hardirqs_off #endif + ct_user_exit irq_handler get_thread_info tsk @@ -633,10 +653,12 @@ el0_svc_naked:// compat entry point isb enable_dbg enable_irq + ct_user_exit get_thread_info tsk ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing tbnzx16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls? + tbnzx16, #TIF_NOHZ, __sys_trace adr lr, ret_fast_syscall// return address cmp scno, sc_nr // check upper syscall limit b.hsni_sys -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
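In C terms, the two tbnz tests added to el0_svc_naked above amount to something like the following (a loose, hypothetical rendering for illustration; the real check is the assembly in the diff):

	/* Pseudo-C for the syscall-entry flag tests. */
	unsigned long flags = current_thread_info()->flags;

	if (flags & ((1UL << TIF_SYSCALL_TRACE) | (1UL << TIF_NOHZ)))
		goto __sys_trace;	/* slow path: full register save/restore,
					   so the context tracking calls may
					   clobber caller-saved registers freely */

Routing TIF_NOHZ tasks down the existing trace path is what lets the fast path stay free of extra register saving around the context tracking calls.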
[PATCH] arm64: Support arch_irq_work_raise() via self IPIs
Support for arch_irq_work_raise() was missing from arm64 (a prerequisite for FULL_NOHZ). This patch is based on the arm32 patch ARM 7872/1 which ports cleanly. commit bf18525fd793101df42a1344ecc48b49b62e48c9 Author: Stephen Boyd Date: Tue Oct 29 20:32:56 2013 +0100 ARM: 7872/1: Support arch_irq_work_raise() via self IPIs By default, IRQ work is run from the tick interrupt (see irq_work_run() in update_process_times()). When we're in full NOHZ mode, restarting the tick requires the use of IRQ work and if the only place we run IRQ work is in the tick interrupt we have an unbreakable cycle. Implement arch_irq_work_raise() via self IPIs to break this cycle and get the tick started again. Note that we implement this via IPIs which are only available on SMP builds. This shouldn't be a problem because full NOHZ is only supported on SMP builds anyway. Signed-off-by: Stephen Boyd Reviewed-by: Kevin Hilman Cc: Frederic Weisbecker Signed-off-by: Russell King Signed-off-by: Larry Bassel Reviewed-by: Kevin Hilman --- arch/arm64/include/asm/hardirq.h | 2 +- arch/arm64/kernel/smp.c | 18 ++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index ae4801d..0be6782 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -20,7 +20,7 @@ #include #include -#define NR_IPI 5 +#define NR_IPI 6 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f0a141d..20fd074 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -62,6 +63,7 @@ enum ipi_msg_type { IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, IPI_TIMER, + IPI_IRQ_WORK, }; /* @@ -455,6 +457,13 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ + smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); +} +#endif + static const char *ipi_types[NR_IPI] = { #define S(x,s) [x - IPI_RESCHEDULE] = s S(IPI_RESCHEDULE, "Rescheduling interrupts"), @@ -462,6 +471,7 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_TIMER, "Timer broadcast interrupts"), + S(IPI_IRQ_WORK, "IRQ work interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -554,6 +564,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; #endif +#ifdef CONFIG_IRQ_WORK + case IPI_IRQ_WORK: + irq_enter(); + irq_work_run(); + irq_exit(); + break; +#endif + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/3] arm64: enable context tracking
Make calls to ct_user_enter when the kernel is exited and ct_user_exit when the kernel is entered (in el0_da, el0_ia, el0_svc, el0_irq). These macros expand to function calls which will only work properly if el0_sync and related code has been rearranged (in a previous patch of this series). The calls to ct_user_exit are made after hw debugging has been enabled (enable_dbg). The call to ct_user_enter is made at the end of the kernel_exit macro. Signed-off-by: Kevin Hilman Signed-off-by: Larry Bassel --- arch/arm64/kernel/entry.S | 5 + 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 76b09d8..e949435 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -104,6 +104,7 @@ .macro kernel_exit, el, ret = 0 ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 + ct_user_enter ldr x23, [sp, #S_SP]// load return stack pointer .endif .if \ret @@ -442,6 +443,7 @@ el0_da: enable_dbg // enable interrupts before calling the main handler enable_irq + ct_user_exit mrs x0, far_el1 bic x0, x0, #(0xff << 56) mov x1, x25 @@ -457,6 +459,7 @@ el0_ia: enable_dbg // enable interrupts before calling the main handler enable_irq + ct_user_exit mrs x0, far_el1 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp @@ -525,6 +528,7 @@ el0_irq_naked: bl trace_hardirqs_off #endif + ct_user_exit irq_handler get_thread_info tsk @@ -647,6 +651,7 @@ el0_svc_naked: // compat entry point isb enable_dbg enable_irq + ct_user_exit get_thread_info tsk ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/3] arm64: add support for context tracking
From: Kevin Hilman

Add the macros and defines needed to implement context tracking
on arm64.

Signed-off-by: Kevin Hilman
Signed-off-by: Larry Bassel
---
 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/kernel/entry.S            | 16 ++++++++++++++++
 3 files changed, 18 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e6e4d37..152d92b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
 	select RTC_LIB
 	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
+	select HAVE_CONTEXT_TRACKING
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b..301ea6a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -108,6 +108,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SINGLESTEP		21
 #define TIF_32BIT		22	/* 32bit process */
 #define TIF_SWITCH_MM		23	/* deferred switch_mm */
+#define TIF_NOHZ		24
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index eda7755..76b09d8 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,22 @@
 #include
 
 /*
+ * Context tracking subsystem. Used to instrument transitions
+ * between user and kernel mode.
+ */
+	.macro ct_user_exit
+#ifdef CONFIG_CONTEXT_TRACKING
+	bl	context_tracking_user_exit
+#endif
+	.endm
+
+	.macro ct_user_enter
+#ifdef CONFIG_CONTEXT_TRACKING
+	bl	context_tracking_user_enter
+#endif
+	.endm
+
+/*
  * Bad Abort numbers
  *------------------
  */
-- 
1.8.3.2
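The macros compile away entirely when CONFIG_CONTEXT_TRACKING is off.
A rough C analogue of that pattern (the _c inline wrappers below are
illustrative only, not kernel code):

#include <linux/context_tracking.h>

/*
 * C analogue of the ct_user_exit/ct_user_enter assembly macros:
 * real calls when context tracking is configured, no-ops otherwise.
 */
#ifdef CONFIG_CONTEXT_TRACKING
static inline void ct_user_exit_c(void)
{
	context_tracking_user_exit();
}

static inline void ct_user_enter_c(void)
{
	context_tracking_user_enter();
}
#else
static inline void ct_user_exit_c(void) { }
static inline void ct_user_enter_c(void) { }
#endif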
[PATCH 0/3] context tracker support for arm64
Implement and enable context tracking for arm64 (which is a
prerequisite for FULL_NOHZ support).

This patchset builds upon earlier work by Kevin Hilman and is based
on 3.15-rc2.

Kevin Hilman (1):
  arm64: add support for context tracking

Larry Bassel (2):
  arm64: adjust el0_sync so that a function can be called
  arm64: enable context tracking

 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/kernel/entry.S            | 33 ++++++++++++++++++++++++++++-----
 3 files changed, 30 insertions(+), 5 deletions(-)

-- 
1.8.3.2
[PATCH 1/3] arm64: adjust el0_sync so that a function can be called
To implement the context tracker properly on arm64, a function call
needs to be made after debugging and interrupts are turned on, but
before the lr is changed to point to ret_from_exception(). If the
function call is made after the lr is changed, the function will not
return to the correct place.

For similar reasons, defer the setting of x0 so that it doesn't need
to be saved around the function call.

Signed-off-by: Larry Bassel
Reviewed-by: Kevin Hilman
---
 arch/arm64/kernel/entry.S | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 39ac630..eda7755 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -349,11 +349,11 @@ el0_sync:
 	lsr	x24, x25, #ESR_EL1_EC_SHIFT	// exception class
 	cmp	x24, #ESR_EL1_EC_SVC64		// SVC in 64-bit state
 	b.eq	el0_svc
-	adr	lr, ret_from_exception
 	cmp	x24, #ESR_EL1_EC_DABT_EL0	// data abort in EL0
 	b.eq	el0_da
 	cmp	x24, #ESR_EL1_EC_IABT_EL0	// instruction abort in EL0
 	b.eq	el0_ia
+	adr	lr, ret_from_exception
 	cmp	x24, #ESR_EL1_EC_FP_ASIMD	// FP/ASIMD access
 	b.eq	el0_fpsimd_acc
 	cmp	x24, #ESR_EL1_EC_FP_EXC64	// FP/ASIMD exception
@@ -378,11 +378,11 @@ el0_sync_compat:
 	lsr	x24, x25, #ESR_EL1_EC_SHIFT	// exception class
 	cmp	x24, #ESR_EL1_EC_SVC32		// SVC in 32-bit state
 	b.eq	el0_svc_compat
-	adr	lr, ret_from_exception
 	cmp	x24, #ESR_EL1_EC_DABT_EL0	// data abort in EL0
 	b.eq	el0_da
 	cmp	x24, #ESR_EL1_EC_IABT_EL0	// instruction abort in EL0
 	b.eq	el0_ia
+	adr	lr, ret_from_exception
 	cmp	x24, #ESR_EL1_EC_FP_ASIMD	// FP/ASIMD access
 	b.eq	el0_fpsimd_acc
 	cmp	x24, #ESR_EL1_EC_FP_EXC32	// FP/ASIMD exception
@@ -421,28 +421,30 @@ el0_da:
 	/*
 	 * Data abort handling
 	 */
-	mrs	x0, far_el1
-	bic	x0, x0, #(0xff << 56)
 	disable_step x1
 	isb
 	enable_dbg
 	// enable interrupts before calling the main handler
 	enable_irq
+	mrs	x0, far_el1
+	bic	x0, x0, #(0xff << 56)
 	mov	x1, x25
 	mov	x2, sp
+	adr	lr, ret_from_exception
 	b	do_mem_abort
 el0_ia:
 	/*
 	 * Instruction abort handling
 	 */
-	mrs	x0, far_el1
 	disable_step x1
 	isb
 	enable_dbg
 	// enable interrupts before calling the main handler
 	enable_irq
+	mrs	x0, far_el1
 	orr	x1, x25, #1 << 24	// use reserved ISS bit for instruction aborts
 	mov	x2, sp
+	adr	lr, ret_from_exception
 	b	do_mem_abort
 el0_fpsimd_acc:
 	/*
-- 
1.8.3.2
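To see why the ordering matters, here is a hedged C analogy; none of
these names exist in the kernel. Treat lr as a single continuation
slot that every bl instruction clobbers with its own return address,
so it may only be loaded with ret_from_exception after the last call.

typedef void (*cont_t)(void);

static cont_t lr;	/* stands in for the ARM64 link register */

static void ret_from_exception(void) { /* restore user state */ }

static void call(void (*fn)(void))
{
	lr = NULL;	/* every bl overwrites lr on the way in */
	fn();
}

static void context_tracking_hook(void) { }

static void el0_da_like(void)
{
	call(context_tracking_hook);	/* safe: lr not yet meaningful */
	lr = ret_from_exception;	/* set only after the last call */
	/* b do_mem_abort: do_mem_abort "returns" through lr */
}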