[RFC PATCH v2 2/2] Implement sharing/unsharing of PMDs for FS/DAX

2019-06-07 Thread Larry Bassel
This is based on (but somewhat different from) what hugetlbfs
does to share/unshare page tables.

Signed-off-by: Larry Bassel 
---
 include/linux/hugetlb.h |   4 ++
 mm/huge_memory.c        |  37 +
 mm/hugetlb.c            |   8 ++--
 mm/memory.c             | 108 +++-
 4 files changed, 152 insertions(+), 5 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index edf476c..debff55 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -140,6 +140,10 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
+unsigned long page_table_shareable(struct vm_area_struct *svma,
+  struct vm_area_struct *vma,
+  unsigned long addr, pgoff_t idx);
+bool vma_shareable(struct vm_area_struct *vma, unsigned long addr);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
  int write);
 struct page *follow_huge_pd(struct vm_area_struct *vma,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9f8bce9..935874c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1751,6 +1751,33 @@ static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
mm_dec_nr_ptes(mm);
 }
 
+#ifdef CONFIG_ARCH_HAS_HUGE_PMD_SHARE
+static int unshare_huge_pmd(struct mm_struct *mm, unsigned long addr,
+   pmd_t *pmdp)
+{
+   pgd_t *pgd = pgd_offset(mm, addr);
+   p4d_t *p4d = p4d_offset(pgd, addr);
+   pud_t *pud = pud_offset(p4d, addr);
+
+   WARN_ON(page_count(virt_to_page(pmdp)) == 0);
+   if (page_count(virt_to_page(pmdp)) == 1)
+   return 0;
+
+   pud_clear(pud);
+   put_page(virt_to_page(pmdp));
+   mm_dec_nr_pmds(mm);
+   return 1;
+}
+
+#else
+static int unshare_huge_pmd(struct mm_struct *mm, unsigned long addr,
+   pmd_t *pmdp)
+{
+   return 0;
+}
+
+#endif
+
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 pmd_t *pmd, unsigned long addr)
 {
@@ -1768,6 +1795,11 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 * pgtable_trans_huge_withdraw after finishing pmdp related
 * operations.
 */
+   if (unshare_huge_pmd(vma->vm_mm, addr, pmd)) {
+   spin_unlock(ptl);
+   return 1;
+   }
+
orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
tlb->fullmm);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
@@ -1915,6 +1947,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (!ptl)
return 0;
 
+   if (unshare_huge_pmd(mm, addr, pmd)) {
+   spin_unlock(ptl);
+   return HPAGE_PMD_NR;
+   }
+
preserve_write = prot_numa && pmd_write(*pmd);
ret = 1;
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3a54c9d..1c1ed4e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4653,9 +4653,9 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 }
 
 #ifdef CONFIG_ARCH_HAS_HUGE_PMD_SHARE
-static unsigned long page_table_shareable(struct vm_area_struct *svma,
-   struct vm_area_struct *vma,
-   unsigned long addr, pgoff_t idx)
+unsigned long page_table_shareable(struct vm_area_struct *svma,
+  struct vm_area_struct *vma,
+  unsigned long addr, pgoff_t idx)
 {
unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
svma->vm_start;
@@ -4678,7 +4678,7 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
return saddr;
 }
 
-static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
 {
unsigned long base = addr & PUD_MASK;
unsigned long end = base + PUD_SIZE;
diff --git a/mm/memory.c b/mm/memory.c
index ddf20bd..1ca8f75 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3932,6 +3932,109 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
return 0;
 }
 
+#ifdef CONFIG_ARCH_HAS_HUGE_PMD_SHARE
+static pmd_t *huge_pmd_offset(struct mm_struct *mm,
+ unsigned long addr, unsigned long sz)
+{
+   pgd_t *pgd;
+   p4d_t *p4d;
+   pud_t *pud;
+   pmd_t *pmd;
+
+   pgd = pgd_offset(mm, addr);
+   if (!pgd_present(*pgd))
+   return NULL;
+   p4d = p4d_offset(pgd, addr);
+   if (!p4d_present(*p4d))
+   return NULL;
+
+   pud = pud_offset(p4d, addr);
+   if (!pud_present(*pud))
+   return NULL;
+
+   return pmd_offset(pud, addr);
+}

[RFC PATCH v2 1/2] Rename CONFIG_ARCH_WANT_HUGE_PMD_SHARE to CONFIG_ARCH_HAS_HUGE_PMD_SHARE

2019-06-07 Thread Larry Bassel
Signed-off-by: Larry Bassel 
---
 arch/arm64/Kconfig          | 2 +-
 arch/arm64/mm/hugetlbpage.c | 2 +-
 arch/x86/Kconfig            | 2 +-
 mm/hugetlb.c                | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 697ea05..36d6189 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -901,7 +901,7 @@ config HW_PERF_EVENTS
 config SYS_SUPPORTS_HUGETLBFS
def_bool y
 
-config ARCH_WANT_HUGE_PMD_SHARE
+config ARCH_HAS_HUGE_PMD_SHARE
def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
 
 config ARCH_HAS_CACHE_LINE_SIZE
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index f475e54..4f3cb3f 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -241,7 +241,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 */
ptep = pte_alloc_map(mm, pmdp, addr);
} else if (sz == PMD_SIZE) {
-   if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
+   if (IS_ENABLED(CONFIG_ARCH_HAS_HUGE_PMD_SHARE) &&
pud_none(READ_ONCE(*pudp)))
ptep = huge_pmd_share(mm, addr, pudp);
else
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2bbbd4d..fdbddb9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -301,7 +301,7 @@ config ARCH_HIBERNATION_POSSIBLE
 config ARCH_SUSPEND_POSSIBLE
def_bool y
 
-config ARCH_WANT_HUGE_PMD_SHARE
+config ARCH_HAS_HUGE_PMD_SHARE
def_bool y
 
 config ARCH_WANT_GENERAL_HUGETLB
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ac843d3..3a54c9d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4652,7 +4652,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
return 0;
 }
 
-#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+#ifdef CONFIG_ARCH_HAS_HUGE_PMD_SHARE
 static unsigned long page_table_shareable(struct vm_area_struct *svma,
struct vm_area_struct *vma,
unsigned long addr, pgoff_t idx)
@@ -4807,7 +4807,7 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
return 1;
 }
 #define want_pmd_share()   (1)
-#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+#else /* !CONFIG_ARCH_HAS_HUGE_PMD_SHARE */
 pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 {
return NULL;
@@ -4823,7 +4823,7 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
 {
 }
 #define want_pmd_share()   (0)
-#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+#endif /* CONFIG_ARCH_HAS_HUGE_PMD_SHARE */
 
 #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
 pte_t *huge_pte_alloc(struct mm_struct *mm,
-- 
1.8.3.1



[RFC PATCH v2 0/2] Share PMDs for FS/DAX on x86

2019-06-07 Thread Larry Bassel
Changes from v1 to v2:

* Rebased on v5.2-rc3

* An incorrect reference to "page table entries" was fixed (pointed
out by Kirill Shutemov)

* Renamed CONFIG_ARCH_WANT_HUGE_PMD_SHARE
to CONFIG_ARCH_HAS_HUGE_PMD_SHARE instead of introducing
a new config option (suggested by Dan Williams)

* Removed some unnecessary #ifdef stubs (suggested by Matt Wilcox)

* A previously overlooked case involving mprotect() is now handled
properly (pointed out by Mike Kravetz)

---

This patchset implements sharing of page tables pointing
to 2MiB pages (PMDs) for FS/DAX on x86.

Only shared mappings of files (i.e. neither private mappings nor
anonymous pages) are eligible for PMD sharing.

Due to the characteristics of DAX, this code is simpler and
less intrusive than the general case would be.

In our use case (high end Oracle database using DAX/XFS/PMEM/2MiB
pages) there would be significant memory savings.

A future system might have 6 TiB of PMEM on it and
there might be 10,000 processes each mapping all of this 6 TiB.
Here the savings would be approximately
(6 TiB / 2 MiB) * 8 bytes (the size of one page table entry) * 10,000 = 240 GiB
(and these page tables themselves would probably be in
non-PMEM (ordinary RAM)).
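
A quick back-of-the-envelope check of that estimate (plain userspace C,
not part of the patch):

#include <stdio.h>

int main(void)
{
	unsigned long long pmem    = 6ULL << 40;   /* 6 TiB of PMEM */
	unsigned long long pmd_map = 2ULL << 20;   /* each PMD entry maps 2 MiB */
	unsigned long long nprocs  = 10000;        /* processes mapping all of it */

	/* one 8-byte PMD entry per 2 MiB mapped, per process */
	unsigned long long bytes = pmem / pmd_map * 8 * nprocs;

	printf("%llu bytes (~%llu GiB)\n", bytes, bytes >> 30);   /* ~234 GiB */
	return 0;
}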

There would also be a reduction in page faults because in
some cases the page fault has already been satisfied and
the page table entry has been filled in (and so the processes
after the first would not take a fault).

The code for detecting whether PMDs can be shared and
the implementation of sharing and unsharing is based
on, but somewhat different from, that in mm/hugetlb.c,
though some of the code from that file could be reused and
thus was made non-static.
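
For illustration, here is a rough sketch (not the literal patch code; the
function name and exact structure are illustrative) of how a FS/DAX
pmd_share() can reuse those now non-static helpers, closely mirroring
huge_pmd_share() in mm/hugetlb.c:

/* Illustrative sketch only -- modeled on huge_pmd_share(), not the patch. */
static pmd_t *dax_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
			    unsigned long addr, pud_t *pud)
{
	struct address_space *mapping = vma->vm_file->f_mapping;
	pgoff_t idx = linear_page_index(vma, addr);
	struct vm_area_struct *svma;
	unsigned long saddr;
	pmd_t *spmd = NULL;
	pmd_t *pmd;
	spinlock_t *ptl;

	if (!vma_shareable(vma, addr))
		return pmd_alloc(mm, pud, addr);

	i_mmap_lock_write(mapping);
	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
		if (svma == vma)
			continue;

		/* does another mapping of this file have a PMD page to reuse? */
		saddr = page_table_shareable(svma, vma, addr, idx);
		if (saddr) {
			spmd = huge_pmd_offset(svma->vm_mm, saddr, PMD_SIZE);
			if (spmd) {
				get_page(virt_to_page(spmd));
				break;
			}
		}
	}
	if (!spmd)
		goto out;

	/* see the locking discussion in the review thread below */
	ptl = pmd_lockptr(mm, spmd);
	spin_lock(ptl);
	if (pud_none(*pud)) {
		pud_populate(mm, pud,
			     (pmd_t *)((unsigned long)spmd & PAGE_MASK));
		mm_inc_nr_pmds(mm);
	} else {
		put_page(virt_to_page(spmd));	/* lost the race; drop ref */
	}
	spin_unlock(ptl);
out:
	pmd = pmd_alloc(mm, pud, addr);
	i_mmap_unlock_write(mapping);
	return pmd;
}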

Larry Bassel (2):
  Rename CONFIG_ARCH_WANT_HUGE_PMD_SHARE to
CONFIG_ARCH_HAS_HUGE_PMD_SHARE
  Implement sharing/unsharing of PMDs for FS/DAX

 arch/arm64/Kconfig          |   2 +-
 arch/arm64/mm/hugetlbpage.c |   2 +-
 arch/x86/Kconfig            |   2 +-
 include/linux/hugetlb.h     |   4 ++
 mm/huge_memory.c            |  37 +++
 mm/hugetlb.c                |  14 +++---
 mm/memory.c                 | 108 +++-
 7 files changed, 158 insertions(+), 11 deletions(-)

-- 
1.8.3.1



Re: [PATCH, RFC 2/2] Implement sharing/unsharing of PMDs for FS/DAX

2019-05-24 Thread Larry Bassel
On 14 May 19 16:01, Kirill A. Shutemov wrote:
> On Thu, May 09, 2019 at 09:05:33AM -0700, Larry Bassel wrote:
[trim]
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -1747,6 +1747,33 @@ static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
> > mm_dec_nr_ptes(mm);
> >  }
> >  
> > +#ifdef CONFIG_MAY_SHARE_FSDAX_PMD
> > +static int unshare_huge_pmd(struct mm_struct *mm, unsigned long addr,
> > +   pmd_t *pmdp)
> > +{
> > +   pgd_t *pgd = pgd_offset(mm, addr);
> > +   p4d_t *p4d = p4d_offset(pgd, addr);
> > +   pud_t *pud = pud_offset(p4d, addr);
> > +
> > +   WARN_ON(page_count(virt_to_page(pmdp)) == 0);
> > +   if (page_count(virt_to_page(pmdp)) == 1)
> > +   return 0;
> > +
> > +   pud_clear(pud);
> 
> You don't have proper locking in place to do this.

This code is based on and very similar to the code in
mm/hugetlb.c (huge_pmd_unshare()).

I asked Mike Kravetz why the locking in huge_pmd_share() and
huge_pmd_unshare() is correct. The issue (as you point out later
in your email) is whether in both of those cases it is OK to
take the PMD table lock and then modify the PUD table.

He responded with the following analysis:

-
I went back and looked at the locking in the hugetlb code.  Here is
most of the code for huge_pmd_share().

i_mmap_lock_write(mapping);
vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
if (svma == vma)
continue;

saddr = page_table_shareable(svma, vma, addr, idx);
if (saddr) {
spte = huge_pte_offset(svma->vm_mm, saddr,
   vma_mmu_pagesize(svma));
if (spte) {
get_page(virt_to_page(spte));
break;
}
}
}

if (!spte)
goto out;

ptl = huge_pte_lock(hstate_vma(vma), mm, spte);
The primary reason the page table lock is taken here is for the purpose of
checking and possibly updating the PUD (pointer to PMD page).  Note that by
the time we get here we already have found a PMD page to share.  Also note
that the lock taken is the one associated with the PMD page.

The synchronization question to ask is:  Can anyone else modify the PUD value
while I am holding the PMD lock?  In general, the answer is Yes.  However,
we can infer something subtle about the shared PMD case.  Suppose someone
else wanted to set the PUD value.  The only value they could set it to is the
PMD page we found in this routine.  They also would need to go through this
routine to set the value.  They also would need to get the lock on the same
shared PMD.  Actually, they would hit the mapping->i_mmap_rwsem first.  But,
the bottom line is that nobody else can set it.  What about clearing?  In the
hugetlb case, the only places where PUD gets cleared are final page table
tear down and huge_pmd_unshare().  The final page table tear down case is not
interesting as the process is exiting.  All callers of huge_pmd_unshare must
hold the (PMD) page table lock.  This is a requirement.  Therefore, within
a single process this synchronizes two threads:  one calling huge_pmd_share
and another huge_pmd_unshare.
-
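
For reference, the code immediately following that lock acquisition in
mm/hugetlb.c (roughly, in kernels of this era) is:

	if (pud_none(*pud)) {
		pud_populate(mm, pud,
				(pmd_t *)((unsigned long)spte & PAGE_MASK));
		mm_inc_nr_pmds(mm);
	} else {
		put_page(virt_to_page(spte));	/* PUD already set; drop ref */
	}
	spin_unlock(ptl);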

I assert that the same analysis applies to pmd_share() and unshare_huge_pmd()
which are added in this patch.

> 
> > +   put_page(virt_to_page(pmdp));
> > +   mm_dec_nr_pmds(mm);
> > +   return 1;
> > +}
> > +
> > +#else
> > +static int unshare_huge_pmd(struct mm_struct *mm, unsigned long addr,
> > +   pmd_t *pmdp)
> > +{
> > +   return 0;
> > +}
> > +
> > +#endif
> > +
> >  int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
> >  pmd_t *pmd, unsigned long addr)
> >  {
> > @@ -1764,6 +1791,11 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
> >  * pgtable_trans_huge_withdraw after finishing pmdp related
> >  * operations.
> >  */
> > +   if (unshare_huge_pmd(vma->vm_mm, addr, pmd)) {
> > +   spin_unlock(ptl);
> > +   return 1;
> > +   }
> > +
> > orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
> > tlb->fullmm);
> > tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 641cedf..919a290 100644
> > --- a/mm/hugetlb.c
> > +++ b/

Re: [PATCH, RFC 0/2] Share PMDs for FS/DAX on x86

2019-05-14 Thread Larry Bassel
On 14 May 19 15:28, Kirill A. Shutemov wrote:
> On Thu, May 09, 2019 at 09:05:31AM -0700, Larry Bassel wrote:
> > This patchset implements sharing of page table entries pointing
> > to 2MiB pages (PMDs) for FS/DAX on x86.
> 
> -EPARSE.
> 
> How do you share entries? Entries do not take any space, page tables that
> contain these entries do.

Yes, I'll correct this in v2.

> 
> Have you checked if the patch makes memory consumption any better? I have
> doubts about it.

Yes, I have -- the following is debugging output from my testing.
The (admittedly simple) test case is two copies of a program that mmaps
1GiB of a DAX/XFS file (with 2MiB page size), touches the first page
(physical 20040 in this case) and then sleeps forever.

sharing disabled:

(process A)
[  420.369975] pgd_index = fe
[  420.369975] pgd = e1ebf83b
[  420.369975] pgd_val = 800405ca8067
[  420.369976] pud_index = 100
[  420.369976] pud = bd7a7df0
[  420.369976] pud_val = 4058f9067
[  420.369977] pmd_index = 0
[  420.369977] pmd = 791e93d4
[  420.369977] pmd_val = 8402004008e7
[  420.369978] pmd huge
[  420.369978] page_addr = 20040, page_offset = 0
[  420.369979] vaddr = 7f40, paddr = 20040

(process B)
[  420.370013] pgd_index = fe
[  420.370014] pgd = a2bac60d
[  420.370014] pgd_val = 800405a8f067
[  420.370015] pud_index = 100
[  420.370015] pud = dcc3ff1a
[  420.370015] pud_val = 3fc713067
[  420.370016] pmd_index = 0
[  420.370016] pmd = 6b4679db
[  420.370016] pmd_val = 8402004008e7
[  420.370017] pmd huge
[  420.370017] page_addr = 20040, page_offset = 0
[  420.370018] vaddr = 7f40, paddr = 20040

sharing enabled:

(process A)
[  696.992342] pgd_index = fe
[  696.992342] pgd = 9612024b
[  696.992343] pgd_val = 800404725067
[  696.992343] pud_index = 100
[  696.992343] pud = c98ab17c
[  696.992344] pud_val = 4038e3067
[  696.992344] pmd_index = 0
[  696.992344] pmd = 2437681b
[  696.992344] pmd_val = 8402004008e7
[  696.992345] pmd huge
[  696.992345] page_addr = 20040, page_offset = 0
[  696.992345] vaddr = 7f40, paddr = 20040

(process B)
[  696.992351] pgd_index = fe
[  696.992351] pgd = 12326848
[  696.992352] pgd_val = 80040a953067
[  696.992352] pud_index = 100
[  696.992352] pud = f989bcf6
[  696.992352] pud_val = 4038e3067
[  696.992353] pmd_index = 0
[  696.992353] pmd = 2437681b
[  696.992353] pmd_val = 8402004008e7
[  696.992353] pmd huge
[  696.992354] page_addr = 20040, page_offset = 0
[  696.992354] vaddr = 7f40, paddr = 20040

Note that in the sharing enabled case, the pud_val and pmd are
the same for the two processes. In the disabled case we
have two separate pmds (and so more memory was allocated).

Also (though not visible from the output above), the second
process did not take a page fault, as the virtual->physical mapping
was already established thanks to the sharing.

Larry


Re: question about page tables in DAX/FS/PMEM case

2019-02-21 Thread Larry Bassel
[adding linux-mm]

On 21 Feb 19 15:41, Jerome Glisse wrote:
> On Wed, Feb 20, 2019 at 03:06:22PM -0800, Larry Bassel wrote:
> > I'm working on sharing page tables in the DAX/XFS/PMEM/PMD case.
> > 
> > If multiple processes would use the identical page of PMDs corresponding
> > to a 1 GiB address range of DAX/XFS/PMEM/PMDs, presumably one can, instead
> > of populating a new PUD, just atomically increment a refcount and point
> > to the same PUD in the next level above.

Thanks for your feedback. Some comments/clarification below.

> 
> I think page table sharing was discussed several times in the past and
> the complexity involved versus the benefit was not clear. For 1GB
> of virtual address space you need:
> #pte pages = 1G/(512 * 2^12)   = 512 pte pages
> #pmd pages = 1G/(512 * 512 * 2^12) = 1   pmd pages
> 
> So if we were to share the pmd directory page we would be saving a
> total of 513 pages for every page table, or ~2MB. This goes up with
> the number of processes that map the same range, i.e. if 10 processes map
> the same range and share the same pmd then you are saving 9 * 2MB =
> 18MB of memory. This seems a relatively modest saving.

The file block size = page size in what I am working on would
be 2 MiB (sharing puds/pages of pmds); I'm not trying to
support sharing pmds/pages of ptes. And yes, the savings in this
case is actually even less than in your example (but see my example below).

> 
> AFAIK there is no hardware benefit from sharing the page table
> directory between different page tables. So the only benefit is the
> amount of memory we save.

Yes, in our use case (high end Oracle database using DAX/XFS/PMEM/PMD)
the main benefit would be memory savings:

A future system might have 6 TiB of PMEM on it and
there might be 10,000 processes each mapping all of this 6 TiB.
Here the savings would be approximately
(6 TiB / 2 MiB) * 8 bytes (the size of one page table entry) * 10,000 = 240 GiB
(and these page tables themselves would be in non-PMEM (ordinary RAM)).

> 
> See below for comments on complexity to achieve this.
> 
[trim]
> > 
> > If I have a mmap of a DAX/FS/PMEM file and I take
> > a page (either pte or PMD sized) fault on access to this file,
> > the page table(s) are set up in dax_iomap_fault() in fs/dax.c (correct?).
> 
> Not exactly: the page tables are allocated long before dax_iomap_fault()
> gets called. They are allocated by handle_mm_fault() and its child
> functions.

Yes, I misstated this: the fault is handled there, which may well
alter the PUD (in my case), but the original page tables are set up earlier.

> 
> > 
> > If the process later munmaps this file or exits but there are still
> > other users of the shared page of PMDs, I would need to
> > detect that this has happened and act accordingly (#3 above)
> > 
> > Where will these page table entries be torn down?
> > In the same code where any other page table is torn down?
> > If this is the case, what would the cleanest way of telling that these
> > page tables (PMDs, etc.) correspond to a DAX/FS/PMEM mapping
> > (look at the physical address pointed to?) so that
> > I could do the right thing here.
> > 
> > I understand that I may have missed something obvious here.
> > 
> 
> There are many issues; here are the ones I can think of:
> - finding a pmd/pud to share: you need to walk the reverse mapping
>   of the range you are mapping to find if any process or other
>   virtual address already has a pud or pmd you can reuse. This can
>   take more time than allocating page directory pages.
> - if one process munmaps some portion of a shared pud you need to
>   break the sharing; this means that munmap (or mremap) would need
>   to handle this page table directory sharing case first
> - many code paths in the kernel might need updates to understand this
>   shared page table thing (mprotect, userfaultfd, ...)
> - the locking rules are bound to be painful
> - this might not work on all architectures, as some architectures
>   do associate information with the page table directory that cannot
>   always be shared (it would need to be enabled arch by arch)

Yes, some architectures don't support DAX at all (note again that
I'm not trying to share non-DAX page table here).

> 
> The nice thing:
> - unmapping for migration: when you unmap a shared pud/pmd you can
>   decrement the mapcount by the shared pud/pmd count; this could
>   speed up migration

A follow-up question: the kernel does share page tables for hugetlbfs
(also 2 MiB pages); why aren't the above issues relevant there as well
(or are they, but we support it anyhow)?

> 
> This is what i could think of on the top of my head but there m

question about page tables in DAX/FS/PMEM case

2019-02-20 Thread Larry Bassel
I'm working on sharing page tables in the DAX/XFS/PMEM/PMD case.

If multiple processes would use the identical page of PMDs corresponding
to a 1 GiB address range of DAX/XFS/PMEM/PMDs, presumably one can, instead
of populating a new PUD, just atomically increment a refcount and point
to the same PUD in the next level above.

i.e.

OLD:
process 1:
VA -> levels of page tables -> PUD1 -> page of PMDs1
process 2:
VA -> levels of page tables -> PUD2 -> page of PMDs2

NEW:
process 1:
VA -> levels of page tables -> PUD1 -> page of PMDs1
process 2:
VA -> levels of page tables -> PUD1 -> page of PMDs1 (refcount 2)

There are several cases to consider:

1. New mapping
OLD:
make a new PUD, populate the associated page of PMDs
(at least partially) with PMD entries.
NEW:
same

2. Mapping by a process that is identical (same VA->PA, size, protections, etc.)
to one that already exists
OLD:
make a new PUD, populate the associated page of PMDs
(at least partially) with PMD entries.
NEW:
use the same PUD, increase the refcount (potentially even if this mapping is
private, in which case there may eventually be a copy-on-write -- see #5 below)

3. Unmapping of a mapping which is the same as that from another process
OLD:
destroy the process's copy of mapping, free PUD, etc.
NEW:
decrease the refcount; only if it is now 0 do we destroy the mapping, etc.

4. Unmapping of a mapping which is unique (refcount 1)
OLD:
destroy the process's copy of mapping, free PUD, etc.
NEW:
same

5. Mapping was private (but the same as another process's), and the process
writes
OLD:
break the PMD into PTEs, destroy the PMD mapping, free the PUD, etc.
NEW:
decrease the refcount; only if it is now 0 do we destroy the mapping, etc.
We still break the PMD into PTEs (see the sketch below).
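
A minimal sketch of the refcount bookkeeping described in cases 2, 3 and 5
(hypothetical helper names, not actual patch code; it assumes, as hugetlb
does, that the page_count() of the page holding the shared PMDs serves as
the refcount):

/* Sketch only: illustrative names, assuming page_count() is the refcount. */
static void share_pmd_page(struct mm_struct *mm, pud_t *pud, pmd_t *shared)
{
	get_page(virt_to_page(shared));		/* case 2: one more user */
	pud_populate(mm, pud, shared);		/* point our PUD at the same page */
	mm_inc_nr_pmds(mm);
}

static void unshare_pmd_page(struct mm_struct *mm, pud_t *pud, pmd_t *shared)
{
	pud_clear(pud);				/* cases 3/4: drop our reference */
	put_page(virt_to_page(shared));		/* page freed when refcount hits 0 */
	mm_dec_nr_pmds(mm);
}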

If I have a mmap of a DAX/FS/PMEM file and I take
a page (either pte or PMD sized) fault on access to this file,
the page table(s) are set up in dax_iomap_fault() in fs/dax.c (correct?).

If the process later munmaps this file or exits but there are still
other users of the shared page of PMDs, I would need to
detect that this has happened and act accordingly (#3 above)

Where will these page table entries be torn down?
In the same code where any other page table is torn down?
If this is the case, what would the cleanest way of telling that these
page tables (PMDs, etc.) correspond to a DAX/FS/PMEM mapping
(look at the physical address pointed to?) so that
I could do the right thing here.

I understand that I may have missed something obvious here.

Thanks.

Larry


question about mmap MAP_PRIVATE on PMEM/DAX/fs files

2019-02-06 Thread Larry Bassel
Is mmapping a PMEM/DAX/fs file MAP_PRIVATE supported? Is it something
that people are likely to want to do?

If it is supported, suppose I open a file in PMEM/DAX/fs, mmap it
MAP_PRIVATE, read from the memory mapped file (with memory accesses,
not the read syscall) and take a page fault which the kernel satisfies.

At this time do my page tables for the private mmaped page(s) point to the
PMEM corresponding to the file and the kernel will wait until
the page(s) is/are altered (either by me or someone else) to
copy on write and give me a different page/mapping?

Or does the kernel avoid this by always mapping a copy of the
page(s) involved in the private mmap in the first place?

In either case, is my private copy going to come from PMEM or is it
an "ordinary" page, or is this "random"? Does the program have
any choice in this (i.e. suppose I want to make sure my copied
page is persistent)?

Thanks.

Larry


RFC: revisiting shared page tables

2018-12-04 Thread Larry Bassel
In August 2005, Dave McCracken sent out a patch which implemented shared
page tables (http://lkml.iu.edu/hypermail/linux/kernel/0508.3/1623.html)
based on 2.6.13.

He also wrote two OLS papers about the topic
(https://landley.net/kdocs/ols/2003/ols2003-pages-315-320.pdf
and https://www.landley.net/kdocs/ols/2006/ols2006v2-pages-125-130.pdf), the
second of which was published after his patch submission.

This patch was discussed for a few days. It was not accepted.

There were several comments about technical issues (about a typo,
some questions about locking, how to search the vmas, whether one must
iterate through all of the vmas) which no doubt could be fixed, and
in fact Dave indicated that he would eventually provide a revised patch
which fixed these problems. AFAICT this never occurred.

However, there were also questions about whether sharing page tables would
provide any significant benefit.

Specifically, there were concerns about whether the patch would
improve performance at all (Dave indicated a 3% improvement on some
"large benchmarks"), especially once another change (the test at
the beginning of copy_page_range() which prevents page table copies
in some cases) was merged (d992895ba2, which has been in the kernel since
2.6.14).

It was also suggested that the use of randomize_va_space
might make shared page tables uninteresting, though that objection
appeared to be addressed.

Isn't Linux kernel archaeology fun :-)

13 years have elapsed. Given the many changes in the kernel since the original
patch submission, I'd appreciate your insight into the following questions:

* Is there (still?) a need for shared page tables (and if not, why not?).
* If one were to resume work on this, is there any reason why one shouldn't
start with Dave's 2.6.13 patch (plus fixes to the known bugs in it)
and forward port it to the tip, rather than starting from scratch?

Thanks.

Larry Bassel


Re: [RFC] mm, THP: Map read-only text segments using large THP pages

2018-05-17 Thread Larry Bassel
On 17 May 18 08:23, Matthew Wilcox wrote:
> 
> I can't find any information on what page sizes SPARC supports.
> Maybe you could point me at a reference?  All I've managed to find is
> the architecture manuals for SPARC which believe it is not their purpose
> to mandate an MMU.
> 

Page sizes of 8K, 64K, 512K, 4M, 32M, 256M, 2G, 16G are allowed
architecturally -- some of these aren't present in some
SPARC machines. Generally 8K, 64K, 4M, 256M, 2G, 16G are
present on modern machines. 

Also note that the SPARC THP page size is 8M (so that it is
PMD aligned).

Larry


[PATCH v8 1/2] arm64: adjust el0_sync so that a function can be called

2014-06-03 Thread Larry Bassel
To implement the context tracker properly on arm64,
a function call needs to be made after debugging and
interrupts are turned on, but before the lr is changed
to point to ret_to_user(). If the function call
is made after the lr is changed the function will not
return to the correct place.

For similar reasons, defer the setting of x0 so that
it doesn't need to be saved around the function call
(save far_el1 in x26 temporarily instead).

Acked-by: Will Deacon 
Reviewed-by: Kevin Hilman 
Tested-by: Kevin Hilman 
Signed-off-by: Larry Bassel 
---
 arch/arm64/kernel/entry.S | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e8b23a3..b0101b9 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -354,7 +354,6 @@ el0_sync:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC64  // SVC in 64-bit state
b.eq    el0_svc
-   adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq    el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -383,7 +382,6 @@ el0_sync_compat:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC32  // SVC in 32-bit state
b.eq    el0_svc_compat
-   adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq    el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -426,22 +424,25 @@ el0_da:
/*
 * Data abort handling
 */
-   mrs x0, far_el1
-   bic x0, x0, #(0xff << 56)
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   bic x0, x26, #(0xff << 56)
mov x1, x25
mov x2, sp
+   adr lr, ret_to_user
b   do_mem_abort
 el0_ia:
/*
 * Instruction abort handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
+   adr lr, ret_to_user
b   do_mem_abort
 el0_fpsimd_acc:
/*
@@ -450,6 +451,7 @@ el0_fpsimd_acc:
enable_dbg
mov x0, x25
mov x1, sp
+   adr lr, ret_to_user
b   do_fpsimd_acc
 el0_fpsimd_exc:
/*
@@ -458,16 +460,19 @@ el0_fpsimd_exc:
enable_dbg
mov x0, x25
mov x1, sp
+   adr lr, ret_to_user
b   do_fpsimd_exc
 el0_sp_pc:
/*
 * Stack or PC alignment exception handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
mov x1, x25
mov x2, sp
+   adr lr, ret_to_user
b   do_sp_pc_abort
 el0_undef:
/*
@@ -476,6 +481,7 @@ el0_undef:
// enable interrupts before calling the main handler
enable_dbg_and_irq
mov x0, sp
+   adr lr, ret_to_user
b   do_undefinstr
 el0_dbg:
/*
@@ -493,6 +499,7 @@ el0_inv:
mov x0, sp
mov x1, #BAD_SYNC
mrs x2, esr_el1
+   adr lr, ret_to_user
b   bad_mode
 ENDPROC(el0_sync)
 
-- 
1.8.3.2



[PATCH v8 0/2] context tracker support for arm64

2014-06-03 Thread Larry Bassel
Implement and enable context tracking for arm64 (which is
a prerequisite for FULL_NOHZ support). This patchset
builds upon earlier work by Kevin Hilman and is based on
Will Deacon's tree.

Changes v7 to v8:

* Fix bug where el1_irq was calling ct_user_exit rather than el0_irq

Changes v6 to v7:

* Rename parameter of ct_user_exit from restore to syscall

Changes v5 to v6:

* Don't save far_el1 in x26 in el0_dbg path (not needed)
* TIF_NOHZ processes go through the slow path (so no register
save/restore is needed in ct_user_enter)

Changes v4 to v5:

* Improvement to code restoring far_el1 (suggested by Christopher Covington)
* Improvement to register save/restore in ct_user_enter

Changes v3 to v4:

* Rename parameter of ct_user_exit from save to restore
* Rebased patch to Will Deacon's tree (branch remotes/origin/aarch64
of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git)

Changes v2 to v3:

* Save/restore necessary registers in ct_user_enter and ct_user_exit
* Annotate "error paths" out of el0_sync with ct_user_exit

Changes v1 to v2:

* Save far_el1 in x26 temporarily

Larry Bassel (2):
  arm64: adjust el0_sync so that a function can be called
  arm64: enable context tracking

 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/thread_info.h |  4 +++
 arch/arm64/kernel/entry.S            | 58 +++-
 3 files changed, 56 insertions(+), 7 deletions(-)

-- 
1.8.3.2



[PATCH v8 2/2] arm64: enable context tracking

2014-06-03 Thread Larry Bassel
Make calls to ct_user_enter when the kernel is exited
and ct_user_exit when the kernel is entered (in el0_da,
el0_ia, el0_svc, el0_irq and all of the "error" paths).

These macros expand to function calls which will only work
properly if el0_sync and related code has been rearranged
(in a previous patch of this series).

The calls to ct_user_exit are made after hw debugging has been
enabled (enable_dbg_and_irq).

The call to ct_user_enter is made at the beginning of the
kernel_exit macro.

This patch is based on earlier work by Kevin Hilman.
Save/restore optimizations were also done by Kevin.

Acked-by: Will Deacon 
Reviewed-by: Kevin Hilman 
Tested-by: Kevin Hilman 
Signed-off-by: Kevin Hilman 
Signed-off-by: Larry Bassel 
---
 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/thread_info.h |  4
 arch/arm64/kernel/entry.S            | 39 +++-
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e759af5..ef18ae5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+   select HAVE_CONTEXT_TRACKING
help
  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b..8363f34 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SIGPENDING 0
 #define TIF_NEED_RESCHED   1
 #define TIF_NOTIFY_RESUME  2   /* callback before returning to user */
+#define TIF_NOHZ   7
 #define TIF_SYSCALL_TRACE  8
 #define TIF_POLLING_NRFLAG 16
 #define TIF_MEMDIE 18  /* is terminating due to OOM killer */
@@ -113,9 +114,12 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED  (1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
 #define _TIF_32BIT (1 << TIF_32BIT)
+#define _TIF_SYSCALL_TRACE  (1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOHZ   (1 << TIF_NOHZ)
 
 #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 _TIF_NOTIFY_RESUME)
+#define _TIF_SYSCALL_WORK   (_TIF_SYSCALL_TRACE | _TIF_NOHZ)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b0101b9..0c5844e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,32 @@
 #include 
 
 /*
+ * Context tracking subsystem.  Used to instrument transitions
+ * between user and kernel mode.
+ */
+   .macro ct_user_exit, syscall = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_exit
+   .if \syscall == 1
+   /*
+* Save/restore needed during syscalls.  Restore syscall arguments from
+* the values already saved on stack during kernel_entry.
+*/
+   ldp x0, x1, [sp]
+   ldp x2, x3, [sp, #S_X2]
+   ldp x4, x5, [sp, #S_X4]
+   ldp x6, x7, [sp, #S_X6]
+   .endif
+#endif
+   .endm
+
+   .macro ct_user_enter
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_enter
+#endif
+   .endm
+
+/*
  * Bad Abort numbers
 *-----------------
  */
@@ -91,6 +117,7 @@
.macro  kernel_exit, el, ret = 0
ldp x21, x22, [sp, #S_PC]   // load ELR, SPSR
.if \el == 0
+   ct_user_enter
ldr x23, [sp, #S_SP]   // load return stack pointer
.endif
.if \ret
@@ -427,6 +454,7 @@ el0_da:
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
bic x0, x26, #(0xff << 56)
mov x1, x25
mov x2, sp
@@ -439,6 +467,7 @@ el0_ia:
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
@@ -449,6 +478,7 @@ el0_fpsimd_acc:
 * Floating Point or Advanced SIMD access
 */
enable_dbg
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_to_user
@@ -458,6 +488,7 @@ el0_fpsimd_exc:
 * Floating Point or Advanced SIMD exception
 */
enable_dbg
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_to_user
@@ -480,6 +511,7 @@ el0_undef:
 */
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
  

[PATCH v7 0/2] context tracker support for arm64

2014-05-30 Thread Larry Bassel
Implement and enable context tracking for arm64 (which is
a prerequisite for FULL_NOHZ support). This patchset
builds upon earlier work by Kevin Hilman and is based on
Will Deacon's tree.

Changes v6 to v7:

* Rename parameter of ct_user_exit from restore to syscall

Changes v5 to v6:

* Don't save far_el1 in x26 in el0_dbg path (not needed)
* TIF_NOHZ processes go through the slow path (so no register
save/restore is needed in ct_user_enter)

Changes v4 to v5:

* Improvement to code restoring far_el1 (suggested by Christopher Covington)
* Improvement to register save/restore in ct_user_enter

Changes v3 to v4:

* Rename parameter of ct_user_exit from save to restore
* Rebased patch to Will Deacon's tree (branch remotes/origin/aarch64
of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git)

Changes v2 to v3:

* Save/restore necessary registers in ct_user_enter and ct_user_exit
* Annotate "error paths" out of el0_sync with ct_user_exit

Changes v1 to v2:

* Save far_el1 in x26 temporarily

Larry Bassel (2):
  arm64: adjust el0_sync so that a function can be called
  arm64: enable context tracking

 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/thread_info.h |  4 +++
 arch/arm64/kernel/entry.S            | 58 +++-
 3 files changed, 56 insertions(+), 7 deletions(-)

-- 
1.8.3.2



[PATCH v7 2/2] arm64: enable context tracking

2014-05-30 Thread Larry Bassel
Make calls to ct_user_enter when the kernel is exited
and ct_user_exit when the kernel is entered (in el0_da,
el0_ia, el0_svc, el0_irq and all of the "error" paths).

These macros expand to function calls which will only work
properly if el0_sync and related code has been rearranged
(in a previous patch of this series).

The calls to ct_user_exit are made after hw debugging has been
enabled (enable_dbg_and_irq).

The call to ct_user_enter is made at the beginning of the
kernel_exit macro.

This patch is based on earlier work by Kevin Hilman.
Save/restore optimizations were also done by Kevin.

Signed-off-by: Kevin Hilman 
Signed-off-by: Larry Bassel 
Acked-by: Will Deacon 
---
 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/thread_info.h |  4
 arch/arm64/kernel/entry.S            | 39 +++-
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e759af5..ef18ae5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+   select HAVE_CONTEXT_TRACKING
help
  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b..8363f34 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SIGPENDING 0
 #define TIF_NEED_RESCHED   1
 #define TIF_NOTIFY_RESUME  2   /* callback before returning to user */
+#define TIF_NOHZ   7
 #define TIF_SYSCALL_TRACE  8
 #define TIF_POLLING_NRFLAG 16
 #define TIF_MEMDIE 18  /* is terminating due to OOM killer */
@@ -113,9 +114,12 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED  (1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
 #define _TIF_32BIT (1 << TIF_32BIT)
+#define _TIF_SYSCALL_TRACE  (1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOHZ   (1 << TIF_NOHZ)
 
 #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 _TIF_NOTIFY_RESUME)
+#define _TIF_SYSCALL_WORK   (_TIF_SYSCALL_TRACE | _TIF_NOHZ)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b0101b9..39d4dc9 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,32 @@
 #include 
 
 /*
+ * Context tracking subsystem.  Used to instrument transitions
+ * between user and kernel mode.
+ */
+   .macro ct_user_exit, syscall = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_exit
+   .if \syscall == 1
+   /*
+* Save/restore needed during syscalls.  Restore syscall arguments from
+* the values already saved on stack during kernel_entry.
+*/
+   ldp x0, x1, [sp]
+   ldp x2, x3, [sp, #S_X2]
+   ldp x4, x5, [sp, #S_X4]
+   ldp x6, x7, [sp, #S_X6]
+   .endif
+#endif
+   .endm
+
+   .macro ct_user_enter
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_enter
+#endif
+   .endm
+
+/*
  * Bad Abort numbers
 *-----------------
  */
@@ -91,6 +117,7 @@
.macro  kernel_exit, el, ret = 0
ldp x21, x22, [sp, #S_PC]   // load ELR, SPSR
.if \el == 0
+   ct_user_enter
ldr x23, [sp, #S_SP]   // load return stack pointer
.endif
.if \ret
@@ -318,6 +345,7 @@ el1_irq:
bl  trace_hardirqs_off
 #endif
 
+   ct_user_exit
irq_handler
 
 #ifdef CONFIG_PREEMPT
@@ -427,6 +455,7 @@ el0_da:
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
bic x0, x26, #(0xff << 56)
mov x1, x25
mov x2, sp
@@ -439,6 +468,7 @@ el0_ia:
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
@@ -449,6 +479,7 @@ el0_fpsimd_acc:
 * Floating Point or Advanced SIMD access
 */
enable_dbg
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_to_user
@@ -458,6 +489,7 @@ el0_fpsimd_exc:
 * Floating Point or Advanced SIMD exception
 */
enable_dbg
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_to_user
@@ -480,6 +512,7 @@ el0_undef:
 */

[PATCH v7 1/2] arm64: adjust el0_sync so that a function can be called

2014-05-30 Thread Larry Bassel
To implement the context tracker properly on arm64,
a function call needs to be made after debugging and
interrupts are turned on, but before the lr is changed
to point to ret_to_user(). If the function call
is made after the lr is changed the function will not
return to the correct place.

For similar reasons, defer the setting of x0 so that
it doesn't need to be saved around the function call
(save far_el1 in x26 temporarily instead).

Signed-off-by: Larry Bassel 
Acked-by: Will Deacon 
---
 arch/arm64/kernel/entry.S | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e8b23a3..b0101b9 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -354,7 +354,6 @@ el0_sync:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC64  // SVC in 64-bit state
b.eq    el0_svc
-   adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq    el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -383,7 +382,6 @@ el0_sync_compat:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC32  // SVC in 32-bit state
b.eq    el0_svc_compat
-   adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq    el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -426,22 +424,25 @@ el0_da:
/*
 * Data abort handling
 */
-   mrs x0, far_el1
-   bic x0, x0, #(0xff << 56)
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   bic x0, x26, #(0xff << 56)
mov x1, x25
mov x2, sp
+   adr lr, ret_to_user
b   do_mem_abort
 el0_ia:
/*
 * Instruction abort handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
+   adr lr, ret_to_user
b   do_mem_abort
 el0_fpsimd_acc:
/*
@@ -450,6 +451,7 @@ el0_fpsimd_acc:
enable_dbg
mov x0, x25
mov x1, sp
+   adr lr, ret_to_user
b   do_fpsimd_acc
 el0_fpsimd_exc:
/*
@@ -458,16 +460,19 @@ el0_fpsimd_exc:
enable_dbg
mov x0, x25
mov x1, sp
+   adr lr, ret_to_user
b   do_fpsimd_exc
 el0_sp_pc:
/*
 * Stack or PC alignment exception handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
mov x1, x25
mov x2, sp
+   adr lr, ret_to_user
b   do_sp_pc_abort
 el0_undef:
/*
@@ -476,6 +481,7 @@ el0_undef:
// enable interrupts before calling the main handler
enable_dbg_and_irq
mov x0, sp
+   adr lr, ret_to_user
b   do_undefinstr
 el0_dbg:
/*
@@ -493,6 +499,7 @@ el0_inv:
mov x0, sp
mov x1, #BAD_SYNC
mrs x2, esr_el1
+   adr lr, ret_to_user
b   bad_mode
 ENDPROC(el0_sync)
 
-- 
1.8.3.2



[PATCH v6 0/2] context tracker support for arm64

2014-05-29 Thread Larry Bassel
Implement and enable context tracking for arm64 (which is
a prerequisite for FULL_NOHZ support). This patchset
builds upon earlier work by Kevin Hilman and is based on
Will Deacon's tree.

Changes v5 to v6:

* Don't save far_el1 in x26 in el0_dbg path (not needed)
* TIF_NOHZ processes go through the slow path (so no register
save/restore is needed in ct_user_enter)

Changes v4 to v5:

* Improvement to code restoring far_el1 (suggested by Christopher Covington)
* Improvement to register save/restore in ct_user_enter

Changes v3 to v4:

* Rename parameter of ct_user_exit from save to restore
* Rebased patch to Will Deacon's tree (branch remotes/origin/aarch64
of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git)

Changes v2 to v3:

* Save/restore necessary registers in ct_user_enter and ct_user_exit
* Annotate "error paths" out of el0_sync with ct_user_exit

Changes v1 to v2:

* Save far_el1 in x26 temporarily

Larry Bassel (2):
  arm64: adjust el0_sync so that a function can be called
  arm64: enable context tracking

 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/thread_info.h |  4 +++
 arch/arm64/kernel/entry.S            | 58 +++-
 3 files changed, 56 insertions(+), 7 deletions(-)

-- 
1.8.3.2



[PATCH v6 2/2] arm64: enable context tracking

2014-05-29 Thread Larry Bassel
Make calls to ct_user_enter when the kernel is exited
and ct_user_exit when the kernel is entered (in el0_da,
el0_ia, el0_svc, el0_irq and all of the "error" paths).

These macros expand to function calls which will only work
properly if el0_sync and related code has been rearranged
(in a previous patch of this series).

The calls to ct_user_exit are made after hw debugging has been
enabled (enable_dbg_and_irq).

The call to ct_user_enter is made at the beginning of the
kernel_exit macro.

This patch is based on earlier work by Kevin Hilman.
Save/restore optimizations were also done by Kevin.

Signed-off-by: Kevin Hilman 
Signed-off-by: Larry Bassel 
---
 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/thread_info.h |  4
 arch/arm64/kernel/entry.S            | 39 +++-
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e759af5..ef18ae5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+   select HAVE_CONTEXT_TRACKING
help
  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b..8363f34 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SIGPENDING 0
 #define TIF_NEED_RESCHED   1
 #define TIF_NOTIFY_RESUME  2   /* callback before returning to user */
+#define TIF_NOHZ   7
 #define TIF_SYSCALL_TRACE  8
 #define TIF_POLLING_NRFLAG 16
 #define TIF_MEMDIE 18  /* is terminating due to OOM killer */
@@ -113,9 +114,12 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED  (1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
 #define _TIF_32BIT (1 << TIF_32BIT)
+#define _TIF_SYSCALL_TRACE  (1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOHZ   (1 << TIF_NOHZ)
 
 #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 _TIF_NOTIFY_RESUME)
+#define _TIF_SYSCALL_WORK   (_TIF_SYSCALL_TRACE | _TIF_NOHZ)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b0101b9..3c484e2 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,32 @@
 #include 
 
 /*
+ * Context tracking subsystem.  Used to instrument transitions
+ * between user and kernel mode.
+ */
+   .macro ct_user_exit, restore = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_exit
+   .if \restore == 1
+   /*
+* Save/restore needed during syscalls.  Restore syscall arguments from
+* the values already saved on stack during kernel_entry.
+*/
+   ldp x0, x1, [sp]
+   ldp x2, x3, [sp, #S_X2]
+   ldp x4, x5, [sp, #S_X4]
+   ldp x6, x7, [sp, #S_X6]
+   .endif
+#endif
+   .endm
+
+   .macro ct_user_enter
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_enter
+#endif
+   .endm
+
+/*
  * Bad Abort numbers
 *-----------------
  */
@@ -91,6 +117,7 @@
.macro  kernel_exit, el, ret = 0
ldp x21, x22, [sp, #S_PC]   // load ELR, SPSR
.if \el == 0
+   ct_user_enter
ldr x23, [sp, #S_SP]   // load return stack pointer
.endif
.if \ret
@@ -318,6 +345,7 @@ el1_irq:
bl  trace_hardirqs_off
 #endif
 
+   ct_user_exit
irq_handler
 
 #ifdef CONFIG_PREEMPT
@@ -427,6 +455,7 @@ el0_da:
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
bic x0, x26, #(0xff << 56)
mov x1, x25
mov x2, sp
@@ -439,6 +468,7 @@ el0_ia:
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
@@ -449,6 +479,7 @@ el0_fpsimd_acc:
 * Floating Point or Advanced SIMD access
 */
enable_dbg
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_to_user
@@ -458,6 +489,7 @@ el0_fpsimd_exc:
 * Floating Point or Advanced SIMD exception
 */
enable_dbg
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_to_user
@@ -480,6 +512,7 @@ el0_undef:
 */
// enable interrupts before

[PATCH v6 1/2] arm64: adjust el0_sync so that a function can be called

2014-05-29 Thread Larry Bassel
To implement the context tracker properly on arm64,
a function call needs to be made after debugging and
interrupts are turned on, but before the lr is changed
to point to ret_to_user(). If the function call
is made after the lr is changed the function will not
return to the correct place.

For similar reasons, defer the setting of x0 so that
it doesn't need to be saved around the function call
(save far_el1 in x26 temporarily instead).

Signed-off-by: Larry Bassel 
---
 arch/arm64/kernel/entry.S | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e8b23a3..b0101b9 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -354,7 +354,6 @@ el0_sync:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC64  // SVC in 64-bit state
b.eq    el0_svc
-   adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq    el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -383,7 +382,6 @@ el0_sync_compat:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC32  // SVC in 32-bit state
b.eq    el0_svc_compat
-   adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq    el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -426,22 +424,25 @@ el0_da:
/*
 * Data abort handling
 */
-   mrs x0, far_el1
-   bic x0, x0, #(0xff << 56)
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   bic x0, x26, #(0xff << 56)
mov x1, x25
mov x2, sp
+   adr lr, ret_to_user
b   do_mem_abort
 el0_ia:
/*
 * Instruction abort handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
+   adr lr, ret_to_user
b   do_mem_abort
 el0_fpsimd_acc:
/*
@@ -450,6 +451,7 @@ el0_fpsimd_acc:
enable_dbg
mov x0, x25
mov x1, sp
+   adr lr, ret_to_user
b   do_fpsimd_acc
 el0_fpsimd_exc:
/*
@@ -458,16 +460,19 @@ el0_fpsimd_exc:
enable_dbg
mov x0, x25
mov x1, sp
+   adr lr, ret_to_user
b   do_fpsimd_exc
 el0_sp_pc:
/*
 * Stack or PC alignment exception handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
mov x1, x25
mov x2, sp
+   adr lr, ret_to_user
b   do_sp_pc_abort
 el0_undef:
/*
@@ -476,6 +481,7 @@ el0_undef:
// enable interrupts before calling the main handler
enable_dbg_and_irq
mov x0, sp
+   adr lr, ret_to_user
b   do_undefinstr
 el0_dbg:
/*
@@ -493,6 +499,7 @@ el0_inv:
mov x0, sp
mov x1, #BAD_SYNC
mrs x2, esr_el1
+   adr lr, ret_to_user
b   bad_mode
 ENDPROC(el0_sync)
 
-- 
1.8.3.2



[PATCH v6 1/2] arm64: adjust el0_sync so that a function can be called

2014-05-29 Thread Larry Bassel
To implement the context tracker properly on arm64,
a function call needs to be made after debugging and
interrupts are turned on, but before the lr is changed
to point to ret_to_user(). If the function call
is made after the lr is changed the function will not
return to the correct place.

For similar reasons, defer the setting of x0 so that
it doesn't need to be saved around the function call
(save far_el1 in x26 temporarily instead).

Signed-off-by: Larry Bassel 
---
 arch/arm64/kernel/entry.S | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e8b23a3..b0101b9 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -354,7 +354,6 @@ el0_sync:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC64  // SVC in 64-bit state
b.eqel0_svc
-   adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eqel0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -383,7 +382,6 @@ el0_sync_compat:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC32  // SVC in 32-bit state
b.eqel0_svc_compat
-   adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eqel0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -426,22 +424,25 @@ el0_da:
/*
 * Data abort handling
 */
-   mrs x0, far_el1
-   bic x0, x0, #(0xff << 56)
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   bic x0, x26, #(0xff << 56)
mov x1, x25
mov x2, sp
+   adr lr, ret_to_user
b   do_mem_abort
 el0_ia:
/*
 * Instruction abort handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for 
instruction aborts
mov x2, sp
+   adr lr, ret_to_user
b   do_mem_abort
 el0_fpsimd_acc:
/*
@@ -450,6 +451,7 @@ el0_fpsimd_acc:
enable_dbg
mov x0, x25
mov x1, sp
+   adr lr, ret_to_user
b   do_fpsimd_acc
 el0_fpsimd_exc:
/*
@@ -458,16 +460,19 @@ el0_fpsimd_exc:
enable_dbg
mov x0, x25
mov x1, sp
+   adr lr, ret_to_user
b   do_fpsimd_exc
 el0_sp_pc:
/*
 * Stack or PC alignment exception handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
mov x1, x25
mov x2, sp
+   adr lr, ret_to_user
b   do_sp_pc_abort
 el0_undef:
/*
@@ -476,6 +481,7 @@ el0_undef:
// enable interrupts before calling the main handler
enable_dbg_and_irq
mov x0, sp
+   adr lr, ret_to_user
b   do_undefinstr
 el0_dbg:
/*
@@ -493,6 +499,7 @@ el0_inv:
mov x0, sp
mov x1, #BAD_SYNC
mrs x2, esr_el1
+   adr lr, ret_to_user
b   bad_mode
 ENDPROC(el0_sync)
 
-- 
1.8.3.2



[PATCH v6 2/2] arm64: enable context tracking

2014-05-29 Thread Larry Bassel
Make calls to ct_user_enter when the kernel is exited
and ct_user_exit when the kernel is entered (in el0_da,
el0_ia, el0_svc, el0_irq and all of the "error" paths).

These macros expand to function calls which will only work
properly if el0_sync and related code has been rearranged
(in a previous patch of this series).

The calls to ct_user_exit are made after hw debugging has been
enabled (enable_dbg_and_irq).

The call to ct_user_enter is made at the beginning of the
kernel_exit macro.
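In rough C terms the pairing looks like this (a conceptual sketch only; the real calls are made from assembly in entry.S, as the diff below shows):

    /* on an exception or syscall from EL0: */
    context_tracking_user_exit();   /* ct_user_exit: RCU/NOHZ now see kernel mode */
    /* ... handle the exception or syscall ... */
    /* in kernel_exit, just before returning to EL0: */
    context_tracking_user_enter();  /* ct_user_enter: back to user mode */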

This patch is based on earlier work by Kevin Hilman.
Save/restore optimizations were also done by Kevin.

Signed-off-by: Kevin Hilman 
Signed-off-by: Larry Bassel 
---
 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/thread_info.h |  4 
 arch/arm64/kernel/entry.S | 39 +++-
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e759af5..ef18ae5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+   select HAVE_CONTEXT_TRACKING
help
  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b..8363f34 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SIGPENDING 0
 #define TIF_NEED_RESCHED   1
 #define TIF_NOTIFY_RESUME  2   /* callback before returning to user */
+#define TIF_NOHZ           7
 #define TIF_SYSCALL_TRACE  8
 #define TIF_POLLING_NRFLAG 16
 #define TIF_MEMDIE 18  /* is terminating due to OOM killer */
@@ -113,9 +114,12 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED  (1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
 #define _TIF_32BIT (1 << TIF_32BIT)
+#define _TIF_SYSCALL_TRACE  (1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOHZ   (1 << TIF_NOHZ)
 
 #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 _TIF_NOTIFY_RESUME)
+#define _TIF_SYSCALL_WORK   (_TIF_SYSCALL_TRACE | _TIF_NOHZ)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b0101b9..3c484e2 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,32 @@
 #include 
 
 /*
+ * Context tracking subsystem.  Used to instrument transitions
+ * between user and kernel mode.
+ */
+   .macro ct_user_exit, restore = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_exit
+   .if \restore == 1
+   /*
+* Save/restore needed during syscalls.  Restore syscall arguments from
+* the values already saved on stack during kernel_entry.
+*/
+   ldp x0, x1, [sp]
+   ldp x2, x3, [sp, #S_X2]
+   ldp x4, x5, [sp, #S_X4]
+   ldp x6, x7, [sp, #S_X6]
+   .endif
+#endif
+   .endm
+
+   .macro ct_user_enter
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_enter
+#endif
+   .endm
+
+/*
  * Bad Abort numbers
  *-
  */
@@ -91,6 +117,7 @@
.macro  kernel_exit, el, ret = 0
ldp x21, x22, [sp, #S_PC]   // load ELR, SPSR
.if \el == 0
+   ct_user_enter
ldr x23, [sp, #S_SP]    // load return stack pointer
.endif
.if \ret
@@ -318,6 +345,7 @@ el1_irq:
bl  trace_hardirqs_off
 #endif
 
+   ct_user_exit
irq_handler
 
 #ifdef CONFIG_PREEMPT
@@ -427,6 +455,7 @@ el0_da:
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
bic x0, x26, #(0xff << 56)
mov x1, x25
mov x2, sp
@@ -439,6 +468,7 @@ el0_ia:
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
@@ -449,6 +479,7 @@ el0_fpsimd_acc:
 * Floating Point or Advanced SIMD access
 */
enable_dbg
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_to_user
@@ -458,6 +489,7 @@ el0_fpsimd_exc:
 * Floating Point or Advanced SIMD exception
 */
enable_dbg
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_to_user
@@ -480,6 +512,7 @@ el0_undef:
 */
// enable interrupts before

[PATCH v6 0/2] context tracker support for arm64

2014-05-29 Thread Larry Bassel
Implement and enable context tracking for arm64 (which is
a prerequisite for FULL_NOHZ support). This patchset
builds upon earlier work by Kevin Hilman and is based on
Will Deacon's tree.

Changes v5 to v6:

* Don't save far_el1 in x26 in el0_dbg path (not needed)
* TIF_NOHZ processes go through the slow path (so no register
save/restore is needed in ct_user_enter)

Changes v4 to v5:

* Improvement to code restoring far_el1 (suggested by Christopher Covington)
* Improvement to register save/restore in ct_user_enter

Changes v3 to v4:

* Rename parameter of ct_user_exit from save to restore
* Rebased patch to Will Deacon's tree (branch remotes/origin/aarch64
of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git)

Changes v2 to v3:

* Save/restore necessary registers in ct_user_enter and ct_user_exit
* Annotate "error paths" out of el0_sync with ct_user_exit

Changes v1 to v2:

* Save far_el1 in x26 temporarily


Larry Bassel (2):
  arm64: adjust el0_sync so that a function can be called
  arm64: enable context tracking

 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/thread_info.h |  4 +++
 arch/arm64/kernel/entry.S | 58 +++-
 3 files changed, 56 insertions(+), 7 deletions(-)

-- 
1.8.3.2



Re: [PATCH v5 1/2] arm64: adjust el0_sync so that a function can be called

2014-05-28 Thread Larry Bassel
On 28 May 14 12:27, Will Deacon wrote:
> Hi Larry,
> 
> On Mon, May 26, 2014 at 07:56:12PM +0100, Larry Bassel wrote:
> > To implement the context tracker properly on arm64,
> > a function call needs to be made after debugging and
> > interrupts are turned on, but before the lr is changed
> > to point to ret_to_user(). If the function call
> > is made after the lr is changed the function will not
> > return to the correct place.
> > 
> > For similar reasons, defer the setting of x0 so that
> > it doesn't need to be saved around the function call
> > (save far_el1 in x26 temporarily instead).
> > 
> > Signed-off-by: Larry Bassel 
> 
> [...]
> 
> 
> Why have you added this mov instruction?

I believe (please correct me if I'm wrong) that it is necessary.
Here is why:

> > @@ -476,23 +481,27 @@ el0_undef:
> > // enable interrupts before calling the main handler
> > enable_dbg_and_irq
> > mov x0, sp
> > +   adr lr, ret_to_user
> > b   do_undefinstr
> >  el0_dbg:
> > /*
> >  * Debug exception handling
> >  */
> > tbnz x24, #0, el0_inv // EL0 only
> > -   mrs x0, far_el1
> > +   mrs x26, far_el1

needed because do_debug_exception may clobber x0, so save far_el1
in x26 (as other parts of this patch do)

> > +   mov x0, x26

needed because far_el1 is expected to be in x0 here

> > mov x1, x25
> > mov x2, sp
> > bl  do_debug_exception
> > enable_dbg

[call to ct_user_exit will go here in the next patch, this may re-clobber x0]

> > +   mov x0, x26

needed because far_el1 is expected to be in x0 here

Since the purpose of this patch is to make it possible to call a function
in this code path, the "extra" mov instruction above is necessary,
and IMHO it belongs in this patch rather than in the next one, whose
purpose is to define the ct_user_* macros and add calls to them in
the proper places.

> > b   ret_to_user
> 
> Will

Larry
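Putting the pieces of this thread together, the resulting el0_dbg flow looks like this (a sketch assembled from the hunks quoted above, with explanatory comments added):

    el0_dbg:
        tbnz x24, #0, el0_inv       // EL0 only
        mrs  x26, far_el1           // x26 is callee-saved
        mov  x0, x26                // far_el1 expected in x0
        mov  x1, x25
        mov  x2, sp
        bl   do_debug_exception     // may clobber x0-x18
        enable_dbg
        // ct_user_exit lands here in the next patch, clobbering x0 again
        mov  x0, x26                // re-materialize far_el1
        b    ret_to_user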


[PATCH v5 0/2] context tracker support for arm64

2014-05-26 Thread Larry Bassel
Implement and enable context tracking for arm64 (which is
a prerequisite for FULL_NOHZ support). This patchset
builds upon earlier work by Kevin Hilman and is based on
Will Deacon's tree.

Changes v4 to v5:

* Improvement to code restoring far_el1 (suggested by Christopher Covington)
* Improvement to register save/restore in ct_user_enter

Changes v3 to v4:

* Rename parameter of ct_user_exit from save to restore
* Rebased patch to Will Deacon's tree (branch remotes/origin/aarch64
of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git)

Changes v2 to v3:

* Save/restore necessary registers in ct_user_enter and ct_user_exit
* Annotate "error paths" out of el0_sync with ct_user_exit

Changes v1 to v2:

* Save far_el1 in x26 temporarily

Larry Bassel (2):
  arm64: adjust el0_sync so that a function can be called
  arm64: enable context tracking

 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/kernel/entry.S| 69 
 3 files changed, 64 insertions(+), 7 deletions(-)

-- 
1.8.3.2



[PATCH v5 2/2] arm64: enable context tracking

2014-05-26 Thread Larry Bassel
Make calls to ct_user_enter when the kernel is exited
and ct_user_exit when the kernel is entered (in el0_da,
el0_ia, el0_svc, el0_irq and all of the "error" paths).

These macros expand to function calls which will only work
properly if el0_sync and related code has been rearranged
(in a previous patch of this series).

The calls to ct_user_exit are made after hw debugging has been
enabled (enable_dbg_and_irq).

The call to ct_user_enter is made at the beginning of the
kernel_exit macro.

This patch is based on earlier work by Kevin Hilman.
Save/restore optimizations were also done by Kevin.

Signed-off-by: Kevin Hilman 
Signed-off-by: Larry Bassel 
---
 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/kernel/entry.S | 46 
 3 files changed, 48 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e759af5..ef18ae5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+   select HAVE_CONTEXT_TRACKING
help
  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b..301ea6a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -108,6 +108,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SINGLESTEP 21
 #define TIF_32BIT  22  /* 32bit process */
 #define TIF_SWITCH_MM  23  /* deferred switch_mm */
+#define TIF_NOHZ           24
 
 #define _TIF_SIGPENDING(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED  (1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c6bc1a3..0605963 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,42 @@
 #include 
 
 /*
+ * Context tracking subsystem.  Used to instrument transitions
+ * between user and kernel mode.
+ */
+   .macro ct_user_exit, restore = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_exit
+   .if \restore == 1
+   /*
+* Save/restore needed during syscalls.  Restore syscall arguments from
+* the values already saved on stack during kernel_entry.
+*/
+   ldp x0, x1, [sp]
+   ldp x2, x3, [sp, #S_X2]
+   ldp x4, x5, [sp, #S_X4]
+   ldp x6, x7, [sp, #S_X6]
+   .endif
+#endif
+   .endm
+
+   .macro ct_user_enter, save = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+   .if \save == 1
+   /*
+* We only have to save/restore x0 on the fast syscall path where
+* x0 contains the syscall return.
+*/
+   mov x19, x0
+   .endif
+   bl  context_tracking_user_enter
+   .if \save == 1
+   mov x0, x19
+   .endif
+#endif
+   .endm
+
+/*
  * Bad Abort numbers
  *-
  */
@@ -91,6 +127,7 @@
.macro  kernel_exit, el, ret = 0
ldp x21, x22, [sp, #S_PC]   // load ELR, SPSR
.if \el == 0
+   ct_user_enter \ret
ldr x23, [sp, #S_SP]    // load return stack pointer
.endif
.if \ret
@@ -318,6 +355,7 @@ el1_irq:
bl  trace_hardirqs_off
 #endif
 
+   ct_user_exit
irq_handler
 
 #ifdef CONFIG_PREEMPT
@@ -427,6 +465,7 @@ el0_da:
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
bic x0, x26, #(0xff << 56)
mov x1, x25
mov x2, sp
@@ -439,6 +478,7 @@ el0_ia:
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
@@ -449,6 +489,7 @@ el0_fpsimd_acc:
 * Floating Point or Advanced SIMD access
 */
enable_dbg
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_to_user
@@ -458,6 +499,7 @@ el0_fpsimd_exc:
 * Floating Point or Advanced SIMD exception
 */
enable_dbg
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_to_user
@@ -480,6 +522,7 @@ el0_undef:
 */
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
mov x0, sp
adr lr, ret_to_user
b   do_undefinstr
@@ -494,10 +537,12 @@ el0_dbg:
mov x2, sp
bl  do_debug_exception
enable_dbg
+   ct_user_exit
mov x0, x26
b   ret_to_user
 el0_inv:
enable_dbg
+   ct_user_exit

[PATCH v5 1/2] arm64: adjust el0_sync so that a function can be called

2014-05-26 Thread Larry Bassel
To implement the context tracker properly on arm64,
a function call needs to be made after debugging and
interrupts are turned on, but before the lr is changed
to point to ret_to_user(). If the function call
is made after the lr is changed the function will not
return to the correct place.

For similar reasons, defer the setting of x0 so that
it doesn't need to be saved around the function call
(save far_el1 in x26 temporarily instead).

Signed-off-by: Larry Bassel 
---
 arch/arm64/kernel/entry.S | 23 ---
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e8b23a3..c6bc1a3 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -354,7 +354,6 @@ el0_sync:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC64  // SVC in 64-bit state
b.eq el0_svc
-   adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -383,7 +382,6 @@ el0_sync_compat:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC32  // SVC in 32-bit state
b.eq el0_svc_compat
-   adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -426,22 +424,25 @@ el0_da:
/*
 * Data abort handling
 */
-   mrs x0, far_el1
-   bic x0, x0, #(0xff << 56)
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   bic x0, x26, #(0xff << 56)
mov x1, x25
mov x2, sp
+   adr lr, ret_to_user
b   do_mem_abort
 el0_ia:
/*
 * Instruction abort handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
+   adr lr, ret_to_user
b   do_mem_abort
 el0_fpsimd_acc:
/*
@@ -450,6 +451,7 @@ el0_fpsimd_acc:
enable_dbg
mov x0, x25
mov x1, sp
+   adr lr, ret_to_user
b   do_fpsimd_acc
 el0_fpsimd_exc:
/*
@@ -458,16 +460,19 @@ el0_fpsimd_exc:
enable_dbg
mov x0, x25
mov x1, sp
+   adr lr, ret_to_user
b   do_fpsimd_exc
 el0_sp_pc:
/*
 * Stack or PC alignment exception handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
mov x1, x25
mov x2, sp
+   adr lr, ret_to_user
b   do_sp_pc_abort
 el0_undef:
/*
@@ -476,23 +481,27 @@ el0_undef:
// enable interrupts before calling the main handler
enable_dbg_and_irq
mov x0, sp
+   adr lr, ret_to_user
b   do_undefinstr
 el0_dbg:
/*
 * Debug exception handling
 */
tbnz x24, #0, el0_inv // EL0 only
-   mrs x0, far_el1
+   mrs x26, far_el1
+   mov x0, x26
mov x1, x25
mov x2, sp
bl  do_debug_exception
enable_dbg
+   mov x0, x26
b   ret_to_user
 el0_inv:
enable_dbg
mov x0, sp
mov x1, #BAD_SYNC
mrs x2, esr_el1
+   adr lr, ret_to_user
b   bad_mode
 ENDPROC(el0_sync)
 
-- 
1.8.3.2



Re: [PATCH v4 1/2] arm64: adjust el0_sync so that a function can be called

2014-05-23 Thread Larry Bassel
On 23 May 14 15:44, Catalin Marinas wrote:
> On Thu, May 22, 2014 at 11:35:20PM +0100, Larry Bassel wrote:
> > > On 05/22/2014 03:27 PM, Larry Bassel wrote:
> > > > To implement the context tracker properly on arm64,
> > > > a function call needs to be made after debugging and
> > > > interrupts are turned on, but before the lr is changed
> > > > to point to ret_to_user(). If the function call
> > > > is made after the lr is changed the function will not
> > > > return to the correct place.
> > > > 
> > > > For similar reasons, defer the setting of x0 so that
> > > > it doesn't need to be saved around the function call
> > > > (save far_el1 in x26 temporarily instead).
> > > > 
> > > > Signed-off-by: Larry Bassel 
> > > > ---
> > > >  arch/arm64/kernel/entry.S | 24 +---
> > > >  1 file changed, 17 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> > > > index e8b23a3..20b336e 100644
> > > > --- a/arch/arm64/kernel/entry.S
> > > > +++ b/arch/arm64/kernel/entry.S
> > > > @@ -354,7 +354,6 @@ el0_sync:
> > > > lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
> > > > cmp x24, #ESR_EL1_EC_SVC64  // SVC in 64-bit state
> > > > b.eq el0_svc
> > > > -   adr lr, ret_to_user
> > > > cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
> > > > b.eq el0_da
> > > > cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
> > > > @@ -383,7 +382,6 @@ el0_sync_compat:
> > > > lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
> > > > cmp x24, #ESR_EL1_EC_SVC32  // SVC in 32-bit state
> > > > b.eq el0_svc_compat
> > > > -   adr lr, ret_to_user
> > > > cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
> > > > b.eq el0_da
> > > > cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
> > > > @@ -426,22 +424,26 @@ el0_da:
> > > > /*
> > > >  * Data abort handling
> > > >  */
> > > > -   mrs x0, far_el1
> > > > -   bic x0, x0, #(0xff << 56)
> > > > +   mrs x26, far_el1
> > > > // enable interrupts before calling the main handler
> > > > enable_dbg_and_irq
> > > > +   mov x0, x26
> > > > +   bic x0, x0, #(0xff << 56)
> > > 
> > > Nit: I believe you can bit clear with x26 as the source register and omit the move instruction.
> > 
> > Is that really an improvement (assuming it works)? Are we saving
> > any cycles here? If so, does it matter? It is easy to see what
> > the move instruction is doing.
> 
> Even if it's not noticeable, I would still reduce the number of lines by
> one. BIC with immediate is just an alias for AND and it supports
> different source and destination.

Ack.
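
Concretely, assuming the alias behaves as described, the two-instruction sequence

    mov  x0, x26
    bic  x0, x0, #(0xff << 56)

collapses into a single instruction with distinct source and destination:

    bic  x0, x26, #(0xff << 56)     // x0 = x26 & ~(0xff << 56)

(this is the form that appears in v5 and later of the patch).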

> 
> -- 
> Catalin

Larry


Re: [PATCH v4 1/2] arm64: adjust el0_sync so that a function can be called

2014-05-22 Thread Larry Bassel
On 22 May 14 16:23, Christopher Covington wrote:
> Hi Larry,
> 
> On 05/22/2014 03:27 PM, Larry Bassel wrote:
> > To implement the context tracker properly on arm64,
> > a function call needs to be made after debugging and
> > interrupts are turned on, but before the lr is changed
> > to point to ret_to_user(). If the function call
> > is made after the lr is changed the function will not
> > return to the correct place.
> > 
> > For similar reasons, defer the setting of x0 so that
> > it doesn't need to be saved around the function call
> > (save far_el1 in x26 temporarily instead).
> > 
> > Signed-off-by: Larry Bassel 
> > ---
> >  arch/arm64/kernel/entry.S | 24 +---
> >  1 file changed, 17 insertions(+), 7 deletions(-)
> > 
> > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> > index e8b23a3..20b336e 100644
> > --- a/arch/arm64/kernel/entry.S
> > +++ b/arch/arm64/kernel/entry.S
> > @@ -354,7 +354,6 @@ el0_sync:
> > lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
> > cmp x24, #ESR_EL1_EC_SVC64  // SVC in 64-bit state
> > b.eq el0_svc
> > -   adr lr, ret_to_user
> > cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
> > b.eq el0_da
> > cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
> > @@ -383,7 +382,6 @@ el0_sync_compat:
> > lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
> > cmp x24, #ESR_EL1_EC_SVC32  // SVC in 32-bit state
> > b.eq el0_svc_compat
> > -   adr lr, ret_to_user
> > cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
> > b.eq el0_da
> > cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
> > @@ -426,22 +424,26 @@ el0_da:
> > /*
> >  * Data abort handling
> >  */
> > -   mrs x0, far_el1
> > -   bic x0, x0, #(0xff << 56)
> > +   mrs x26, far_el1
> > // enable interrupts before calling the main handler
> > enable_dbg_and_irq
> > +   mov x0, x26
> > +   bic x0, x0, #(0xff << 56)
> 
> Nit: I believe you can bit clear with x26 as the source register and omit the
> move instruction.

Is that really an improvement (assuming it works)? Are we saving
any cycles here? If so, does it matter? It is easy to see what
the move instruction is doing.

> 
> Regards,
> Christopher
> 
> -- 
> Employee of Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
> hosted by the Linux Foundation.

Larry


[PATCH v4 0/2] context tracker support for arm64

2014-05-22 Thread Larry Bassel
Implement and enable context tracking for arm64 (which is
a prerequisite for FULL_NOHZ support). This patchset
builds upon earlier work by Kevin Hilman and is based on
Will Deacon's tree.

Changes v3 to v4:

* Rename parameter of ct_user_exit from save to restore
* Rebased patch to Will Deacon's tree (branch remotes/origin/aarch64
of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git)

Changes v2 to v3:

* Save/restore necessary registers in ct_user_enter and ct_user_exit
* Annotate "error paths" out of el0_sync with ct_user_exit

Changes v1 to v2:

* Save far_el1 in x26 temporarily

Larry Bassel (2):
  arm64: adjust el0_sync so that a function can be called
  arm64: enable context tracking

 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/kernel/entry.S | 72 
 3 files changed, 67 insertions(+), 7 deletions(-)

-- 
1.8.3.2



[PATCH v4 2/2] arm64: enable context tracking

2014-05-22 Thread Larry Bassel
Make calls to ct_user_enter when the kernel is exited
and ct_user_exit when the kernel is entered (in el0_da,
el0_ia, el0_svc, el0_irq and all of the "error" paths).

These macros expand to function calls which will only work
properly if el0_sync and related code has been rearranged
(in a previous patch of this series).

The calls to ct_user_exit are made after hw debugging has been
enabled (enable_dbg_and_irq).

The call to ct_user_enter is made at the beginning of the
kernel_exit macro.

This patch is based on earlier work by Kevin Hilman.
Save/restore optimizations were also done by Kevin.

Signed-off-by: Kevin Hilman 
Signed-off-by: Larry Bassel 
---
 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/kernel/entry.S | 48 
 3 files changed, 50 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e759af5..ef18ae5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+   select HAVE_CONTEXT_TRACKING
help
  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b..301ea6a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -108,6 +108,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SINGLESTEP 21
 #define TIF_32BIT  22  /* 32bit process */
 #define TIF_SWITCH_MM  23  /* deferred switch_mm */
+#define TIF_NOHZ           24
 
 #define _TIF_SIGPENDING(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED  (1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 20b336e..520da4c 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,44 @@
 #include 
 
 /*
+ * Context tracking subsystem.  Used to instrument transitions
+ * between user and kernel mode.
+ */
+   .macro ct_user_exit, restore = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_exit
+   .if \restore == 1
+   /*
+* Save/restore needed during syscalls.  Restore syscall arguments from
+* the values already saved on stack during kernel_entry.
+*/
+   ldp x0, x1, [sp]
+   ldp x2, x3, [sp, #S_X2]
+   ldp x4, x5, [sp, #S_X4]
+   ldp x6, x7, [sp, #S_X6]
+   .endif
+#endif
+   .endm
+
+   .macro ct_user_enter, save = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+   .if \save == 1
+   /*
+* Save/restore only needed on syscall fastpath, which uses
+* x0-x2.
+*/
+   push x2, x3
+   push x0, x1
+   .endif
+   bl  context_tracking_user_enter
+   .if \save == 1
+   pop x0, x1
+   pop x2, x3
+   .endif
+#endif
+   .endm
+
+/*
  * Bad Abort numbers
  *-
  */
@@ -91,6 +129,7 @@
.macro  kernel_exit, el, ret = 0
ldp x21, x22, [sp, #S_PC]   // load ELR, SPSR
.if \el == 0
+   ct_user_enter \ret
ldr x23, [sp, #S_SP]    // load return stack pointer
.endif
.if \ret
@@ -318,6 +357,7 @@ el1_irq:
bl  trace_hardirqs_off
 #endif
 
+   ct_user_exit
irq_handler
 
 #ifdef CONFIG_PREEMPT
@@ -427,6 +467,7 @@ el0_da:
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
mov x0, x26
bic x0, x0, #(0xff << 56)
mov x1, x25
@@ -440,6 +481,7 @@ el0_ia:
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
@@ -450,6 +492,7 @@ el0_fpsimd_acc:
 * Floating Point or Advanced SIMD access
 */
enable_dbg
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_to_user
@@ -459,6 +502,7 @@ el0_fpsimd_exc:
 * Floating Point or Advanced SIMD exception
 */
enable_dbg
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_to_user
@@ -481,6 +525,7 @@ el0_undef:
 */
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   ct_user_exit
mov x0, sp
adr lr, ret_to_user
b   do_undefinstr
@@ -495,10 +540,12 @@ el0_dbg:
mov x2, sp
bl  do_debug_exception
enable_dbg
+   ct_user_exit
mov x0, x26
b   ret_to_user
 el0_inv:
enable_dbg
+   c

[PATCH v4 1/2] arm64: adjust el0_sync so that a function can be called

2014-05-22 Thread Larry Bassel
To implement the context tracker properly on arm64,
a function call needs to be made after debugging and
interrupts are turned on, but before the lr is changed
to point to ret_to_user(). If the function call
is made after the lr is changed the function will not
return to the correct place.

For similar reasons, defer the setting of x0 so that
it doesn't need to be saved around the function call
(save far_el1 in x26 temporarily instead).

Signed-off-by: Larry Bassel 
---
 arch/arm64/kernel/entry.S | 24 +---
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e8b23a3..20b336e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -354,7 +354,6 @@ el0_sync:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC64  // SVC in 64-bit state
b.eq el0_svc
-   adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -383,7 +382,6 @@ el0_sync_compat:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC32  // SVC in 32-bit state
b.eq el0_svc_compat
-   adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -426,22 +424,26 @@ el0_da:
/*
 * Data abort handling
 */
-   mrs x0, far_el1
-   bic x0, x0, #(0xff << 56)
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
+   bic x0, x0, #(0xff << 56)
mov x1, x25
mov x2, sp
+   adr lr, ret_to_user
b   do_mem_abort
 el0_ia:
/*
 * Instruction abort handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
+   adr lr, ret_to_user
b   do_mem_abort
 el0_fpsimd_acc:
/*
@@ -450,6 +452,7 @@ el0_fpsimd_acc:
enable_dbg
mov x0, x25
mov x1, sp
+   adr lr, ret_to_user
b   do_fpsimd_acc
 el0_fpsimd_exc:
/*
@@ -458,16 +461,19 @@ el0_fpsimd_exc:
enable_dbg
mov x0, x25
mov x1, sp
+   adr lr, ret_to_user
b   do_fpsimd_exc
 el0_sp_pc:
/*
 * Stack or PC alignment exception handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
// enable interrupts before calling the main handler
enable_dbg_and_irq
+   mov x0, x26
mov x1, x25
mov x2, sp
+   adr lr, ret_to_user
b   do_sp_pc_abort
 el0_undef:
/*
@@ -476,23 +482,27 @@ el0_undef:
// enable interrupts before calling the main handler
enable_dbg_and_irq
mov x0, sp
+   adr lr, ret_to_user
b   do_undefinstr
 el0_dbg:
/*
 * Debug exception handling
 */
tbnz x24, #0, el0_inv // EL0 only
-   mrs x0, far_el1
+   mrs x26, far_el1
+   mov x0, x26
mov x1, x25
mov x2, sp
bl  do_debug_exception
enable_dbg
+   mov x0, x26
b   ret_to_user
 el0_inv:
enable_dbg
mov x0, sp
mov x1, #BAD_SYNC
mrs x2, esr_el1
+   adr lr, ret_to_user
b   bad_mode
 ENDPROC(el0_sync)
 
-- 
1.8.3.2



[PATCH v3] arm64: Support arch_irq_work_raise() via self IPIs

2014-05-12 Thread Larry Bassel
Exception stack(0xde05de80 to 0xde05dec8)
de80: 0001 0001  de05b440 c082afac de057ac0 de057ac0 de0443c0
dea0:     c082afbc de05dec8 c009f2a0 c051c778
dec0: 2113 
[] (__irq_svc+0x44/0x5c) from [] (_raw_spin_unlock_irq+0x28/0x2c)
[] (_raw_spin_unlock_irq+0x28/0x2c) from [] (proc_alloc_inum+0x30/0xa8)
[] (proc_alloc_inum+0x30/0xa8) from [] (proc_register+0x18/0x130)
[] (proc_register+0x18/0x130) from [] (proc_mkdir_data+0x44/0x6c)
[] (proc_mkdir_data+0x44/0x6c) from [] (register_irq_proc+0x6c/0x128)
[] (register_irq_proc+0x6c/0x128) from [] (init_irq_proc+0x74/0xb0)
[] (init_irq_proc+0x74/0xb0) from [] (kernel_init_freeable+0x84/0x1c8)
[] (kernel_init_freeable+0x84/0x1c8) from [] (kernel_init+0x8/0x150)
[] (kernel_init+0x8/0x150) from [] (ret_from_fork+0x14/0x2c)
Code: bad PC value

Fixes: bf18525fd79 "ARM: 7872/1: Support arch_irq_work_raise() via self IPIs"

Reported-by: Olof Johansson 
Signed-off-by: Stephen Boyd 
Tested-by: Olof Johansson 
Signed-off-by: Russell King 

Changes v2 to v3:

* Do not call is_smp() as this is only defined on arm32

Changes v1 to v2:

* Include ARM 7887/1 bugfix

Signed-off-by: Larry Bassel 
Reviewed-by: Kevin Hilman 
---
 arch/arm64/include/asm/hardirq.h |  2 +-
 arch/arm64/kernel/smp.c  | 19 +++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index ae4801d..0be6782 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
 #include 
 #include 
 
-#define NR_IPI 5
+#define NR_IPI 6
 
 typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index f0a141d..049aa8d 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -62,6 +63,7 @@ enum ipi_msg_type {
IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
IPI_TIMER,
+   IPI_IRQ_WORK,
 };
 
 /*
@@ -455,6 +457,14 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
 }
 
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+   if (smp_cross_call)
+   smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
+}
+#endif
+
 static const char *ipi_types[NR_IPI] = {
 #define S(x,s) [x - IPI_RESCHEDULE] = s
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
@@ -462,6 +472,7 @@ static const char *ipi_types[NR_IPI] = {
S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
+   S(IPI_IRQ_WORK, "IRQ work interrupts"),
 };
 
 void show_ipi_list(struct seq_file *p, int prec)
@@ -554,6 +565,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;
 #endif
 
+#ifdef CONFIG_IRQ_WORK
+   case IPI_IRQ_WORK:
+   irq_enter();
+   irq_work_run();
+   irq_exit();
+   break;
+#endif
+
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
break;
-- 
1.8.3.2



Re: [PATCH v2] arm64: Support arch_irq_work_raise() via self IPIs

2014-05-12 Thread Larry Bassel
On 12 May 14 10:29, Will Deacon wrote:
> On Sat, May 10, 2014 at 11:23:41PM +0100, Larry Bassel wrote:
> > Support for arch_irq_work_raise() was missing from
> > arm64 (a prerequisite for FULL_NOHZ).
> 
> [...]
> 
> > @@ -455,6 +457,14 @@ void arch_send_call_function_single_ipi(int cpu)
> > smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
> >  }
> >  
> > +#ifdef CONFIG_IRQ_WORK
> > +void arch_irq_work_raise(void)
> > +{
> > +   if (is_smp())
> > +   smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
> > +}
> > +#endif
> 
> Does this even compile? We're probably better off just checking whether or
> not smp_cross_call is NULL.

No, it doesn't (I incorrectly assumed that is_smp() was generic rather
than arm32-specific, and so I didn't compile this before submitting).

I've verified that your suggestion compiles and runs properly
and will resubmit.

Thanks for catching this.

> 
> Will

Larry


[PATCH v2] arm64: Support arch_irq_work_raise() via self IPIs

2014-05-10 Thread Larry Bassel
Exception stack(0xde05de80 to 0xde05dec8)
de80: 0001 0001  de05b440 c082afac de057ac0 de057ac0 de0443c0
dea0:     c082afbc de05dec8 c009f2a0 c051c778
dec0: 2113 
[] (__irq_svc+0x44/0x5c) from [] (_raw_spin_unlock_irq+0x28/0x2c)
[] (_raw_spin_unlock_irq+0x28/0x2c) from [] (proc_alloc_inum+0x30/0xa8)
[] (proc_alloc_inum+0x30/0xa8) from [] (proc_register+0x18/0x130)
[] (proc_register+0x18/0x130) from [] (proc_mkdir_data+0x44/0x6c)
[] (proc_mkdir_data+0x44/0x6c) from [] (register_irq_proc+0x6c/0x128)
[] (register_irq_proc+0x6c/0x128) from [] (init_irq_proc+0x74/0xb0)
[] (init_irq_proc+0x74/0xb0) from [] (kernel_init_freeable+0x84/0x1c8)
[] (kernel_init_freeable+0x84/0x1c8) from [] (kernel_init+0x8/0x150)
[] (kernel_init+0x8/0x150) from [] (ret_from_fork+0x14/0x2c)
Code: bad PC value

Fixes: bf18525fd79 "ARM: 7872/1: Support arch_irq_work_raise() via self IPIs"

Reported-by: Olof Johansson 
Signed-off-by: Stephen Boyd 
Tested-by: Olof Johansson 
Signed-off-by: Russell King 

Changes v1 to v2:

* Include ARM 7887/1 bugfix

Signed-off-by: Larry Bassel 
Reviewed-by: Kevin Hilman 
---
 arch/arm64/include/asm/hardirq.h |  2 +-
 arch/arm64/kernel/smp.c  | 19 +++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index ae4801d..0be6782 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
 #include 
 #include 
 
-#define NR_IPI 5
+#define NR_IPI 6
 
 typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index f0a141d..78c3f97 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -62,6 +63,7 @@ enum ipi_msg_type {
IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
IPI_TIMER,
+   IPI_IRQ_WORK,
 };
 
 /*
@@ -455,6 +457,14 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
 }
 
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+   if (is_smp())
+   smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
+}
+#endif
+
 static const char *ipi_types[NR_IPI] = {
 #define S(x,s) [x - IPI_RESCHEDULE] = s
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
@@ -462,6 +472,7 @@ static const char *ipi_types[NR_IPI] = {
S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
+   S(IPI_IRQ_WORK, "IRQ work interrupts"),
 };
 
 void show_ipi_list(struct seq_file *p, int prec)
@@ -554,6 +565,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;
 #endif
 
+#ifdef CONFIG_IRQ_WORK
+   case IPI_IRQ_WORK:
+   irq_enter();
+   irq_work_run();
+   irq_exit();
+   break;
+#endif
+
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
break;
-- 
1.8.3.2



Re: [PATCH] arm64: Support arch_irq_work_raise() via self IPIs

2014-05-10 Thread Larry Bassel
On 09 May 14 16:57, Catalin Marinas wrote:
> On Mon, May 05, 2014 at 09:48:27PM +0100, Larry Bassel wrote:
> > Support for arch_irq_work_raise() was missing from
> > arm64 (a prerequisite for FULL_NOHZ).
> > 
> > This patch is based on the arm32 patch ARM 7872/1
> > which ports cleanly.
> [...]
> > +#ifdef CONFIG_IRQ_WORK
> > +void arch_irq_work_raise(void)
> > +{
> > +   smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
> > +}
> > +#endif
> 
> There was a subsequent patch adding is_smp() check here (c682e51dbc98
> ARM: 7887/1: Don't smp_cross_call() on UP devices in
> arch_irq_work_raise()). Don't we need it?

I will look into this. Thanks.

> 
> -- 
> Catalin

Larry


[PATCH v3 1/2] arm64: adjust el0_sync so that a function can be called

2014-05-09 Thread Larry Bassel
To implement the context tracker properly on arm64,
a function call needs to be made after debugging and
interrupts are turned on, but before the lr is changed
to point to ret_from_exception(). If the function call
is made after the lr is changed the function will not
return to the correct place.

For similar reasons, defer the setting of x0 so that
it doesn't need to be saved around the function call
(save far_el1 in x26 temporarily instead).

Signed-off-by: Larry Bassel 
Reviewed-by: Kevin Hilman 
---
 arch/arm64/kernel/entry.S | 27 +++
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 39ac630..136bb7d 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -349,7 +349,6 @@ el0_sync:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC64  // SVC in 64-bit state
b.eq el0_svc
-   adr lr, ret_from_exception
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -378,7 +377,6 @@ el0_sync_compat:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC32  // SVC in 32-bit state
b.eq el0_svc_compat
-   adr lr, ret_from_exception
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
@@ -421,28 +419,32 @@ el0_da:
/*
 * Data abort handling
 */
-   mrs x0, far_el1
-   bic x0, x0, #(0xff << 56)
+   mrs x26, far_el1
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   mov x0, x26
+   bic x0, x0, #(0xff << 56)
mov x1, x25
mov x2, sp
+   adr lr, ret_from_exception
b   do_mem_abort
 el0_ia:
/*
 * Instruction abort handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
+   adr lr, ret_from_exception
b   do_mem_abort
 el0_fpsimd_acc:
/*
@@ -450,6 +452,7 @@ el0_fpsimd_acc:
 */
mov x0, x25
mov x1, sp
+   adr lr, ret_from_exception
b   do_fpsimd_acc
 el0_fpsimd_exc:
/*
@@ -457,42 +460,50 @@ el0_fpsimd_exc:
 */
mov x0, x25
mov x1, sp
+   adr lr, ret_from_exception
b   do_fpsimd_exc
 el0_sp_pc:
/*
 * Stack or PC alignment exception handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   mov x0, x26
mov x1, x25
mov x2, sp
+   adr lr, ret_from_exception
b   do_sp_pc_abort
 el0_undef:
/*
 * Undefined instruction
 */
-   mov x0, sp
+   mov x26, sp
// enable interrupts before calling the main handler
enable_irq
+   mov x0, x26
+   adr lr, ret_from_exception
b   do_undefinstr
 el0_dbg:
/*
 * Debug exception handling
 */
tbnz x24, #0, el0_inv // EL0 only
-   mrs x0, far_el1
+   mrs x26, far_el1
disable_step x1
+   mov x0, x26
mov x1, x25
mov x2, sp
+   adr lr, ret_from_exception
b   do_debug_exception
 el0_inv:
mov x0, sp
mov x1, #BAD_SYNC
mrs x2, esr_el1
+   adr lr, ret_from_exception
b   bad_mode
 ENDPROC(el0_sync)
 
-- 
1.8.3.2



[PATCH v3 2/2] arm64: enable context tracking

2014-05-09 Thread Larry Bassel
Make calls to ct_user_enter when the kernel is exited
and ct_user_exit when the kernel is entered (in el0_da,
el0_ia, el0_svc, el0_irq and all of the "error" paths).

These macros expand to function calls which will only work
properly if el0_sync and related code has been rearranged
(in a previous patch of this series).

The calls to ct_user_exit are made after hw debugging has been
enabled (enable_dbg).

The call to ct_user_enter is made at the beginning of the
kernel_exit macro.

This patch is based on earlier work by Kevin Hilman.

Signed-off-by: Kevin Hilman 
Signed-off-by: Larry Bassel 
---
 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/kernel/entry.S | 49 
 3 files changed, 51 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e6e4d37..152d92b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+   select HAVE_CONTEXT_TRACKING
help
  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b..301ea6a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -108,6 +108,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SINGLESTEP 21
 #define TIF_32BIT  22  /* 32bit process */
 #define TIF_SWITCH_MM  23  /* deferred switch_mm */
+#define TIF_NOHZ           24
 
 #define _TIF_SIGPENDING(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED  (1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 136bb7d..c839bab 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,44 @@
 #include 
 
 /*
+ * Context tracking subsystem.  Used to instrument transitions
+ * between user and kernel mode.
+ */
+   .macro ct_user_exit, save = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_exit
+   .if \save == 1
+   /*
+* save/restore needed during syscalls.  Restore syscall arguments from
+* the values already saved on stack during kernel_entry
+*/
+   ldp x0, x1, [sp]
+   ldp x2, x3, [sp, #S_X2]
+   ldp x4, x5, [sp, #S_X4]
+   ldp x6, x7, [sp, #S_X6]
+   .endif
+#endif
+   .endm
+
+   .macro ct_user_enter, save = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+   .if \save == 1
+   /*
+* save/restore only needed on syscall fastpath, which uses
+* x0-x2
+*/
+   push x2, x3
+   push x0, x1
+   .endif
+   bl  context_tracking_user_enter
+   .if \save == 1
+   pop x0, x1
+   pop x2, x3
+   .endif
+#endif
+   .endm
+
+/*
  * Bad Abort numbers
  *-
  */
@@ -88,6 +126,7 @@
.macro  kernel_exit, el, ret = 0
ldp x21, x22, [sp, #S_PC]   // load ELR, SPSR
.if \el == 0
+   ct_user_enter \ret
ldr x23, [sp, #S_SP]    // load return stack pointer
.endif
.if \ret
@@ -425,6 +464,7 @@ el0_da:
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   ct_user_exit
mov x0, x26
bic x0, x0, #(0xff << 56)
mov x1, x25
@@ -441,6 +481,7 @@ el0_ia:
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   ct_user_exit
mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
@@ -450,6 +491,7 @@ el0_fpsimd_acc:
/*
 * Floating Point or Advanced SIMD access
 */
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_from_exception
@@ -458,6 +500,7 @@ el0_fpsimd_exc:
/*
 * Floating Point or Advanced SIMD exception
 */
+   ct_user_exit
mov x0, x25
mov x1, sp
adr lr, ret_from_exception
@@ -472,6 +515,7 @@ el0_sp_pc:
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   ct_user_exit
mov x0, x26
mov x1, x25
mov x2, sp
@@ -484,6 +528,7 @@ el0_undef:
mov x26, sp
// enable interrupts before calling the main handler
enable_irq
+   ct_user_exit
mov x0, x26
adr lr, ret_from_exception
b   do_undefinstr
@@ -494,12 +539,14 @@ el0_dbg:
tbnz x24, #0, el0_inv // EL0 only
mrs x26, far_el1
disable_step x1
+   ct_user_exit
mov x0, x26
mov x1, x25
   

[PATCH v3 0/2] context tracker support for arm64

2014-05-09 Thread Larry Bassel
Implement and enable context tracking for arm64 (which is
a prerequisite for FULL_NOHZ support). This patchset
builds upon earlier work by Kevin Hilman and is based on 3.15-rc2.

Changes v2 to v3:

* Save/restore necessary registers in ct_user_enter and ct_user_exit
* Annotate "error paths" out of el0_sync with ct_user_exit

Changes v1 to v2:

* Save far_el1 in x26 temporarily

Larry Bassel (2):
  arm64: adjust el0_sync so that a function can be called
  arm64: enable context tracking

 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/kernel/entry.S | 76 
 3 files changed, 70 insertions(+), 8 deletions(-)

-- 
1.8.3.2



Re: [PATCH v2 2/2] arm64: enable context tracking

2014-05-07 Thread Larry Bassel
On 07 May 14 11:17, Will Deacon wrote:
> On Wed, May 07, 2014 at 12:32:29AM +0100, Larry Bassel wrote:
> > Make calls to ct_user_enter when the kernel is exited
> > and ct_user_exit when the kernel is entered (in el0_da,
> > el0_ia, el0_svc, el0_irq).
> 
> Why only these entry points? I can reschedule after any exception from EL0,
> so I'd expect all exceptions from userspace to need annotating, no?
> 
> > These macros expand to function calls which will only work
> > properly if el0_sync and related code has been rearranged
> > (in a previous patch of this series).
> > 
> > In order to avoid saving registers, the slow syscall path
> > is forced (as x86 does).
> 
> ... and if you decide to handle undef exceptions, I think you'll need
> the register saving too, in case the kernel needs to perform emulation.

These are excellent points, I will rework the patch and submit v3.

Thanks for the feedback.

> 
> Will

Larry


[PATCH v2 0/2] context tracker support for arm64

2014-05-06 Thread Larry Bassel
Implement and enable context tracking for arm64 (which is
a prerequisite for FULL_NOHZ support). This patchset
builds upon earlier work by Kevin Hilman and is based on 3.15-rc2.

Larry Bassel (2):
  arm64: adjust el0_sync so that a function can be called
  arm64: enable context tracking

 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/kernel/entry.S | 36 +++-
 3 files changed, 33 insertions(+), 5 deletions(-)

-- 
1.8.3.2



[PATCH v2 1/2] arm64: adjust el0_sync so that a function can be called

2014-05-06 Thread Larry Bassel
To implement the context tracker properly on arm64,
a function call needs to be made after debugging and
interrupts are turned on, but before the lr is changed
to point to ret_from_exception(). If the function call
is made after the lr is changed the function will not
return to the correct place.

For similar reasons, defer the setting of x0 so that
it doesn't need to be saved around the function call
(save far_el1 in x26 temporarily instead).

Signed-off-by: Larry Bassel 
Reviewed-by: Kevin Hilman 
---
 arch/arm64/kernel/entry.S | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 39ac630..d920d7f 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -349,11 +349,11 @@ el0_sync:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC64  // SVC in 64-bit state
b.eq el0_svc
-   adr lr, ret_from_exception
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
b.eq el0_ia
+   adr lr, ret_from_exception
cmp x24, #ESR_EL1_EC_FP_ASIMD   // FP/ASIMD access
b.eq el0_fpsimd_acc
cmp x24, #ESR_EL1_EC_FP_EXC64   // FP/ASIMD exception
@@ -378,11 +378,11 @@ el0_sync_compat:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC32  // SVC in 32-bit state
b.eq el0_svc_compat
-   adr lr, ret_from_exception
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
b.eq el0_ia
+   adr lr, ret_from_exception
cmp x24, #ESR_EL1_EC_FP_ASIMD   // FP/ASIMD access
b.eq el0_fpsimd_acc
cmp x24, #ESR_EL1_EC_FP_EXC32   // FP/ASIMD exception
@@ -421,28 +421,32 @@ el0_da:
/*
 * Data abort handling
 */
-   mrs x0, far_el1
-   bic x0, x0, #(0xff << 56)
+   mrs x26, far_el1
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   mov x0, x26
+   bic x0, x0, #(0xff << 56)
mov x1, x25
mov x2, sp
+   adr lr, ret_from_exception
b   do_mem_abort
 el0_ia:
/*
 * Instruction abort handling
 */
-   mrs x0, far_el1
+   mrs x26, far_el1
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
+   adr lr, ret_from_exception
b   do_mem_abort
 el0_fpsimd_acc:
/*
-- 
1.8.3.2



[PATCH v2 2/2] arm64: enable context tracking

2014-05-06 Thread Larry Bassel
Make calls to ct_user_enter when the kernel is exited
and ct_user_exit when the kernel is entered (in el0_da,
el0_ia, el0_svc, el0_irq).

These macros expand to function calls which will only work
properly if el0_sync and related code has been rearranged
(in a previous patch of this series).

In order to avoid saving registers, the slow syscall path
is forced (as x86 does).

The calls to ct_user_exit are made after hw debugging has been
enabled (enable_dbg).

The call to ct_user_enter is made at the beginning of the
kernel_exit macro.

This patch is based on earlier work by Kevin Hilman.

Signed-off-by: Kevin Hilman 
Signed-off-by: Larry Bassel 
---
 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/kernel/entry.S | 22 ++
 3 files changed, 24 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e6e4d37..152d92b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+   select HAVE_CONTEXT_TRACKING
help
  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b..301ea6a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -108,6 +108,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SINGLESTEP 21
 #define TIF_32BIT  22  /* 32bit process */
 #define TIF_SWITCH_MM  23  /* deferred switch_mm */
+#define TIF_NOHZ           24
 
 #define _TIF_SIGPENDING(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED  (1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index d920d7f..5fe447c 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,22 @@
 #include 
 
 /*
+ * Context tracking subsystem.  Used to instrument transitions
+ * between user and kernel mode.
+ */
+   .macro ct_user_exit
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_exit
+#endif
+   .endm
+
+   .macro ct_user_enter
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_enter
+#endif
+   .endm
+
+/*
  * Bad Abort numbers
  *-
  */
@@ -88,6 +104,7 @@
.macro  kernel_exit, el, ret = 0
ldp x21, x22, [sp, #S_PC]   // load ELR, SPSR
.if \el == 0
+   ct_user_enter
ldr x23, [sp, #S_SP]    // load return stack pointer
.endif
.if \ret
@@ -427,6 +444,7 @@ el0_da:
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   ct_user_exit
mov x0, x26
bic x0, x0, #(0xff << 56)
mov x1, x25
@@ -443,6 +461,7 @@ el0_ia:
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   ct_user_exit
mov x0, x26
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
@@ -511,6 +530,7 @@ el0_irq_naked:
bl  trace_hardirqs_off
 #endif
 
+   ct_user_exit
irq_handler
get_thread_info tsk
 
@@ -633,10 +653,12 @@ el0_svc_naked: // compat entry point
isb
enable_dbg
enable_irq
+   ct_user_exit
 
get_thread_info tsk
ldr x16, [tsk, #TI_FLAGS]   // check for syscall tracing
tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls?
+   tbnz x16, #TIF_NOHZ, __sys_trace
adr lr, ret_fast_syscall    // return address
cmp scno, sc_nr // check upper syscall limit
b.hs ni_sys
-- 
1.8.3.2



[PATCH] arm64: Support arch_irq_work_raise() via self IPIs

2014-05-05 Thread Larry Bassel
Support for arch_irq_work_raise(), a prerequisite for
FULL_NOHZ, was missing from arm64.
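
For context, generic code provides only a weak no-op hook, so an
architecture that does not override it has queued IRQ work run only
from the next timer tick. A sketch of the fallback's shape (see
kernel/irq_work.c for the exact code):

/* architectures override this weak stub */
void __weak arch_irq_work_raise(void)
{
	/* empty: queued IRQ work waits for the next tick */
}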

This patch is based on the arm32 patch ARM 7872/1, which
ports cleanly.

commit bf18525fd793101df42a1344ecc48b49b62e48c9
Author: Stephen Boyd 
Date:   Tue Oct 29 20:32:56 2013 +0100

ARM: 7872/1: Support arch_irq_work_raise() via self IPIs

By default, IRQ work is run from the tick interrupt (see
irq_work_run() in update_process_times()). When we're in full
NOHZ mode, restarting the tick requires the use of IRQ work and
if the only place we run IRQ work is in the tick interrupt we
have an unbreakable cycle. Implement arch_irq_work_raise() via
self IPIs to break this cycle and get the tick started again.
Note that we implement this via IPIs which are only available on
SMP builds. This shouldn't be a problem because full NOHZ is only
supported on SMP builds anyway.

Signed-off-by: Stephen Boyd 
Reviewed-by: Kevin Hilman 
Cc: Frederic Weisbecker 
Signed-off-by: Russell King 

Signed-off-by: Larry Bassel 
Reviewed-by: Kevin Hilman 
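
To see the path end to end, here is a minimal usage sketch
(hypothetical names, illustration only): irq_work_queue() marks the
work pending and calls arch_irq_work_raise(), which with this patch
sends IPI_IRQ_WORK to the current CPU; handle_IPI() then runs the
callback promptly instead of waiting for a tick.

#include <linux/irq_work.h>

static void my_work_fn(struct irq_work *work)	/* hypothetical */
{
	/* runs in hard-IRQ context on the CPU that queued it */
}

static struct irq_work my_work;

static void my_setup(void)	/* hypothetical */
{
	init_irq_work(&my_work, my_work_fn);
}

static void my_kick(void)	/* hypothetical */
{
	irq_work_queue(&my_work);	/* ends up in arch_irq_work_raise() */
}
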
---
 arch/arm64/include/asm/hardirq.h |  2 +-
 arch/arm64/kernel/smp.c  | 18 ++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index ae4801d..0be6782 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
 #include 
 #include 
 
-#define NR_IPI 5
+#define NR_IPI 6
 
 typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index f0a141d..20fd074 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include <linux/irq_work.h>
 
 #include 
 #include 
@@ -62,6 +63,7 @@ enum ipi_msg_type {
IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
IPI_TIMER,
+   IPI_IRQ_WORK,
 };
 
 /*
@@ -455,6 +457,13 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
 }
 
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+   smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
+}
+#endif
+
 static const char *ipi_types[NR_IPI] = {
 #define S(x,s) [x - IPI_RESCHEDULE] = s
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
@@ -462,6 +471,7 @@ static const char *ipi_types[NR_IPI] = {
S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
+   S(IPI_IRQ_WORK, "IRQ work interrupts"),
 };
 
 void show_ipi_list(struct seq_file *p, int prec)
@@ -554,6 +564,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;
 #endif
 
+#ifdef CONFIG_IRQ_WORK
+   case IPI_IRQ_WORK:
+   irq_enter();
+   irq_work_run();
+   irq_exit();
+   break;
+#endif
+
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
break;
-- 
1.8.3.2



[PATCH 3/3] arm64: enable context tracking

2014-04-27 Thread Larry Bassel
Make calls to ct_user_enter when the kernel is exited
and ct_user_exit when the kernel is entered (in el0_da,
el0_ia, el0_svc, el0_irq).

These macros expand to function calls which will only work
properly if el0_sync and related code have been rearranged
(in a previous patch of this series).

The calls to ct_user_exit are made after hw debugging has been
enabled (enable_dbg).

The call to ct_user_enter is made at the beginning of the
kernel_exit macro.

Signed-off-by: Kevin Hilman 
Signed-off-by: Larry Bassel 
---
 arch/arm64/kernel/entry.S | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 76b09d8..e949435 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -104,6 +104,7 @@
.macro  kernel_exit, el, ret = 0
ldp x21, x22, [sp, #S_PC]   // load ELR, SPSR
.if \el == 0
+   ct_user_enter
ldr x23, [sp, #S_SP]    // load return stack pointer
.endif
.if \ret
@@ -442,6 +443,7 @@ el0_da:
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   ct_user_exit
mrs x0, far_el1
bic x0, x0, #(0xff << 56)
mov x1, x25
@@ -457,6 +459,7 @@ el0_ia:
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   ct_user_exit
mrs x0, far_el1
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
@@ -525,6 +528,7 @@ el0_irq_naked:
bl  trace_hardirqs_off
 #endif
 
+   ct_user_exit
irq_handler
get_thread_info tsk
 
@@ -647,6 +651,7 @@ el0_svc_naked:   // compat entry point
isb
enable_dbg
enable_irq
+   ct_user_exit
 
get_thread_info tsk
ldr x16, [tsk, #TI_FLAGS]   // check for syscall tracing
-- 
1.8.3.2



[PATCH 2/3] arm64: add support for context tracking

2014-04-27 Thread Larry Bassel
From: Kevin Hilman 

Add the macros and defines needed to implement
context tracking on arm64.

Signed-off-by: Kevin Hilman 
Signed-off-by: Larry Bassel 
---
 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/kernel/entry.S | 16 
 3 files changed, 18 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e6e4d37..152d92b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+   select HAVE_CONTEXT_TRACKING
help
  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b..301ea6a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -108,6 +108,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SINGLESTEP 21
 #define TIF_32BIT  22  /* 32bit process */
 #define TIF_SWITCH_MM  23  /* deferred switch_mm */
+#define TIF_NOHZ   24
 
 #define _TIF_SIGPENDING   (1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED  (1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index eda7755..76b09d8 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,22 @@
 #include 
 
 /*
+ * Context tracking subsystem.  Used to instrument transitions
+ * between user and kernel mode.
+ */
+   .macro ct_user_exit
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_exit
+#endif
+   .endm
+
+   .macro ct_user_enter
+#ifdef CONFIG_CONTEXT_TRACKING
+   bl  context_tracking_user_enter
+#endif
+   .endm
+
+/*
  * Bad Abort numbers
  *-----------------
  */
-- 
1.8.3.2



[PATCH 0/3] context tracker support for arm64

2014-04-27 Thread Larry Bassel
Implement and enable context tracking for arm64 (which is
a prerequisite for FULL_NOHZ support). This patchset
builds upon earlier work by Kevin Hilman and is based on 3.15-rc2.

Kevin Hilman (1):
  arm64: add support for context tracking

Larry Bassel (2):
  arm64: adjust el0_sync so that a function can be called
  arm64: enable context tracking

 arch/arm64/Kconfig   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/kernel/entry.S | 33 -
 3 files changed, 30 insertions(+), 5 deletions(-)

-- 
1.8.3.2



[PATCH 1/3] arm64: adjust el0_sync so that a function can be called

2014-04-27 Thread Larry Bassel
To implement the context tracker properly on arm64,
a function call needs to be made after debugging and
interrupts are turned on, but before the lr is changed
to point to ret_from_exception(). If the function call
is made after the lr is changed, the function will not
return to the correct place: the bl instruction that
makes the call overwrites the lr with its own return
address. For similar reasons, defer the setting of x0 so
that it doesn't need to be saved around the function call.

Signed-off-by: Larry Bassel 
Reviewed-by: Kevin Hilman 
---
 arch/arm64/kernel/entry.S | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 39ac630..eda7755 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -349,11 +349,11 @@ el0_sync:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC64  // SVC in 64-bit state
b.eq el0_svc
-   adr lr, ret_from_exception
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
b.eq el0_ia
+   adr lr, ret_from_exception
cmp x24, #ESR_EL1_EC_FP_ASIMD   // FP/ASIMD access
b.eq el0_fpsimd_acc
cmp x24, #ESR_EL1_EC_FP_EXC64   // FP/ASIMD exception
@@ -378,11 +378,11 @@ el0_sync_compat:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC32  // SVC in 32-bit state
b.eq el0_svc_compat
-   adr lr, ret_from_exception
cmp x24, #ESR_EL1_EC_DABT_EL0   // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0   // instruction abort in EL0
b.eq el0_ia
+   adr lr, ret_from_exception
cmp x24, #ESR_EL1_EC_FP_ASIMD   // FP/ASIMD access
b.eq el0_fpsimd_acc
cmp x24, #ESR_EL1_EC_FP_EXC32   // FP/ASIMD exception
@@ -421,28 +421,30 @@ el0_da:
/*
 * Data abort handling
 */
-   mrs x0, far_el1
-   bic x0, x0, #(0xff << 56)
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   mrs x0, far_el1
+   bic x0, x0, #(0xff << 56)
mov x1, x25
mov x2, sp
+   adr lr, ret_from_exception
b   do_mem_abort
 el0_ia:
/*
 * Instruction abort handling
 */
-   mrs x0, far_el1
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+   mrs x0, far_el1
orr x1, x25, #1 << 24   // use reserved ISS bit for instruction aborts
mov x2, sp
+   adr lr, ret_from_exception
b   do_mem_abort
 el0_fpsimd_acc:
/*
-- 
1.8.3.2
