Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-19 Thread Oleg Nesterov
On 03/19, Ravi Bangoria wrote:
>
> Hi Oleg,
> 
> On 03/14/2018 10:29 PM, Oleg Nesterov wrote:
> > On 03/13, Ravi Bangoria wrote:
> >> +static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct 
> >> *vma)
> >> +{
> >> +  unsigned long vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
> >> +
> >> +  return tu->ref_ctr_offset &&
> >> +  vma->vm_file &&
> >> +  file_inode(vma->vm_file) == tu->inode &&
> >> +  vma->vm_flags & VM_WRITE &&
> >> +  vma->vm_start <= vaddr &&
> >> +  vma->vm_end > vaddr;
> >> +}
> > Perhaps in this case a simple
> >
> > ref_ctr_offset < vma->vm_end - vma->vm_start
> >
> > check without vma_offset_to_vaddr() makes more sense, but I won't insist.
> >
> 
> I still don't get this. This seems a comparison between file offset and size
> of the vma. Shouldn't we need to consider pg_off here?

Indeed, I am stupid ;)

Oleg.



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-19 Thread Oleg Nesterov
On 03/19, Ravi Bangoria wrote:
>
> Hi Oleg,
> 
> On 03/14/2018 10:29 PM, Oleg Nesterov wrote:
> > On 03/13, Ravi Bangoria wrote:
> >> +static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct 
> >> *vma)
> >> +{
> >> +  unsigned long vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
> >> +
> >> +  return tu->ref_ctr_offset &&
> >> +  vma->vm_file &&
> >> +  file_inode(vma->vm_file) == tu->inode &&
> >> +  vma->vm_flags & VM_WRITE &&
> >> +  vma->vm_start <= vaddr &&
> >> +  vma->vm_end > vaddr;
> >> +}
> > Perhaps in this case a simple
> >
> > ref_ctr_offset < vma->vm_end - vma->vm_start
> >
> > check without vma_offset_to_vaddr() makes more sense, but I won't insist.
> >
> 
> I still don't get this. This seems a comparison between file offset and size
> of the vma. Shouldn't we need to consider pg_off here?

Indeed, I am stupid ;)

Oleg.



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-18 Thread Ravi Bangoria
Hi Oleg,

On 03/14/2018 10:29 PM, Oleg Nesterov wrote:
> On 03/13, Ravi Bangoria wrote:
>> +static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct 
>> *vma)
>> +{
>> +unsigned long vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
>> +
>> +return tu->ref_ctr_offset &&
>> +vma->vm_file &&
>> +file_inode(vma->vm_file) == tu->inode &&
>> +vma->vm_flags & VM_WRITE &&
>> +vma->vm_start <= vaddr &&
>> +vma->vm_end > vaddr;
>> +}
> Perhaps in this case a simple
>
>   ref_ctr_offset < vma->vm_end - vma->vm_start
>
> check without vma_offset_to_vaddr() makes more sense, but I won't insist.
>

I still don't get this. This seems a comparison between file offset and size
of the vma. Shouldn't we need to consider pg_off here?

Thanks,
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-18 Thread Ravi Bangoria
Hi Oleg,

On 03/14/2018 10:29 PM, Oleg Nesterov wrote:
> On 03/13, Ravi Bangoria wrote:
>> +static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct 
>> *vma)
>> +{
>> +unsigned long vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
>> +
>> +return tu->ref_ctr_offset &&
>> +vma->vm_file &&
>> +file_inode(vma->vm_file) == tu->inode &&
>> +vma->vm_flags & VM_WRITE &&
>> +vma->vm_start <= vaddr &&
>> +vma->vm_end > vaddr;
>> +}
> Perhaps in this case a simple
>
>   ref_ctr_offset < vma->vm_end - vma->vm_start
>
> check without vma_offset_to_vaddr() makes more sense, but I won't insist.
>

I still don't get this. This seems a comparison between file offset and size
of the vma. Shouldn't we need to consider pg_off here?

Thanks,
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Oleg Nesterov
On 03/15, Steven Rostedt wrote:
>
> On Tue, 13 Mar 2018 18:26:00 +0530
> Ravi Bangoria  wrote:
>
> > +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
> > +{
> > +   struct uprobe_map_info *info;
> > +   struct vm_area_struct *vma;
> > +   unsigned long vaddr;
> > +
> > +   uprobe_start_dup_mmap();
>
> Please add a comment here that this function ups the mm ref count for
> each info returned. Otherwise it's hard to know what that mmput() below
> matches.

You meant uprobe_build_map_info(), not uprobe_start_dup_mmap().

Yes, and if it gets more callers perhaps we should move this mmput() into
uprobe_free_map_info()...

Oleg.


--- x/kernel/events/uprobes.c
+++ x/kernel/events/uprobes.c
@@ -714,6 +714,7 @@ struct map_info {
 static inline struct map_info *free_map_info(struct map_info *info)
 {
struct map_info *next = info->next;
+   mmput(info->mm);
kfree(info);
return next;
 }
@@ -783,8 +784,11 @@ build_map_info(struct address_space *map
 
goto again;
  out:
-   while (prev)
-   prev = free_map_info(prev);
+   while (prev) {
+   info = prev;
+   prev = prev->next;
+   kfree(info);
+   }
return curr;
 }
 
@@ -834,7 +838,6 @@ register_for_each_vma(struct uprobe *upr
  unlock:
up_write(>mmap_sem);
  free:
-   mmput(mm);
info = free_map_info(info);
}
  out:



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Oleg Nesterov
On 03/15, Steven Rostedt wrote:
>
> On Tue, 13 Mar 2018 18:26:00 +0530
> Ravi Bangoria  wrote:
>
> > +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
> > +{
> > +   struct uprobe_map_info *info;
> > +   struct vm_area_struct *vma;
> > +   unsigned long vaddr;
> > +
> > +   uprobe_start_dup_mmap();
>
> Please add a comment here that this function ups the mm ref count for
> each info returned. Otherwise it's hard to know what that mmput() below
> matches.

You meant uprobe_build_map_info(), not uprobe_start_dup_mmap().

Yes, and if it gets more callers perhaps we should move this mmput() into
uprobe_free_map_info()...

Oleg.


--- x/kernel/events/uprobes.c
+++ x/kernel/events/uprobes.c
@@ -714,6 +714,7 @@ struct map_info {
 static inline struct map_info *free_map_info(struct map_info *info)
 {
struct map_info *next = info->next;
+   mmput(info->mm);
kfree(info);
return next;
 }
@@ -783,8 +784,11 @@ build_map_info(struct address_space *map
 
goto again;
  out:
-   while (prev)
-   prev = free_map_info(prev);
+   while (prev) {
+   info = prev;
+   prev = prev->next;
+   kfree(info);
+   }
return curr;
 }
 
@@ -834,7 +838,6 @@ register_for_each_vma(struct uprobe *upr
  unlock:
up_write(>mmap_sem);
  free:
-   mmput(mm);
info = free_map_info(info);
}
  out:



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Ravi Bangoria


On 03/16/2018 05:09 PM, Oleg Nesterov wrote:
> On 03/16, Ravi Bangoria wrote:
>> On 03/15/2018 08:00 PM, Oleg Nesterov wrote:
>>> Note to mention that sdt_find_vma() can return NULL but the callers do
>>> vma_offset_to_vaddr(vma) without any check.
>> If the "mm" we are passing to sdt_find_vma() is returned by
>> uprobe_build_map_info(ref_ctr_offset), sdt_find_vma() must
>> _not_ return NULL.
> Not at all.
>
> Once build_map_info() returns any mapping can go away. Otherwise, why do
> you think the caller has to take ->mmap_sem and use find_vma()? If you
> were right, build_map_info() could just return the list of vma's instead
> of list of mm's.

Oh.. okay.. I was under wrong impression then. Will add a check there.

Thanks for the review :)
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Ravi Bangoria


On 03/16/2018 05:09 PM, Oleg Nesterov wrote:
> On 03/16, Ravi Bangoria wrote:
>> On 03/15/2018 08:00 PM, Oleg Nesterov wrote:
>>> Note to mention that sdt_find_vma() can return NULL but the callers do
>>> vma_offset_to_vaddr(vma) without any check.
>> If the "mm" we are passing to sdt_find_vma() is returned by
>> uprobe_build_map_info(ref_ctr_offset), sdt_find_vma() must
>> _not_ return NULL.
> Not at all.
>
> Once build_map_info() returns any mapping can go away. Otherwise, why do
> you think the caller has to take ->mmap_sem and use find_vma()? If you
> were right, build_map_info() could just return the list of vma's instead
> of list of mm's.

Oh.. okay.. I was under wrong impression then. Will add a check there.

Thanks for the review :)
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Oleg Nesterov
On 03/16, Ravi Bangoria wrote:
>
> On 03/15/2018 08:00 PM, Oleg Nesterov wrote:
> > Note to mention that sdt_find_vma() can return NULL but the callers do
> > vma_offset_to_vaddr(vma) without any check.
>
> If the "mm" we are passing to sdt_find_vma() is returned by
> uprobe_build_map_info(ref_ctr_offset), sdt_find_vma() must
> _not_ return NULL.

Not at all.

Once build_map_info() returns any mapping can go away. Otherwise, why do
you think the caller has to take ->mmap_sem and use find_vma()? If you
were right, build_map_info() could just return the list of vma's instead
of list of mm's.

Oleg.



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Oleg Nesterov
On 03/16, Ravi Bangoria wrote:
>
> On 03/15/2018 08:00 PM, Oleg Nesterov wrote:
> > Note to mention that sdt_find_vma() can return NULL but the callers do
> > vma_offset_to_vaddr(vma) without any check.
>
> If the "mm" we are passing to sdt_find_vma() is returned by
> uprobe_build_map_info(ref_ctr_offset), sdt_find_vma() must
> _not_ return NULL.

Not at all.

Once build_map_info() returns any mapping can go away. Otherwise, why do
you think the caller has to take ->mmap_sem and use find_vma()? If you
were right, build_map_info() could just return the list of vma's instead
of list of mm's.

Oleg.



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Ravi Bangoria


On 03/15/2018 08:31 PM, Oleg Nesterov wrote:
> On 03/13, Ravi Bangoria wrote:
>> +sdt_update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
>> +{
>> +void *kaddr;
>> +struct page *page;
>> +struct vm_area_struct *vma;
>> +int ret = 0;
>> +unsigned short orig = 0;
>> +
>> +if (vaddr == 0)
>> +return -EINVAL;
>> +
>> +ret = get_user_pages_remote(NULL, mm, vaddr, 1,
>> +FOLL_FORCE | FOLL_WRITE, , , NULL);
>> +if (ret <= 0)
>> +return ret;
>> +
>> +kaddr = kmap_atomic(page);
>> +memcpy(, kaddr + (vaddr & ~PAGE_MASK), sizeof(orig));
>> +orig += d;
>> +memcpy(kaddr + (vaddr & ~PAGE_MASK), , sizeof(orig));
>> +kunmap_atomic(kaddr);
> Hmm. Why memcpy? You could simply do
>
>   kaddr = kmap_atomic();
>   unsigned short *ptr = kaddr + (vaddr & ~PAGE_MASK);
>   *ptr += d;
>   kunmap_atomic();

Yes, that should work. Will change it.

Thanks for the review,
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Ravi Bangoria


On 03/15/2018 08:31 PM, Oleg Nesterov wrote:
> On 03/13, Ravi Bangoria wrote:
>> +sdt_update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
>> +{
>> +void *kaddr;
>> +struct page *page;
>> +struct vm_area_struct *vma;
>> +int ret = 0;
>> +unsigned short orig = 0;
>> +
>> +if (vaddr == 0)
>> +return -EINVAL;
>> +
>> +ret = get_user_pages_remote(NULL, mm, vaddr, 1,
>> +FOLL_FORCE | FOLL_WRITE, , , NULL);
>> +if (ret <= 0)
>> +return ret;
>> +
>> +kaddr = kmap_atomic(page);
>> +memcpy(, kaddr + (vaddr & ~PAGE_MASK), sizeof(orig));
>> +orig += d;
>> +memcpy(kaddr + (vaddr & ~PAGE_MASK), , sizeof(orig));
>> +kunmap_atomic(kaddr);
> Hmm. Why memcpy? You could simply do
>
>   kaddr = kmap_atomic();
>   unsigned short *ptr = kaddr + (vaddr & ~PAGE_MASK);
>   *ptr += d;
>   kunmap_atomic();

Yes, that should work. Will change it.

Thanks for the review,
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Ravi Bangoria


On 03/15/2018 08:00 PM, Oleg Nesterov wrote:
> On 03/15, Oleg Nesterov wrote:
>>> +static struct vm_area_struct *
>>> +sdt_find_vma(struct mm_struct *mm, struct trace_uprobe *tu)
>>> +{
>>> +   struct vm_area_struct *tmp;
>>> +
>>> +   for (tmp = mm->mmap; tmp != NULL; tmp = tmp->vm_next)
>>> +   if (sdt_valid_vma(tu, tmp))
>>> +   return tmp;
>>> +
>>> +   return NULL;
>> I can't understand the logic... Lets ignore sdt_valid_vma() for now.
>> The caller has uprobe_map_info, why it can't simply do
>> vma = find_vma(uprobe_map_info->vaddr)? and then check sdt_valid_vma().
> Note to mention that sdt_find_vma() can return NULL but the callers do
> vma_offset_to_vaddr(vma) without any check.

If the "mm" we are passing to sdt_find_vma() is returned by
uprobe_build_map_info(ref_ctr_offset), sdt_find_vma() must
_not_ return NULL.

Thanks for the review,
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Ravi Bangoria


On 03/15/2018 08:00 PM, Oleg Nesterov wrote:
> On 03/15, Oleg Nesterov wrote:
>>> +static struct vm_area_struct *
>>> +sdt_find_vma(struct mm_struct *mm, struct trace_uprobe *tu)
>>> +{
>>> +   struct vm_area_struct *tmp;
>>> +
>>> +   for (tmp = mm->mmap; tmp != NULL; tmp = tmp->vm_next)
>>> +   if (sdt_valid_vma(tu, tmp))
>>> +   return tmp;
>>> +
>>> +   return NULL;
>> I can't understand the logic... Lets ignore sdt_valid_vma() for now.
>> The caller has uprobe_map_info, why it can't simply do
>> vma = find_vma(uprobe_map_info->vaddr)? and then check sdt_valid_vma().
> Note to mention that sdt_find_vma() can return NULL but the callers do
> vma_offset_to_vaddr(vma) without any check.

If the "mm" we are passing to sdt_find_vma() is returned by
uprobe_build_map_info(ref_ctr_offset), sdt_find_vma() must
_not_ return NULL.

Thanks for the review,
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Ravi Bangoria


On 03/15/2018 07:51 PM, Oleg Nesterov wrote:
> On 03/13, Ravi Bangoria wrote:
>> @@ -1053,6 +1056,9 @@ int uprobe_mmap(struct vm_area_struct *vma)
>>  struct uprobe *uprobe, *u;
>>  struct inode *inode;
>>
>> +if (uprobe_mmap_callback)
>> +uprobe_mmap_callback(vma);
>> +
>>  if (no_uprobe_events() || !valid_vma(vma, true))
>>  return 0;
> probe_event_enable() does
>
>   uprobe_register();
>   /* WINDOW */
>   sdt_increment_ref_ctr();
>
> what if uprobe_mmap() is called in between? The counter(s) in this vma
> will be incremented twice, no?

I guess, it's a valid issue with PATCH 5 but should be taken care by PATCH 6.

>
>> +static struct vm_area_struct *
>> +sdt_find_vma(struct mm_struct *mm, struct trace_uprobe *tu)
>> +{
>> +struct vm_area_struct *tmp;
>> +
>> +for (tmp = mm->mmap; tmp != NULL; tmp = tmp->vm_next)
>> +if (sdt_valid_vma(tu, tmp))
>> +return tmp;
>> +
>> +return NULL;
> I can't understand the logic... Lets ignore sdt_valid_vma() for now.
> The caller has uprobe_map_info, why it can't simply do
> vma = find_vma(uprobe_map_info->vaddr)? and then check sdt_valid_vma().

Yes. that should work. Will change it.

Thanks for the review,
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Ravi Bangoria


On 03/15/2018 07:51 PM, Oleg Nesterov wrote:
> On 03/13, Ravi Bangoria wrote:
>> @@ -1053,6 +1056,9 @@ int uprobe_mmap(struct vm_area_struct *vma)
>>  struct uprobe *uprobe, *u;
>>  struct inode *inode;
>>
>> +if (uprobe_mmap_callback)
>> +uprobe_mmap_callback(vma);
>> +
>>  if (no_uprobe_events() || !valid_vma(vma, true))
>>  return 0;
> probe_event_enable() does
>
>   uprobe_register();
>   /* WINDOW */
>   sdt_increment_ref_ctr();
>
> what if uprobe_mmap() is called in between? The counter(s) in this vma
> will be incremented twice, no?

I guess, it's a valid issue with PATCH 5 but should be taken care by PATCH 6.

>
>> +static struct vm_area_struct *
>> +sdt_find_vma(struct mm_struct *mm, struct trace_uprobe *tu)
>> +{
>> +struct vm_area_struct *tmp;
>> +
>> +for (tmp = mm->mmap; tmp != NULL; tmp = tmp->vm_next)
>> +if (sdt_valid_vma(tu, tmp))
>> +return tmp;
>> +
>> +return NULL;
> I can't understand the logic... Lets ignore sdt_valid_vma() for now.
> The caller has uprobe_map_info, why it can't simply do
> vma = find_vma(uprobe_map_info->vaddr)? and then check sdt_valid_vma().

Yes. that should work. Will change it.

Thanks for the review,
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Ravi Bangoria


On 03/15/2018 10:18 PM, Steven Rostedt wrote:
> On Tue, 13 Mar 2018 18:26:00 +0530
> Ravi Bangoria  wrote:
>
>> +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
>> +{
>> +struct uprobe_map_info *info;
>> +struct vm_area_struct *vma;
>> +unsigned long vaddr;
>> +
>> +uprobe_start_dup_mmap();
> Please add a comment here that this function ups the mm ref count for
> each info returned. Otherwise it's hard to know what that mmput() below
> matches.

Sure. Will add it.

Thanks for the review,
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-16 Thread Ravi Bangoria


On 03/15/2018 10:18 PM, Steven Rostedt wrote:
> On Tue, 13 Mar 2018 18:26:00 +0530
> Ravi Bangoria  wrote:
>
>> +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
>> +{
>> +struct uprobe_map_info *info;
>> +struct vm_area_struct *vma;
>> +unsigned long vaddr;
>> +
>> +uprobe_start_dup_mmap();
> Please add a comment here that this function ups the mm ref count for
> each info returned. Otherwise it's hard to know what that mmput() below
> matches.

Sure. Will add it.

Thanks for the review,
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-15 Thread Steven Rostedt
On Tue, 13 Mar 2018 18:26:00 +0530
Ravi Bangoria  wrote:

> +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
> +{
> + struct uprobe_map_info *info;
> + struct vm_area_struct *vma;
> + unsigned long vaddr;
> +
> + uprobe_start_dup_mmap();

Please add a comment here that this function ups the mm ref count for
each info returned. Otherwise it's hard to know what that mmput() below
matches.

-- Steve

> + info = uprobe_build_map_info(tu->inode->i_mapping,
> + tu->ref_ctr_offset, false);
> + if (IS_ERR(info))
> + goto out;
> +
> + while (info) {
> + down_write(>mm->mmap_sem);
> +
> + vma = sdt_find_vma(info->mm, tu);
> + vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
> + sdt_update_ref_ctr(info->mm, vaddr, 1);
> +
> + up_write(>mm->mmap_sem);
> + mmput(info->mm);
> + info = uprobe_free_map_info(info);
> + }
> +
> +out:
> + uprobe_end_dup_mmap();
> +}
> +


Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-15 Thread Steven Rostedt
On Tue, 13 Mar 2018 18:26:00 +0530
Ravi Bangoria  wrote:

> +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
> +{
> + struct uprobe_map_info *info;
> + struct vm_area_struct *vma;
> + unsigned long vaddr;
> +
> + uprobe_start_dup_mmap();

Please add a comment here that this function ups the mm ref count for
each info returned. Otherwise it's hard to know what that mmput() below
matches.

-- Steve

> + info = uprobe_build_map_info(tu->inode->i_mapping,
> + tu->ref_ctr_offset, false);
> + if (IS_ERR(info))
> + goto out;
> +
> + while (info) {
> + down_write(>mm->mmap_sem);
> +
> + vma = sdt_find_vma(info->mm, tu);
> + vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
> + sdt_update_ref_ctr(info->mm, vaddr, 1);
> +
> + up_write(>mm->mmap_sem);
> + mmput(info->mm);
> + info = uprobe_free_map_info(info);
> + }
> +
> +out:
> + uprobe_end_dup_mmap();
> +}
> +


Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-15 Thread Oleg Nesterov
On 03/13, Ravi Bangoria wrote:
>
> +sdt_update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
> +{
> + void *kaddr;
> + struct page *page;
> + struct vm_area_struct *vma;
> + int ret = 0;
> + unsigned short orig = 0;
> +
> + if (vaddr == 0)
> + return -EINVAL;
> +
> + ret = get_user_pages_remote(NULL, mm, vaddr, 1,
> + FOLL_FORCE | FOLL_WRITE, , , NULL);
> + if (ret <= 0)
> + return ret;
> +
> + kaddr = kmap_atomic(page);
> + memcpy(, kaddr + (vaddr & ~PAGE_MASK), sizeof(orig));
> + orig += d;
> + memcpy(kaddr + (vaddr & ~PAGE_MASK), , sizeof(orig));
> + kunmap_atomic(kaddr);

Hmm. Why memcpy? You could simply do

kaddr = kmap_atomic();
unsigned short *ptr = kaddr + (vaddr & ~PAGE_MASK);
*ptr += d;
kunmap_atomic();

Oleg.



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-15 Thread Oleg Nesterov
On 03/13, Ravi Bangoria wrote:
>
> +sdt_update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
> +{
> + void *kaddr;
> + struct page *page;
> + struct vm_area_struct *vma;
> + int ret = 0;
> + unsigned short orig = 0;
> +
> + if (vaddr == 0)
> + return -EINVAL;
> +
> + ret = get_user_pages_remote(NULL, mm, vaddr, 1,
> + FOLL_FORCE | FOLL_WRITE, , , NULL);
> + if (ret <= 0)
> + return ret;
> +
> + kaddr = kmap_atomic(page);
> + memcpy(, kaddr + (vaddr & ~PAGE_MASK), sizeof(orig));
> + orig += d;
> + memcpy(kaddr + (vaddr & ~PAGE_MASK), , sizeof(orig));
> + kunmap_atomic(kaddr);

Hmm. Why memcpy? You could simply do

kaddr = kmap_atomic();
unsigned short *ptr = kaddr + (vaddr & ~PAGE_MASK);
*ptr += d;
kunmap_atomic();

Oleg.



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-15 Thread Oleg Nesterov
On 03/15, Oleg Nesterov wrote:
>
> > +static struct vm_area_struct *
> > +sdt_find_vma(struct mm_struct *mm, struct trace_uprobe *tu)
> > +{
> > +   struct vm_area_struct *tmp;
> > +
> > +   for (tmp = mm->mmap; tmp != NULL; tmp = tmp->vm_next)
> > +   if (sdt_valid_vma(tu, tmp))
> > +   return tmp;
> > +
> > +   return NULL;
>
> I can't understand the logic... Lets ignore sdt_valid_vma() for now.
> The caller has uprobe_map_info, why it can't simply do
> vma = find_vma(uprobe_map_info->vaddr)? and then check sdt_valid_vma().

Note to mention that sdt_find_vma() can return NULL but the callers do
vma_offset_to_vaddr(vma) without any check.

Oleg.



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-15 Thread Oleg Nesterov
On 03/15, Oleg Nesterov wrote:
>
> > +static struct vm_area_struct *
> > +sdt_find_vma(struct mm_struct *mm, struct trace_uprobe *tu)
> > +{
> > +   struct vm_area_struct *tmp;
> > +
> > +   for (tmp = mm->mmap; tmp != NULL; tmp = tmp->vm_next)
> > +   if (sdt_valid_vma(tu, tmp))
> > +   return tmp;
> > +
> > +   return NULL;
>
> I can't understand the logic... Lets ignore sdt_valid_vma() for now.
> The caller has uprobe_map_info, why it can't simply do
> vma = find_vma(uprobe_map_info->vaddr)? and then check sdt_valid_vma().

Note to mention that sdt_find_vma() can return NULL but the callers do
vma_offset_to_vaddr(vma) without any check.

Oleg.



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-15 Thread Oleg Nesterov
On 03/13, Ravi Bangoria wrote:
>
> @@ -1053,6 +1056,9 @@ int uprobe_mmap(struct vm_area_struct *vma)
>   struct uprobe *uprobe, *u;
>   struct inode *inode;
>
> + if (uprobe_mmap_callback)
> + uprobe_mmap_callback(vma);
> +
>   if (no_uprobe_events() || !valid_vma(vma, true))
>   return 0;

probe_event_enable() does

uprobe_register();
/* WINDOW */
sdt_increment_ref_ctr();

what if uprobe_mmap() is called in between? The counter(s) in this vma
will be incremented twice, no?

> +static struct vm_area_struct *
> +sdt_find_vma(struct mm_struct *mm, struct trace_uprobe *tu)
> +{
> + struct vm_area_struct *tmp;
> +
> + for (tmp = mm->mmap; tmp != NULL; tmp = tmp->vm_next)
> + if (sdt_valid_vma(tu, tmp))
> + return tmp;
> +
> + return NULL;

I can't understand the logic... Lets ignore sdt_valid_vma() for now.
The caller has uprobe_map_info, why it can't simply do
vma = find_vma(uprobe_map_info->vaddr)? and then check sdt_valid_vma().

Oleg.



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-15 Thread Oleg Nesterov
On 03/13, Ravi Bangoria wrote:
>
> @@ -1053,6 +1056,9 @@ int uprobe_mmap(struct vm_area_struct *vma)
>   struct uprobe *uprobe, *u;
>   struct inode *inode;
>
> + if (uprobe_mmap_callback)
> + uprobe_mmap_callback(vma);
> +
>   if (no_uprobe_events() || !valid_vma(vma, true))
>   return 0;

probe_event_enable() does

uprobe_register();
/* WINDOW */
sdt_increment_ref_ctr();

what if uprobe_mmap() is called in between? The counter(s) in this vma
will be incremented twice, no?

> +static struct vm_area_struct *
> +sdt_find_vma(struct mm_struct *mm, struct trace_uprobe *tu)
> +{
> + struct vm_area_struct *tmp;
> +
> + for (tmp = mm->mmap; tmp != NULL; tmp = tmp->vm_next)
> + if (sdt_valid_vma(tu, tmp))
> + return tmp;
> +
> + return NULL;

I can't understand the logic... Lets ignore sdt_valid_vma() for now.
The caller has uprobe_map_info, why it can't simply do
vma = find_vma(uprobe_map_info->vaddr)? and then check sdt_valid_vma().

Oleg.



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-15 Thread Ravi Bangoria


On 03/14/2018 10:29 PM, Oleg Nesterov wrote:
> On 03/13, Ravi Bangoria wrote:
>> +static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct 
>> *vma)
>> +{
>> +unsigned long vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
>> +
>> +return tu->ref_ctr_offset &&
>> +vma->vm_file &&
>> +file_inode(vma->vm_file) == tu->inode &&
>> +vma->vm_flags & VM_WRITE &&
>> +vma->vm_start <= vaddr &&
>> +vma->vm_end > vaddr;
>> +}
> Perhaps in this case a simple
>
>   ref_ctr_offset < vma->vm_end - vma->vm_start
>
> check without vma_offset_to_vaddr() makes more sense, but I won't insist.
>

Hmm... I'm not quite sure. Will rethink and get back to you.

>
>> +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
>> +{
>> +struct uprobe_map_info *info;
>> +struct vm_area_struct *vma;
>> +unsigned long vaddr;
>> +
>> +uprobe_start_dup_mmap();
>> +info = uprobe_build_map_info(tu->inode->i_mapping,
>> +tu->ref_ctr_offset, false);
> Hmm. This doesn't look right.
>
> If you need to find all mappings (and avoid the races with fork/dup_mmap) you
> need to take this semaphore for writing, uprobe_start_dup_mmap() can't help.

Oops. Yes. Will change it.

Thanks for the review :)
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-15 Thread Ravi Bangoria


On 03/14/2018 10:29 PM, Oleg Nesterov wrote:
> On 03/13, Ravi Bangoria wrote:
>> +static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct 
>> *vma)
>> +{
>> +unsigned long vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
>> +
>> +return tu->ref_ctr_offset &&
>> +vma->vm_file &&
>> +file_inode(vma->vm_file) == tu->inode &&
>> +vma->vm_flags & VM_WRITE &&
>> +vma->vm_start <= vaddr &&
>> +vma->vm_end > vaddr;
>> +}
> Perhaps in this case a simple
>
>   ref_ctr_offset < vma->vm_end - vma->vm_start
>
> check without vma_offset_to_vaddr() makes more sense, but I won't insist.
>

Hmm... I'm not quite sure. Will rethink and get back to you.

>
>> +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
>> +{
>> +struct uprobe_map_info *info;
>> +struct vm_area_struct *vma;
>> +unsigned long vaddr;
>> +
>> +uprobe_start_dup_mmap();
>> +info = uprobe_build_map_info(tu->inode->i_mapping,
>> +tu->ref_ctr_offset, false);
> Hmm. This doesn't look right.
>
> If you need to find all mappings (and avoid the races with fork/dup_mmap) you
> need to take this semaphore for writing, uprobe_start_dup_mmap() can't help.

Oops. Yes. Will change it.

Thanks for the review :)
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-14 Thread Steven Rostedt
On Tue, 13 Mar 2018 18:26:00 +0530
Ravi Bangoria  wrote:

>  include/linux/uprobes.h |   2 +
>  kernel/events/uprobes.c |   6 ++
>  kernel/trace/trace_uprobe.c | 172 
> +++-

I'm currently traveling, but I'll try to look at it in a week or two.

-- Steve

>  3 files changed, 178 insertions(+), 2 deletions(-)
> 
>


Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-14 Thread Steven Rostedt
On Tue, 13 Mar 2018 18:26:00 +0530
Ravi Bangoria  wrote:

>  include/linux/uprobes.h |   2 +
>  kernel/events/uprobes.c |   6 ++
>  kernel/trace/trace_uprobe.c | 172 
> +++-

I'm currently traveling, but I'll try to look at it in a week or two.

-- Steve

>  3 files changed, 178 insertions(+), 2 deletions(-)
> 
>


Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-14 Thread Oleg Nesterov
On 03/13, Ravi Bangoria wrote:
>
> +static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct 
> *vma)
> +{
> + unsigned long vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
> +
> + return tu->ref_ctr_offset &&
> + vma->vm_file &&
> + file_inode(vma->vm_file) == tu->inode &&
> + vma->vm_flags & VM_WRITE &&
> + vma->vm_start <= vaddr &&
> + vma->vm_end > vaddr;
> +}

Perhaps in this case a simple

ref_ctr_offset < vma->vm_end - vma->vm_start

check without vma_offset_to_vaddr() makes more sense, but I won't insist.



> +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
> +{
> + struct uprobe_map_info *info;
> + struct vm_area_struct *vma;
> + unsigned long vaddr;
> +
> + uprobe_start_dup_mmap();
> + info = uprobe_build_map_info(tu->inode->i_mapping,
> + tu->ref_ctr_offset, false);

Hmm. This doesn't look right.

If you need to find all mappings (and avoid the races with fork/dup_mmap) you
need to take this semaphore for writing, uprobe_start_dup_mmap() can't help.

Oleg.



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-14 Thread Oleg Nesterov
On 03/13, Ravi Bangoria wrote:
>
> +static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct 
> *vma)
> +{
> + unsigned long vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
> +
> + return tu->ref_ctr_offset &&
> + vma->vm_file &&
> + file_inode(vma->vm_file) == tu->inode &&
> + vma->vm_flags & VM_WRITE &&
> + vma->vm_start <= vaddr &&
> + vma->vm_end > vaddr;
> +}

Perhaps in this case a simple

ref_ctr_offset < vma->vm_end - vma->vm_start

check without vma_offset_to_vaddr() makes more sense, but I won't insist.



> +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
> +{
> + struct uprobe_map_info *info;
> + struct vm_area_struct *vma;
> + unsigned long vaddr;
> +
> + uprobe_start_dup_mmap();
> + info = uprobe_build_map_info(tu->inode->i_mapping,
> + tu->ref_ctr_offset, false);

Hmm. This doesn't look right.

If you need to find all mappings (and avoid the races with fork/dup_mmap) you
need to take this semaphore for writing, uprobe_start_dup_mmap() can't help.

Oleg.



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-14 Thread Ravi Bangoria
Hi Masami,

On 03/14/2018 07:18 PM, Masami Hiramatsu wrote:
> Hi Ravi,
>
> On Tue, 13 Mar 2018 18:26:00 +0530
> Ravi Bangoria  wrote:
>
>> Userspace Statically Defined Tracepoints[1] are dtrace style markers
>> inside userspace applications. These markers are added by developer at
>> important places in the code. Each marker source expands to a single
>> nop instruction in the compiled code but there may be additional
>> overhead for computing the marker arguments which expands to couple of
>> instructions. In case the overhead is more, execution of it can be
>> ommited by runtime if() condition when no one is tracing on the marker:
>>
>> if (reference_counter > 0) {
>> Execute marker instructions;
>> }
>>
>> Default value of reference counter is 0. Tracer has to increment the
>> reference counter before tracing on a marker and decrement it when
>> done with the tracing.
>>
>> Implement the reference counter logic in trace_uprobe, leaving core
>> uprobe infrastructure as is, except one new callback from uprobe_mmap()
>> to trace_uprobe.
>>
>> trace_uprobe definition with reference counter will now be:
>>
>>   :[(ref_ctr_offset)]
> Would you mean 
> :()
> ?
>
> or use "[]" for delimiter?

[] indicates optional field.

> Since,
>
>> @@ -454,6 +458,26 @@ static int create_trace_uprobe(int argc, char **argv)
>>  goto fail_address_parse;
>>  }
>>  
>> +/* Parse reference counter offset if specified. */
>> +rctr = strchr(arg, '(');
> This seems you choose "()" for delimiter.

Correct.

>> +if (rctr) {
>> +rctr_end = strchr(arg, ')');
>   rctr_end = strchr(rctr, ')');
>
> ? since we are sure rctr != NULL.

Yes. we can use rctr instead of arg.

>> +if (rctr > rctr_end || *(rctr_end + 1) != 0) {
>> +ret = -EINVAL;
>> +pr_info("Invalid reference counter offset.\n");
>> +goto fail_address_parse;
>> +}
>
> Also
>
>> +
>> +*rctr++ = 0;
>> +*rctr_end = 0;
> Please consider to use '\0' for nul;

Sure. Will change it.

Thanks for the review :)
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-14 Thread Ravi Bangoria
Hi Masami,

On 03/14/2018 07:18 PM, Masami Hiramatsu wrote:
> Hi Ravi,
>
> On Tue, 13 Mar 2018 18:26:00 +0530
> Ravi Bangoria  wrote:
>
>> Userspace Statically Defined Tracepoints[1] are dtrace style markers
>> inside userspace applications. These markers are added by developer at
>> important places in the code. Each marker source expands to a single
>> nop instruction in the compiled code but there may be additional
>> overhead for computing the marker arguments which expands to couple of
>> instructions. In case the overhead is more, execution of it can be
>> ommited by runtime if() condition when no one is tracing on the marker:
>>
>> if (reference_counter > 0) {
>> Execute marker instructions;
>> }
>>
>> Default value of reference counter is 0. Tracer has to increment the
>> reference counter before tracing on a marker and decrement it when
>> done with the tracing.
>>
>> Implement the reference counter logic in trace_uprobe, leaving core
>> uprobe infrastructure as is, except one new callback from uprobe_mmap()
>> to trace_uprobe.
>>
>> trace_uprobe definition with reference counter will now be:
>>
>>   :[(ref_ctr_offset)]
> Would you mean 
> :()
> ?
>
> or use "[]" for delimiter?

[] indicates optional field.

> Since,
>
>> @@ -454,6 +458,26 @@ static int create_trace_uprobe(int argc, char **argv)
>>  goto fail_address_parse;
>>  }
>>  
>> +/* Parse reference counter offset if specified. */
>> +rctr = strchr(arg, '(');
> This seems you choose "()" for delimiter.

Correct.

>> +if (rctr) {
>> +rctr_end = strchr(arg, ')');
>   rctr_end = strchr(rctr, ')');
>
> ? since we are sure rctr != NULL.

Yes. we can use rctr instead of arg.

>> +if (rctr > rctr_end || *(rctr_end + 1) != 0) {
>> +ret = -EINVAL;
>> +pr_info("Invalid reference counter offset.\n");
>> +goto fail_address_parse;
>> +}
>
> Also
>
>> +
>> +*rctr++ = 0;
>> +*rctr_end = 0;
> Please consider to use '\0' for nul;

Sure. Will change it.

Thanks for the review :)
Ravi



Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-14 Thread Masami Hiramatsu
Hi Ravi,

On Tue, 13 Mar 2018 18:26:00 +0530
Ravi Bangoria  wrote:

> Userspace Statically Defined Tracepoints[1] are dtrace style markers
> inside userspace applications. These markers are added by developer at
> important places in the code. Each marker source expands to a single
> nop instruction in the compiled code but there may be additional
> overhead for computing the marker arguments which expands to couple of
> instructions. In case the overhead is more, execution of it can be
> ommited by runtime if() condition when no one is tracing on the marker:
> 
> if (reference_counter > 0) {
> Execute marker instructions;
> }
> 
> Default value of reference counter is 0. Tracer has to increment the
> reference counter before tracing on a marker and decrement it when
> done with the tracing.
> 
> Implement the reference counter logic in trace_uprobe, leaving core
> uprobe infrastructure as is, except one new callback from uprobe_mmap()
> to trace_uprobe.
> 
> trace_uprobe definition with reference counter will now be:
> 
>   :[(ref_ctr_offset)]

Would you mean 
:()
?

or use "[]" for delimiter?

Since,

> @@ -454,6 +458,26 @@ static int create_trace_uprobe(int argc, char **argv)
>   goto fail_address_parse;
>   }
>  
> + /* Parse reference counter offset if specified. */
> + rctr = strchr(arg, '(');

This seems you choose "()" for delimiter.

> + if (rctr) {
> + rctr_end = strchr(arg, ')');

rctr_end = strchr(rctr, ')');

? since we are sure rctr != NULL.

> + if (rctr > rctr_end || *(rctr_end + 1) != 0) {
> + ret = -EINVAL;
> + pr_info("Invalid reference counter offset.\n");
> + goto fail_address_parse;
> + }


Also

> +
> + *rctr++ = 0;
> + *rctr_end = 0;

Please consider to use '\0' for nul;

Thanks,



> + ret = kstrtoul(rctr, 0, _ctr_offset);
> + if (ret) {
> + pr_info("Invalid reference counter offset.\n");
> + goto fail_address_parse;
> + }
> + }
> +
> + /* Parse uprobe offset. */
>   ret = kstrtoul(arg, 0, );
>   if (ret)
>   goto fail_address_parse;
> @@ -488,6 +512,7 @@ static int create_trace_uprobe(int argc, char **argv)
>   goto fail_address_parse;
>   }
>   tu->offset = offset;
> + tu->ref_ctr_offset = ref_ctr_offset;
>   tu->inode = inode;
>   tu->filename = kstrdup(filename, GFP_KERNEL);
>  
> @@ -620,6 +645,8 @@ static int probes_seq_show(struct seq_file *m, void *v)
>   break;
>   }
>   }
> + if (tu->ref_ctr_offset)
> + seq_printf(m, "(0x%lx)", tu->ref_ctr_offset);
>  
>   for (i = 0; i < tu->tp.nr_args; i++)
>   seq_printf(m, " %s=%s", tu->tp.args[i].name, 
> tu->tp.args[i].comm);
> @@ -894,6 +921,139 @@ static void uretprobe_trace_func(struct trace_uprobe 
> *tu, unsigned long func,
>   return trace_handle_return(s);
>  }
>  
> +static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct 
> *vma)
> +{
> + unsigned long vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
> +
> + return tu->ref_ctr_offset &&
> + vma->vm_file &&
> + file_inode(vma->vm_file) == tu->inode &&
> + vma->vm_flags & VM_WRITE &&
> + vma->vm_start <= vaddr &&
> + vma->vm_end > vaddr;
> +}
> +
> +static struct vm_area_struct *
> +sdt_find_vma(struct mm_struct *mm, struct trace_uprobe *tu)
> +{
> + struct vm_area_struct *tmp;
> +
> + for (tmp = mm->mmap; tmp != NULL; tmp = tmp->vm_next)
> + if (sdt_valid_vma(tu, tmp))
> + return tmp;
> +
> + return NULL;
> +}
> +
> +/*
> + * Reference count gate the invocation of probe. If present,
> + * by default reference count is 0. One needs to increment
> + * it before tracing the probe and decrement it when done.
> + */
> +static int
> +sdt_update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
> +{
> + void *kaddr;
> + struct page *page;
> + struct vm_area_struct *vma;
> + int ret = 0;
> + unsigned short orig = 0;
> +
> + if (vaddr == 0)
> + return -EINVAL;
> +
> + ret = get_user_pages_remote(NULL, mm, vaddr, 1,
> + FOLL_FORCE | FOLL_WRITE, , , NULL);
> + if (ret <= 0)
> + return ret;
> +
> + kaddr = kmap_atomic(page);
> + memcpy(, kaddr + (vaddr & ~PAGE_MASK), sizeof(orig));
> + orig += d;
> + memcpy(kaddr + (vaddr & ~PAGE_MASK), , sizeof(orig));
> + kunmap_atomic(kaddr);
> +
> + put_page(page);
> + return 0;
> +}
> +
> +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
> +{
> + struct uprobe_map_info *info;
> + struct vm_area_struct *vma;
> + unsigned long vaddr;
> +
> + uprobe_start_dup_mmap();
> + 

Re: [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-14 Thread Masami Hiramatsu
Hi Ravi,

On Tue, 13 Mar 2018 18:26:00 +0530
Ravi Bangoria  wrote:

> Userspace Statically Defined Tracepoints[1] are dtrace style markers
> inside userspace applications. These markers are added by developer at
> important places in the code. Each marker source expands to a single
> nop instruction in the compiled code but there may be additional
> overhead for computing the marker arguments which expands to couple of
> instructions. In case the overhead is more, execution of it can be
> ommited by runtime if() condition when no one is tracing on the marker:
> 
> if (reference_counter > 0) {
> Execute marker instructions;
> }
> 
> Default value of reference counter is 0. Tracer has to increment the
> reference counter before tracing on a marker and decrement it when
> done with the tracing.
> 
> Implement the reference counter logic in trace_uprobe, leaving core
> uprobe infrastructure as is, except one new callback from uprobe_mmap()
> to trace_uprobe.
> 
> trace_uprobe definition with reference counter will now be:
> 
>   :[(ref_ctr_offset)]

Would you mean 
:()
?

or use "[]" for delimiter?

Since,

> @@ -454,6 +458,26 @@ static int create_trace_uprobe(int argc, char **argv)
>   goto fail_address_parse;
>   }
>  
> + /* Parse reference counter offset if specified. */
> + rctr = strchr(arg, '(');

This seems you choose "()" for delimiter.

> + if (rctr) {
> + rctr_end = strchr(arg, ')');

rctr_end = strchr(rctr, ')');

? since we are sure rctr != NULL.

> + if (rctr > rctr_end || *(rctr_end + 1) != 0) {
> + ret = -EINVAL;
> + pr_info("Invalid reference counter offset.\n");
> + goto fail_address_parse;
> + }


Also

> +
> + *rctr++ = 0;
> + *rctr_end = 0;

Please consider to use '\0' for nul;

Thanks,



> + ret = kstrtoul(rctr, 0, _ctr_offset);
> + if (ret) {
> + pr_info("Invalid reference counter offset.\n");
> + goto fail_address_parse;
> + }
> + }
> +
> + /* Parse uprobe offset. */
>   ret = kstrtoul(arg, 0, );
>   if (ret)
>   goto fail_address_parse;
> @@ -488,6 +512,7 @@ static int create_trace_uprobe(int argc, char **argv)
>   goto fail_address_parse;
>   }
>   tu->offset = offset;
> + tu->ref_ctr_offset = ref_ctr_offset;
>   tu->inode = inode;
>   tu->filename = kstrdup(filename, GFP_KERNEL);
>  
> @@ -620,6 +645,8 @@ static int probes_seq_show(struct seq_file *m, void *v)
>   break;
>   }
>   }
> + if (tu->ref_ctr_offset)
> + seq_printf(m, "(0x%lx)", tu->ref_ctr_offset);
>  
>   for (i = 0; i < tu->tp.nr_args; i++)
>   seq_printf(m, " %s=%s", tu->tp.args[i].name, 
> tu->tp.args[i].comm);
> @@ -894,6 +921,139 @@ static void uretprobe_trace_func(struct trace_uprobe 
> *tu, unsigned long func,
>   return trace_handle_return(s);
>  }
>  
> +static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct 
> *vma)
> +{
> + unsigned long vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
> +
> + return tu->ref_ctr_offset &&
> + vma->vm_file &&
> + file_inode(vma->vm_file) == tu->inode &&
> + vma->vm_flags & VM_WRITE &&
> + vma->vm_start <= vaddr &&
> + vma->vm_end > vaddr;
> +}
> +
> +static struct vm_area_struct *
> +sdt_find_vma(struct mm_struct *mm, struct trace_uprobe *tu)
> +{
> + struct vm_area_struct *tmp;
> +
> + for (tmp = mm->mmap; tmp != NULL; tmp = tmp->vm_next)
> + if (sdt_valid_vma(tu, tmp))
> + return tmp;
> +
> + return NULL;
> +}
> +
> +/*
> + * Reference count gate the invocation of probe. If present,
> + * by default reference count is 0. One needs to increment
> + * it before tracing the probe and decrement it when done.
> + */
> +static int
> +sdt_update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
> +{
> + void *kaddr;
> + struct page *page;
> + struct vm_area_struct *vma;
> + int ret = 0;
> + unsigned short orig = 0;
> +
> + if (vaddr == 0)
> + return -EINVAL;
> +
> + ret = get_user_pages_remote(NULL, mm, vaddr, 1,
> + FOLL_FORCE | FOLL_WRITE, , , NULL);
> + if (ret <= 0)
> + return ret;
> +
> + kaddr = kmap_atomic(page);
> + memcpy(, kaddr + (vaddr & ~PAGE_MASK), sizeof(orig));
> + orig += d;
> + memcpy(kaddr + (vaddr & ~PAGE_MASK), , sizeof(orig));
> + kunmap_atomic(kaddr);
> +
> + put_page(page);
> + return 0;
> +}
> +
> +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
> +{
> + struct uprobe_map_info *info;
> + struct vm_area_struct *vma;
> + unsigned long vaddr;
> +
> + uprobe_start_dup_mmap();
> + info = 

[PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-13 Thread Ravi Bangoria
Userspace Statically Defined Tracepoints[1] are dtrace style markers
inside userspace applications. These markers are added by developer at
important places in the code. Each marker source expands to a single
nop instruction in the compiled code but there may be additional
overhead for computing the marker arguments which expands to couple of
instructions. In case the overhead is more, execution of it can be
ommited by runtime if() condition when no one is tracing on the marker:

if (reference_counter > 0) {
Execute marker instructions;
}

Default value of reference counter is 0. Tracer has to increment the
reference counter before tracing on a marker and decrement it when
done with the tracing.

Implement the reference counter logic in trace_uprobe, leaving core
uprobe infrastructure as is, except one new callback from uprobe_mmap()
to trace_uprobe.

trace_uprobe definition with reference counter will now be:

  :[(ref_ctr_offset)]

There are two different cases while enabling the marker,
 1. Trace existing process. In this case, find all suitable processes
and increment the reference counter in them.
 2. Enable trace before running target binary. In this case, all mmaps
will get notified to trace_uprobe and trace_uprobe will increment
the reference counter if corresponding uprobe is enabled.

At the time of disabling probes, decrement reference counter in all
existing target processes.

[1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation

Note: 'reference counter' is called as 'semaphore' in original Dtrace
(or Systemtap, bcc and even in ELF) documentation and code. But the
term 'semaphore' is misleading in this context. This is just a counter
used to hold number of tracers tracing on a marker. This is not really
used for any synchronization. So we are referring it as 'reference
counter' in kernel / perf code.

Signed-off-by: Ravi Bangoria 
Signed-off-by: Fengguang Wu 
[Fengguang reported/fixed build failure in RFC patch]
---
 include/linux/uprobes.h |   2 +
 kernel/events/uprobes.c |   6 ++
 kernel/trace/trace_uprobe.c | 172 +++-
 3 files changed, 178 insertions(+), 2 deletions(-)

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 7bd2760..2d4df65 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -122,6 +122,8 @@ struct uprobe_map_info {
unsigned long vaddr;
 };
 
+extern void (*uprobe_mmap_callback)(struct vm_area_struct *vma);
+
 extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned 
long vaddr);
 extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, 
unsigned long vaddr);
 extern bool is_swbp_insn(uprobe_opcode_t *insn);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index e7830b8..06821bb 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1041,6 +1041,9 @@ static void build_probe_list(struct inode *inode,
spin_unlock(_treelock);
 }
 
+/* Rightnow the only user of this is trace_uprobe. */
+void (*uprobe_mmap_callback)(struct vm_area_struct *vma);
+
 /*
  * Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
  *
@@ -1053,6 +1056,9 @@ int uprobe_mmap(struct vm_area_struct *vma)
struct uprobe *uprobe, *u;
struct inode *inode;
 
+   if (uprobe_mmap_callback)
+   uprobe_mmap_callback(vma);
+
if (no_uprobe_events() || !valid_vma(vma, true))
return 0;
 
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 2014f43..b6c9b48 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -25,6 +25,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "trace_probe.h"
 
@@ -58,6 +60,7 @@ struct trace_uprobe {
struct inode*inode;
char*filename;
unsigned long   offset;
+   unsigned long   ref_ctr_offset;
unsigned long   nhit;
struct trace_probe  tp;
 };
@@ -362,10 +365,10 @@ static int create_trace_uprobe(int argc, char **argv)
 {
struct trace_uprobe *tu;
struct inode *inode;
-   char *arg, *event, *group, *filename;
+   char *arg, *event, *group, *filename, *rctr, *rctr_end;
char buf[MAX_EVENT_NAME_LEN];
struct path path;
-   unsigned long offset;
+   unsigned long offset, ref_ctr_offset;
bool is_delete, is_return;
int i, ret;
 
@@ -375,6 +378,7 @@ static int create_trace_uprobe(int argc, char **argv)
is_return = false;
event = NULL;
group = NULL;
+   ref_ctr_offset = 0;
 
/* argc must be >= 1 */
if (argv[0][0] == '-')
@@ -454,6 +458,26 @@ static int create_trace_uprobe(int argc, char **argv)
goto fail_address_parse;
}
 
+   /* 

[PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore)

2018-03-13 Thread Ravi Bangoria
Userspace Statically Defined Tracepoints[1] are dtrace style markers
inside userspace applications. These markers are added by developer at
important places in the code. Each marker source expands to a single
nop instruction in the compiled code but there may be additional
overhead for computing the marker arguments which expands to couple of
instructions. In case the overhead is more, execution of it can be
ommited by runtime if() condition when no one is tracing on the marker:

if (reference_counter > 0) {
Execute marker instructions;
}

Default value of reference counter is 0. Tracer has to increment the
reference counter before tracing on a marker and decrement it when
done with the tracing.

Implement the reference counter logic in trace_uprobe, leaving core
uprobe infrastructure as is, except one new callback from uprobe_mmap()
to trace_uprobe.

trace_uprobe definition with reference counter will now be:

  :[(ref_ctr_offset)]

There are two different cases while enabling the marker,
 1. Trace existing process. In this case, find all suitable processes
and increment the reference counter in them.
 2. Enable trace before running target binary. In this case, all mmaps
will get notified to trace_uprobe and trace_uprobe will increment
the reference counter if corresponding uprobe is enabled.

At the time of disabling probes, decrement reference counter in all
existing target processes.

[1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation

Note: 'reference counter' is called as 'semaphore' in original Dtrace
(or Systemtap, bcc and even in ELF) documentation and code. But the
term 'semaphore' is misleading in this context. This is just a counter
used to hold number of tracers tracing on a marker. This is not really
used for any synchronization. So we are referring it as 'reference
counter' in kernel / perf code.

Signed-off-by: Ravi Bangoria 
Signed-off-by: Fengguang Wu 
[Fengguang reported/fixed build failure in RFC patch]
---
 include/linux/uprobes.h |   2 +
 kernel/events/uprobes.c |   6 ++
 kernel/trace/trace_uprobe.c | 172 +++-
 3 files changed, 178 insertions(+), 2 deletions(-)

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 7bd2760..2d4df65 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -122,6 +122,8 @@ struct uprobe_map_info {
unsigned long vaddr;
 };
 
+extern void (*uprobe_mmap_callback)(struct vm_area_struct *vma);
+
 extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned 
long vaddr);
 extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, 
unsigned long vaddr);
 extern bool is_swbp_insn(uprobe_opcode_t *insn);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index e7830b8..06821bb 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1041,6 +1041,9 @@ static void build_probe_list(struct inode *inode,
spin_unlock(_treelock);
 }
 
+/* Rightnow the only user of this is trace_uprobe. */
+void (*uprobe_mmap_callback)(struct vm_area_struct *vma);
+
 /*
  * Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
  *
@@ -1053,6 +1056,9 @@ int uprobe_mmap(struct vm_area_struct *vma)
struct uprobe *uprobe, *u;
struct inode *inode;
 
+   if (uprobe_mmap_callback)
+   uprobe_mmap_callback(vma);
+
if (no_uprobe_events() || !valid_vma(vma, true))
return 0;
 
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 2014f43..b6c9b48 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -25,6 +25,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "trace_probe.h"
 
@@ -58,6 +60,7 @@ struct trace_uprobe {
struct inode*inode;
char*filename;
unsigned long   offset;
+   unsigned long   ref_ctr_offset;
unsigned long   nhit;
struct trace_probe  tp;
 };
@@ -362,10 +365,10 @@ static int create_trace_uprobe(int argc, char **argv)
 {
struct trace_uprobe *tu;
struct inode *inode;
-   char *arg, *event, *group, *filename;
+   char *arg, *event, *group, *filename, *rctr, *rctr_end;
char buf[MAX_EVENT_NAME_LEN];
struct path path;
-   unsigned long offset;
+   unsigned long offset, ref_ctr_offset;
bool is_delete, is_return;
int i, ret;
 
@@ -375,6 +378,7 @@ static int create_trace_uprobe(int argc, char **argv)
is_return = false;
event = NULL;
group = NULL;
+   ref_ctr_offset = 0;
 
/* argc must be >= 1 */
if (argv[0][0] == '-')
@@ -454,6 +458,26 @@ static int create_trace_uprobe(int argc, char **argv)
goto fail_address_parse;
}
 
+   /* Parse reference counter offset if specified. */
+   rctr