[PATCH] perf svghelper: fix memory leak in svg_build_topology_map
Fix leak of memory pointed to by t.sib_thr and t.sib_core in svg_build_topology_map in the non-error path. Signed-off-by: Li Bin --- tools/perf/util/svghelper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 96f941e..d50955f 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -754,6 +754,7 @@ int svg_build_topology_map(struct perf_env *env) int i, nr_cpus; struct topology t; char *sib_core, *sib_thr; + int ret = -1; nr_cpus = min(env->nr_cpus_online, MAX_NR_CPUS); @@ -798,12 +799,11 @@ int svg_build_topology_map(struct perf_env *env) topology_map[i] = -1; scan_core_topology(topology_map, &t, nr_cpus); - - return 0; + ret = 0; exit: zfree(&t.sib_core); zfree(&t.sib_thr); - return -1; + return ret; } -- 1.7.12.4
[PATCH] sched: fix typo in error message
Signed-off-by: Li Bin --- kernel/sched/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 64cc564..cf15c1c 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1618,7 +1618,7 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve if (!cpumask_subset(sched_domain_span(child), sched_domain_span(sd))) { - pr_err("BUG: arch topology borken\n"); + pr_err("BUG: arch topology broken\n"); #ifdef CONFIG_SCHED_DEBUG pr_err(" the %s domain not a subset of the %s domain\n", child->name, sd->name); -- 1.7.12.4
[PATCH v2] prctl: fix compat handling for prctl
The member auxv in prctl_mm_map structure which be shared with userspace is pointer type, but the kernel supporting COMPAT didn't handle it. This patch fix the compat handling for prctl syscall. Signed-off-by: Li Bin --- kernel/sys.c | 42 ++ 1 file changed, 42 insertions(+) diff --git a/kernel/sys.c b/kernel/sys.c index ad69218..d4259938 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1969,6 +1969,26 @@ static int validate_prctl_map(struct prctl_mm_map *prctl_map) } #ifdef CONFIG_CHECKPOINT_RESTORE + +#ifdef CONFIG_COMPAT +struct compat_prctl_mm_map { + __u64 start_code; /* code section bounds */ + __u64 end_code; + __u64 start_data; /* data section bounds */ + __u64 end_data; + __u64 start_brk; /* heap for brk() syscall */ + __u64 brk; + __u64 start_stack;/* stack starts at */ + __u64 arg_start; /* command line arguments bounds */ + __u64 arg_end; + __u64 env_start; /* environment variables bounds */ + __u64 env_end; + compat_uptr_t auxv; /* auxiliary vector */ + __u32 auxv_size; /* vector size */ + __u32 exe_fd; /* /proc/$pid/exe link file */ +}; +#endif + static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size) { struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, }; @@ -1986,6 +2006,28 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data if (data_size != sizeof(prctl_map)) return -EINVAL; +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) { + struct compat_prctl_mm_map prctl_map32; + if (copy_from_user(&prctl_map32, addr, sizeof(prctl_map32))) + return -EFAULT; + + prctl_map.start_code = prctl_map32.start_code; + prctl_map.end_code = prctl_map32.end_code; + prctl_map.start_data = prctl_map32.start_data; + prctl_map.end_data = prctl_map32.end_data; + prctl_map.start_brk = prctl_map32.start_brk; + prctl_map.brk = prctl_map32.brk; + prctl_map.start_stack = prctl_map32.start_stack; + prctl_map.arg_start = prctl_map32.arg_start; + prctl_map.arg_end = prctl_map32.arg_end; + prctl_map.env_start = 
prctl_map32.env_start; + prctl_map.env_end = prctl_map32.env_end; + prctl_map.auxv = compat_ptr(prctl_map32.auxv); + prctl_map.auxv_size = prctl_map32.auxv_size; + prctl_map.exe_fd = prctl_map32.exe_fd; + } else +#endif if (copy_from_user(&prctl_map, addr, sizeof(prctl_map))) return -EFAULT; -- 1.7.12.4
[PATCH] prctl: fix compat handling for prctl
The member auxv in prctl_mm_map structure which be shared with userspace is pointer type, but the kernel supporting COMPAT didn't handle it. This patch fix the compat handling for prctl syscall. Signed-off-by: Li Bin --- kernel/sys.c | 41 + 1 file changed, 41 insertions(+) diff --git a/kernel/sys.c b/kernel/sys.c index ad69218..03b9731 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1968,6 +1968,25 @@ static int validate_prctl_map(struct prctl_mm_map *prctl_map) return error; } +#ifdef CONFIG_COMPAT +struct compat_prctl_mm_map { + __u64 start_code; /* code section bounds */ + __u64 end_code; + __u64 start_data; /* data section bounds */ + __u64 end_data; + __u64 start_brk; /* heap for brk() syscall */ + __u64 brk; + __u64 start_stack;/* stack starts at */ + __u64 arg_start; /* command line arguments bounds */ + __u64 arg_end; + __u64 env_start; /* environment variables bounds */ + __u64 env_end; + compat_uptr_t auxv; /* auxiliary vector */ + __u32 auxv_size; /* vector size */ + __u32 exe_fd; /* /proc/$pid/exe link file */ +}; +#endif + #ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size) { @@ -1986,6 +2005,28 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data if (data_size != sizeof(prctl_map)) return -EINVAL; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + struct compat_prctl_mm_map prctl_map32; + if (copy_from_user(&prctl_map32, addr, sizeof(prctl_map32))) + return -EFAULT; + + prctl_map.start_code = prctl_map32.start_code; + prctl_map.end_code = prctl_map32.end_code; + prctl_map.start_data = prctl_map32.start_data; + prctl_map.end_data = prctl_map32.end_data; + prctl_map.start_brk = prctl_map32.start_brk; + prctl_map.brk = prctl_map32.brk; + prctl_map.start_stack = prctl_map32.start_stack; + prctl_map.arg_start = prctl_map32.arg_start; + prctl_map.arg_end = prctl_map32.arg_end; + prctl_map.env_start = prctl_map32.env_start; + prctl_map.env_end = 
prctl_map32.env_end; + prctl_map.auxv = compat_ptr(prctl_map32.auxv); + prctl_map.auxv_size = prctl_map32.auxv_size; + prctl_map.exe_fd = prctl_map32.exe_fd; + } else +#endif if (copy_from_user(&prctl_map, addr, sizeof(prctl_map))) return -EFAULT; -- 1.7.12.4
[PATCH v2 1/2] sched/rt.c: pick and check task if double_lock_balance() unlock the rq
From: Zhou Chengming push_rt_task() pick the first pushable task and find an eligible lowest_rq, then double_lock_balance(rq, lowest_rq). So if double_lock_balance() unlock the rq (when double_lock_balance() return 1), we have to check if this task is still on the rq. The problem is that the check conditions are not sufficient: if (unlikely(task_rq(task) != rq || !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || !rt_task(task) || !task_on_rq_queued(task))) { cpu2cpu1cpu0 push_rt_task(rq1) pick task_A on rq1 find rq0 double_lock_balance(rq1, rq0) unlock(rq1) rq1 __schedule pick task_A run task_A sleep (dequeued) lock(rq0) lock(rq1) do_above_check(task_A) task_rq(task_A) == rq1 cpus_allowed unchanged task_running == false rt_task(task_A) == true try_to_wake_up(task_A) select_cpu = cpu3 enqueue(rq3, task_A) task_A->on_rq = 1 task_on_rq_queued(task_A) above_check passed, return rq0 ... migrate task_A from rq1 to rq0 So we can't rely on these checks of task_A to make sure the task_A is still on the rq1, even though we hold the rq1->lock. This patch will repick the first pushable task to be sure the task is still on the rq. 
Signed-off-by: Zhou Chengming Signed-off-by: Li Bin Acked-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) --- kernel/sched/rt.c | 50 -- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index aad49451..ff3bfce 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1695,6 +1695,26 @@ static int find_lowest_rq(struct task_struct *task) return -1; } +static struct task_struct *pick_next_pushable_task(struct rq *rq) +{ + struct task_struct *p; + + if (!has_pushable_tasks(rq)) + return NULL; + + p = plist_first_entry(&rq->rt.pushable_tasks, + struct task_struct, pushable_tasks); + + BUG_ON(rq->cpu != task_cpu(p)); + BUG_ON(task_current(rq, p)); + BUG_ON(p->nr_cpus_allowed <= 1); + + BUG_ON(!task_on_rq_queued(p)); + BUG_ON(!rt_task(p)); + + return p; +} + /* Will lock the rq it finds */ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) { @@ -1722,17 +1742,15 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) /* if the prio of this runqueue changed, try again */ if (double_lock_balance(rq, lowest_rq)) { + struct task_struct *next_task; /* * We had to unlock the run queue. In * the mean time, task could have * migrated already or had its affinity changed. -* Also make sure that it wasn't scheduled on its rq. 
*/ - if (unlikely(task_rq(task) != rq || -!cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || -task_running(rq, task) || -!rt_task(task) || -!task_on_rq_queued(task))) { + next_task = pick_next_pushable_task(rq); + if (unlikely(next_task != task || +!cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed))) { double_unlock_balance(rq, lowest_rq); lowest_rq = NULL; @@ -1752,26 +1770,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) return lowest_rq; } -static struct task_struct *pick_next_pushable_task(struct rq *rq) -{ - struct task_struct *p; - - if (!has_pushable_tasks(rq)) - return NULL; - - p = plist_first_entry(&rq->rt.pushable_tasks, - struct task_struct, pushable_tasks); - - BUG_ON(rq->cpu != task_cpu(p)); - BUG_ON(task_current(rq, p)); - BUG_ON(p->nr_cpus_allowed <= 1); - - BUG_ON(!task_on_rq_queued(p)); - BUG_ON(!rt_task(p)); - - return p; -} - /* * If the current CPU has more than one RT task, see if the non * running task can migrate over to a CPU that is running a task -- 1.7.12.4
[PATCH v2 2/2] sched/deadline.c: pick and check task if double_lock_balance() unlock the rq
push_dl_task() pick the first pushable task and find an eligible lowest_rq, then double_lock_balance(rq, lowest_rq). So if double_lock_balance() unlock the rq (when double_lock_balance() return 1), we have to check if this task is still on the rq. The problem is that the check conditions are not sufficient: if (unlikely(task_rq(task) != rq || !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || !dl_task(task) || !task_on_rq_queued(task))) { cpu2cpu1cpu0 push_dl_task(rq1) pick task_A on rq1 find rq0 double_lock_balance(rq1, rq0) unlock(rq1) rq1 __schedule pick task_A run task_A sleep (dequeued) lock(rq0) lock(rq1) do_above_check(task_A) task_rq(task_A) == rq1 cpus_allowed unchanged task_running == false dl_task(task_A) == true try_to_wake_up(task_A) select_cpu = cpu3 enqueue(rq3, task_A) task_A->on_rq = 1 task_on_rq_queued(task_A) above_check passed, return rq0 ... migrate task_A from rq1 to rq0 So we can't rely on these checks of task_A to make sure the task_A is still on the rq1, even though we hold the rq1->lock. This patch will repick the first pushable task to be sure the task is still on the rq. 
Signed-off-by: Li Bin Acked-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) --- kernel/sched/deadline.c | 55 +++-- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 9df0978..8e0f6a4 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1936,6 +1936,26 @@ static int find_later_rq(struct task_struct *task) return -1; } +static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) +{ + struct task_struct *p; + + if (!has_pushable_dl_tasks(rq)) + return NULL; + + p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost, +struct task_struct, pushable_dl_tasks); + + BUG_ON(rq->cpu != task_cpu(p)); + BUG_ON(task_current(rq, p)); + BUG_ON(p->nr_cpus_allowed <= 1); + + BUG_ON(!task_on_rq_queued(p)); + BUG_ON(!dl_task(p)); + + return p; +} + /* Locks the rq it finds */ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) { @@ -1965,11 +1985,16 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) /* Retry if something changed. */ if (double_lock_balance(rq, later_rq)) { - if (unlikely(task_rq(task) != rq || -!cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || -task_running(rq, task) || -!dl_task(task) || -!task_on_rq_queued(task))) { + struct task_struct *next_task; + /* +* We had to unlock the run queue. In +* the mean time, task could have +* migrated already or had its affinity changed. +* Also make sure that it wasn't scheduled on its rq. 
+*/ + next_task = pick_next_pushable_dl_task(rq); + if (unlikely(next_task != task || + !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed))) { double_unlock_balance(rq, later_rq); later_rq = NULL; break; @@ -1994,26 +2019,6 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) return later_rq; } -static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) -{ - struct task_struct *p; - - if (!has_pushable_dl_tasks(rq)) - return NULL; - - p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost, -struct task_struct, pushable_dl_tasks); - - BUG_ON(rq->cpu != task_cpu(p)); - BUG_ON(task_current(rq, p)); - BUG_ON(p->nr_cpus_allowed <= 1); - - BUG_ON(!task_on_rq_queued(p)); - BUG_ON(!dl_task(p)); - - return p; -} - /* * See if the non running -deadline tasks on this rq * can be sent to some other CPU where they can preempt -- 1.7.12.4
[PATCH v2 0/2] sched: pick and check task if double_lock_balance() unlock the rq
Changes in v2: * Only change the comment and coding style as suggested by Steve Li Bin (1): sched/deadline.c: pick and check task if double_lock_balance() unlock the rq Zhou Chengming (1): sched/rt.c: pick and check task if double_lock_balance() unlock the rq kernel/sched/deadline.c | 55 +++-- kernel/sched/rt.c | 50 +--- 2 files changed, 54 insertions(+), 51 deletions(-) -- 1.7.12.4
[PATCH 2/2] sched/deadline.c: pick and check task if double_lock_balance() unlock the rq
push_dl_task() pick the first pushable task and find an eligible later_rq, then double_lock_balance(rq, lowest_rq). So if double_lock_balance() unlock the rq (when double_lock_balance() return 1), we have to check if this task is still on the rq. The problem is that the check conditions are not sufficient: if (unlikely(task_rq(task) != rq || !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || !dl_task(task) || !task_on_rq_queued(task))) { cpu2cpu1cpu0 push_dl_task(rq1) pick task_A on rq1 find rq0 double_lock_balance(rq1, rq0) unlock(rq1) rq1 __schedule pick task_A run task_A sleep (dequeued) lock(rq0) lock(rq1) do_above_check(task_A) task_rq(task_A) == rq1 cpus_allowed unchanged task_running == false dl_task(task_A) == true try_to_wake_up(task_A) select_cpu = cpu3 enqueue(rq3, task_A) task_A->on_rq = 1 task_on_rq_queued(task_A) above_check passed, return rq0 ... migrate task_A from rq1 to rq0 So we can't rely on these checks of task_A to make sure the task_A is still on the rq1, even though we hold the rq1->lock. This patch will repick the first pushable task to be sure the task is still on the rq. 
Signed-off-by: Li Bin --- kernel/sched/deadline.c | 48 +++- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 9df0978..87cd7ca 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1936,6 +1936,26 @@ static int find_later_rq(struct task_struct *task) return -1; } +static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) +{ + struct task_struct *p; + + if (!has_pushable_dl_tasks(rq)) + return NULL; + + p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost, +struct task_struct, pushable_dl_tasks); + + BUG_ON(rq->cpu != task_cpu(p)); + BUG_ON(task_current(rq, p)); + BUG_ON(p->nr_cpus_allowed <= 1); + + BUG_ON(!task_on_rq_queued(p)); + BUG_ON(!dl_task(p)); + + return p; +} + /* Locks the rq it finds */ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) { @@ -1965,11 +1985,9 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) /* Retry if something changed. 
*/ if (double_lock_balance(rq, later_rq)) { - if (unlikely(task_rq(task) != rq || -!cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || -task_running(rq, task) || -!dl_task(task) || -!task_on_rq_queued(task))) { + struct task_struct *next_task = pick_next_pushable_dl_task(rq); + if (unlikely(next_task != task || + !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed))) { double_unlock_balance(rq, later_rq); later_rq = NULL; break; @@ -1994,26 +2012,6 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) return later_rq; } -static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) -{ - struct task_struct *p; - - if (!has_pushable_dl_tasks(rq)) - return NULL; - - p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost, -struct task_struct, pushable_dl_tasks); - - BUG_ON(rq->cpu != task_cpu(p)); - BUG_ON(task_current(rq, p)); - BUG_ON(p->nr_cpus_allowed <= 1); - - BUG_ON(!task_on_rq_queued(p)); - BUG_ON(!dl_task(p)); - - return p; -} - /* * See if the non running -deadline tasks on this rq * can be sent to some other CPU where they can preempt -- 1.7.12.4
[PATCH 1/2] sched/rt.c: pick and check task if double_lock_balance() unlock the rq
From: Zhou Chengming push_rt_task() pick the first pushable task and find an eligible lowest_rq, then double_lock_balance(rq, lowest_rq). So if double_lock_balance() unlock the rq (when double_lock_balance() return 1), we have to check if this task is still on the rq. The problem is that the check conditions are not sufficient: if (unlikely(task_rq(task) != rq || !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || !rt_task(task) || !task_on_rq_queued(task))) { cpu2cpu1cpu0 push_rt_task(rq1) pick task_A on rq1 find rq0 double_lock_balance(rq1, rq0) unlock(rq1) rq1 __schedule pick task_A run task_A sleep (dequeued) lock(rq0) lock(rq1) do_above_check(task_A) task_rq(task_A) == rq1 cpus_allowed unchanged task_running == false rt_task(task_A) == true try_to_wake_up(task_A) select_cpu = cpu3 enqueue(rq3, task_A) task_A->on_rq = 1 task_on_rq_queued(task_A) above_check passed, return rq0 ... migrate task_A from rq1 to rq0 So we can't rely on these checks of task_A to make sure the task_A is still on the rq1, even though we hold the rq1->lock. This patch will repick the first pushable task to be sure the task is still on the rq. 
Signed-off-by: Zhou Chengming Signed-off-by: Li Bin --- kernel/sched/rt.c | 49 +++-- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index aad49451..e51d574 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1695,6 +1695,26 @@ static int find_lowest_rq(struct task_struct *task) return -1; } +static struct task_struct *pick_next_pushable_task(struct rq *rq) +{ + struct task_struct *p; + + if (!has_pushable_tasks(rq)) + return NULL; + + p = plist_first_entry(&rq->rt.pushable_tasks, + struct task_struct, pushable_tasks); + + BUG_ON(rq->cpu != task_cpu(p)); + BUG_ON(task_current(rq, p)); + BUG_ON(p->nr_cpus_allowed <= 1); + + BUG_ON(!task_on_rq_queued(p)); + BUG_ON(!rt_task(p)); + + return p; +} + /* Will lock the rq it finds */ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) { @@ -1726,13 +1746,10 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * We had to unlock the run queue. In * the mean time, task could have * migrated already or had its affinity changed. -* Also make sure that it wasn't scheduled on its rq. 
*/ - if (unlikely(task_rq(task) != rq || -!cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || -task_running(rq, task) || -!rt_task(task) || -!task_on_rq_queued(task))) { + struct task_struct *next_task = pick_next_pushable_task(rq); + if (unlikely(next_task != task || +!cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed))) { double_unlock_balance(rq, lowest_rq); lowest_rq = NULL; @@ -1752,26 +1769,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) return lowest_rq; } -static struct task_struct *pick_next_pushable_task(struct rq *rq) -{ - struct task_struct *p; - - if (!has_pushable_tasks(rq)) - return NULL; - - p = plist_first_entry(&rq->rt.pushable_tasks, - struct task_struct, pushable_tasks); - - BUG_ON(rq->cpu != task_cpu(p)); - BUG_ON(task_current(rq, p)); - BUG_ON(p->nr_cpus_allowed <= 1); - - BUG_ON(!task_on_rq_queued(p)); - BUG_ON(!rt_task(p)); - - return p; -} - /* * If the current CPU has more than one RT task, see if the non * running task can migrate over to a CPU that is running a task -- 1.7.12.4
[PATCH 0/2] sched: pick and check task if double_lock_balance() unlock the rq
Li Bin (1): sched/deadline.c: pick and check task if double_lock_balance() unlock the rq Zhou Chengming (1): sched/rt.c: pick and check task if double_lock_balance() unlock the rq kernel/sched/deadline.c | 48 +++- kernel/sched/rt.c | 49 +++-- 2 files changed, 46 insertions(+), 51 deletions(-) -- 1.7.12.4
[PATCH v2] workqueue: Fix NULL pointer dereference
When queue_work() is used in irq (not in task context), there is a potential case that trigger NULL pointer dereference. worker_thread() |-spin_lock_irq() |-process_one_work() |-worker->current_pwq = pwq |-spin_unlock_irq() |-worker->current_func(work) |-spin_lock_irq() |-worker->current_pwq = NULL |-spin_unlock_irq() //interrupt here |-irq_handler |-__queue_work() //assuming that the wq is draining |-is_chained_work(wq) |-current_wq_worker() //Here, 'current' is the interrupted worker! |-current->current_pwq is NULL here! |-schedule() Avoid it by checking for task context in current_wq_worker(), and if not in task context, we shouldn't use the 'current' to check the condition. Reported-by: Xiaofei Tan Signed-off-by: Li Bin --- kernel/workqueue_internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 8635417..29fa81f 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -9,6 +9,7 @@ #include #include +#include struct worker_pool; @@ -59,7 +60,7 @@ struct worker { */ static inline struct worker *current_wq_worker(void) { - if (current->flags & PF_WQ_WORKER) + if (in_task() && (current->flags & PF_WQ_WORKER)) return kthread_data(current); return NULL; } -- 1.7.12.4
Re: [PATCH] workqueue: Fix NULL pointer dereference
Hi, Jiangshan on 2017/10/26 23:55, Lai Jiangshan wrote: > On Tue, Oct 24, 2017 at 9:18 AM, Li Bin wrote: > > I remember that softirq can be invoked when irq_eixt(), > and in this case the current->current_pwq is also NULL > if __queue_work() is called in the soft irq. > > So in_task() might be better than !in_irq() for the fix? > Good catch, I will fix it and resend the patch. Thanks, Li Bin >> |-schedule() >> >> >> Avoid it by checking for irq context in current_wq_worker(), and >> if in irq context, we shouldn't use the 'current' to check the >> condition. >> >> Reported-by: Xiaofei Tan >> Signed-off-by: Li Bin >> --- >> kernel/workqueue_internal.h | 3 ++- >> 1 file changed, 2 insertions(+), 1 deletion(-) >> >> diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h >> index 8635417..d81cb9b 100644 >> --- a/kernel/workqueue_internal.h >> +++ b/kernel/workqueue_internal.h >> @@ -9,6 +9,7 @@ >> >> #include >> #include >> +#include >> >> struct worker_pool; >> >> @@ -59,7 +60,7 @@ struct worker { >> */ >> static inline struct worker *current_wq_worker(void) >> { >> - if (current->flags & PF_WQ_WORKER) >> + if (!in_irq() && (current->flags & PF_WQ_WORKER)) >> return kthread_data(current); >> return NULL; >> } >> -- >> 1.7.12.4 >> > > . >
[PATCH] workqueue: Fix NULL pointer dereference
When queue_work() is used in an irq handler, there is a potential case that triggers a NULL pointer dereference. worker_thread() |-spin_lock_irq() |-process_one_work() |-worker->current_pwq = pwq |-spin_unlock_irq() |-worker->current_func(work) |-spin_lock_irq() |-worker->current_pwq = NULL |-spin_unlock_irq() //interrupt here |-irq_handler |-__queue_work() //assuming that the wq is draining |-is_chained_work(wq) |-current_wq_worker() //Here, 'current' is the interrupted worker! |-current->current_pwq is NULL here! |-schedule() Avoid it by checking for irq context in current_wq_worker(), and if in irq context, we shouldn't use the 'current' to check the condition. Reported-by: Xiaofei Tan Signed-off-by: Li Bin --- kernel/workqueue_internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 8635417..d81cb9b 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -9,6 +9,7 @@ #include #include +#include struct worker_pool; @@ -59,7 +60,7 @@ struct worker { */ static inline struct worker *current_wq_worker(void) { - if (current->flags & PF_WQ_WORKER) + if (!in_irq() && (current->flags & PF_WQ_WORKER)) return kthread_data(current); return NULL; } -- 1.7.12.4
Re: [Question] null pointer risk of kernel workqueue
on 2017/10/23 22:03, Tejun Heo wrote: >> >> And I think the following patch can solve the bug, right? >> >> diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h >> index 8635417..650680c 100644 >> --- a/kernel/workqueue_internal.h >> +++ b/kernel/workqueue_internal.h >> @@ -59,7 +59,7 @@ struct worker { >> */ >> static inline struct worker *current_wq_worker(void) >> { >> - if (current->flags & PF_WQ_WORKER) >> + if (!in_irq() && (current->flags & PF_WQ_WORKER)) >> return kthread_data(current); >> return NULL; >> } > > Yeah, that makes sense to me. Can you please resend the patch with > patch description and SOB? Ok, I will resend the patch soon. Thanks, Li Bin > > Thanks. >
Re: [Question] null pointer risk of kernel workqueue
on 2017/10/21 23:35, Tejun Heo wrote: > On Fri, Oct 20, 2017 at 02:57:18PM +0800, tanxiaofei wrote: >> Hi Tejun, >> >> Any comments about this? > > I think you're confused, or at least can't understand what you're > trying to say. Can you create a repro? > Hi Tejun, The case is as following: worker_thread() |-spin_lock_irq() |-process_one_work() |-worker->current_pwq = pwq |-spin_unlock_irq() |-worker->current_func(work) |-spin_lock_irq() |-worker->current_pwq = NULL |-spin_unlock_irq() //interrupt here |-irq_handler |-__queue_work() //assuming that the wq is draining |-if (unlikely(wq->flags & __WQ_DRAINING) &&WARN_ON_ONCE(!is_chained_work(wq))) |-is_chained_work(wq) |-current_wq_worker() // Here, 'current' is the interrupted worker! |-current->current_pwq is NULL here! |-schedule() And I think the following patch can solve the bug, right? diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 8635417..650680c 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -59,7 +59,7 @@ struct worker { */ static inline struct worker *current_wq_worker(void) { - if (current->flags & PF_WQ_WORKER) + if (!in_irq() && (current->flags & PF_WQ_WORKER)) return kthread_data(current); return NULL; } Thanks, Li Bin > Thanks. >
[tip:perf/core] perf symbols: Fix plt entry calculation for ARM and AARCH64
Commit-ID: b2f7605076d6cdd68162c42c34caadafbbe4c69f Gitweb: http://git.kernel.org/tip/b2f7605076d6cdd68162c42c34caadafbbe4c69f Author: Li Bin AuthorDate: Mon, 5 Jun 2017 08:34:09 +0800 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 29 Aug 2017 11:41:27 -0300 perf symbols: Fix plt entry calculation for ARM and AARCH64 On x86, the plt header size is as same as the plt entry size, and can be identified from shdr's sh_entsize of the plt. But we can't assume that the sh_entsize of the plt shdr is always the plt entry size in all architecture, and the plt header size may be not as same as the plt entry size in some architecure. On ARM, the plt header size is 20 bytes and the plt entry size is 12 bytes (don't consider the FOUR_WORD_PLT case) that refer to the binutils implementation. The plt section is as follows: Disassembly of section .plt: 04a0 <__cxa_finalize@plt-0x14>: 4a0: e52de004push{lr}; (str lr, [sp, #-4]!) 4a4: e59fe004ldr lr, [pc, #4]; 4b0 <_init+0x1c> 4a8: e08fe00eadd lr, pc, lr 4ac: e5bef008ldr pc, [lr, #8]! 4b0: 8424.word 0x8424 04b4 <__cxa_finalize@plt>: 4b4: e28fc600add ip, pc, #0, 12 4b8: e28cca08add ip, ip, #8, 20 ; 0x8000 4bc: e5bcf424ldr pc, [ip, #1060]!; 0x424 04c0 : 4c0: e28fc600add ip, pc, #0, 12 4c4: e28cca08add ip, ip, #8, 20 ; 0x8000 4c8: e5bcf41cldr pc, [ip, #1052]!; 0x41c On AARCH64, the plt header size is 32 bytes and the plt entry size is 16 bytes. The plt section is as follows: Disassembly of section .plt: 0560 <__cxa_finalize@plt-0x20>: 560: a9bf7bf0stp x16, x30, [sp,#-16]! 
564: 9090adrpx16, 1 <__FRAME_END__+0xf8a8> 568: f944be11ldr x17, [x16,#2424] 56c: 9125e210add x16, x16, #0x978 570: d61f0220br x17 574: d503201fnop 578: d503201fnop 57c: d503201fnop 0580 <__cxa_finalize@plt>: 580: 9090adrpx16, 1 <__FRAME_END__+0xf8a8> 584: f944c211ldr x17, [x16,#2432] 588: 91260210add x16, x16, #0x980 58c: d61f0220br x17 0590 <__gmon_start__@plt>: 590: 9090adrpx16, 1 <__FRAME_END__+0xf8a8> 594: f944c611ldr x17, [x16,#2440] 598: 91262210add x16, x16, #0x988 59c: d61f0220br x17 NOTES: In addition to ARM and AARCH64, other architectures, such as s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa also need to consider this issue. Signed-off-by: Li Bin Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: David Tolnay Cc: Hanjun Guo Cc: Hemant Kumar Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: zhangmengt...@huawei.com Link: http://lkml.kernel.org/r/1496622849-21877-1-git-send-email-huawei.li...@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 27 ++- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a704790..5c39f42 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -259,7 +259,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * { uint32_t nr_rel_entries, idx; GElf_Sym sym; - u64 plt_offset; + u64 plt_offset, plt_header_size, plt_entry_size; GElf_Shdr shdr_plt; struct symbol *f; GElf_Shdr shdr_rel_plt, shdr_dynsym; @@ -326,6 +326,23 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; plt_offset = shdr_plt.sh_offset; + switch (ehdr.e_machine) { + case EM_ARM: + plt_header_size = 20; + plt_entry_size = 12; + break; + + case EM_AARCH64: + plt_header_size = 32; + plt_entry_size = 16; + break; + + default: /* FIXME: 
s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */ + plt_header_size = shdr_plt.sh_entsize; + plt_entry_size = shdr_plt.sh_entsize; + break; + } + plt_offset += plt_header_size; if (shdr_rel_plt.sh_type == SHT_RELA) { GElf_Rela pos_mem, *pos; @@ -335,7 +352,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * const char *elf_name = NULL; char *dema
[tip:perf/core] perf probe: Fix kprobe blacklist checking condition
Commit-ID: 2c29461e273abaf149cf8220c3403e9d67dd8b61 Gitweb: http://git.kernel.org/tip/2c29461e273abaf149cf8220c3403e9d67dd8b61 Author: Li Bin AuthorDate: Tue, 29 Aug 2017 20:57:23 +0800 Committer: Arnaldo Carvalho de Melo CommitDate: Tue, 29 Aug 2017 11:14:12 -0300 perf probe: Fix kprobe blacklist checking condition The commit 9aaf5a5f479b ("perf probe: Check kprobes blacklist when adding new events"), 'perf probe' supports checking the blacklist of the fuctions which can not be probed. But the checking condition is wrong, that the end_addr of the symbol which is the start_addr of the next symbol can't be included. Committer notes: IOW make it match its kernel counterpart in kernel/kprobes.c: bool within_kprobe_blacklist(unsigned long addr) Each entry have as its end address not its end address, but the first address _outside_ that symbol, which for related functions, is the first address of the next symbol, like these from kernel/trace/trace_probe.c: 0xbd198df0-0xbd198e40 print_type_u8 0xbd198e40-0xbd198e90 print_type_u16 0xbd198e90-0xbd198ee0 print_type_u32 0xbd198ee0-0xbd198f30 print_type_u64 0xbd198f30-0xbd198f80 print_type_s8 0xbd198f80-0xbd198fd0 print_type_s16 0xbd198fd0-0xbd199020 print_type_s32 0xbd199020-0xbd199070 print_type_s64 0xbd199070-0xbd1990c0 print_type_x8 0xbd1990c0-0xbd199110 print_type_x16 0xbd199110-0xbd199160 print_type_x32 0xbd199160-0xbd1991b0 print_type_x64 But not always: 0xbd1997b0-0xbd1997c0 fetch_kernel_stack_address (kernel/trace/trace_probe.c) 0xbd1c57f0-0xbd1c58b0 __context_tracking_enter (kernel/context_tracking.c) Signed-off-by: Li Bin Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: zhangmengt...@huawei.com Fixes: 9aaf5a5f479b ("perf probe: Check kprobes blacklist when adding new events") Link: http://lkml.kernel.org/r/1504011443-7269-1-git-send-email-huawei.li...@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index d7cd114..b7aaf9b 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2395,7 +2395,7 @@ kprobe_blacklist__find_by_address(struct list_head *blacklist, struct kprobe_blacklist_node *node; list_for_each_entry(node, blacklist, list) { - if (node->start <= address && address <= node->end) + if (node->start <= address && address < node->end) return node; }
[PATCH] perf probe: Fix kprobe blacklist checking condition
Since commit 9aaf5a5 ("perf probe: Check kprobes blacklist when adding new events"), perf probe supports checking the blacklist of the functions which cannot be probed. But the checking condition is wrong: the end_addr of a symbol is the start_addr of the next symbol, so it must not be included in the range. Signed-off-by: Li Bin --- tools/perf/util/probe-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a2670e9..bf7c928 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2373,7 +2373,7 @@ static int kprobe_blacklist__load(struct list_head *blacklist) struct kprobe_blacklist_node *node; list_for_each_entry(node, blacklist, list) { - if (node->start <= address && address <= node->end) + if (node->start <= address && address < node->end) return node; } -- 1.7.12.4
[PATCH] perf symbols: Fix plt entry calculation for ARM and AARCH64
On x86, the plt header size is as same as the plt entry size, and can be identified from shdr's sh_entsize of the plt. But we cann't assume that the sh_entsize of the plt shdr is always the plt entry size in all architecture, and the plt header size may be not as same as the plt entry size in some architecure. On ARM, the plt header size is 20 bytes and the plt entry size is 12 bytes(don't consider the FOUR_WORD_PLT case) that refer to the binutils implementation. plt section is as follows: Disassembly of section .plt: 04a0 <__cxa_finalize@plt-0x14>: 4a0: e52de004push{lr}; (str lr, [sp, #-4]!) 4a4: e59fe004ldr lr, [pc, #4]; 4b0 <_init+0x1c> 4a8: e08fe00eadd lr, pc, lr 4ac: e5bef008ldr pc, [lr, #8]! 4b0: 8424.word 0x8424 04b4 <__cxa_finalize@plt>: 4b4: e28fc600add ip, pc, #0, 12 4b8: e28cca08add ip, ip, #8, 20 ; 0x8000 4bc: e5bcf424ldr pc, [ip, #1060]!; 0x424 04c0 : 4c0: e28fc600add ip, pc, #0, 12 4c4: e28cca08add ip, ip, #8, 20 ; 0x8000 4c8: e5bcf41cldr pc, [ip, #1052]!; 0x41c On AARCH64, the plt header size is 32 bytes and the plt entry size is 16 bytes. plt section is as follows: Disassembly of section .plt: 0560 <__cxa_finalize@plt-0x20>: 560: a9bf7bf0stp x16, x30, [sp,#-16]! 564: 9090adrpx16, 1 <__FRAME_END__+0xf8a8> 568: f944be11ldr x17, [x16,#2424] 56c: 9125e210add x16, x16, #0x978 570: d61f0220br x17 574: d503201fnop 578: d503201fnop 57c: d503201fnop 0580 <__cxa_finalize@plt>: 580: 9090adrpx16, 1 <__FRAME_END__+0xf8a8> 584: f944c211ldr x17, [x16,#2432] 588: 91260210add x16, x16, #0x980 58c: d61f0220br x17 0590 <__gmon_start__@plt>: 590: 9090adrpx16, 1 <__FRAME_END__+0xf8a8> 594: f944c611ldr x17, [x16,#2440] 598: 91262210add x16, x16, #0x988 59c: d61f0220br x17 NOTES: In addition to ARM and AARCH64, other architectures, such as s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa also need to consider this issue. 
Signed-off-by: Li Bin --- tools/perf/util/symbol-elf.c | 27 ++- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index e7ee47f..1bccf71 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -259,7 +259,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * { uint32_t nr_rel_entries, idx; GElf_Sym sym; - u64 plt_offset; + u64 plt_offset, plt_header_size, plt_entry_size; GElf_Shdr shdr_plt; struct symbol *f; GElf_Shdr shdr_rel_plt, shdr_dynsym; @@ -326,6 +326,23 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; plt_offset = shdr_plt.sh_offset; + switch (ehdr.e_machine) { + case EM_ARM: + plt_header_size = 20; + plt_entry_size = 12; + break; + + case EM_AARCH64: + plt_header_size = 32; + plt_entry_size = 16; + break; + + default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */ + plt_header_size = shdr_plt.sh_entsize; + plt_entry_size = shdr_plt.sh_entsize; + break; + } + plt_offset += plt_header_size; if (shdr_rel_plt.sh_type == SHT_RELA) { GElf_Rela pos_mem, *pos; @@ -335,7 +352,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * const char *elf_name = NULL; char *demangled = NULL; symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; gelf_getsym(syms, symidx, &sym); elf_name = elf_sym__name(&sym, symstrs); @@ -346,11 +362,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * "%s@plt", elf_name); free(demangled); - f = symbol__new(plt_offset, shdr_plt.sh_entsize, + f = symbol__new(plt_offset, plt_entry_size, STB_GLOBAL, sympltname); if (!f)
Re: [PATCH] reduce the time of finding symbols for module
Hi, on 2017/3/29 8:03, Jessica Yu wrote: > +++ Miroslav Benes [28/03/17 13:16 +0200]: >> On Tue, 28 Mar 2017, zhouchengming wrote: >> >>> On 2017/3/28 17:00, Miroslav Benes wrote: >>> > >>> > Hi, >>> > >>> > On Tue, 28 Mar 2017, Zhou Chengming wrote: >>> > >>> > > It's reported that the time of insmoding a klp.ko for one of our >>> > > out-tree modules is too long. >>> > > >>> > > ~ time sudo insmod klp.ko >>> > > real0m23.799s >>> > > user0m0.036s >>> > > sys0m21.256s >>> > >>> > Is this stable through several (>=10) runs? 23 seconds are really >>> > suspicious. Yes, there is a linear search through all the kallsyms in >>> > kallsyms_on_each_symbol(), but there are something like 70k symbols on my >>> > machine (that is, way less than 1M). 23 seconds are somewhat unexpected. >>> > >>> >>> Yes, it's stable through several runs. >>> >>> I think the big reason is that our out-tree module used a lot of static >>> local >>> variables. We can see '.rela.kpatch.dynrelas' contains many entries, so it >>> will >>> waste a lot of time if we use kallsyms_on_each_symbol() to find these >>> symbols >>> of module. >> >> Ok, it means that you have a lot of relocation records which reference >> your out-of-tree module. Then for each such entry klp_resolve_symbol() >> is called and then klp_find_object_symbol() to actually resolve it. So if >> you have 20k entries, you walk through vmlinux kallsyms table 20k times. >> It is unneeded and that is why your fix works. >> >> But if there were 20k modules loaded, the problem would still be there. >> >> I think it would be really nice to fix kallsyms :). Replace ordinary array >> and the linear search with a hash table. >> >>> Relocation section '.rela.kpatch.funcs' at offset 0x382e0 contains 3 >>> entries: >>> Offset Info Type Sym. ValueSym. 
Name + >>> Addend >>> 00330101 R_AARCH64_ABS64 value_show + 0 >>> 0020 000b0101 R_AARCH64_ABS64 >>> .kpatch.strings >>> + 8 >>> 0028 000b0101 R_AARCH64_ABS64 >>> .kpatch.strings >>> + 0 >> >> Hm, we do not have aarch64 support in upstream (yet). There is even no >> dynamic ftrace with regs yet (if I am not mistaken). > > I'm curious, how was this tested? Since there is no dynamic ftrace > with regs and no livepatch stubs (klp_arch_set_pc, etc) implemented > yet for aarch64. Also, livepatch has switched from klp_relocs/dynrelas > to .klp.rela. sections since 4.7, so I'm curious how your patch module > has a .kpatch.dynrelas section working with livepatch. > > Unrelated to this patch, if there is a working aarch64 livepatch port (and > kpatch build tool, it seems) floating out there, it would be > wonderful to push that upstream :-) Yeah, from 2014, we started to work on livepatch support on aarch64, and in May 2015, we pushed the solution to the livepatch community[1] and gcc community (mfentry feature on aarch64)[2]. And then, there were an another gcc solution from linaro [3], which proposes to implement a new option -fprolog-pad=N that generate a pad of N nops at the beginning of each function, and AFAIK, Torsten Duwe from SUSE is still discussing this method with gcc community. At this stage, we are validating the livepatch support on aarch64 based on aarch64 mfentry feature. When the community has a clear plan, we are happy to make adaptation and contribute our related work to the community, including the kpatch-build support :-) [1] livepatch: add support on arm64 https://lkml.org/lkml/2015/5/28/54 [2] [AArch64] support -mfentry feature for arm64 https://gcc.gnu.org/ml/gcc-patches/2016-03/msg00756.html [3] Kernel livepatching support in GCC https://gcc.gnu.org/ml/gcc/2015-05/msg00267.html [4] arm64: ftrace with regs for livepatch support http://lists.infradead.org/pipermail/linux-arm-kernel/2016-January/401352.html Thanks, Li Bin > > Jessica > > . >
Re: [PATCH v13 01/10] arm64: Add HAVE_REGS_AND_STACK_ACCESS_API feature
on 2016/6/3 11:26, David Long wrote: > From: "David A. Long" > > Add HAVE_REGS_AND_STACK_ACCESS_API feature for arm64, including supporting > functions and defines. > > Signed-off-by: David A. Long > --- > arch/arm64/Kconfig | 1 + > arch/arm64/include/asm/ptrace.h | 54 +- > arch/arm64/kernel/ptrace.c | 118 > > 3 files changed, 172 insertions(+), 1 deletion(-) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index 76747d9..0f7a624 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -85,6 +85,7 @@ config ARM64 > select HAVE_PERF_EVENTS > select HAVE_PERF_REGS > select HAVE_PERF_USER_STACK_DUMP > + select HAVE_REGS_AND_STACK_ACCESS_API > select HAVE_RCU_TABLE_FREE > select HAVE_SYSCALL_TRACEPOINTS > select IOMMU_DMA if IOMMU_SUPPORT > diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h > index a307eb6..c130f2b 100644 > --- a/arch/arm64/include/asm/ptrace.h > +++ b/arch/arm64/include/asm/ptrace.h > @@ -74,6 +74,7 @@ > #define COMPAT_PT_DATA_ADDR 0x10004 > #define COMPAT_PT_TEXT_END_ADDR 0x10008 > #ifndef __ASSEMBLY__ > +#include > > /* sizeof(struct user) for AArch32 */ > #define COMPAT_USER_SZ 296 > @@ -119,6 +120,8 @@ struct pt_regs { > u64 syscallno; > }; > > +#define MAX_REG_OFFSET offsetof(struct pt_regs, pstate) > + > #define arch_has_single_step() (1) > > #ifdef CONFIG_COMPAT > @@ -147,6 +150,55 @@ struct pt_regs { > #define user_stack_pointer(regs) \ > (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) > > +extern int regs_query_register_offset(const char *name); > +extern const char *regs_query_register_name(unsigned int offset); > +extern bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long > addr); > +extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, > +unsigned int n); > + > +/** > + * regs_get_register() - get register value from its offset > + * @regs: pt_regs from which register value is gotten > + * @offset:offset of the register. 
> + * > + * regs_get_register returns the value of a register whose offset from @regs. > + * The @offset is the offset of the register in struct pt_regs. > + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. > + */ > +static inline u64 regs_get_register(struct pt_regs *regs, > + unsigned int offset) > +{ > + u64 val = 0; > + > + WARN_ON(offset & 7); > + > + offset >>= 3; Here, the offset is 3-bits right shifted. So, > + switch (offset) { > + case0 ... 30: > + val = regs->regs[offset]; > + break; > + case offsetof(struct pt_regs, sp): here should be shifted too, as case offsetof(struct pt_regs, sp) >> 3: > + val = regs->sp; > + break; > + case offsetof(struct pt_regs, pc): and here, > + val = regs->pc; > + break; > + case offsetof(struct pt_regs, pstate): and here. thanks, Li Bin > + val = regs->pstate; > + break; > + default: > + val = 0; > + } > + > + return val; > +} > + > +/* Valid only for Kernel mode traps. */ > +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) > +{ > + return regs->sp; > +} > + > static inline unsigned long regs_return_value(struct pt_regs *regs) > { > return regs->regs[0]; > @@ -156,7 +208,7 @@ static inline unsigned long regs_return_value(struct > pt_regs *regs) > struct task_struct; > int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); > > -#define instruction_pointer(regs)((unsigned long)(regs)->pc) > +#define instruction_pointer(regs)((regs)->pc) > > extern unsigned long profile_pc(struct pt_regs *regs); > > diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c > index 3f6cd5c..2c88c33 100644 > --- a/arch/arm64/kernel/ptrace.c > +++ b/arch/arm64/kernel/ptrace.c > @@ -48,6 +48,124 @@ > #define CREATE_TRACE_POINTS > #include > > +struct pt_regs_offset { > + const char *name; > + int offset; > +}; > + > +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, > r)} > +#define REG_OFFSET_END {.name = NULL, .offset = 0} > +#define GPR_OFFSET_NAME(r) \ > + 
{.name = "x" #r, .offset = offs
Re: [PATCH v12 00/10] arm64: Add kernel probes (kprobes) support
@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) instruction_pointer_set(regs, (long)jp->entry); preempt_disable(); + pause_graph_tracing(); return 1; } @@ -757,6 +758,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) show_regs(regs); BUG(); } + unpause_graph_tracing(); *regs = kcb->jprobe_saved_regs; memcpy((void *)stack_addr, kcb->jprobes_stack, MIN_STACK_SIZE(stack_addr)); Li Bin > > Thanks, > > > James > > > > > ___ > linux-arm-kernel mailing list > linux-arm-ker...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel > > . >
Re: [PATCH v11 5/9] arm64: Kprobes with single stepping support
Hi David, on 2016/3/9 13:32, David Long wrote: > +int __kprobes arch_prepare_kprobe(struct kprobe *p) > +{ > + unsigned long probe_addr = (unsigned long)p->addr; Here should verify the addr alignment: if (probe_addr & 0x3) return -EINVAL; Thanks, Li Bin > + > + /* copy instruction */ > + p->opcode = le32_to_cpu(*p->addr); > + > + if (in_exception_text(probe_addr)) > + return -EINVAL; > + > + /* decode instruction */ > + switch (arm_kprobe_decode_insn(p->addr, &p->ainsn)) { > + case INSN_REJECTED: /* insn not supported */ > + return -EINVAL; > + > + case INSN_GOOD_NO_SLOT: /* insn need simulation */ > + return -EINVAL; > + > + case INSN_GOOD: /* instruction uses slot */ > + p->ainsn.insn = get_insn_slot(); > + if (!p->ainsn.insn) > + return -ENOMEM; > + break; > + }; > + > + /* prepare the instruction */ > + arch_prepare_ss_slot(p); > + > + return 0; > +} > +
[PATCH] kprobes: fix race condition in __unregister_kprobe_top
In the following case, it will trigger a kernel panic: 1. register a kprobe for one function 2. register a jprobe for the same function (to make it easier to reproduce, let the entry callback take a long time, such as calling mdelay) 3. trigger the function to be called, and then unregister the jprobe (before the entry callback calls jprobe_return) The reason is that in __unregister_kprobe_top (unregister_jprobe) the break_handler of the aggrprobe will be set NULL, but by then the entry callback may already have been triggered (before jprobe_return); then in jprobe_return it triggers an int3/brk exception, and in the exception handler, because the break_handler has been set NULL, it will not setup_singlestep and will return to the original instruction... To fix this bug, __unregister_kprobe_top calls synchronize_sched() before clearing the handler. Signed-off-by: Li Bin --- kernel/kprobes.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d10ab6b..5b5dd68 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1615,6 +1615,7 @@ static int __unregister_kprobe_top(struct kprobe *p) */ goto disarmed; else { + synchronize_sched(); /* If disabling probe has special handlers, update aggrprobe */ if (p->break_handler && !kprobe_gone(p)) ap->break_handler = NULL; -- 1.7.1
Re: [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
on 2016/4/8 13:14, Pratyush Anand wrote: > Hi Li, > > On 07/04/2016:07:34:37 PM, Li Bin wrote: >> Hi Pratyush, >> >> on 2016/4/4 13:17, Pratyush Anand wrote: >>> Hi Li, >>> >>> On 31/03/2016:08:45:05 PM, Li Bin wrote: >>>> Hi Pratyush, >>>> >>>> on 2016/3/21 18:24, Pratyush Anand wrote: >>>>> On 21/03/2016:08:37:50 AM, He Kuang wrote: >>>>>> On arm64, watchpoint handler enables single-step to bypass the next >>>>>> instruction for not recursive enter. If an irq is triggered right >>>>>> after the watchpoint, a single-step will be wrongly triggered in irq >>>>>> handler, which causes the watchpoint address not stepped over and >>>>>> system hang. >>>>> >>>>> Does patch [1] resolves this issue as well? I hope it should. Patch[1] >>>>> has still >>>>> not been sent for review. Your test result will be helpful. >>>>> >>>>> ~Pratyush >>>>> >>>>> [1] >>>>> https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652 >>>> >>>> This patch did not consider that, when excetpion return, the singlestep >>>> flag >>>> should be restored, otherwise the right singlestep will not triggered. >>>> Right? >>> >>> Yes, you are right, and there are other problems as well. Will Deacon >>> pointed >>> out [1] that kernel debugging is per-cpu rather than per-task. So, I did >>> thought >>> of a per-cpu implementation by introducing a new element "flags" in struct >>> pt_regs. But even with that I see issues. For example: >>> - While executing single step instruction, we get a data abort >>> - In the kernel_entry of data abort we disable single stepping based on >>> "flags" >>> bit field >>> - While handling data abort we receive anther interrupt, so we are again in >>> kernel_entry (for el1_irq). Single stepping will be disabled again >>> (although >>> it does not matter). >>> >>> Now the issue is that, what condition should be verified in kernel_exit for >>> enabling single step again? 
In the above scenario, kernel_exit for el1_irq >>> should not enable single stepping, but how to prevent that elegantly? >> >> The condition for kernel_entry to disable the single step is that >> MDSCR_EL1.SS >> has been set. And only when the corresponding kernel_entry has disabled the >> single >> step, the kernel_exit should enable it, but the kernel_exit of single-step >> exception >> should be handled specially, that when disable single step in single-step >> exception >> handler, flag of pt_regs stored in stack should be cleard to prevent to be >> re-enabled >> by kernel_exit. > > Nice, :-) > I had latter on almost similar patch [1], but it did fail when I merged two of > the tests. > -- I inserted kprobe to an instruction in function __copy_to_user() which > could > generate data abort. > -- In parallel I also run test case which is defined here [2] > -- As soon as I did `cat /proc/version`, kernel crashed. Hi Pratyush, Firstly, I have test this case, and it does not trigger failture as you describing. But it indeed may trigger problem, and it is an another issue that if an exception triggered before single-step exception, changes the regs->pc (data abort exception will fixup_exception), the current implemetion of kprobes does not support, for example: 1. kprobes brk exception setup single-step, regs->pc points to the slot, MDSCR.SS=1, SPSR_EL1.SS=1 (Inactive state) 2. brk exception eret (Active-not-pending state) 3. execute the slot instruction and trigger data abort exception, and this case the return addr is also the slot instruction, so the SPSR_EL1.SS is set to 1 (Inactive state) 4. but in the data abort exception, fixup_exception will change the regs->pc to the fixup code 5. data abort exception eret, going into Active-not-pending state, executing fixup code without taking an exception, going into Active-pending state, triggering single-step exception. But the single-step instruction is not the target instrution, so kprobe fails. 
And so this case including copy_to/from_user should be added to kprobes blacklist. Right, or am i missing something? Thanks, Li Bin > > Although, just by comparing visually I do not see any functional difference > with > the patch you inlined below, still can you please try both of the above test > case in parallel and see how does it behave? To insert kprobe within > __copy_to_user() you need to revert "arm64: add copy_to/from_user to kprobes > blacklist". > > [1] > https://github.com/pratyushanand/linux/commit/9182afbe640ea7caf52e209eb8e5f00bdf91b0c0 > [2] http://thread.gmane.org/gmane.linux.kernel/2167918 >> >> Thanks, >> Li Bin >>
Re: [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
Hi Pratyush, on 2016/4/4 13:17, Pratyush Anand wrote: > Hi Li, > > On 31/03/2016:08:45:05 PM, Li Bin wrote: >> Hi Pratyush, >> >> on 2016/3/21 18:24, Pratyush Anand wrote: >>> On 21/03/2016:08:37:50 AM, He Kuang wrote: >>>> On arm64, watchpoint handler enables single-step to bypass the next >>>> instruction for not recursive enter. If an irq is triggered right >>>> after the watchpoint, a single-step will be wrongly triggered in irq >>>> handler, which causes the watchpoint address not stepped over and >>>> system hang. >>> >>> Does patch [1] resolves this issue as well? I hope it should. Patch[1] has >>> still >>> not been sent for review. Your test result will be helpful. >>> >>> ~Pratyush >>> >>> [1] >>> https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652 >> >> This patch did not consider that, when excetpion return, the singlestep flag >> should be restored, otherwise the right singlestep will not triggered. >> Right? > > Yes, you are right, and there are other problems as well. Will Deacon pointed > out [1] that kernel debugging is per-cpu rather than per-task. So, I did > thought > of a per-cpu implementation by introducing a new element "flags" in struct > pt_regs. But even with that I see issues. For example: > - While executing single step instruction, we get a data abort > - In the kernel_entry of data abort we disable single stepping based on > "flags" > bit field > - While handling data abort we receive anther interrupt, so we are again in > kernel_entry (for el1_irq). Single stepping will be disabled again (although > it does not matter). > > Now the issue is that, what condition should be verified in kernel_exit for > enabling single step again? In the above scenario, kernel_exit for el1_irq > should not enable single stepping, but how to prevent that elegantly? The condition for kernel_entry to disable the single step is that MDSCR_EL1.SS has been set. 
And only when the corresponding kernel_entry has disabled the single step, the kernel_exit should enable it, but the kernel_exit of single-step exception should be handled specially, that when disable single step in single-step exception handler, flag of pt_regs stored in stack should be cleard to prevent to be re-enabled by kernel_exit. Thanks, Li Bin --- arch/arm64/include/asm/assembler.h | 11 +++ arch/arm64/include/asm/debug-monitors.h |2 +- arch/arm64/include/asm/ptrace.h |2 ++ arch/arm64/kernel/asm-offsets.c |1 + arch/arm64/kernel/debug-monitors.c |3 ++- arch/arm64/kernel/entry.S | 18 ++ arch/arm64/kernel/hw_breakpoint.c |2 +- arch/arm64/kernel/kgdb.c|2 +- 8 files changed, 37 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 70f7b9e..56a4335 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -60,6 +60,17 @@ msr daifclr, #8 .endm + .macro disable_step, orig + bic \orig, \orig, #1 + msr mdscr_el1, \orig + .endm + + .macro enable_step, orig + disable_dbg + orr \orig, \orig, #1 + msr mdscr_el1, \orig + .endm + .macro disable_step_tsk, flgs, tmp tbz \flgs, #TIF_SINGLESTEP, 9990f mrs \tmp, mdscr_el1 diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 2fcb9b7..50f114c 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -115,7 +115,7 @@ void user_rewind_single_step(struct task_struct *task); void user_fastforward_single_step(struct task_struct *task); void kernel_enable_single_step(struct pt_regs *regs); -void kernel_disable_single_step(void); +void kernel_disable_single_step(struct pt_regs *regs); int kernel_active_single_step(void); #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index a307eb6..da00cd8 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -113,6 
+113,8 @@ struct pt_regs { u64 sp; u64 pc; u64 pstate; + u64 flags; + u64 padding; }; }; u64 orig_x0; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 3ae6b31..7936ba9 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -56,6 +56,7 @@ int main(void) DEFINE(S_COMPAT_SP, offsetof(struct p
Re: [PATCH 2/2] arm64: Fix watchpoint recursion when single-step is wrongly triggered in irq
Hi Pratyush, on 2016/3/21 18:24, Pratyush Anand wrote: > On 21/03/2016:08:37:50 AM, He Kuang wrote: >> On arm64, watchpoint handler enables single-step to bypass the next >> instruction for not recursive enter. If an irq is triggered right >> after the watchpoint, a single-step will be wrongly triggered in irq >> handler, which causes the watchpoint address not stepped over and >> system hang. > > Does patch [1] resolves this issue as well? I hope it should. Patch[1] has > still > not been sent for review. Your test result will be helpful. > > ~Pratyush > > [1] > https://github.com/pratyushanand/linux/commit/7623c8099ac22eaa00e7e0f52430f7a4bd154652 This patch did not consider that, when the exception returns, the singlestep flag should be restored; otherwise the right singlestep will not be triggered. Right? Thanks, Li Bin > > ___ > linux-arm-kernel mailing list > linux-arm-ker...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel > > . >
[PATCH] kernel/Makefile: remove the useless CFLAGS_REMOVE_cgroup-debug.o
The file cgroup-debug.c was removed by commit fe6934354f8e (cgroups: move the cgroup debug subsys into cgroup.c to access internal state). This left the now-useless line CFLAGS_REMOVE_cgroup-debug.o = $(CC_FLAGS_FTRACE) behind in kernel/Makefile. Signed-off-by: Li Bin --- kernel/Makefile |3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/kernel/Makefile b/kernel/Makefile index 53abf00..baa55e5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -14,8 +14,7 @@ obj-y = fork.o exec_domain.o panic.o \ obj-$(CONFIG_MULTIUSER) += groups.o ifdef CONFIG_FUNCTION_TRACER -# Do not trace debug files and internal ftrace files -CFLAGS_REMOVE_cgroup-debug.o = $(CC_FLAGS_FTRACE) +# Do not trace internal ftrace files CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE) endif -- 1.7.1
Re: [PATCH 5/5] x86: ftrace: fix the comments for ftrace_modify_code_direct
on 2015/12/6 6:52, Steven Rostedt wrote: > On Sat, 5 Dec 2015 18:12:57 +0100 (CET) > Thomas Gleixner wrote: > >> On Fri, 4 Dec 2015, Li Bin wrote: >>> --- a/arch/x86/kernel/ftrace.c >>> +++ b/arch/x86/kernel/ftrace.c >>> @@ -106,13 +106,12 @@ ftrace_modify_code_direct(unsigned long ip, unsigned >>> const char *old_code, >>> unsigned char replaced[MCOUNT_INSN_SIZE]; >>> >>> /* >>> -* Note: Due to modules and __init, code can >>> -* disappear and change, we need to protect against faulting >>> -* as well as code changing. We do this by using the >>> -* probe_kernel_* functions. >>> -* >>> -* No real locking needed, this code is run through >>> -* kstop_machine, or before SMP starts. >>> +* Note: >>> +* We are paranoid about modifying text, as if a bug were to happen, it >>> +* could cause us to read or write to someplace that could cause harm. >>> +* Carefully read and modify the code with aarch64_insn_*() which uses >> aarch64_insn_() is related to x86 in which way? >> >> > The original comment is incorrect and we discussed this with the > arm64 code, and said the other archs need the comment updated as > well. But it seems that Li Bin just cut and pasted the arm64 patch for > the other archs, or at least with x86 (haven't looked at the others > yet). This needs to be fixed. Sorry for my mistake, I will modify it. Thanks, Li Bin > -- Steve > > . > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 5/5] x86: ftrace: fix the comments for ftrace_modify_code_direct
There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: x...@kernel.org Suggested-by: Steven Rostedt Signed-off-by: Li Bin --- arch/x86/kernel/ftrace.c | 12 +--- 1 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 311bcf3..db0f6b1 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -106,13 +106,11 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* -* Note: Due to modules and __init, code can -* disappear and change, we need to protect against faulting -* as well as code changing. We do this by using the -* probe_kernel_* functions. -* -* No real locking needed, this code is run through -* kstop_machine, or before SMP starts. +* Note: +* We are paranoid about modifying text, as if a bug was to happen, it +* could cause us to read or write to someplace that could cause harm. +* Carefully read and modify the code with probe_kernel_*(), and make +* sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 3/5] powerpc: ftrace: fix the comments for ftrace_modify_code
There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-...@lists.ozlabs.org Suggested-by: Steven Rostedt Signed-off-by: Li Bin --- arch/powerpc/kernel/ftrace.c | 12 +--- 1 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 44d4d8e..83e3c88 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -47,13 +47,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) unsigned int replaced; /* -* Note: Due to modules and __init, code can -* disappear and change, we need to protect against faulting -* as well as code changing. We do this by using the -* probe_kernel_* functions. -* -* No real locking needed, this code is run through -* kstop_machine, or before SMP starts. +* Note: +* We are paranoid about modifying text, as if a bug was to happen, it +* could cause us to read or write to someplace that could cause harm. +* Carefully read and modify the code with probe_kernel_*(), and make +* sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 4/5] sh: ftrace: fix the comments for ftrace_modify_code
There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Cc: linux...@vger.kernel.org Suggested-by: Steven Rostedt Signed-off-by: Li Bin --- arch/sh/kernel/ftrace.c | 12 +--- 1 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 079d70e..38993e0 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -212,13 +212,11 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* -* Note: Due to modules and __init, code can -* disappear and change, we need to protect against faulting -* as well as code changing. We do this by using the -* probe_kernel_* functions. -* -* No real locking needed, this code is run through -* kstop_machine, or before SMP starts. +* Note: +* We are paranoid about modifying text, as if a bug was to happen, it +* could cause us to read or write to someplace that could cause harm. +* Carefully read and modify the code with probe_kernel_*(), and make +* sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 1/5] ia64: ftrace: fix the comments for ftrace_modify_code
There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Cc: Tony Luck Cc: Fenghua Yu Cc: linux-i...@vger.kernel.org Suggested-by: Steven Rostedt Signed-off-by: Li Bin --- arch/ia64/kernel/ftrace.c | 12 +--- 1 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c index 3b0c2aa..cee411e 100644 --- a/arch/ia64/kernel/ftrace.c +++ b/arch/ia64/kernel/ftrace.c @@ -97,13 +97,11 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* -* Note: Due to modules and __init, code can -* disappear and change, we need to protect against faulting -* as well as code changing. We do this by using the -* probe_kernel_* functions. -* -* No real locking needed, this code is run through -* kstop_machine, or before SMP starts. +* Note: +* We are paranoid about modifying text, as if a bug was to happen, it +* could cause us to read or write to someplace that could cause harm. +* Carefully read and modify the code with probe_kernel_*(), and make +* sure what we read is what we expected it to be before modifying it. */ if (!do_check) -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 0/5] ftrace: fix ftrace misleading comments for arch using it
Fix the following similar misleading comments of ftrace for arch ia64/metag/powerpc/sh/x86: Note: Due to modules and __init, code can disappear and change, we need to protect against faulting as well as code changing. We do this by using the probe_kernel_* functions. No real locking needed, this code is run through kstop_machine, or before SMP starts. Cc: Tony Luck Cc: Fenghua Yu Cc: linux-i...@vger.kernel.org Cc: James Hogan Cc: linux-me...@vger.kernel.org Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-...@lists.ozlabs.org Cc: linux...@vger.kernel.org Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: x...@kernel.org Li Bin (5): ia64: ftrace: fix the comments for ftrace_modify_code metag: ftrace: fix the comments for ftrace_modify_code powerpc: ftrace: fix the comments for ftrace_modify_code sh: ftrace: fix the comments for ftrace_modify_code x86: ftrace: fix the comments for ftrace_modify_code_direct arch/ia64/kernel/ftrace.c| 12 +--- arch/metag/kernel/ftrace.c | 11 +-- arch/powerpc/kernel/ftrace.c | 12 +--- arch/sh/kernel/ftrace.c | 12 +--- arch/x86/kernel/ftrace.c | 12 +--- 5 files changed, 25 insertions(+), 34 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 2/5] metag: ftrace: fix the comments for ftrace_modify_code
There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Cc: James Hogan Cc: linux-me...@vger.kernel.org Suggested-by: Steven Rostedt Signed-off-by: Li Bin --- arch/metag/kernel/ftrace.c | 11 +-- 1 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c index ed1d685..ac8c039 100644 --- a/arch/metag/kernel/ftrace.c +++ b/arch/metag/kernel/ftrace.c @@ -54,12 +54,11 @@ static int ftrace_modify_code(unsigned long pc, unsigned char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* -* Note: Due to modules and __init, code can -* disappear and change, we need to protect against faulting -* as well as code changing. -* -* No real locking needed, this code is run through -* kstop_machine. +* Note: +* We are paranoid about modifying text, as if a bug was to happen, it +* could cause us to read or write to someplace that could cause harm. +* Carefully read and modify the code with probe_kernel_*(), and make +* sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 5/5] x86: ftrace: fix the comments for ftrace_modify_code_direct
There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: x...@kernel.org Suggested-by: Steven Rostedt Signed-off-by: Li Bin --- arch/x86/kernel/ftrace.c | 13 ++--- 1 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 311bcf3..c2987e8 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -106,13 +106,12 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* -* Note: Due to modules and __init, code can -* disappear and change, we need to protect against faulting -* as well as code changing. We do this by using the -* probe_kernel_* functions. -* -* No real locking needed, this code is run through -* kstop_machine, or before SMP starts. +* Note: +* We are paranoid about modifying text, as if a bug were to happen, it +* could cause us to read or write to someplace that could cause harm. +* Carefully read and modify the code with aarch64_insn_*() which uses +* probe_kernel_*(), and make sure what we read is what we expected it +* to be before modifying it. */ /* read the text we want to modify */ -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/5] powerpc: ftrace: fix the comments for ftrace_modify_code
There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-...@lists.ozlabs.org Suggested-by: Steven Rostedt Signed-off-by: Li Bin --- arch/powerpc/kernel/ftrace.c | 13 ++--- 1 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 44d4d8e..c6452b2 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -47,13 +47,12 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) unsigned int replaced; /* -* Note: Due to modules and __init, code can -* disappear and change, we need to protect against faulting -* as well as code changing. We do this by using the -* probe_kernel_* functions. -* -* No real locking needed, this code is run through -* kstop_machine, or before SMP starts. +* Note: +* We are paranoid about modifying text, as if a bug were to happen, it +* could cause us to read or write to someplace that could cause harm. +* Carefully read and modify the code with aarch64_insn_*() which uses +* probe_kernel_*(), and make sure what we read is what we expected it +* to be before modifying it. 
*/ /* read the text we want to modify */ -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 4/5] sh: ftrace: fix the comments for ftrace_modify_code
There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Cc: linux...@vger.kernel.org Suggested-by: Steven Rostedt Signed-off-by: Li Bin --- arch/sh/kernel/ftrace.c | 13 ++--- 1 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 079d70e..b696f92 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -212,13 +212,12 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* -* Note: Due to modules and __init, code can -* disappear and change, we need to protect against faulting -* as well as code changing. We do this by using the -* probe_kernel_* functions. -* -* No real locking needed, this code is run through -* kstop_machine, or before SMP starts. +* Note: +* We are paranoid about modifying text, as if a bug were to happen, it +* could cause us to read or write to someplace that could cause harm. +* Carefully read and modify the code with aarch64_insn_*() which uses +* probe_kernel_*(), and make sure what we read is what we expected it +* to be before modifying it. */ /* read the text we want to modify */ -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/5] metag: ftrace: fix the comments for ftrace_modify_code
There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Cc: James Hogan Cc: linux-me...@vger.kernel.org Suggested-by: Steven Rostedt Signed-off-by: Li Bin --- arch/metag/kernel/ftrace.c | 12 ++-- 1 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c index ed1d685..e5d71b1 100644 --- a/arch/metag/kernel/ftrace.c +++ b/arch/metag/kernel/ftrace.c @@ -54,12 +54,12 @@ static int ftrace_modify_code(unsigned long pc, unsigned char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* -* Note: Due to modules and __init, code can -* disappear and change, we need to protect against faulting -* as well as code changing. -* -* No real locking needed, this code is run through -* kstop_machine. +* Note: +* We are paranoid about modifying text, as if a bug were to happen, it +* could cause us to read or write to someplace that could cause harm. +* Carefully read and modify the code with aarch64_insn_*() which uses +* probe_kernel_*(), and make sure what we read is what we expected it +* to be before modifying it. */ /* read the text we want to modify */ -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/5] ftrace: fix ftrace misleading comments for arch using it
Fix the following similar misleading comments of ftrace for arch ia64/metag/powerpc/sh/x86: Note: Due to modules and __init, code can disappear and change, we need to protect against faulting as well as code changing. We do this by using the probe_kernel_* functions. No real locking needed, this code is run through kstop_machine, or before SMP starts. Cc: Tony Luck Cc: Fenghua Yu Cc: linux-i...@vger.kernel.org Cc: James Hogan Cc: linux-me...@vger.kernel.org Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-...@lists.ozlabs.org Cc: linux...@vger.kernel.org Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: x...@kernel.org Li Bin (5): ia64: ftrace: fix the comments for ftrace_modify_code metag: ftrace: fix the comments for ftrace_modify_code powerpc: ftrace: fix the comments for ftrace_modify_code sh: ftrace: fix the comments for ftrace_modify_code x86: ftrace: fix the comments for ftrace_modify_code_direct arch/ia64/kernel/ftrace.c| 13 ++--- arch/metag/kernel/ftrace.c | 12 ++-- arch/powerpc/kernel/ftrace.c | 13 ++--- arch/sh/kernel/ftrace.c | 13 ++--- arch/x86/kernel/ftrace.c | 13 ++--- 5 files changed, 30 insertions(+), 34 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/5] ia64: ftrace: fix the comments for ftrace_modify_code
There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Cc: Tony Luck Cc: Fenghua Yu Cc: linux-i...@vger.kernel.org Suggested-by: Steven Rostedt Signed-off-by: Li Bin --- arch/ia64/kernel/ftrace.c | 13 ++--- 1 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c index 3b0c2aa..a48a3f4 100644 --- a/arch/ia64/kernel/ftrace.c +++ b/arch/ia64/kernel/ftrace.c @@ -97,13 +97,12 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* -* Note: Due to modules and __init, code can -* disappear and change, we need to protect against faulting -* as well as code changing. We do this by using the -* probe_kernel_* functions. -* -* No real locking needed, this code is run through -* kstop_machine, or before SMP starts. +* Note: +* We are paranoid about modifying text, as if a bug were to happen, it +* could cause us to read or write to someplace that could cause harm. +* Carefully read and modify the code with aarch64_insn_*() which uses +* probe_kernel_*(), and make sure what we read is what we expected it +* to be before modifying it. */ if (!do_check) -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 2/2] arm64: ftrace: fix the comments for ftrace_modify_code
I will also update the comment for the other arch that using the similar description, such as ia64/metag/powerpc/sh/x86. Thanks, Li Bin on 2015/12/4 10:50, Steven Rostedt wrote: > On Fri, 4 Dec 2015 10:18:39 +0800 > Li Bin wrote: > >> There is no need to worry about module text disappearing case, >> because that ftrace has a module notifier that is called when >> a module is being unloaded and before the text goes away, and this >> code grabs the ftrace_lock mutex and removes the module functions >> from the ftrace list, such that it will no longer do any >> modifications to that module's text. >> The update to make functions be traced or not is done under the >> ftrace_lock mutex as well. >> >> Signed-off-by: Li Bin >> --- >> arch/arm64/kernel/ftrace.c |5 + >> 1 files changed, 1 insertions(+), 4 deletions(-) >> >> diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c >> index 9669b33..ee91c0c 100644 >> --- a/arch/arm64/kernel/ftrace.c >> +++ b/arch/arm64/kernel/ftrace.c >> @@ -29,12 +29,9 @@ static int ftrace_modify_code(unsigned long pc, u32 old, >> u32 new, >> >> /* >> * Note: >> - * Due to modules and __init, code can disappear and change, >> + * Due to __init, code can disappear and change, > Init code should not be modified either because it is black listed in > recordmcount.c. > > I say just change the comment to be something like: > > We are paranoid about modifying text, as if a bug were to happen, it > could cause us to read or write to someplace that could cause harm. > Carefully read and modify the code with aarch64_insn_*() which uses > probe_kernel_*(), and make sure what we read is what we expected it to > be before modifying it. > > -- Steve > > >> * we need to protect against faulting as well as code changing. >> * We do this by aarch64_insn_*() which use the probe_kernel_*(). >> - * >> - * No lock is held here because all the modifications are run >> - * through stop_machine(). 
>> */ >> if (validate) { >> if (aarch64_insn_read((void *)pc, &replaced)) > > . > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 1/2] arm64: ftrace: stop using kstop_machine to enable/disable tracing
For ftrace on arm64, kstop_machine — which is hugely disruptive to a running system — is not needed to convert nops to ftrace calls or back, because the instructions to be modified (NOP, B or BL) are all safe instructions that permit "concurrent modification and execution of instructions", i.e. they can be executed by one thread of execution as they are being modified by another thread of execution without requiring explicit synchronization. Signed-off-by: Li Bin Reviewed-by: Steven Rostedt --- arch/arm64/kernel/ftrace.c |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c851be7..9669b33 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -93,6 +93,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return ftrace_modify_code(pc, old, new, true); } +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + int __init ftrace_dyn_arch_init(void) { return 0; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 0/2] arm64: stop using kstop_machine for ftrace
v2: Based on the comments from Will and Steve, 1. Modify the commit message 2. Fix the misleading comments for ftrace_modify_code v3: Modify the comments again based on the comment from Steve. Link: https://lkml.org/lkml/2015/12/3/422 Li Bin (2): arm64: ftrace: stop using kstop_machine to enable/disable tracing arm64: ftrace: fix the comments for ftrace_modify_code arch/arm64/kernel/ftrace.c | 16 ++-- 1 files changed, 10 insertions(+), 6 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 2/2] arm64: ftrace: fix the comments for ftrace_modify_code
There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Suggested-by: Steven Rostedt Signed-off-by: Li Bin --- arch/arm64/kernel/ftrace.c | 11 +-- 1 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 9669b33..8f7005b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -29,12 +29,11 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, /* * Note: -* Due to modules and __init, code can disappear and change, -* we need to protect against faulting as well as code changing. -* We do this by aarch64_insn_*() which use the probe_kernel_*(). -* -* No lock is held here because all the modifications are run -* through stop_machine(). +* We are paranoid about modifying text, as if a bug were to happen, it +* could cause us to read or write to someplace that could cause harm. +* Carefully read and modify the code with aarch64_insn_*() which uses +* probe_kernel_*(), and make sure what we read is what we expected it +* to be before modifying it. */ if (validate) { if (aarch64_insn_read((void *)pc, &replaced)) -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 2/2] arm64: ftrace: fix the comments for ftrace_modify_code
on 2015/12/4 10:50, Steven Rostedt wrote: > On Fri, 4 Dec 2015 10:18:39 +0800 > Li Bin wrote: > >> There is no need to worry about module text disappearing case, >> because that ftrace has a module notifier that is called when >> a module is being unloaded and before the text goes away, and this >> code grabs the ftrace_lock mutex and removes the module functions >> from the ftrace list, such that it will no longer do any >> modifications to that module's text. >> The update to make functions be traced or not is done under the >> ftrace_lock mutex as well. >> >> Signed-off-by: Li Bin >> --- >> arch/arm64/kernel/ftrace.c |5 + >> 1 files changed, 1 insertions(+), 4 deletions(-) >> >> diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c >> index 9669b33..ee91c0c 100644 >> --- a/arch/arm64/kernel/ftrace.c >> +++ b/arch/arm64/kernel/ftrace.c >> @@ -29,12 +29,9 @@ static int ftrace_modify_code(unsigned long pc, u32 old, >> u32 new, >> >> /* >> * Note: >> - * Due to modules and __init, code can disappear and change, >> + * Due to __init, code can disappear and change, > Init code should not be modified either because it is black listed in > recordmcount.c. > > I say just change the comment to be something like: > > We are paranoid about modifying text, as if a bug were to happen, it > could cause us to read or write to someplace that could cause harm. > Carefully read and modify the code with aarch64_insn_*() which uses > probe_kernel_*(), and make sure what we read is what we expected it to > be before modifying it. Ok, I will modify it. Thanks, Li Bin > -- Steve > > >> * we need to protect against faulting as well as code changing. >> * We do this by aarch64_insn_*() which use the probe_kernel_*(). >> - * >> - * No lock is held here because all the modifications are run >> - * through stop_machine(). >> */ >> if (validate) { >> if (aarch64_insn_read((void *)pc, &replaced)) > > . 
> -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 0/2] arm64: stop using kstop_machine for ftrace
v2: Based on the comments from Will and Steve, 1. Modify the commit message 2. Fix the misleading comments for ftrace_modify_code Link: https://lkml.org/lkml/2015/12/3/422 Li Bin (2): arm64: ftrace: stop using kstop_machine to enable/disable tracing arm64: ftrace: fix the comments for ftrace_modify_code arch/arm64/kernel/ftrace.c | 10 ++ 1 files changed, 6 insertions(+), 4 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 1/2] arm64: ftrace: stop using kstop_machine to enable/disable tracing
For ftrace on arm64, kstop_machine — which is hugely disruptive to a running system — is not needed to convert nops to ftrace calls or back, because the instructions to be modified (NOP, B or BL) are all safe instructions that permit "concurrent modification and execution of instructions", i.e. they can be executed by one thread of execution as they are being modified by another thread of execution without requiring explicit synchronization. Signed-off-by: Li Bin --- arch/arm64/kernel/ftrace.c |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c851be7..9669b33 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -93,6 +93,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return ftrace_modify_code(pc, old, new, true); } +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + int __init ftrace_dyn_arch_init(void) { return 0; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 2/2] arm64: ftrace: fix the comments for ftrace_modify_code
There is no need to worry about module text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away, and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text. The update to make functions be traced or not is done under the ftrace_lock mutex as well. Signed-off-by: Li Bin --- arch/arm64/kernel/ftrace.c |5 + 1 files changed, 1 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 9669b33..ee91c0c 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -29,12 +29,9 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, /* * Note: -* Due to modules and __init, code can disappear and change, +* Due to __init, code can disappear and change, * we need to protect against faulting as well as code changing. * We do this by aarch64_insn_*() which use the probe_kernel_*(). -* -* No lock is held here because all the modifications are run -* through stop_machine(). */ if (validate) { if (aarch64_insn_read((void *)pc, &replaced)) -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2] ftrace: fix a typo in comment
s/ARCH_SUPPORT_FTARCE_OPS/ARCH_SUPPORTS_FTRACE_OPS Signed-off-by: Li Bin --- kernel/trace/ftrace.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3f743b1..0033e05 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5195,7 +5195,7 @@ out: * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS. * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. * An architecture can pass partial regs with ftrace_ops and still - * set the ARCH_SUPPORT_FTARCE_OPS. + * set the ARCH_SUPPORTS_FTRACE_OPS. */ #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] ftrace: fix a typo in comment
s/ARCH_SUPPORT_FTARCE_OPS/ARCH_SUPPORTS_FTARCE_OPS Signed-off-by: Li Bin --- kernel/trace/ftrace.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3f743b1..eb4a881 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5195,7 +5195,7 @@ out: * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS. * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. * An architecture can pass partial regs with ftrace_ops and still - * set the ARCH_SUPPORT_FTARCE_OPS. + * set the ARCH_SUPPORTS_FTARCE_OPS. */ #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] livepatch: fix race between enabled_store() and klp_unregister_patch()
There is a potential race as following: CPU0 | CPU1 -|--- enabled_store() | klp_unregister_patch() | |-mutex_lock(&klp_mutex); |-mutex_lock(&klp_mutex);| |-klp_free_patch(); | |-mutex_unlock(&klp_mutex); |-[process the patch's state]| |-mutex_unlock(&klp_mutex) | Fix this race condition by adding klp_is_patch_registered() check in enabled_store() after get the lock klp_mutex. Signed-off-by: Li Bin --- kernel/livepatch/core.c |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index db545cb..50af971 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -614,6 +614,11 @@ static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr, mutex_lock(&klp_mutex); + if (!klp_is_patch_registered(patch)) { + ret = -EINVAL; + goto err; + } + if (val == patch->state) { /* already in requested state */ ret = -EINVAL; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] arm64: ftrace: stop using kstop_machine to enable/disable tracing
On arm64, kstop_machine which is hugely disruptive to a running system is not needed to convert nops to ftrace calls or back, because the modified code is a single 32-bit instruction which cannot cross cache (or page) boundaries, and the used str instruction is single-copy atomic. Cc: # 3.18+ Signed-off-by: Li Bin --- arch/arm64/kernel/ftrace.c |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c851be7..9669b33 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -93,6 +93,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return ftrace_modify_code(pc, old, new, true); } +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + int __init ftrace_dyn_arch_init(void) { return 0; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/3] recordmcount: bugfix and amend
Li Bin (3): recordmcount: fix endianness handling bug for nop_mcount recordmcount: x86: assign a meaningful value to rel_type_nop recordmcount: arm64: replace the ignored mcount call into nop scripts/recordmcount.c | 26 +- scripts/recordmcount.h | 5 +++-- 2 files changed, 28 insertions(+), 3 deletions(-) -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/3] recordmcount: x86: assign a meaningful value to rel_type_nop
Although, the default value of rel_type_nop is zero, and the value of R_386_NONE/R_X86_64_NONE is zero too, but it should be assigned a meaningful value explicitly, otherwise it looks confused. Assign R_386_NONE to rel_type_nop for 386, assign R_X86_64_NONE to rel_type_nop for x86_64. Signed-off-by: Li Bin --- scripts/recordmcount.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 3d1984e..8cc020b 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -345,6 +345,7 @@ do_file(char const *const fname) break; case EM_386: reltype = R_386_32; + rel_type_nop = R_386_NONE; make_nop = make_nop_x86; ideal_nop = ideal_nop5_x86_32; mcount_adjust_32 = -1; @@ -371,6 +372,7 @@ do_file(char const *const fname) make_nop = make_nop_x86; ideal_nop = ideal_nop5_x86_64; reltype = R_X86_64_64; + rel_type_nop = R_X86_64_NONE; mcount_adjust_64 = -1; break; } /* end switch */ -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/3] recordmcount: arm64: replace the ignored mcount call into nop
By now, the recordmcount only records the function that in following sections: .text/.ref.text/.sched.text/.spinlock.text/.irqentry.text/ .kprobes.text/.text.unlikely For the function that not in these sections, the call mcount will be in place and not be replaced when kernel boot up. And it will bring performance overhead, such as do_mem_abort (in .exception.text section). This patch make the call mcount to nop for this case in recordmcount. Cc: # 3.18+ Signed-off-by: Li Bin --- scripts/recordmcount.c | 24 +++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 8cc020b..698768b 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -42,6 +42,7 @@ #ifndef EM_AARCH64 #define EM_AARCH64 183 +#define R_AARCH64_NONE 0 #define R_AARCH64_ABS64257 #endif @@ -160,6 +161,22 @@ static int make_nop_x86(void *map, size_t const offset) return 0; } +static unsigned char ideal_nop4_arm64[4] = {0x1f, 0x20, 0x03, 0xd5}; +static int make_nop_arm64(void *map, size_t const offset) +{ + uint32_t *ptr; + + ptr = map + offset; + /* bl <_mcount> is 0x9400 before relocation */ + if (*ptr != 0x9400) + return -1; + + /* Convert to nop */ + ulseek(fd_map, offset, SEEK_SET); + uwrite(fd_map, ideal_nop, 4); + return 0; +} + /* * Get the whole file as a programming convenience in order to avoid * malloc+lseek+read+free of many pieces. 
If successful, then mmap @@ -354,7 +371,12 @@ do_file(char const *const fname) altmcount = "__gnu_mcount_nc"; break; case EM_AARCH64: -reltype = R_AARCH64_ABS64; gpfx = '_'; break; + reltype = R_AARCH64_ABS64; + make_nop = make_nop_arm64; + rel_type_nop = R_AARCH64_NONE; + ideal_nop = ideal_nop4_arm64; + gpfx = '_'; + break; case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break; case EM_METAG: reltype = R_METAG_ADDR32; altmcount = "_mcount_wrapper"; -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/3] recordmcount: fix endianness handling bug for nop_mcount
In nop_mcount, shdr->sh_offset and relp->r_offset should handle endianness properly, otherwise it will trigger Segmentation fault if the recordmcount main and file.o have different endianness. Cc: # 3.0+ Signed-off-by: Li Bin --- scripts/recordmcount.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 49b582a..dda9dba 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -376,8 +376,9 @@ static void nop_mcount(Elf_Shdr const *const relhdr, mcountsym = get_mcountsym(sym0, relp, str0); if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) { - if (make_nop) - ret = make_nop((void *)ehdr, shdr->sh_offset + relp->r_offset); + if (make_nop) { + ret = make_nop((void *)ehdr, _w(shdr->sh_offset) + _w(relp->r_offset)); + } if (warn_on_notrace_sect && !once) { printf("Section %s has mcount callers being ignored\n", txtname); -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] recordmcount: x86: assign a meaningful value to rel_type_nop
Although, the default value of rel_type_nop is zero, and the value of R_386_NONE/R_X86_64_NONE is zero too, but it should be assigned a meaningful value explicitly, otherwise it looks confused. Assign R_386_NONE to rel_type_nop for 386, assign R_X86_64_NONE to rel_type_nop for x86_64. Signed-off-by: Li Bin --- scripts/recordmcount.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 3d1984e..8cc020b 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -345,6 +345,7 @@ do_file(char const *const fname) break; case EM_386: reltype = R_386_32; + rel_type_nop = R_386_NONE; make_nop = make_nop_x86; ideal_nop = ideal_nop5_x86_32; mcount_adjust_32 = -1; @@ -371,6 +372,7 @@ do_file(char const *const fname) make_nop = make_nop_x86; ideal_nop = ideal_nop5_x86_64; reltype = R_X86_64_64; + rel_type_nop = R_X86_64_NONE; mcount_adjust_64 = -1; break; } /* end switch */ -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] recordmcount: arm64: replace the ignored mcount call into nop
By now, the recordmcount only records the function that in following sections: .text/.ref.text/.sched.text/.spinlock.text/.irqentry.text/ .kprobes.text/.text.unlikely For the function that not in these sections, the call mcount will be in place and not be replaced when kernel boot up. And it will bring performance overhead, such as do_mem_abort (in .exception.text section). This patch make the call mcount to nop for this case in recordmcount. Signed-off-by: Li Bin --- scripts/recordmcount.c | 23 ++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 3d1984e..f697226 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -42,6 +42,7 @@ #ifndef EM_AARCH64 #define EM_AARCH64 183 +#define R_AARCH64_NONE 0 #define R_AARCH64_ABS64257 #endif @@ -160,6 +161,21 @@ static int make_nop_x86(void *map, size_t const offset) return 0; } +static unsigned char ideal_nop4_arm64[4] = {0x1f, 0x20, 0x03, 0xd5}; +static int make_nop_arm64(void *map, size_t const offset) +{ + uint32_t *ptr; + + ptr = map + offset; + if (*ptr != 0x9400) + return -1; + + /* Convert to nop */ + ulseek(fd_map, offset, SEEK_SET); + uwrite(fd_map, ideal_nop, 4); + return 0; +} + /* * Get the whole file as a programming convenience in order to avoid * malloc+lseek+read+free of many pieces. 
If successful, then mmap @@ -353,7 +369,12 @@ do_file(char const *const fname) altmcount = "__gnu_mcount_nc"; break; case EM_AARCH64: -reltype = R_AARCH64_ABS64; gpfx = '_'; break; + reltype = R_AARCH64_ABS64; + make_nop = make_nop_arm64; + rel_type_nop = R_AARCH64_NONE; + ideal_nop = ideal_nop4_arm64; + gpfx = '_'; + break; case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break; case EM_METAG: reltype = R_METAG_ADDR32; altmcount = "_mcount_wrapper"; -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] arm64: ftrace: function_graph: dump real return addr in call trace
When using function graph tracer, the printed call trace will be as following that has many ftrace_graph_caller (return_to_handler - 4), which is been placed in the stack by ftrace_graph tracer to replace the real return address. [ 198.582568] Call trace: [ 198.583313] [] next_tgid+0x30/0x100 [ 198.584359] [] ftrace_graph_caller+0x6c/0x70 [ 198.585503] [] ftrace_graph_caller+0x6c/0x70 [ 198.586574] [] ftrace_graph_caller+0x6c/0x70 [ 198.587660] [] ftrace_graph_caller+0x6c/0x70 [ 198.588896] Code: aa0003f5 2a0103f4 b4000102 91004043 (885f7c60) [ 198.591092] ---[ end trace 6a346f8f20949ac8 ]--- This patch fix it, and dump the real return address in the call trace. Signed-off-by: Li Bin --- arch/arm64/kernel/traps.c | 31 +++ 1 file changed, 31 insertions(+) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f93aae5..4a4e679 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -143,9 +143,38 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) set_fs(fs); } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void print_ftrace_graph_addr(unsigned long addr, + struct task_struct *tsk, + unsigned long sp, int *graph) +{ + unsigned long ret_addr; + int index = tsk->curr_ret_stack; + + if (addr != ((unsigned long)return_to_handler - 4)) + return; + + if (!tsk->ret_stack || index < *graph) + return; + + index -= *graph; + ret_addr = tsk->ret_stack[index].ret; + + dump_backtrace_entry(ret_addr - 4, sp); + + (*graph)++; +} +#else +static inline void print_ftrace_graph_addr(unsigned long addr, + struct task_struct *tsk, + unsigned long sp, int *graph) +{} +#endif + static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; + int graph = 0; pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); @@ -177,7 +206,9 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) ret = unwind_frame(&frame); if (ret < 0) break; + dump_backtrace_entry(where, frame.sp); + 
print_ftrace_graph_addr(where, tsk, frame.sp, &graph); } } -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] arm64: ftrace: fix function_graph tracer panic
When function graph tracer is enabled, the following operation will trigger panic: mount -t debugfs nodev /sys/kernel echo next_tgid > /sys/kernel/tracing/set_ftrace_filter echo function_graph > /sys/kernel/tracing/current_tracer ls /proc/ [ cut here ] [ 198.501417] Unable to handle kernel paging request at virtual address cb88537fdc8ba316 [ 198.506126] pgd = ffc008f79000 [ 198.509363] [cb88537fdc8ba316] *pgd=488c6003, *pud=488c6003, *pmd= [ 198.517726] Internal error: Oops: 9405 [#1] SMP [ 198.518798] Modules linked in: [ 198.520582] CPU: 1 PID: 1388 Comm: ls Tainted: G [ 198.521800] Hardware name: linux,dummy-virt (DT) [ 198.522852] task: ffc0fa9e8000 ti: ffc0f9ab task.ti: ffc0f9ab [ 198.524306] PC is at next_tgid+0x30/0x100 [ 198.525205] LR is at return_to_handler+0x0/0x20 [ 198.526090] pc : [] lr : [] pstate: 6145 [ 198.527392] sp : ffc0f9ab3d40 [ 198.528084] x29: ffc0f9ab3d40 x28: ffc0f9ab [ 198.529406] x27: ffc000d6a000 x26: ffc000b786e8 [ 198.530659] x25: ffc0002a1900 x24: ffc0faf16c00 [ 198.531942] x23: ffc0f9ab3ea0 x22: 0002 [ 198.533202] x21: ffc000d85050 x20: 0002 [ 198.534446] x19: 0002 x18: [ 198.535719] x17: 0049fa08 x16: ffc000242efc [ 198.537030] x15: 007fa472b54c x14: ff00 [ 198.538347] x13: ffc0fada84a0 x12: 0001 [ 198.539634] x11: ffc0f9ab3d70 x10: ffc0f9ab3d70 [ 198.540915] x9 : ffc907c0 x8 : ffc0f9ab3d40 [ 198.542215] x7 : 002e330f08f0 x6 : 0015 [ 198.543508] x5 : 0f08 x4 : ffc0f9835ec0 [ 198.544792] x3 : cb88537fdc8ba316 x2 : cb88537fdc8ba306 [ 198.546108] x1 : 0002 x0 : ffc000d85050 [ 198.547432] [ 198.547920] Process ls (pid: 1388, stack limit = 0xffc0f9ab0020) [ 198.549170] Stack: (0xffc0f9ab3d40 to 0xffc0f9ab4000) [ 198.582568] Call trace: [ 198.583313] [] next_tgid+0x30/0x100 [ 198.584359] [] ftrace_graph_caller+0x6c/0x70 [ 198.585503] [] ftrace_graph_caller+0x6c/0x70 [ 198.586574] [] ftrace_graph_caller+0x6c/0x70 [ 198.587660] [] ftrace_graph_caller+0x6c/0x70 [ 198.588896] Code: aa0003f5 2a0103f4 b4000102 91004043 (885f7c60) [ 
198.591092] ---[ end trace 6a346f8f20949ac8 ]--- This is because when using function graph tracer, if the traced function return value is in multi regs ([0x-07]), return_to_handler may corrupt them. So in return_to_handler, the parameter regs should be protected properly. Cc: # 3.18+ Signed-off-by: Li Bin --- arch/arm64/kernel/entry-ftrace.S | 22 -- 1 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 08cafc5..0f03a8f 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -178,6 +178,24 @@ ENTRY(ftrace_stub) ENDPROC(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* save return value regs*/ + .macro save_return_regs + sub sp, sp, #64 + stp x0, x1, [sp] + stp x2, x3, [sp, #16] + stp x4, x5, [sp, #32] + stp x6, x7, [sp, #48] + .endm + + /* restore return value regs*/ + .macro restore_return_regs + ldp x0, x1, [sp] + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] + add sp, sp, #64 + .endm + /* * void ftrace_graph_caller(void) * @@ -204,11 +222,11 @@ ENDPROC(ftrace_graph_caller) * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. */ ENTRY(return_to_handler) - str x0, [sp, #-16]! + save_return_regs mov x0, x29 // parent's fp bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); mov x30, x0 // restore the original return address - ldr x0, [sp], #16 + restore_return_regs ret END(return_to_handler) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] livepatch: add sysfs interface /sys/kernel/livepatch/state
On 2015/6/17 21:20, Miroslav Benes wrote: > On Wed, 17 Jun 2015, Li Bin wrote: > >> On 2015/6/17 16:13, Miroslav Benes wrote: >>> On Wed, 17 Jun 2015, Li Bin wrote: >> >>> The list of applied patches can be obtained just by 'ls >>> /sys/kernel/livepatch' and their state is in enabled attribute in each >>> respective patch (no, you cannot obtain the order in the stack). >> >> But why we cannot obtain it? I think We indeed need the stack order when we >> will disable one patch, at least, we can find out whether it is on the top of >> the stack if failed to disable one patch. > > I meant with the current means. It is correct that we do not export > information about stacking order anywhere. > > What we do in kGraft is that there is something like refcount for each > patch. When the patch is being applied the refcount of all the previous > patches is increased. Only the patch with the refcount equal to 0 can be > removed. This information is exported and gives one a clue about the > order. > It sounds good, but the information is limited that cannot show the stack order, right? (The refcount of all the disabled patch is equal to 0, if being enable one disabled patch, the stack order is also needed.) refcount Patch --- 3 patch1(enabled) 2 patch2(enabled) 1 patch3(enabled) 0 patch4(enabled) 0 patch5(disabled) 0 patch6(disabled) Unless the refcount is allowed to be less than 0, then when the patch is being disabled the refcount of all the patches is decreased, when the patch is being enabled the refcount of all patches is increased. Only the patch with the refcount equal to 0 can be disabled and only equal to -1 can be enabled, and only less or equal to 0 can be removed (that the livepatch does not support right now). 
refcount Patch --- 3 patch1(enabled) 2 patch2(enabled) 1 patch3(enabled) 0 patch4(enabled) -1 patch5(disabled) -2 patch6(disabled) Thanks, Li Bin > So if there is a need to have something like this there would certainly > be a way (or ways to be precise) how to do it. The question is if we need > it right now. > > Regards, > Miroslav > > . > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] livepatch: add sysfs interface /sys/kernel/livepatch/state
On 2015/6/17 16:13, Miroslav Benes wrote: > On Wed, 17 Jun 2015, Li Bin wrote: > >> The added sysfs interface /sys/kernel/livepatch/state is read-only, >> it shows the patches that have been applied, incluing the stack index >> and the state of each patch. >> >> $ cat /sys/kernel/livepatch/state >> IndexPatch State >> --- >> 1klp_test1 enabled >> 2klp_test2 enabled >> 3klp_test3 enabled >> --- >> >> $ echo 0 > /sys/kernel/livepatch/klp_test3/enabled >> $ cat /sys/kernel/livepatch/state >> IndexPatch State >> --- >> 1klp_test1 enabled >> 2klp_test2 enabled >> 3klp_test3 disabled >> --- >> >> Signed-off-by: Li Bin >> --- > > Hi, > > I think we should comply with sysfs policy and keep 'one value per > attribute' as mentioned in the Documentation/filesystems/sysfs.txt: > > " > Attributes should be ASCII text files, preferably with only one value > per file. It is noted that it may not be efficient to contain only one > value per file, so it is socially acceptable to express an array of > values of the same type. > > Mixing types, expressing multiple lines of data, and doing fancy > formatting of data is heavily frowned upon. Doing these things may get > you publicly humiliated and your code rewritten without notice. > " Hi Miroslav Benes, Thanks for your comments. > The list of applied patches can be obtained just by 'ls > /sys/kernel/livepatch' and their state is in enabled attribute in each > respective patch (no, you cannot obtain the order in the stack). But why we cannot obtain it? I think We indeed need the stack order when we will disable one patch, at least, we can find out whether it is on the top of the stack if failed to disable one patch. Thanks, Li Bin > > Thanks, > > Miroslav Benes > SUSE Labs > > . > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] livepatch: add sysfs interface /sys/kernel/livepatch/state
The added sysfs interface /sys/kernel/livepatch/state is read-only, it shows the patches that have been applied, including the stack index and the state of each patch. $ cat /sys/kernel/livepatch/state Index Patch State --- 1 klp_test1 enabled 2 klp_test2 enabled 3 klp_test3 enabled --- $ echo 0 > /sys/kernel/livepatch/klp_test3/enabled $ cat /sys/kernel/livepatch/state Index Patch State --- 1 klp_test1 enabled 2 klp_test2 enabled 3 klp_test3 disabled --- Signed-off-by: Li Bin --- Documentation/ABI/testing/sysfs-kernel-livepatch |9 + kernel/livepatch/core.c | 41 +- 2 files changed, 49 insertions(+), 1 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-kernel-livepatch b/Documentation/ABI/testing/sysfs-kernel-livepatch index 5bf42a8..01c64da 100644 --- a/Documentation/ABI/testing/sysfs-kernel-livepatch +++ b/Documentation/ABI/testing/sysfs-kernel-livepatch @@ -8,6 +8,15 @@ Description: The /sys/kernel/livepatch directory contains subdirectories for each loaded live patch module. +What: /sys/kernel/livepatch/state +Date: Jun 2015 +KernelVersion: 4.1.0 +Contact: live-patch...@vger.kernel.org +Description: + The state file is read-only and shows the patches that have + been applied, including the patch stack index and state of + each patch. 
+ What: /sys/kernel/livepatch/ Date: Nov 2014 KernelVersion: 3.19.0 diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 3f9f1d6..5d004f3 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -604,6 +604,7 @@ EXPORT_SYMBOL_GPL(klp_enable_patch); * Sysfs Interface * * /sys/kernel/livepatch + * /sys/kernel/livepatch/state * /sys/kernel/livepatch/ * /sys/kernel/livepatch//enabled * /sys/kernel/livepatch// @@ -1008,6 +1009,36 @@ static struct notifier_block klp_module_nb = { .priority = INT_MIN+1, /* called late but before ftrace notifier */ }; +static ssize_t state_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct klp_patch *patch; + unsigned long size = 0; + int index = 0; + size += snprintf(buf+size, PAGE_SIZE-size-1, "%-5s\t%-26s\t%-8s\n", "Index", "Patch", "State"); + size += snprintf(buf+size, PAGE_SIZE-size-1, "---\n"); + mutex_lock(&klp_mutex); + list_for_each_entry(patch, &klp_patches, list) { + size += snprintf(buf+size, PAGE_SIZE-size-1, "%-5d\t%-26s\t%-8s\n", + ++index, patch->mod->name, patch->state ? 
"enabled" : "disabled"); + } + mutex_unlock(&klp_mutex); + size += snprintf(buf+size, PAGE_SIZE-size-1, "---\n"); + + return size; +} +static struct kobj_attribute state_kobj_attr = __ATTR_RO(state); + +static struct attribute *klp_top_attrs[] = { + &state_kobj_attr.attr, + NULL +}; + +static struct kobj_type klp_ktype_top = { + .sysfs_ops = &kobj_sysfs_ops, + .default_attrs = klp_top_attrs, +}; + static int klp_init(void) { int ret; @@ -1022,12 +1053,20 @@ static int klp_init(void) if (ret) return ret; - klp_root_kobj = kobject_create_and_add("livepatch", kernel_kobj); + klp_root_kobj = kzalloc(sizeof(*klp_root_kobj), GFP_KERNEL); if (!klp_root_kobj) { ret = -ENOMEM; goto unregister; } + ret = kobject_init_and_add(klp_root_kobj, &klp_ktype_top, + kernel_kobj, "livepatch"); + if (ret) { + kobject_put(klp_root_kobj); + klp_root_kobj = NULL; + goto unregister; + } + return 0; unregister: -- 1.7.7 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC PATCH 0/5] livepatch: add support on arm64
On 2015/6/2 10:15, AKASHI Takahiro wrote: > On 05/30/2015 09:01 AM, Masami Hiramatsu wrote: >> On 2015/05/28 14:51, Li Bin wrote: >>> This patchset propose a method for gcc -mfentry feature(profile >>> before prologue) implementation for arm64, and propose the livepatch >>> implementation for arm64 based on this feature. >>> The gcc implementation about this feature will be post to the gcc >>> community soon. >>> >>> With this -mfentry feature, the entry of each function like: >>> >>> foo: >>> mov x9, x30 >>> bl __fentry__ >>> mov x30, x9 >>> [prologue] >>> ... >>> >>> The x9 is a callee corruptible register, and the __fentry__ function >>> is responsible to protect all registers, so it can be used to protect >>> the x30. And the added two instructions which is register mov operation >>> have ralatively small impact on performance. >> >> Hm, this implementation looks good to me :) >> This also enables us to KPROBES_ON_FTRACE too. > > Even if x9 is a callee-saved register, there is no way to restore its original > value in setting up a pt_regs in ftrace_reg_caller. Hi, Takahiro AKASHI Firstly, x9 is not a callee-saved but a caller-saved register(or being called corruptible register). Secondly, I think x9 is already protected properly, please reference the patch: [PATCH 1/5] livepatch: ftrace: arm64: Add support for DYNAMIC_FTRACE_WITH_REGS [PATCH 3/5] livepatch: ftrace: arm64: Add support for -mfentry on arm64 > It's not the right thing for KPROBES_ON_FTRACE, is it? > > Saving Link register in stack is not a big deal since the overhead of ftrace > is much bigger. Performance overhead is only one aspect of the problem, and more importantly, even worse is that it would break the arm64 ABI rules. Thanks, Li Bin > > -Takahiro AKASHI > > >> Thanks, >> >>> >>> This patchset has been tested on arm64 platform. 
>>> >>> Li Bin (4): >>>livepatch: ftrace: arm64: Add support for DYNAMIC_FTRACE_WITH_REGS >>>livepatch: ftrace: add ftrace_function_stub_ip function >>>livepatch: ftrace: arm64: Add support for -mfentry on arm64 >>>livepatch: arm64: add support for livepatch on arm64 >>> >>> Xie XiuQi (1): >>>livepatch: arm64: support relocation in a module >>> >>> arch/arm64/Kconfig |5 + >>> arch/arm64/include/asm/ftrace.h|9 + >>> arch/arm64/include/asm/livepatch.h | 45 + >>> arch/arm64/kernel/Makefile |1 + >>> arch/arm64/kernel/arm64ksyms.c |4 + >>> arch/arm64/kernel/entry-ftrace.S | 154 +++- >>> arch/arm64/kernel/ftrace.c | 28 +++- >>> arch/arm64/kernel/livepatch.c | 41 >>> arch/arm64/kernel/module.c | 355 >>> ++-- >>> include/linux/ftrace.h |1 + >>> kernel/livepatch/core.c| 17 ++- >>> kernel/trace/ftrace.c | 32 >>> scripts/recordmcount.pl|2 +- >>> 13 files changed, 508 insertions(+), 186 deletions(-) >>> create mode 100644 arch/arm64/include/asm/livepatch.h >>> create mode 100644 arch/arm64/kernel/livepatch.c >>> >>> -- >>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in >>> the body of a message to majord...@vger.kernel.org >>> More majordomo info at http://vger.kernel.org/majordomo-info.html >>> Please read the FAQ at http://www.tux.org/lkml/ >>> >> >> > -- > To unsubscribe from this list: send the line "unsubscribe live-patching" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > > . > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC PATCH 1/5] livepatch: ftrace: arm64: Add support for DYNAMIC_FTRACE_WITH_REGS
On 2015/5/29 15:14, Paul Bolle wrote: > On Thu, 2015-05-28 at 13:51 +0800, Li Bin wrote: >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig > >> select HAVE_DYNAMIC_FTRACE >> +select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE > > What's the point of "if HAVE_DYNAMIC_FTRACE" here? That test should > always evaluate to true, because HAVE_DYNAMIC_FTRACE is also selected. > Ah, I'll modify it. Thanks! Li Bin > > Paul Bolle > > > > > . > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC 0/4] arm64: add livepatch support
On 2015/4/24 17:27, Jiri Kosina wrote: > On Fri, 24 Apr 2015, AKASHI Takahiro wrote: > >> This patchset enables livepatch support on arm64. >> >> Livepatch was merged in v4.0, and allows replacying a function dynamically >> based on ftrace framework, but it also requires -mfentry option of gcc. >> Currently arm64 gcc doesn't support it, but by adding a helper function to >> ftrace, we will be able to support livepatch on arch's which don't support >> this option. >> >> I submit this patchset as RFC since I'm not quite sure that I'm doing >> in the right way, or we should definitely support -fentry instead. > > I don't have arm64 cross-compiler handy, could you please copy/paste how > does function prologue, generated by gcc -pg on arm64 look like? > The function prologue on arm64 with gcc -pg look like as following: func: stp x29, x30, [sp, -48]! add x29, sp, 0 mov x1, x30 str w0, [x29,28] mov x0, x1 bl _mcount ... Thanks, Li Bin > Thanks, > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] livepatch: match function return value type with prototype
On 2015/5/26 15:32, Jiri Slaby wrote: > -BEGIN PGP SIGNED MESSAGE- > Hash: SHA256 > > On 05/26/2015, 06:46 AM, Minfei Huang wrote: >> On Tue, May 26, 2015 at 10:44 AM, Li Bin >> wrote: >>> The klp_is_module return type should be boolean. >>> >>> Signed-off-by: Li Bin --- >>> kernel/livepatch/core.c |2 +- 1 files changed, 1 >>> insertions(+), 1 deletions(-) >>> >>> diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c >>> index 284e269..30e9339 100644 --- a/kernel/livepatch/core.c +++ >>> b/kernel/livepatch/core.c @@ -78,7 +78,7 @@ static struct klp_ops >>> *klp_find_ops(unsigned long old_addr) >>> >>> static bool klp_is_module(struct klp_object *obj) { - >>> return obj->name; + return !!obj->name; } >> >> Hi, Bin. >> >> Does compile blame the prototype before you convert it? If not, I >> think we do not need to fix it, since there are a lot of places in >> kernel where we return point value as boolean. > > Yes, but the real reason is not that we use it on a lot of places, but > that it is standardized in ¶ 6.3.1.2: > When any scalar value is converted to _Bool, the result is 0 if the > value compares equal to 0; otherwise, the result is 1. > OK, I see, thank you! Li Bin > Along with the definition of scalar types: > Arithmetic types and pointer types are collectively called scalar types. 
> > thanks, > - -- > js > suse labs > -BEGIN PGP SIGNATURE- > Version: GnuPG v2 > > iQIcBAEBCAAGBQJVZCGXAAoJEL0lsQQGtHBJMCEP/AufUwrMFZ7KTD/pWKpjpDlu > fP8eflJ7iBxGtHXe9PGOsthY4kd1mB+kCo+54N1gks9bbnWF6P302hck5zrBHBdH > /wbo7YXhbHicUNAXZUzxfDh3nkKmR96CIeqZhMp2H1UBzmGRKzd6kVSdBwlbJ0/W > ZNcWmiIaALdr7aMEw+qEExV35kUdbJaqUcHKC9in3qQtlzCVQbZ5mCqURe+61ZTL > 9u/Sbf3vB+nJPzyC/8uSVAxF616PiPdgGvxRrrRrRH82JmRaVJFjsRJ1WeMNxOFt > s4gbNByNePmTG3SisqFFKQ6VJYyeEsxkeRKbvL2mW4IlzJjiWCbp6XHcv2/IzR9g > GdIu3Kgy5R0OBXTxMbb5VLVtDRZUYnD/HiOFVLdUiqI6HQ/MtdZGujnVhWAVGIse > BD8T+hTOQyt6Yk7evlEF+REOlAU8jvBvnq3PUfRlUjWso8w0giPx1Re6rQk4k39P > TmfIVrve79n2nR/OejeCAB3xBEDtFbKBg9I+5ONm1gogxz/+3o6mwfMGS8TSdNs9 > l4Kl/dhQhSW3aOmrclUoOZpcAzfTJSNCepTFq6+hKIbKibtnKs63wkLnGyL+XZSE > dEOWBC/B5YNC9VVAm20U8jAtUS5gw3n0NpXFTxOi0nb62IGY+mT+D5pPylVn31Rq > H7FDzgpZ7+lyvx+FDqpv > =UzJg > -END PGP SIGNATURE- > -- > To unsubscribe from this list: send the line "unsubscribe live-patching" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > > . > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC PATCH 5/5] livepatch: arm64: support relocation in a module
From: Xie XiuQi This patch implement klp_write_module_reloc on arm64 platform. Signed-off-by: Xie XiuQi Signed-off-by: Li Bin --- arch/arm64/kernel/livepatch.c |7 +- arch/arm64/kernel/module.c| 355 + 2 files changed, 186 insertions(+), 176 deletions(-) diff --git a/arch/arm64/kernel/livepatch.c b/arch/arm64/kernel/livepatch.c index 2a55532..ad674f0 100644 --- a/arch/arm64/kernel/livepatch.c +++ b/arch/arm64/kernel/livepatch.c @@ -18,8 +18,11 @@ */ #include +#include #include +extern int static_relocate(struct module *mod, unsigned long type, + void * loc, unsigned long value); /** * klp_write_module_reloc() - write a relocation in a module * @mod: module in which the section to be modified is found @@ -33,6 +36,6 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, unsigned long loc, unsigned long value) { - pr_err("lpc_write_module_reloc has not supported now\n"); - return -ENOSYS; + /* Perform the static relocation. */ + return static_relocate(mod, type, (void *)loc, value); } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 67bf410..7781241 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -193,6 +193,182 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, return 0; } +int static_relocate(struct module *me, unsigned long type, void *loc, + unsigned long val) +{ + int ovf = 0; + bool overflow_check = true; + /* Perform the static relocation. */ + switch (type) { + /* Null relocations. */ + case R_ARM_NONE: + case R_AARCH64_NONE: + ovf = 0; + break; + + /* Data relocations. 
*/ + case R_AARCH64_ABS64: + overflow_check = false; + ovf = reloc_data(RELOC_OP_ABS, loc, val, 64); + break; + case R_AARCH64_ABS32: + ovf = reloc_data(RELOC_OP_ABS, loc, val, 32); + break; + case R_AARCH64_ABS16: + ovf = reloc_data(RELOC_OP_ABS, loc, val, 16); + break; + case R_AARCH64_PREL64: + overflow_check = false; + ovf = reloc_data(RELOC_OP_PREL, loc, val, 64); + break; + case R_AARCH64_PREL32: + ovf = reloc_data(RELOC_OP_PREL, loc, val, 32); + break; + case R_AARCH64_PREL16: + ovf = reloc_data(RELOC_OP_PREL, loc, val, 16); + break; + + /* MOVW instruction relocations. */ + case R_AARCH64_MOVW_UABS_G0_NC: + overflow_check = false; + case R_AARCH64_MOVW_UABS_G0: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, + AARCH64_INSN_IMM_16); + break; + case R_AARCH64_MOVW_UABS_G1_NC: + overflow_check = false; + case R_AARCH64_MOVW_UABS_G1: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, + AARCH64_INSN_IMM_16); + break; + case R_AARCH64_MOVW_UABS_G2_NC: + overflow_check = false; + case R_AARCH64_MOVW_UABS_G2: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, + AARCH64_INSN_IMM_16); + break; + case R_AARCH64_MOVW_UABS_G3: + /* We're using the top bits so we can't overflow. 
*/ + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, + AARCH64_INSN_IMM_16); + break; + case R_AARCH64_MOVW_SABS_G0: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, + AARCH64_INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_SABS_G1: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, + AARCH64_INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_SABS_G2: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, + AARCH64_INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_PREL_G0_NC: + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, + AARCH64_INSN_IMM_MOVK); + break; + case R_AARCH64_MOVW_PREL_G0: + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, + AARCH64_INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_PREL_G1_NC: + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, + A
[RFC PATCH 4/5] livepatch: arm64: add support for livepatch on arm64
This patch add support for livepatch on arm64 based on the gcc -mfentry feature and the ftrace DYNAMIC_FTRACE_WITH_REGS feature. Signed-off-by: Li Bin --- arch/arm64/Kconfig |3 ++ arch/arm64/include/asm/livepatch.h | 45 arch/arm64/kernel/Makefile |1 + arch/arm64/kernel/livepatch.c | 38 ++ kernel/livepatch/core.c| 17 + 5 files changed, 99 insertions(+), 5 deletions(-) create mode 100644 arch/arm64/include/asm/livepatch.h create mode 100644 arch/arm64/kernel/livepatch.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7bb2468..8a3845c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -85,6 +85,7 @@ config ARM64 select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select HAVE_CONTEXT_TRACKING + select HAVE_LIVEPATCH help ARM 64-bit (AArch64) Linux support. @@ -159,6 +160,8 @@ source "init/Kconfig" source "kernel/Kconfig.freezer" +source "kernel/livepatch/Kconfig" + menu "Platform selection" config ARCH_EXYNOS diff --git a/arch/arm64/include/asm/livepatch.h b/arch/arm64/include/asm/livepatch.h new file mode 100644 index 000..1890413 --- /dev/null +++ b/arch/arm64/include/asm/livepatch.h @@ -0,0 +1,45 @@ +/* + * livepatch.h - arm64-specific Kernel Live Patching Core + * + * Copyright (C) 2014 Li Bin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef _ASM_ARM64_LIVEPATCH_H +#define _ASM_ARM64_LIVEPATCH_H + +#include +#include + +#ifdef CONFIG_LIVEPATCH +static inline int klp_check_compiler_support(void) +{ +#ifndef CC_USING_FENTRY + return 1; +#endif + return 0; +} +extern int klp_write_module_reloc(struct module *mod, unsigned long type, + unsigned long loc, unsigned long value); + +static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->pc = pc; +} +#else +#error Live patching support is disabled; check CONFIG_LIVEPATCH +#endif + +#endif /* _ASM_ARM64_LIVEPATCH_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 426d076..30d307b 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -23,6 +23,7 @@ arm64-obj-$(CONFIG_COMPAT)+= sys32.o kuser32.o signal32.o \ sys_compat.o entry32.o \ ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER)+= ftrace.o entry-ftrace.o +arm64-obj-$(CONFIG_LIVEPATCH) += livepatch.o arm64-obj-$(CONFIG_MODULES)+= arm64ksyms.o module.o arm64-obj-$(CONFIG_SMP)+= smp.o smp_spin_table.o topology.o arm64-obj-$(CONFIG_PERF_EVENTS)+= perf_regs.o diff --git a/arch/arm64/kernel/livepatch.c b/arch/arm64/kernel/livepatch.c new file mode 100644 index 000..2a55532 --- /dev/null +++ b/arch/arm64/kernel/livepatch.c @@ -0,0 +1,38 @@ +/* + * livepatch.c - arm64-specific Kernel Live Patching Core + * + * Copyright (C) 2014 Li Bin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include +#include + +/** + * klp_write_module_reloc() - write a relocation in a module + * @mod: module in which the section to be modified is found + * @type: ELF relocation type (see asm/elf.h) + * @loc: address that the relocation should be written to + * @value: relocation value (sym address + addend) + * + * This function writes a relocation to the specified location for + * a particular module. + */ +int klp_write_module_reloc(struct module *mod, unsigned long type, +
[RFC PATCH 1/5] livepatch: ftrace: arm64: Add support for DYNAMIC_FTRACE_WITH_REGS
If ftrace_ops is registered with flag FTRACE_OPS_FL_SAVE_REGS, the arch that support DYNAMIC_FTRACE_WITH_REGS will pass a full set of pt_regs to the ftrace_ops callback function, which may read or change the value of the pt_regs and the pt_regs will be restored to support work flow redirection such as kernel live patching. This patch adds DYNAMIC_FTRACE_WITH_REGS feature support for arm64 architecture. Signed-off-by: Li Bin --- arch/arm64/Kconfig |1 + arch/arm64/include/asm/ftrace.h |4 ++ arch/arm64/kernel/entry-ftrace.S | 95 ++ arch/arm64/kernel/ftrace.c | 28 ++- 4 files changed, 126 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7796af4..ea435c9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -57,6 +57,7 @@ config ARM64 select HAVE_DMA_ATTRS select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index c5534fa..a7722b9 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -16,6 +16,10 @@ #define MCOUNT_ADDR((unsigned long)_mcount) #define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE +#ifdef CONFIG_DYNAMIC_FTRACE +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 08cafc5..fde793b 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -12,6 +12,8 @@ #include #include #include +#include +#include /* * Gcc with -pg will put the following code in the beginning of each function: @@ -50,11 +52,37 @@ mov x29, sp .endm + /* save parameter registers & corruptible registers */ + .macro save_mcount_regs + sub sp, sp, #S_FRAME_SIZE + stp x0, x1, [sp] + stp x2, x3, [sp, #16] + stp x4, x5, [sp, #32] + stp x6, x7, [sp, #48] + 
stp x8, x9, [sp, #64] + stp x10, x11, [sp, #80] + stp x12, x13, [sp, #96] + stp x14, x15, [sp, #112] + .endm + .macro mcount_exit ldp x29, x30, [sp], #16 ret .endm + /* restore parameter registers & corruptible registers */ + .macro restore_mcount_regs + ldp x0, x1, [sp] + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] + ldp x8, x9, [sp, #64] + ldp x10, x11, [sp, #80] + ldp x12, x13, [sp, #96] + ldp x14, x15, [sp, #112] + add sp, sp, #S_FRAME_SIZE + .endm + .macro mcount_adjust_addr rd, rn sub \rd, \rn, #AARCH64_INSN_SIZE .endm @@ -97,6 +125,7 @@ */ ENTRY(_mcount) mcount_enter + save_mcount_regs adrpx0, ftrace_trace_function ldr x2, [x0, #:lo12:ftrace_trace_function] @@ -110,8 +139,10 @@ ENTRY(_mcount) #ifndef CONFIG_FUNCTION_GRAPH_TRACER skip_ftrace_call: // return; + restore_mcount_regs mcount_exit // } #else + restore_mcount_regs mcount_exit // return; // } skip_ftrace_call: @@ -127,6 +158,7 @@ skip_ftrace_call: cmp x0, x2 b.neftrace_graph_caller // ftrace_graph_caller(); + restore_mcount_regs mcount_exit #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ENDPROC(_mcount) @@ -153,15 +185,20 @@ ENDPROC(_mcount) */ ENTRY(ftrace_caller) mcount_enter + save_mcount_regs + adrpx0, function_trace_op + ldr x2, [x0, #:lo12:function_trace_op] mcount_get_pc0 x0 // function's pc mcount_get_lr x1 // function's lr + mov x3, #0 .global ftrace_call ftrace_call: // tracer(pc, lr); nop // This will be replaced with "bl xxx" // where xxx can be any kind of tracer. +ftrace_return: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .global ftrace_graph_call ftrace_graph_call: // ftrace_graph_caller(); @@ -169,8 +206,65 @@ ftrace_graph_call: // ftrace_graph_caller(); // "b ftrace_graph_caller" #endif + restore_mcount_regs mcount_exit ENDPROC(ftrace_caller) + +ENTRY(ftrace_regs_caller) + mcount_enter + save_mcount_regs + + /* Save the rest of pt_regs */ + stp x16, x17, [sp, #12
[RFC PATCH 3/5] livepatch: ftrace: arm64: Add support for -mfentry on arm64
This patch depends on the compiler's mfentry feature for arm64 that proposed by this patchset. If the kernel is compiled with this feature, the entry of each function like: foo: mov x9, x30 bl __fentry__ mov x30, x9 When -mfentry is used, the call is to '__fentry__' and not '_mcount' and is done before the function's stack frame is set up. So __fentry__ is responsibel to protect parameter registers and corruptible registers. Signed-off-by: Li Bin --- arch/arm64/Kconfig |1 + arch/arm64/include/asm/ftrace.h |5 +++ arch/arm64/kernel/arm64ksyms.c |4 ++ arch/arm64/kernel/entry-ftrace.S | 59 +++-- scripts/recordmcount.pl |2 +- 5 files changed, 66 insertions(+), 5 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ea435c9..7bb2468 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -60,6 +60,7 @@ config ARM64 select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FENTRY select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GENERIC_DMA_COHERENT diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index a7722b9..08eab52 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -13,7 +13,11 @@ #include +#ifdef CC_USING_FENTRY +#define MCOUNT_ADDR((unsigned long)__fentry__) +#else #define MCOUNT_ADDR((unsigned long)_mcount) +#endif #define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE #ifdef CONFIG_DYNAMIC_FTRACE @@ -24,6 +28,7 @@ #include extern void _mcount(unsigned long); +extern void __fentry__(unsigned long); extern void *return_address(unsigned int); struct dyn_arch_ftrace { diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index a85843d..f0455d3 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -63,5 +63,9 @@ EXPORT_SYMBOL(change_bit); EXPORT_SYMBOL(test_and_change_bit); #ifdef CONFIG_FUNCTION_TRACER +#ifdef CC_USING_FENTRY 
+EXPORT_SYMBOL(__fentry__); +#else EXPORT_SYMBOL(_mcount); #endif +#endif diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index fde793b..18cfe5b 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -93,27 +93,57 @@ ldr \reg, [\reg] .endm + /* for instrumented function's parent */ + .macro fentry_get_parent_fp reg + ldr \reg, [x29] + .endm + /* for instrumented function */ .macro mcount_get_pc0 reg mcount_adjust_addr \reg, x30 .endm + /* for instrumented function */ + .macro fentry_get_pc0 reg + mcount_adjust_addr \reg, x30 + .endm + .macro mcount_get_pc reg ldr \reg, [x29, #8] mcount_adjust_addr \reg, \reg .endm + .macro fentry_get_pc reg + ldr \reg, [x29, #8] + mcount_adjust_addr \reg, \reg + .endm + .macro mcount_get_lr reg ldr \reg, [x29] ldr \reg, [\reg, #8] mcount_adjust_addr \reg, \reg .endm + .macro fentry_get_lr reg, base + ldr \reg, [\base, #72] //S_X9 + mcount_adjust_addr \reg, \reg + .endm + .macro mcount_get_lr_addr reg ldr \reg, [x29] add \reg, \reg, #8 .endm + .macro fentry_get_lr_addr reg, base + add \reg, \base, #72//S_X9 + .endm + +#ifdef CC_USING_FENTRY +#definefunction_hook __fentry__ +#else +#definefunction_hook _mcount +#endif + #ifndef CONFIG_DYNAMIC_FTRACE /* * void _mcount(unsigned long return_address) @@ -123,7 +153,7 @@ * - tracer function to probe instrumented function's entry, * - ftrace_graph_caller to set up an exit hook */ -ENTRY(_mcount) +ENTRY(function_hook) mcount_enter save_mcount_regs @@ -133,8 +163,13 @@ ENTRY(_mcount) cmp x0, x2 // if (ftrace_trace_function b.eqskip_ftrace_call// != ftrace_stub) { +#ifdef CC_USING_FENTRY + fentry_get_pc x0 // function's pc + fentry_get_lr x1, sp // function's lr (= parent's pc) +#else mcount_get_pc x0 // function's pc mcount_get_lr x1 // function's lr (= parent's pc) +#endif blr x2 // (*ftrace_trace_function)(pc, lr); #ifndef CONFIG_FUNCTION_GRAPH_TRACER @@ -161,7 +196,7 @@ skip_ftrace_call: restore_mcount_regs mcount_exit 
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -ENDPROC(_mcount) +ENDPROC(function_hook
[RFC PATCH 2/5] livepatch: ftrace: add ftrace_function_stub_ip function
The function of ftrace_function_stub_ip is to convert the function address to the ftrace stub calling instruction address. This is needed for the platform that the compiler does not support "profile before prologue" feature and the profile calling instruction is not at the beginning of the function. EXAMPLES: ... stub_ip = ftrace_function_stub_ip(func_addr); ftrace_set_filter_ip(&ftrace_ops, stub_ip, 0, 0); register_ftrace_function(&ftrace_ops); ... Signed-off-by: Li Bin --- include/linux/ftrace.h |1 + kernel/trace/ftrace.c | 32 2 files changed, 33 insertions(+), 0 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1da6029..38a2811 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -360,6 +360,7 @@ struct dyn_ftrace { int ftrace_force_update(void); int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset); +unsigned long ftrace_function_stub_ip(unsigned long addr); int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 02bece4..4d8692c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4161,6 +4161,38 @@ int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, } EXPORT_SYMBOL_GPL(ftrace_set_filter_ip); +/** + * ftrace_function_stub_ip - get the profile stub calling location by the + * function address. It is useful for the platform that doesn't place the + * function profiling call at the start of the function. + * @addr - the function address to get the stub ip + * + * It returns the corresponding profile stub calling location if founded, else + * return zero. 
+ */ +unsigned long ftrace_function_stub_ip(unsigned long addr) +{ + struct ftrace_page *pg; + struct dyn_ftrace *rec; + unsigned long ret = 0; + + mutex_lock(&ftrace_lock); + + do_for_each_ftrace_rec(pg, rec) { + unsigned long offset; + + if (kallsyms_lookup_size_offset(rec->ip, NULL, &offset) + && addr + offset == rec->ip) { + ret = rec->ip; + goto out_unlock; + } + } while_for_each_ftrace_rec() +out_unlock: + mutex_unlock(&ftrace_lock); + return ret; +} +EXPORT_SYMBOL_GPL(ftrace_function_stub_ip); + static int ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, int reset, int enable) -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC PATCH 0/5] livepatch: add support on arm64
This patchset proposes a method for gcc -mfentry feature (profile before prologue) implementation for arm64, and proposes the livepatch implementation for arm64 based on this feature. The gcc implementation of this feature will be posted to the gcc community soon. With this -mfentry feature, the entry of each function like: foo: mov x9, x30 bl __fentry__ mov x30, x9 [prologue] ... The x9 is a callee corruptible register, and the __fentry__ function is responsible to protect all registers, so it can be used to protect the x30. And the added two instructions which are register mov operations have relatively small impact on performance. This patchset has been tested on the arm64 platform. Li Bin (4): livepatch: ftrace: arm64: Add support for DYNAMIC_FTRACE_WITH_REGS livepatch: ftrace: add ftrace_function_stub_ip function livepatch: ftrace: arm64: Add support for -mfentry on arm64 livepatch: arm64: add support for livepatch on arm64 Xie XiuQi (1): livepatch: arm64: support relocation in a module arch/arm64/Kconfig |5 + arch/arm64/include/asm/ftrace.h|9 + arch/arm64/include/asm/livepatch.h | 45 + arch/arm64/kernel/Makefile |1 + arch/arm64/kernel/arm64ksyms.c |4 + arch/arm64/kernel/entry-ftrace.S | 154 +++- arch/arm64/kernel/ftrace.c | 28 +++- arch/arm64/kernel/livepatch.c | 41 arch/arm64/kernel/module.c | 355 ++-- include/linux/ftrace.h |1 + kernel/livepatch/core.c| 17 ++- kernel/trace/ftrace.c | 32 scripts/recordmcount.pl|2 +- 13 files changed, 508 insertions(+), 186 deletions(-) create mode 100644 arch/arm64/include/asm/livepatch.h create mode 100644 arch/arm64/kernel/livepatch.c -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC 0/4] arm64: add livepatch support
On 2015/4/24 14:05, Masami Hiramatsu wrote: > (2015/04/24 12:24), Li Bin wrote: >> On 2015/4/24 10:44, AKASHI Takahiro wrote: >>> This patchset enables livepatch support on arm64. >>> >>> Livepatch was merged in v4.0, and allows replacying a function dynamically >>> based on ftrace framework, but it also requires -mfentry option of gcc. >>> Currently arm64 gcc doesn't support it, but by adding a helper function to >>> ftrace, we will be able to support livepatch on arch's which don't support >>> this option. >>> >> >> This is not correct for the case that the prologue of the old and new >> function >> is different. > > Hmm, is that possible to support -mfentry on arm64? > > Of course we can not call a function directly at the first > instruction of functions on arm, because it can overwrite > link register which stores caller address. However, we can > do "store link register to stack and branch with link" > on arm. That is actually almost same as -mfentry does :), > and that may not depend on the prologue. > Yes, but the method "store link register to stack" will break the arm64 ABI rules, and memory operations may bring performance cost. We have a gcc -mfentry implementation strategy for arm64, and based on this we implement the livepatch for arm64. I will post the patchset for review soon. Thank you, Li Bin > Thank you, > > >> Thanks, >> Li Bin >> >>> I submit this patchset as RFC since I'm not quite sure that I'm doing >>> in the right way, or we should definitely support -fentry instead. >>> >>> Please note that I tested the feature only with livepatch-sample, and >>> the code for DYNAMIC_TRACE_WITH_REGS is still rough-edged. 
>>> >>> To: Steven Rostedt >>> To: Ingo Molnar >>> To: Josh Poimboeuf >>> To: Seth Jennings >>> To: Jiri Kosina >>> To: Vojtech Pavlik >>> To: Catalin Marinas >>> To: Will Deacon >>> >>> AKASHI Takahiro (4): >>> ftrace: add a helper function for livepatch >>> livepatch: adjust a patched function's address >>> arm64: ftrace: add DYNAMIC_TRACE_WITH_REGS version >>> arm64: add livepatch support >>> >>> arch/arm64/Kconfig |4 ++ >>> arch/arm64/include/asm/ftrace.h|4 ++ >>> arch/arm64/include/asm/livepatch.h | 38 +++ >>> arch/arm64/kernel/Makefile |1 + >>> arch/arm64/kernel/entry-ftrace.S | 124 >>> >>> arch/arm64/kernel/ftrace.c | 24 ++- >>> arch/arm64/kernel/livepatch.c | 68 >>> arch/x86/include/asm/livepatch.h |5 ++ >>> include/linux/ftrace.h |2 + >>> include/linux/livepatch.h |2 + >>> kernel/livepatch/core.c| 16 +++-- >>> kernel/trace/ftrace.c | 26 >>> 12 files changed, 309 insertions(+), 5 deletions(-) >>> create mode 100644 arch/arm64/include/asm/livepatch.h >>> create mode 100644 arch/arm64/kernel/livepatch.c >>> >> >> >> > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] livepatch: match function return value type with prototype
The klp_is_module return type should be boolean. Signed-off-by: Li Bin --- kernel/livepatch/core.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 284e269..30e9339 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -78,7 +78,7 @@ static struct klp_ops *klp_find_ops(unsigned long old_addr) static bool klp_is_module(struct klp_object *obj) { - return obj->name; + return !!obj->name; } static bool klp_is_object_loaded(struct klp_object *obj) -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
livepatch: match function return value type with prototype
The klp_is_module return type should be boolean. Signed-off-by: Li Bin --- kernel/livepatch/core.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 284e269..30e9339 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -78,7 +78,7 @@ static struct klp_ops *klp_find_ops(unsigned long old_addr) static bool klp_is_module(struct klp_object *obj) { - return obj->name; + return !!obj->name; } static bool klp_is_object_loaded(struct klp_object *obj) -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC 0/4] arm64: add livepatch support
On 2015/4/24 10:44, AKASHI Takahiro wrote: > This patchset enables livepatch support on arm64. > > Livepatch was merged in v4.0, and allows replacying a function dynamically > based on ftrace framework, but it also requires -mfentry option of gcc. > Currently arm64 gcc doesn't support it, but by adding a helper function to > ftrace, we will be able to support livepatch on arch's which don't support > this option. > This is not correct for the case that the prologue of the old and new function is different. Thanks, Li Bin > I submit this patchset as RFC since I'm not quite sure that I'm doing > in the right way, or we should definitely support -fentry instead. > > Please note that I tested the feature only with livepatch-sample, and > the code for DYNAMIC_TRACE_WITH_REGS is still rough-edged. > > To: Steven Rostedt > To: Ingo Molnar > To: Josh Poimboeuf > To: Seth Jennings > To: Jiri Kosina > To: Vojtech Pavlik > To: Catalin Marinas > To: Will Deacon > > AKASHI Takahiro (4): > ftrace: add a helper function for livepatch > livepatch: adjust a patched function's address > arm64: ftrace: add DYNAMIC_TRACE_WITH_REGS version > arm64: add livepatch support > > arch/arm64/Kconfig |4 ++ > arch/arm64/include/asm/ftrace.h|4 ++ > arch/arm64/include/asm/livepatch.h | 38 +++ > arch/arm64/kernel/Makefile |1 + > arch/arm64/kernel/entry-ftrace.S | 124 > > arch/arm64/kernel/ftrace.c | 24 ++- > arch/arm64/kernel/livepatch.c | 68 > arch/x86/include/asm/livepatch.h |5 ++ > include/linux/ftrace.h |2 + > include/linux/livepatch.h |2 + > kernel/livepatch/core.c| 16 +++-- > kernel/trace/ftrace.c | 26 > 12 files changed, 309 insertions(+), 5 deletions(-) > create mode 100644 arch/arm64/include/asm/livepatch.h > create mode 100644 arch/arm64/kernel/livepatch.c > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at 
http://www.tux.org/lkml/
[PATCH resend] kprobes: Update Documentation/kprobes.txt
The patch 125e564("Move Kconfig.instrumentation to arch/Kconfig and init/Kconfig") had removed the "Instrumentation Support" menu, and the configurations under this had be moved to "General setup". Update Documentation/kprobes.txt to reflect this change. Signed-off-by: Li Bin Acked-by: Masami Hiramatsu --- Documentation/kprobes.txt |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 1488b65..1f9b3e2 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -305,8 +305,8 @@ architectures: 3. Configuring Kprobes When configuring the kernel using make menuconfig/xconfig/oldconfig, -ensure that CONFIG_KPROBES is set to "y". Under "Instrumentation -Support", look for "Kprobes". +ensure that CONFIG_KPROBES is set to "y". Under "General setup", look +for "Kprobes". So that you can load and unload Kprobes-based instrumentation modules, make sure "Loadable module support" (CONFIG_MODULES) and "Module -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] livepatch: disable/enable_patch manners for interdependent patches
On 2015/1/22 21:05, Josh Poimboeuf wrote: > On Thu, Jan 22, 2015 at 05:54:23PM +0800, Li Bin wrote: >> On 2015/1/22 16:39, Li Bin wrote: >>> On 2015/1/22 11:51, Josh Poimboeuf wrote: >>>> On Thu, Jan 22, 2015 at 08:42:29AM +0800, Li Bin wrote: >>>>> On 2015/1/21 22:08, Jiri Kosina wrote: >>>>>> On Wed, 21 Jan 2015, Li Bin wrote: >>>>>> By this you limit the definition of the patch inter-dependency to just >>>>>> symbols. But that's not the only way how patches can depend on it other >>>>>> -- >>>>>> the dependency can be semantical. >>>>> >>>>> Yes, I agree with you. But I think the other dependencies such as >>>>> semantical >>>>> dependency should be judged by the user, like reverting a patch from git >>>>> repository. >>>>> Right? >>>> >>>> But with live patching, there are two users: the patch creator (who >>>> creates the patch module) and the end user (who loads it on their >>>> system). >>>> >>>> We can assume the patch creator knows what he's doing, but the end user >>>> doesn't always know or care about low level details like patch >>>> dependencies. The easiest and safest way to protect the end user is the >>>> current approach, which assumes that each patch depends on all >>>> previously applied patches. >>> >>> But then, the feature that disable patch dynamically is useless. >>> For example, if user find a bug be introduced by the last patch and disable >>> it directly, the new patch is no longer allowed from now unless enable the >>> old patch firstly but there is a risk window by this way. >>> >> >> Ok, in this case we can unregister the old patch firstly. >> But it seems that the feature that enable/disable patch dynamically indeed >> useless. (Its value is only for the last patch to enable or disable.) > > I wouldn't say it's useless... It's just a patch stack. 
If there's a > bug at the bottom of the stack, you can either: > > 1) push a new patch which does the opposite of the original patch >(similar to how "git revert" adds a new commit); > > 2) or pop everything off the stack and create a new stack to your >liking. > > It doesn't actually prevent you from doing what you want, it just makes > it less convenient (and more safe IMO). > I am not arguing the value of the patch stack manner, but the sysfs attribute 'enabled'. > I suppose an alternative would be to allow the patch creator to specify > patch dependencies (in addition to something like your patch to catch > the obvious dependencies). But dependencies are tricky and I'm not > really convinced that would be worth the added risk and code complexity. Yes, since we can not assume that end users know the low level details, but they have permission to unregister the stacktop patch, and this action will affect the subsequent patch without providing dependencies information. > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] livepatch: disable/enable_patch manners for interdependent patches
On 2015/1/22 16:39, Li Bin wrote: > On 2015/1/22 11:51, Josh Poimboeuf wrote: >> On Thu, Jan 22, 2015 at 08:42:29AM +0800, Li Bin wrote: >>> On 2015/1/21 22:08, Jiri Kosina wrote: >>>> On Wed, 21 Jan 2015, Li Bin wrote: >>>> By this you limit the definition of the patch inter-dependency to just >>>> symbols. But that's not the only way how patches can depend on it other -- >>>> the dependency can be semantical. >>> >>> Yes, I agree with you. But I think the other dependencies such as semantical >>> dependency should be judged by the user, like reverting a patch from git >>> repository. >>> Right? >> >> But with live patching, there are two users: the patch creator (who >> creates the patch module) and the end user (who loads it on their >> system). >> >> We can assume the patch creator knows what he's doing, but the end user >> doesn't always know or care about low level details like patch >> dependencies. The easiest and safest way to protect the end user is the >> current approach, which assumes that each patch depends on all >> previously applied patches. > > But then, the feature that disable patch dynamically is useless. > For example, if user find a bug be introduced by the last patch and disable > it directly, the new patch is no longer allowed from now unless enable the > old patch firstly but there is a risk window by this way. > Ok, in this case we can unregister the old patch firstly. But it seems that the feature that enable/disable patch dynamically indeed useless. (Its value is only for the last patch to enable or disable.) >> > > > -- > To unsubscribe from this list: send the line "unsubscribe live-patching" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > > . 
> -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] livepatch: Revert "livepatch: enforce patch stacking semantics"
On 2015/1/22 17:15, Miroslav Benes wrote: > On Thu, 22 Jan 2015, Li Bin wrote: > >> On 2015/1/21 17:07, Li Bin wrote: >>> This reverts commit 83a90bb1345767f0cb96d242fd8b9db44b2b0e17. >>> >>> The method that only allowing the topmost patch on the stack to be >>> enabled or disabled is unreasonable. Such as the following case: >>> >>> - do live patch1 >>> - disable patch1 >>> - do live patch2 //error >>> >>> Now, we will never be able to do new live patch unless disabing the >>> patch1 although there is no dependencies. >>> >> >> Correct the log: >> ... unless disabling the patch1 although ... --> >> ... unless enabling the patch1 firstly although ... > > Yes, but in such situation you can unregister patch1 and proceed with new > live patch. No problem. As Jiri has already written. Or are we missing > something? > Ok, that is before process with new live patch we must unregister the disabled patch1 previously. Is there need some message to avoid confusing the user? Thanks, Li Bin > Regards, > -- > Miroslav Benes > SUSE Labs > -- > To unsubscribe from this list: send the line "unsubscribe live-patching" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > > . > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] livepatch: disable/enable_patch manners for interdependent patches
On 2015/1/22 11:51, Josh Poimboeuf wrote: > On Thu, Jan 22, 2015 at 08:42:29AM +0800, Li Bin wrote: >> On 2015/1/21 22:08, Jiri Kosina wrote: >>> On Wed, 21 Jan 2015, Li Bin wrote: >>> By this you limit the definition of the patch inter-dependency to just >>> symbols. But that's not the only way how patches can depend on it other -- >>> the dependency can be semantical. >> >> Yes, I agree with you. But I think the other dependencies such as semantical >> dependency should be judged by the user, like reverting a patch from git >> repository. >> Right? > > But with live patching, there are two users: the patch creator (who > creates the patch module) and the end user (who loads it on their > system). > > We can assume the patch creator knows what he's doing, but the end user > doesn't always know or care about low level details like patch > dependencies. The easiest and safest way to protect the end user is the > current approach, which assumes that each patch depends on all > previously applied patches. But then, the feature that disable patch dynamically is useless. For example, if user find a bug be introduced by the last patch and disable it directly, the new patch is no longer allowed from now unless enable the old patch firstly but there is a risk window by this way. > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] livepatch: Revert "livepatch: enforce patch stacking semantics"
On 2015/1/21 17:07, Li Bin wrote: > This reverts commit 83a90bb1345767f0cb96d242fd8b9db44b2b0e17. > > The method that only allowing the topmost patch on the stack to be > enabled or disabled is unreasonable. Such as the following case: > > - do live patch1 > - disable patch1 > - do live patch2 //error > > Now, we will never be able to do new live patch unless disabing the > patch1 although there is no dependencies. > Correct the log: ... unless disabling the patch1 although ... --> ... unless enabling the patch1 firstly although ... > Signed-off-by: Li Bin > --- > kernel/livepatch/core.c | 10 -- > 1 files changed, 0 insertions(+), 10 deletions(-) > > diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c > index bc05d39..7861ed2 100644 > --- a/kernel/livepatch/core.c > +++ b/kernel/livepatch/core.c > @@ -468,11 +468,6 @@ static int __klp_disable_patch(struct klp_patch *patch) > struct klp_object *obj; > int ret; > > - /* enforce stacking: only the last enabled patch can be disabled */ > - if (!list_is_last(&patch->list, &klp_patches) && > - list_next_entry(patch, list)->state == KLP_ENABLED) > - return -EBUSY; > - > pr_notice("disabling patch '%s'\n", patch->mod->name); > > for (obj = patch->objs; obj->funcs; obj++) { > @@ -529,11 +524,6 @@ static int __klp_enable_patch(struct klp_patch *patch) > if (WARN_ON(patch->state != KLP_DISABLED)) > return -EINVAL; > > - /* enforce stacking: only the first disabled patch can be enabled */ > - if (patch->list.prev != &klp_patches && > - list_prev_entry(patch, list)->state == KLP_DISABLED) > - return -EBUSY; > - > pr_notice_once("tainting kernel with TAINT_LIVEPATCH\n"); > add_taint(TAINT_LIVEPATCH, LOCKDEP_STILL_OK); > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] livepatch: Revert "livepatch: enforce patch stacking semantics"
On 2015/1/21 22:36, Seth Jennings wrote: > On Wed, Jan 21, 2015 at 03:06:38PM +0100, Jiri Kosina wrote: >> On Wed, 21 Jan 2015, Li Bin wrote: >> >>> This reverts commit 83a90bb1345767f0cb96d242fd8b9db44b2b0e17. >>> >>> The method that only allowing the topmost patch on the stack to be >>> enabled or disabled is unreasonable. Such as the following case: >>> >>> - do live patch1 >>> - disable patch1 >>> - do live patch2 //error >>> >>> Now, we will never be able to do new live patch unless disabing the >>> patch1 although there is no dependencies. >> >> Unregistering disabled patch still works and removes it from the list no >> matter the position. >> >> So what exactly is the problem? > >>From a quick glance, it seems that what this set does is it only > enforces the stacking requirements if two patches patch the same > function. > Yes, this patch is only concerning this case that 'multi patches patch the same function' and solve the problem that mentioned previously: foo_unpatched() foo_patch1() foo_patch2() foo_patch3() disable(foo_patch2) disable(foo_patch3) foo_patch1() foo_patch2 is not allowed to be disabled before disable foo_patch3. Thanks, Li Bin > I'm not sure if that is correct logically or correctly implemented by > these patches yet. > > Seth > >> >> -- >> Jiri Kosina >> SUSE Labs > > . > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] livepatch: disable/enable_patch manners for interdependent patches
On 2015/1/21 22:08, Jiri Kosina wrote: > On Wed, 21 Jan 2015, Li Bin wrote: > >> for disable_patch: >> The patch is unallowed to be disabled if one patch after has >> dependencies with it and has been enabled. >> >> for enable_patch: >> The patch is unallowed to be enabled if one patch before has >> dependencies with it and has been disabled. >> >> Signed-off-by: Li Bin >> --- >> kernel/livepatch/core.c | 60 >> +++ >> 1 files changed, 60 insertions(+), 0 deletions(-) >> >> diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c >> index 7861ed2..a12a31c 100644 >> --- a/kernel/livepatch/core.c >> +++ b/kernel/livepatch/core.c >> @@ -114,6 +114,21 @@ static bool klp_is_patch_registered(struct klp_patch >> *patch) >> return false; >> } >> >> +static bool klp_func_in_patch(struct klp_func *kfunc, struct klp_patch >> *patch) >> +{ >> +struct klp_object *obj; >> +struct klp_func *func; >> + >> +for (obj = patch->objs; obj->funcs; obj++) { >> +for (func = obj->funcs; func->old_name; func++) { >> +if (kfunc->old_addr == func->old_addr) { >> +return true; >> +} >> +} >> +} >> +return false; >> +} >> + >> static bool klp_initialized(void) >> { >> return klp_root_kobj; >> @@ -466,8 +481,31 @@ unregister: >> static int __klp_disable_patch(struct klp_patch *patch) >> { >> struct klp_object *obj; >> +struct klp_patch *temp; >> +struct klp_func *func; >> int ret; >> >> +/* >> + * the patch is unallowed to be disabled if one patch >> + * after has dependencies with it and has been enabled. 
>> + */ >> +for (temp = list_next_entry(patch, list); >> +&temp->list != &klp_patches; >> +temp = list_next_entry(temp, list)) { >> +if (temp->state != KLP_ENABLED) >> +continue; >> + >> +for (obj = patch->objs; obj->funcs; obj++) { >> +for (func = obj->funcs; func->old_name; func++) { >> +if (klp_func_in_patch(func, temp)) { >> +pr_err("this patch depends on '%s', >> please disable it firstly\n", >> + temp->mod->name); >> +return -EBUSY; >> +} >> +} >> +} >> +} >> + >> pr_notice("disabling patch '%s'\n", patch->mod->name); >> >> for (obj = patch->objs; obj->funcs; obj++) { >> @@ -519,11 +557,33 @@ EXPORT_SYMBOL_GPL(klp_disable_patch); >> static int __klp_enable_patch(struct klp_patch *patch) >> { >> struct klp_object *obj; >> +struct klp_patch *temp; >> +struct klp_func *func; >> int ret; >> >> if (WARN_ON(patch->state != KLP_DISABLED)) >> return -EINVAL; >> >> +/* >> + * the patch is unallowed to be enabled if one patch >> + * before has dependencies with it and has been disabled. >> + */ >> +for (temp = list_first_entry(&klp_patches, struct klp_patch, list); >> +temp != patch; temp = list_next_entry(temp, list)) { >> +if (temp->state != KLP_DISABLED) >> +continue; >> + >> +for (obj = patch->objs; obj->funcs; obj++) { >> +for (func = obj->funcs; func->old_name; func++) { >> +if (klp_func_in_patch(func, temp)) { >> +pr_err("this patch depends on '%s', >> please enable it firstly\n", >> + temp->mod->name); >> +return -EBUSY; > > By this you limit the definition of the patch inter-dependency to just > symbols. But that's not the only way how patches can depend on it other -- > the dependency can be semantical. Yes, I agree with you. But I think the other dependencies such as semantical dependency should be judged by the user, like reverting a patch from git repository. Right? 
Thanks, Li Bin > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] livepatch: disable/enable_patch manners for interdependent patches
for disable_patch: The patch is unallowed to be disabled if one patch after has dependencies with it and has been enabled. for enable_patch: The patch is unallowed to be enabled if one patch before has dependencies with it and has been disabled. Signed-off-by: Li Bin --- kernel/livepatch/core.c | 60 +++ 1 files changed, 60 insertions(+), 0 deletions(-) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 7861ed2..a12a31c 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -114,6 +114,21 @@ static bool klp_is_patch_registered(struct klp_patch *patch) return false; } +static bool klp_func_in_patch(struct klp_func *kfunc, struct klp_patch *patch) +{ + struct klp_object *obj; + struct klp_func *func; + + for (obj = patch->objs; obj->funcs; obj++) { + for (func = obj->funcs; func->old_name; func++) { + if (kfunc->old_addr == func->old_addr) { + return true; + } + } + } + return false; +} + static bool klp_initialized(void) { return klp_root_kobj; @@ -466,8 +481,31 @@ unregister: static int __klp_disable_patch(struct klp_patch *patch) { struct klp_object *obj; + struct klp_patch *temp; + struct klp_func *func; int ret; + /* +* the patch is unallowed to be disabled if one patch +* after has dependencies with it and has been enabled. 
+*/ + for (temp = list_next_entry(patch, list); + &temp->list != &klp_patches; + temp = list_next_entry(temp, list)) { + if (temp->state != KLP_ENABLED) + continue; + + for (obj = patch->objs; obj->funcs; obj++) { + for (func = obj->funcs; func->old_name; func++) { + if (klp_func_in_patch(func, temp)) { + pr_err("this patch depends on '%s', please disable it firstly\n", + temp->mod->name); + return -EBUSY; + } + } + } + } + pr_notice("disabling patch '%s'\n", patch->mod->name); for (obj = patch->objs; obj->funcs; obj++) { @@ -519,11 +557,33 @@ EXPORT_SYMBOL_GPL(klp_disable_patch); static int __klp_enable_patch(struct klp_patch *patch) { struct klp_object *obj; + struct klp_patch *temp; + struct klp_func *func; int ret; if (WARN_ON(patch->state != KLP_DISABLED)) return -EINVAL; + /* +* the patch is unallowed to be enabled if one patch +* before has dependencies with it and has been disabled. +*/ + for (temp = list_first_entry(&klp_patches, struct klp_patch, list); + temp != patch; temp = list_next_entry(temp, list)) { + if (temp->state != KLP_DISABLED) + continue; + + for (obj = patch->objs; obj->funcs; obj++) { + for (func = obj->funcs; func->old_name; func++) { + if (klp_func_in_patch(func, temp)) { + pr_err("this patch depends on '%s', please enable it firstly\n", + temp->mod->name); + return -EBUSY; + } + } + } + } + pr_notice_once("tainting kernel with TAINT_LIVEPATCH\n"); add_taint(TAINT_LIVEPATCH, LOCKDEP_STILL_OK); -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] livepatch: Revert "livepatch: enforce patch stacking semantics"
This reverts commit 83a90bb1345767f0cb96d242fd8b9db44b2b0e17. The method that only allowing the topmost patch on the stack to be enabled or disabled is unreasonable. Such as the following case: - do live patch1 - disable patch1 - do live patch2 //error Now, we will never be able to do new live patch unless disabing the patch1 although there is no dependencies. Signed-off-by: Li Bin --- kernel/livepatch/core.c | 10 -- 1 files changed, 0 insertions(+), 10 deletions(-) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index bc05d39..7861ed2 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -468,11 +468,6 @@ static int __klp_disable_patch(struct klp_patch *patch) struct klp_object *obj; int ret; - /* enforce stacking: only the last enabled patch can be disabled */ - if (!list_is_last(&patch->list, &klp_patches) && - list_next_entry(patch, list)->state == KLP_ENABLED) - return -EBUSY; - pr_notice("disabling patch '%s'\n", patch->mod->name); for (obj = patch->objs; obj->funcs; obj++) { @@ -529,11 +524,6 @@ static int __klp_enable_patch(struct klp_patch *patch) if (WARN_ON(patch->state != KLP_DISABLED)) return -EINVAL; - /* enforce stacking: only the first disabled patch can be enabled */ - if (patch->list.prev != &klp_patches && - list_prev_entry(patch, list)->state == KLP_DISABLED) - return -EBUSY; - pr_notice_once("tainting kernel with TAINT_LIVEPATCH\n"); add_taint(TAINT_LIVEPATCH, LOCKDEP_STILL_OK); -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/2] disable/enable_patch manners for interdependent patches
Revert commit 83a90bb1345767f0cb96d242fd8b9db44b2b0e17 and introduce disable/enable_patch manners for interdependent patches for disable_patch: The patch is unallowed to be disabled if one patch after has dependencies with it and has been enabled. for enable_patch: The patch is unallowed to be enabled if one patch before has dependencies with it and has been disabled. Li Bin (2): livepatch: Revert "livepatch: enforce patch stacking semantics" livepatch: disable/enable_patch manners for interdependent patches kernel/livepatch/core.c | 66 +- 1 files changed, 58 insertions(+), 8 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: sched: spinlock recursion in sched_rr_get_interval
On 2014/12/26 15:01, Sasha Levin wrote: > On 12/26/2014 01:45 AM, Li Bin wrote: >> On 2014/7/8 4:05, Peter Zijlstra wrote: >>>> On Mon, Jul 07, 2014 at 09:55:43AM -0400, Sasha Levin wrote: >>>>>> I've also had this one, which looks similar: >>>>>> >>>>>> [10375.005884] BUG: spinlock recursion on CPU#0, modprobe/10965 >>>>>> [10375.006573] lock: 0x8803a0fd7740, .magic: dead4ead, .owner: >>>>>> modprobe/10965, .owner_cpu: 15 >>>>>> [10375.007412] CPU: 0 PID: 10965 Comm: modprobe Tainted: GW >>>>>> 3.16.0-rc3-next-20140704-sasha-00023-g26c0906-dirty #765 >>>> >>>> Something's fucked; so we have: >>>> >>>> debug_spin_lock_before() >>>>SPIN_BUG_ON(lock->owner == current, "recursion"); >>>> >> Hello, >> Does ACCESS_ONCE() can help this issue? I have no evidence that its lack is >> responsible for the issue, but I think here need it indeed. Is that right? >> >> SPIN_BUG_ON(ACCESS_ONCE(lock->owner) == current, "recursion"); > > Could you explain a bit more why do you think it's needed? > Oh, just adding ACCESS_ONCE may be not enough, and i think lacking lock protection for reading lock->owner is a risk. In short, the reason of the issue is more like the spinlock debug mechanism, rather than a real spinlock recursion. ... //under no lock protection if (lock->owner == current) //access lock->owner |-spin_dump(lock, "recursion"); |-if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) //access lock->owner again owner = lock->owner; ... Right, or am I missing something? Thanks, Li Bin > > Thanks, > Sasha > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: sched: spinlock recursion in sched_rr_get_interval
On 2014/7/8 4:05, Peter Zijlstra wrote: > On Mon, Jul 07, 2014 at 09:55:43AM -0400, Sasha Levin wrote: >> I've also had this one, which looks similar: >> >> [10375.005884] BUG: spinlock recursion on CPU#0, modprobe/10965 >> [10375.006573] lock: 0x8803a0fd7740, .magic: dead4ead, .owner: >> modprobe/10965, .owner_cpu: 15 >> [10375.007412] CPU: 0 PID: 10965 Comm: modprobe Tainted: GW >> 3.16.0-rc3-next-20140704-sasha-00023-g26c0906-dirty #765 > > Something's fucked; so we have: > > debug_spin_lock_before() > SPIN_BUG_ON(lock->owner == current, "recursion"); > Hello, Does ACCESS_ONCE() can help this issue? I have no evidence that its lack is responsible for the issue, but I think here need it indeed. Is that right? SPIN_BUG_ON(ACCESS_ONCE(lock->owner) == current, "recursion"); Thanks, Li Bin > Causing that, _HOWEVER_ look at .owner_cpu and the reporting cpu!! How > can the lock owner, own the lock on cpu 15 and again contend with it on > CPU 0. That's impossible. > > About when-ish did you start seeing things like this? Lemme go stare > hard at recent changes. > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [kpatch] [PATCH] livepatch v7: move x86 specific ftrace handler code to arch/x86
Sorry! Bad format, please ignore this patch. On 2014/12/19 13:37, Li Bin wrote: > The execution flow redirection related implemention in the livepatch > ftrace handler is depended on the specific architecture. This patch > introduces klp_arch_set_pc(like kgdb_arch_set_pc) interface to change > the pt_regs. > > Signed-off-by: Li Bin -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[kpatch] [PATCH] livepatch v7: move x86 specific ftrace handler code to arch/x86
The execution flow redirection related implemention in the livepatch ftrace handler is depended on the specific architecture. This patch introduces klp_arch_set_pc(like kgdb_arch_set_pc) interface to change the pt_regs. Signed-off-by: Li Bin --- arch/x86/include/asm/livepatch.h |5 + kernel/livepatch/core.c |2 +- 2 files changed, 6 insertions(+), 1 deletions(-) diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index c2ae592..4cdec4e 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -21,6 +21,7 @@ #define _ASM_X86_LIVEPATCH_H #include +#include #ifdef CONFIG_LIVE_PATCHING #ifndef CC_USING_FENTRY @@ -29,6 +30,10 @@ extern int klp_write_module_reloc(struct module *mod, unsigned long type, unsigned long loc, unsigned long value); +static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +{ + regs->ip = ip; +} #else #error Live patching support is disabled; check CONFIG_LIVE_PATCHING #endif diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 0004a71..c4c04fd 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -271,7 +271,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, { struct klp_func *func = ops->private; - regs->ip = (unsigned long)func->new_func; + klp_arch_set_pc(regs, (unsigned long)func->new_func); } static int klp_disable_func(struct klp_func *func) -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] ftrace: fix typo in comment
%s/ARCH_SUPPORT_FTARCE_OPS/ARCH_SUPPORTS_FTRACE_OPS/ Signed-off-by: Li Bin --- kernel/trace/ftrace.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 929a733..9473b24 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5102,7 +5102,7 @@ out: * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS. * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. * An architecture can pass partial regs with ftrace_ops and still - * set the ARCH_SUPPORT_FTARCE_OPS. + * set the ARCH_SUPPORTS_FTRACE_OPS. */ #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[kpatch] [PATCH] livepatch v7: move x86 specific ftrace handler code to arch/x86
The execution flow redirection related implemention in the livepatch ftrace handler is depended on the specific architecture. This patch introduces klp_arch_set_pc(like kgdb_arch_set_pc) interface to change the pt_regs. Signed-off-by: Li Bin --- arch/x86/include/asm/livepatch.h |5 + kernel/livepatch/core.c |2 +- 2 files changed, 6 insertions(+), 1 deletions(-) diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index c2ae592..4cdec4e 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -21,6 +21,7 @@ #define _ASM_X86_LIVEPATCH_H #include +#include #ifdef CONFIG_LIVE_PATCHING #ifndef CC_USING_FENTRY @@ -29,6 +30,10 @@ extern int klp_write_module_reloc(struct module *mod, unsigned long type, unsigned long loc, unsigned long value); +static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +{ + regs->ip = ip; +} #else #error Live patching support is disabled; check CONFIG_LIVE_PATCHING #endif diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 0004a71..c4c04fd 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -271,7 +271,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, { struct klp_func *func = ops->private; - regs->ip = (unsigned long)func->new_func; + klp_arch_set_pc(regs, (unsigned long)func->new_func); } static int klp_disable_func(struct klp_func *func) -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] kprobes: Update Documentation/kprobes.txt
The patch 125e564("Move Kconfig.instrumentation to arch/Kconfig and init/Kconfig") had removed the "Instrumentation Support" menu, and the configurations under this had be moved to "General setup". Update Documentation/kprobes.txt to reflect this change. Signed-off-by: Li Bin --- Documentation/kprobes.txt |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 4bbeca8..ca4a927 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -304,8 +304,8 @@ architectures: 3. Configuring Kprobes When configuring the kernel using make menuconfig/xconfig/oldconfig, -ensure that CONFIG_KPROBES is set to "y". Under "Instrumentation -Support", look for "Kprobes". +ensure that CONFIG_KPROBES is set to "y". Under "General setup", look +for "Kprobes". So that you can load and unload Kprobes-based instrumentation modules, make sure "Loadable module support" (CONFIG_MODULES) and "Module -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Enhancement for PLE handler in KVM
Thanks, Paolo for the comments Understand the requirement to "fix it for all guest OSes". I will investigate the "new hypercall KVM_HC_HALT_AND_YIELD_TO_CPU that takes an APIC id, donates the quantum to that CPU, and puts the originating CPU in halted state. " Regards Bin -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
RE: Enhancement for PLE handler in KVM
Fully agree. It will be a very helpful feature to make ple setting per VM. This feature will provide more flexible control to the VM user. All KVM user will love to have it. The enhancement we proposed is neither overlapping nor conflicting with this feature. The enhancement is targeting to provide the best real time performance to the guest OS. There will be more and more embedded system migrating to KVM. Especially in the telecom industry. And a lot of existing system will be running on top of customized embedded OS which significant different from generic OS (either linux or windows ). Is there any concern regarding to the enhancement we need to address? Or more work need to be done? Regards Bin -Original Message- From: Marcelo Tosatti [mailto:mtosa...@redhat.com] Sent: Friday, March 07, 2014 12:42 PM To: Li, Bin (Bin) Cc: Paolo Bonzini; k...@vger.kernel.org; Jatania, Neel (Neel); linux-kernel@vger.kernel.org Subject: Re: Enhancement for PLE handler in KVM On Fri, Mar 07, 2014 at 02:26:19PM +, Li, Bin (Bin) wrote: > Can we have "per-VM PLE values"? > > My understanding is that the ple values are kvm module setting which applying > to all VMs in the system. > And all vms must be stopped first, then unload kvm-intel, reload kvm-intel > with new ple setting. > > /sbin/modprobe -r kvm-intel > /sbin/modprobe kvm-intel ple_window=16384 > > Regards > Bin Yes, but it can be made per-VM (its a VMCS field). -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
RE: Enhancement for PLE handler in KVM
Can we have "per-VM PLE values"? My understanding is that the ple values are kvm module setting which applying to all VMs in the system. And all vms must be stopped first, then unload kvm-intel, reload kvm-intel with new ple setting. /sbin/modprobe -r kvm-intel /sbin/modprobe kvm-intel ple_window=16384 Regards Bin -Original Message- From: Marcelo Tosatti [mailto:mtosa...@redhat.com] Sent: Thursday, March 06, 2014 10:07 PM To: Li, Bin (Bin) Cc: Paolo Bonzini; k...@vger.kernel.org; Jatania, Neel (Neel); linux-kernel@vger.kernel.org; Peter Zijlstra; Mike Galbraith; Chris Wright; ttr...@redhat.com; Nakajima, Jun; r...@redhat.com Subject: Re: Enhancement for PLE handler in KVM On Wed, Mar 05, 2014 at 09:16:45PM +0000, Li, Bin (Bin) wrote: > Did you also find out here why this is the case? > > Binl: > Yes. The application running in our customized embedded OS is also real time > application which is timing sensitive. > The timing sensitive level varies among the applications. > When I talking about different threshold, the assumption is using default > 4096 ple_window setting. > If I set up the ple_window to 16384 or higher, there will be no problem for > our application. But the finial user could also run linux or window VM on > same hypervisor which would prefer default 4096 setting for linux and windows. Then have per-VM PLE values? > We are looking for a solution to be good for both linux / window and real > time application. > The enhancement patch we proposed will satisfy both linux/window application > and real time embedded applications. > > Paolo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
RE: Enhancement for PLE handler in KVM
Thanks for the quick response. I have inlined the comments and also added a typical log. Regards Bin -Original Message- From: Paolo Bonzini [mailto:pbonz...@redhat.com] Sent: Wednesday, March 05, 2014 9:49 AM To: Li, Bin (Bin); k...@vger.kernel.org Cc: Jatania, Neel (Neel); linux-kernel@vger.kernel.org; Srivatsa Vaddagiri; Peter Zijlstra; Mike Galbraith; Chris Wright; ttr...@redhat.com; Nakajima, Jun; r...@redhat.com Subject: Re: Enhancement for PLE handler in KVM Il 05/03/2014 15:17, Li, Bin (Bin) ha scritto: > Hello, Paolo, > > We are using a customized embedded SMP OS as guest OS. It is not meaningful > to post the guest OS code. > Also, there is no "performance numbers for common workloads" since there is > no common workloads to compare with. > In our OS, there is still a big kernel lock to protect the kernel. Does this mean that the average spinning time for the spinlock is relatively high compared to Linux or Windows? Binl: Yes. The default setting for ple_window is 4096, which is based on Linux and Windows. In our guest OS, the ple_window needs to be at least 16384 in order to prevent large jitter causing a system reset. With ple_window set to 16384, the biggest jitter in the guest OS is about 24-25ms. > - when the in-correct boosting happens, the vCPU in spin lock will run > longer time on the pCPU and causing the lock holder vCPU having less > time to run on pCPU since they are sharing the on same pCPU. Correct. This is an unfortunate problem in the current implementation of PLE. > Adding hyper call in every kernel enter and kernel exist is expensive. > From the trace log collect from i7 running @ 3.0GHz , the cost per > hyper is <1us. Right, it is around 1500 cycles and 0.4-0.5 us, i.e. approximately 1 us for enter and exit together. This is not too bad for a kernel with a big lock, but not acceptable if you do not have it (as is the case for Linux and Windows). > Regarding to the " paravirtual ticketlock ", we did try the same idea in our > embedded guest OS. 
> We got following results: > > a) We implemented similar approach like linux "paravirtual > ticketlock". The system clock jitter does get reduced a lot. But, the > system clock jitter is still happening at lower rate. In a few hours > system stress test, we still see the big jitter few times. Did you find out why? It could happen if the virtual CPU is scheduled out for a relatively long time. A small number of spinning iterations can then account for a relatively large time. My impression is that you're implementing a paravirtual spinlock, except that you're relying on PLE to decide when to go to sleep. PLE is implemented using the TSC. Can you assume the host TSC is of good quality? If so, perhaps you can try to modify the pv ticketlock algorithm, and use a threshold based on TSC instead of an iteration count? BinL: Our "paravirtual ticket lock" is done in the following way: - when a vCPU is doing a spin loop to get into kernel state, it will set up a counter to track how many times it has gone through the tight loop. If the counter reaches a certain threshold, the vCPU will add a request to the lock holder's work item pipe (a multiple-producer, single-consumer pipe) and the vCPU will run the "hlt" instruction, waiting for the lock holder to wake it up. - TSC is not used in this case. - When the lock holder sees the request, it will send an IPI interrupt to wake up the vCPU which is hlt-ed. - With this implementation, we are able to get the system stress test (a run of a few hours with traffic) completing successfully most of the time. But not all the time. The big jitter still happens during the run, just less frequently. The root cause of the problem is still there. The lock holder vCPU is scheduled out for a relatively long time. We did more investigation into why. a) From the trace-cmd log, we believe the root cause is the PLE handler incorrectly boosting the spin loop vCPU which is stacked with the lock holder vCPU on the same pCPU. 
The sample log we captured is: the lock holder vCPU and the vCPU doing the spin loop are both running on one pCPU. The vCPU in the spin loop is being incorrectly boosted by the PLE handler. And the Linux scheduler could be toggling the two vCPUs on the pCPU at a very high rate. The worst case is when the lock happens to stay between the two vCPUs on the same pCPU. Then we will see the lock holder vCPU scheduled in doing a little bit of real work and the other spin loop vCPU being scheduled in doing its spin loop. And the context switch on the pCPU happens very often. (See more details of a typical trace below.) This causes the real CPU power used for processing real work to become much less than normal (a short period of true overload on this pCPU). And the guest OS stays in a very slow kernel work stage because the lock holder vCPU has much less time to do its real work. The system will get out of