[PATCH 05/15] bpf: Export a map-clearing function

2015-03-02 Thread Tom Zanussi
Add a new map_clear() function to bpf_map_ops along with a
tracing_map_clear() export for external users.

Map implementations for which clearing makes sense should implement
it; otherwise it's optional.  The bpf hashtab implementation does
implement a clear operation, but since clearing doesn't make sense for
bpf arraymaps, the arraymap implementation doesn't.

Signed-off-by: Tom Zanussi 
---
 include/linux/bpf.h  |  2 ++
 kernel/bpf/hashtab.c |  8 ++++++++
 kernel/bpf/syscall.c | 17 +++++++++++++++++
 3 files changed, 27 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 900405bf..f7f95d7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -18,6 +18,7 @@ struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
struct bpf_map *(*map_alloc)(union bpf_attr *attr);
void (*map_free)(struct bpf_map *);
+   void (*map_clear)(struct bpf_map *);
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
 
/* funcs callable from userspace and from eBPF programs */
@@ -144,6 +145,7 @@ extern struct bpf_func_proto bpf_map_delete_elem_proto;
 
 struct bpf_map *tracing_map_create(union bpf_attr *attr);
 void tracing_map_destroy(struct bpf_map *map);
+void tracing_map_clear(struct bpf_map *map);
 int tracing_map_update_elem(struct bpf_map *map, void *key, void *value,
union bpf_attr *attr);
 int tracing_map_lookup_elem(struct bpf_map *map, void *key, void *uvalue);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index b3ba436..addf3a8 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -325,6 +325,13 @@ static void delete_all_elements(struct bpf_htab *htab)
}
 }
 
+static void htab_map_clear(struct bpf_map *map)
+{
+   struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+
+   delete_all_elements(htab);
+}
+
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
 static void htab_map_free(struct bpf_map *map)
 {
@@ -348,6 +355,7 @@ static void htab_map_free(struct bpf_map *map)
 static struct bpf_map_ops htab_ops = {
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
+   .map_clear = htab_map_clear,
.map_get_next_key = htab_map_get_next_key,
.map_lookup_elem = htab_map_lookup_elem,
.map_update_elem = htab_map_update_elem,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cac8df6..0f28904 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -362,6 +362,23 @@ int tracing_map_delete_elem(struct bpf_map *map, void *key)
 }
 EXPORT_SYMBOL_GPL(tracing_map_delete_elem);
 
+/**
+ * tracing_map_clear - Clear a bpf_map
+ * @map: The bpf_map to clear
+ *
+ * Clear the contents of the bpf_map (a no-op if ->map_clear is not implemented).
+ *
+ * Return: nothing, map clearing always succeeds
+ */
+void tracing_map_clear(struct bpf_map *map)
+{
+   rcu_read_lock();
+   if (map->ops->map_clear)
+   map->ops->map_clear(map);
+   rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tracing_map_clear);
+
 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key
 
 static int map_delete_elem(union bpf_attr *attr)
-- 
1.9.3



[PATCH] spi: img-spfi: Verify max spfi transfer length

2015-03-02 Thread Sifan Naeem
Maximum transfer length supported by SPFI is 65535 bytes; this is
limited by the 16 bits available in the SPFI TSize register to
represent the transfer size, i.e. 2^16 - 1 = 65535.
For transfer requests larger than the maximum supported, the driver
will return an invalid argument (-EINVAL) error.

Signed-off-by: Sifan Naeem 
Reviewed-by: Andrew Bresticker 
---
 drivers/spi/spi-img-spfi.c |    7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c
index c01567d..e649bc7 100644
--- a/drivers/spi/spi-img-spfi.c
+++ b/drivers/spi/spi-img-spfi.c
@@ -459,6 +459,13 @@ static int img_spfi_transfer_one(struct spi_master *master,
unsigned long flags;
int ret;
 
+   if (xfer->len > SPFI_TRANSACTION_TSIZE_MASK) {
+   dev_err(spfi->dev,
+   "Transfer length (%d) is greater than the max supported 
(%d)",
+   xfer->len, SPFI_TRANSACTION_TSIZE_MASK);
+   return -EINVAL;
+   }
+
/*
 * Stop all DMA and reset the controller if the previous transaction
 * timed-out and never completed it's DMA.
-- 
1.7.9.5



[PATCH 13/15] tracing: Add sorting to hist triggers

2015-03-02 Thread Tom Zanussi
Add support for sorting to hist triggers; without it, entries are
displayed in hash order, which is to say effectively at random.

Currently, support is implemented for just a primary sort key, which
is ascending by default.

To specify the sort key for a trigger, append ':sort=<sort_key>',
where <sort_key> can be any value specified in the values= param, or
the special value 'hitcount', which is a count of the number of times
each entry in the hashtable was hit.

With these changes, even if the user doesn't explicitly specify a sort
key, the table will be sorted ascending on hitcount.  To sort in
descending order, append the .descending modifier to the sort field,
as such:

  ':sort=<sort_key>.descending'
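
For example, a hist trigger on the kmem:kmalloc event sorted on
bytes_req, largest first, might look like this (the event and field
names here are illustrative):

  # echo 'hist:keys=call_site:vals=bytes_req:sort=bytes_req.descending' > \
        /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger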

Signed-off-by: Tom Zanussi 
---
 kernel/trace/trace.c|   2 +-
 kernel/trace/trace_events_trigger.c | 279 +++-
 2 files changed, 278 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 683048f..2fa41ef 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3777,7 +3777,7 @@ static const char readme_msg[] =
"\tThe 'sort' param can be used to specify a value field to sort\n"
"\ton.  The default if unspecified is 'hitcount' and the.\n"
"\tdefault sort order is 'ascending'.  To sort in the opposite\n"
-   "\tdirection, append .descending' to the sort key.\n"
+   "\tdirection, append .descending' to the sort key.\n\n"
"\tThe 'pause' param can be used to pause an existing hist\n"
"\ttrigger or to start a hist trigger but not log any events\n"
"\tuntil told to do so.  'continue' can be used to start or\n"
diff --git a/kernel/trace/trace_events_trigger.c 
b/kernel/trace/trace_events_trigger.c
index 80805f3..ae75528 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include <linux/sort.h>
 
 #include 
 
@@ -1479,6 +1480,7 @@ DEFINE_HIST_FIELD_FN(u8);
 #define HIST_TRIGGER_BITS_MAX  17
 #define HIST_TRIGGER_BITS_MIN  7
 #define HIST_VALS_MAX  8
+#define HIST_SORT_KEYS_MAX 2
 
 #define HIST_KEY_STRING_MAX64
 
@@ -1491,9 +1493,16 @@ enum hist_field_flags {
HIST_FIELD_SYSCALL  = 32,
 };
 
+struct hist_trigger_sort_key {
+   booluse_hitcount;
+   booldescending;
+   unsigned intidx;
+};
+
 struct hist_trigger_attrs {
char*keys_str;
char*vals_str;
+   char*sort_keys_str;
boolpause;
boolcont;
boolclear;
@@ -1509,6 +1518,8 @@ struct hist_trigger_data {
struct ftrace_event_file*event_file;
atomic64_t  drops;
struct hist_trigger_attrs   *attrs;
+   struct hist_trigger_sort_key*sort_keys[HIST_SORT_KEYS_MAX];
+   struct hist_trigger_sort_key*sort_key_cur;
unsigned intmap_bits;
struct bpf_map  *map;
union bpf_attr  map_attr;
@@ -1521,6 +1532,11 @@ struct hist_trigger_entry {
char*comm;
 };
 
+struct hist_trigger_sort_entry {
+   void*key;
+   struct hist_trigger_entry   *entry;
+};
+
 #define HIST_STACKTRACE_DEPTH 16
 #define HIST_STACKTRACE_SKIP 5
 
@@ -1671,6 +1687,106 @@ static void destroy_hist_fields(struct 
hist_trigger_data *hist_data)
}
 }
 
+static inline struct hist_trigger_sort_key *create_default_sort_key(void)
+{
+   struct hist_trigger_sort_key *sort_key;
+
+   sort_key = kzalloc(sizeof(*sort_key), GFP_KERNEL);
+   if (!sort_key)
+   return ERR_PTR(-ENOMEM);
+
+   sort_key->use_hitcount = true;
+
+   return sort_key;
+}
+
+static inline struct hist_trigger_sort_key *
+create_sort_key(char *field_name, struct hist_trigger_data *hist_data)
+{
+   struct hist_trigger_sort_key *sort_key;
+   unsigned int i;
+
+   if (!strcmp(field_name, "hitcount"))
+   return create_default_sort_key();
+
+   for (i = 0; i < hist_data->n_vals; i++)
+   if (!strcmp(field_name, hist_data->vals[i]->field->name))
+   goto out;
+
+   return ERR_PTR(-EINVAL);
+ out:
+   sort_key = kzalloc(sizeof(*sort_key), GFP_KERNEL);
+   if (!sort_key)
+   return ERR_PTR(-ENOMEM);
+
+   sort_key->idx = i;
+
+   return sort_key;
+}
+
+static int create_sort_keys(struct hist_trigger_data *hist_data)
+{
+   char *fields_str = hist_data->attrs->sort_keys_str;
+   struct hist_trigger_sort_key *sort_key;
+   char *field_str, *field_name;
+   unsigned int i;
+   int ret = 0;
+
+   if (!fields_str) {
+   sort_key = create_default_sort_key();
+   if (IS_ERR(sort_key)) {
+   ret = PTR_ERR(sort_key);
+   goto out;
+ 

[PATCH 11/15] tracing: Add a per-event-trigger 'paused' field

2015-03-02 Thread Tom Zanussi
Add a simple per-trigger 'paused' flag, allowing individual triggers
to be paused.  We could leave it to the individual triggers that need
this functionality to implement it themselves, but we also want to
allow other events to control pausing, so add it to the trigger data.

Signed-off-by: Tom Zanussi 
---
 kernel/trace/trace.h| 1 +
 kernel/trace/trace_events_trigger.c | 4 
 2 files changed, 5 insertions(+)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3d38c2e..5bc1752 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1110,6 +1110,7 @@ struct event_trigger_data {
struct event_filter __rcu   *filter;
char*filter_str;
void*private_data;
+   boolpaused;
struct list_headlist;
 };
 
diff --git a/kernel/trace/trace_events_trigger.c 
b/kernel/trace/trace_events_trigger.c
index a8dfd4e..010ce30 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -73,6 +73,8 @@ event_triggers_call(struct ftrace_event_file *file, void *rec)
return tt;
 
list_for_each_entry_rcu(data, &file->triggers, list) {
+   if (data->paused)
+   continue;
if (!rec) {
data->ops->func(data, rec);
continue;
@@ -110,6 +112,8 @@ event_triggers_post_call(struct ftrace_event_file *file,
struct event_trigger_data *data;
 
list_for_each_entry_rcu(data, &file->triggers, list) {
+   if (data->paused)
+   continue;
if (data->cmd_ops->trigger_type & tt)
data->ops->func(data, rec);
}
-- 
1.9.3



[PATCH 15/15] tracing: Add 'hist' trigger Documentation

2015-03-02 Thread Tom Zanussi
Add documentation and usage examples for 'hist' triggers.

Signed-off-by: Tom Zanussi 
---
 Documentation/trace/events.txt | 870 +
 1 file changed, 870 insertions(+)

diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
index 75d25a1..0e7a27f 100644
--- a/Documentation/trace/events.txt
+++ b/Documentation/trace/events.txt
@@ -494,3 +494,873 @@ The following commands are supported:
 
   Note that there can be only one traceon or traceoff trigger per
   triggering event.
+
+- hist
+
+  This command aggregates event hits into a hash table keyed on a
+  trace event format field (or stacktrace) and a set of running
+  totals derived from one or more trace event format fields and/or
+  event counts (hitcount).
+
+  The format of a hist trigger is as follows:
+
+    hist:keys=<field1>:values=<field1[,field2,...]>
+  [:size=#entries][:sort=field1][:pause][:continue]
+  [:clear] [if ]
+
+  When a matching event is hit, an entry is added to a hash table
+  using the key(s) and value(s) named.  Keys and values correspond to
+  fields in the event's format description.  Values must correspond to
+  numeric fields - on an event hit, the value(s) will be added to a
+  sum kept for that field.  The special string 'hitcount' can be used
+  in place of an explicit value field - this is simply a count of
+  event hits.  Keys can be any field, or the special string
+  'stacktrace', which will use the event's kernel stacktrace as the
+  key.  The keywords 'keys' or 'key' can be used to specify keys, and
+  the keyworks 'values', 'vals', or 'val' can be used to specify
+  values.  For the time being, only a single key can be used -
+  compound keys aren't yet supported.
+
+  'hist' triggers add a 'hist' file to each event's subdirectory.
+  Reading the 'hist' file for the event will dump the hash table in
+  its entirety to stdout.  By default, numeric fields are displayed as
+  base-10 integers.  This can be modified by appending any of the
+  following modifiers to the field name:
+
+   .hex   display a number as a hex value
+   .sym   display an address as a symbol
+   .syscall   display a syscall id as a system call name
+   .execname  display a common_pid as a program name
+
+  A typical usage scenario would be the following to enable a hist
+  trigger, read its current contents, and then turn it off:
+
+  # echo 'hist:keys=skbaddr.hex:vals=len' > \
+/sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+  # echo '!hist:keys=skbaddr.hex:vals=len' > \
+/sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  The trigger file itself can be read to show the details of the
+  currently attached hist trigger.  This information is also displayed
+  at the top of the 'hist' file when read.
+
+  By default, the size of the hash table is 2048 entries.  The 'size'
+  param can be used to specify more or fewer than that.  The units are
+  in terms of hashtable entries - if a run uses more entries than
+  specified, the results will show the number of 'drops', the number
+  of hits that were ignored.  The size should be a power of 2 between
+  128 and 131072 (any non-power-of-2 number specified will be rounded
+  up).
+
+  The 'sort' param can be used to specify a value field to sort on.
+  The default if unspecified is 'hitcount' and the default sort order
+  is 'ascending'.  To sort in the opposite direction, append
+  '.descending' to the sort key.
+
+  The 'pause' param can be used to pause an existing hist trigger or
+  to start a hist trigger but not log any events until told to do so.
+  'continue' or 'cont' can be used to start or restart a paused hist
+  trigger.
+
+  The 'clear' param will clear the contents of a running hist trigger
+  and leave its current paused/active state unchanged.
+
+- enable_hist/disable_hist
+
+  The enable_hist and disable_hist triggers can be used to have one
+  event conditionally start and stop another event's already-attached
+  hist trigger.  Any number of enable_hist and disable_hist triggers
+  can be attached to a given event, allowing that event to kick off
+  and stop aggregations on a host of other events.
+
+  The format is very similar to the enable/disable_event triggers:
+
+  enable_hist:<system>:<event>[:count]
+  disable_hist:<system>:<event>[:count]
+
+  Instead of enabling or disabling the tracing of the target event
+  into the trace buffer as the enable/disable_event triggers do, the
+  enable/disable_hist triggers enable or disable the aggregation of
+  the target event into a hash table.
+
+  A typical usage scenario for the enable_hist/disable_hist triggers
+  would be to first set up a paused hist trigger on some event,
+  followed by an enable_hist/disable_hist pair that turns the hist
+  aggregation on and off when conditions of interest are hit:
+
+  # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \
+

[PATCH 14/15] tracing: Add enable_hist/disable_hist triggers

2015-03-02 Thread Tom Zanussi
Similar to enable_event/disable_event triggers, these triggers enable
and disable the aggregation of events into maps rather than enabling
and disabling their writing into the trace buffer.

They can be used to automatically start and stop hist triggers based
on a matching filter condition.

If there's a paused hist trigger on system:event, the following would
start it when the filter condition was hit:

  # echo enable_hist:system:event [ if filter] > event/trigger

And the following would disable a running system:event hist trigger:

  # echo disable_hist:system:event [ if filter] > event/trigger

See Documentation/trace/events.txt for real examples.

Signed-off-by: Tom Zanussi 
---
 include/linux/ftrace_event.h|   1 +
 kernel/trace/trace.c|  11 ++-
 kernel/trace/trace_events_trigger.c | 138 +++-
 3 files changed, 146 insertions(+), 4 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 483e011..8679996 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -417,6 +417,7 @@ enum event_trigger_type {
ETT_STACKTRACE  = (1 << 2),
ETT_EVENT_ENABLE= (1 << 3),
ETT_EVENT_HIST  = (1 << 4),
+   ETT_HIST_ENABLE = (1 << 5),
 };
 
 extern int filter_match_preds(struct event_filter *filter, void *rec);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2fa41ef..b537a37 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3724,6 +3724,8 @@ static const char readme_msg[] =
"\t   trigger: traceon, traceoff\n"
"\tenable_event::\n"
"\tdisable_event::\n"
+   "\tenable_hist::\n"
+   "\tdisable_hist::\n"
 #ifdef CONFIG_STACKTRACE
"\t\tstacktrace\n"
 #endif
@@ -3783,7 +3785,14 @@ static const char readme_msg[] =
"\tuntil told to do so.  'continue' can be used to start or\n"
"\trestart a paused hist trigger.\n\n"
"\tThe 'clear' param will clear the contents of a running hist\n"
-   "\ttrigger and leave its current paused/active state.\n"
+   "\ttrigger and leave its current paused/active state.\n\n"
+   "\tThe enable_hist and disable_hist triggers can be used to\n"
+   "\thave one event conditionally start and stop another event's\n"
+   "\talready-attached hist trigger.  Any number of enable_hist\n"
+   "\tand disable_hist triggers can be attached to a given event,\n"
+   "\tallowing that event to kick off and stop aggregations on\n"
+   "\ta host of other events.  See Documentation/trace/events.txt\n"
+   "\tfor examples.\n"
 ;
 
 static ssize_t
diff --git a/kernel/trace/trace_events_trigger.c 
b/kernel/trace/trace_events_trigger.c
index ae75528..ca5 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1062,10 +1062,13 @@ static __init void unregister_trigger_traceon_traceoff_cmds(void)
 /* Avoid typos */
 #define ENABLE_EVENT_STR   "enable_event"
 #define DISABLE_EVENT_STR  "disable_event"
+#define ENABLE_HIST_STR"enable_hist"
+#define DISABLE_HIST_STR   "disable_hist"
 
 struct enable_trigger_data {
struct ftrace_event_file*file;
boolenable;
+   boolhist;
 };
 
 static void
@@ -1104,7 +1107,9 @@ event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
struct enable_trigger_data *enable_data = data->private_data;
 
seq_printf(m, "%s:%s:%s",
-  enable_data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
+  enable_data->hist ?
+  (enable_data->enable ? ENABLE_HIST_STR : DISABLE_HIST_STR) :
+  (enable_data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR),
   enable_data->file->event_call->class->system,
   ftrace_event_name(enable_data->file->event_call));
 
@@ -1183,6 +1188,7 @@ event_enable_trigger_func(struct event_command *cmd_ops,
char *trigger;
char *number;
bool enable;
+   bool hist;
int ret;
 
if (!param)
@@ -1204,7 +1210,11 @@ event_enable_trigger_func(struct event_command *cmd_ops,
if (!event_enable_file)
goto out;
 
-   enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
+   hist = ((strcmp(cmd, ENABLE_HIST_STR) == 0) ||
+   (strcmp(cmd, DISABLE_HIST_STR) == 0));
+
+   enable = ((strcmp(cmd, ENABLE_EVENT_STR) == 0) ||
+ (strcmp(cmd, ENABLE_HIST_STR) == 0));
 
trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
 
@@ -1225,6 +1235,7 @@ event_enable_trigger_func(struct event_command *cmd_ops,
INIT_LIST_HEAD(&trigger_data->list);
RCU_INIT_POINTER(trigger_data->filter, NULL);
 
+   enable_data->hist = hist;

Re: [PATCH] video: ARM CLCD: Added support for FBIOPAN_DISPLAY and virtual y resolution

2015-03-02 Thread Pawel Moll
On Wed, 2015-02-25 at 21:01 +, Arun Ramamurthy wrote:
> Added code to support FBIOPAN_DISPLAY. Also added yres_virtual
> parameter to device tree to set the virtual y resolution
> 
> Reviewed-by: Ray Jui 
> Reviewed-by: Scott Branden 
> Signed-off-by: Arun Ramamurthy 
> ---
>  .../devicetree/bindings/video/arm,pl11x.txt|  4 +++
>  drivers/video/fbdev/amba-clcd.c| 31 +++---
>  2 files changed, 31 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/video/arm,pl11x.txt 
> b/Documentation/devicetree/bindings/video/arm,pl11x.txt
> index 14d6f87..2262cdb 100644
> --- a/Documentation/devicetree/bindings/video/arm,pl11x.txt
> +++ b/Documentation/devicetree/bindings/video/arm,pl11x.txt
> @@ -50,6 +50,10 @@ Optional properties:
>   display mode, data is driven onto the LCD data lines at the
>   programmed edge of CLCP when CLAC is in its active state.
>  
> +- yres_virtual: Virtual Y resolution,
> + It can be used to configure a virtual y resolution. It
> + must be a value larger than the actual y resolution.
> +

I'm not sure about this... The word "virtual" never works well with
device tree nodes defined as "hardware description".

I understand what you're doing, but adding this property to the display
controller's node doesn't sound right. How does this describe hardware?
If anywhere, it's more like a job for the panel node?

Pawel



[PATCH 08/15] tracing: Make kmem memory allocation tracepoints conditional

2015-03-02 Thread Tom Zanussi
Making the memory allocation tracepoints conditional allows those
events to be traced using the very functions that contain them - e.g.
the kmalloc_trace() handler can itself use kmalloc() in the trace
path, which otherwise wouldn't be possible.

The TP_CONDITION simply tests gfp_flags for the newly introduced
___GFP_NOTRACE flag; if it is set, the tracepoint returns without
tracing.

This allows _notrace versions of the memory allocation functions to be
defined, which do the same thing as the normal versions but in
addition set the ___GFP_NOTRACE flag, and thus avoid the (possibly
indirect) recursive calls to themselves.
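
A minimal sketch of what such a _notrace variant could look like (the
kmalloc_notrace() name here is illustrative; the actual wrappers are
introduced later in this series):

  /* Allocate without hitting the kmalloc tracepoint; safe to call
   * from a trace handler that would otherwise recurse into itself.
   */
  static inline void *kmalloc_notrace(size_t size, gfp_t flags)
  {
          return kmalloc(size, flags | ___GFP_NOTRACE);
  }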

Signed-off-by: Tom Zanussi 
---
 include/trace/events/kmem.h | 28 +++-
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index aece134..6d34eed 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -42,20 +42,24 @@ DECLARE_EVENT_CLASS(kmem_alloc,
show_gfp_flags(__entry->gfp_flags))
 );
 
-DEFINE_EVENT(kmem_alloc, kmalloc,
+DEFINE_EVENT_CONDITION(kmem_alloc, kmalloc,
 
TP_PROTO(unsigned long call_site, const void *ptr,
 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-   TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+   TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+   TP_CONDITION(!(gfp_flags & ___GFP_NOTRACE))
 );
 
-DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
+DEFINE_EVENT_CONDITION(kmem_alloc, kmem_cache_alloc,
 
TP_PROTO(unsigned long call_site, const void *ptr,
 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-   TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+   TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+   TP_CONDITION(!(gfp_flags & ___GFP_NOTRACE))
 );
 
 DECLARE_EVENT_CLASS(kmem_alloc_node,
@@ -96,22 +100,26 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
__entry->node)
 );
 
-DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
+DEFINE_EVENT_CONDITION(kmem_alloc_node, kmalloc_node,
 
TP_PROTO(unsigned long call_site, const void *ptr,
 size_t bytes_req, size_t bytes_alloc,
 gfp_t gfp_flags, int node),
 
-   TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+   TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+   TP_CONDITION(!(gfp_flags & ___GFP_NOTRACE))
 );
 
-DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
+DEFINE_EVENT_CONDITION(kmem_alloc_node, kmem_cache_alloc_node,
 
TP_PROTO(unsigned long call_site, const void *ptr,
 size_t bytes_req, size_t bytes_alloc,
 gfp_t gfp_flags, int node),
 
-   TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+   TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+   TP_CONDITION(!(gfp_flags & ___GFP_NOTRACE))
 );
 
 DECLARE_EVENT_CLASS(kmem_free,
@@ -191,13 +199,15 @@ TRACE_EVENT(mm_page_free_batched,
__entry->cold)
 );
 
-TRACE_EVENT(mm_page_alloc,
+TRACE_EVENT_CONDITION(mm_page_alloc,
 
TP_PROTO(struct page *page, unsigned int order,
gfp_t gfp_flags, int migratetype),
 
TP_ARGS(page, order, gfp_flags, migratetype),
 
+   TP_CONDITION(!(gfp_flags & ___GFP_NOTRACE)),
+
TP_STRUCT__entry(
__field(struct page *,  page)
__field(unsigned int,   order   )
-- 
1.9.3



Re: [PATCH] video: ARM CLCD: Added support for FBIO_WAITFORVSYNC

2015-03-02 Thread Pawel Moll
On Wed, 2015-02-25 at 21:01 +, Arun Ramamurthy wrote:
> Added ioctl and interrupt handler functions to support FBIO_WAITFORVSYNC.
> Also corrected documentation to make interrupts and interrupt-names
> optional as they are not required properties.

You may not be aware of this fact, but it's the "documentation" that
defines which properties are required...

> Reviewed-by: Ray Jui 
> Reviewed-by: Scott Branden 
> Signed-off-by: Arun Ramamurthy 
> ---
>  .../devicetree/bindings/video/arm,pl11x.txt| 11 +--
>  drivers/video/fbdev/amba-clcd.c| 82 ++
>  include/linux/amba/clcd.h  |  4 ++
>  3 files changed, 89 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/video/arm,pl11x.txt 
> b/Documentation/devicetree/bindings/video/arm,pl11x.txt
> index 2262cdb..7d19024 100644
> --- a/Documentation/devicetree/bindings/video/arm,pl11x.txt
> +++ b/Documentation/devicetree/bindings/video/arm,pl11x.txt
> @@ -10,14 +10,6 @@ Required properties:
>  
>  - reg: base address and size of the control registers block
>  
> -- interrupt-names: either the single entry "combined" representing a
> - combined interrupt output (CLCDINTR), or the four entries
> - "mbe", "vcomp", "lnbu", "fuf" representing the individual
> - CLCDMBEINTR, CLCDVCOMPINTR, CLCDLNBUINTR, CLCDFUFINTR interrupts
> -
> -- interrupts: contains an interrupt specifier for each entry in
> - interrupt-names
> -
>  - clock-names: should contain "clcdclk" and "apb_pclk"
>  
>  - clocks: contains phandle and clock specifier pairs for the entries

So no, you can't do that.

Pawel



Re: [PATCH] usb: plusb: Add support for National Instruments host-to-host cable

2015-03-02 Thread Ben Shelton
On 02/27, David Miller wrote:
> From: Ben Shelton 
> Date: Fri, 27 Feb 2015 15:26:32 -0600
> 
> > On 02/20, David Miller wrote:
> >> From: Ben Shelton 
> >> Date: Mon, 16 Feb 2015 13:47:06 -0600
> >> 
> >> > The National Instruments USB Host-to-Host Cable is based on the Prolific
> >> > PL-25A1 chipset.  Add its VID/PID so the plusb driver will recognize it.
> >> > 
> >> > Signed-off-by: Ben Shelton 
> >> 
> >> Applied, thanks.
> > 
> > Hi David,
> > 
> > Is this something you think would go into stable as well?
> 
> I'm ambivalent.

In that case, could you go ahead and apply it to stable as well?

Thanks,
Ben


[PATCH v5 1/5] sched_clock: Match scope of read and write seqcounts

2015-03-02 Thread Daniel Thompson
Currently the scope of the raw_write_seqcount_begin/end in
sched_clock_register far exceeds the scope of the read section in
sched_clock. This gives the impression of safety during cursory review
but achieves little.

Note that this is likely to be a latent issue at present, because
sched_clock_register() is typically called before we enable
interrupts; however, it does risk bugs being needlessly introduced as
the code evolves.

This patch fixes the problem by increasing the scope of the read locking
performed by sched_clock() to cover all data modified by
sched_clock_register.

We also improve clarity by moving writes to struct clock_data that do
not impact sched_clock() outside of the critical section.
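
For reference, the shape this change moves toward - the reader
consumes all writer-updated fields inside the retry loop, and the
writer updates those same fields inside one write section (a generic
sketch, not the exact kernel code):

  /* reader */
  do {
          seq = raw_read_seqcount_begin(&cd.seq);
          /* read *all* data the writer below may modify */
  } while (read_seqcount_retry(&cd.seq, seq));

  /* writer */
  raw_write_seqcount_begin(&cd.seq);
  /* update *only* what the reader consumes in the loop above */
  raw_write_seqcount_end(&cd.seq);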

Signed-off-by: Daniel Thompson 
Cc: Russell King 
Cc: Will Deacon 
Cc: Catalin Marinas 
Reviewed-by: Stephen Boyd 
---
 kernel/time/sched_clock.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 01d2d15aa662..3d21a8719444 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -58,23 +58,21 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 
 unsigned long long notrace sched_clock(void)
 {
-   u64 epoch_ns;
-   u64 epoch_cyc;
-   u64 cyc;
+   u64 cyc, res;
unsigned long seq;
 
-   if (cd.suspended)
-   return cd.epoch_ns;
-
do {
seq = raw_read_seqcount_begin(&cd.seq);
-   epoch_cyc = cd.epoch_cyc;
-   epoch_ns = cd.epoch_ns;
+
+   res = cd.epoch_ns;
+   if (!cd.suspended) {
+   cyc = read_sched_clock();
+   cyc = (cyc - cd.epoch_cyc) & sched_clock_mask;
+   res += cyc_to_ns(cyc, cd.mult, cd.shift);
+   }
} while (read_seqcount_retry(&cd.seq, seq));
 
-   cyc = read_sched_clock();
-   cyc = (cyc - epoch_cyc) & sched_clock_mask;
-   return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
+   return res;
 }
 
 /*
@@ -124,10 +122,11 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
 
new_mask = CLOCKSOURCE_MASK(bits);
+   cd.rate = rate;
 
/* calculate how many ns until we wrap */
wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
-   new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
+   cd.wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
 
/* update epoch for new counter and update epoch_ns from old counter*/
new_epoch = read();
@@ -138,8 +137,6 @@ void __init sched_clock_register(u64 (*read)(void), int 
bits,
raw_write_seqcount_begin(&cd.seq);
read_sched_clock = read;
sched_clock_mask = new_mask;
-   cd.rate = rate;
-   cd.wrap_kt = new_wrap_kt;
cd.mult = new_mult;
cd.shift = new_shift;
cd.epoch_cyc = new_epoch;
-- 
2.1.0



Re: [PATCH v2 3/9] make kernel be able to load above 4G in boot stage

2015-03-02 Thread Baoquan He
Oops, I didn't copy the subject of Yinghai's patch; it should be as
below. Will change it back when I repost.

x86, boot: Enable ident_mapping for kasl above 4G on 64bit

On 03/02/15 at 10:58pm, Baoquan He wrote:
> From: Yinghai Lu 
> 
> Split kernel_ident_mapping_init() out and call it in the
> boot::decompress_kernel stage. It will cover new ranges that are
> above 4G.
> 
> -v2: fix one typo, use round_up/round_down and use MACRO for size.
> 
> Signed-off-by: Yinghai Lu 
> ---
>  arch/x86/boot/compressed/misc.c | 10 +
>  arch/x86/boot/compressed/misc_pgt.c | 61 ++
>  arch/x86/include/asm/page.h |  5 +++
>  arch/x86/mm/ident_map.c | 74 
> +
>  arch/x86/mm/init_64.c   | 74 
> +
>  5 files changed, 151 insertions(+), 73 deletions(-)
>  create mode 100644 arch/x86/boot/compressed/misc_pgt.c
>  create mode 100644 arch/x86/mm/ident_map.c
> 
> diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
> index ac5c05e..c9d8187 100644
> --- a/arch/x86/boot/compressed/misc.c
> +++ b/arch/x86/boot/compressed/misc.c
> @@ -9,6 +9,11 @@
>   * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
>   */
>  
> +#ifdef CONFIG_X86_64
> +#define __pa(x)  ((unsigned long)(x))
> +#define __va(x)  ((void *)((unsigned long)(x)))
> +#endif
> +
>  #include "misc.h"
>  #include "../string.h"
>  
> @@ -366,6 +371,8 @@ static void parse_elf(void *output)
>   free(phdrs);
>  }
>  
> +#include "misc_pgt.c"
> +
>  asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
> unsigned char *input_data,
> unsigned long input_len,
> @@ -421,6 +428,9 @@ asmlinkage __visible void *decompress_kernel(void *rmode, 
> memptr heap,
>   error("Wrong destination address");
>  #endif
>  
> + if (output != output_orig)
> + fill_linux64_pagetable((unsigned long)output, output_len);
> +
>   debug_putstr("\nDecompressing Linux... ");
>   decompress(input_data, input_len, NULL, NULL, output, NULL, error);
>   parse_elf(output);
> diff --git a/arch/x86/boot/compressed/misc_pgt.c 
> b/arch/x86/boot/compressed/misc_pgt.c
> new file mode 100644
> index 000..2783f0f
> --- /dev/null
> +++ b/arch/x86/boot/compressed/misc_pgt.c
> @@ -0,0 +1,61 @@
> +
> +#ifdef CONFIG_X86_64
> +#include 
> +#include 
> +
> +#include "../../mm/ident_map.c"
> +
> +struct alloc_pgt_data {
> + unsigned char *pgt_buf;
> + unsigned long pgt_buf_size;
> + unsigned long pgt_buf_offset;
> +};
> +
> +static void *alloc_pgt_page(void *context)
> +{
> + struct alloc_pgt_data *d = (struct alloc_pgt_data *)context;
> + unsigned char *p = (unsigned char *)d->pgt_buf;
> +
> + if (d->pgt_buf_offset >= d->pgt_buf_size) {
> + debug_putstr("out of pgt_buf in misc.c\n");
> + return NULL;
> + }
> +
> + p += d->pgt_buf_offset;
> + d->pgt_buf_offset += PAGE_SIZE;
> +
> + return p;
> +}
> +
> +/* 4 pages to cover crossing a 512G boundary */
> +#define PGT_BUF_SIZE (PAGE_SIZE*4)
> +
> +unsigned long __force_order;
> +static unsigned char pgt_buf[PGT_BUF_SIZE] __aligned(PAGE_SIZE);
> +
> +static void fill_linux64_pagetable(unsigned long start, unsigned long size)
> +{
> + struct alloc_pgt_data data = {
> + .pgt_buf = (unsigned char *) pgt_buf,
> + .pgt_buf_size = sizeof(pgt_buf),
> + .pgt_buf_offset = 0,
> + };
> + struct x86_mapping_info mapping_info = {
> + .alloc_pgt_page = alloc_pgt_page,
> + .context = &data,
> + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
> + };
> + unsigned long end = start + size;
> + pgd_t *level4p = (pgd_t *)read_cr3();
> +
> + /* align boundary to 2M */
> + start = round_down(start, PMD_SIZE);
> + end = round_up(end, PMD_SIZE);
> + if (start >= (1UL<<32))
> + kernel_ident_mapping_init(&mapping_info, level4p, start, end);
> +}
> +#else
> +static void fill_linux64_pagetable(unsigned long start, unsigned long size)
> +{
> +}
> +#endif
> diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
> index 802dde3..cf8f619 100644
> --- a/arch/x86/include/asm/page.h
> +++ b/arch/x86/include/asm/page.h
> @@ -37,7 +37,10 @@ static inline void copy_user_page(void *to, void *from, 
> unsigned long vaddr,
>   alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
>  #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
>  
> +#ifndef __pa
>  #define __pa(x)  __phys_addr((unsigned long)(x))
> +#endif
> +
>  #define __pa_nodebug(x)  __phys_addr_nodebug((unsigned long)(x))
>  /* __pa_symbol should be used for C visible symbols.
> This seems to be the official gcc blessed way to do such arithmetic. */
> @@ -51,7 +54,9 @@ static inline void copy_user_page(void *to, void *from, 
> unsigned long vaddr,
>  #define 

[PATCH v5 3/5] sched_clock: Remove suspend from clock_read_data

2015-03-02 Thread Daniel Thompson
Currently cd.read_data.suspended is read by the hotpath function
sched_clock(). This variable need not be accessed on the hotpath. In
fact, once it is removed, we can remove the conditional branches from
sched_clock() and install a dummy read_sched_clock function to suspend
the clock.

The new master copy of the function pointer (actual_read_sched_clock) is
introduced and is used for all reads of the clock hardware except those
within sched_clock itself.

Suggested-by: Thomas Gleixner 
Signed-off-by: Daniel Thompson 
Cc: Russell King 
Cc: Will Deacon 
Cc: Catalin Marinas 
Reviewed-by: Stephen Boyd 
---
 kernel/time/sched_clock.c | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 695b2ac2e8b4..5d6407951fb8 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -28,10 +28,9 @@
  * @read_sched_clock:  Current clock source (or dummy source when suspended)
 * @mult:  Multiplier for scaled math conversion
  * @shift: Shift value for scaled math conversion
- * @suspended: Flag to indicate if the clock is suspended (stopped)
  *
  * Care must be taken when updating this structure; it is read by
- * some very hot code paths. It occupies <=48 bytes and, when combined
+ * some very hot code paths. It occupies <=40 bytes and, when combined
  * with the seqcount used to synchronize access, comfortably fits into
  * a 64 byte cache line.
  */
@@ -42,7 +41,6 @@ struct clock_read_data {
u64 (*read_sched_clock)(void);
u32 mult;
u32 shift;
-   bool suspended;
 };
 
 /**
@@ -64,6 +62,7 @@ struct clock_data {
struct clock_read_data read_data;
ktime_t wrap_kt;
unsigned long rate;
+   u64 (*actual_read_sched_clock)(void);
 };
 
 static struct hrtimer sched_clock_timer;
@@ -83,6 +82,8 @@ static u64 notrace jiffy_sched_clock_read(void)
 static struct clock_data cd cacheline_aligned = {
.read_data = { .mult = NSEC_PER_SEC / HZ,
   .read_sched_clock = jiffy_sched_clock_read, },
+   .actual_read_sched_clock = jiffy_sched_clock_read,
+
 };
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
@@ -99,12 +100,9 @@ unsigned long long notrace sched_clock(void)
do {
seq = raw_read_seqcount_begin(&cd.seq);
 
-   res = rd->epoch_ns;
-   if (!rd->suspended) {
-   cyc = rd->read_sched_clock();
-   cyc = (cyc - rd->epoch_cyc) & rd->sched_clock_mask;
-   res += cyc_to_ns(cyc, rd->mult, rd->shift);
-   }
+   cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
+ rd->sched_clock_mask;
+   res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
} while (read_seqcount_retry(&cd.seq, seq));
 
return res;
@@ -120,7 +118,7 @@ static void notrace update_sched_clock(void)
u64 ns;
struct clock_read_data *rd = &cd.read_data;
 
-   cyc = rd->read_sched_clock();
+   cyc = cd.actual_read_sched_clock();
ns = rd->epoch_ns +
 cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask,
   rd->mult, rd->shift);
@@ -166,10 +164,11 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 
/* update epoch for new counter and update epoch_ns from old counter*/
new_epoch = read();
-   cyc = rd->read_sched_clock();
+   cyc = cd.actual_read_sched_clock();
ns = rd->epoch_ns +
 cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask,
   rd->mult, rd->shift);
+   cd.actual_read_sched_clock = read;
 
raw_write_seqcount_begin();
rd->read_sched_clock = read;
@@ -209,7 +208,7 @@ void __init sched_clock_postinit(void)
 * If no sched_clock function has been provided at that point,
 * make it the final one one.
 */
-   if (cd.read_data.read_sched_clock == jiffy_sched_clock_read)
+   if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
 
update_sched_clock();
@@ -223,13 +222,24 @@ void __init sched_clock_postinit(void)
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
 }
 
+/*
+ * Clock read function for use when the clock is suspended.
+ *
+ * This function makes it appear to sched_clock() as if the clock
+ * stopped counting at its last update.
+ */
+static u64 notrace suspended_sched_clock_read(void)
+{
+   return cd.read_data.epoch_cyc;
+}
+
 static int sched_clock_suspend(void)
 {
struct clock_read_data *rd = &cd.read_data;
 
update_sched_clock();
hrtimer_cancel(&sched_clock_timer);
-   rd->suspended = true;
+   rd->read_sched_clock = suspended_sched_clock_read;
return 0;
 }
 
@@ -237,9 +247,9 @@ static void 

[PATCH v5 5/5] sched_clock: Avoid deadlock during read from NMI

2015-03-02 Thread Daniel Thompson
Currently it is possible for an NMI (or FIQ on ARM) to come in and
read sched_clock() whilst update_sched_clock() has locked the seqcount
for writing. This results in the NMI handler locking up when it calls
raw_read_seqcount_begin().

This patch fixes the NMI safety issues by providing banked clock data.
This is a similar approach to the one used in Thomas Gleixner's
4396e058c52e("timekeeping: Provide fast and NMI safe access to
CLOCK_MONOTONIC").

Suggested-by: Stephen Boyd 
Signed-off-by: Daniel Thompson 
Cc: Russell King 
Cc: Will Deacon 
Cc: Catalin Marinas 
Reviewed-by: Stephen Boyd 
---
 kernel/time/sched_clock.c | 103 ++
 1 file changed, 68 insertions(+), 35 deletions(-)

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 9280327676dc..a23d98c33dab 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -47,19 +47,20 @@ struct clock_read_data {
  * struct clock_data - all data needed for sched_clock (including
  * registration of a new clock source)
  *
- * @seq:   Sequence counter for protecting updates.
+ * @seq:   Sequence counter for protecting updates. The lowest
+ * bit is the index for @read_data.
  * @read_data: Data required to read from sched_clock.
  * @wrap_kt:   Duration for which clock can run before wrapping
  * @rate:  Tick rate of the registered clock
  * @actual_read_sched_clock: Registered clock read function
  *
  * The ordering of this structure has been chosen to optimize cache
- * performance. In particular seq and read_data (combined) should fit
+ * performance. In particular seq and read_data[0] (combined) should fit
  * into a single 64 byte cache line.
  */
 struct clock_data {
seqcount_t seq;
-   struct clock_read_data read_data;
+   struct clock_read_data read_data[2];
ktime_t wrap_kt;
unsigned long rate;
u64 (*actual_read_sched_clock)(void);
@@ -80,10 +81,9 @@ static u64 notrace jiffy_sched_clock_read(void)
 }
 
 static struct clock_data cd cacheline_aligned = {
-   .read_data = { .mult = NSEC_PER_SEC / HZ,
-  .read_sched_clock = jiffy_sched_clock_read, },
+   .read_data[0] = { .mult = NSEC_PER_SEC / HZ,
+ .read_sched_clock = jiffy_sched_clock_read, },
.actual_read_sched_clock = jiffy_sched_clock_read,
-
 };
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
@@ -95,10 +95,11 @@ unsigned long long notrace sched_clock(void)
 {
u64 cyc, res;
unsigned long seq;
-   struct clock_read_data *rd = &cd.read_data;
+   struct clock_read_data *rd;
 
do {
-   seq = raw_read_seqcount_begin(&cd.seq);
+   seq = raw_read_seqcount(&cd.seq);
+   rd = cd.read_data + (seq & 1);
 
cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
  rd->sched_clock_mask;
@@ -109,26 +110,50 @@ unsigned long long notrace sched_clock(void)
 }
 
 /*
+ * Updating the data required to read the clock.
+ *
+ * sched_clock will never observe mis-matched data even if called from
+ * an NMI. We do this by maintaining an odd/even copy of the data and
+ * steering sched_clock to one or the other using a sequence counter.
+ * In order to preserve the data cache profile of sched_clock as much
+ * as possible the system reverts back to the even copy when the update
+ * completes; the odd copy is used *only* during an update.
+ */
+static void update_clock_read_data(struct clock_read_data *rd)
+{
+   /* update the backup (odd) copy with the new data */
+   cd.read_data[1] = *rd;
+
+   /* steer readers towards the odd copy */
+   raw_write_seqcount_latch(&cd.seq);
+
+   /* now it's safe for us to update the normal (even) copy */
+   cd.read_data[0] = *rd;
+
+   /* switch readers back to the even copy */
+   raw_write_seqcount_latch(&cd.seq);
+}
+
+/*
  * Atomically update the sched_clock epoch.
  */
 static void update_sched_clock(void)
 {
-   unsigned long flags;
u64 cyc;
u64 ns;
-   struct clock_read_data *rd = &cd.read_data;
+   struct clock_read_data rd;
+
+   rd = cd.read_data[0];
 
cyc = cd.actual_read_sched_clock();
-   ns = rd->epoch_ns +
-cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask,
-  rd->mult, rd->shift);
-
-   raw_local_irq_save(flags);
-   raw_write_seqcount_begin();
-   rd->epoch_ns = ns;
-   rd->epoch_cyc = cyc;
-   raw_write_seqcount_end();
-   raw_local_irq_restore(flags);
+   ns = rd.epoch_ns +
+cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask,
+  rd.mult, rd.shift);
+
+   rd.epoch_ns = ns;
+   rd.epoch_cyc = cyc;
+
+   update_clock_read_data(&rd);
 }
 
 static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
@@ -145,7 +170,7 @@ void __init 

[PATCH] ARM: dts: vf610: remove unused gpio-range-cells property

2015-03-02 Thread Stefan Agner
The gpio-range-cells property, which is deprecated anyway, was never
used by the pin controller driver. This patch removes it.

Signed-off-by: Stefan Agner 
---
 arch/arm/boot/dts/vfxxx.dtsi | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm/boot/dts/vfxxx.dtsi b/arch/arm/boot/dts/vfxxx.dtsi
index a29c7ce..0d95232 100644
--- a/arch/arm/boot/dts/vfxxx.dtsi
+++ b/arch/arm/boot/dts/vfxxx.dtsi
@@ -213,7 +213,6 @@
iomuxc: iomuxc@40048000 {
compatible = "fsl,vf610-iomuxc";
reg = <0x40048000 0x1000>;
-   #gpio-range-cells = <3>;
};
 
gpio0: gpio@40049000 {
-- 
2.3.0



[PATCH v5 2/5] sched_clock: Optimize cache line usage

2015-03-02 Thread Daniel Thompson
Currently sched_clock(), a very hot code path, is not optimized to
minimise its cache profile. In particular:

  1. cd is not cacheline_aligned,

  2. struct clock_data does not distinguish between hotpath and
 coldpath data, reducing locality of reference in the hotpath,

  3. Some hotpath data is missing from struct clock_data and is marked
 __read_mostly (which more or less guarantees it will not share a
 cache line with cd).

This patch corrects these problems by extracting all hotpath data
into a separate structure and using cacheline_aligned to ensure
the hotpath uses a single (64 byte) cache line.

Signed-off-by: Daniel Thompson 
Cc: Russell King 
Cc: Will Deacon 
Cc: Catalin Marinas 
Reviewed-by: Stephen Boyd 
---
 kernel/time/sched_clock.c | 113 +++---
 1 file changed, 77 insertions(+), 36 deletions(-)

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 3d21a8719444..695b2ac2e8b4 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -18,28 +18,59 @@
 #include 
 #include 
 
-struct clock_data {
-   ktime_t wrap_kt;
+/**
+ * struct clock_read_data - data required to read from sched_clock
+ *
+ * @epoch_ns:  sched_clock value at last update
+ * @epoch_cyc: Clock cycle value at last update
+ * @sched_clock_mask:   Bitmask for two's complement subtraction of non 64bit
+ * clocks
+ * @read_sched_clock:  Current clock source (or dummy source when suspended)
+ * @mult:  Multiplier for scaled math conversion
+ * @shift: Shift value for scaled math conversion
+ * @suspended: Flag to indicate if the clock is suspended (stopped)
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=48 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
u64 epoch_ns;
u64 epoch_cyc;
-   seqcount_t seq;
-   unsigned long rate;
+   u64 sched_clock_mask;
+   u64 (*read_sched_clock)(void);
u32 mult;
u32 shift;
bool suspended;
 };
 
+/**
+ * struct clock_data - all data needed for sched_clock (including
+ * registration of a new clock source)
+ *
+ * @seq:   Sequence counter for protecting updates.
+ * @read_data: Data required to read from sched_clock.
+ * @wrap_kt:   Duration for which clock can run before wrapping
+ * @rate:  Tick rate of the registered clock
+ * @actual_read_sched_clock: Registered clock read function
+ *
+ * The ordering of this structure has been chosen to optimize cache
+ * performance. In particular seq and read_data (combined) should fit
+ * into a single 64 byte cache line.
+ */
+struct clock_data {
+   seqcount_t seq;
+   struct clock_read_data read_data;
+   ktime_t wrap_kt;
+   unsigned long rate;
+};
+
 static struct hrtimer sched_clock_timer;
 static int irqtime = -1;
 
 core_param(irqtime, irqtime, int, 0400);
 
-static struct clock_data cd = {
-   .mult   = NSEC_PER_SEC / HZ,
-};
-
-static u64 __read_mostly sched_clock_mask;
-
 static u64 notrace jiffy_sched_clock_read(void)
 {
/*
@@ -49,7 +80,10 @@ static u64 notrace jiffy_sched_clock_read(void)
return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static struct clock_data cd cacheline_aligned = {
+   .read_data = { .mult = NSEC_PER_SEC / HZ,
+  .read_sched_clock = jiffy_sched_clock_read, },
+};
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 {
@@ -60,15 +94,16 @@ unsigned long long notrace sched_clock(void)
 {
u64 cyc, res;
unsigned long seq;
+   struct clock_read_data *rd = &cd.read_data;
 
do {
seq = raw_read_seqcount_begin(&cd.seq);
 
-   res = cd.epoch_ns;
-   if (!cd.suspended) {
-   cyc = read_sched_clock();
-   cyc = (cyc - cd.epoch_cyc) & sched_clock_mask;
-   res += cyc_to_ns(cyc, cd.mult, cd.shift);
+   res = rd->epoch_ns;
+   if (!rd->suspended) {
+   cyc = rd->read_sched_clock();
+   cyc = (cyc - rd->epoch_cyc) & rd->sched_clock_mask;
+   res += cyc_to_ns(cyc, rd->mult, rd->shift);
}
} while (read_seqcount_retry(&cd.seq, seq));
 
@@ -83,16 +118,17 @@ static void notrace update_sched_clock(void)
unsigned long flags;
u64 cyc;
u64 ns;
+   struct clock_read_data *rd = &cd.read_data;
 
-   cyc = read_sched_clock();
-   ns = cd.epoch_ns +
-   cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
- cd.mult, cd.shift);
+   cyc = rd->read_sched_clock();
+   ns = 

[PATCH v5 0/5] sched_clock: Optimize and avoid deadlock during read from NMI

2015-03-02 Thread Daniel Thompson
This patchset optimizes the generic sched_clock implementation by
removing branches and significantly reducing the data cache profile. It
also makes it safe to call sched_clock() from NMI (or FIQ on ARM).

The data cache profile of sched_clock() in the original code is
somewhere between 2 and 3 (64-byte) cache lines, depending on alignment
of struct clock_data. After patching, the cache profile for the normal
case should be a single cacheline.

NMI safety was tested on i.MX6 with perf drowning the system in FIQs and
using the perf handler to check that sched_clock() returned monotonic
values. At the same time I forcefully reduced wrap_kt so that
update_sched_clock() is being called at >1000Hz.

Without the patches the above system is grossly unstable, surviving
[9K,115K,25K] perf event cycles during three separate runs. With the
patch I ran for over 9M perf event cycles before getting bored.

Performance testing has primarily been performed using a simple
tight loop test (i.e. one that is unlikely to benefit from the
cache profile improvements). Summary results show benefit on all
CPUs although magnitude varies significantly:

  Cortex A9 @ 792MHz 4.1% speedup
  Cortex A9 @ 1GHz   0.4% speedup  (different SoC to above)
  Scorpion  13.6% speedup
  Krait 35.1% speedup
  Cortex A53 @ 1GHz  1.6% speedup
  Cortex A57 @ 1GHz  5.0% speedup

Benchmarking was done by Stephen Boyd and myself, full data for the
above summaries can be found here:
https://docs.google.com/spreadsheets/d/1Zd2xN42U4oAVZcArqAYdAWgFI5oDFRysURCSYNmBpZA/edit?usp=sharing
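
A minimal sketch of the kind of tight-loop test described above
(illustrative only - not the actual benchmark used for these numbers):

  #include <linux/module.h>
  #include <linux/sched.h>

  static int __init sc_bench_init(void)
  {
          const unsigned long n = 1000000;
          unsigned long i;
          u64 t0, t1;

          t0 = sched_clock();
          for (i = 0; i < n; i++)
                  (void)sched_clock();
          t1 = sched_clock();
          pr_info("sched_clock: ~%llu ns/call\n", (t1 - t0) / n);
          return -EAGAIN;  /* measurement only; don't stay loaded */
  }
  module_init(sc_bench_init);
  MODULE_LICENSE("GPL");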

v5:
* Summarized benchmark results in the patchset cover letter and
  added some Reviewed-by:s.
* Rebased on 4.0-rc1.

v4:
* Optimized sched_clock() to be branchless by introducing a dummy
  function to provide clock values while the clock is suspended
  (Stephen Boyd).
* Improved commenting, including the kerneldoc comments (Stephen Boyd).
* Removed a redundant notrace from the update logic (Steven Rostedt).

v3:
* Optimized to minimise cache profile, including elimination of
  the suspended flag (Thomas Gleixner).
* Replaced the update_bank_begin/end with a single update function
  (Thomas Gleixner).
* Split into multiple patches to aid review.

v2:
* Extended the scope of the read lock in sched_clock() so we can bank
  all data consumed there (John Stultz)


Daniel Thompson (5):
  sched_clock: Match scope of read and write seqcounts
  sched_clock: Optimize cache line usage
  sched_clock: Remove suspend from clock_read_data
  sched_clock: Remove redundant notrace from update function
  sched_clock: Avoid deadlock during read from NMI

 kernel/time/sched_clock.c | 195 --
 1 file changed, 138 insertions(+), 57 deletions(-)

--
2.1.0



Re: [PATCH] pinctrl: imx: do not implicitly set pin regs to -1

2015-03-02 Thread Uwe Kleine-König
Hello,

On Mon, Mar 02, 2015 at 02:42:01PM +0100, Stefan Agner wrote:
> On 2015-03-02 13:59, Uwe Kleine-König wrote:
> > On Mon, Mar 02, 2015 at 07:45:17PM +0800, Shawn Guo wrote:
> >> On Fri, Feb 06, 2015 at 05:30:56PM +0100, Stefan Agner wrote:
> >> > Commit 3dac1918a491 ("pinctrl: imx: detect uninitialized pins") needs
> >> > the values in struct imx_pin_reg to be -1. This has been done in a
> >> > rather unorthodox way by setting the memory to 0xff using memset...
> >> > Use a proper for loop to initialize the whole array with -1.
> >> >
> >> > Signed-off-by: Stefan Agner 
> >>
> >> Acked-by: Shawn Guo 
> > too late. This patch is part of 4.0-rc1 (4ff0f034e95d).
> 
> This is not the same patch. The patch you are mentioning is actually
> fixing a bug introduced in the change where we set -1 for uninitialized
> pins. This patch is solving the weird assignment of the initial value...
ah right.

Best regards
Uwe

-- 
Pengutronix e.K.   | Uwe Kleine-König|
Industrial Linux Solutions | http://www.pengutronix.de/  |


Re: [PATCH v2 0/7] Kernel huge I/O mapping support

2015-03-02 Thread Toshi Kani
On Tue, 2015-02-24 at 09:09 +0100, Ingo Molnar wrote:
> * Andrew Morton  wrote:
> 
> > 
> > 
> > Oh.  We don't do any checking at all.  We're just telling 
> > userspace programmers "don't do that".  hrm.  What are 
> > your thoughts on adding the overlap checks to the kernel?
> 
> I have requested such sanity checking in previous review as
> well; it has to be made fool-proof for this optimization to
> be usable.
> 
> Another alternative would be to make this not a transparent 
> optimization, but a separate API: ioremap_hugepage() or so.
> 
> The devices and drivers dealing with GBs of remapped pages 
> is still relatively low, so they could make explicit use of 
> the API and opt in to it.
> 
> What I was arguing against was to make it a CONFIG_ option: 
> that achieves very little in practice, such APIs should be 
> uniformly available.

I was able to come up with simple changes that fall back to 4KB mappings
when a target range is covered by MTRRs.  So, with the changes, it is
now safe to enable huge page mappings in ioremap() transparently without
such restriction.  I will post updated patchset hopefully soon.

Thanks,
-Toshi



Re: [PATCH v2] usb/isp1760: set IRQ flags properly

2015-03-02 Thread laurent . pinchart
Hi Valentin,

Thank you for the patch.

On Sun Mar 01 2015 17:54:32 GMT+0200 (EET), Valentin Rothberg wrote:
> The IRQF_DISABLED is a NOOP and scheduled to be removed.  According to
> commit e58aa3d2d0cc ("genirq: Run irq handlers with interrupts
> disabled") running IRQ handlers with interrupts enabled can cause stack
> overflows when the interrupt line of the issuing device is still active.
> 
> This patch removes using this deprecated flag and additionally removes
> redundantly setting IRQF_SHARED for isp1760_udc_register().
> 
> Signed-off-by: Valentin Rothberg 

Acked-by: Laurent Pinchart 

> ---
> v2: Reverted change that removed IRQF_SHARED in the call of
> isp1760_hcd_register().
> ---
>  drivers/usb/isp1760/isp1760-core.c | 3 +--
>  drivers/usb/isp1760/isp1760-udc.c  | 4 ++--
>  2 files changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/usb/isp1760/isp1760-core.c 
> b/drivers/usb/isp1760/isp1760-core.c
> index b982755..bfa402c 100644
> --- a/drivers/usb/isp1760/isp1760-core.c
> +++ b/drivers/usb/isp1760/isp1760-core.c
> @@ -151,8 +151,7 @@ int isp1760_register(struct resource *mem, int irq, 
> unsigned long irqflags,
>   }
>  
>   if (IS_ENABLED(CONFIG_USB_ISP1761_UDC) && !udc_disabled) {
> - ret = isp1760_udc_register(isp, irq, irqflags | IRQF_SHARED |
> -IRQF_DISABLED);
> + ret = isp1760_udc_register(isp, irq, irqflags);
>   if (ret < 0) {
> >   isp1760_hcd_unregister(&isp->hcd);
>   return ret;
> diff --git a/drivers/usb/isp1760/isp1760-udc.c 
> b/drivers/usb/isp1760/isp1760-udc.c
> index 9612d79..0b46ff0 100644
> --- a/drivers/usb/isp1760/isp1760-udc.c
> +++ b/drivers/usb/isp1760/isp1760-udc.c
> @@ -1451,8 +1451,8 @@ int isp1760_udc_register(struct isp1760_device *isp, 
> int irq,
>  
>   sprintf(udc->irqname, "%s (udc)", devname);
>  
> - ret = request_irq(irq, isp1760_udc_irq, IRQF_SHARED | IRQF_DISABLED |
> -   irqflags, udc->irqname, udc);
> + ret = request_irq(irq, isp1760_udc_irq, IRQF_SHARED | irqflags,
> +   udc->irqname, udc);
>   if (ret < 0)
>   goto error;
>  
> -- 
> 1.9.1
> 

Re: [PATCH v5 09/11] drm/tegra: Reset the SOR on probe

2015-03-02 Thread Simon Glass
Hi,

On 2 March 2015 at 01:41, Alexandre Courbot  wrote:
>
> On Thu, Feb 12, 2015 at 5:51 PM, Tomeu Vizoso
>  wrote:
> > As there isn't a way for the firmware on the Nyan chromebooks to hand
> > over the display to the kernel.
>
> Could this have a side-effect on models for which the firmware *does*
> hand over the display to the kernel? E.g. temporary glitch or black
> screen?
>
> This is probably ok though, as such a handing over would need to be
> documented in the firmware/kernel command line, and could thus be
> caught to disable that code block if needed.

Is there a general way in which this hand-over is done, e.g. with a
device tree binding?

Regards,
Simon


Re: [PATCH perf/core 2/4] perf-probe: Fix to handle aliased symbols in glibc

2015-03-02 Thread Arnaldo Carvalho de Melo
Em Mon, Mar 02, 2015 at 09:49:53PM +0900, Masami Hiramatsu escreveu:
> With this patch;
>   -
>   # ./perf probe -x /usr/lib64/libc-2.17.so -V malloc
>   Available variables at malloc
>   @<__libc_malloc+0>
>   size_t  bytes
>   # ./perf probe -x /usr/lib64/libc-2.17.so -a "malloc bytes"
>   Added new event:
> probe_libc:malloc(on malloc in /usr/lib64/libc-2.17.so with bytes)
> 
>   You can now use it in all perf tools, such as:
> 
>   perf record -e probe_libc:malloc -aR sleep 1

> Reported-by: Arnaldo Carvalho de Melo 

Humm, not working for me, after the patch:

[root@ssdandy ~]# perf probe -x /usr/lib64/libc-2.17.so -V malloc
Available variables at malloc
@<__malloc_check_init+96>
(No matched variables)
[root@ssdandy ~]#

And then the one asking for 'bytes' to be collected fails.

After processing the other patches I'll try to debug this...

[root@ssdandy ~]# cat /etc/redhat-release 
Red Hat Enterprise Linux Server release 7.0 (Maipo)
[root@ssdandy ~]# rpm -q glibc glibc-debuginfo
glibc-2.17-55.el7_0.3.x86_64
glibc-debuginfo-2.17-55.el7_0.1.x86_64
[root@ssdandy ~]#
[acme@ssdandy linux]$ readelf -Ws /usr/lib64/libc-2.17.so| grep malloc
   438: 000800c0   245 FUNCGLOBAL DEFAULT   12 
__libc_malloc@@GLIBC_2.2.5
   545: 00082320   239 FUNCGLOBAL DEFAULT   12 
malloc_info@@GLIBC_2.10
   810: 000820c0   490 FUNCWEAK   DEFAULT   12 
malloc_stats@@GLIBC_2.2.5
   981: 000802e0   507 FUNCWEAK   DEFAULT   12 
malloc_get_state@@GLIBC_2.2.5
  1077: 003ba740 8 OBJECT  WEAK   DEFAULT   32 
__malloc_hook@@GLIBC_2.2.5
  1170: 000800c0   245 FUNCGLOBAL DEFAULT   12 malloc@@GLIBC_2.2.5
  1204: 00080d30   222 FUNCWEAK   DEFAULT   12 
malloc_usable_size@@GLIBC_2.2.5
  1450: 00081d50   604 FUNCWEAK   DEFAULT   12 
malloc_trim@@GLIBC_2.2.5
  1767: 003bca60 8 OBJECT  WEAK   DEFAULT   33 
__malloc_initialize_hook@@GLIBC_2.2.5
  2061: 000814f0  1286 FUNCWEAK   DEFAULT   12 
malloc_set_state@@GLIBC_2.2.5
95: 003bbaa0 4 OBJECT  LOCAL  DEFAULT   33 cache_malloced
  1004:  0 FILELOCAL  DEFAULT  ABS malloc.c
  1005: 0007b060   275 FUNCLOCAL  DEFAULT   12 ptmalloc_lock_all
  1011: 003bcb30 8 OBJECT  LOCAL  DEFAULT   33 save_malloc_hook
  1013: 000801c0   285 FUNCLOCAL  DEFAULT   12 malloc_atfork
  1017: 0007b180   158 FUNCLOCAL  DEFAULT   12 ptmalloc_unlock_all2
  1030: 0007ba90   174 FUNCLOCAL  DEFAULT   12 ptmalloc_unlock_all
  1033: 0007bb40   232 FUNCLOCAL  DEFAULT   12 malloc_printerr
  1057: 0007c3d0  1518 FUNCLOCAL  DEFAULT   12 malloc_consolidate
  1089: 0007dee0  5195 FUNCLOCAL  DEFAULT   12 _int_malloc
  1100: 0007f330   282 FUNCLOCAL  DEFAULT   12 malloc_check
  1117: 003bca90 4 OBJECT  LOCAL  DEFAULT   33 disallow_malloc_check
  1118: 003bcaa0 4 OBJECT  LOCAL  DEFAULT   33 using_malloc_checking
  1136: 00080fe0  1015 FUNCLOCAL  DEFAULT   12 ptmalloc_init.part.8
  1138: 000813e021 FUNCLOCAL  DEFAULT   12 ptmalloc_init
  1139: 0008140060 FUNCLOCAL  DEFAULT   12 malloc_hook_ini
  1160: 00082fc0   495 FUNCLOCAL  DEFAULT   12 mallochook
  1162: 003bcbf0 8 OBJECT  LOCAL  DEFAULT   33 old_malloc_hook
  1181: 003bcc40 8 OBJECT  LOCAL  DEFAULT   33 tr_old_malloc_hook
  1182: 00083fc0   189 FUNCLOCAL  DEFAULT   12 tr_mallochook
  1194: 003bcc60 8 OBJECT  LOCAL  DEFAULT   33 malloc_trace_buffer
  3673: 003ba170 4 OBJECT  LOCAL  DEFAULT   32 
__libc_malloc_initialized
  3734: 000814f0  1286 FUNCLOCAL  DEFAULT   12 __malloc_set_state
  4047: 00080d30   222 FUNCLOCAL  DEFAULT   12 __malloc_usable_size
  4101: 00081d50   604 FUNCLOCAL  DEFAULT   12 __malloc_trim
  4338: 000800c0   245 FUNCLOCAL  DEFAULT   12 __GI___libc_malloc
  4531: 000802e0   507 FUNCLOCAL  DEFAULT   12 __malloc_get_state
  4569: 000820c0   490 FUNCLOCAL  DEFAULT   12 __malloc_stats
  4849: 00080050   107 FUNCLOCAL  DEFAULT   12 __malloc_check_init
  5351: 000800c0   245 FUNCLOCAL  DEFAULT   12 __malloc
  5490: 003bca60 8 OBJECT  WEAK   DEFAULT   33 
__malloc_initialize_hook
  5571: 000814f0  1286 FUNCWEAK   DEFAULT   12 malloc_set_state
  5868: 000800c0   245 FUNCGLOBAL DEFAULT   12 malloc
  5878: 00082320   239 FUNCGLOBAL DEFAULT   12 malloc_info
  5988: 00081d50   604 FUNCWEAK   DEFAULT   12 malloc_trim
  6526: 003ba740 8 OBJECT  WEAK   DEFAULT   32 __malloc_hook
  6615: 00080d30   222 FUNCWEAK   DEFAULT   12 malloc_usable_size
  7087: 000802e0   507 FUNCWEAK   DEFAULT   12 malloc_get_state
  7104: 

Re: [patch v2 1/3] mm: remove GFP_THISNODE

2015-03-02 Thread Christoph Lameter
On Mon, 2 Mar 2015, Vlastimil Babka wrote:

> So it would be IMHO better for longer-term maintainability to have the
> relevant __GFP_THISNODE callers pass also __GFP_NO_KSWAPD to denote these
> opportunistic allocation attempts, instead of having this subtle semantic

You are thinking about an opportunistic allocation attempt in SLAB?

AFAICT SLAB allocations should trigger reclaim.


Re: [PATCH] capabilities: Ambient capability set V2

2015-03-02 Thread Christoph Lameter
On Sat, 28 Feb 2015, Serge E. Hallyn wrote:

> Your example program is not filling in pI though?

The setcap sets the inheritance bit. When the binary runs the i bits
should be set.

> Ah, i see why.  In get_file_caps() you are still assigning
>
>   fP = pA
>
> if the file has no file capabilities.  so then you are actually
> doing
>
>pP' = (X & (fP | pA)) | (pI & (fI | pA))
> rather than
>pP' = (X & fP) | (pI & (fI | pA))


I thought that fP, fI and pI = {} since the file has no caps
so this comes out as

pP' = pA
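
Spelling the two variants out side by side (pseudo-C over capability
bitmasks; pP_new stands for pP') may help:

	/* what the code currently computes: fP is treated as pA
	 * when the file has no capabilities */
	pP_new = (X & (fP | pA)) | (pI & (fI | pA));

	/* the intended rule */
	pP_new = (X & fP) | (pI & (fI | pA));

	/* with fP = fI = pI = {} and a full bounding set X, the first
	 * reduces to pP_new = pA, the second to pP_new = {} */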

> Other than that, the patch is looking good to me.  We should
> consider emitting an audit record when a task fills in its

How do I do that?

> pA, and I do still wonder whether we should be requiring
> CAP_SETFCAP (unsure how best to think of it).  But assuming the
> fP = pA was not intended, I think this largely does the right
> thing.


Re: [PATCH v3 3/4] mm: cma: add list of currently allocated CMA buffers to debugfs

2015-03-02 Thread Stefan Strogin
Hi Michał,

Thank you for the answer.

On 25/02/15 00:32, Michal Nazarewicz wrote:
> On Tue, Feb 24 2015, Stefan Strogin  wrote:
>> --- a/mm/cma.h
>> +++ b/mm/cma.h
>> @@ -11,8 +13,32 @@ struct cma {
>>  struct hlist_head mem_head;
>>  spinlock_t mem_head_lock;
>>  #endif
>> +#ifdef CONFIG_CMA_BUFFER_LIST
>> +struct list_head buffer_list;
>> +struct mutex list_lock;
>> +#endif
>>  };
>>  
>> +#ifdef CONFIG_CMA_BUFFER_LIST
>> +struct cma_buffer {
>> +unsigned long pfn;
>> +unsigned long count;
>> +pid_t pid;
>> +char comm[TASK_COMM_LEN];
>> +#ifdef CONFIG_CMA_ALLOC_STACKTRACE
>> +unsigned long trace_entries[16];
>> +unsigned int nr_entries;
>> +#endif
>> +struct list_head list;
>> +};
> 
> This structure is only ever used in cma_debug.c so is there a reason
> to define it in the header file?
> 

No, there isn't. Thanks. I'll move it to cma_debug.c

>> +
>> +extern int cma_buffer_list_add(struct cma *cma, unsigned long pfn, int 
>> count);
>> +extern void cma_buffer_list_del(struct cma *cma, unsigned long pfn, int 
>> count);
>> +#else
>> +#define cma_buffer_list_add(cma, pfn, count) { }
>> +#define cma_buffer_list_del(cma, pfn, count) { }
>> +#endif /* CONFIG_CMA_BUFFER_LIST */
>> +
>>  extern struct cma cma_areas[MAX_CMA_AREAS];
>>  extern unsigned cma_area_count;
> 
> 
>> +#ifdef CONFIG_CMA_BUFFER_LIST
>> +static ssize_t cma_buffer_list_read(struct file *file, char __user *userbuf,
>> +size_t count, loff_t *ppos)
>> +{
>> +struct cma *cma = file->private_data;
>> +struct cma_buffer *cmabuf;
>> +char *buf;
>> +int ret, n = 0;
>> +#ifdef CONFIG_CMA_ALLOC_STACKTRACE
>> +struct stack_trace trace;
>> +#endif
>> +
>> +if (*ppos < 0 || !count)
>> +return -EINVAL;
>> +
>> +buf = vmalloc(count);
>> +if (!buf)
>> +return -ENOMEM;
>> +
>> +mutex_lock(&cma->list_lock);
>> +list_for_each_entry(cmabuf, &cma->buffer_list, list) {
>> +n += snprintf(buf + n, count - n,
>> +  "0x%llx - 0x%llx (%lu kB), allocated by pid %u 
>> (%s)\n",
>> +  (unsigned long long)PFN_PHYS(cmabuf->pfn),
>> +  (unsigned long long)PFN_PHYS(cmabuf->pfn +
>> +  cmabuf->count),
>> +  (cmabuf->count * PAGE_SIZE) >> 10, cmabuf->pid,
>> +  cmabuf->comm);
>> +
>> +#ifdef CONFIG_CMA_ALLOC_STACKTRACE
>> +trace.nr_entries = cmabuf->nr_entries;
>> +trace.entries = &cmabuf->trace_entries[0];
>> +n += snprint_stack_trace(buf + n, count - n, &trace, 0);
>> +n += snprintf(buf + n, count - n, "\n");
>> +#endif
>> +}
>> +mutex_unlock(&cma->list_lock);
>> +
>> +ret = simple_read_from_buffer(userbuf, count, ppos, buf, n);
>> +vfree(buf);
>> +
>> +return ret;
>> +}
> 
> So in practice user space must allocate buffer big enough to read the
> whole file into memory.  Calling read(2) with some count will never read
> anything past the first count bytes of the file.
> 

My fault. You are right.
I'm not sure how to do the output nicely... I could use *ppos to point at
the number of the next list entry to read (like that is used in
read_page_owner()). But in this case the list could be changed before we
finish reading, which is bad.
Or we could use seq_files like in v1, iterating over buffer_list
entries. But seq_print_stack_trace() has to be added.
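
A rough shape of that seq_file variant (a sketch only; locking across
read(2) calls and seq_print_stack_trace() would still need sorting out):

	static void *cma_seq_start(struct seq_file *s, loff_t *pos)
	{
		struct cma *cma = s->private;

		mutex_lock(&cma->list_lock);
		return seq_list_start(&cma->buffer_list, *pos);
	}

	static void *cma_seq_next(struct seq_file *s, void *v, loff_t *pos)
	{
		struct cma *cma = s->private;

		return seq_list_next(v, &cma->buffer_list, pos);
	}

	static void cma_seq_stop(struct seq_file *s, void *v)
	{
		struct cma *cma = s->private;

		mutex_unlock(&cma->list_lock);
	}

	static int cma_seq_show(struct seq_file *s, void *v)
	{
		struct cma_buffer *cmabuf = list_entry(v, struct cma_buffer, list);

		seq_printf(s, "0x%llx - 0x%llx (%lu kB), allocated by pid %u (%s)\n",
			   (unsigned long long)PFN_PHYS(cmabuf->pfn),
			   (unsigned long long)PFN_PHYS(cmabuf->pfn + cmabuf->count),
			   (cmabuf->count * PAGE_SIZE) >> 10, cmabuf->pid,
			   cmabuf->comm);
		return 0;
	}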


[PATCH] Revert "drm/i915: Switch planes from transitional helpers to full atomic helpers"

2015-03-02 Thread Daniel Vetter
This reverts commit 3f678c96abb43a977d2ea41aefccdc49e8a3e896.

We've been a bit too optimistic with this one here :(

The trouble is that internally we're still using these plane
update/disable hooks. Which was totally ok pre-atomic since the drm
core did all the book-keeping updating and these just mostly updated
hw state. But with atomic there's lots more going on, and it causes
heaps of trouble with the load detect code.

This one specifically caused a deadlock since both the load detect code
and the nested plane atomic helper functions tried to grab the same
locks. It only blows up because of the evil tricks we play with the
implicit ww acquire context, though.

Applying this revert unearths the NULL deref on already freed
framebuffer objects reported as a regression in 4.0 by various people.

Fixing this will be fairly invasive, hence revert even for the
4.1-next queue.

Cc: Matt Roper 
Cc: Linus Torvalds 
Cc: Paul Bolle 
Signed-off-by: Daniel Vetter 
---
Just to make it really clear: This is 4.1-next material. It's simply
the explanation for why we didn't notice the oops ourselves. The 4.0
oops itself looks like some glue lacking in the load detect code,
still working on that one.
-Daniel
---
 drivers/gpu/drm/i915/intel_display.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 3156d77b2215..cc3305e30c1b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12179,8 +12179,8 @@ void intel_plane_destroy(struct drm_plane *plane)
 }
 
 const struct drm_plane_funcs intel_plane_funcs = {
-   .update_plane = drm_atomic_helper_update_plane,
-   .disable_plane = drm_atomic_helper_disable_plane,
+   .update_plane = drm_plane_helper_update,
+   .disable_plane = drm_plane_helper_disable,
.destroy = intel_plane_destroy,
.set_property = drm_atomic_helper_plane_set_property,
.atomic_get_property = intel_plane_atomic_get_property,
-- 
1.8.4.rc3



Re: [GIT PULL] microcode loader updates

2015-03-02 Thread Quentin Casasnovas
On Mon, Mar 02, 2015 at 04:04:28PM +0100, Borislav Petkov wrote:
> 
> Ok, ok, you got me persuaded.

Oh. that's unexpected :)

> 
> Better?
> 
> :-)
> 

I prefer it, thanks!

Quentin


Re: [PATCH 05/11] perf data: Add tracepoint events fields CTF conversion support

2015-03-02 Thread Arnaldo Carvalho de Melo
Em Sun, Mar 01, 2015 at 02:20:43PM +0100, Jiri Olsa escreveu:
> On Wed, Feb 25, 2015 at 04:23:44PM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Fri, Feb 20, 2015 at 11:17:02PM +0100, Jiri Olsa escreveu:
> > > From: Sebastian Andrzej Siewior 
> > > Adding support to convert tracepoint event fields into CTF
> > > event fields.
> >  
> > > We parse each tracepoint event for CTF conversion and add
> > > tracepoint fields as regular CTF event fields, so they
> > > appear in babeltrace output like:
> >  
> > >   $ babeltrace ./ctf-data/
> > >   ...
> > >   [09:02:00.950703057] (+?.?) sched:sched_stat_runtime: { }, { 
> > > perf_ip = ... SNIP ... common_type = 298, common_flags = 1, \
> > >   common_preempt_count = 0, common_pid = 31813, comm = "perf", pid = 
> > > 31813, runtime = 458800, vruntime = 52059858071 }
> > >   ...
> > 
> > Processed the previous patches, everything ok:
> > 
> > [acme@ssdandy linux]$ ls -la perf.data
> > ls: cannot access perf.data: No such file or directory
> > [acme@ssdandy linux]$ trace record usleep 1
> > [ perf record: Woken up 1 times to write data ]
> > [ perf record: Captured and wrote 0.029 MB perf.data (88 samples) ]
> > [acme@ssdandy linux]$ ls -la perf.data
> > -rw---. 1 acme acme 5399896 Fev 25 16:19 perf.data
> > [acme@ssdandy linux]$ perf evlist
> > raw_syscalls:sys_enter
> > raw_syscalls:sys_exit
> > [acme@ssdandy linux]$ perf evlist -v
> > raw_syscalls:sys_enter: sample_freq=1, type: 2, config: 75, size: 104, 
> > sample_type: IP|TID|TIME|ID|CPU|PERIOD|RAW, read_format: ID, disabled: 1, 
> > inherit: 1, mmap: 1, mmap2: 1, comm: 1, comm_exec: 1, enable_on_exec: 1, 
> > task: 1, sample_id_all: 1, exclude_guest: 1
> > raw_syscalls:sys_exit: sample_freq=1, type: 2, config: 74, size: 104, 
> > sample_type: IP|TID|TIME|ID|CPU|PERIOD|RAW, read_format: ID, disabled: 1, 
> > inherit: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1
> > [acme@ssdandy linux]$ perf data convert --to-ctf=./ctf-data/
> > [ perf data convert: Converted 'perf.data' into CTF data './ctf-data/' ]
> > [ perf data convert: Converted and wrote 0.009 MB (88 samples) ]
> > [acme@ssdandy linux]$ babeltrace ./ctf-data/
> > [23:48:47.557933780] (+?.?) raw_syscalls:sys_exit: { }, { perf_ip = 
> > 0x81020FBA, perf_tid = 5093, perf_pid = 5093, perf_id = 1512, 
> > perf_cpu = 3, perf_period = 1 }
> > [23:48:47.557957461] (+0.23681) raw_syscalls:sys_enter: { }, { perf_ip 
> > = 0x81020EA6, perf_tid = 5093, perf_pid = 5093, perf_id = 1504, 
> > perf_cpu = 3, perf_period = 1 }
> > [23:48:47.557958406] (+0.00945) raw_syscalls:sys_exit: { }, { perf_ip = 
> > 0x81020FBA, perf_tid = 5093, perf_pid = 5093, perf_id = 1512, 
> > perf_cpu = 3, perf_period = 1 }
> > [23:48:47.557973567] (+0.15161) raw_syscalls:sys_enter: { }, { perf_ip 
> > = 0x81020EA6, perf_tid = 5093, perf_pid = 5093, perf_id = 1504, 
> > perf_cpu = 3, perf_period = 1 }
> > [23:48:47.557976047] (+0.02480) raw_syscalls:sys_exit: { }, { perf_ip = 
> > 0x81020FBA, perf_tid = 5093, perf_pid = 5093, perf_id = 1512, 
> > perf_cpu = 3, perf_period = 1 }
> > [23:48:47.557985774] (+0.09727) raw_syscalls:sys_enter: { }, { perf_ip 
> > = 0x81020EA6, perf_tid = 5093, perf_pid = 5093, perf_id = 1504, 
> > perf_cpu = 3, perf_period = 1 }
> > [23:48:47.557990826] (+0.05052) raw_syscalls:sys_exit: { }, { perf_ip = 
> > 0x81020FBA, perf_tid = 5093, perf_pid = 5093, perf_id = 1512, 
> > perf_cpu = 3, perf_period = 1 }
> > 
> > 
> > But then I apply this patch (convert tracepoint events fields into CTF 
> > event fields) and:
> > 
> > [acme@ssdandy linux]$ perf data convert --to-ctf=./ctf-data/
> > [ perf data convert: Converted 'perf.data' into CTF data './ctf-data/' ]
> > [ perf data convert: Converted and wrote 0.009 MB (88 samples) ]
> > [acme@ssdandy linux]$ babeltrace ./ctf-data/
> > [error] Packet size (18446744073709551615 bits) is larger than remaining 
> > file size (262144 bits).
> > [error] Stream index creation error.
> > [error] Open file stream error.
> > [warning] [Context] Cannot open_trace of format ctf at path ./ctf-data.
> > [warning] [Context] cannot open trace "./ctf-data" from ./ctf-data/ for 
> > reading.
> > [error] Cannot open any trace for reading.
> > 
> > [error] opening trace "./ctf-data/" for reading.
> > 
> > [error] none of the specified trace paths could be opened.
> > 
> > [acme@ssdandy linux]$
> > 
> > It stops working.
> > 
> > [acme@ssdandy linux]$ ls -la ctf-data/
> > total 44
> > drwxrwx---.  2 acme acme41 Fev 25 16:12 .
> > drwxrwxr-x. 28 acme acme  4096 Fev 25 16:19 ..
> > -rw-rw.  1 acme acme  4666 Fev 25 16:21 metadata
> > -rw-rw.  1 acme acme 32768 Fev 25 16:21 perf_stream_0
> > [acme@ssdandy linux]$
> > 
> > Can you try to reproduce this? The ctf-data/metadata file is below:
> 
> hum, i just tried and can't reproduce this one.. any chance of mixed
> babeltrace installations? How did you install babeltrace 

Re: [PATCH 1/9] HSI: cmt_speech: Add cmt-speech driver

2015-03-02 Thread Sebastian Reichel
Hi Oliver,

On Mon, Mar 02, 2015 at 11:22:33AM +0100, Oliver Neukum wrote:
> > +static ssize_t cs_char_read(struct file *file, char __user *buf, size_t 
> > count,
> > +   loff_t *unused)
> > +{
> > +   struct cs_char *csdata = file->private_data;
> > +   u32 data;
> > +   ssize_t retval;
> > +
> > +   if (count < sizeof(data))
> > +   return -EINVAL;
> > +
> > +   for ( ; ; ) {
> > +   DEFINE_WAIT(wait);
> > +
> > +   spin_lock_bh(&csdata->lock);
> > +   if (!list_empty(&csdata->chardev_queue)) {
> > +   data = cs_pop_entry(&csdata->chardev_queue);
> > +   } else if (!list_empty(&csdata->dataind_queue)) {
> > +   data = cs_pop_entry(&csdata->dataind_queue);
> > +   --csdata->dataind_pending;
> > +
> > +   } else {
> > +   data = 0;
> > +   }
> > +   spin_unlock_bh(&csdata->lock);
> > +
> > +   if (data)
> > +   break;
> > +   if (file->f_flags & O_NONBLOCK) {
> > +   retval = -EAGAIN;
> > +   goto out;
> > +   } else if (signal_pending(current)) {
> > +   retval = -ERESTARTSYS;
> > +   goto out;
> > +   }
> > +   prepare_to_wait_exclusive(&csdata->wait, &wait,
> > +   TASK_INTERRUPTIBLE);
> > +   schedule();
> > +   finish_wait(&csdata->wait, &wait);
> > +   }
> > +
> > +   retval = put_user(data, (u32 __user *)buf);
> > +   if (!retval)
> > +   retval = sizeof(data);
> > +
> > +out:
> > +   return retval;
> > +}
> > +
> > +static ssize_t cs_char_write(struct file *file, const char __user *buf,
> > +   size_t count, loff_t *unused)
> > +{
> > +   struct cs_char *csdata = file->private_data;
> > +   u32 data;
> > +   int err;
> > +   ssize_t retval;
> > +
> > +   if (count < sizeof(data))
> > +   return -EINVAL;
> > +
> > +   if (get_user(data, (u32 __user *)buf))
> > +   retval = -EFAULT;
> > +   else
> > +   retval = count;
> 
> You want to execute the command even if you got -EFAULT?
> That is highly unusual.

I will change this in PATCHv2.

> > +
> > +   err = cs_hsi_command(csdata->hi, data);
> > +   if (err < 0)
> > +   retval = err;
> > +
> > +   return retval;
> > +}
> > +
> > +static long cs_char_ioctl(struct file *file, unsigned int cmd,
> > +   unsigned long arg)
> > +{
> > +   struct cs_char *csdata = file->private_data;
> > +   int r = 0;
> > +
> > +   switch (cmd) {
> > +   case CS_GET_STATE: {
> > +   unsigned int state;
> > +
> > +   state = cs_hsi_get_state(csdata->hi);
> > +   if (copy_to_user((void __user *)arg, &state, sizeof(state)))
> > +   r = -EFAULT;
> > +   }
> > +   break;
> > +   case CS_SET_WAKELINE: {
> > +   unsigned int state;
> > +
> > +   if (copy_from_user(&state, (void __user *)arg, sizeof(state)))
> > +   r = -EFAULT;
> > +   else
> > +   cs_hsi_set_wakeline(csdata->hi, state);
> 
> No sanity checking for state?

Will be added in PATCHv2, so that -EINVAL is returned for values > 1.
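
Presumably something like this (a sketch of the v2 shape):

	if (copy_from_user(&state, (void __user *)arg, sizeof(state)))
		r = -EFAULT;
	else if (state > 1)
		r = -EINVAL;
	else
		cs_hsi_set_wakeline(csdata->hi, state);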

> > +   }
> > +   break;
> > +   case CS_GET_IF_VERSION: {
> > +   unsigned int ifver = CS_IF_VERSION;
> > +
> > +   if (copy_to_user((void __user *)arg, &ifver, sizeof(ifver)))
> > +   r = -EFAULT;
> > +   break;
> > +   }
> > +   case CS_CONFIG_BUFS: {
> > +   struct cs_buffer_config buf_cfg;
> > +
> > +   if (copy_from_user(&buf_cfg, (void __user *)arg,
> > +   sizeof(buf_cfg)))
> > +   r = -EFAULT;
> > +   else
> > +   r = cs_hsi_buf_config(csdata->hi, &buf_cfg);
> 
> Sanity checking?

cs_hsi_buf_config() calls check_buf_params().

> > +   break;
> > +   }
> > +   default:
> > +   r = -ENOTTY;
> > +   break;
> > +   }
> > +
> > +   return r;
> > +}
> > +
> > +static int cs_char_mmap(struct file *file, struct vm_area_struct *vma)
> > +{
> > +   if (vma->vm_end < vma->vm_start)
> > +   return -EINVAL;
> > +
> > +   if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) != 1)
> > +   return -EINVAL;
> > +
> > +   vma->vm_flags |= VM_RESERVED;
> > +   vma->vm_ops = _char_vm_ops;
> > +   vma->vm_private_data = file->private_data;
> > +
> > +   return 0;
> > +}
> > +
> > +static int cs_char_open(struct inode *unused, struct file *file)
> > +{
> > +   int ret = 0;
> > +
> > +   spin_lock_bh(&cs_char_data.lock);
> > +   if (cs_char_data.opened) {
> > +   ret = -EBUSY;
> > +   spin_unlock_bh(&cs_char_data.lock);
> > +   goto out;
> > +   }
> > +   cs_char_data.mmap_base = get_zeroed_page(GFP_ATOMIC);
> 
> This could be moved outside the locked sectionand use GFP_KERNEL.

Right, this is fixed by a follow up patch. I kept the patchset 

Re: [PATCH] x86: svm: make wbinvd faster

2015-03-02 Thread Bandan Das
Radim Krčmář  writes:

> 2015-03-01 21:29-0500, Bandan Das:
>> Joel Schopp  writes:
>> 
>> > From: David Kaplan 
>> > No need to re-decode WBINVD since we know what it is from the intercept.
>> >
>> > Signed-off-by: David Kaplan 
>> > [extracted from larger unlrelated patch, forward ported, tested]
>> > Signed-off-by: Joel Schopp 
>> > ---
>> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> > +static int wbinvd_interception(struct vcpu_svm *svm)
>> > +{
> >> > +  kvm_emulate_wbinvd(&svm->vcpu);
> >> > +  skip_emulated_instruction(&svm->vcpu);
>> > +  return 1;
>> > +}
>> > +
>> > +
>> Can't we merge this to kvm_emulate_wbinvd, and just call that function
>> directly for both vmx and svm ?
>
> kvm_emulate_wbinvd() lives in x86.c and skip_emulated_instruction() is
> from svm.c/vmx.c:  so we'd have to create a new x86 op and change the
> emulator code as well ... it's probably better like this.

There's already one - kvm_x86_ops->skip_emulated_instruction

>> >  static int xsetbv_interception(struct vcpu_svm *svm)
>> >  {
> >> >u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
>> > @@ -3376,7 +3384,7 @@ static int (*const svm_exit_handlers[])(struct 
>> > vcpu_svm *svm) = {
>> >[SVM_EXIT_STGI] = stgi_interception,
>> >[SVM_EXIT_CLGI] = clgi_interception,
>> >[SVM_EXIT_SKINIT]   = skinit_interception,
>> > -  [SVM_EXIT_WBINVD]   = emulate_on_interception,
>> So, this means x86_emulate_insn() in emulate.c has no callers left for the
>> wbinvd case ? vmx calls kvm_emulate_wbinvd directly too..
>
> I think that invalid state emulation might still hit wbinvd.
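
For reference, the merge suggested above would look roughly like this (a
sketch only; the _noskip helper is hypothetical and the real refactor may
differ):

	/* x86.c: do the wbinvd work, then skip the instruction via the op */
	int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
	{
		kvm_emulate_wbinvd_noskip(vcpu);	/* hypothetical split-out helper */
		kvm_x86_ops->skip_emulated_instruction(vcpu);
		return 1;
	}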


Re: [PATCH v4 2/2] cpusets,isolcpus: add file to show isolated cpus in cpuset

2015-03-02 Thread Tejun Heo
On Mon, Mar 02, 2015 at 01:44:50PM +0100, Mike Galbraith wrote:
> Hm, I'm all system-disease-ified now (still hate the bloody thing),
> and have no problem isolating cpus via cpusets, modulo workqueues
> wanting a bat upside the head.

It shouldn't be difficult to teach workqueue pools to follow the same
rules.  This matters only for the unbound ones anyway, right?

Thanks.

-- 
tejun


Re: [RFC v2 0/4] fs/locks: Use plain percpu spinlocks instead of lglock to protect file_lock

2015-03-02 Thread Jeff Layton
On Mon,  2 Mar 2015 15:25:09 +0100
Daniel Wagner  wrote:

> Hi Jeff,
> 
> I've dropped the spinlock conversion for the time being. Maybe the
> last patch, which changes the usage of blocked_lock_lock, is still
> useful. And in case I can convince you of the spinlock conversion, it
> can easily be done on top of it. I think it also makes it simpler to
> review doing it this way after all.
> 
> cheers,
> daniel
> 
> v2:
>  - added a few lockdep assertion
>  - dropped spinlock conversion
>  
> v1:
>  - rebased on v3.19-8975-g3d88348
>  - split into smaller pieces
>  - fixed a wrong usage of __locks_insert/delete_block() and its posix version
>  - added seqfile helpers to avoid ugly open coded version
> 
> 
> Original cover letter:
> 
> I am looking at how to get rid of lglock. The reason being that -rt is
> not too happy with that lock, especially since it uses arch_spinlock_t
> and therefore is not changed into a mutex on -rt. I know no change is
> accepted that only fixes something for -rt alone. So here is my attempt
> to make things faster for mainline while fixing -rt.
> 
> There are two users of lglock at this point. fs/locks.c and
> kernel/stop_machine.c
> 
> I presume the fs/locks is the more interesting one in respect of
> performance. Let's have a look at that one first.
> 
> The lglock version of file_lock_lock is used in combination of
> blocked_lock_lock to protect file_lock's fl_link, fl_block, fl_next,
> blocked_hash and the percpu file_lock_list.
> 
> The plan is to reorganize the usage of the locks and what they protect
> so that the usage of the global blocked_lock_lock is reduced.
> 
> Whenever we insert a new lock we are going to grab besides the i_lock
> also the corresponding percpu file_lock_lock. The global
> blocked_lock_lock is only used when blocked_hash is involved.
> 
> file_lock_list exists to be able to produce the content of
> /proc/locks. For listing all the locks it seems a bit excessive to
> grab all locks at once. We should be okay just grabbing the
> corresponding lock when iterating over the percpu file_lock_list.
> 
> file_lock_lock now protects file_lock_list and fl_link, fl_block and
> fl_next alone. That means we need to define which file_lock_lock is
> used for all waiters. Luckily, fl_link_cpu can be reused for fl_block
> and fl_next.
> 
> I haven't found a good way around the open coded seq_ops
> (locks_start, locks_next, locks_stop). Maybe someone has a good idea
> how to handle the locks.
> 
> For performance testing I used
> git://git.samba.org/jlayton/lockperf.git and for correctness
> https://github.com/linux-test-project/ltp/tree/master/testcases/network/nfsv4/locks
> In case you are missing the posix03 results, my machine doesn't like
> it too much. The load brings it to its knees due to the very high
> load. Probably I need different parameters.
> 
> I didn't run excessive tests so far, because I am waiting for getting
> access on a bigger box compared to my small i7-4850HQ system. I hope
> to see larger improvements when there are more cores involved.
> 
> [...]
> 
> Cc: Alexander Viro 
> Cc: Jeff Layton 
> Cc: "J. Bruce Fields" 
> Cc: linux-fsde...@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> 
> Daniel Wagner (4):
>   locks: Remove unnecessary IS_POSIX test
>   locks: Add lockdep assertion for blocked_lock_lock
>   locks: Split insert/delete block functions into flock/posix parts
>   locks: Use blocked_lock_lock only to protect blocked_hash
> 
>  fs/locks.c | 124 
> +
>  1 file changed, 84 insertions(+), 40 deletions(-)
> 

These look good at first glance, but I do need to go over patches 3 and
4 in more detail.

FWIW, usually when I see "RFC" in the subject, I take it as a hint that
this is still work-in-progress and that you're looking for early feedback
on it, and hence it shouldn't be merged yet. Is that the case
here, or would I be OK to merge these?

-- 
Jeff Layton 


Re: [PATCH v2 2/2] cgroups: add an nproc subsystem

2015-03-02 Thread Tejun Heo
Hello,

On Fri, Feb 27, 2015 at 03:17:19PM +1100, Aleksa Sarai wrote:
> +config CGROUP_NPROC
> + bool "Process number limiting on cgroups"
> + depends on PAGE_COUNTER
> + help
> +   This options enables the setting of process number limits in the scope
> +   of a cgroup. Any attempt to fork more processes than is allowed in the
> +   cgroup will fail. This allows for more basic resource limitation that
> +   applies to a cgroup, similar to RLIMIT_NPROC (except that instead of
> +   applying to a process tree it applies to a cgroup).

Please reflect the rationale from this discussion thread in the commit
message and help text.  Also, I'd much prefer to name it pids
controller after the resource it's controlling.

> +struct nproc {
> + struct page_counter proc_counter;

I don't think it's a good idea to use page_counter outside memcg.
This is pretty much an implementation detail of memcg.  The only
reason that file is out there is because of the wacky tcp controller
which is somewhat part of memcg (and to be replaced by proper kmemcg).
Either use plain atomic_t or percpu_counter with controlled batch
value (e.g. up to 10% deviation allowed from the target or sth).  Given
that fork/exit is a pretty heavy path, just plain atomic_t is prolly
enough.
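
A plain atomic_t version could look something like this (names are
hypothetical; a real controller would also charge hierarchically):

	struct pids_cgroup {
		struct cgroup_subsys_state	css;
		atomic64_t			counter;	/* current number of tasks */
		int64_t				limit;		/* maximum allowed */
	};

	static int pids_try_charge(struct pids_cgroup *pids, int num)
	{
		int64_t new = atomic64_add_return(num, &pids->counter);

		if (new > pids->limit) {
			atomic64_sub(num, &pids->counter);	/* revert on failure */
			return -EAGAIN;
		}
		return 0;
	}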

> +static int nproc_can_attach(struct cgroup_subsys_state *css,
> + struct cgroup_taskset *tset)
> +{
> + struct nproc *nproc = css_nproc(css);
> + unsigned long num_tasks = 0;
> + struct task_struct *task;
> +
> + cgroup_taskset_for_each(task, tset)
> + num_tasks++;
> +
> + return nproc_add_procs(nproc, num_tasks);
> +}

can_attach() can't fail in the unified hierarchy.  Circumvention of
configuration by moving processes to children is prevented through
hierarchical limit enforcement.

> +static int nproc_write_limit(struct cgroup_subsys_state *css,
> +  struct cftype *cft, u64 val)
> +{
> + struct nproc *nproc = css_nproc(css);
> +
> + return page_counter_limit(&nproc->proc_counter, val);
> +}

Please make it handle "max".

> +static u64 nproc_read_limit(struct cgroup_subsys_state *css,
> + struct cftype *cft)
> +{
> + struct nproc *nproc = css_nproc(css);
> +
> + return nproc->proc_counter.limit;
> +}

Ditto when reading back.

> +static u64 nproc_read_max_limit(struct cgroup_subsys_state *css,
> +struct cftype *cft)
> +{
> + return PAGE_COUNTER_MAX;
> +}

And drop this file.

> +static u64 nproc_read_usage(struct cgroup_subsys_state *css,
> + struct cftype *cft)
> +{
> + struct nproc *nproc = css_nproc(css);
> +
> + return page_counter_read(&nproc->proc_counter);
> +}
> +
> +static struct cftype files[] = {
> + {
> + .name = "limit",
> + .write_u64 = nproc_write_limit,
> + .read_u64 = nproc_read_limit,
> + },

pids.max

> + {
> + .name = "max_limit",
> + .read_u64 = nproc_read_max_limit,
> + },
> + {
> + .name = "usage",
> + .read_u64 = nproc_read_usage,
> + },

pids.current

Thanks.

-- 
tejun


Re: [PATCH 21/32] wireless: use %*pb[l] to print bitmaps including cpumasks and nodemasks

2015-03-02 Thread Kalle Valo
Tejun Heo  writes:

> printk and friends can now format bitmaps using '%*pb[l]'.  cpumask
> and nodemask also provide cpumask_pr_args() and nodemask_pr_args()
> respectively which can be used to generate the two printf arguments
> necessary to format the specified cpu/nodemask.
>
> This patch is dependent on the following two patches.
>
>  lib/vsprintf: implement bitmap printing through '%*pb[l]'
>  cpumask, nodemask: implement cpumask/nodemask_pr_args()
>
> Please wait till the aforementioned patches are merged to mainline
> before applying to subsystem trees.
>
> Signed-off-by: Tejun Heo 
> Cc: Andrew Morton 
> Cc: "John W. Linville" 
> Cc: linux-wirel...@vger.kernel.org

This was already applied so I'm dropping it from my queue.
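
For reference, usage of the new specifier boils down to (assuming a valid
struct cpumask pointer):

	pr_info("watchdog cpumask: %*pbl\n", cpumask_pr_args(mask));	/* e.g. "0-3,8" */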

-- 
Kalle Valo


Re: [PATCH] perf tools: Improve 'libbabel' feature check failure message

2015-03-02 Thread Arnaldo Carvalho de Melo
Em Sat, Feb 28, 2015 at 10:18:49AM +0100, Ingo Molnar escreveu:
> 
> On Debian-ish systems libbabeltrace-dev should be suggested as a 
> package install as well.
> 
> Signed-off-by: Ingo Molnar 
> 
> diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
> index 40399c3d97d6..68328f517a2e 100644
> --- a/tools/perf/.gitignore
> +++ b/tools/perf/.gitignore

This definitely is not .gitignore ;-) Fixing it up...

- Arnaldo

> @@ -707,7 +706,7 @@ endif
>  
>  ifndef NO_LIBBABELTRACE
>ifeq ($(feature-libbabeltrace), 0)
> -msg := $(warning No libbabeltrace found, disables 'perf data' CTF format 
> support, please install libbabeltrace-devel/libbabeltrace-ctf-dev);
> +msg := $(warning No libbabeltrace found, disables 'perf data' CTF format 
> support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev);
>  NO_LIBBABELTRACE := 1
>else
>  CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS)


[PATCH] keyboard/tc3589x-keypad.c: set IRQF_ONESHOT flag to ensure IRQ request

2015-03-02 Thread Valentin Rothberg
Since commit 1c6c69525b40eb76de8adf039409722015927dc3 ("genirq: Reject
bogus threaded irq requests") threaded IRQs without a primary handler
need to be requested with IRQF_ONESHOT, otherwise the request will fail.

Currently, plat->irqtype is only set to IRQF_TRIGGER_FALLING.  This
patch sets the ONESHOT flag directly in request_threaded_irq() to
enforce the flag without being affected by future changes to
plat->irqtype.

Generated by: scripts/coccinelle/misc/irqf_oneshot.cocci

Signed-off-by: Valentin Rothberg 
---
 drivers/input/keyboard/tc3589x-keypad.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/input/keyboard/tc3589x-keypad.c 
b/drivers/input/keyboard/tc3589x-keypad.c
index 8ff612d..5639325 100644
--- a/drivers/input/keyboard/tc3589x-keypad.c
+++ b/drivers/input/keyboard/tc3589x-keypad.c
@@ -411,9 +411,9 @@ static int tc3589x_keypad_probe(struct platform_device 
*pdev)
 
input_set_drvdata(input, keypad);
 
-   error = request_threaded_irq(irq, NULL,
-   tc3589x_keypad_irq, plat->irqtype,
-   "tc3589x-keypad", keypad);
+   error = request_threaded_irq(irq, NULL, tc3589x_keypad_irq,
+plat->irqtype | IRQF_ONESHOT,
+"tc3589x-keypad", keypad);
if (error < 0) {
dev_err(>dev,
"Could not allocate irq %d,error %d\n",
-- 
1.9.1



Re: [RFC PATCH 0/4] make memtest a generic kernel feature

2015-03-02 Thread Baruch Siach
Hi Vladimir,

On Mon, Mar 02, 2015 at 02:55:41PM +, Vladimir Murzin wrote:
> Memtest is a simple feature which fills the memory with a given set of
> patterns and validates the memory contents; if bad memory regions are
> detected, it reserves them via the memblock API. Since the memblock API
> is widely used by other architectures, this feature can be enabled
> outside of the x86 world.
> 
> This patch set promotes memtest to live under the generic mm umbrella
> and enables the memtest feature for arm/arm64.
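
The core idea is simple enough to sketch (simplified; the real code
coalesces adjacent bad words into single reserved regions):

	static void memtest_pattern(u64 pattern, phys_addr_t start, phys_addr_t end)
	{
		u64 *p, *begin = __va(start), *last = __va(end);

		for (p = begin; p < last; p++)		/* fill */
			*p = pattern;
		for (p = begin; p < last; p++)		/* verify */
			if (*p != pattern)
				memblock_reserve(__pa(p), sizeof(*p));
	}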

Please update the architectures list in the 'memtest' entry at 
Documentation/kernel-parameters.txt.

baruch

-- 
 http://baruch.siach.name/blog/  ~. .~   Tk Open Systems
=}ooO--U--Ooo{=
   - bar...@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -


Re: [PATCH] perf tools: Improve Python feature detection messages

2015-03-02 Thread Arnaldo Carvalho de Melo
Em Sat, Feb 28, 2015 at 09:33:45AM +0100, Ingo Molnar escreveu:
> 
> Change the Python detection message from:
> 
>   config/Makefile:566: No python-config tool was found
>   config/Makefile:566: Python support will not be built
> 
> To:
> 
>   config/Makefile:565: No 'python-config' tool was found: disables Python 
> support - please install python-devel/python-dev
> 
> It's now a standard one-line message with a package install 
> suggestion, and it also uses the standard language used by other 
> feature detection messages.
> 
> Signed-off-by: Ingo Molnar 

These patches came with no --- separating the log message from the patch,
and the following hunk has no line number info for where to apply the
first hunk; fixing these up for you.

- Arnaldo
 
> diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
> index c3570b5f3bf3..44a14bd40e96 100644
> --- a/tools/perf/config/Makefile
> +++ b/tools/perf/config/Makefile
>  disable-python = $(eval $(disable-python_code))
>  define disable-python_code
>CFLAGS += -DNO_LIBPYTHON
> -  $(if $(1),$(warning No $(1) was found))
> -  $(warning Python support will not be built)
> +  $(warning $1)
>NO_LIBPYTHON := 1
>  endef
>  
>  ifdef NO_LIBPYTHON
> -  $(call disable-python)
> +  $(call disable-python,Python support disabled by user)
>  else
>  
>ifndef PYTHON
> -$(call disable-python,python interpreter)
> +$(call disable-python,No python interpreter was found: disables Python 
> support - please install python-devel/python-dev)
>else
>  PYTHON_WORD := $(call shell-wordify,$(PYTHON))
>  
>  ifndef PYTHON_CONFIG
> -  $(call disable-python,python-config tool)
> +  $(call disable-python,No 'python-config' tool was found: disables 
> Python support - please install python-devel/python-dev)
>  else
>  
>PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
> @@ -575,7 +574,7 @@ else
>FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
>  
>ifneq ($(feature-libpython), 1)
> -$(call disable-python,Python.h (for Python 2.x))
> +$(call disable-python,No 'Python.h' (for Python 2.x support) was 
> found: disables Python support - please install python-devel/python-dev)
>else
>  
>  ifneq ($(feature-libpython-version), 1)


Re: [PATCH v3] brcmfmac: avoid duplicated suspend/resume operation

2015-03-02 Thread Kalle Valo
Arend van Spriel  writes:

>> Now that there is no 3.20 version, my understanding is that this
>> patch will be in Linus' tree in 4.1-rc1, right?
>
> Yes. It will go into linux-next first, which you can consider to be an
> incubator where all stuff for the next release is integrated. Stuff
> will be added there until 4.0 is released. At that moment the merge
> window starts which moves all the stuff from linux-next into the
> mainline linux repo to prepare 4.1-rc1.
>
> Now regarding your patch I have to give a heads up. Our pending
> patches have been applied by Kalle and includes similar fix.

Yeah, Zhonghui's patch doesn't apply anymore. There is similar code in
wireless-drivers-next but still a bit different. So what should we do?
Is the driver ok now?

-- 
Kalle Valo


[PATCH v2 4/9] introduce struct slot_area to manage randomization slot info

2015-03-02 Thread Baoquan He
The kernel is expected to be randomly loaded anywhere in the whole
physical memory area, which could reach near 64T at most. In this case
there could be about 4*1024*1024 randomization slots. Hence the old
slot array would cost too much memory and can not be used any more.

Here introduce struct slot_area to manage randomization slot info in
one contiguous memory area excluding the avoid areas. slot_areas is
used to store all slot area info. Since setup_data is a linked list
and can contain many entries chained by pointers, excluding them will
split RAM into many smaller areas; here we only take the first 100
slot areas if there are too many of them.

Signed-off-by: Baoquan He 
---
 arch/x86/boot/compressed/aslr.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 6a22129..26610a2 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -215,8 +215,20 @@ static bool mem_avoid_overlap(struct mem_vector *img)
 
 static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
   CONFIG_PHYSICAL_ALIGN];
+
+struct slot_area {
+   unsigned long addr;
+   int num;
+};
+
+#define MAX_SLOT_AREA 100
+
+static struct slot_area slot_areas[MAX_SLOT_AREA];
+
 static unsigned long slot_max;
 
+static unsigned long slot_area_index;
+
 static void slots_append(unsigned long addr)
 {
/* Overflowing the slots list should be impossible. */
-- 
1.9.3



Re: [PATCH] perf tools: Fix build error on ARCH=i386/x86_64/sparc64

2015-03-02 Thread Arnaldo Carvalho de Melo
Em Mon, Mar 02, 2015 at 10:56:00AM +0100, Jiri Olsa escreveu:
> On Mon, Mar 02, 2015 at 01:31:03PM +0900, Namhyung Kim wrote:
> > He Kuang reported that current perf tools failed to build when ARCH
> > variable was given like above.  It was because the name is different
> > from the internal directory name.  I can see that David's sparc64 build
> > has same problem.  So fix it by applying the sed conversion script to
> > the command line ARCH variable also, and fixing the converted name
> > there (i.e. i386/x86_64 -> x86, sparc64 -> sparc).
> > 
> > Reported-and-tested-by: He Kuang 
> > Cc: David Ahern 
> > Signed-off-by: Namhyung Kim 
> 
> Acked-by: Jiri Olsa 

Applied after resolving conflict with David's sparc64->sparc patch, that
had already been applied and merged by Ingo.

- Arnaldo
 
> thanks,
> jirka
> 
> > ---
> >  tools/perf/config/Makefile.arch | 23 +--
> >  1 file changed, 5 insertions(+), 18 deletions(-)
> > 
> > diff --git a/tools/perf/config/Makefile.arch 
> > b/tools/perf/config/Makefile.arch
> > index ff95a68741d1..e9720571341d 100644
> > --- a/tools/perf/config/Makefile.arch
> > +++ b/tools/perf/config/Makefile.arch
> > @@ -1,28 +1,15 @@
> > +ifndef ARCH
> > +ARCH := $(shell uname -m 2>/dev/null || echo not)
> > +endif
> >  
> > -uname_M := $(shell uname -m 2>/dev/null || echo not)
> > -
> > -RAW_ARCH := $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e 
> > s/sun4u/sparc64/ \
> > +ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
> > +  -e s/sun4u/sparc/ -e s/sparc64/sparc/ \
> >-e s/arm.*/arm/ -e s/sa110/arm/ \
> >-e s/s390x/s390/ -e s/parisc64/parisc/ \
> >-e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
> >-e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ 
> > \
> >-e s/tile.*/tile/ )
> >  
> > -# Additional ARCH settings for x86
> > -ifeq ($(RAW_ARCH),i386)
> > -  ARCH ?= x86
> > -endif
> > -
> > -ifeq ($(RAW_ARCH),x86_64)
> > -  ARCH ?= x86
> > -
> > -  ifneq (, $(findstring m32,$(CFLAGS)))
> > -RAW_ARCH := x86_32
> > -  endif
> > -endif
> > -
> > -ARCH ?= $(RAW_ARCH)
> > -
> >  LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
> >  ifeq ($(LP64), 1)
> >IS_64_BIT := 1
> > -- 
> > 2.2.2
> > 


Re: [GIT PULL] microcode loader updates

2015-03-02 Thread Borislav Petkov
On Mon, Mar 02, 2015 at 02:42:12PM +0100, Quentin Casasnovas wrote:
> It's just that this potential-but-very-very-likely-impossible kfree() on
> garbage wasn't present in the original code - so I thought changing the
> kmalloc() => kcalloc() was small enough to add in your serie.  I'd also be
> fine removing the early loop termination condition if you think it's dead
> code since that'll make sure this will never happen.  A static analyzer or
> maybe some cocinnelle semantic patches are likely to start complaining
> about this otherwise, I think.

Ok, ok, you got me persuaded.

---
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c 
b/arch/x86/kernel/cpu/microcode/intel_early.c
index 3fd583b4f576..2f49ab4ac0ae 100644
--- a/arch/x86/kernel/cpu/microcode/intel_early.c
+++ b/arch/x86/kernel/cpu/microcode/intel_early.c
@@ -189,8 +189,7 @@ save_microcode(struct mc_saved_data *mc_saved_data,
/*
 * Copy new microcode data.
 */
-   saved_ptr = kmalloc(mc_saved_count * sizeof(struct microcode_intel *),
-GFP_KERNEL);
+   saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), 
GFP_KERNEL);
if (!saved_ptr)
return -ENOMEM;
--

Better?

:-)

-- 
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--


[PATCH v2 5/9] add mem_min_overlap to find the first avoid region within a memory region

2015-03-02 Thread Baoquan He
Given a memory region, mem_min_overlap() will iterate over all avoid
regions to find the first one that overlaps with it. This function
will be used later.

Signed-off-by: Baoquan He 
---
 arch/x86/boot/compressed/aslr.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 26610a2..ded3959 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -213,6 +213,38 @@ static bool mem_avoid_overlap(struct mem_vector *img)
return false;
 }
 
+static unsigned long mem_min_overlap(struct mem_vector *img, struct mem_vector *out)
+{
+   int i;
+   struct setup_data *ptr;
+   unsigned long min = img->start + img->size;
+
+   for (i = 0; i < MEM_AVOID_MAX; i++) {
+   if (mem_overlaps(img, &mem_avoid[i]) && (mem_avoid[i].start < min)) {
+   *out = mem_avoid[i];
+   min = mem_avoid[i].start;
+   }
+   }
+
+   /* Check all entries in the setup_data linked list. */
+   ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data;
+   while (ptr) {
+   struct mem_vector avoid;
+
+   avoid.start = (unsigned long)ptr;
+   avoid.size = sizeof(*ptr) + ptr->len;
+
+   if (mem_overlaps(img, &avoid) && (avoid.start < min)) {
+   *out = avoid;
+   min = avoid.start;
+   }
+
+   ptr = (struct setup_data *)(unsigned long)ptr->next;
+   }
+
+   return min;
+}
+
 static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
   CONFIG_PHYSICAL_ALIGN];
 
-- 
1.9.3



[PATCH v2 1/9] remove an unused function parameter

2015-03-02 Thread Baoquan He
A cleanup to simplify the later change.

Signed-off-by: Baoquan He 
---
 arch/x86/boot/compressed/aslr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 7083c16..6a22129 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -142,7 +142,7 @@ static bool mem_overlaps(struct mem_vector *one, struct 
mem_vector *two)
 }
 
 static void mem_avoid_init(unsigned long input, unsigned long input_size,
-  unsigned long output, unsigned long output_size)
+  unsigned long output_size)
 {
u64 initrd_start, initrd_size;
u64 cmd_line, cmd_line_size;
@@ -349,7 +349,7 @@ unsigned char *choose_kernel_location(struct boot_params 
*params,
 
/* Record the various known unsafe memory ranges. */
mem_avoid_init((unsigned long)input, input_size,
-  (unsigned long)output, output_size);
+  output_size);
 
/* Walk e820 and find a random address. */
random = find_random_addr(choice, output_size);
-- 
1.9.3



[PATCH v2 6/9] change process_e820_entry to store slot info into slot_area

2015-03-02 Thread Baoquan He
For each passed-in e820 entry, check whether it overlaps with an avoid
area. If not, just calculate and store slot info into the related
slot_area. Otherwise iterate over all avoid areas to find the first
one, namely the one with the lowest starting address among all
overlapping avoid areas. Then split the region by excluding that avoid
area, store slot info for the preceding part, and process the remaining
part as a new e820 memory region. Repeat this until the whole e820
memory region is processed.

Signed-off-by: Baoquan He 
---
 arch/x86/boot/compressed/aslr.c | 71 ++---
 1 file changed, 52 insertions(+), 19 deletions(-)

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index ded3959..1c6fb31 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -280,20 +280,18 @@ static unsigned long slots_fetch_random(void)
return slots[get_random_long() % slot_max];
 }
 
-static void process_e820_entry(struct e820entry *entry,
+static int process_e820_entry(struct e820entry *entry,
   unsigned long minimum,
   unsigned long image_size)
 {
-   struct mem_vector region, img;
+   struct mem_vector region, img, out;
+   struct slot_area slot_area;
+   unsigned long min, start_orig;
 
/* Skip non-RAM entries. */
if (entry->type != E820_RAM)
return;
 
-   /* Ignore entries entirely above our maximum. */
-   if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
-   return;
-
/* Ignore entries entirely below our minimum. */
if (entry->addr + entry->size < minimum)
return;
@@ -305,6 +303,14 @@ static void process_e820_entry(struct e820entry *entry,
if (region.start < minimum)
region.start = minimum;
 
+repeat:
+
+   /* Return if slot area array is full */
+   if ( slot_area_index == MAX_SLOT_AREA )
+   return;
+
+   start_orig = region.start;
+
/* Potentially raise address to meet alignment requirements. */
region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
 
@@ -313,20 +319,47 @@ static void process_e820_entry(struct e820entry *entry,
return;
 
/* Reduce size by any delta from the original address. */
-   region.size -= region.start - entry->addr;
-
-   /* Reduce maximum size to fit end of image within maximum limit. */
-   if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
-   region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start;
-
-   /* Walk each aligned slot and check for avoided areas. */
-   for (img.start = region.start, img.size = image_size ;
-mem_contains(&region, &img) ;
-img.start += CONFIG_PHYSICAL_ALIGN) {
-   if (mem_avoid_overlap(&img))
-   continue;
-   slots_append(img.start);
+   region.size -= region.start - start_orig;
+
+   if ( region.size < image_size )
+   return;
+
+   if (!mem_avoid_overlap(&region)) {
+   slot_area.addr = region.start;
+   if ( image_size <= CONFIG_PHYSICAL_ALIGN)
+   slot_area.num = region.size / CONFIG_PHYSICAL_ALIGN;
+   else
+   slot_area.num = ( region.size - image_size ) /
+   CONFIG_PHYSICAL_ALIGN + 1;
+
+   if (slot_area.num > 0) {
+   slot_areas[slot_area_index++] = slot_area;
+   slot_max += slot_area.num;
+   }
+   return;
}
+
+   min = mem_min_overlap(&region, &out);
+
+   if ( min > region.start + image_size ) {
+   unsigned long size = min - region.start;
+
+   slot_area.addr = region.start;
+   if ( image_size <= CONFIG_PHYSICAL_ALIGN)
+slot_area.num = (min - region.start ) / 
CONFIG_PHYSICAL_ALIGN;
+else
+slot_area.num = ( min - region.start - image_size ) /
+CONFIG_PHYSICAL_ALIGN + 1;
+
+   if (slot_area.num > 0) {
+   slot_areas[slot_area_index++] = slot_area;
+   slot_max += slot_area.num;
+   }
+   }
+
+   region.size -= out.start - region.start + out.size;
+   region.start = out.start + out.size;
+   goto repeat;
 }
 
 static unsigned long find_random_addr(unsigned long minimum,
-- 
1.9.3



[PATCH v2 9/9] change the relocations behavior for kaslr on x86_64

2015-03-02 Thread Baoquan He
On x86_64, the old kaslr implementation randomizes only the physical
address at which the kernel is loaded. It then calculates the delta
between the physical address vmlinux was linked to load at and the
address where it is finally loaded. If the delta is not zero, namely
there is a new physical address where the kernel is actually
decompressed, relocation handling needs to be done: the delta is added
to the offset of each kernel symbol relocation, which moves the kernel
text mapping by the same amount.

Here the behavior is changed. Randomize both the physical address where
the kernel is decompressed and the virtual address where the kernel
text is mapped. The physical address can be randomized from where
vmlinux was linked to load up to the maximum physical memory, possibly
near 64T, while the virtual address gets a random offset between the
load address and CONFIG_RANDOMIZE_BASE_MAX_OFFSET, which is then added
to __START_KERNEL_map.

Relocation handling now depends only on virtual address randomization:
if and only if the virtual address is randomized to a different value
do we add the delta to the offset of the kernel relocs.
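
For illustration, a minimal user-space model of the new split (all
values below are fabricated; START_KERNEL_MAP and LOAD_PHYS_ADDR merely
mirror the real __START_KERNEL_map and LOAD_PHYSICAL_ADDR):

#include <stdio.h>

#define START_KERNEL_MAP 0xffffffff80000000UL /* models __START_KERNEL_map */
#define LOAD_PHYS_ADDR   0x1000000UL          /* models LOAD_PHYSICAL_ADDR, 16M */

int main(void)
{
    /* pretend KASLR picked these two independent random values */
    unsigned long phys = 0x180000000UL;      /* decompress at 6G */
    unsigned long virt_offset = 0x3c00000UL; /* 60M into the 1G window */

    /* the relocation delta now follows the virtual side only */
    long delta = (long)(virt_offset - LOAD_PHYS_ADDR);

    printf("decompress at %#lx, map text at %#lx, reloc delta %#lx\n",
           phys, START_KERNEL_MAP + virt_offset, (unsigned long)delta);
    return 0;
}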

Signed-off-by: Baoquan He 
---
 arch/x86/boot/compressed/aslr.c | 29 ++---
 arch/x86/boot/compressed/misc.c | 34 +-
 arch/x86/boot/compressed/misc.h | 22 --
 3 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 332a8c4..3114ae0 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -426,13 +426,13 @@ static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled)
 
 }
 
-unsigned char *choose_kernel_location(struct boot_params *params,
- unsigned char *input,
- unsigned long input_size,
- unsigned char *output,
- unsigned long output_size)
+void choose_kernel_location(struct boot_params *params,
+   unsigned char *input,
+   unsigned long input_size,
+   unsigned char **output,
+   unsigned long output_size,
+   unsigned char **virt_offset)
 {
-   unsigned long choice = (unsigned long)output;
unsigned long random;
 
 #ifdef CONFIG_HIBERNATION
@@ -455,17 +455,16 @@ unsigned char *choose_kernel_location(struct boot_params *params,
   output_size);
 
/* Walk e820 and find a random address. */
-   random = find_random_addr(choice, output_size);
-   if (!random) {
+   random = find_random_phy_addr((unsigned long)*output, output_size);
+   if (!random)
debug_putstr("KASLR could not find suitable E820 region...\n");
-   goto out;
-   }
+   else
+   *output = (unsigned char*)random;
 
-   /* Always enforce the minimum. */
-   if (random < choice)
-   goto out;
+   random = find_random_virt_offset(LOAD_PHYSICAL_ADDR, output_size);
+   *virt_offset = (unsigned char*)random;
 
-   choice = random;
 out:
-   return (unsigned char *)choice;
+   if (!random)
+   *virt_offset = (unsigned char*)LOAD_PHYSICAL_ADDR;
 }
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index c9d8187..53bb2dc 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -236,7 +236,8 @@ static void error(char *x)
 }
 
 #if CONFIG_X86_NEED_RELOCS
-static void handle_relocations(void *output, unsigned long output_len)
+static void handle_relocations(void *output, unsigned long output_len,
+   void *virt_offset)
 {
int *reloc;
unsigned long delta, map, ptr;
@@ -248,11 +249,6 @@ static void handle_relocations(void *output, unsigned long output_len)
 * and where it was actually loaded.
 */
delta = min_addr - LOAD_PHYSICAL_ADDR;
-   if (!delta) {
-   debug_putstr("No relocation needed... ");
-   return;
-   }
-   debug_putstr("Performing relocations... ");
 
/*
 * The kernel contains a table of relocation addresses. Those
@@ -263,6 +259,16 @@ static void handle_relocations(void *output, unsigned long output_len)
 */
map = delta - __START_KERNEL_map;
 
+   /*  */
+   if (IS_ENABLED(CONFIG_X86_64))
+   delta = (unsigned long)virt_offset - LOAD_PHYSICAL_ADDR;
+
+   if (!delta) {
+   debug_putstr("No relocation needed... ");
+   return;
+   }
+   debug_putstr("Performing relocations... ");
+
/*
 * Process relocations: 32 bit relocations first then 64 bit after.
 * Three sets of binary relocations are added to the end of the kernel
@@ -316,7 +322,8 @@ static void handle_relocations(void *output, 

[RFC PATCH 1/4] mm: move memtest under /mm

2015-03-02 Thread Vladimir Murzin
There is nothing platform dependent in the core memtest code, so other
platforms might benefit from this feature too.

Signed-off-by: Vladimir Murzin 
---
 arch/x86/Kconfig|   11 
 arch/x86/include/asm/e820.h |8 ---
 arch/x86/mm/Makefile|2 -
 arch/x86/mm/memtest.c   |  118 ---
 include/linux/memblock.h|8 +++
 lib/Kconfig.debug   |   11 
 mm/Makefile |1 +
 mm/memtest.c|  118 +++
 8 files changed, 138 insertions(+), 139 deletions(-)
 delete mode 100644 arch/x86/mm/memtest.c
 create mode 100644 mm/memtest.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c2fb8a8..a8a8a86 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -715,17 +715,6 @@ endif #HYPERVISOR_GUEST
 config NO_BOOTMEM
def_bool y
 
-config MEMTEST
-   bool "Memtest"
-   ---help---
- This option adds a kernel parameter 'memtest', which allows memtest
- to be set.
-   memtest=0, mean disabled; -- default
-   memtest=1, mean do 1 test pattern;
-   ...
-   memtest=4, mean do 4 test patterns.
- If you are unsure how to answer this question, answer N.
-
 source "arch/x86/Kconfig.cpu"
 
 config HPET_TIMER
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 779c2ef..3ab0537 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -40,14 +40,6 @@ static inline void e820_mark_nosave_regions(unsigned long limit_pfn)
 }
 #endif
 
-#ifdef CONFIG_MEMTEST
-extern void early_memtest(unsigned long start, unsigned long end);
-#else
-static inline void early_memtest(unsigned long start, unsigned long end)
-{
-}
-#endif
-
 extern unsigned long e820_end_of_ram_pfn(void);
 extern unsigned long e820_end_of_low_ram_pfn(void);
 extern u64 early_reserve_e820(u64 sizet, u64 align);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index c4cc740..a482d10 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -32,6 +32,4 @@ obj-$(CONFIG_AMD_NUMA)+= amdtopology.o
 obj-$(CONFIG_ACPI_NUMA)+= srat.o
 obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
 
-obj-$(CONFIG_MEMTEST)  += memtest.o
-
 obj-$(CONFIG_X86_INTEL_MPX)+= mpx.o
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
deleted file mode 100644
index 1e9da79..000
--- a/arch/x86/mm/memtest.c
+++ /dev/null
@@ -1,118 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/pfn.h>
-#include <linux/memblock.h>
-
-static u64 patterns[] __initdata = {
-   /* The first entry has to be 0 to leave memtest with zeroed memory */
-   0,
-   0xffffffffffffffffULL,
-   0x5555555555555555ULL,
-   0xaaaaaaaaaaaaaaaaULL,
-   0x1111111111111111ULL,
-   0x2222222222222222ULL,
-   0x4444444444444444ULL,
-   0x8888888888888888ULL,
-   0x3333333333333333ULL,
-   0x6666666666666666ULL,
-   0x9999999999999999ULL,
-   0xccccccccccccccccULL,
-   0x7777777777777777ULL,
-   0xbbbbbbbbbbbbbbbbULL,
-   0xddddddddddddddddULL,
-   0xeeeeeeeeeeeeeeeeULL,
-   0x7a6c7258554e494cULL, /* yeah ;-) */
-};
-
-static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
-{
-   printk(KERN_INFO "  %016llx bad mem addr %010llx - %010llx reserved\n",
-  (unsigned long long) pattern,
-  (unsigned long long) start_bad,
-  (unsigned long long) end_bad);
-   memblock_reserve(start_bad, end_bad - start_bad);
-}
-
-static void __init memtest(u64 pattern, u64 start_phys, u64 size)
-{
-   u64 *p, *start, *end;
-   u64 start_bad, last_bad;
-   u64 start_phys_aligned;
-   const size_t incr = sizeof(pattern);
-
-   start_phys_aligned = ALIGN(start_phys, incr);
-   start = __va(start_phys_aligned);
-   end = start + (size - (start_phys_aligned - start_phys)) / incr;
-   start_bad = 0;
-   last_bad = 0;
-
-   for (p = start; p < end; p++)
-   *p = pattern;
-
-   for (p = start; p < end; p++, start_phys_aligned += incr) {
-   if (*p == pattern)
-   continue;
-   if (start_phys_aligned == last_bad + incr) {
-   last_bad += incr;
-   continue;
-   }
-   if (start_bad)
-   reserve_bad_mem(pattern, start_bad, last_bad + incr);
-   start_bad = last_bad = start_phys_aligned;
-   }
-   if (start_bad)
-   reserve_bad_mem(pattern, start_bad, last_bad + incr);
-}
-
-static void __init do_one_pass(u64 pattern, u64 start, u64 end)
-{
-   u64 i;
-   phys_addr_t this_start, this_end;
-
-   for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) {
-   this_start = clamp_t(phys_addr_t, this_start, start, end);
-   

[PATCH v2 7/9] get the random phy addr according to slot_area info

2015-03-02 Thread Baoquan He
Now the random value can be used to look up the related slot info stored
in slot_area; slot_area.num is mainly used to locate which slot is the
target. That slot's starting address is returned as the physical address
where the kernel will be put.
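
As a worked example, the walk in slots_fetch_random() below can be
modeled in user space like this (the areas and the random value are
made up; note the model uses >= so that indices 0..num-1 stay inside
each area):

#include <stdio.h>

#define PHYSICAL_ALIGN 0x200000UL /* models CONFIG_PHYSICAL_ALIGN, 2M */

struct slot_area {
    unsigned long addr;
    int num;
};

int main(void)
{
    /* two fictitious areas: 3 slots at 16M, 5 slots at 4G */
    struct slot_area slot_areas[] = {
        { 0x1000000UL,   3 },
        { 0x100000000UL, 5 },
    };
    int slot_area_index = 2;
    unsigned long random = 4; /* pretend get_random_long() % slot_max */
    int i;

    for (i = 0; i < slot_area_index; i++) {
        if (random >= (unsigned long)slot_areas[i].num) {
            random -= slot_areas[i].num;
            continue;
        }
        /* 4 - 3 = 1, so slot 1 of the second area: 4G + 2M */
        printf("%#lx\n", slot_areas[i].addr + random * PHYSICAL_ALIGN);
        return 0;
    }
    return 1;
}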

Signed-off-by: Baoquan He 
---
 arch/x86/boot/compressed/aslr.c | 34 +++---
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 1c6fb31..55adee2 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -245,9 +245,6 @@ static unsigned long mem_min_overlap(struct mem_vector *img, struct mem_vector *out)
return min;
 }
 
-static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
-  CONFIG_PHYSICAL_ALIGN];
-
 struct slot_area {
unsigned long addr;
int num;
@@ -261,23 +258,28 @@ static unsigned long slot_max;
 
 static unsigned long slot_area_index;
 
-static void slots_append(unsigned long addr)
-{
-   /* Overflowing the slots list should be impossible. */
-   if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
-   CONFIG_PHYSICAL_ALIGN)
-   return;
-
-   slots[slot_max++] = addr;
-}
-
 static unsigned long slots_fetch_random(void)
 {
+   unsigned long random;
+   int i;
+
/* Handle case of no slots stored. */
if (slot_max == 0)
return 0;
 
-   return slots[get_random_long() % slot_max];
+   random = get_random_long() % slot_max;
+
+   for (i = 0; i < slot_area_index; i++) {
+   if (random > slot_areas[i].num) {
+   random -= slot_areas[i].num;
+   continue;
+   }
+   return slot_areas[i].addr + random * CONFIG_PHYSICAL_ALIGN;
+   }
+
+   if (i == slot_area_index)
+   debug_putstr("something wrong happened in slots_fetch_random()...\n");
+   return 0;
 }
 
 static int process_e820_entry(struct e820entry *entry,
@@ -362,7 +364,7 @@ repeat:
goto repeat;
 }
 
-static unsigned long find_random_addr(unsigned long minimum,
+static unsigned long find_random_phy_addr(unsigned long minimum,
  unsigned long size)
 {
int i;
@@ -374,6 +376,8 @@ static unsigned long find_random_addr(unsigned long minimum,
/* Verify potential e820 positions, appending to slots list. */
for (i = 0; i < real_mode->e820_entries; i++) {
 process_e820_entry(&real_mode->e820_map[i], minimum, size);
+   if (slot_area_index == MAX_SLOT_AREA)
+   break;
}
 
return slots_fetch_random();
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 0/9] randomize kernel physical address and virtual address separately

2015-03-02 Thread Baoquan He
Currently kaslr only randomizes the physical address of kernel loading,
then adds the delta to the virtual address of the kernel text mapping.
Because the kernel virtual address can only range from __START_KERNEL_map
to LOAD_PHYSICAL_ADDR + CONFIG_RANDOMIZE_BASE_MAX_OFFSET, namely
[0xffffffff80000000, 0xffffffffc0000000], the physical address can only
be randomized in the region [LOAD_PHYSICAL_ADDR,
CONFIG_RANDOMIZE_BASE_MAX_OFFSET], namely [16M, 1G].

So hpa and Vivek suggested the randomization should be done separately
for both the physical and the virtual address. In this patchset the
behavior is changed: randomize both the physical address where the
kernel is decompressed and the virtual address where the kernel text is
mapped. The physical address can be randomized from where vmlinux was
linked to load up to the maximum physical memory, possibly near 64T,
while the virtual address gets a random offset from the load address to
CONFIG_RANDOMIZE_BASE_MAX_OFFSET, which is then added to
__START_KERNEL_map. Relocation handling depends only on virtual address
randomization: if and only if the virtual address is randomized to a
different value, we add the delta to the offset of the kernel relocs.

v1->v2:
    Thanks to Yinghai's patch which makes the kernel able to load above
    4G in the boot stage, the physical address can be randomized to
    anywhere, even near 64T.

    Thanks to Andy Lutomirski for his idt patch; in the end it is not
    used in this patchset since I didn't get the #PF handler to work
    well. I believe it does work, but I didn't manage it. Will check
    why later. Thanks anyway.

Baoquan He (8):
  remove a unused function parameter
  a bug that relocation can not be handled when kernel is loaded above
2G
  introduce struct slot_area to manage randomization slot info
  add mem_min_overlap to find the first avoid region within a memory
region
  change process_e820_entry to store slot info into slot_area
  get the random phy addr according to slot_area info
  introduce fetch_random_virt_offset to randomize the kernel text
mapping address
  change the relocations behavior for kaslr on x86_64

Yinghai Lu (1):
  make kernel be able to load above 4G in boot stage

 arch/x86/boot/compressed/aslr.c | 194 +++-
 arch/x86/boot/compressed/misc.c |  46 ++---
 arch/x86/boot/compressed/misc.h |  22 ++--
 arch/x86/boot/compressed/misc_pgt.c |  61 
 arch/x86/include/asm/page.h |   5 +
 arch/x86/mm/ident_map.c |  74 ++
 arch/x86/mm/init_64.c   |  74 +-
 7 files changed, 333 insertions(+), 143 deletions(-)
 create mode 100644 arch/x86/boot/compressed/misc_pgt.c
 create mode 100644 arch/x86/mm/ident_map.c

-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ARM: imx: Fix trivial typo in comments

2015-03-02 Thread Yannick Guerrini
change 'mutliple' to 'multiple'

Signed-off-by: Yannick Guerrini 
---
 arch/arm/mach-imx/iomux-mx3.h | 2 +-
 arch/arm/mach-imx/iomux-v3.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-imx/iomux-mx3.h b/arch/arm/mach-imx/iomux-mx3.h
index 0a5adba..2e4a0dd 100644
--- a/arch/arm/mach-imx/iomux-mx3.h
+++ b/arch/arm/mach-imx/iomux-mx3.h
@@ -114,7 +114,7 @@ enum iomux_gp_func {
  */
 int mxc_iomux_alloc_pin(unsigned int pin, const char *label);
 /*
- * setups mutliple pins
+ * setups multiple pins
  * convenient way to call the above function with tables
  */
 int mxc_iomux_setup_multiple_pins(const unsigned int *pin_list, unsigned count,
diff --git a/arch/arm/mach-imx/iomux-v3.h b/arch/arm/mach-imx/iomux-v3.h
index 2fa3b54..cb45ae5 100644
--- a/arch/arm/mach-imx/iomux-v3.h
+++ b/arch/arm/mach-imx/iomux-v3.h
@@ -128,7 +128,7 @@ typedef u64 iomux_v3_cfg_t;
 int mxc_iomux_v3_setup_pad(iomux_v3_cfg_t pad);
 
 /*
- * setups mutliple pads
+ * setups multiple pads
  * convenient way to call the above function with tables
  */
 int mxc_iomux_v3_setup_multiple_pads(iomux_v3_cfg_t *pad_list, unsigned count);
-- 
1.9.5.msysgit.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 8/9] introduce fetch_random_virt_offset to randomize the kernel text mapping address

2015-03-02 Thread Baoquan He
Kaslr extended the kernel text mapping region size from 512M to 1G,
namely CONFIG_RANDOMIZE_BASE_MAX_OFFSET. This means the kernel text can
be mapped into the region below:

[__START_KERNEL_map + LOAD_PHYSICAL_ADDR, __START_KERNEL_map + 1G]

Introduce a function find_random_virt_offset() to get a random value
between LOAD_PHYSICAL_ADDR and CONFIG_RANDOMIZE_BASE_MAX_OFFSET. This
random value is added to __START_KERNEL_map to get the starting address
the kernel text is mapped from. Since a slot can be anywhere in this
region, i.e. it forms one independent slot_area, it is easy to get a
slot w.r.t. the random value.
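
A quick worked example of the slot arithmetic (user space; the sizes
are invented):

#include <stdio.h>

#define ALIGN_2M   0x200000UL   /* models CONFIG_PHYSICAL_ALIGN */
#define MAX_OFFSET 0x40000000UL /* models CONFIG_RANDOMIZE_BASE_MAX_OFFSET, 1G */

int main(void)
{
    unsigned long minimum = 0x1000000UL;    /* 16M, already 2M-aligned */
    unsigned long image_size = 0x1400000UL; /* 20M, bigger than 2M */
    unsigned long slot_num;

    /* image_size > ALIGN_2M, so take the second branch */
    slot_num = (MAX_OFFSET - minimum - image_size) / ALIGN_2M + 1;
    printf("%lu candidate virtual slots\n", slot_num); /* prints 495 */
    return 0;
}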

Signed-off-by: Baoquan He 
---
 arch/x86/boot/compressed/aslr.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 55adee2..332a8c4 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -383,6 +383,28 @@ static unsigned long find_random_phy_addr(unsigned long minimum,
return slots_fetch_random();
 }
 
+static unsigned long find_random_virt_offset(unsigned long minimum,
+ unsigned long image_size)
+{
+   unsigned long slot_num, random;
+   struct mem_vector region, img;
+
+   /* Make sure minimum is aligned. */
+   minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+
+   if (image_size <= CONFIG_PHYSICAL_ALIGN)
+   slot_num = (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - minimum) /
+   CONFIG_PHYSICAL_ALIGN;
+   else
+   slot_num = (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - minimum - image_size) /
+   CONFIG_PHYSICAL_ALIGN + 1;
+
+   random = get_random_long() % slot_num;
+
+   return random * CONFIG_PHYSICAL_ALIGN + minimum;
+}
+
+
 static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled)
 {
struct setup_data *data;
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 2/9] a bug that relocation can not be handled when kernel is loaded above 2G

2015-03-02 Thread Baoquan He
When processing 32-bit relocation tables, a local variable 'extended'
is defined to calculate the physical address of a relocs entry.
However, its type is int, which is enough for i386 but not for x86_64.
That is why relocation can only be handled when the kernel is loaded
below 2G; otherwise an overflow happens and causes a system hang.

Change it to long, as the 32-bit inverse relocation processing does;
this change is safe for i386 relocation handling too.
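
The failure mode is easy to reproduce in user space on a 64-bit machine
(the addresses are made up; the wrap-around of the int conversion is
implementation-defined but matches gcc/clang behavior):

#include <stdio.h>

int main(void)
{
    unsigned long map = 0x90000000UL; /* pretend a load address above 2G */
    int reloc = 0x100000;

    int  bad  = reloc;  bad  += map; /* truncated to 32 bits, now negative */
    long good = reloc;  good += map;

    /* the int version sign-extends into a bogus pointer value */
    printf("int:  %#lx\n", (unsigned long)bad);  /* 0xffffffff90100000 */
    printf("long: %#lx\n", (unsigned long)good); /* 0x90100000 */
    return 0;
}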

Signed-off-by: Baoquan He 
---
 arch/x86/boot/compressed/misc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 5903089..ac5c05e 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -278,7 +278,7 @@ static void handle_relocations(void *output, unsigned long output_len)
 * So we work backwards from the end of the decompressed image.
 */
for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) {
-   int extended = *reloc;
+   long extended = *reloc;
extended += map;
 
ptr = (unsigned long)extended;
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 3/4] scsi: ufs: add trace events and dump prints for debug

2015-03-02 Thread Gilad Broner
Add trace events to the driver to allow monitoring and profiling
of activities such as PM suspend/resume, hibernate enter/exit,
clock gating and clock scaling up/down.
In addition, add UFS host controller register dumps to provide
detailed information in case of errors, to assist in analysis
of issues.
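
For readers unfamiliar with the mechanics, a trace event of the kind
this patch adds is declared roughly as below (a trimmed sketch; the
event name and fields are illustrative, not the exact contents of
include/trace/events/ufs.h):

#undef TRACE_SYSTEM
#define TRACE_SYSTEM ufs

#if !defined(_TRACE_UFS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_UFS_H

#include <linux/tracepoint.h>

TRACE_EVENT(ufshcd_clk_gating,

    TP_PROTO(const char *dev_name, int state),

    TP_ARGS(dev_name, state),

    TP_STRUCT__entry(
        __string(dev_name, dev_name)
        __field(int, state)
    ),

    TP_fast_assign(
        __assign_str(dev_name, dev_name);
        __entry->state = state;
    ),

    TP_printk("%s: gating state changed to %d",
        __get_str(dev_name), __entry->state)
);

#endif /* _TRACE_UFS_H */

/* This part must be outside protection */
#include <trace/define_trace.h>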

Signed-off-by: Dolev Raviv 
Signed-off-by: Subhash Jadavani 
Signed-off-by: Lee Susman 
Signed-off-by: Sujit Reddy Thumma 
Signed-off-by: Yaniv Gardi 
---
 drivers/scsi/ufs/ufs-qcom.c |  53 +
 drivers/scsi/ufs/ufshcd.c   | 511 +---
 drivers/scsi/ufs/ufshcd.h   |  49 -
 drivers/scsi/ufs/ufshci.h   |   1 +
 include/trace/events/ufs.h  | 227 
 5 files changed, 804 insertions(+), 37 deletions(-)
 create mode 100644 include/trace/events/ufs.h

diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
index 9217af9..9fe675d 100644
--- a/drivers/scsi/ufs/ufs-qcom.c
+++ b/drivers/scsi/ufs/ufs-qcom.c
@@ -30,6 +30,14 @@ static int ufs_qcom_get_bus_vote(struct ufs_qcom_host *host,
const char *speed_mode);
 static int ufs_qcom_set_bus_vote(struct ufs_qcom_host *host, int vote);
 
+static void ufs_qcom_dump_regs(struct ufs_hba *hba, int offset, int len,
+   char *prefix)
+{
+   print_hex_dump(KERN_ERR, prefix,
+   len > 4 ? DUMP_PREFIX_OFFSET : DUMP_PREFIX_NONE,
+   16, 4, hba->mmio_base + offset, len * 4, false);
+}
+
 static int ufs_qcom_get_connected_tx_lanes(struct ufs_hba *hba, u32 *tx_lanes)
 {
int err = 0;
@@ -983,6 +991,50 @@ void ufs_qcom_clk_scale_notify(struct ufs_hba *hba)
dev_req_params->hs_rate);
 }
 
+static void ufs_qcom_print_hw_debug_reg_all(struct ufs_hba *hba)
+{
+   u32 reg;
+
+   ufs_qcom_dump_regs(hba, UFS_UFS_DBG_RD_REG_OCSC, 44,
+   "UFS_UFS_DBG_RD_REG_OCSC ");
+
+   reg = ufshcd_readl(hba, REG_UFS_CFG1);
+   reg |= UFS_BIT(17);
+   ufshcd_writel(hba, reg, REG_UFS_CFG1);
+
+   ufs_qcom_dump_regs(hba, UFS_UFS_DBG_RD_EDTL_RAM, 32,
+   "UFS_UFS_DBG_RD_EDTL_RAM ");
+   ufs_qcom_dump_regs(hba, UFS_UFS_DBG_RD_DESC_RAM, 128,
+   "UFS_UFS_DBG_RD_DESC_RAM ");
+   ufs_qcom_dump_regs(hba, UFS_UFS_DBG_RD_PRDT_RAM, 64,
+   "UFS_UFS_DBG_RD_PRDT_RAM ");
+
+   ufshcd_writel(hba, (reg & ~UFS_BIT(17)), REG_UFS_CFG1);
+
+   ufs_qcom_dump_regs(hba, UFS_DBG_RD_REG_UAWM, 4,
+   "UFS_DBG_RD_REG_UAWM ");
+   ufs_qcom_dump_regs(hba, UFS_DBG_RD_REG_UARM, 4,
+   "UFS_DBG_RD_REG_UARM ");
+   ufs_qcom_dump_regs(hba, UFS_DBG_RD_REG_TXUC, 48,
+   "UFS_DBG_RD_REG_TXUC ");
+   ufs_qcom_dump_regs(hba, UFS_DBG_RD_REG_RXUC, 27,
+   "UFS_DBG_RD_REG_RXUC ");
+   ufs_qcom_dump_regs(hba, UFS_DBG_RD_REG_DFC, 19,
+   "UFS_DBG_RD_REG_DFC ");
+   ufs_qcom_dump_regs(hba, UFS_DBG_RD_REG_TRLUT, 34,
+   "UFS_DBG_RD_REG_TRLUT ");
+   ufs_qcom_dump_regs(hba, UFS_DBG_RD_REG_TMRLUT, 9,
+   "UFS_DBG_RD_REG_TMRLUT ");
+}
+
+static void ufs_qcom_dump_dbg_regs(struct ufs_hba *hba)
+{
+   ufs_qcom_dump_regs(hba, REG_UFS_SYS1CLK_1US, 5,
+   "REG_UFS_SYS1CLK_1US ");
+
+   ufs_qcom_print_hw_debug_reg_all(hba);
+}
+
 /**
  * struct ufs_hba_qcom_vops - UFS QCOM specific variant operations
  *
@@ -1000,5 +1052,6 @@ static const struct ufs_hba_variant_ops ufs_hba_qcom_vops = {
.pwr_change_notify  = ufs_qcom_pwr_change_notify,
.suspend= ufs_qcom_suspend,
.resume = ufs_qcom_resume,
+   .dbg_register_dump  = ufs_qcom_dump_dbg_regs,
 };
 EXPORT_SYMBOL(ufs_hba_qcom_vops);
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 7697cc6..3ae0b3f 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -45,6 +45,9 @@
 #include "unipro.h"
 #include "ufs-debugfs.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/ufs.h>
+
 #ifdef CONFIG_DEBUG_FS
 
 static void ufshcd_update_error_stats(struct ufs_hba *hba, int type)
@@ -145,6 +148,8 @@ static inline ufshcd_update_error_stats(struct ufs_hba *hba, int type)
_ret = ufshcd_disable_vreg(_dev, _vreg);\
_ret;   \
})
+#define ufshcd_hex_dump(prefix_str, buf, len) \
+print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 4, buf, len, false)
 
 static u32 ufs_query_desc_max_size[] = {
QUERY_DESC_DEVICE_MAX_SIZE,
@@ -272,6 +277,151 @@ static inline void ufshcd_disable_irq(struct ufs_hba *hba)
}
 }
 
+#ifdef CONFIG_TRACEPOINTS
+static void ufshcd_add_command_trace(struct ufs_hba *hba,
+   unsigned int tag, const char *str)
+{
+   sector_t lba = -1;
+   u8 opcode = 0;
+   u32 intr, doorbell;
+   

[PATCH v2 3/9] make kernel be able to load above 4G in boot stage

2015-03-02 Thread Baoquan He
From: Yinghai Lu 

Split out kernel_ident_mapping_init() and call it in the
boot::decompress_kernel stage. It will cover a new range that is above 4G.

-v2: fix one typo, use round_up/round_down and use a macro for the size.

Signed-off-by: Yinghai Lu 
---
 arch/x86/boot/compressed/misc.c | 10 +
 arch/x86/boot/compressed/misc_pgt.c | 61 ++
 arch/x86/include/asm/page.h |  5 +++
 arch/x86/mm/ident_map.c | 74 +
 arch/x86/mm/init_64.c   | 74 +
 5 files changed, 151 insertions(+), 73 deletions(-)
 create mode 100644 arch/x86/boot/compressed/misc_pgt.c
 create mode 100644 arch/x86/mm/ident_map.c

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index ac5c05e..c9d8187 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -9,6 +9,11 @@
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
 
+#ifdef CONFIG_X86_64
+#define __pa(x)  ((unsigned long)(x))
+#define __va(x)  ((void *)((unsigned long)(x)))
+#endif
+
 #include "misc.h"
 #include "../string.h"
 
@@ -366,6 +371,8 @@ static void parse_elf(void *output)
free(phdrs);
 }
 
+#include "misc_pgt.c"
+
 asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
  unsigned char *input_data,
  unsigned long input_len,
@@ -421,6 +428,9 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
error("Wrong destination address");
 #endif
 
+   if (output != output_orig)
+   fill_linux64_pagetable((unsigned long)output, output_len);
+
debug_putstr("\nDecompressing Linux... ");
decompress(input_data, input_len, NULL, NULL, output, NULL, error);
parse_elf(output);
diff --git a/arch/x86/boot/compressed/misc_pgt.c b/arch/x86/boot/compressed/misc_pgt.c
new file mode 100644
index 000..2783f0f
--- /dev/null
+++ b/arch/x86/boot/compressed/misc_pgt.c
@@ -0,0 +1,61 @@
+
+#ifdef CONFIG_X86_64
+#include <asm/init.h>
+#include <asm/pgtable.h>
+
+#include "../../mm/ident_map.c"
+
+struct alloc_pgt_data {
+   unsigned char *pgt_buf;
+   unsigned long pgt_buf_size;
+   unsigned long pgt_buf_offset;
+};
+
+static void *alloc_pgt_page(void *context)
+{
+   struct alloc_pgt_data *d = (struct alloc_pgt_data *)context;
+   unsigned char *p = (unsigned char *)d->pgt_buf;
+
+   if (d->pgt_buf_offset >= d->pgt_buf_size) {
+   debug_putstr("out of pgt_buf in misc.c\n");
+   return NULL;
+   }
+
+   p += d->pgt_buf_offset;
+   d->pgt_buf_offset += PAGE_SIZE;
+
+   return p;
+}
+
+/* 4 pages to cover cross 512G boundary */
+#define PGT_BUF_SIZE (PAGE_SIZE*4)
+
+unsigned long __force_order;
+static unsigned char pgt_buf[PGT_BUF_SIZE] __aligned(PAGE_SIZE);
+
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+   struct alloc_pgt_data data = {
+   .pgt_buf = (unsigned char *) pgt_buf,
+   .pgt_buf_size = sizeof(pgt_buf),
+   .pgt_buf_offset = 0,
+   };
+   struct x86_mapping_info mapping_info = {
+   .alloc_pgt_page = alloc_pgt_page,
+   .context = &data,
+   .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+   };
+   unsigned long end = start + size;
+   pgd_t *level4p = (pgd_t *)read_cr3();
+
+   /* align boundary to 2M */
+   start = round_down(start, PMD_SIZE);
+   end = round_up(end, PMD_SIZE);
+   if (start >= (1UL<<32))
+   kernel_ident_mapping_init(&mapping_info, level4p, start, end);
+}
+#else
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+}
+#endif
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 802dde3..cf8f619 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -37,7 +37,10 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
+#ifndef __pa
 #define __pa(x) __phys_addr((unsigned long)(x))
+#endif
+
 #define __pa_nodebug(x)__phys_addr_nodebug((unsigned long)(x))
 /* __pa_symbol should be used for C visible symbols.
This seems to be the official gcc blessed way to do such arithmetic. */
@@ -51,7 +54,9 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
 #define __pa_symbol(x) \
__phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
 
+#ifndef __va
 #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#endif
 
 #define __boot_va(x)   __va(x)
 #define __boot_pa(x)   __pa(x)
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
new file mode 100644
index 000..751ca92
--- /dev/null
+++ 

[PATCH v4 4/4] scsi: ufs: inject errors to verify error handling

2015-03-02 Thread Gilad Broner
From: Sujit Reddy Thumma 

Use the fault-injection framework to simulate error conditions
in the controller and verify the error handling mechanisms
implemented in the UFS host controller driver.

This is used only during development and is hence
guarded by the CONFIG_UFS_FAULT_INJECTION debug config option.
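
The gating pattern the patch relies on looks roughly like this (a
sketch; the attribute and function names below are invented, and only
DECLARE_FAULT_ATTR/setup_fault_attr/should_fail are real
fault-injection API):

#include <linux/fault-inject.h>
#include <linux/module.h>

static DECLARE_FAULT_ATTR(fail_demo_attr);

static char *fail_demo;
module_param(fail_demo, charp, 0);

static int demo_submit(void)
{
    /* one failure opportunity of size 1, as in the interrupt hook */
    if (should_fail(&fail_demo_attr, 1))
        return -EIO; /* take the injected error path */
    return 0;        /* normal path */
}

static int __init demo_init(void)
{
    if (fail_demo)
        setup_fault_attr(&fail_demo_attr, fail_demo);
    return demo_submit();
}
module_init(demo_init);
MODULE_LICENSE("GPL");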

Signed-off-by: Sujit Reddy Thumma 
---
 drivers/scsi/ufs/ufs-debugfs.c | 140 +
 drivers/scsi/ufs/ufs-debugfs.h |   4 ++
 drivers/scsi/ufs/ufshcd.c  |   2 +
 drivers/scsi/ufs/ufshcd.h  |   5 ++
 lib/Kconfig.debug  |  14 +
 5 files changed, 165 insertions(+)

diff --git a/drivers/scsi/ufs/ufs-debugfs.c b/drivers/scsi/ufs/ufs-debugfs.c
index 27ab053..bac72d0 100644
--- a/drivers/scsi/ufs/ufs-debugfs.c
+++ b/drivers/scsi/ufs/ufs-debugfs.c
@@ -17,6 +17,7 @@
  *
  */
 
+#include 
 #include "ufs-debugfs.h"
 #include "unipro.h"
 
@@ -41,6 +42,143 @@ struct desc_field_offset {
} while (0)
 #define DOORBELL_CLR_TOUT_US   (1000 * 1000) /* 1 sec */
 
+#ifdef CONFIG_UFS_FAULT_INJECTION
+
+#define INJECT_COMMAND_HANG (0x0)
+
+static DECLARE_FAULT_ATTR(fail_default_attr);
+static char *fail_request;
+module_param(fail_request, charp, 0);
+
+static bool inject_fatal_err_tr(struct ufs_hba *hba, u8 ocs_err)
+{
+   int tag;
+
+   tag = find_first_bit(&hba->outstanding_reqs, hba->nutrs);
+   if (tag == hba->nutrs)
+   return 0;
+
+   ufshcd_writel(hba, ~(1 << tag), REG_UTP_TRANSFER_REQ_LIST_CLEAR);
+   (&hba->lrb[tag])->utr_descriptor_ptr->header.dword_2 =
+   cpu_to_be32(ocs_err);
+
+   /* fatal error injected */
+   return 1;
+}
+
+static bool inject_fatal_err_tm(struct ufs_hba *hba, u8 ocs_err)
+{
+   int tag;
+
+   tag = find_first_bit(&hba->outstanding_tasks, hba->nutmrs);
+   if (tag == hba->nutmrs)
+   return 0;
+
+   ufshcd_writel(hba, ~(1 << tag), REG_UTP_TASK_REQ_LIST_CLEAR);
+   (&hba->utmrdl_base_addr[tag])->header.dword_2 =
+   cpu_to_be32(ocs_err);
+
+   /* fatal error injected */
+   return 1;
+}
+
+static bool inject_cmd_hang_tr(struct ufs_hba *hba)
+{
+   int tag;
+
+   tag = find_first_bit(&hba->outstanding_reqs, hba->nutrs);
+   if (tag == hba->nutrs)
+   return 0;
+
+   __clear_bit(tag, &hba->outstanding_reqs);
+   hba->lrb[tag].cmd = NULL;
+   __clear_bit(tag, &hba->lrb_in_use);
+
+   /* command hang injected */
+   return 1;
+}
+
+static int inject_cmd_hang_tm(struct ufs_hba *hba)
+{
+   int tag;
+
+   tag = find_first_bit(&hba->outstanding_tasks, hba->nutmrs);
+   if (tag == hba->nutmrs)
+   return 0;
+
+   __clear_bit(tag, &hba->outstanding_tasks);
+   __clear_bit(tag, &hba->tm_slots_in_use);
+
+   /* command hang injected */
+   return 1;
+}
+
+void ufsdbg_fail_request(struct ufs_hba *hba, u32 *intr_status)
+{
+   u8 ocs_err;
+   static const u32 errors[] = {
+   CONTROLLER_FATAL_ERROR,
+   SYSTEM_BUS_FATAL_ERROR,
+   INJECT_COMMAND_HANG,
+   };
+
+   if (!should_fail(&hba->debugfs_files.fail_attr, 1))
+   goto out;
+
+   *intr_status = errors[prandom_u32() % ARRAY_SIZE(errors)];
+   dev_info(hba->dev, "%s: fault-inject error: 0x%x\n",
+   __func__, *intr_status);
+
+   switch (*intr_status) {
+   case CONTROLLER_FATAL_ERROR: /* fall through */
+   ocs_err = OCS_FATAL_ERROR;
+   goto set_ocs;
+   case SYSTEM_BUS_FATAL_ERROR:
+   ocs_err = OCS_INVALID_CMD_TABLE_ATTR;
+set_ocs:
+   if (!inject_fatal_err_tr(hba, ocs_err))
+   if (!inject_fatal_err_tm(hba, ocs_err))
+   *intr_status = 0;
+   break;
+   case INJECT_COMMAND_HANG:
+   if (!inject_cmd_hang_tr(hba))
+   inject_cmd_hang_tm(hba);
+   break;
+   default:
+   BUG();
+   /* some configurations ignore panics caused by BUG() */
+   break;
+   }
+out:
+   return;
+}
+
+static void ufsdbg_setup_fault_injection(struct ufs_hba *hba)
+{
+   hba->debugfs_files.fail_attr = fail_default_attr;
+
+   if (fail_request)
+   setup_fault_attr(&hba->debugfs_files.fail_attr, fail_request);
+
+   /* suppress dump stack every time a failure is injected */
+   hba->debugfs_files.fail_attr.verbose = 0;
+
+   if (IS_ERR(fault_create_debugfs_attr("inject_fault",
+   hba->debugfs_files.debugfs_root,
+   &hba->debugfs_files.fail_attr)))
+   dev_err(hba->dev, "%s: failed to create debugfs entry\n",
+   __func__);
+}
+#else
+void ufsdbg_fail_request(struct ufs_hba *hba, u32 *intr_status)
+{
+}
+
+static void ufsdbg_setup_fault_injection(struct ufs_hba *hba)
+{
+}
+#endif /* 

[PATCH v4 1/4] scsi: ufs: add ioctl interface for query request

2015-03-02 Thread Gilad Broner
From: Dolev Raviv 

This patch exposes the ioctl interface for the UFS driver via the SCSI
device ioctl interface. As of now the UFS driver provides the ioctl for
the query interface to the connected UFS device.
Signed-off-by: Dolev Raviv 
Signed-off-by: Noa Rubens 
Signed-off-by: Raviv Shvili 
Signed-off-by: Yaniv Gardi 
---
 drivers/scsi/ufs/ufs.h|  53 +++---
 drivers/scsi/ufs/ufshcd.c | 225 +-
 include/uapi/scsi/Kbuild  |   1 +
 include/uapi/scsi/ufs/Kbuild  |   3 +
 include/uapi/scsi/ufs/ioctl.h |  57 +++
 include/uapi/scsi/ufs/ufs.h   |  66 +
 6 files changed, 361 insertions(+), 44 deletions(-)
 create mode 100644 include/uapi/scsi/ufs/Kbuild
 create mode 100644 include/uapi/scsi/ufs/ioctl.h
 create mode 100644 include/uapi/scsi/ufs/ufs.h

diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index 42c459a..1f023c4 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -38,6 +38,7 @@
 
 #include 
 #include 
+#include 
 
 #define MAX_CDB_SIZE   16
 #define GENERAL_UPIU_REQUEST_SIZE 32
@@ -71,6 +72,16 @@ enum {
UFS_UPIU_RPMB_WLUN  = 0xC4,
 };
 
+/**
+ * ufs_is_valid_unit_desc_lun - checks if the given LUN has a unit descriptor
+ * @lun: LU number to check
+ * @return: true if the lun has a matching unit descriptor, false otherwise
+ */
+static inline bool ufs_is_valid_unit_desc_lun(u8 lun)
+{
+   return (lun == UFS_UPIU_RPMB_WLUN || (lun < UFS_UPIU_MAX_GENERAL_LUN));
+}
+
 /*
  * UFS Protocol Information Unit related definitions
  */
@@ -126,35 +137,6 @@ enum {
UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST  = 0x81,
 };
 
-/* Flag idn for Query Requests*/
-enum flag_idn {
-   QUERY_FLAG_IDN_FDEVICEINIT  = 0x01,
-   QUERY_FLAG_IDN_PWR_ON_WPE   = 0x03,
-   QUERY_FLAG_IDN_BKOPS_EN = 0x04,
-};
-
-/* Attribute idn for Query requests */
-enum attr_idn {
-   QUERY_ATTR_IDN_ACTIVE_ICC_LVL   = 0x03,
-   QUERY_ATTR_IDN_BKOPS_STATUS = 0x05,
-   QUERY_ATTR_IDN_EE_CONTROL   = 0x0D,
-   QUERY_ATTR_IDN_EE_STATUS= 0x0E,
-};
-
-/* Descriptor idn for Query requests */
-enum desc_idn {
-   QUERY_DESC_IDN_DEVICE   = 0x0,
-   QUERY_DESC_IDN_CONFIGURAION = 0x1,
-   QUERY_DESC_IDN_UNIT = 0x2,
-   QUERY_DESC_IDN_RFU_0= 0x3,
-   QUERY_DESC_IDN_INTERCONNECT = 0x4,
-   QUERY_DESC_IDN_STRING   = 0x5,
-   QUERY_DESC_IDN_RFU_1= 0x6,
-   QUERY_DESC_IDN_GEOMETRY = 0x7,
-   QUERY_DESC_IDN_POWER= 0x8,
-   QUERY_DESC_IDN_MAX,
-};
-
 enum desc_header_offset {
QUERY_DESC_LENGTH_OFFSET= 0x00,
QUERY_DESC_DESC_TYPE_OFFSET = 0x01,
@@ -247,19 +229,6 @@ enum bkops_status {
BKOPS_STATUS_MAX = BKOPS_STATUS_CRITICAL,
 };
 
-/* UTP QUERY Transaction Specific Fields OpCode */
-enum query_opcode {
-   UPIU_QUERY_OPCODE_NOP   = 0x0,
-   UPIU_QUERY_OPCODE_READ_DESC = 0x1,
-   UPIU_QUERY_OPCODE_WRITE_DESC= 0x2,
-   UPIU_QUERY_OPCODE_READ_ATTR = 0x3,
-   UPIU_QUERY_OPCODE_WRITE_ATTR= 0x4,
-   UPIU_QUERY_OPCODE_READ_FLAG = 0x5,
-   UPIU_QUERY_OPCODE_SET_FLAG  = 0x6,
-   UPIU_QUERY_OPCODE_CLEAR_FLAG= 0x7,
-   UPIU_QUERY_OPCODE_TOGGLE_FLAG   = 0x8,
-};
-
 /* Query response result code */
 enum {
QUERY_RESULT_SUCCESS= 0x00,
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 5d60a86..cb357f8 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3,7 +3,7 @@
  *
  * This code is based on drivers/scsi/ufs/ufshcd.c
  * Copyright (C) 2011-2013 Samsung India Software Operations
- * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2013-2015, The Linux Foundation. All rights reserved.
  *
  * Authors:
  * Santosh Yaraganavi 
@@ -39,6 +39,7 @@
 
 #include 
 #include 
+#include 
 
 #include "ufshcd.h"
 #include "unipro.h"
@@ -74,6 +75,9 @@
 /* Interrupt aggregation default timeout, unit: 40us */
 #define INT_AGGR_DEF_TO0x02
 
+/* IOCTL opcode for command - ufs set device read only */
+#define UFS_IOCTL_BLKROSET  BLKROSET
+
 #define ufshcd_toggle_vreg(_dev, _vreg, _on)   \
({  \
int _ret;   \
@@ -1882,7 +1886,7 @@ static inline int ufshcd_read_unit_desc_param(struct ufs_hba *hba,
 * Unit descriptors are only available for general purpose LUs (LUN id
 * from 0 to 7) and RPMB Well known LU.
 */
-   if (lun != UFS_UPIU_RPMB_WLUN && (lun >= UFS_UPIU_MAX_GENERAL_LUN))
+   if (!ufs_is_valid_unit_desc_lun(lun))
return -EOPNOTSUPP;
 
return ufshcd_read_desc_param(hba, QUERY_DESC_IDN_UNIT, lun,
@@ -4201,6 +4205,222 @@ static void 

[PATCH v4 2/4] scsi: ufs: add debugfs for ufs

2015-03-02 Thread Gilad Broner
From: Lee Susman 

Adding debugfs capability for ufshcd.

debugfs attributes introduced in this patch:
 - View driver/controller runtime data
 - Command tag statistics for performance analysis
 - Dump device descriptor info
 - Track recoverable errors statistics during runtime
 - Change UFS power mode during runtime
 by writing a string in the format 'GGLLMM' (parsed as sketched below) where:
 G - selected gear
 L - number of lanes
 M - power mode
 (1=fast mode, 2=slow mode, 4=fast-auto mode,
  5=slow-auto mode)
 First letter is for RX, second is for TX.
 - Get/set DME attributes
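
A minimal user-space model of the 'GGLLMM' encoding (the struct and
helper here are invented for illustration):

#include <stdio.h>

struct pwr_mode {
    int gear_rx, gear_tx;
    int lanes_rx, lanes_tx;
    int mode_rx, mode_tx;
};

/* first char of each pair is RX, second is TX */
static int parse_pwr_mode(const char *s, struct pwr_mode *p)
{
    int i;

    for (i = 0; i < 6; i++)
        if (s[i] < '0' || s[i] > '9')
            return -1;
    p->gear_rx  = s[0] - '0'; p->gear_tx  = s[1] - '0';
    p->lanes_rx = s[2] - '0'; p->lanes_tx = s[3] - '0';
    p->mode_rx  = s[4] - '0'; p->mode_tx  = s[5] - '0';
    return 0;
}

int main(void)
{
    struct pwr_mode p;

    /* gear 1, two lanes, fast mode in both directions */
    if (!parse_pwr_mode("112211", &p))
        printf("RX: G%d L%d M%d / TX: G%d L%d M%d\n",
               p.gear_rx, p.lanes_rx, p.mode_rx,
               p.gear_tx, p.lanes_tx, p.mode_tx);
    return 0;
}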

Signed-off-by: Lee Susman 
Signed-off-by: Dolev Raviv 
Signed-off-by: Yaniv Gardi 
Signed-off-by: Raviv Shvili 
Signed-off-by: Gilad Broner 
---
 drivers/scsi/ufs/Makefile  |   1 +
 drivers/scsi/ufs/ufs-debugfs.c | 902 +
 drivers/scsi/ufs/ufs-debugfs.h |  38 ++
 drivers/scsi/ufs/ufshcd.c  | 229 ++-
 drivers/scsi/ufs/ufshcd.h  |  65 +++
 drivers/scsi/ufs/ufshci.h  |   2 +
 6 files changed, 1225 insertions(+), 12 deletions(-)
 create mode 100644 drivers/scsi/ufs/ufs-debugfs.c
 create mode 100644 drivers/scsi/ufs/ufs-debugfs.h

diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile
index 8303bcc..0692314 100644
--- a/drivers/scsi/ufs/Makefile
+++ b/drivers/scsi/ufs/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_SCSI_UFS_QCOM) += ufs-qcom.o
 obj-$(CONFIG_SCSI_UFSHCD) += ufshcd.o
 obj-$(CONFIG_SCSI_UFSHCD_PCI) += ufshcd-pci.o
 obj-$(CONFIG_SCSI_UFSHCD_PLATFORM) += ufshcd-pltfrm.o
+obj-$(CONFIG_DEBUG_FS) += ufs-debugfs.o
diff --git a/drivers/scsi/ufs/ufs-debugfs.c b/drivers/scsi/ufs/ufs-debugfs.c
new file mode 100644
index 000..27ab053
--- /dev/null
+++ b/drivers/scsi/ufs/ufs-debugfs.c
@@ -0,0 +1,902 @@
+/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * UFS debugfs - add debugfs interface to the ufshcd.
+ * This is currently used for statistics collection and exporting from the
+ * UFS driver.
+ * This infrastructure can be used for debugging or direct tweaking
+ * of the driver from userspace.
+ *
+ */
+
+#include "ufs-debugfs.h"
+#include "unipro.h"
+
+enum field_width {
+   BYTE= 1,
+   WORD= 2,
+};
+
+struct desc_field_offset {
+   char *name;
+   int offset;
+   enum field_width width_byte;
+};
+
+#define UFS_ERR_STATS_PRINT(file, error_index, string, error_seen) \
+   do {\
+   if (err_stats[error_index]) {   \
+   seq_printf(file, string,\
+   err_stats[error_index]);\
+   error_seen = true;  \
+   }   \
+   } while (0)
+#define DOORBELL_CLR_TOUT_US   (1000 * 1000) /* 1 sec */
+
+#define BUFF_LINE_CAPACITY 16
+#define TAB_CHARS 8
+
+static int ufsdbg_tag_stats_show(struct seq_file *file, void *data)
+{
+   struct ufs_hba *hba = (struct ufs_hba *)file->private;
+   struct ufs_stats *ufs_stats;
+   int i, j;
+   int max_depth;
+   bool is_tag_empty = true;
+   unsigned long flags;
+   char *sep = " | * | ";
+
+   if (!hba)
+   goto exit;
+
+   ufs_stats = &hba->ufs_stats;
+
+   if (!ufs_stats->enabled) {
+   pr_debug("%s: ufs statistics are disabled\n", __func__);
+   seq_puts(file, "ufs statistics are disabled");
+   goto exit;
+   }
+
+   max_depth = hba->nutrs;
+
+   spin_lock_irqsave(hba->host->host_lock, flags);
+   /* Header */
+   seq_printf(file, " Tag Stat\t\t%s Queue Fullness\n", sep);
+   for (i = 0; i < TAB_CHARS * (TS_NUM_STATS + 4); i++) {
+   seq_puts(file, "-");
+   if (i == (TAB_CHARS * 3 - 1))
+   seq_puts(file, sep);
+   }
+   seq_printf(file,
+   "\n #\tnum uses\t%s\t #\tAll\t Read\t Write\t Flush\n",
+   sep);
+
+   /* values */
+   for (i = 0; i < max_depth; i++) {
+   if (ufs_stats->tag_stats[i][0] <= 0 &&
+   ufs_stats->tag_stats[i][1] <= 0 &&
+   ufs_stats->tag_stats[i][2] <= 0 &&
+   ufs_stats->tag_stats[i][3] <= 0)
+   continue;
+
+   is_tag_empty = false;
+   

[RFC PATCH 3/4] arm64: add support for memtest

2015-03-02 Thread Vladimir Murzin
Add support for memtest command line option.

Signed-off-by: Vladimir Murzin 
---
 arch/arm64/mm/init.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ae85da6..597831b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -190,6 +190,8 @@ void __init bootmem_init(void)
min = PFN_UP(memblock_start_of_DRAM());
max = PFN_DOWN(memblock_end_of_DRAM());
 
+   early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
+
/*
 * Sparsemem tries to allocate bootmem in memory_present(), so must be
 * done after the fixed reservations.
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 0/4] Add ioctl and debug utilities to UFS driver

2015-03-02 Thread Gilad Broner
Changes from V3:
Changed tag statistics macros to functions and removed redundant
call to ufsdbg_remove_debugfs().
Other minor changes fixing previous comments.

Dolev Raviv (1):
  scsi: ufs: add ioctl interface for query request

Gilad Broner (1):
  scsi: ufs: add trace events and dump prints for debug

Lee Susman (1):
  scsi: ufs: add debugfs for ufs

Sujit Reddy Thumma (1):
  scsi: ufs: inject errors to verify error handling

 drivers/scsi/ufs/Makefile  |1 +
 drivers/scsi/ufs/ufs-debugfs.c | 1042 
 drivers/scsi/ufs/ufs-debugfs.h |   42 ++
 drivers/scsi/ufs/ufs-qcom.c|   53 ++
 drivers/scsi/ufs/ufs.h |   53 +-
 drivers/scsi/ufs/ufshcd.c  |  965 +++--
 drivers/scsi/ufs/ufshcd.h  |  115 +
 drivers/scsi/ufs/ufshci.h  |3 +
 include/trace/events/ufs.h |  227 +
 include/uapi/scsi/Kbuild   |1 +
 include/uapi/scsi/ufs/Kbuild   |3 +
 include/uapi/scsi/ufs/ioctl.h  |   57 +++
 include/uapi/scsi/ufs/ufs.h|   66 +++
 lib/Kconfig.debug  |   14 +
 14 files changed, 2552 insertions(+), 90 deletions(-)
 create mode 100644 drivers/scsi/ufs/ufs-debugfs.c
 create mode 100644 drivers/scsi/ufs/ufs-debugfs.h
 create mode 100644 include/trace/events/ufs.h
 create mode 100644 include/uapi/scsi/ufs/Kbuild
 create mode 100644 include/uapi/scsi/ufs/ioctl.h
 create mode 100644 include/uapi/scsi/ufs/ufs.h

-- 
Qualcomm Israel, on behalf of Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH 4/4] arm: add support for memtest

2015-03-02 Thread Vladimir Murzin
Add support for memtest command line option.

Signed-off-by: Vladimir Murzin 
---
 arch/arm/mm/init.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 1609b02..3d0e9ae 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -335,6 +335,9 @@ void __init bootmem_init(void)
 
 find_limits(&min, &max_low, &max_high);
 
+   early_memtest((phys_addr_t)min << PAGE_SHIFT,
+ (phys_addr_t)max_low << PAGE_SHIFT);
+
/*
 * Sparsemem tries to allocate bootmem in memory_present(),
 * so must be done after the fixed reservations
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 32/35] clockevents: Fix cpu down race for hrtimer based broadcasting

2015-03-02 Thread Peter Zijlstra
On Fri, Feb 27, 2015 at 02:19:05PM +0530, Preeti U Murthy wrote:
> The problem reported in the changelog of this patch is causing severe
> regressions very frequently on our machines for certain usecases. It would
> help to put in a fix in place first and then follow that up with these
> cleanups.  A fix on the below lines :

Regression how? Neither Thomas' Changelog, nor yours mention its a
regression.

If its a (recent) Regression you need to have a Fixes tag at the very
least. So when was this broken and by which patch?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH 2/4] memtest: use phys_addr_t for physical addresses

2015-03-02 Thread Vladimir Murzin
Since memtest might be used by other architectures, pass the input
parameters as phys_addr_t instead of long to prevent overflow.
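
The overflow in question is the classic 32-bit truncation; a
self-contained illustration (uint32_t stands in for unsigned long on a
32-bit build):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t phys = 0x100000000ULL;     /* 4G: valid with LPAE on 32-bit ARM */
    uint32_t as_ulong = (uint32_t)phys; /* models a 32-bit unsigned long */

    printf("%#llx truncates to %#x\n",
           (unsigned long long)phys, as_ulong); /* 0x100000000 -> 0 */
    return 0;
}
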
---
 include/linux/memblock.h |4 ++--
 mm/memtest.c |   16 
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6724cb0..9497ec7 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -366,9 +366,9 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblock_region *reg)
 #endif
 
 #ifdef CONFIG_MEMTEST
-extern void early_memtest(unsigned long start, unsigned long end);
+extern void early_memtest(phys_addr_t start, phys_addr_t end);
 #else
-static inline void early_memtest(unsigned long start, unsigned long end)
+static inline void early_memtest(phys_addr_t start, phys_addr_t end)
 {
 }
 #endif
diff --git a/mm/memtest.c b/mm/memtest.c
index 1e9da79..1997d93 100644
--- a/mm/memtest.c
+++ b/mm/memtest.c
@@ -29,7 +29,7 @@ static u64 patterns[] __initdata = {
0x7a6c7258554e494cULL, /* yeah ;-) */
 };
 
-static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
+static void __init reserve_bad_mem(u64 pattern, phys_addr_t start_bad, phys_addr_t end_bad)
 {
printk(KERN_INFO "  %016llx bad mem addr %010llx - %010llx reserved\n",
   (unsigned long long) pattern,
@@ -38,11 +38,11 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
memblock_reserve(start_bad, end_bad - start_bad);
 }
 
-static void __init memtest(u64 pattern, u64 start_phys, u64 size)
+static void __init memtest(u64 pattern, phys_addr_t start_phys, phys_addr_t size)
 {
u64 *p, *start, *end;
-   u64 start_bad, last_bad;
-   u64 start_phys_aligned;
+   phys_addr_t start_bad, last_bad;
+   phys_addr_t start_phys_aligned;
const size_t incr = sizeof(pattern);
 
start_phys_aligned = ALIGN(start_phys, incr);
@@ -69,14 +69,14 @@ static void __init memtest(u64 pattern, u64 start_phys, u64 size)
reserve_bad_mem(pattern, start_bad, last_bad + incr);
 }
 
-static void __init do_one_pass(u64 pattern, u64 start, u64 end)
+static void __init do_one_pass(u64 pattern, phys_addr_t start, phys_addr_t end)
 {
u64 i;
phys_addr_t this_start, this_end;
 
 for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) {
-   this_start = clamp_t(phys_addr_t, this_start, start, end);
-   this_end = clamp_t(phys_addr_t, this_end, start, end);
+   this_start = clamp(this_start, start, end);
+   this_end = clamp(this_end, start, end);
if (this_start < this_end) {
printk(KERN_INFO "  %010llx - %010llx pattern 
%016llx\n",
   (unsigned long long)this_start,
@@ -102,7 +102,7 @@ static int __init parse_memtest(char *arg)
 
 early_param("memtest", parse_memtest);
 
-void __init early_memtest(unsigned long start, unsigned long end)
+void __init early_memtest(phys_addr_t start, phys_addr_t end)
 {
unsigned int i;
unsigned int idx = 0;
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v4 2/2] cpusets,isolcpus: add file to show isolated cpus in cpuset

2015-03-02 Thread Mike Galbraith
On Mon, 2015-03-02 at 09:35 -0500, Rik van Riel wrote:
> On 03/02/2015 07:44 AM, Mike Galbraith wrote:
> > On Mon, 2015-03-02 at 10:09 +0100, Peter Zijlstra wrote:
> >> On Thu, Feb 26, 2015 at 12:12:31PM -0500, Rik van Riel wrote:
> >>> Subject: cpusets,isolcpus: add file to show isolated cpus in cpuset
> >>>
> >>> The previous patch makes it so the code skips over isolcpus when
> >>> building scheduler load balancing domains. This makes it hard to
> >>> see for a user which of the CPUs in a cpuset are participating in
> >>> load balancing, and which ones are isolated cpus.
> >>>
> >>> Add a cpuset.isolcpus file with info on which cpus in a cpuset are
> >>> isolated CPUs.
> >>>
> >>> This file is read-only for now. In the future we could extend things
> >>> so isolcpus can be changed at run time, for the root (system wide)
> >>> cpuset only.
> >>>
> >>> Acked-by: David Rientjes 
> >>> Cc: Peter Zijlstra 
> >>> Cc: Clark Williams 
> >>> Cc: Li Zefan 
> >>> Cc: Ingo Molnar 
> >>> Cc: Luiz Capitulino 
> >>> Cc: David Rientjes 
> >>> Cc: Mike Galbraith 
> >>> Cc: cgro...@vger.kernel.org
> >>> Signed-off-by: Rik van Riel 
> >>
> >> So let me start off by saying I hate isolcpus ;-)
> >>
> >> Let me further state that I had hopes we could extend cpusets to
> >> natively provide the functionality isolcpus has, and kill isolcpus.
> > 
> > +1
> > 
> > That's where nohz_full goop belongs too.
> 
> Except nohz_full and isolcpus are very much global attributes of
> each CPU, so I am not sure whether it would make sense to allow
> configuration of this attribute anywhere other than the root
> cpuset.

They're attributes of exclusive sets, which excludes the root set.  It'd
be kinda hard to have the root set be both ticked and tickless :)

-Mike


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH 0/4] make memtest a generic kernel feature

2015-03-02 Thread Vladimir Murzin
Hi,

Memtest is a simple feature which fills the memory with a given set of
patterns and validates memory contents; if bad memory regions are
detected, it reserves them via the memblock API. Since the memblock API
is widely used by other architectures, this feature can be enabled
outside of the x86 world.

This patch set promotes memtest to live under generic mm umbrella and enables
memtest feature for arm/arm64.

Patches are built on top of 4.0-rc1

Vladimir Murzin (4):
  mm: move memtest under /mm
  memtest: use phys_addr_t for physical addresses
  arm64: add support for memtest
  arm: add support for memtest

 arch/arm/mm/init.c  |3 ++
 arch/arm64/mm/init.c|2 +
 arch/x86/Kconfig|   11 
 arch/x86/include/asm/e820.h |8 ---
 arch/x86/mm/Makefile|2 -
 arch/x86/mm/memtest.c   |  118 ---
 include/linux/memblock.h|8 +++
 lib/Kconfig.debug   |   11 
 mm/Makefile |1 +
 mm/memtest.c|  118 +++
 10 files changed, 143 insertions(+), 139 deletions(-)
 delete mode 100644 arch/x86/mm/memtest.c
 create mode 100644 mm/memtest.c

-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/8] x86, kaslr: get kaslr_enabled back correctly

2015-03-02 Thread Borislav Petkov
On Mon, Mar 02, 2015 at 03:04:30AM -0800, Yinghai Lu wrote:
> We can not assume that range is safe to use.
> 
> Please check attach one that should fix the problem really.

Well, it seems to work here but it still doesn't look reliable enough to
me. And this addon_zo thing of arbitrary 256K is strange.

What we should do instead and IMHO is have a special setup_data section
located right above .text where struct setup_data things are put in so
that they never get overwritten. This should be the cleanest IMHO.

It is hpa's call in the end of the day anyway.

Thanks.

-- 
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] tick/broadcast-hrtimer : Fix suspicious RCU usage in idle loop

2015-03-02 Thread Peter Zijlstra
On Thu, Feb 26, 2015 at 08:52:02AM +0530, Preeti U Murthy wrote:
> The hrtimer mode of broadcast queues hrtimers in the idle entry
> path so as to wakeup cpus in deep idle states. 

Callgraph please...

> hrtimer_{start/cancel}
> functions call into tracing which uses RCU. But it is not legal to call
> into RCU in cpuidle because it is one of the quiescent states. Hence
> protect this region with RCU_NONIDLE which informs RCU that the cpu
> is momentarily non-idle.

It is not clear to me that every user of bc_set_next() is from IDLE.
From what I can tell it ends up being clockevents_program_event() and
that is called quite a lot.

Why is bc_set_next() a good function to annotate?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 00/15] x86, alternatives: Instruction padding and more robust JMPs

2015-03-02 Thread Hitoshi Mitake

Hi Borislav,

At Thu, 26 Feb 2015 19:13:38 +0100,
Borislav Petkov wrote:
> 
> Hi all,
> 
> So this alternatives patchset breaks perf bench mem, here are a couple
> of patches ontop, you guys tell me whether it makes sense. I wanted to
> make it run all memset/memcpy routines so here are a couple of patches
> which do this:
> 
> ./perf bench mem memset -l 20MB -r all
> # Running 'mem/memset' benchmark:
> Routine default (Default memset() provided by glibc)
> # Copying 20MB Bytes ...
> 
>1.136000 GB/Sec
>6.026304 GB/Sec (with prefault)
> Routine x86-64-unrolled (unrolled memset() in arch/x86/lib/memset_64.S)
> # Copying 20MB Bytes ...
> 
>5.333493 GB/Sec
>5.633473 GB/Sec (with prefault)
> Routine x86-64-stosq (movsq-based memset() in arch/x86/lib/memset_64.S)
> # Copying 20MB Bytes ...
> 
>5.828484 GB/Sec
>5.851183 GB/Sec (with prefault)
> Routine x86-64-stosb (movsb-based memset() in arch/x86/lib/memset_64.S)
> # Copying 20MB Bytes ...
> 
>5.553384 GB/Sec
>5.956465 GB/Sec (with prefault)
> 
> This way you can see all results by executing one command only with "-r
> all".
> 
> Patches coming as a reply to this message.

I'm not sure I'm a suitable person for reviewing your patch, but I
tested this patchset for perf bench with your latest (v2) patchset for
x86 alternatives. It looks good to me.
Reviewed-by: Hitoshi Mitake 

Thanks,
Hitoshi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v4 2/2] cpusets,isolcpus: add file to show isolated cpus in cpuset

2015-03-02 Thread Rik van Riel
On 03/02/2015 07:44 AM, Mike Galbraith wrote:
> On Mon, 2015-03-02 at 10:09 +0100, Peter Zijlstra wrote:
>> On Thu, Feb 26, 2015 at 12:12:31PM -0500, Rik van Riel wrote:
>>> Subject: cpusets,isolcpus: add file to show isolated cpus in cpuset
>>>
>>> The previous patch makes it so the code skips over isolcpus when
>>> building scheduler load balancing domains. This makes it hard to
>>> see for a user which of the CPUs in a cpuset are participating in
>>> load balancing, and which ones are isolated cpus.
>>>
>>> Add a cpuset.isolcpus file with info on which cpus in a cpuset are
>>> isolated CPUs.
>>>
>>> This file is read-only for now. In the future we could extend things
>>> so isolcpus can be changed at run time, for the root (system wide)
>>> cpuset only.
>>>
>>> Acked-by: David Rientjes 
>>> Cc: Peter Zijlstra 
>>> Cc: Clark Williams 
>>> Cc: Li Zefan 
>>> Cc: Ingo Molnar 
>>> Cc: Luiz Capitulino 
>>> Cc: David Rientjes 
>>> Cc: Mike Galbraith 
>>> Cc: cgro...@vger.kernel.org
>>> Signed-off-by: Rik van Riel 
>>
>> So let me start off by saying I hate isolcpus ;-)
>>
>> Let me further state that I had hopes we could extend cpusets to
>> natively provide the functionality isolcpus has, and kill isolcpus.
> 
> +1
> 
> That's where nohz_full goop belongs too.

Except nohz_full and isolcpus are very much global attributes of
each CPU, so I am not sure whether it would make sense to allow
configuration of this attribute anywhere other than the root
cpuset.

-- 
All rights reversed


[RFC PATCH v4 33/34] ftrace: enable make ftrace nop before ftrace_init().

2015-03-02 Thread Wang Nan
This patch is for early kprobes: it enables early kprobes to convert the
target instruction into a nop so that it is possible to optimize them.

Signed-off-by: Wang Nan 
---
 include/linux/ftrace.h |  5 +
 kernel/trace/ftrace.c  | 18 ++
 2 files changed, 23 insertions(+)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fe99166..9cb6061a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -703,9 +703,14 @@ static inline void __ftrace_enabled_restore(int enabled)
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
 extern void ftrace_init_early(void);
+extern int ftrace_process_loc_early(unsigned long ip);
 #else
 static inline void ftrace_init(void) { }
 static inline void ftrace_init_early(void) { }
+static inline int ftrace_process_loc_early(unsigned long __unused)
+{
+   return 0;
+}
 #endif
 
 /*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 78787d4..bb66b20 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5046,6 +5046,24 @@ void __init ftrace_init_early(void)
ftrace_sort_mcount_area(__start_mcount_loc, __stop_mcount_loc);
 }
 
+int __init ftrace_process_loc_early(unsigned long addr)
+{
+   unsigned long ip;
+   struct dyn_ftrace fake_rec;
+   int ret;
+
+   BUG_ON(ftrace_pages_start);
+
+   ip = ftrace_location(addr);
+   if (ip != addr)
+   return -EINVAL;
+
+   memset(&fake_rec, '\0', sizeof(fake_rec));
+   fake_rec.ip = ip;
+   ret = ftrace_make_nop(NULL, &fake_rec, MCOUNT_ADDR);
+   return ret;
+}
+
 /* Do nothing if arch does not support this */
 void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 {
-- 
1.8.4



[RFC v2 3/4] locks: Split insert/delete block functions into flock/posix parts

2015-03-02 Thread Daniel Wagner
The locks_insert/delete_block() functions are used for flock, posix
and leases types. blocked_lock_lock is used to serialize all access to
fl_link, fl_block, fl_next and blocked_hash. Here, we prepare the
stage for using blocked_lock_lock to protect blocked_hash.

Signed-off-by: Daniel Wagner 
Cc: Jeff Layton 
Cc: "J. Bruce Fields" 
Cc: Alexander Viro 
---
 fs/locks.c | 48 
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 4498da0..02821dd 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -611,11 +611,20 @@ static void locks_delete_global_blocked(struct file_lock *waiter)
  */
 static void __locks_delete_block(struct file_lock *waiter)
 {
-   locks_delete_global_blocked(waiter);
	list_del_init(&waiter->fl_block);
waiter->fl_next = NULL;
 }
 
+/* Posix block variant of __locks_delete_block.
+ *
+ * Must be called with blocked_lock_lock held.
+ */
+static void __locks_delete_posix_block(struct file_lock *waiter)
+{
+   locks_delete_global_blocked(waiter);
+   __locks_delete_block(waiter);
+}
+
 static void locks_delete_block(struct file_lock *waiter)
 {
	spin_lock(&blocked_lock_lock);
@@ -623,6 +632,13 @@ static void locks_delete_block(struct file_lock *waiter)
	spin_unlock(&blocked_lock_lock);
 }
 
+static void locks_delete_posix_block(struct file_lock *waiter)
+{
+   spin_lock(&blocked_lock_lock);
+   __locks_delete_posix_block(waiter);
+   spin_unlock(&blocked_lock_lock);
+}
+
 /* Insert waiter into blocker's block list.
  * We use a circular list so that processes can be easily woken up in
  * the order they blocked. The documentation doesn't require this but
@@ -639,7 +655,17 @@ static void __locks_insert_block(struct file_lock *blocker,
	BUG_ON(!list_empty(&waiter->fl_block));
	waiter->fl_next = blocker;
	list_add_tail(&waiter->fl_block, &blocker->fl_block);
-   if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
+}
+
+/* Posix block variant of __locks_insert_block.
+ *
+ * Must be called with flc_lock and blocked_lock_lock held.
+ */
+static void __locks_insert_posix_block(struct file_lock *blocker,
+   struct file_lock *waiter)
+{
+   __locks_insert_block(blocker, waiter);
+   if (!IS_OFDLCK(blocker))
locks_insert_global_blocked(waiter);
 }
 
@@ -675,7 +701,10 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
 
	waiter = list_first_entry(&blocker->fl_block,
struct file_lock, fl_block);
-   __locks_delete_block(waiter);
+   if (IS_POSIX(blocker))
+   __locks_delete_posix_block(waiter);
+   else
+   __locks_delete_block(waiter);
if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
waiter->fl_lmops->lm_notify(waiter);
else
@@ -985,7 +1014,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
	spin_lock(&blocked_lock_lock);
if (likely(!posix_locks_deadlock(request, fl))) {
error = FILE_LOCK_DEFERRED;
-   __locks_insert_block(fl, request);
+   __locks_insert_posix_block(fl, request);
}
	spin_unlock(&blocked_lock_lock);
goto out;
@@ -1186,7 +1215,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
if (!error)
continue;
 
-   locks_delete_block(fl);
+   locks_delete_posix_block(fl);
break;
}
return error;
@@ -1283,7 +1312,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
continue;
}
 
-   locks_delete_block(&fl);
+   locks_delete_posix_block(&fl);
break;
}
 
@@ -2103,7 +2132,10 @@ static int do_lock_file_wait(struct file *filp, unsigned 
int cmd,
if (!error)
continue;
 
-   locks_delete_block(fl);
+   if (IS_POSIX(fl))
+   locks_delete_posix_block(fl);
+   else
+   locks_delete_block(fl);
break;
}
 
@@ -2467,7 +2499,7 @@ posix_unblock_lock(struct file_lock *waiter)
 
	spin_lock(&blocked_lock_lock);
	if (waiter->fl_next)
-   __locks_delete_block(waiter);
+   __locks_delete_posix_block(waiter);
	else
	status = -ENOENT;
	spin_unlock(&blocked_lock_lock);
-- 
2.1.0



Re: [PATCH 3/3] gpio: pxa: add PXA1928 gpio type support

2015-03-02 Thread Linus Walleij
On Tue, Feb 3, 2015 at 2:44 PM, Rob Herring  wrote:
> On Tue, Feb 3, 2015 at 6:41 AM, Linus Walleij  
> wrote:
>> On Tue, Jan 27, 2015 at 5:46 AM, Rob Herring  wrote:
>>
>>> Add support for PXA1928 GPIOs. The PXA1928 adds a 6th bank from previous
>>> generations.
>>>
>>> Signed-off-by: Jing Xiang 
>>> Signed-off-by: Xiangzhan Meng 
>>> [robh: ported to 3.19 from vendor kernel]
>>> Signed-off-by: Rob Herring 
>>> Cc: Linus Walleij 
>>> Cc: Alexandre Courbot 
>>
>> Patch applied...
>>
>>> -#define BANK_OFF(n)(((n) < 3) ? (n) << 2 : 0x100 + (((n) - 3) << 2))
>>> +#define BANK_OFF(n)	(((n) < 3) ? (n) << 2 : ((n) > 5 ? 0x200 : 0x100) \
>>> +			+ (((n) % 3) << 2))
>>
>> While this is a bit convoluted.
>>
>> Someone care to send a patch converting it to something like a
>> parseable static inline?
>
> I should have looked more closely than just taking the vendor code.
> This was needlessly convoluted before and this just added on to it. It
> can be simplified down to this:
>
> #define BANK_OFF(n) (((n) / 3) << 8) + (((n) % 3) << 2)
>
> I'll send a fix unless you want to fix up this patch.

I never saw a fixup patch, so if you have time... please tend to it.

Yours,
Linus Walleij
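
As a quick sanity check of the proposed simplification, the userspace
program below compares the two forms for banks 0-6 and they agree. (One
assumption here: outer parentheses are added around the new macro, which
the one-liner above omits and which a real patch would want.)

	#include <stdio.h>

	/* Old, convoluted form, as extended for the PXA1928's 6th bank. */
	#define BANK_OFF_OLD(n)	(((n) < 3) ? (n) << 2 : \
				 ((n) > 5 ? 0x200 : 0x100) + (((n) % 3) << 2))
	/* Proposed simplification, with outer parentheses added. */
	#define BANK_OFF_NEW(n)	((((n) / 3) << 8) + (((n) % 3) << 2))

	int main(void)
	{
		for (int n = 0; n <= 6; n++)
			printf("bank %d: old=0x%03x new=0x%03x\n",
			       n, BANK_OFF_OLD(n), BANK_OFF_NEW(n));
		return 0;
	}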


Re: [PATCH v2 1/2] x86: mce: kexec: turn off MCE in kexec

2015-03-02 Thread Naoya Horiguchi

On Mon, Mar 02, 2015 at 01:17:01PM +0100, Borislav Petkov wrote:

On Mon, Mar 02, 2015 at 02:31:19AM +, Naoya Horiguchi wrote:
> And please note that the target of this patch is an MCE when the kernel is
> already running on kdump code (so crashing happened *not* because of the MCE).
> In that case, we can expect that kdump works fine if the MCE hits a "kdump
> shutdown" CPU which is just running the cpu_relax() loop, because a 2nd kernel's
> CPU isn't affected by the MCE (even if the CPU failure is a fatal one.)

Well, why would you even want to disable MCA then? If all the CPUs are
offlined, it is very very highly unlikely they'd cause an MCE.


Yes, CPU offlining is one option to keep other CPUs quiet. I'm not sure why
the current kexec implementation doesn't offline the other CPUs but just runs a
cpu_relax() loop; my guess is that in some kernel panic situations (like
soft lockup) we want to keep the CPUs' status undisturbed to make sure the bug's
info is captured in kdump.


> If a fatal MCE happens on the CPU running kdump code, there's no reason to
> try harder to get kdump as you pointed out. In such case, what we can do is
> to print out a message like "kdump failed due to MCE" and reset the system.

Yes, so a primitive kdump-specific MCE handler would be more viable than
disabling MCA.


OK.

Thanks,
Naoya Horiguchi


[RFC PATCH v4 21/34] ftrace: sort ftrace entries earlier.

2015-03-02 Thread Wang Nan
By extracting the mcount sorting code and sorting the entries earlier,
further patches will be able to determine whether an address is on an
ftrace entry or not using bsearch().

ftrace_sort_mcount_area() will be called before, during and after
ftrace_init() (on module insertion). Ensure it sorts the kernel mcount
table only once.
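
To illustrate the payoff, here is a userspace sketch of the bsearch()
lookup that a sorted mcount table enables. The names mirror the kernel's,
but this is an illustration, not the actual ftrace_location() code.

	#include <stdlib.h>

	static int cmp_ip(const void *a, const void *b)
	{
		unsigned long x = *(const unsigned long *)a;
		unsigned long y = *(const unsigned long *)b;

		return (x > y) - (x < y);
	}

	/* With [start, stop) sorted, "is addr an mcount location?" becomes
	 * a binary search instead of a linear scan of the whole table.
	 */
	static int is_mcount_loc(unsigned long addr,
				 unsigned long *start, unsigned long *stop)
	{
		return bsearch(&addr, start, stop - start,
			       sizeof(*start), cmp_ip) != NULL;
	}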

Signed-off-by: Wang Nan 
---
 include/linux/ftrace.h |  2 ++
 init/main.c|  1 +
 kernel/trace/ftrace.c  | 38 --
 3 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 1da6029..8db315a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -701,8 +701,10 @@ static inline void __ftrace_enabled_restore(int enabled)
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
+extern void ftrace_init_early(void);
 #else
 static inline void ftrace_init(void) { }
+static inline void ftrace_init_early(void) { }
 #endif
 
 /*
diff --git a/init/main.c b/init/main.c
index 679d49e..6d269ac 100644
--- a/init/main.c
+++ b/init/main.c
@@ -518,6 +518,7 @@ asmlinkage __visible void __init start_kernel(void)
boot_cpu_init();
page_address_init();
pr_notice("%s", linux_banner);
+   ftrace_init_early();
setup_arch(_line);
init_kprobes_early();
	mm_init_cpumask(&init_mm);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6c6cbb1..fa3cdd3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -112,6 +112,7 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 static struct ftrace_ops global_ops;
 static struct ftrace_ops control_ops;
+static bool kernel_mcount_sorted = false;
 
 static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip,
   struct ftrace_ops *op, struct pt_regs *regs);
@@ -4743,6 +4744,32 @@ static void ftrace_swap_ips(void *a, void *b, int size)
*ipb = t;
 }
 
+static void ftrace_sort_mcount_area(unsigned long *start, unsigned long *end)
+{
+   extern unsigned long __start_mcount_loc[];
+   extern unsigned long __stop_mcount_loc[];
+
+   unsigned long count;
+   bool is_kernel_mcount;
+
+   count = end - start;
+   if (!count)
+   return;
+
+   is_kernel_mcount =
+   (start == __start_mcount_loc) &&
+   (end == __stop_mcount_loc);
+
+   if (is_kernel_mcount && kernel_mcount_sorted)
+   return;
+
+   sort(start, count, sizeof(*start),
+   ftrace_cmp_ips, ftrace_swap_ips);
+
+   if (is_kernel_mcount)
+   kernel_mcount_sorted = true;
+}
+
 static int ftrace_process_locs(struct module *mod,
   unsigned long *start,
   unsigned long *end)
@@ -4761,8 +4788,7 @@ static int ftrace_process_locs(struct module *mod,
if (!count)
return 0;
 
-   sort(start, count, sizeof(*start),
-ftrace_cmp_ips, ftrace_swap_ips);
+   ftrace_sort_mcount_area(start, end);
 
start_pg = ftrace_allocate_pages(count);
if (!start_pg)
@@ -4965,6 +4991,14 @@ void __init ftrace_init(void)
ftrace_disabled = 1;
 }
 
+void __init ftrace_init_early(void)
+{
+   extern unsigned long __start_mcount_loc[];
+   extern unsigned long __stop_mcount_loc[];
+
+   ftrace_sort_mcount_area(__start_mcount_loc, __stop_mcount_loc);
+}
+
 /* Do nothing if arch does not support this */
 void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 {
-- 
1.8.4



[RFC PATCH v4 23/34] ftrace: notify kprobe when ftrace is initialized.

2015-03-02 Thread Wang Nan
Make ftrace call init_kprobes_on_ftrace() when ftrace_init() has
finished. Before this call, mark kprobes on ftrace with
'KPROBE_FLAG_FTRACE_EARLY' instead of 'KPROBE_FLAG_FTRACE' so that the
kprobe subsystem doesn't treat these kprobes as ftrace kprobes.

Following patches will convert such kprobes into kprobes on ftrace.

Signed-off-by: Wang Nan 
---
 include/linux/kprobes.h | 11 +++
 kernel/kprobes.c| 17 -
 kernel/trace/ftrace.c   |  2 ++
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index bb2b2c6..96dc842 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -130,6 +130,8 @@ struct kprobe {
   * this flag is only for optimized_kprobe.
   */
 #define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */
+/* probe will use ftrace, but ftrace is not ready when registering */
+#define KPROBE_FLAG_FTRACE_EARLY   16
 
 /* Has this kprobe gone ? */
 static inline int kprobe_gone(struct kprobe *p)
@@ -269,6 +271,14 @@ extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
 extern bool arch_within_kprobe_blacklist(unsigned long addr);
 
+#if defined(CONFIG_EARLY_KPROBES) && defined(CONFIG_KPROBES_ON_FTRACE)
+extern void init_kprobes_on_ftrace(void);
+#else
+static inline void init_kprobes_on_ftrace(void)
+{
+}
+#endif // CONFIG_EARLY_KPROBES && CONFIG_KPROBES_ON_FTRACE
+
 #ifdef CONFIG_EARLY_KPROBES
 
 #define NR_EARLY_KPROBES_SLOTS CONFIG_NR_EARLY_KPROBES_SLOTS
@@ -453,6 +463,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
  struct ftrace_ops *ops, struct pt_regs *regs);
 extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
+
 #endif
 
 int arch_check_ftrace_location(struct kprobe *p);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 4b7b20a..b5e13ba 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -69,6 +69,11 @@
 
 static int kprobes_initialized;
 static int kprobes_blacklist_initialized;
+#if defined(CONFIG_KPROBES_ON_FTRACE) && defined(CONFIG_EARLY_KPROBES)
+static bool kprobes_on_ftrace_initialized __read_mostly = false;
+#else
+# define kprobes_on_ftrace_initialized false
+#endif
 
 bool kprobes_is_early(void)
 {
@@ -1497,7 +1502,10 @@ int __weak arch_check_ftrace_location(struct kprobe *p)
/* Given address is not on the instruction boundary */
if ((unsigned long)p->addr != ftrace_addr)
return -EILSEQ;
-   p->flags |= KPROBE_FLAG_FTRACE;
+   if (unlikely(!kprobes_on_ftrace_initialized))
+   p->flags |= KPROBE_FLAG_FTRACE_EARLY;
+   else
+   p->flags |= KPROBE_FLAG_FTRACE;
 #else  /* !CONFIG_KPROBES_ON_FTRACE */
return -EINVAL;
 #endif
@@ -2574,3 +2582,10 @@ module_init(init_kprobes);
 
 /* defined in arch/.../kernel/kprobes.c */
 EXPORT_SYMBOL_GPL(jprobe_return);
+
+#if defined(CONFIG_KPROBES_ON_FTRACE) && defined(CONFIG_EARLY_KPROBES)
+void init_kprobes_on_ftrace(void)
+{
+   kprobes_on_ftrace_initialized = true;
+}
+#endif
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7fa88d0..5cb0269 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include <linux/kprobes.h>
 #include 
 #include 
 #include 
@@ -5022,6 +5023,7 @@ void __init ftrace_init(void)
if (ret)
pr_warning("Failed to register trace ftrace module exit 
notifier\n");
 
+   init_kprobes_on_ftrace();
set_ftrace_early_filters();
 
return;
-- 
1.8.4



Re: [PATCH v3 3/3] pinctrl: qcom: Add msm8916 pinctrl driver

2015-03-02 Thread Linus Walleij
On Wed, Feb 4, 2015 at 3:39 PM, Stanimir Varbanov  wrote:
> On 02/03/2015 06:47 PM, Andy Gross wrote:
>> On Fri, Jan 30, 2015 at 12:04:01PM +0200, Stanimir Varbanov wrote:
>>> From: Joonwoo Park 
>>>
>>> Add initial pinctrl driver to support pin configuration with
>>> pinctrl framework for msm8916.
>>>
>>> Signed-off-by: Joonwoo Park 
>>> Signed-off-by: Stanimir Varbanov 
>>> Reviewed-by: Bjorn Andersson 
>>
>> Looks good.
>>
>> Reviewed-by: Andy Gross 
>>
>
> Linus, do you want me to resend the patchset including this Reviewed-by tag?

1/3 is already merged upstream.

2/3 has an unaddressed review comment. Please fix that and resend.

So this 3/3 can be resent as well, with the Review tag.

But it's down to just two patches now at least.

Yours,
Linus Walleij


[RFC PATCH v4 00/34] Early kprobe: enable kprobes at very early booting stage.

2015-03-02 Thread Wang Nan
This is version 4 of early kprobes. The original idea and the previous
versions can be found at [1] and [2].

The aim of early kprobes is to provide a method to enable kprobes as
early as possible, to allow users to debug the booting stage.

The user interface and data collection are still very weak in this
series. However, this series is different from the previous version in
many places, so I think it is worth posting at this time. The main
changes include:

 a) Don't rely on OPTPROBE. This series enables breakpoint-based kprobes
    relying on early_trap_init(). Early kprobes are now usable after
    setup_arch(), later than in the previous version, but early enough
    for debugging.

 b) Make ftrace and early kprobes coupled with each other based on a
    suggestion from Steven Rostedt, removing the notification chain.

Patches 1-3 are bugfixes for x86. Patch 1 should have already been
merged into the -tip tree. Without patch 1, early_trap_init() on x86
doesn't work correctly.

Patches 4-17 are basic early kprobes support. The main part of it is
static allocation support.

Patch 18 only enables early kprobes for ARM.

Patches 19-30 deal with the coupling of kprobes and ftrace. Ftrace
notifies kprobes about its initialization and instruction conversion.
After ftrace is fully initialized, kprobes uses arm_kprobe_ftrace() to
reinstall all ftrace-based kprobes, directly converting them to ftrace.

Patch 31 enables early kprobes for X86.

Patch 32 shows rough kernel cmdline support. The usage is similar to
my V1 patch. I'd like to drop it and design a new one, so I left it
unchanged.

Patches 33-34 convert ftrace entries into NOPs at an early stage,
enabling kprobes to optimize them.

[1] https://lkml.org/lkml/2015/1/7/76

[2] https://lkml.org/lkml/2015/2/13/24

Wang Nan (34):
  x86, traps: Enable DEBUG_STACK after cpu_init() for TRAP_DB/BP.
  x86, traps: separate set_intr_gate() and cleanup early_trap_init().
  x86, traps: install gates using IST after cpu_init().
  early kprobes: within_kprobe_blacklist_early() early.
  early kprobes: introduce kprobe_is_early for futher early kprobe use.
  early kprobes: enable kprobe smoke test for early kprobes.
  early kprobes: init kprobes at very early stage.
  early kprobes: ARM: add definition for vmlinux.lds use.
  early kprobes: x86: add definition for vmlinux.lds use.
  early kprobes: introduce early kprobes related code area.
  early kprobes: introduces macros for allocing early kprobe resources.
  early kprobes: allows __alloc_insn_slot() from early kprobes slots.
  early kprobes: alloc optimized kprobe before memory system is ready.
  early kprobes: use stop_machine() based x86 optimizer.
  early kprobes: use stop_machine() based optimization method for early
kprobes.
  early kprobes: prohibit probing at early kprobe reserved area.
  early kprobes: run kprobes smoke test for early kprobes.
  early kprobes: add CONFIG_EARLY_KPROBES option.
  ftrace: don't update record flags if code modification fail.
  ftrace/x86: Ensure rec->flags no change when failure occures.
  ftrace: sort ftrace entries earlier.
  ftrace: allow search ftrace addr before ftrace fully inited.
  ftrace: notify kprobe when ftrace is initialized.
  early kprobes on ftrace: introduce x86 arch_fix_ftrace_early_kprobe().
  ftrace: don't fire ftrace_bug if the instruction is taken by early
kprobes.
  early kprobes on ftrace: x86: arch code for retrieving kprobed
instruction.
  early kprobes on ftrace: kprobe_on_ftrace_get_old_insn()
  ftrace: x86: get old instruction from early kprobes when make call.
  ftrace: x86: call kprobe_int3_handler() in ftrace int3 handler.
  early kprobes: convert early kprobes on ftrace to ftrace.
  early kprobes: enable early kprobes for x86.
  early kprobes: enable 'ekprobe=' cmdline option for early kprobes.
  ftrace: enable make ftrace nop before ftrace_init().
  early kprobes: enable optimization of kprobes on ftrace before ftrace
is ready.

 arch/Kconfig  |  19 ++
 arch/arm/Kconfig  |   1 +
 arch/arm/kernel/vmlinux.lds.S |  10 +
 arch/x86/Kconfig  |   1 +
 arch/x86/include/asm/desc.h   |   7 +-
 arch/x86/kernel/ftrace.c  |  46 -
 arch/x86/kernel/kprobes/core.c|  56 ++
 arch/x86/kernel/kprobes/opt.c |  45 -
 arch/x86/kernel/traps.c   |  39 +++-
 arch/x86/kernel/vmlinux.lds.S |  10 +
 include/asm-generic/vmlinux.lds.h |  19 +-
 include/linux/ftrace.h|  12 +-
 include/linux/kprobes.h   | 167 +
 init/main.c   |   3 +
 kernel/kprobes.c  | 375 --
 kernel/test_kprobes.c |  58 --
 kernel/trace/ftrace.c | 130 +++--
 17 files changed, 932 insertions(+), 66 deletions(-)

-- 
1.8.4


Re: [Xen-devel] [PATCH] xen, apic: Setup our own APIC driver and validator for APIC IDs.

2015-03-02 Thread Konrad Rzeszutek Wilk
On Mon, Mar 02, 2015 at 11:24:04AM +, David Vrabel wrote:
> On 27/02/15 21:14, Konrad Rzeszutek Wilk wrote:
> > Via CPUID masking and the different apic-> overrides we
> > effectively make PV guests only but with the default APIC
> > driver. That is OK as an PV guest should never access any
> > APIC registers. However, the APIC is also used to limit the
> > amount of CPUs if the APIC IDs are incorrect - and since we
> > mask the x2APIC from the CPUID - any APIC IDs above 0xFF
> > are deemed incorrect by the default APIC routines.
> > 
> > As such add a new routine to check for APIC ID which will
> > be only used if the CPUID (native one) tells us the system
> > is using x2APIC.
> 
> I was applying this but it breaks the build.

Could you send me your .config please.
> 
> arch/x86/built-in.o:(.data+0x2a28): undefined reference to
> `xen_send_IPI_mask'
> arch/x86/built-in.o:(.data+0x2a30): undefined reference to
> `xen_send_IPI_mask_allbutself'
> arch/x86/built-in.o:(.data+0x2a38): undefined reference to
> `xen_send_IPI_allbutself'
> arch/x86/built-in.o:(.data+0x2a40): undefined reference to
> `xen_send_IPI_all'
> arch/x86/built-in.o:(.data+0x2a48): undefined reference to
> `xen_send_IPI_self'
> 
> There are some minor things that I was going to fix up (see below).
> 
> I also found the commit message a bit garbled so rewrote it to:
> 
> x86/xen: Provide a "Xen PV" APIC driver to support >255 VCPUs
> 
> Instead of mangling the default APIC driver, provide a Xen PV guest
> specific one that explicitly provides appropriate methods.
> 
> This allows us to report that all APIC IDs are valid, allowing dom0
> to boot with more than 255 VCPUs.
> 
> Since the probe order of APIC drivers is link dependent, we add in a
> late probe function to change to the Xen PV one if it hadn't been done
> during bootup.
> 
> 
> > +static u32 xen_safe_apic_wait_icr_idle(void)
> > +{
> > +return 0;
> > +}
> > +
> > +
> 
> Extra blank line.
> 
> > +static int probe_xen(void)
> 
> xen_apic_probe_pv
> 
> > +static struct apic xen_apic = {
> 
> static struct apic xen_pv_apic
> 
> > +void __init xen_apic_check(void)
> 
> static
> 
> David


[RFC PATCH v4 02/34] x86, traps: separate set_intr_gate() and cleanup early_trap_init().

2015-03-02 Thread Wang Nan
As early_trap_init() doesn't use IST, replace set_intr_gate_ist() and
set_system_intr_gate_ist() with their standard counterparts.

set_intr_gate() requires a trace_debug symbol which we don't have and
won't use. This patch separates set_intr_gate() into 2 parts, and uses
the base version in early_trap_init().

Signed-off-by: Wang Nan 
---
 arch/x86/include/asm/desc.h |  7 ++-
 arch/x86/kernel/traps.c | 20 
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index a94b82e..a0bf89f 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
  * Pentium F0 0F bugfix can have resulted in the mapped
  * IDT being write-protected.
  */
-#define set_intr_gate(n, addr) \
+#define set_intr_gate_notrace(n, addr) \
do {\
BUG_ON((unsigned)n > 0xFF); \
_set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0,\
  __KERNEL_CS); \
+   } while (0)
+
+#define set_intr_gate(n, addr) \
+   do {\
+   set_intr_gate_notrace(n, addr); \
_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
0, 0, __KERNEL_CS); \
} while (0)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4281988..9965bd1 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -926,16 +926,20 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 void __init early_trap_init(void)
 {
/*
-* Don't set ist to DEBUG_STACK as it doesn't work until TSS is
-* ready in cpu_init() <-- trap_init(). Before trap_init(), CPU
-* runs at ring 0 so it is impossible to hit an invalid stack.
-* Using the original stack works well enough at this early
-* stage. DEBUG_STACK will be equipped after cpu_init() in
+* Don't use IST to set DEBUG_STACK as it doesn't work until TSS
+* is ready in cpu_init() <-- trap_init(). Before trap_init(),
+* CPU runs at ring 0 so it is impossible to hit an invalid
+* stack.  Using the original stack works well enough at this
+* early stage. DEBUG_STACK will be equipped after cpu_init() in
 * trap_init().
+*
+* We don't need to set trace_idt_table like set_intr_gate(),
+* since we don't have trace_debug and it will be reset to
+* 'debug' in trap_init() by set_intr_gate_ist().
 */
-   set_intr_gate_ist(X86_TRAP_DB, &debug, 0);
+   set_intr_gate_notrace(X86_TRAP_DB, debug);
	/* int3 can be called from all */
-   set_system_intr_gate_ist(X86_TRAP_BP, &int3, 0);
+   set_system_intr_gate(X86_TRAP_BP, &int3);
 #ifdef CONFIG_X86_32
set_intr_gate(X86_TRAP_PF, page_fault);
 #endif
@@ -1015,7 +1019,7 @@ void __init trap_init(void)
 
/*
 * X86_TRAP_DB and X86_TRAP_BP have been set
-* in early_trap_init(). However, DEBUG_STACK works only after
+* in early_trap_init(). However, IST works only after
 * cpu_init() loads TSS. See comments in early_trap_init().
 */
	set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
-- 
1.8.4



[RFC PATCH v4 32/34] early kprobes: enable 'ekprobe=' cmdline option for early kprobes.

2015-03-02 Thread Wang Nan
This patch shows a very rough usage of early kprobes. By adding
kernel cmdline options such as 'ekprobe=__alloc_pages_nodemask' or
'ekprobe=0xc00f3c2c', early kprobes are installed. When the probed
instructions get hit, a message is printed.

This patch is only a sample. I'll drop it in future.

Signed-off-by: Wang Nan 
---
 kernel/kprobes.c | 84 
 1 file changed, 84 insertions(+)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 95754f6..56fb8c8 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2224,9 +2224,12 @@ static struct notifier_block kprobe_module_nb = {
.priority = 0
 };
 
+static LIST_HEAD(cmdline_early_kprobes_list);
+
 void init_kprobes_early(void)
 {
int i, err = 0;
+   struct kprobe *early_kp, *tmp;
 
/* FIXME allocate the probe table, currently defined statically */
/* initialize all list heads */
@@ -2270,6 +2273,16 @@ void init_kprobes_early(void)
if (!err)
init_test_probes();
 #endif
+
+   list_for_each_entry_safe(early_kp, tmp, &cmdline_early_kprobes_list, list) {
+   int ret;
+
+   list_del(&early_kp->list);
+   INIT_LIST_HEAD(&early_kp->list);
+   ret = register_kprobe(early_kp);
+   printk("early kprobe: register early kprobe at %p: result = %d\n",
+   early_kp->addr, ret);
+   }
 }
 
 static int __init init_kprobes(void)
@@ -2724,3 +2737,74 @@ void init_kprobes_on_ftrace(void)
convert_early_kprobes_on_ftrace();
 }
 #endif
+
+#ifdef CONFIG_EARLY_KPROBES
+static int early_kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+   const char *sym = NULL;
+   char *modname, namebuf[KSYM_NAME_LEN];
+   unsigned long offset = 0;
+
+   sym = kallsyms_lookup((unsigned long)p->addr, NULL,
+   &offset, &modname, namebuf);
+   if (sym)
+   pr_info("Hit early kprobe at %s+0x%lx%s%s\n",
+   sym, offset,
+   (modname ? " " : ""),
+   (modname ? modname : ""));
+   else
+   pr_info("Hit early kprobe at %p\n", p->addr);
+   return 0;
+}
+
+DEFINE_EKPROBE_ALLOC_OPS(struct kprobe, early_kprobe_setup, static);
+static int __init early_kprobe_setup(char *p)
+{
+   unsigned long long addr;
+   struct kprobe *kp;
+   int len = strlen(p);
+   int err;
+
+   if (len <= 0) {
+   pr_err("early kprobe: wrong param: %s\n", p);
+   return 0;
+   }
+
+   if ((p[0] == '0') && (p[1] == 'x')) {
+   err = kstrtoull(p, 16, &addr);
+   if (err) {
+   pr_err("early kprobe: wrong address: %p\n", p);
+   return 0;
+   }
+   } else {
+   addr = kallsyms_lookup_name(p);
+   if (!addr) {
+   pr_err("early kprobe: wrong symbol: %s\n", p);
+   return 0;
+   }
+   }
+
+   if ((addr < (unsigned long)_text) ||
+   (addr >= (unsigned long)_etext))
+   pr_err("early kprobe: address of %p out of range\n", p);
+
+   kp = ek_alloc_early_kprobe_setup();
+   if (kp == NULL) {
+   pr_err("early kprobe: no enough early kprobe slot\n");
+   return 0;
+   }
+   kp->addr = (void *)(unsigned long)(addr);
+   kp->pre_handler = early_kprobe_pre_handler;
+
+   list_add(&kp->list, &cmdline_early_kprobes_list);
+
+   return 0;
+}
+#else
+static int __init early_kprobe_setup(char *p)
+{
+   return 0;
+}
+#endif
+
+early_param("ekprobe", early_kprobe_setup);
-- 
1.8.4



[RFC PATCH v4 05/34] early kprobes: introduce kprobe_is_early for further early kprobe use.

2015-03-02 Thread Wang Nan
Following early kprobe patches will enable kprobe registering very
early, even before the kprobe system is initialized. kprobe_is_early()
can be used to check whether we are working on early kprobes.

Signed-off-by: Wang Nan 
---
 include/linux/kprobes.h | 2 ++
 kernel/kprobes.c| 6 ++
 2 files changed, 8 insertions(+)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1ab5475..a3de759 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -50,6 +50,8 @@
 #define KPROBE_REENTER 0x0004
 #define KPROBE_HIT_SSDONE  0x0008
 
+extern bool kprobes_is_early(void);
+
 #else /* CONFIG_KPROBES */
 typedef int kprobe_opcode_t;
 struct arch_specific_insn {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 427d761..2e728a4 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -69,6 +69,12 @@
 
 static int kprobes_initialized;
 static int kprobes_blacklist_initialized;
+
+bool kprobes_is_early(void)
+{
+   return !(kprobes_initialized && kprobes_blacklist_initialized);
+}
+
 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 
-- 
1.8.4



[RFC PATCH v4 19/34] ftrace: don't update record flags if code modification fail.

2015-03-02 Thread Wang Nan
X86 and common ftrace_replace_code() behave differently.

In x86, rec->flags gets updated only when (almost) all work is done. In
the common code, rec->flags is updated before code modification, and
never gets restored when code modification fails.

This patch ensures rec->flags keeps its original value if
ftrace_replace_code() fails. A later patch will correct that function
for x86.

Signed-off-by: Wang Nan 
---
 kernel/trace/ftrace.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 45e5cb1..6c6cbb1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2254,23 +2254,30 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
/* This needs to be done before we call ftrace_update_record */
ftrace_old_addr = ftrace_get_addr_curr(rec);
 
-   ret = ftrace_update_record(rec, enable);
+   ret = ftrace_test_record(rec, enable);
 
switch (ret) {
case FTRACE_UPDATE_IGNORE:
return 0;
 
case FTRACE_UPDATE_MAKE_CALL:
-   return ftrace_make_call(rec, ftrace_addr);
+   ret = ftrace_make_call(rec, ftrace_addr);
+   break;
 
case FTRACE_UPDATE_MAKE_NOP:
-   return ftrace_make_nop(NULL, rec, ftrace_old_addr);
+   ret = ftrace_make_nop(NULL, rec, ftrace_old_addr);
+   break;
 
case FTRACE_UPDATE_MODIFY_CALL:
-   return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
+   ret = ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
+   break;
}
 
-   return -1; /* unknow ftrace bug */
+   if (ret)
+   return -1; /* unknow ftrace bug */
+
+   ftrace_update_record(rec, enable);
+   return 0;
 }
 
 void __weak ftrace_replace_code(int enable)
-- 
1.8.4



[RFC PATCH v4 26/34] early kprobes on ftrace: x86: arch code for retrieving kprobed instruction.

2015-03-02 Thread Wang Nan
arch_kprobe_on_ftrace_get_old_insn() is for retrieving the kprobed
instruction for ftrace's use. When ftrace tries to make a call, it
compares the original instruction against the expected instruction
(usually a nop), and refuses to work if they are different. This newly
introduced function returns the byte pattern of the kprobe-probed
instruction. It doesn't re-read the bytes and return them to ftrace;
instead, it regenerates the probed instruction for comparison.

Signed-off-by: Wang Nan 
---
 arch/x86/kernel/kprobes/core.c | 22 ++
 arch/x86/kernel/kprobes/opt.c  | 19 ++-
 include/linux/kprobes.h|  6 ++
 3 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index ead5b51..87beb64 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1159,4 +1159,26 @@ void arch_fix_ftrace_early_kprobe(struct kprobe *kp,
}
 }
 
+const unsigned char *arch_kprobe_on_ftrace_get_old_insn(struct kprobe *kp,
+   const unsigned char *ftrace_nop,
+   unsigned char *dest, size_t insn_size)
+{
+   u8 brkp[] = {BREAKPOINT_INSTRUCTION};
+   struct optimized_kprobe *op;
+
+   if (kp->flags & KPROBE_FLAG_OPTIMIZED) {
+#ifndef CONFIG_OPTPROBES
+   BUG_ON(1);
+#else
+   op = container_of(kp, struct optimized_kprobe, kp);
+   arch_optimize_kprobes_genbranch(op, dest, insn_size);
+   return dest;
+#endif
+   }
+
+   memcpy(dest, brkp, INT3_SIZE);
+   memcpy(dest + INT3_SIZE, ftrace_nop + INT3_SIZE,
+   insn_size - INT3_SIZE);
+   return dest;
+}
 #endif
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index ef3c0be..9b54148 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -392,6 +392,19 @@ static int optimize_kprobe_stop_machine(void *data)
return 0;
 }
 
+const unsigned char *arch_optimize_kprobes_genbranch(struct optimized_kprobe *op,
+   unsigned char *insn_buf, size_t buf_length)
+{
+   s32 rel = (s32)((long)op->optinsn.insn -
+   ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
+   BUG_ON(buf_length < RELATIVEJUMP_SIZE);
+
+   insn_buf[0] = RELATIVEJUMP_OPCODE;
+   *(s32 *)(&insn_buf[1]) = rel;
+   return insn_buf;
+}
+
 /*
  * Replace breakpoints (int3) with relative jumps.
  * Caller must call with locking kprobe_mutex and text_mutex.
@@ -402,8 +415,7 @@ void arch_optimize_kprobes(struct list_head *oplist)
u8 insn_buf[RELATIVEJUMP_SIZE];
 
list_for_each_entry_safe(op, tmp, oplist, list) {
-   s32 rel = (s32)((long)op->optinsn.insn -
-   ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+   arch_optimize_kprobes_genbranch(op, insn_buf, RELATIVEJUMP_SIZE);
 
WARN_ON(kprobe_disabled(>kp));
 
@@ -411,9 +423,6 @@ void arch_optimize_kprobes(struct list_head *oplist)
memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
   RELATIVE_ADDR_SIZE);
 
-   insn_buf[0] = RELATIVEJUMP_OPCODE;
-   *(s32 *)(&insn_buf[1]) = rel;
-
if (unlikely(kprobes_is_early())) {
struct optimize_kprobe_early_param p = {
.op = op,
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index ab1a330..5a5290f 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -276,10 +276,16 @@ extern bool arch_within_kprobe_blacklist(unsigned long addr);
  * its pointer in function decl list.
  */
 struct optimized_kprobe;
+#ifdef CONFIG_OPTPROBES
+extern const unsigned char *arch_optimize_kprobes_genbranch(struct optimized_kprobe *op,
+   unsigned char *insn_buf, size_t buf_length);
+#endif
 
 #if defined(CONFIG_EARLY_KPROBES) && defined(CONFIG_KPROBES_ON_FTRACE)
 extern void arch_fix_ftrace_early_kprobe(struct kprobe *kp,
struct optimized_kprobe *op, int optimized);
+extern const unsigned char *arch_kprobe_on_ftrace_get_old_insn(struct kprobe *kp,
+   const unsigned char *ftrace_nop, unsigned char *dest, size_t insn_size);
 
 extern void init_kprobes_on_ftrace(void);
 extern bool kprobe_fix_ftrace_make_nop(struct dyn_ftrace *rec);
-- 
1.8.4



Re: gadgetfs broken since 7f7f25e8

2015-03-02 Thread Alexander Holler

On 02.03.2015 at 14:02, Alexander Holler wrote:

On 02.03.2015 at 12:39, Alexander Holler wrote:

On 02.03.2015 at 11:20, Al Viro wrote:

On Mon, Mar 02, 2015 at 10:13:27AM +0100, Richard Weinberger wrote:

On Mon, Mar 2, 2015 at 9:28 AM, Alexander Holler
 wrote:

Hello.

Commit 7f7f25e82d54870df24d415a7007fbd327da027b (introduced with
3.16) broke
dynamic changing of file_operations->[read|write].

At least gadgetfs is a victim.


Just for your amusement and as an example:

This bug led me to examine and search for bugs in the userland piece
I've tried to use, and I ended up with around


===
aholler@laptopahbt ~/Source/USBProxy.git/src $ PAGER= git diff 
7d2506648e3404bf7070bae6ab8da4a702ed093c --stat
 doc/gadgetfs_kernel_above_3.15.patch |  50 +++
 src/Plugins/Hosts/GadgetFS_helpers.c |   4 ++--
 src/Plugins/Hosts/HostProxy_GadgetFS.cpp |  12 
 src/debian/header-check.c|   1 -
 src/lib/CMakeLists.txt   |   2 --
 src/lib/ConfigParser.cpp |   9 +++--
 src/lib/ConfigParser.h   |   2 +-
 src/lib/FDInfo.c |   2 +-
 src/lib/HaltSignal.c |  54 ---
 src/lib/HaltSignal.h |  19 --
 src/lib/Injector.cpp |  23 +-
 src/lib/Injector.h   |  11 +++
 src/lib/Manager.cpp  | 122 +++---
 src/lib/Manager.h|  15 +++---
 src/lib/PluginManager.cpp|  47 +---
 src/lib/Proxy.h  |  12 
 src/lib/RelayReader.cpp  |  39 -
 src/lib/RelayReader.h|   9 ++---
 src/lib/RelayWriter.cpp  |  69 
 src/lib/RelayWriter.h|   8 +---
 src/tools/usb-mitm.cpp   |   2 --
 21 files changed, 223 insertions(+), 289 deletions(-)
 ===

without counting at least a dozen patches I did on that userland piece
before the ones counted in the stat above. All in order to find
the bug.


So, you can see, I've already spent some hours before I dived into
the kernel to search for the bug. Of course, the problem in the kernel
is not to blame for all the problems I've found in userland, which led
me to the assumption that the -EINVAL returned from a read() after a
poll() was because of some problem in userspace (like memory or stack
corruption).


Just in case someone thinks I'm lazy because I don't want to rewrite 
gadgetfs and deal with kernel maintainers.


Regards,

Alexander Holler


[PATCH 2/2] cpuidle / sleep: Use broadcast timer for states that stop local timer

2015-03-02 Thread Rafael J. Wysocki
From: Rafael J. Wysocki 

Commit 381063133246 (PM / sleep: Re-implement suspend-to-idle handling)
overlooked the fact that entering some sufficiently deep idle states
by CPUs may cause their local timers to stop and in those cases it
is necessary to switch over to a broadcast timer prior to entering
the idle state.  If the cpuidle driver in use does not provide
the new ->enter_freeze callback for any of the idle states, that
problem affects suspend-to-idle too, but it is not taken into account
after the changes made by commit 381063133246.

Fix that by changing the definition of cpuidle_enter_freeze() and
re-arranging of the code in cpuidle_idle_call(), so the former does
not call cpuidle_enter() any more and the fallback case is handled
by cpuidle_idle_call() directly.

Fixes: 381063133246 (PM / sleep: Re-implement suspend-to-idle handling)
Reported-by: Lorenzo Pieralisi 
Signed-off-by: Rafael J. Wysocki 
---
 drivers/cpuidle/cpuidle.c |   59 --
 include/linux/cpuidle.h   |   17 +++--
 kernel/sched/idle.c   |   30 ---
 3 files changed, 57 insertions(+), 49 deletions(-)

Index: linux-pm/drivers/cpuidle/cpuidle.c
===
--- linux-pm.orig/drivers/cpuidle/cpuidle.c
+++ linux-pm/drivers/cpuidle/cpuidle.c
@@ -44,8 +44,8 @@ void disable_cpuidle(void)
off = 1;
 }
 
-static bool cpuidle_not_available(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+bool cpuidle_not_available(struct cpuidle_driver *drv,
+  struct cpuidle_device *dev)
 {
return off || !initialized || !drv || !dev || !dev->enabled;
 }
@@ -72,14 +72,8 @@ int cpuidle_play_dead(void)
return -ENODEV;
 }
 
-/**
- * cpuidle_find_deepest_state - Find deepest state meeting specific conditions.
- * @drv: cpuidle driver for the given CPU.
- * @dev: cpuidle device for the given CPU.
- * @freeze: Whether or not the state should be suitable for suspend-to-idle.
- */
-static int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev, bool freeze)
+static int find_deepest_state(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev, bool freeze)
 {
unsigned int latency_req = 0;
int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1;
@@ -98,6 +92,17 @@ static int cpuidle_find_deepest_state(st
return ret;
 }
 
+/**
+ * cpuidle_find_deepest_state - Find the deepest available idle state.
+ * @drv: cpuidle driver for the given CPU.
+ * @dev: cpuidle device for the given CPU.
+ */
+int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
+  struct cpuidle_device *dev)
+{
+   return find_deepest_state(drv, dev, false);
+}
+
 static void enter_freeze_proper(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index)
 {
@@ -119,46 +124,27 @@ static void enter_freeze_proper(struct c
 
 /**
  * cpuidle_enter_freeze - Enter an idle state suitable for suspend-to-idle.
+ * @drv: cpuidle driver for the given CPU.
+ * @dev: cpuidle device for the given CPU.
  *
  * If there are states with the ->enter_freeze callback, find the deepest of
  * them and enter it with frozen tick.  Otherwise, find the deepest state
  * available and enter it normally.
- *
- * Returns with enabled interrupts.
  */
-void cpuidle_enter_freeze(void)
+int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
-   struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
-   struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
int index;
 
-   if (cpuidle_not_available(drv, dev))
-   goto fallback;
-
/*
 * Find the deepest state with ->enter_freeze present, which guarantees
 * that interrupts won't be enabled when it exits and allows the tick to
 * be frozen safely.
 */
-   index = cpuidle_find_deepest_state(drv, dev, true);
-   if (index >= 0) {
+   index = find_deepest_state(drv, dev, true);
+   if (index >= 0)
enter_freeze_proper(drv, dev, index);
-   local_irq_enable();
-   return;
-   }
 
-   /*
-* It is not safe to freeze the tick, find the deepest state available
-* at all and try to enter it normally.
-*/
-   index = cpuidle_find_deepest_state(drv, dev, false);
-   if (index >= 0) {
-   cpuidle_enter(drv, dev, index);
-   return;
-   }
-
- fallback:
-   arch_cpu_idle();
+   return index;
 }
 
 /**
@@ -217,9 +203,6 @@ int cpuidle_enter_state(struct cpuidle_d
  */
 int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
-   if (cpuidle_not_available(drv, dev))
-   return -ENODEV;
-
return 

[RFC PATCH v4 06/34] early kprobes: enable kprobe smoke test for early kprobes.

2015-03-02 Thread Wang Nan
Let the kprobe smoke test code behave differently depending on
kprobes_is_early(). Following patches will test kprobes twice, once for
early kprobes and again for normal kprobes.

Since this test will be executed more than once, before the real test
we should first init the ?probe structures to avoid garbage data from a
previous round triggering problems. For example, register_kprobe()
refuses to process a struct kprobe with both addr and symbol_name set,
but itself fills them both.

Signed-off-by: Wang Nan 
---
 kernel/test_kprobes.c | 58 +++
 1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index 0dbab6d..cce4536 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -22,6 +22,20 @@
 
 #define div_factor 3
 
+#define init_probe(src)	memcpy(&src, &_##src, sizeof(src))
+#define init_probes_pair(a)\
+   do {\
+   init_probe(a);  \
+   init_probe(a##2);   \
+   } while(0)
+
+#define init_all_probes()  \
+   do {\
+   init_probes_pair(kp);   \
+   init_probes_pair(jp);   \
+   init_probes_pair(rp);   \
+   } while(0)
+
 static u32 rand1, preh_val, posth_val, jph_val;
 static int errors, handler_errors, num_tests;
 static u32 (*target)(u32 value);
@@ -48,11 +62,12 @@ static void kp_post_handler(struct kprobe *p, struct pt_regs *regs,
posth_val = preh_val + div_factor;
 }
 
-static struct kprobe kp = {
+static struct kprobe _kp = {
.symbol_name = "kprobe_target",
.pre_handler = kp_pre_handler,
.post_handler = kp_post_handler
 };
+static struct kprobe kp;
 
 static int test_kprobe(void)
 {
@@ -101,11 +116,12 @@ static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs,
posth_val = preh_val + div_factor;
 }
 
-static struct kprobe kp2 = {
+static struct kprobe _kp2 = {
.symbol_name = "kprobe_target2",
.pre_handler = kp_pre_handler2,
.post_handler = kp_post_handler2
 };
+static struct kprobe kp2;
 
 static int test_kprobes(void)
 {
@@ -166,10 +182,11 @@ static u32 j_kprobe_target(u32 value)
return 0;
 }
 
-static struct jprobe jp = {
+static struct jprobe _jp = {
.entry  = j_kprobe_target,
.kp.symbol_name = "kprobe_target"
 };
+static struct jprobe jp;
 
 static int test_jprobe(void)
 {
@@ -191,10 +208,11 @@ static int test_jprobe(void)
return 0;
 }
 
-static struct jprobe jp2 = {
+static struct jprobe _jp2 = {
.entry  = j_kprobe_target,
.kp.symbol_name = "kprobe_target2"
 };
+static struct jprobe jp2;
 
 static int test_jprobes(void)
 {
@@ -253,11 +271,12 @@ static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
return 0;
 }
 
-static struct kretprobe rp = {
+static struct kretprobe _rp = {
.handler= return_handler,
.entry_handler  = entry_handler,
.kp.symbol_name = "kprobe_target"
 };
+static struct kretprobe rp;
 
 static int test_kretprobe(void)
 {
@@ -296,11 +315,12 @@ static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs)
return 0;
 }
 
-static struct kretprobe rp2 = {
+static struct kretprobe _rp2 = {
.handler= return_handler2,
.entry_handler  = entry_handler,
.kp.symbol_name = "kprobe_target2"
 };
+static struct kretprobe rp2;
 
 static int test_kretprobes(void)
 {
@@ -337,15 +357,24 @@ static int test_kretprobes(void)
 int init_test_probes(void)
 {
int ret;
+   char *early_str;
+
+   init_all_probes();
 
target = kprobe_target;
target2 = kprobe_target2;
 
-   do {
-   rand1 = prandom_u32();
-   } while (rand1 <= div_factor);
+   if (!kprobes_is_early()) {
+   do {
+   rand1 = prandom_u32();
+   } while (rand1 <= div_factor);
+   early_str = "";
+   } else {
+   rand1 = 123456789;
+   early_str = "(early) ";
+   }
 
-   pr_info("started\n");
+   pr_info("%sstarted\n", early_str);
num_tests++;
ret = test_kprobe();
if (ret < 0)
@@ -366,6 +395,8 @@ int init_test_probes(void)
if (ret < 0)
errors++;
 
+   if (kprobes_is_early())
+   goto out;
 #ifdef CONFIG_KRETPROBES
num_tests++;
ret = test_kretprobe();
@@ -378,12 +409,13 @@ int init_test_probes(void)
errors++;
 #endif /* CONFIG_KRETPROBES */
 
+out:
if (errors)
-   pr_err("BUG: %d out of %d tests failed\n", errors, num_tests);
+   pr_err("%sBUG: %d out of %d tests failed\n", early_str, errors, 
num_tests);
else if (handler_errors)
-   pr_err("BUG: %d error(s) running handlers\n", handler_errors);
+   pr_err("%sBUG: %d error(s) running handlers\n", early_str, handler_errors);

[RFC PATCH v4 34/34] early kprobes: enable optimization of kprobes on ftrace before ftrace is ready.

2015-03-02 Thread Wang Nan
Use ftrace_process_loc_early(), introduced by the previous patch, to
convert ftrace entries to nops before ftrace_init(). For x86, the
original probed entries are 'call' instructions and are optimizable
only after this conversion.

Signed-off-by: Wang Nan 
---
 kernel/kprobes.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 56fb8c8..1ec8e6e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1602,6 +1602,9 @@ int register_kprobe(struct kprobe *p)
goto out;
}
 
+   if (p->flags & KPROBE_FLAG_FTRACE_EARLY)
+   ftrace_process_loc_early((unsigned long)p->addr);
+
	mutex_lock(&text_mutex);	/* Avoiding text modification */
	ret = prepare_kprobe(p);
	mutex_unlock(&text_mutex);
-- 
1.8.4



[RFC PATCH v4 11/34] early kprobes: introduces macros for allocing early kprobe resources.

2015-03-02 Thread Wang Nan
Introduce macros to generate common early-kprobe-related resource
allocators.

All early kprobe related resources are statically allocated during
linking for each early kprobe slot. For each type of resource, a bitmap
is used to track allocation. __DEFINE_EKPROBE_ALLOC_OPS defines the
alloc and free handlers for them; the range of the resource and the
bitmap must be provided for allocating and freeing.
DEFINE_EKPROBE_ALLOC_OPS defines the bitmap and the array used by it.
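
As an illustration, here is a hedged usage sketch modeled on how patch
32 of this series uses these macros; grab_slot() and put_slot() are
made-up names for this example.

	/* Define a static pool of NR_EARLY_KPROBES_SLOTS kprobes plus the
	 * bitmap that tracks which slots are taken.
	 */
	DEFINE_EKPROBE_ALLOC_OPS(struct kprobe, early_kprobe_setup, static);

	static struct kprobe *grab_slot(void)
	{
		/* NULL once all NR_EARLY_KPROBES_SLOTS slots are in use. */
		return ek_alloc_early_kprobe_setup();
	}

	static int put_slot(struct kprobe *kp)
	{
		/* 1 if kp belonged to the static area and was freed, else 0. */
		return ek_free_early_kprobe_setup(kp);
	}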

Signed-off-by: Wang Nan 
---
 include/linux/kprobes.h | 77 +
 1 file changed, 77 insertions(+)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index b7cb992..3d721eb 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -269,6 +269,83 @@ extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
 extern bool arch_within_kprobe_blacklist(unsigned long addr);
 
+#ifdef CONFIG_EARLY_KPROBES
+
+#define NR_EARLY_KPROBES_SLOTS CONFIG_NR_EARLY_KPROBES_SLOTS
+#define EARLY_KPROBES_BITMAP_SZ	round_up(NR_EARLY_KPROBES_SLOTS, BITS_PER_LONG)
+
+#define __ek_in_range(v, s, e) (((v) >= (s)) && ((v) < (e)))
+#define __ek_buf_sz(s, e)  ((void *)(e) - (void *)(s))
+#define __ek_elem_sz_b(s, e)   (__ek_buf_sz(s, e) / NR_EARLY_KPROBES_SLOTS)
+#define __ek_elem_sz(s, e) (__ek_elem_sz_b(s, e) / sizeof(s[0]))
+#define __ek_elem_idx(v, s, e) (__ek_buf_sz(s, v) / __ek_elem_sz_b(s, e))
+#define __ek_get_elem(i, s, e) (&((s)[__ek_elem_sz(s, e) * (i)]))
+#define __DEFINE_EKPROBE_ALLOC_OPS(__t, __name)	\
+static inline __t *__ek_alloc_##__name(__t *__s, __t *__e, unsigned long *__b)\
+{  \
+   int __i = find_next_zero_bit(__b, NR_EARLY_KPROBES_SLOTS, 0);   \
+   if (__i >= NR_EARLY_KPROBES_SLOTS)  \
+   return NULL;\
+   set_bit(__i, __b);  \
+   return __ek_get_elem(__i, __s, __e);\
+}  \
+static inline int __ek_free_##__name(__t *__v, __t *__s, __t *__e, unsigned long *__b) \
+{  \
+   if (!__ek_in_range(__v, __s, __e))  \
+   return 0;   \
+   clear_bit(__ek_elem_idx(__v, __s, __e), __b);   \
+   return 1;   \
+}
+
+#define __DEFINE_EKPROBE_AREA(__t, __name, __static)   \
+__static __t __ek_##__name##_slots[NR_EARLY_KPROBES_SLOTS];\
+__static unsigned long __ek_##__name##_bitmap[EARLY_KPROBES_BITMAP_SZ];
+
+#define DEFINE_EKPROBE_ALLOC_OPS(__t, __name, __static)	\
+__DEFINE_EKPROBE_AREA(__t, __name, __static)   \
+__DEFINE_EKPROBE_ALLOC_OPS(__t, __name)
\
+static inline __t *ek_alloc_##__name(void) \
+{  \
+   return __ek_alloc_##__name(&((__ek_##__name##_slots)[0]),   \
+   &((__ek_##__name##_slots)[NR_EARLY_KPROBES_SLOTS]),\
+   (__ek_##__name##_bitmap));  \
+}  \
+static inline int ek_free_##__name(__t *__s)   \
+{  \
+   return __ek_free_##__name(__s, &((__ek_##__name##_slots)[0]),   \
+   &((__ek_##__name##_slots)[NR_EARLY_KPROBES_SLOTS]),\
+   (__ek_##__name##_bitmap));  \
+}
+
+
+#else
+#define __DEFINE_EKPROBE_ALLOC_OPS(__t, __name)	\
+static inline __t *__ek_alloc_##__name(__t *__s, __t *__e, unsigned long *__b)\
+{  \
+   return NULL;\
+}  \
+static inline int __ek_free_##__name(__t *__v, __t *__s, __t *__e, unsigned long *__b)\
+{  \
+   return 0;   \
+}
+
+#define __DEFINE_EKPROBE_AREA(__t, __name, __static)   \
+__static __t __ek_##__name##_slots[0]; \
+__static unsigned long __ek_##__name##_bitmap[0];
+
+#define DEFINE_EKPROBE_ALLOC_OPS(__t, __name, __static)	\
+__DEFINE_EKPROBE_ALLOC_OPS(__t, __name)	\

[RFC PATCH v4 08/34] early kprobes: ARM: add definition for vmlinux.lds use.

2015-03-02 Thread Wang Nan
This patch defines MAX_OPTINSN_SIZE, MAX_INSN_SIZE and
KPROBE_OPCODE_SIZE for ARM for vmlinux.lds.S use. These macros are
originally defined in kprobes.h, which are unable to be used in
vmlinux.lds.

Signed-off-by: Wang Nan 
---
 arch/arm/kernel/vmlinux.lds.S | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index b31aa73..09fcc20 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -45,6 +45,16 @@
 #define ARM_EXIT_DISCARD(x)x
 #endif
 
+#ifdef CONFIG_EARLY_KPROBES
+# ifdef CONFIG_THUMB2_KERNEL
+#  define MAX_OPTINSN_SIZE 0
+# else
+#  define MAX_OPTINSN_SIZE (optprobe_template_end - optprobe_template_entry)
+# endif
+# define MAX_INSN_SIZE 2
+# define KPROBE_OPCODE_SIZE 4
+#endif
+
 OUTPUT_ARCH(arm)
 ENTRY(stext)
 
-- 
1.8.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH v4 27/34] early kprobes on ftrace: kprobe_on_ftrace_get_old_insn()

2015-03-02 Thread Wang Nan
The newly introduced function kprobe_on_ftrace_get_old_insn() will be
called by ftrace when it generates a call instruction. It retrieves the
probed instruction whose original nop has been replaced by a kprobe.
The FTRACE_FL_EARLY_KPROBES bit in rec->flags is cleared, so after
calling kprobe_on_ftrace_get_old_insn() the ftrace record is no longer
treated as early kprobed.
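
A sketch of the intended call site (this mirrors the x86
ftrace_make_call() change later in this series; ftrace_modify_code() is
the existing x86 helper that verifies the old bytes before writing the
new ones):

static int example_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned char old_insn[MCOUNT_INSN_SIZE];
	const unsigned char *old, *new;

	/*
	 * If an early kprobe owns this entry, fetch the bytes the
	 * kprobe recorded instead of assuming a plain nop.  The helper
	 * clears FTRACE_FL_EARLY_KPROBES, so this path is taken at
	 * most once per record.
	 */
	old = kprobe_on_ftrace_get_old_insn(rec, ftrace_nop_replace(),
					    old_insn, MCOUNT_INSN_SIZE);
	new = ftrace_call_replace(rec->ip, addr);

	return ftrace_modify_code(rec->ip, old, new);
}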

Signed-off-by: Wang Nan 
---
 include/linux/kprobes.h |  9 +
 kernel/kprobes.c| 34 ++
 2 files changed, 43 insertions(+)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 5a5290f..2d78bbb 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -289,6 +289,8 @@ extern const unsigned char *arch_kprobe_on_ftrace_get_old_insn(struct kprobe *kp
 
 extern void init_kprobes_on_ftrace(void);
 extern bool kprobe_fix_ftrace_make_nop(struct dyn_ftrace *rec);
+extern const unsigned char *kprobe_on_ftrace_get_old_insn(struct dyn_ftrace *rec,
+	const unsigned char *ftrace_nop, unsigned char *dest, size_t insn_size);
 #else
 static inline void init_kprobes_on_ftrace(void)
 {
@@ -299,6 +301,13 @@ static inline bool kprobe_fix_ftrace_make_nop(struct dyn_ftrace *_unused)
 
return false;
 }
+
+static inline const unsigned char *
+kprobe_on_ftrace_get_old_insn(struct dyn_ftrace *_unused,
+	const unsigned char *ftrace_nop, unsigned char *_unused2, size_t _unused3)
+{
+   return ftrace_nop;
+}
 #endif /* CONFIG_EARLY_KPROBES && CONFIG_KPROBES_ON_FTRACE */
 
 #ifdef CONFIG_EARLY_KPROBES
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 20b6ab8..c504c1c 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2623,6 +2623,40 @@ bool kprobe_fix_ftrace_make_nop(struct dyn_ftrace *rec)
return true;
 }
 
+/* NOTE: caller must ensure holding kprobe_mutex */
+const unsigned char *
+kprobe_on_ftrace_get_old_insn(struct dyn_ftrace *rec,
+   const unsigned char *ftrace_nop,
+   unsigned char *dest, size_t insn_size)
+{
+   const unsigned char *ret;
+   struct kprobe *kp;
+   void *addr;
+
+   if (!(rec->flags & FTRACE_FL_EARLY_KPROBES))
+   return ftrace_nop;
+
+   addr = (void *)rec->ip;
+
+   /*
+	 * Note that get_kprobe() always returns the kprobe on the
+	 * table, for which KPROBE_FLAG_OPTIMIZED is reliable.
+*/
+   kp = get_kprobe(addr);
+
+	if (!kp || !(kp->flags & KPROBE_FLAG_FTRACE_EARLY))
+		return ftrace_nop;
+
+   ret = arch_kprobe_on_ftrace_get_old_insn(kp, ftrace_nop,
+   dest, insn_size);
+
+	/* Only give the kprobe one chance to retrieve the old insn. */
+   rec->flags &= ~FTRACE_FL_EARLY_KPROBES;
+   return ret;
+}
+
 void init_kprobes_on_ftrace(void)
 {
kprobes_on_ftrace_initialized = true;
-- 
1.8.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC v2 4/4] locks: Use blocked_lock_lock only to protect blocked_hash

2015-03-02 Thread Daniel Wagner
blocked_lock_lock and file_lock_lglock are used to protect file_lock's
fl_link, fl_block, fl_next, blocked_hash and the percpu
file_lock_list.

The plan is to reorganize the usage of the locks and what they protect
so that the usage of the global blocked_lock_lock is reduced.

Whenever we insert a new lock we are going to grab besides the
flc_lock also the corresponding file_lock_lglock. The global
blocked_lock_lock is only used when blocked_hash is involved.

file_lock_lglock now protects file_lock_list, fl_link, fl_block and
fl_next alone. That means we need to define which file_lock_lglock is
used for all waiters. Luckily, fl_link_cpu can be reused for fl_block
and fl_next.
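
As a rough sketch of the resulting insert path (illustrative only; the
real site is locks_insert_block() in fs/locks.c), a waiter inherits the
blocker's fl_link_cpu so that a single file_lock_lglock instance
serializes the blocker's whole fl_block list:

static void example_insert_block(struct file_lock *blocker,
				 struct file_lock *waiter)
{
	/* Caller already holds the relevant flc_lock. */
	waiter->fl_link_cpu = blocker->fl_link_cpu;

	lg_local_lock_cpu(&file_lock_lglock, blocker->fl_link_cpu);
	list_add_tail(&waiter->fl_block, &blocker->fl_block);
	waiter->fl_next = blocker;
	lg_local_unlock_cpu(&file_lock_lglock, blocker->fl_link_cpu);
}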

Signed-off-by: Daniel Wagner 
Cc: Jeff Layton 
Cc: "J. Bruce Fields" 
Cc: Alexander Viro 
---
 fs/locks.c | 78 ++
 1 file changed, 43 insertions(+), 35 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 02821dd..de15ea8 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -162,6 +162,20 @@ int lease_break_time = 45;
  * keep a list on each CPU, with each list protected by its own spinlock via
  * the file_lock_lglock. Note that alterations to the list also require that
  * the relevant flc_lock is held.
+ *
+ * In addition, it also protects the fl->fl_block list, and the fl->fl_next
+ * pointer for file_lock structures that are acting as lock requests (in
+ * contrast to those that are acting as records of acquired locks).
+ *
+ * file_lock structures acting as lock requests (waiters) use the same
+ * spinlock as those acting as lock holders (blockers). E.g. the
+ * blocker is initially added to the file_lock_list living on CPU 0,
+ * all waiters on that blocker are serialized via CPU 0 (see
+ * fl_link_cpu usage).
+ *
+ * In particular, adding an entry to the fl_block list requires that you hold
+ * both the flc_lock and the blocked_lock_lock (acquired in that order).
+ * Deleting an entry from the list however only requires the file_lock_lglock.
  */
 DEFINE_STATIC_LGLOCK(file_lock_lglock);
 static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
@@ -183,19 +197,6 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
 /*
  * This lock protects the blocked_hash. Generally, if you're accessing it, you
  * want to be holding this lock.
- *
- * In addition, it also protects the fl->fl_block list, and the fl->fl_next
- * pointer for file_lock structures that are acting as lock requests (in
- * contrast to those that are acting as records of acquired locks).
- *
- * Note that when we acquire this lock in order to change the above fields,
- * we often hold the flc_lock as well. In certain cases, when reading the fields
- * protected by this lock, we can skip acquiring it iff we already hold the
- * flc_lock.
- *
- * In particular, adding an entry to the fl_block list requires that you hold
- * both the flc_lock and the blocked_lock_lock (acquired in that order).
- * Deleting an entry from the list however only requires the file_lock_lock.
  */
 static DEFINE_SPINLOCK(blocked_lock_lock);
 
@@ -607,7 +608,7 @@ static void locks_delete_global_blocked(struct file_lock *waiter)
 /* Remove waiter from blocker's block list.
  * When blocker ends up pointing to itself then the list is empty.
  *
- * Must be called with blocked_lock_lock held.
+ * Must be called with file_lock_lglock held.
  */
 static void __locks_delete_block(struct file_lock *waiter)
 {
@@ -617,7 +618,7 @@ static void __locks_delete_block(struct file_lock *waiter)
 
 /* Posix block variant of __locks_delete_block.
  *
- * Must be called with blocked_lock_lock held.
+ * Must be called with file_lock_lglock held.
  */
 static void __locks_delete_posix_block(struct file_lock *waiter)
 {
@@ -627,16 +628,18 @@ static void __locks_delete_posix_block(struct file_lock *waiter)
 
 static void locks_delete_block(struct file_lock *waiter)
 {
-	spin_lock(&blocked_lock_lock);
+	lg_local_lock_cpu(&file_lock_lglock, waiter->fl_link_cpu);
 	__locks_delete_block(waiter);
-	spin_unlock(&blocked_lock_lock);
+	lg_local_unlock_cpu(&file_lock_lglock, waiter->fl_link_cpu);
 }
 
 static void locks_delete_posix_block(struct file_lock *waiter)
 {
+	lg_local_lock_cpu(&file_lock_lglock, waiter->fl_link_cpu);
 	spin_lock(&blocked_lock_lock);
 	__locks_delete_posix_block(waiter);
 	spin_unlock(&blocked_lock_lock);
+	lg_local_unlock_cpu(&file_lock_lglock, waiter->fl_link_cpu);
 }
 
 /* Insert waiter into blocker's block list.
@@ -644,22 +647,23 @@ static void locks_delete_posix_block(struct file_lock *waiter)
  * the order they blocked. The documentation doesn't require this but
  * it seems like the reasonable thing to do.
  *
- * Must be called with both the flc_lock and blocked_lock_lock held. The
- * fl_block list itself is protected by the blocked_lock_lock, but by ensuring
+ * Must be called with both the flc_lock and file_lock_lglock held. The
+ * fl_block list itself is protected by the file_lock_lglock, but by 

[PATCH 2/2] mm/page_alloc.c: fix a grammar in comment

2015-03-02 Thread Yaowei Bai
Alter 'controls' -> 'control'.

Signed-off-by: Yaowei Bai 
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 12c96ad..5158fa2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5716,7 +5716,7 @@ static void __setup_per_zone_wmarks(void)
 * value here.
 *
 * The (WMARK_HIGH-WMARK_LOW) and (WMARK_LOW-WMARK_MIN)
-* deltas controls asynch page reclaim, and so should
+* deltas control asynch page reclaim, and so should
 * not be capped for highmem.
 */
unsigned long min_pages;
-- 
1.9.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH v4 12/34] early kprobes: allows __alloc_insn_slot() from early kprobes slots.

2015-03-02 Thread Wang Nan
Introduce early_slots_start/end and a bitmap for struct
kprobe_insn_cache, then use the previously introduced macro to generate
the allocator. This patch makes get/free_insn_slot() and
get/free_optinsn_slot() transparent to early kprobes.
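
The dispatch is a simple "try the static early pool first, fall back to
the regular allocator" pattern. A runnable user-space sketch of that
shape (all names here are illustrative; in the kernel the early path is
taken while kprobes_is_early() is true and draws from the
linker-provided slot area rather than a malloc() fallback):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_EARLY 4

static long early_pool[NR_EARLY];
static unsigned long early_bitmap;
static bool early_phase = true;		/* analogous to kprobes_is_early() */

static long *slot_get(void)
{
	int i;

	if (!early_phase)
		return malloc(sizeof(long));	/* normal, late allocator */

	for (i = 0; i < NR_EARLY; i++) {
		if (!(early_bitmap & (1UL << i))) {
			early_bitmap |= 1UL << i;
			return &early_pool[i];
		}
	}
	return NULL;
}

static void slot_put(long *s)
{
	/* Free transparently: early slots by index, late ones via free(). */
	if (s >= early_pool && s < early_pool + NR_EARLY) {
		early_bitmap &= ~(1UL << (s - early_pool));
		return;
	}
	free(s);
}

int main(void)
{
	long *early = slot_get();
	long *late;

	early_phase = false;			/* boot finished */
	late = slot_get();

	printf("early slot from pool: %d, late slot from heap: %d\n",
	       early >= early_pool && early < early_pool + NR_EARLY,
	       !(late >= early_pool && late < early_pool + NR_EARLY));
	slot_put(early);
	slot_put(late);
	return 0;
}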

Signed-off-by: Wang Nan 
---
 include/linux/kprobes.h | 40 
 kernel/kprobes.c| 14 ++
 2 files changed, 54 insertions(+)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 3d721eb..bb2b2c6 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -317,6 +317,17 @@ static inline int ek_free_##__name(__t *__s)			\
(__ek_##__name##_bitmap));  \
 }
 
+/*
+ * Start and end of early kprobes area, including code area and
+ * insn_slot area.
+ */
+extern char __early_kprobes_start[];
+extern char __early_kprobes_end[];
+
+extern kprobe_opcode_t __early_kprobes_code_area_start[];
+extern kprobe_opcode_t __early_kprobes_code_area_end[];
+extern kprobe_opcode_t __early_kprobes_insn_slot_start[];
+extern kprobe_opcode_t __early_kprobes_insn_slot_end[];
 
 #else
 #define __DEFINE_EKPROBE_ALLOC_OPS(__t, __name)				\
@@ -346,6 +357,8 @@ static inline int ek_free_##__name(__t *__s)			\
 
 #endif
 
+__DEFINE_EKPROBE_ALLOC_OPS(kprobe_opcode_t, opcode)
+
 struct kprobe_insn_cache {
struct mutex mutex;
void *(*alloc)(void);   /* allocate insn page */
@@ -353,8 +366,35 @@ struct kprobe_insn_cache {
struct list_head pages; /* list of kprobe_insn_page */
size_t insn_size;   /* size of instruction slot */
int nr_garbage;
+#ifdef CONFIG_EARLY_KPROBES
+# define slots_start(c)	((c)->early_slots_start)
+# define slots_end(c)  ((c)->early_slots_end)
+# define slots_bitmap(c)   ((c)->early_slots_bitmap)
+   kprobe_opcode_t *early_slots_start;
+   kprobe_opcode_t *early_slots_end;
+   unsigned long early_slots_bitmap[EARLY_KPROBES_BITMAP_SZ];
+#else
+# define slots_start(c)NULL
+# define slots_end(c)  NULL
+# define slots_bitmap(c)   NULL
+#endif
 };
 
+static inline kprobe_opcode_t *
+__get_insn_slot_early(struct kprobe_insn_cache *c)
+{
+   return __ek_alloc_opcode(slots_start(c),
+   slots_end(c), slots_bitmap(c));
+}
+
+static inline int
+__free_insn_slot_early(struct kprobe_insn_cache *c,
+   kprobe_opcode_t *slot)
+{
+   return __ek_free_opcode(slot, slots_start(c),
+   slots_end(c), slots_bitmap(c));
+}
+
 extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c);
 extern void __free_insn_slot(struct kprobe_insn_cache *c,
 kprobe_opcode_t *slot, int dirty);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 614138c..1eb3000 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -144,6 +144,10 @@ struct kprobe_insn_cache kprobe_insn_slots = {
.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
.insn_size = MAX_INSN_SIZE,
.nr_garbage = 0,
+#ifdef CONFIG_EARLY_KPROBES
+   .early_slots_start = __early_kprobes_insn_slot_start,
+   .early_slots_end = __early_kprobes_insn_slot_end,
+#endif
 };
 static int collect_garbage_slots(struct kprobe_insn_cache *c);
 
@@ -156,6 +160,9 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
struct kprobe_insn_page *kip;
kprobe_opcode_t *slot = NULL;
 
+   if (kprobes_is_early())
+   return __get_insn_slot_early(c);
+
	mutex_lock(&c->mutex);
  retry:
	list_for_each_entry(kip, &c->pages, list) {
@@ -256,6 +263,9 @@ void __free_insn_slot(struct kprobe_insn_cache *c,
 {
struct kprobe_insn_page *kip;
 
+   if (unlikely(__free_insn_slot_early(c, slot)))
+   return;
+
	mutex_lock(&c->mutex);
	list_for_each_entry(kip, &c->pages, list) {
long idx = ((long)slot - (long)kip->insns) /
@@ -287,6 +297,10 @@ struct kprobe_insn_cache kprobe_optinsn_slots = {
.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
/* .insn_size is initialized later */
.nr_garbage = 0,
+#ifdef CONFIG_EARLY_KPROBES
+   .early_slots_start = __early_kprobes_code_area_start,
+   .early_slots_end = __early_kprobes_code_area_end,
+#endif
 };
 #endif
 #endif
-- 
1.8.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH v4 17/34] early kprobes: run kprobes smoke test for early kprobes.

2015-03-02 Thread Wang Nan
We are now able to use early kprobes, so run a small smoke test for
them. Note that previous patches make init_test_probes() behave
differently when kprobes_is_early().
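
Schematically, the smoke test registers a probe on a target function,
calls it, and checks that the handlers fired. A minimal sketch of that
shape (the target symbol and the verification are illustrative, not the
actual kernel/test_kprobes.c code):

#include <linux/errno.h>
#include <linux/kprobes.h>

static unsigned int handler_hits;

static int test_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	handler_hits++;		/* count that the probe actually fired */
	return 0;
}

static struct kprobe test_kp = {
	.symbol_name	= "kprobe_target",	/* illustrative target */
	.pre_handler	= test_pre_handler,
};

static int example_smoke_test(void)
{
	int ret = register_kprobe(&test_kp);

	if (ret)
		return ret;
	/* ... call the probed function here ... */
	unregister_kprobe(&test_kp);
	return handler_hits ? 0 : -EINVAL;
}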

Signed-off-by: Wang Nan 
---
 kernel/kprobes.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7dbe8b2..4b7b20a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2258,6 +2258,10 @@ void init_kprobes_early(void)
	err = register_module_notifier(&kprobe_module_nb);
 
kprobes_initialized = (err == 0);
+#ifdef CONFIG_EARLY_KPROBES
+   if (!err)
+   init_test_probes();
+#endif
 }
 
 static int __init init_kprobes(void)
-- 
1.8.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH v4 28/34] ftrace: x86: get old instruction from early kprobes when make call.

2015-03-02 Thread Wang Nan
For previously detected early kprobes on ftrace, retrieve the old
instruction using kprobe_on_ftrace_get_old_insn() instead of
ftrace_nop_replace(). This enables converting an early kprobed ftrace
entry directly to a 'call' instruction without turning the kprobe
off.
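
Since the same guarded fallback appears twice below, a small helper
(purely a suggestion, not part of this patch) would keep the #ifdef in
one place:

/* Suggested helper: centralize the early-kprobes lookup so that
 * ftrace_make_call() and add_brk_on_nop() share a single #ifdef. */
static const unsigned char *ftrace_old_insn(struct dyn_ftrace *rec,
					    unsigned char *buf)
{
#if defined(CONFIG_KPROBES_ON_FTRACE) && defined(CONFIG_EARLY_KPROBES)
	if (unlikely(rec->flags & FTRACE_FL_EARLY_KPROBES))
		return kprobe_on_ftrace_get_old_insn(rec,
						     ftrace_nop_replace(),
						     buf, MCOUNT_INSN_SIZE);
#endif
	return ftrace_nop_replace();
}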

Signed-off-by: Wang Nan 
---
 arch/x86/kernel/ftrace.c | 23 +--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 7bdba65..f200cd4 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -164,7 +165,17 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
unsigned const char *new, *old;
unsigned long ip = rec->ip;
 
-   old = ftrace_nop_replace();
+#if defined(CONFIG_KPROBES_ON_FTRACE) && defined(CONFIG_EARLY_KPROBES)
+   unsigned char kprobes_old_insn[MCOUNT_INSN_SIZE];
+
+   if (unlikely(rec->flags & FTRACE_FL_EARLY_KPROBES))
+   old = kprobe_on_ftrace_get_old_insn(rec,
+   ftrace_nop_replace(),
+   kprobes_old_insn,
+   MCOUNT_INSN_SIZE);
+   else
+#endif
+   old = ftrace_nop_replace();
new = ftrace_call_replace(ip, addr);
 
/* Should only be called when module is loaded */
@@ -335,8 +346,16 @@ static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
 static int add_brk_on_nop(struct dyn_ftrace *rec)
 {
unsigned const char *old;
+#if defined(CONFIG_KPROBES_ON_FTRACE) && defined(CONFIG_EARLY_KPROBES)
+   unsigned char kprobes_old_insn[MCOUNT_INSN_SIZE];
 
-   old = ftrace_nop_replace();
+   if (unlikely(rec->flags & FTRACE_FL_EARLY_KPROBES))
+   old = kprobe_on_ftrace_get_old_insn(rec, ftrace_nop_replace(),
+   kprobes_old_insn,
+   MCOUNT_INSN_SIZE);
+   else
+#endif
+   old = ftrace_nop_replace();
 
return add_break(rec->ip, old);
 }
-- 
1.8.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH v4 22/34] ftrace: allow search ftrace addr before ftrace fully inited.

2015-03-02 Thread Wang Nan
This patch enables ftrace_location() to be used before ftrace_init().
The first user should be early kprobes, which can insert probes into
kernel code even before setup_arch() finishes. This patch gives it a
chance to determine whether it is probing ftrace entries and allows it
to apply special treatment to them.

ftrace_cmp_ips_insn() is introduced to make early ftrace_location()
behave consistently with normal ftrace_location(). With the existing
ftrace_cmp_ips(), searching for an address in the middle of an
instruction fails, which is inconsistent with the ftrace_cmp_recs()
used by normal ftrace_location().

With this and the previous patch, ftrace_location() can now be called
in and after setup_arch().
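
A runnable user-space sketch of the range-aware bsearch this patch
adds: an address anywhere inside a sorted mcount record's instruction
matches, mirroring ftrace_cmp_ips_insn(). MCOUNT_INSN_SIZE is 5 on x86
(a call with a rel32 operand); here it is just an illustrative
constant, as are the sample addresses:

#include <stdio.h>
#include <stdlib.h>

#define MCOUNT_INSN_SIZE 5

static int cmp_ips_insn(const void *a, const void *b)
{
	const unsigned long *ipa = a;
	const unsigned long *ipb = b;

	if (*ipa >= *ipb + MCOUNT_INSN_SIZE)
		return 1;
	if (*ipa < *ipb)
		return -1;
	return 0;	/* *ipb <= *ipa < *ipb + size: inside the insn */
}

int main(void)
{
	unsigned long mcount_loc[] = { 0x1000, 0x1040, 0x1090 };  /* sorted */
	unsigned long ip = 0x1042;	/* two bytes into the second entry */
	unsigned long *hit;

	hit = bsearch(&ip, mcount_loc, 3, sizeof(ip), cmp_ips_insn);
	printf("ip 0x%lx %s\n", ip, hit ? "is a traced location" : "missed");
	return 0;
}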

Signed-off-by: Wang Nan 
---
 kernel/trace/ftrace.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fa3cdd3..7fa88d0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1539,6 +1539,8 @@ static unsigned long ftrace_location_range(unsigned long start, unsigned long end)
return 0;
 }
 
+static unsigned long ftrace_search_mcount_ip(unsigned long ip);
+
 /**
  * ftrace_location - return true if the ip giving is a traced location
  * @ip: the instruction pointer to check
@@ -1550,6 +1552,9 @@ static unsigned long ftrace_location_range(unsigned long start, unsigned long end)
  */
 unsigned long ftrace_location(unsigned long ip)
 {
+   if (unlikely(!ftrace_pages_start))
+   return ftrace_search_mcount_ip(ip);
+
return ftrace_location_range(ip, ip);
 }
 
@@ -4733,6 +4738,18 @@ static int ftrace_cmp_ips(const void *a, const void *b)
return 0;
 }
 
+static int ftrace_cmp_ips_insn(const void *a, const void *b)
+{
+   const unsigned long *ipa = a;
+   const unsigned long *ipb = b;
+
+   if (*ipa >= *ipb + MCOUNT_INSN_SIZE)
+   return 1;
+   if (*ipa < *ipb)
+   return -1;
+   return 0;
+}
+
 static void ftrace_swap_ips(void *a, void *b, int size)
 {
unsigned long *ipa = a;
@@ -4770,6 +4787,27 @@ static void ftrace_sort_mcount_area(unsigned long *start, unsigned long *end)
kernel_mcount_sorted = true;
 }
 
+static unsigned long ftrace_search_mcount_ip(unsigned long ip)
+{
+   extern unsigned long __start_mcount_loc[];
+   extern unsigned long __stop_mcount_loc[];
+
+   unsigned long *mcount_start = __start_mcount_loc;
+   unsigned long *mcount_end = __stop_mcount_loc;
+   unsigned long count = mcount_end - mcount_start;
+   unsigned long *retval;
+
+   if (!kernel_mcount_sorted)
+   return 0;
+
+	retval = bsearch(&ip, mcount_start, count,
+   sizeof(unsigned long), ftrace_cmp_ips_insn);
+   if (!retval)
+   return 0;
+
+   return ftrace_call_adjust(ip);
+}
+
 static int ftrace_process_locs(struct module *mod,
   unsigned long *start,
   unsigned long *end)
-- 
1.8.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH v4 31/34] early kprobes: enable early kprobes for x86.

2015-03-02 Thread Wang Nan
After dealing with kprobes on ftrace, early kprobes are allowed at
function entries when FTRACE is on, which makes them practical to use.
This patch enables the corresponding Kconfig entry for x86.

Signed-off-by: Wang Nan 
---
 arch/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c2fb8a8..ad259ea 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -38,6 +38,7 @@ config X86
select HAVE_PERF_EVENTS
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
+   select HAVE_EARLY_KPROBES
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select ARCH_DISCARD_MEMBLOCK
-- 
1.8.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] cpuidle / sleep: fix timer stopping regression (was: drivers: cpuidle: minor suspend-to-idle fixes)

2015-03-02 Thread Rafael J. Wysocki
On Monday, March 02, 2015 02:13:05 PM Rafael J. Wysocki wrote:
> On Monday, March 02, 2015 10:08:23 AM Lorenzo Pieralisi wrote:
> > On Sat, Feb 28, 2015 at 11:58:21PM +, Rafael J. Wysocki wrote:
> > > On Saturday, February 28, 2015 11:54:23 AM Lorenzo Pieralisi wrote:
> 
> [cut]
> 
> > > Index: linux-pm/drivers/cpuidle/cpuidle.c
> > > ===
> > > --- linux-pm.orig/drivers/cpuidle/cpuidle.c
> > > +++ linux-pm/drivers/cpuidle/cpuidle.c
> > > @@ -230,15 +230,39 @@ int cpuidle_select(struct cpuidle_driver
> > >   * @dev:   the cpuidle device
> > >   * @index: the index in the idle state table
> > >   *
> > > - * Returns the index in the idle state, < 0 in case of error.
> > > - * The error code depends on the backend driver
> > > + * Returns the index in the idle state, < 0 in case of error.  -EBUSY is
> > > + * returned to indicate that the target state was temporarily 
> > > inaccessible.
> > > + * The other error codes depend on the backend driver.
> > >   */
> > >  int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> > > int index)
> > >  {
> > > - if (cpuidle_state_is_coupled(dev, drv, index))
> > > - return cpuidle_enter_state_coupled(dev, drv, index);
> > > - return cpuidle_enter_state(dev, drv, index);
> > > + unsigned int broadcast;
> > > + int ret;
> > > +
> > > + broadcast = drv->states[index].flags & CPUIDLE_FLAG_TIMER_STOP;
> > > +
> > > + /*
> > > +  * Tell the time framework to switch to a broadcast timer
> > > +  * because our local timer will be shutdown. If a local timer
> > > +  * is used from another cpu as a broadcast timer, this call may
> > > +  * fail if it is not available
> > > +  */
> > > + if (broadcast) {
> > > + ret = clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
> > > +				  &dev->cpu);
> > > + if (ret)
> > > + return ret;
> > > + }
> > > +
> > > + ret = cpuidle_state_is_coupled(dev, drv, index) ?
> > > + cpuidle_enter_state_coupled(dev, drv, index) :
> > > + cpuidle_enter_state(dev, drv, index);
> > > +
> > > + if (broadcast)
> > > + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
> > > +
> > > + return ret;
> > 
> > You have to check this return value in cpuidle_enter_freeze() too
> > otherwise we return to the idle thread on -EBUSY without
> > even executing arch_cpu_idle() and with IRQ disabled, code hits
> > the WARN_ON_ONCE line 180.
> 
> Right.
> 
> > There are multiple ways of fixing the issue, either you check the
> > cpuidle_enter_freeze() return value (you add one) to cpuidle_idle_call()
> > to make code consistent with the cpuidle_idle_call "normal" idle
> > behaviour or you add the return value check in cpuidle_enter_freeze(),
> > I am fine both ways.
> 
> Well, in both cases we'd end up with a function enabling interrupts on exit
> in some cases and not doing that in some other ones.  Not nice.
> 
> Below is an alternative to that (on top of the previous patches).  Can you
> test it please?

Actually, this one is still slightly incorrect, because we should only call
cpuidle_reflect() if we've called cpuidle_select() before.  Also it's better
to pass cpuidle_driver and cpuidle_device to all functions called by
cpuidle_idle_call().

Two patches will follow.  [1/2] is a cleanup re-arranging the code in
cpuidle_idle_call() to move the fallback path to the end of the function.
[2/2] is a replacement for the patch sent previously.
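
For reference, the shape of the fallback under discussion, as a hedged
sketch (illustrative only, not the actual patches that follow): if
entering the chosen state fails, e.g. with -EBUSY because the broadcast
timer is unavailable, fall back to the default idle call rather than
returning to the idle loop with interrupts disabled.

static void example_idle_call(struct cpuidle_driver *drv,
			      struct cpuidle_device *dev)
{
	int index = cpuidle_select(drv, dev);

	if (index >= 0 && cpuidle_enter(drv, dev, index) >= 0) {
		/* only reflect after a successful cpuidle_select() */
		cpuidle_reflect(dev, index);
		local_irq_enable();
		return;
	}

	/* fallback path: default arch idle re-enables IRQs on exit */
	arch_cpu_idle();
}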

Please test.

Rafael

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH v4 24/34] early kprobes on ftrace: introduce x86 arch_fix_ftrace_early_kprobe().

2015-03-02 Thread Wang Nan
arch_fix_ftrace_early_kprobe() will be called while ftrace converts
its entries into nops. This function lets kprobes adjust their
internal data.

To keep as much arch-independent logic as possible out of arch-specific
code, arch_fix_ftrace_early_kprobe() doesn't iterate over the kprobes
in an aggregated kprobe. Such iteration should be done in
kernel/kprobes.c.
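
A hedged sketch of the generic side this refers to (the surrounding
function name and the exact aggrprobe test are assumptions, but the
shape is: kernel/kprobes.c walks kp->list of an aggregated kprobe and
invokes the arch hook for each member plus the aggregate itself):

static void example_fix_early_kprobe(struct kprobe *kp)
{
	struct optimized_kprobe *op = NULL;
	struct kprobe *p;
	int optimized = kp->flags & KPROBE_FLAG_OPTIMIZED;

	if (kprobe_aggrprobe(kp)) {
		op = container_of(kp, struct optimized_kprobe, kp);
		list_for_each_entry(p, &kp->list, list)
			arch_fix_ftrace_early_kprobe(p, op, optimized);
	}

	arch_fix_ftrace_early_kprobe(kp, op, optimized);
}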

Signed-off-by: Wang Nan 
---
 arch/x86/kernel/kprobes/core.c | 34 ++
 include/linux/kprobes.h|  9 +
 2 files changed, 43 insertions(+)

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 4e3d5a9..ead5b51 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1126,3 +1126,37 @@ int arch_trampoline_kprobe(struct kprobe *p)
 {
return 0;
 }
+
+#if defined(CONFIG_KPROBES_ON_FTRACE) && defined(CONFIG_EARLY_KPROBES)
+
+#define INT3_SIZE sizeof(kprobe_opcode_t)
+
+void arch_fix_ftrace_early_kprobe(struct kprobe *kp,
+   struct optimized_kprobe *op, int optimized)
+{
+   const unsigned char *ftrace_nop = ideal_nops[NOP_ATOMIC5];
+   const unsigned char *src = ftrace_nop + INT3_SIZE;
+   unsigned char *dest = kp->addr + INT3_SIZE;
+   size_t length = MCOUNT_INSN_SIZE - INT3_SIZE;
+
+	BUG_ON(op && (&op->kp != kp));
+   BUG_ON(optimized && op && (!(kp->flags & KPROBE_FLAG_OPTIMIZED)));
+
+   if ((!optimized) && (memcmp(dest, src, length) != 0))
+   text_poke_early(dest, src, length);
+
+	memcpy(&kp->opcode, ftrace_nop, INT3_SIZE);
+   if (op && op->kp.flags & KPROBE_FLAG_OPTIMIZED) {
+   /*
+* We are not allowed to use internal data of struct
+* optimized_kprobe if CONFIG_OPTPROBES is not defined.
+*/
+#ifdef CONFIG_OPTPROBES
+   memcpy(op->optinsn.copied_insn, src, length);
+#else
+   BUG_ON(1);
+#endif
+   }
+}
+
+#endif
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 96dc842..f8f2ac2 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -271,7 +271,16 @@ extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
 extern bool arch_within_kprobe_blacklist(unsigned long addr);
 
+/*
+ * struct optimized_kprobe is defined only when CONFIG_OPTPROBES is set.
+ * Only use a pointer to it in function declarations.
+ */
+struct optimized_kprobe;
+
 #if defined(CONFIG_EARLY_KPROBES) && defined(CONFIG_KPROBES_ON_FTRACE)
+extern void arch_fix_ftrace_early_kprobe(struct kprobe *kp,
+   struct optimized_kprobe *op, int optimized);
+
 extern void init_kprobes_on_ftrace(void);
 #else
 static inline void init_kprobes_on_ftrace(void)
-- 
1.8.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH v4 29/34] ftrace: x86: call kprobe_int3_handler() in ftrace int3 handler.

2015-03-02 Thread Wang Nan
Since early kprobes and ftrace both use int3 (ftrace inserts int3 at
the first byte of an ftrace entry, fills in the other bytes of the
'call' instruction and finally restores the first byte, while kprobes
rely on int3 to trigger their actions), a breakpoint may be shared
between ftrace and an early kprobe at the same address. Let
ftrace_int3_handler() deal with this conflict by calling
kprobe_int3_handler() before it jumps to the next instruction, to avoid
losing events while ftrace inserts the 'call' instruction.
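
For context, the x86 int3 path consults ftrace first while text is
being modified (roughly, abridged from arch/x86/kernel/traps.c of this
era), which is why the forwarding has to happen inside
ftrace_int3_handler() itself:

/* Rough, abridged shape of do_int3(): while ftrace is rewriting text,
 * its handler claims the trap before kprobes is ever consulted, so a
 * breakpoint shared with an early kprobe must be forwarded to
 * kprobe_int3_handler() from inside ftrace_int3_handler(). */
dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_DYNAMIC_FTRACE
	if (unlikely(atomic_read(&modifying_ftrace_code)) &&
	    ftrace_int3_handler(regs))
		return;		/* kprobes never consulted on this path */
#endif
	/* ... the normal path calls kprobe_int3_handler() later ... */
}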

Signed-off-by: Wang Nan 
---
 arch/x86/kernel/ftrace.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index f200cd4..0a86c7c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -302,7 +302,17 @@ int ftrace_int3_handler(struct pt_regs *regs)
if (!ftrace_location(ip) && !is_ftrace_caller(ip))
return 0;
 
-   regs->ip += MCOUNT_INSN_SIZE - 1;
+   /*
+	 * While converting an early kprobe on ftrace to ftrace, it is
+	 * possible to hit a breakpoint belonging to both ftrace and a
+	 * kprobe. Call kprobe_int3_handler() to avoid missing events.
+	 * Note that even if the kprobe is optimized, the breakpoint
+	 * based kprobe should still be functional.
+*/
+#if defined(CONFIG_EARLY_KPROBES) && defined(CONFIG_KPROBES_ON_FTRACE)
+   if (!kprobe_int3_handler(regs))
+#endif
+   regs->ip += MCOUNT_INSN_SIZE - 1;
 
return 1;
 }
-- 
1.8.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH v4 03/34] x86, traps: install gates using IST after cpu_init().

2015-03-02 Thread Wang Nan
X86_TRAP_NMI, X86_TRAP_DF and X86_TRAP_MC use their own stacks. Those
stacks are invalid until cpu_init() installs the TSS.

This patch moves setting of the 3 gates after cpu_init().

Signed-off-by: Wang Nan 
---
 arch/x86/kernel/traps.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9965bd1..4000b19 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -966,7 +966,6 @@ void __init trap_init(void)
 #endif
 
set_intr_gate(X86_TRAP_DE, divide_error);
-	set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
/* int4 can be called from all */
	set_system_intr_gate(X86_TRAP_OF, &overflow);
set_intr_gate(X86_TRAP_BR, bounds);
@@ -974,8 +973,6 @@ void __init trap_init(void)
set_intr_gate(X86_TRAP_NM, device_not_available);
 #ifdef CONFIG_X86_32
set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS);
-#else
-	set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
 #endif
set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
set_intr_gate(X86_TRAP_TS, invalid_TSS);
@@ -985,9 +982,6 @@ void __init trap_init(void)
set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
set_intr_gate(X86_TRAP_MF, coprocessor_error);
set_intr_gate(X86_TRAP_AC, alignment_check);
-#ifdef CONFIG_X86_MCE
-	set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
-#endif
set_intr_gate(X86_TRAP_XF, simd_coprocessor_error);
 
/* Reserve all the builtin and the syscall vector: */
@@ -1017,6 +1011,14 @@ void __init trap_init(void)
 */
cpu_init();
 
+	set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
+#ifndef CONFIG_X86_32
+	set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
+#endif
+#ifdef CONFIG_X86_MCE
+	set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
+#endif
+
/*
 * X86_TRAP_DB and X86_TRAP_BP have been set
	 * in early_trap_init(). However, IST works only after
-- 
1.8.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

