[PATCH 05/15] perf callchain: Create real callchain entries for inlined frames

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

The inline_node structs are maintained by the new dso->inlines tree.
This in turn keeps ownership of the fake symbols and srcline string
representing an inline frame.

This tree is sorted by address to allow quick lookups. All other entries
of the symbol beside the function name are unused for inline frames. The
advantage of this approach is that all existing users of the callchain
API can now transparently display inlined frames without having to patch
their code.

Signed-off-by: Milian Wolff 
Reviewed-by: Jiri Olsa 
Reviewed-by: Namhyung Kim 
Cc: David Ahern 
Cc: Peter Zijlstra 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20171009203310.17362-6-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/dso.c |  5 +
 tools/perf/util/dso.h |  1 +
 tools/perf/util/machine.c | 37 ++
 tools/perf/util/srcline.c | 51 +++
 tools/perf/util/srcline.h |  9 +
 5 files changed, 103 insertions(+)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 339e52971380..75c8250b3b8a 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -10,6 +10,7 @@
 #include "compress.h"
 #include "path.h"
 #include "symbol.h"
+#include "srcline.h"
 #include "dso.h"
 #include "machine.h"
 #include "auxtrace.h"
@@ -1201,6 +1202,7 @@ struct dso *dso__new(const char *name)
for (i = 0; i < MAP__NR_TYPES; ++i)
dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
dso->data.cache = RB_ROOT;
+   dso->inlined_nodes = RB_ROOT;
dso->data.fd = -1;
dso->data.status = DSO_DATA_STATUS_UNKNOWN;
dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
@@ -1232,6 +1234,9 @@ void dso__delete(struct dso *dso)
if (!RB_EMPTY_NODE(&dso->rb_node))
pr_err("DSO %s is still in rbtree when being deleted!\n",
   dso->long_name);
+
+   /* free inlines first, as they reference symbols */
+   inlines__tree_delete(&dso->inlined_nodes);
for (i = 0; i < MAP__NR_TYPES; ++i)
symbols__delete(&dso->symbols[i]);
 
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index a2bbb21f301c..122eca0d242d 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -141,6 +141,7 @@ struct dso {
struct rb_root   *root; /* root of rbtree that rb_node is in */
struct rb_root   symbols[MAP__NR_TYPES];
struct rb_root   symbol_names[MAP__NR_TYPES];
+   struct rb_root   inlined_nodes;
struct {
u64 addr;
struct symbol   *symbol;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index a37e1c056415..3d049cb313ac 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2109,6 +2109,40 @@ static int thread__resolve_callchain_sample(struct 
thread *thread,
return 0;
 }
 
+static int append_inlines(struct callchain_cursor *cursor,
+ struct map *map, struct symbol *sym, u64 ip)
+{
+   struct inline_node *inline_node;
+   struct inline_list *ilist;
+   u64 addr;
+
+   if (!symbol_conf.inline_name || !map || !sym)
+   return 1;
+
+   addr = map__rip_2objdump(map, ip);
+
+   inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr);
+   if (!inline_node) {
+   inline_node = dso__parse_addr_inlines(map->dso, addr, sym);
+   if (!inline_node)
+   return 1;
+
+   inlines__tree_insert(&map->dso->inlined_nodes, inline_node);
+   }
+
+   list_for_each_entry(ilist, &inline_node->val, list) {
+   int ret = callchain_cursor_append(cursor, ip, map,
+ ilist->symbol, false,
+ NULL, 0, 0, 0,
+ ilist->srcline);
+
+   if (ret != 0)
+   return ret;
+   }
+
+   return 0;
+}
+
 static int unwind_entry(struct unwind_entry *entry, void *arg)
 {
struct callchain_cursor *cursor = arg;
@@ -2117,6 +2151,9 @@ static int unwind_entry(struct unwind_entry *entry, void 
*arg)
if (symbol_conf.hide_unresolved && entry->sym == NULL)
return 0;
 
+   if (append_inlines(cursor, entry->map, entry->sym, entry->ip) == 0)
+   return 0;
+
srcline = callchain_srcline(entry->map, entry->sym, entry->ip);
return callchain_cursor_append(cursor, entry->ip,
   entry->map, entry->sym,
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index f202fc7827df..8bea6621d657 100644
--- 

[PATCH 05/15] perf callchain: Create real callchain entries for inlined frames

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

The inline_node structs are maintained by the new dso->inlines tree.
This in turn keeps ownership of the fake symbols and srcline string
representing an inline frame.

This tree is sorted by address to allow quick lookups. All other entries
of the symbol beside the function name are unused for inline frames. The
advantage of this approach is that all existing users of the callchain
API can now transparently display inlined frames without having to patch
their code.

Signed-off-by: Milian Wolff 
Reviewed-by: Jiri Olsa 
Reviewed-by: Namhyung Kim 
Cc: David Ahern 
Cc: Peter Zijlstra 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20171009203310.17362-6-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/dso.c |  5 +
 tools/perf/util/dso.h |  1 +
 tools/perf/util/machine.c | 37 ++
 tools/perf/util/srcline.c | 51 +++
 tools/perf/util/srcline.h |  9 +
 5 files changed, 103 insertions(+)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 339e52971380..75c8250b3b8a 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -10,6 +10,7 @@
 #include "compress.h"
 #include "path.h"
 #include "symbol.h"
+#include "srcline.h"
 #include "dso.h"
 #include "machine.h"
 #include "auxtrace.h"
@@ -1201,6 +1202,7 @@ struct dso *dso__new(const char *name)
for (i = 0; i < MAP__NR_TYPES; ++i)
dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
dso->data.cache = RB_ROOT;
+   dso->inlined_nodes = RB_ROOT;
dso->data.fd = -1;
dso->data.status = DSO_DATA_STATUS_UNKNOWN;
dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
@@ -1232,6 +1234,9 @@ void dso__delete(struct dso *dso)
if (!RB_EMPTY_NODE(&dso->rb_node))
pr_err("DSO %s is still in rbtree when being deleted!\n",
   dso->long_name);
+
+   /* free inlines first, as they reference symbols */
+   inlines__tree_delete(&dso->inlined_nodes);
for (i = 0; i < MAP__NR_TYPES; ++i)
symbols__delete(&dso->symbols[i]);
 
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index a2bbb21f301c..122eca0d242d 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -141,6 +141,7 @@ struct dso {
struct rb_root   *root; /* root of rbtree that rb_node is in */
struct rb_root   symbols[MAP__NR_TYPES];
struct rb_root   symbol_names[MAP__NR_TYPES];
+   struct rb_root   inlined_nodes;
struct {
u64 addr;
struct symbol   *symbol;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index a37e1c056415..3d049cb313ac 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2109,6 +2109,40 @@ static int thread__resolve_callchain_sample(struct 
thread *thread,
return 0;
 }
 
+static int append_inlines(struct callchain_cursor *cursor,
+ struct map *map, struct symbol *sym, u64 ip)
+{
+   struct inline_node *inline_node;
+   struct inline_list *ilist;
+   u64 addr;
+
+   if (!symbol_conf.inline_name || !map || !sym)
+   return 1;
+
+   addr = map__rip_2objdump(map, ip);
+
+   inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr);
+   if (!inline_node) {
+   inline_node = dso__parse_addr_inlines(map->dso, addr, sym);
+   if (!inline_node)
+   return 1;
+
+   inlines__tree_insert(&map->dso->inlined_nodes, inline_node);
+   }
+
+   list_for_each_entry(ilist, &inline_node->val, list) {
+   int ret = callchain_cursor_append(cursor, ip, map,
+ ilist->symbol, false,
+ NULL, 0, 0, 0,
+ ilist->srcline);
+
+   if (ret != 0)
+   return ret;
+   }
+
+   return 0;
+}
+
 static int unwind_entry(struct unwind_entry *entry, void *arg)
 {
struct callchain_cursor *cursor = arg;
@@ -2117,6 +2151,9 @@ static int unwind_entry(struct unwind_entry *entry, void 
*arg)
if (symbol_conf.hide_unresolved && entry->sym == NULL)
return 0;
 
+   if (append_inlines(cursor, entry->map, entry->sym, entry->ip) == 0)
+   return 0;
+
srcline = callchain_srcline(entry->map, entry->sym, entry->ip);
return callchain_cursor_append(cursor, entry->ip,
   entry->map, entry->sym,
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index f202fc7827df..8bea6621d657 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -583,3 +583,54 @@ void inline_node__delete(struct inline_node *node)
 
free(node);
 }
+
+void 

[PATCH 09/15] perf callchain: Compare symbol name for inlined frames when matching

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

The fake symbols we create for inlined frames will represent different
functions but can use the symbol start address. This leads to issues
when different inline branches all lead to the same function.

Before:
~
$ perf report -s sym -i perf.inlining.data --inline --stdio -g function
...
 --38.86%--_start
   __libc_start_main
   main
   |
--37.57%--std::norm (inlined)
  std::_Norm_helper::_S_do_it 
(inlined)
  |
   --36.36%--std::abs (inlined)
 std::__complex_abs (inlined)
 |
  
--12.24%--std::linear_congruential_engine::operator() (inlined)

std::__detail::__mod (inlined)

std::__detail::_Mod::__calc (inlined)
~

Note that this backtrace representation is completely bogus.
Complex abs does not call the linear congruential engine! It
is just a side-effect of a longer inlined stack being appended
to a shorter, different inlined stack, both of which originate
in the same function (main).

This patch fixes the issue:

~
$ perf report -s sym -i perf.inlining.data --inline --stdio -g function
...
 --38.86%--_start
   __libc_start_main
   main
   |
   
|--35.59%--std::uniform_real_distribution::operator() (inlined)
   |  
std::uniform_real_distribution::operator() (inlined)
   |  |
   |   
--34.37%--std::__detail::_Adaptor::operator() (inlined)
   | std::generate_canonical (inlined)
   | |
   |  
--12.24%--std::linear_congruential_engine::operator() (inlined)
   |
std::__detail::__mod (inlined)
   |
std::__detail::_Mod::__calc (inlined)
   |
--1.99%--std::norm (inlined)
  std::_Norm_helper::_S_do_it 
(inlined)
  std::abs (inlined)
  std::__complex_abs (inlined)
~

Signed-off-by: Milian Wolff 
Reviewed-by: Jiri Olsa 
Reviewed-by: Namhyung Kim 
Cc: David Ahern 
Cc: Peter Zijlstra 
Cc: Ravi Bangoria 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20171009203310.17362-10-milian.wo...@kdab.com
Cc: Arnaldo Carvalho de Melo 
[ Fix up conflict with c1fbc0cf81f1 ("perf callchain: Compare dsos (as well) 
for CCKEY_FUNCTION"), remove unneeded hunk ]
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/callchain.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 77031efdca5c..35a920f09503 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -690,6 +690,14 @@ static enum match_result match_chain(struct 
callchain_cursor_node *node,
}
 
if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) {
+   /*
+* Compare inlined frames based on their symbol name because
+* different inlined frames will have the same symbol start
+*/
+   if (cnode->ms.sym->inlined || node->sym->inlined)
+   return match_chain_strings(cnode->ms.sym->name,
+  node->sym->name);
+
left = cnode->ms.sym->start;
right = sym->start;
left_dso = cnode->ms.map->dso;
-- 
2.13.6



[PATCH 09/15] perf callchain: Compare symbol name for inlined frames when matching

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

The fake symbols we create for inlined frames will represent different
functions but can use the symbol start address. This leads to issues
when different inline branches all lead to the same function.

Before:
~
$ perf report -s sym -i perf.inlining.data --inline --stdio -g function
...
 --38.86%--_start
   __libc_start_main
   main
   |
--37.57%--std::norm (inlined)
  std::_Norm_helper::_S_do_it 
(inlined)
  |
   --36.36%--std::abs (inlined)
 std::__complex_abs (inlined)
 |
  
--12.24%--std::linear_congruential_engine::operator() (inlined)

std::__detail::__mod (inlined)

std::__detail::_Mod::__calc (inlined)
~

Note that this backtrace representation is completely bogus.
Complex abs does not call the linear congruential engine! It
is just a side-effect of a longer inlined stack being appended
to a shorter, different inlined stack, both of which originate
in the same function (main).

This patch fixes the issue:

~
$ perf report -s sym -i perf.inlining.data --inline --stdio -g function
...
 --38.86%--_start
   __libc_start_main
   main
   |
   
|--35.59%--std::uniform_real_distribution::operator() > (inlined)
   |  
std::uniform_real_distribution::operator() > (inlined)
   |  |
   |   
--34.37%--std::__detail::_Adaptor, double>::operator() (inlined)
   | std::generate_canonical > (inlined)
   | |
   |  
--12.24%--std::linear_congruential_engine::operator() (inlined)
   |
std::__detail::__mod (inlined)
   |
std::__detail::_Mod::__calc (inlined)
   |
--1.99%--std::norm (inlined)
  std::_Norm_helper::_S_do_it 
(inlined)
  std::abs (inlined)
  std::__complex_abs (inlined)
~

Signed-off-by: Milian Wolff 
Reviewed-by: Jiri Olsa 
Reviewed-by: Namhyung Kim 
Cc: David Ahern 
Cc: Peter Zijlstra 
Cc: Ravi Bangoria 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20171009203310.17362-10-milian.wo...@kdab.com
Cc: Arnaldo Carvalho de Melo 
[ Fix up conflict with c1fbc0cf81f1 ("perf callchain: Compare dsos (as well) 
for CCKEY_FUNCTION"), remove unneeded hunk ]
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/callchain.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 77031efdca5c..35a920f09503 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -690,6 +690,14 @@ static enum match_result match_chain(struct 
callchain_cursor_node *node,
}
 
if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) {
+   /*
+* Compare inlined frames based on their symbol name because
+* different inlined frames will have the same symbol start
+*/
+   if (cnode->ms.sym->inlined || node->sym->inlined)
+   return match_chain_strings(cnode->ms.sym->name,
+  node->sym->name);
+
left = cnode->ms.sym->start;
right = sym->start;
left_dso = cnode->ms.map->dso;
-- 
2.13.6



[PATCH 08/15] perf script: Mark inlined frames and do not print DSO for them

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

Instead of showing the (repeated) DSO name of the non-inlined frame, we
now show the "(inlined)" suffix instead.

Before:
   214f7 __hypot_finite (/usr/lib/libm-2.25.so)
ace3 hypot (/usr/lib/libm-2.25.so)
 a4a std::__complex_abs 
(/home/milian/projects/src/perf-tests/inlining)
 a4a std::abs 
(/home/milian/projects/src/perf-tests/inlining)
 a4a std::_Norm_helper::_S_do_it 
(/home/milian/projects/src/perf-tests/inlining)
 a4a std::norm 
(/home/milian/projects/src/perf-tests/inlining)
 a4a main (/home/milian/projects/src/perf-tests/inlining)
   20510 __libc_start_main (/usr/lib/libc-2.25.so)
 bd9 _start (/home/milian/projects/src/perf-tests/inlining)

After:
   214f7 __hypot_finite (/usr/lib/libm-2.25.so)
ace3 hypot (/usr/lib/libm-2.25.so)
 a4a std::__complex_abs (inlined)
 a4a std::abs (inlined)
 a4a std::_Norm_helper::_S_do_it (inlined)
 a4a std::norm (inlined)
 a4a main (/home/milian/projects/src/perf-tests/inlining)
   20510 __libc_start_main (/usr/lib/libc-2.25.so)
 bd9 _start (/home/milian/projects/src/perf-tests/inlining)

Signed-off-by: Milian Wolff 
Reviewed-by: Jiri Olsa 
Reviewed-by: Namhyung Kim 
Cc: David Ahern 
Cc: Peter Zijlstra 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20171009203310.17362-9-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/evsel_fprintf.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index f2c6c5ee11e8..5b9e89257aa7 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -157,7 +157,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, 
int left_alignment,
}
}
 
-   if (print_dso) {
+   if (print_dso && (!node->sym || !node->sym->inlined)) {
printed += fprintf(fp, " (");
printed += map__fprintf_dsoname(node->map, fp);
printed += fprintf(fp, ")");
@@ -166,6 +166,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, 
int left_alignment,
if (print_srcline)
printed += map__fprintf_srcline(node->map, 
addr, "\n  ", fp);
 
+   if (node->sym && node->sym->inlined)
+   printed += fprintf(fp, " (inlined)");
+
if (!print_oneline)
printed += fprintf(fp, "\n");
 
-- 
2.13.6



[PATCH 08/15] perf script: Mark inlined frames and do not print DSO for them

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

Instead of showing the (repeated) DSO name of the non-inlined frame, we
now show the "(inlined)" suffix instead.

Before:
   214f7 __hypot_finite (/usr/lib/libm-2.25.so)
ace3 hypot (/usr/lib/libm-2.25.so)
 a4a std::__complex_abs 
(/home/milian/projects/src/perf-tests/inlining)
 a4a std::abs 
(/home/milian/projects/src/perf-tests/inlining)
 a4a std::_Norm_helper::_S_do_it 
(/home/milian/projects/src/perf-tests/inlining)
 a4a std::norm 
(/home/milian/projects/src/perf-tests/inlining)
 a4a main (/home/milian/projects/src/perf-tests/inlining)
   20510 __libc_start_main (/usr/lib/libc-2.25.so)
 bd9 _start (/home/milian/projects/src/perf-tests/inlining)

After:
   214f7 __hypot_finite (/usr/lib/libm-2.25.so)
ace3 hypot (/usr/lib/libm-2.25.so)
 a4a std::__complex_abs (inlined)
 a4a std::abs (inlined)
 a4a std::_Norm_helper::_S_do_it (inlined)
 a4a std::norm (inlined)
 a4a main (/home/milian/projects/src/perf-tests/inlining)
   20510 __libc_start_main (/usr/lib/libc-2.25.so)
 bd9 _start (/home/milian/projects/src/perf-tests/inlining)

Signed-off-by: Milian Wolff 
Reviewed-by: Jiri Olsa 
Reviewed-by: Namhyung Kim 
Cc: David Ahern 
Cc: Peter Zijlstra 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20171009203310.17362-9-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/evsel_fprintf.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index f2c6c5ee11e8..5b9e89257aa7 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -157,7 +157,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, 
int left_alignment,
}
}
 
-   if (print_dso) {
+   if (print_dso && (!node->sym || !node->sym->inlined)) {
printed += fprintf(fp, " (");
printed += map__fprintf_dsoname(node->map, fp);
printed += fprintf(fp, ")");
@@ -166,6 +166,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, 
int left_alignment,
if (print_srcline)
printed += map__fprintf_srcline(node->map, 
addr, "\n  ", fp);
 
+   if (node->sym && node->sym->inlined)
+   printed += fprintf(fp, " (inlined)");
+
if (!print_oneline)
printed += fprintf(fp, "\n");
 
-- 
2.13.6



[PATCH 11/15] perf report: Properly handle branch count in match_chain()

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

Some of the code paths I introduced before returned too early without
running the code to handle a node's branch count.  By refactoring
match_chain to only have one exit point, this can be remedied.

Signed-off-by: Milian Wolff 
Acked-by: Namhyung Kim 
Cc: David Ahern 
Cc: Jin Yao 
Cc: Peter Zijlstra 
Cc: Ravi Bangoria 
Link: http://lkml.kernel.org/r/1707691.qaJ269GSZW@agathebauer
Link: http://lkml.kernel.org/r/20171018185350.14893-2-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/callchain.c | 140 
 1 file changed, 78 insertions(+), 62 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 35a920f09503..19bfcadcf891 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -666,83 +666,99 @@ static enum match_result match_chain_strings(const char 
*left,
return ret;
 }
 
-static enum match_result match_chain(struct callchain_cursor_node *node,
-struct callchain_list *cnode)
+/*
+ * We need to always use relative addresses because we're aggregating
+ * callchains from multiple threads, i.e. different address spaces, so
+ * comparing absolute addresses make no sense as a symbol in a DSO may end up
+ * in a different address when used in a different binary or even the same
+ * binary but with some sort of address randomization technique, thus we need
+ * to compare just relative addresses. -acme
+ */
+static enum match_result match_chain_dso_addresses(struct map *left_map, u64 
left_ip,
+  struct map *right_map, u64 
right_ip)
 {
-   struct symbol *sym = node->sym;
-   u64 left, right;
-   struct dso *left_dso = NULL;
-   struct dso *right_dso = NULL;
+   struct dso *left_dso = left_map ? left_map->dso : NULL;
+   struct dso *right_dso = right_map ? right_map->dso : NULL;
 
-   if (callchain_param.key == CCKEY_SRCLINE) {
-   enum match_result match = match_chain_strings(cnode->srcline,
- node->srcline);
+   if (left_dso != right_dso)
+   return left_dso < right_dso ? MATCH_LT : MATCH_GT;
 
-   /* if no srcline is available, fallback to symbol name */
-   if (match == MATCH_ERROR && cnode->ms.sym && node->sym)
-   match = match_chain_strings(cnode->ms.sym->name,
-   node->sym->name);
+   if (left_ip != right_ip)
+   return left_ip < right_ip ? MATCH_LT : MATCH_GT;
 
-   if (match != MATCH_ERROR)
-   return match;
+   return MATCH_EQ;
+}
 
-   /* otherwise fall-back to IP-based comparison below */
-   }
+static enum match_result match_chain(struct callchain_cursor_node *node,
+struct callchain_list *cnode)
+{
+   enum match_result match = MATCH_ERROR;
 
-   if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) {
-   /*
-* Compare inlined frames based on their symbol name because
-* different inlined frames will have the same symbol start
-*/
-   if (cnode->ms.sym->inlined || node->sym->inlined)
-   return match_chain_strings(cnode->ms.sym->name,
-  node->sym->name);
-
-   left = cnode->ms.sym->start;
-   right = sym->start;
-   left_dso = cnode->ms.map->dso;
-   right_dso = node->map->dso;
-   } else {
-   left = cnode->ip;
-   right = node->ip;
+   switch (callchain_param.key) {
+   case CCKEY_SRCLINE:
+   match = match_chain_strings(cnode->srcline, node->srcline);
+   if (match != MATCH_ERROR)
+   break;
+   /* otherwise fall-back to symbol-based comparison below */
+   __fallthrough;
+   case CCKEY_FUNCTION:
+   if (node->sym && cnode->ms.sym) {
+   /*
+* Compare inlined frames based on their symbol name
+* because different inlined frames will have the same
+* symbol start. Otherwise do a faster comparison based
+* on the symbol start address.
+*/
+   if (cnode->ms.sym->inlined || node->sym->inlined) {
+   match = match_chain_strings(cnode->ms.sym->name,
+   node->sym->name);
+   if 

[PATCH 11/15] perf report: Properly handle branch count in match_chain()

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

Some of the code paths I introduced before returned too early without
running the code to handle a node's branch count.  By refactoring
match_chain to only have one exit point, this can be remedied.

Signed-off-by: Milian Wolff 
Acked-by: Namhyung Kim 
Cc: David Ahern 
Cc: Jin Yao 
Cc: Peter Zijlstra 
Cc: Ravi Bangoria 
Link: http://lkml.kernel.org/r/1707691.qaJ269GSZW@agathebauer
Link: http://lkml.kernel.org/r/20171018185350.14893-2-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/callchain.c | 140 
 1 file changed, 78 insertions(+), 62 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 35a920f09503..19bfcadcf891 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -666,83 +666,99 @@ static enum match_result match_chain_strings(const char 
*left,
return ret;
 }
 
-static enum match_result match_chain(struct callchain_cursor_node *node,
-struct callchain_list *cnode)
+/*
+ * We need to always use relative addresses because we're aggregating
+ * callchains from multiple threads, i.e. different address spaces, so
+ * comparing absolute addresses make no sense as a symbol in a DSO may end up
+ * in a different address when used in a different binary or even the same
+ * binary but with some sort of address randomization technique, thus we need
+ * to compare just relative addresses. -acme
+ */
+static enum match_result match_chain_dso_addresses(struct map *left_map, u64 
left_ip,
+  struct map *right_map, u64 
right_ip)
 {
-   struct symbol *sym = node->sym;
-   u64 left, right;
-   struct dso *left_dso = NULL;
-   struct dso *right_dso = NULL;
+   struct dso *left_dso = left_map ? left_map->dso : NULL;
+   struct dso *right_dso = right_map ? right_map->dso : NULL;
 
-   if (callchain_param.key == CCKEY_SRCLINE) {
-   enum match_result match = match_chain_strings(cnode->srcline,
- node->srcline);
+   if (left_dso != right_dso)
+   return left_dso < right_dso ? MATCH_LT : MATCH_GT;
 
-   /* if no srcline is available, fallback to symbol name */
-   if (match == MATCH_ERROR && cnode->ms.sym && node->sym)
-   match = match_chain_strings(cnode->ms.sym->name,
-   node->sym->name);
+   if (left_ip != right_ip)
+   return left_ip < right_ip ? MATCH_LT : MATCH_GT;
 
-   if (match != MATCH_ERROR)
-   return match;
+   return MATCH_EQ;
+}
 
-   /* otherwise fall-back to IP-based comparison below */
-   }
+static enum match_result match_chain(struct callchain_cursor_node *node,
+struct callchain_list *cnode)
+{
+   enum match_result match = MATCH_ERROR;
 
-   if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) {
-   /*
-* Compare inlined frames based on their symbol name because
-* different inlined frames will have the same symbol start
-*/
-   if (cnode->ms.sym->inlined || node->sym->inlined)
-   return match_chain_strings(cnode->ms.sym->name,
-  node->sym->name);
-
-   left = cnode->ms.sym->start;
-   right = sym->start;
-   left_dso = cnode->ms.map->dso;
-   right_dso = node->map->dso;
-   } else {
-   left = cnode->ip;
-   right = node->ip;
+   switch (callchain_param.key) {
+   case CCKEY_SRCLINE:
+   match = match_chain_strings(cnode->srcline, node->srcline);
+   if (match != MATCH_ERROR)
+   break;
+   /* otherwise fall-back to symbol-based comparison below */
+   __fallthrough;
+   case CCKEY_FUNCTION:
+   if (node->sym && cnode->ms.sym) {
+   /*
+* Compare inlined frames based on their symbol name
+* because different inlined frames will have the same
+* symbol start. Otherwise do a faster comparison based
+* on the symbol start address.
+*/
+   if (cnode->ms.sym->inlined || node->sym->inlined) {
+   match = match_chain_strings(cnode->ms.sym->name,
+   node->sym->name);
+   if (match != MATCH_ERROR)
+   break;
+   } else {
+   match = 
match_chain_dso_addresses(cnode->ms.map, 

[PATCH 13/15] perf report: Cache srclines for callchain nodes

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

On one hand this ensures that the memory is properly freed when the DSO
gets freed. On the other hand this significantly speeds up the
processing of the callchain nodes when lots of srclines are requested.
For one of my data files e.g.:

Before:

 Performance counter stats for 'perf report -s srcline -g srcline --stdio':

  52496.495043  task-clock (msec) #0.999 CPUs utilized
   634  context-switches  #0.012 K/sec
 2  cpu-migrations#0.000 K/sec
   191,561  page-faults   #0.004 M/sec
   165,074,498,235  cycles#3.144 GHz
   334,170,832,408  instructions  #2.02  insn per cycle
90,220,029,745  branches  # 1718.591 M/sec
   654,525,177  branch-misses #0.73% of all branches

  52.533273822 seconds time elapsedProcessed 236605 events and lost 40 
chunks!

After:

 Performance counter stats for 'perf report -s srcline -g srcline --stdio':

  22606.323706  task-clock (msec) #1.000 CPUs utilized
31  context-switches  #0.001 K/sec
 0  cpu-migrations#0.000 K/sec
   185,471  page-faults   #0.008 M/sec
71,188,113,681  cycles#3.149 GHz
   133,204,943,083  instructions  #1.87  insn per cycle
34,886,384,979  branches  # 1543.214 M/sec
   278,214,495  branch-misses #0.80% of all branches

  22.609857253 seconds time elapsed

Note that the difference is only this large when `--inline` is not
passed. In such situations, we would use the inliner cache and thus do
not run this code path that often.

I think that this cache should actually be used in other places, too.
When looking at the valgrind leak report for perf report, we see tons of
srclines being leaked, most notably from calls to
hist_entry__get_srcline. The problem is that get_srcline has many
different formatting options (show_sym, show_addr, potentially even
unwind_inlines when calling __get_srcline directly). As such, the
srcline cannot easily be cached for all calls, or we'd have to add
caches for all formatting combinations (6 so far). An alternative would
be to remove the formatting options and handle that on a different level
- i.e. print the sym/addr on demand wherever we actually output
something. And the unwind_inlines could be moved into a separate
function that does not return the srcline.

Signed-off-by: Milian Wolff 
Reviewed-by: Andi Kleen 
Cc: David Ahern 
Cc: Jin Yao 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20171019113836.5548-4-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/dso.c |  2 ++
 tools/perf/util/dso.h |  1 +
 tools/perf/util/machine.c | 17 +---
 tools/perf/util/srcline.c | 66 +++
 tools/perf/util/srcline.h |  7 +
 5 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 75c8250b3b8a..3192b608e91b 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1203,6 +1203,7 @@ struct dso *dso__new(const char *name)
dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
dso->data.cache = RB_ROOT;
dso->inlined_nodes = RB_ROOT;
+   dso->srclines = RB_ROOT;
dso->data.fd = -1;
dso->data.status = DSO_DATA_STATUS_UNKNOWN;
dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
@@ -1237,6 +1238,7 @@ void dso__delete(struct dso *dso)
 
/* free inlines first, as they reference symbols */
inlines__tree_delete(&dso->inlined_nodes);
+   srcline__tree_delete(&dso->srclines);
for (i = 0; i < MAP__NR_TYPES; ++i)
symbols__delete(&dso->symbols[i]);
 
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 122eca0d242d..821b16c67030 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -142,6 +142,7 @@ struct dso {
struct rb_root   symbols[MAP__NR_TYPES];
struct rb_root   symbol_names[MAP__NR_TYPES];
struct rb_root   inlined_nodes;
+   struct rb_root   srclines;
struct {
u64 addr;
struct symbol   *symbol;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 177c1d4088f8..94d8f1ccedd9 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1711,11 +1711,22 @@ struct mem_info *sample__resolve_mem(struct perf_sample 
*sample,
 
 static char 

[PATCH 15/15] perf util: Enable handling of inlined frames by default

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

Now that we have caches in place to speed up the process of finding
inlined frames and srcline information repeatedly, we can enable this
useful option by default.

Suggested-by: Ingo Molnar 
Signed-off-by: Milian Wolff 
Reviewed-by: Andi Kleen 
Cc: David Ahern 
Cc: Jin Yao 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20171019113836.5548-6-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Documentation/perf-report.txt | 3 ++-
 tools/perf/Documentation/perf-script.txt | 3 ++-
 tools/perf/util/symbol.c | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt 
b/tools/perf/Documentation/perf-report.txt
index 383a98d992ed..ddde2b54af57 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -434,7 +434,8 @@ include::itrace.txt[]
 
 --inline::
If a callgraph address belongs to an inlined function, the inline stack
-   will be printed. Each entry is function name or file/line.
+   will be printed. Each entry is function name or file/line. Enabled by
+   default, disable with --no-inline.
 
 include::callchain-overhead-calculation.txt[]
 
diff --git a/tools/perf/Documentation/perf-script.txt 
b/tools/perf/Documentation/perf-script.txt
index bcc1ba35a2d8..25e677344728 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -327,7 +327,8 @@ include::itrace.txt[]
 
 --inline::
If a callgraph address belongs to an inlined function, the inline stack
-   will be printed. Each entry has function name and file/line.
+   will be printed. Each entry has function name and file/line. Enabled by
+   default, disable with --no-inline.
 
 SEE ALSO
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 066e38aa4063..ce6993bebf8c 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -45,6 +45,7 @@ struct symbol_conf symbol_conf = {
.show_hist_headers  = true,
.symfs  = "",
.event_group= true,
+   .inline_name= true,
 };
 
 static enum dso_binary_type binary_type_symtab[] = {
-- 
2.13.6



[PATCH 13/15] perf report: Cache srclines for callchain nodes

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

On one hand this ensures that the memory is properly freed when the DSO
gets freed. On the other hand this significantly speeds up the
processing of the callchain nodes when lots of srclines are requested.
For one of my data files e.g.:

Before:

 Performance counter stats for 'perf report -s srcline -g srcline --stdio':

  52496.495043  task-clock (msec) #0.999 CPUs utilized
   634  context-switches  #0.012 K/sec
 2  cpu-migrations#0.000 K/sec
   191,561  page-faults   #0.004 M/sec
   165,074,498,235  cycles#3.144 GHz
   334,170,832,408  instructions  #2.02  insn per cycle
90,220,029,745  branches  # 1718.591 M/sec
   654,525,177  branch-misses #0.73% of all branches

  52.533273822 seconds time elapsedProcessed 236605 events and lost 40 
chunks!

After:

 Performance counter stats for 'perf report -s srcline -g srcline --stdio':

  22606.323706  task-clock (msec) #1.000 CPUs utilized
31  context-switches  #0.001 K/sec
 0  cpu-migrations#0.000 K/sec
   185,471  page-faults   #0.008 M/sec
71,188,113,681  cycles#3.149 GHz
   133,204,943,083  instructions  #1.87  insn per cycle
34,886,384,979  branches  # 1543.214 M/sec
   278,214,495  branch-misses #0.80% of all branches

  22.609857253 seconds time elapsed

Note that the difference is only this large when `--inline` is not
passed. In such situations, we would use the inliner cache and thus do
not run this code path that often.

I think that this cache should actually be used in other places, too.
When looking at the valgrind leak report for perf report, we see tons of
srclines being leaked, most notably from calls to
hist_entry__get_srcline. The problem is that get_srcline has many
different formatting options (show_sym, show_addr, potentially even
unwind_inlines when calling __get_srcline directly). As such, the
srcline cannot easily be cached for all calls, or we'd have to add
caches for all formatting combinations (6 so far). An alternative would
be to remove the formatting options and handle that on a different level
- i.e. print the sym/addr on demand wherever we actually output
something. And the unwind_inlines could be moved into a separate
function that does not return the srcline.

Signed-off-by: Milian Wolff 
Reviewed-by: Andi Kleen 
Cc: David Ahern 
Cc: Jin Yao 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20171019113836.5548-4-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/dso.c |  2 ++
 tools/perf/util/dso.h |  1 +
 tools/perf/util/machine.c | 17 +---
 tools/perf/util/srcline.c | 66 +++
 tools/perf/util/srcline.h |  7 +
 5 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 75c8250b3b8a..3192b608e91b 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1203,6 +1203,7 @@ struct dso *dso__new(const char *name)
dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
dso->data.cache = RB_ROOT;
dso->inlined_nodes = RB_ROOT;
+   dso->srclines = RB_ROOT;
dso->data.fd = -1;
dso->data.status = DSO_DATA_STATUS_UNKNOWN;
dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
@@ -1237,6 +1238,7 @@ void dso__delete(struct dso *dso)
 
/* free inlines first, as they reference symbols */
inlines__tree_delete(&dso->inlined_nodes);
+   srcline__tree_delete(&dso->srclines);
for (i = 0; i < MAP__NR_TYPES; ++i)
symbols__delete(&dso->symbols[i]);
 
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 122eca0d242d..821b16c67030 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -142,6 +142,7 @@ struct dso {
struct rb_root   symbols[MAP__NR_TYPES];
struct rb_root   symbol_names[MAP__NR_TYPES];
struct rb_root   inlined_nodes;
+   struct rb_root   srclines;
struct {
u64 addr;
struct symbol   *symbol;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 177c1d4088f8..94d8f1ccedd9 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1711,11 +1711,22 @@ struct mem_info *sample__resolve_mem(struct perf_sample 
*sample,
 
 static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip)
 {
+   char *srcline = NULL;
+
if (!map || callchain_param.key == CCKEY_FUNCTION)
-   return NULL;
+   return 

[PATCH 15/15] perf util: Enable handling of inlined frames by default

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

Now that we have caches in place to speed up the process of finding
inlined frames and srcline information repeatedly, we can enable this
useful option by default.

Suggested-by: Ingo Molnar 
Signed-off-by: Milian Wolff 
Reviewed-by: Andi Kleen 
Cc: David Ahern 
Cc: Jin Yao 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20171019113836.5548-6-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Documentation/perf-report.txt | 3 ++-
 tools/perf/Documentation/perf-script.txt | 3 ++-
 tools/perf/util/symbol.c | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt 
b/tools/perf/Documentation/perf-report.txt
index 383a98d992ed..ddde2b54af57 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -434,7 +434,8 @@ include::itrace.txt[]
 
 --inline::
If a callgraph address belongs to an inlined function, the inline stack
-   will be printed. Each entry is function name or file/line.
+   will be printed. Each entry is function name or file/line. Enabled by
+   default, disable with --no-inline.
 
 include::callchain-overhead-calculation.txt[]
 
diff --git a/tools/perf/Documentation/perf-script.txt 
b/tools/perf/Documentation/perf-script.txt
index bcc1ba35a2d8..25e677344728 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -327,7 +327,8 @@ include::itrace.txt[]
 
 --inline::
If a callgraph address belongs to an inlined function, the inline stack
-   will be printed. Each entry has function name and file/line.
+   will be printed. Each entry has function name and file/line. Enabled by
+   default, disable with --no-inline.
 
 SEE ALSO
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 066e38aa4063..ce6993bebf8c 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -45,6 +45,7 @@ struct symbol_conf symbol_conf = {
.show_hist_headers  = true,
.symfs  = "",
.event_group= true,
+   .inline_name= true,
 };
 
 static enum dso_binary_type binary_type_symtab[] = {
-- 
2.13.6



[PATCH 14/15] perf report: Use srcline from callchain for hist entries

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

This also removes the symbol name from the srcline column, more on this
below.

This ensures we use the correct srcline, which could originate from a
potentially inlined function. The hist entries used to query for the
srcline based purely on the IP, which leads to wrong results for inlined
entries.

Before:

~
  perf report --inline -s srcline -g none --stdio
  ...
  # Children  Self  Source:Line
  #     
..
  #
  94.23% 0.00%  __libc_start_main+18446603487898210537
  94.23% 0.00%  _start+41
  44.58% 0.00%  main+100
  44.58% 0.00%  std::_Norm_helper::_S_do_it+100
  44.58% 0.00%  std::__complex_abs+100
  44.58% 0.00%  std::abs+100
  44.58% 0.00%  std::norm+100
  36.01% 0.00%  hypot+18446603487892193300
  25.81% 0.00%  main+41
  25.81% 0.00%  
std::__detail::_Adaptor::operator()+41
  25.81% 0.00%  
std::uniform_real_distribution::operator()+41
  25.75%25.75%  random.h:143
  18.39% 0.00%  main+57
  18.39% 0.00%  
std::__detail::_Adaptor::operator()+57
  18.39% 0.00%  
std::uniform_real_distribution::operator()+57
  13.80%13.80%  random.tcc:3330
   5.64% 0.00%  ??:0
   4.13% 4.13%  __hypot_finite+163
   4.13% 0.00%  __hypot_finite+18446603487892193443
...
~

After:

~
  perf report --inline -s srcline -g none --stdio
  ...
  # Children  Self  Source:Line
  #     ...
  #
  94.30% 1.19%  main.cpp:39
  94.23% 0.00%  __libc_start_main+18446603487898210537
  94.23% 0.00%  _start+41
  48.44% 1.70%  random.h:1823
  48.44% 0.00%  random.h:1814
  46.74% 2.53%  random.h:185
  44.68% 0.10%  complex:589
  44.68% 0.00%  complex:597
  44.68% 0.00%  complex:654
  44.68% 0.00%  complex:664
  40.61%13.80%  random.tcc:3330
  36.01% 0.00%  hypot+18446603487892193300
  26.81% 0.00%  random.h:151
  26.81% 0.00%  random.h:332
  25.75%25.75%  random.h:143
   5.64% 0.00%  ??:0
   4.13% 4.13%  __hypot_finite+163
   4.13% 0.00%  __hypot_finite+18446603487892193443
...
~

Note that this change removes the symbol from the source:line hist
column. If this information is desired, users should explicitly query
for it if needed. I.e. run this command instead:

~
  perf report --inline -s sym,srcline -g none --stdio
  ...
  # To display the perf.data header info, please use --header/--header-only 
options.
  #
  #
  # Total Lost Samples: 0
  #
  # Samples: 1K of event 'cycles:uppp'
  # Event count (approx.): 1381229476
  #
  # Children  Self  Symbol  
 
Source:Line
  #     
...
  ...
  #
  94.30% 1.19%  [.] main
 
main.cpp:39
  94.23% 0.00%  [.] __libc_start_main   
 
__libc_start_main+18446603487898210537
  94.23% 0.00%  [.] _start  
 
_start+41
  48.44% 0.00%  [.] 
std::uniform_real_distribution::operator() (inlined)  random.h:1814
  48.44% 0.00%  [.] 
std::uniform_real_distribution::operator() (inlined)  random.h:1823
  46.74% 0.00%  [.] 
std::__detail::_Adaptor::operator() (inlined)  random.h:185
  44.68% 0.00%  [.] std::_Norm_helper::_S_do_it (inlined) 
 
complex:654
  44.68% 0.00%  [.] std::__complex_abs (inlined)
 
complex:589
  44.68% 0.00%  [.] std::abs (inlined)  
 
complex:597
  44.68% 0.00%  [.] std::norm (inlined) 
 

[PATCH 14/15] perf report: Use srcline from callchain for hist entries

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

This also removes the symbol name from the srcline column, more on this
below.

This ensures we use the correct srcline, which could originate from a
potentially inlined function. The hist entries used to query for the
srcline based purely on the IP, which leads to wrong results for inlined
entries.

Before:

~
  perf report --inline -s srcline -g none --stdio
  ...
  # Children  Self  Source:Line
  #     
..
  #
  94.23% 0.00%  __libc_start_main+18446603487898210537
  94.23% 0.00%  _start+41
  44.58% 0.00%  main+100
  44.58% 0.00%  std::_Norm_helper::_S_do_it+100
  44.58% 0.00%  std::__complex_abs+100
  44.58% 0.00%  std::abs+100
  44.58% 0.00%  std::norm+100
  36.01% 0.00%  hypot+18446603487892193300
  25.81% 0.00%  main+41
  25.81% 0.00%  
std::__detail::_Adaptor, double>::operator()+41
  25.81% 0.00%  
std::uniform_real_distribution::operator() >+41
  25.75%25.75%  random.h:143
  18.39% 0.00%  main+57
  18.39% 0.00%  
std::__detail::_Adaptor, double>::operator()+57
  18.39% 0.00%  
std::uniform_real_distribution::operator() >+57
  13.80%13.80%  random.tcc:3330
   5.64% 0.00%  ??:0
   4.13% 4.13%  __hypot_finite+163
   4.13% 0.00%  __hypot_finite+18446603487892193443
...
~

After:

~
  perf report --inline -s srcline -g none --stdio
  ...
  # Children  Self  Source:Line
  #     ...
  #
  94.30% 1.19%  main.cpp:39
  94.23% 0.00%  __libc_start_main+18446603487898210537
  94.23% 0.00%  _start+41
  48.44% 1.70%  random.h:1823
  48.44% 0.00%  random.h:1814
  46.74% 2.53%  random.h:185
  44.68% 0.10%  complex:589
  44.68% 0.00%  complex:597
  44.68% 0.00%  complex:654
  44.68% 0.00%  complex:664
  40.61%13.80%  random.tcc:3330
  36.01% 0.00%  hypot+18446603487892193300
  26.81% 0.00%  random.h:151
  26.81% 0.00%  random.h:332
  25.75%25.75%  random.h:143
   5.64% 0.00%  ??:0
   4.13% 4.13%  __hypot_finite+163
   4.13% 0.00%  __hypot_finite+18446603487892193443
...
~

Note that this change removes the symbol from the source:line hist
column. If this information is desired, users should explicitly query
for it if needed. I.e. run this command instead:

~
  perf report --inline -s sym,srcline -g none --stdio
  ...
  # To display the perf.data header info, please use --header/--header-only 
options.
  #
  #
  # Total Lost Samples: 0
  #
  # Samples: 1K of event 'cycles:uppp'
  # Event count (approx.): 1381229476
  #
  # Children  Self  Symbol  
 
Source:Line
  #     
...
  ...
  #
  94.30% 1.19%  [.] main
 
main.cpp:39
  94.23% 0.00%  [.] __libc_start_main   
 
__libc_start_main+18446603487898210537
  94.23% 0.00%  [.] _start  
 
_start+41
  48.44% 0.00%  [.] 
std::uniform_real_distribution::operator() > (inlined)  random.h:1814
  48.44% 0.00%  [.] 
std::uniform_real_distribution::operator() > (inlined)  random.h:1823
  46.74% 0.00%  [.] 
std::__detail::_Adaptor, double>::operator() (inlined)  random.h:185
  44.68% 0.00%  [.] std::_Norm_helper::_S_do_it (inlined) 
 
complex:654
  44.68% 0.00%  [.] std::__complex_abs (inlined)
 
complex:589
  44.68% 0.00%  [.] std::abs (inlined)  
 
complex:597
  44.68% 0.00%  [.] std::norm (inlined) 
 
complex:664
  39.80%13.59%  [.] std::generate_canonical >
   random.tcc:3330
  36.01% 0.00%  [.] hypot   
   

[PATCH 12/15] perf report: Cache failed lookups of inlined frames

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

When no inlined frames could be found for a given address, we did not
store this information anywhere. That means we potentially do the costly
inliner lookup repeatedly for cases where we know it can never succeed.

This patch makes dso__parse_addr_inlines always return a valid
inline_node. It will be empty when no inliners are found. This enables
us to cache the empty list in the DSO, thereby improving the performance
when many addresses fail to find the inliners.

For my trivial example, the performance impact is already quite
significant:

Before:

~
 Performance counter stats for 'perf report --stdio --inline -g srcline -s 
srcline' (5 runs):

594.804032  task-clock (msec) #0.998 CPUs utilized  
  ( +-  0.07% )
53  context-switches  #0.089 K/sec  
  ( +-  4.09% )
 0  cpu-migrations#0.000 K/sec  
  ( +-100.00% )
 5,687  page-faults   #0.010 M/sec  
  ( +-  0.02% )
 2,300,918,213  cycles#3.868 GHz
  ( +-  0.09% )
 4,395,839,080  instructions  #1.91  insn per cycle 
  ( +-  0.00% )
   939,177,205  branches  # 1578.969 M/sec  
  ( +-  0.00% )
11,824,633  branch-misses #1.26% of all branches
  ( +-  0.10% )

   0.596246531 seconds time elapsed 
 ( +-  0.07% )
~

After:

~
 Performance counter stats for 'perf report --stdio --inline -g srcline -s 
srcline' (5 runs):

113.111405  task-clock (msec) #0.990 CPUs utilized  
  ( +-  0.89% )
29  context-switches  #0.255 K/sec  
  ( +- 54.25% )
 0  cpu-migrations#0.000 K/sec
 5,380  page-faults   #0.048 M/sec  
  ( +-  0.01% )
   432,378,779  cycles#3.823 GHz
  ( +-  0.75% )
   670,057,633  instructions  #1.55  insn per cycle 
  ( +-  0.01% )
   141,001,247  branches  # 1246.570 M/sec  
  ( +-  0.01% )
 2,346,845  branch-misses #1.66% of all branches
  ( +-  0.19% )

   0.114222393 seconds time elapsed 
 ( +-  1.19% )
~

Signed-off-by: Milian Wolff 
Reviewed-by: Andi Kleen 
Cc: David Ahern 
Cc: Jin Yao 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20171019113836.5548-3-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/machine.c | 15 +++
 tools/perf/util/srcline.c | 16 +---
 2 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 3d049cb313ac..177c1d4088f8 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2115,9 +2115,10 @@ static int append_inlines(struct callchain_cursor 
*cursor,
struct inline_node *inline_node;
struct inline_list *ilist;
u64 addr;
+   int ret = 1;
 
if (!symbol_conf.inline_name || !map || !sym)
-   return 1;
+   return ret;
 
addr = map__rip_2objdump(map, ip);
 
@@ -2125,22 +2126,20 @@ static int append_inlines(struct callchain_cursor 
*cursor,
if (!inline_node) {
inline_node = dso__parse_addr_inlines(map->dso, addr, sym);
if (!inline_node)
-   return 1;
-
+   return ret;
inlines__tree_insert(&map->dso->inlined_nodes, inline_node);
}
 
list_for_each_entry(ilist, &inline_node->val, list) {
-   int ret = callchain_cursor_append(cursor, ip, map,
- ilist->symbol, false,
- NULL, 0, 0, 0,
- ilist->srcline);
+   ret = callchain_cursor_append(cursor, ip, map,
+ ilist->symbol, false,
+ NULL, 0, 0, 0, ilist->srcline);
 
if (ret != 0)
return ret;
}
 
-   return 0;
+   return ret;
 }
 
 static int unwind_entry(struct unwind_entry *entry, void *arg)
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 8bea6621d657..fc3888664b20 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -353,17 +353,8 @@ 

[PATCH 12/15] perf report: Cache failed lookups of inlined frames

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

When no inlined frames could be found for a given address, we did not
store this information anywhere. That means we potentially do the costly
inliner lookup repeatedly for cases where we know it can never succeed.

This patch makes dso__parse_addr_inlines always return a valid
inline_node. It will be empty when no inliners are found. This enables
us to cache the empty list in the DSO, thereby improving the performance
when many addresses fail to find the inliners.

For my trivial example, the performance impact is already quite
significant:

Before:

~
 Performance counter stats for 'perf report --stdio --inline -g srcline -s 
srcline' (5 runs):

594.804032  task-clock (msec) #0.998 CPUs utilized  
  ( +-  0.07% )
53  context-switches  #0.089 K/sec  
  ( +-  4.09% )
 0  cpu-migrations#0.000 K/sec  
  ( +-100.00% )
 5,687  page-faults   #0.010 M/sec  
  ( +-  0.02% )
 2,300,918,213  cycles#3.868 GHz
  ( +-  0.09% )
 4,395,839,080  instructions  #1.91  insn per cycle 
  ( +-  0.00% )
   939,177,205  branches  # 1578.969 M/sec  
  ( +-  0.00% )
11,824,633  branch-misses #1.26% of all branches
  ( +-  0.10% )

   0.596246531 seconds time elapsed 
 ( +-  0.07% )
~

After:

~
 Performance counter stats for 'perf report --stdio --inline -g srcline -s 
srcline' (5 runs):

113.111405  task-clock (msec) #0.990 CPUs utilized  
  ( +-  0.89% )
29  context-switches  #0.255 K/sec  
  ( +- 54.25% )
 0  cpu-migrations#0.000 K/sec
 5,380  page-faults   #0.048 M/sec  
  ( +-  0.01% )
   432,378,779  cycles#3.823 GHz
  ( +-  0.75% )
   670,057,633  instructions  #1.55  insn per cycle 
  ( +-  0.01% )
   141,001,247  branches  # 1246.570 M/sec  
  ( +-  0.01% )
 2,346,845  branch-misses #1.66% of all branches
  ( +-  0.19% )

   0.114222393 seconds time elapsed 
 ( +-  1.19% )
~

Signed-off-by: Milian Wolff 
Reviewed-by: Andi Kleen 
Cc: David Ahern 
Cc: Jin Yao 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20171019113836.5548-3-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/machine.c | 15 +++
 tools/perf/util/srcline.c | 16 +---
 2 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 3d049cb313ac..177c1d4088f8 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2115,9 +2115,10 @@ static int append_inlines(struct callchain_cursor 
*cursor,
struct inline_node *inline_node;
struct inline_list *ilist;
u64 addr;
+   int ret = 1;
 
if (!symbol_conf.inline_name || !map || !sym)
-   return 1;
+   return ret;
 
addr = map__rip_2objdump(map, ip);
 
@@ -2125,22 +2126,20 @@ static int append_inlines(struct callchain_cursor 
*cursor,
if (!inline_node) {
inline_node = dso__parse_addr_inlines(map->dso, addr, sym);
if (!inline_node)
-   return 1;
-
+   return ret;
inlines__tree_insert(&map->dso->inlined_nodes, inline_node);
}
 
list_for_each_entry(ilist, &inline_node->val, list) {
-   int ret = callchain_cursor_append(cursor, ip, map,
- ilist->symbol, false,
- NULL, 0, 0, 0,
- ilist->srcline);
+   ret = callchain_cursor_append(cursor, ip, map,
+ ilist->symbol, false,
+ NULL, 0, 0, 0, ilist->srcline);
 
if (ret != 0)
return ret;
}
 
-   return 0;
+   return ret;
 }
 
 static int unwind_entry(struct unwind_entry *entry, void *arg)
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 8bea6621d657..fc3888664b20 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -353,17 +353,8 @@ static struct inline_node *addr2inlines(const char 
*dso_name, u64 addr,
INIT_LIST_HEAD(&node->val);
node->addr = addr;
 
-   if (!addr2line(dso_name, addr, NULL, NULL, dso, TRUE, 

[PATCH 10/15] perf report: Compare symbol name for inlined frames when sorting

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

Similar to the callstack frame matching, we also have to compare the
symbol name when sorting hist entries. The reason is twofold: On one
hand, multiple inlined functions will use the same symbol start/end
values of the parent, non-inlined symbol.

As such, all of these symbols often end up missing from top-level
report, as they get merged with the non-inlined frame. On the other
hand, multiple different functions may end up inlining the same
function, and we need to aggregate these values properly.

Before:

~
  perf report --stdio --inline -g none
  # Children Self  Command   Shared Object Symbol
  #      . 
...
  #
 100.00%   39.69%  cpp-inlining  cpp-inlining  [.] main
 100.00%0.00%  cpp-inlining  cpp-inlining  [.] _start
 100.00%0.00%  cpp-inlining  libc-2.25.so  [.] __libc_start_main
  97.03%0.00%  cpp-inlining  cpp-inlining  [.] std::norm 
(inlined)
  59.53%4.26%  cpp-inlining  libm-2.25.so  [.] hypot
  55.21%   55.08%  cpp-inlining  libm-2.25.so  [.] __hypot_finite
   0.52%0.52%  cpp-inlining  libm-2.25.so  [.] cabs
~

After:

~
  perf report --stdio --inline -g none
  # Children Self  Command   Shared Object Symbol
  #      . 
...
  #
 100.00%   39.69%  cpp-inlining  cpp-inlining  [.] main
 100.00%0.00%  cpp-inlining  cpp-inlining  [.] _start
 100.00%0.00%  cpp-inlining  libc-2.25.so  [.] __libc_start_main
  62.57%0.00%  cpp-inlining  cpp-inlining  [.] 
std::_Norm_helper::_S_do_it (inlined)
  62.57%0.00%  cpp-inlining  cpp-inlining  [.] std::__complex_abs 
(inlined)
  62.57%0.00%  cpp-inlining  cpp-inlining  [.] std::abs 
(inlined)
  62.57%0.00%  cpp-inlining  cpp-inlining  [.] std::norm 
(inlined)
  59.53%4.26%  cpp-inlining  libm-2.25.so  [.] hypot
  55.21%   55.08%  cpp-inlining  libm-2.25.so  [.] __hypot_finite
  34.46%0.00%  cpp-inlining  cpp-inlining  [.] 
std::uniform_real_distribution::operator() (inlined)
  32.39%0.00%  cpp-inlining  cpp-inlining  [.] 
std::__detail::_Adaptor::operator() (inlined)
  32.39%0.00%  cpp-inlining  cpp-inlining  [.] 
std::generate_canonical (inlined)
  12.29%0.00%  cpp-inlining  cpp-inlining  [.] 
std::__detail::_Mod::__calc (inlined)
  12.29%0.00%  cpp-inlining  cpp-inlining  [.] 
std::__detail::__mod (inlined)
  12.29%0.00%  cpp-inlining  cpp-inlining  [.] 
std::linear_congruential_engine::operator() (inlined)
   0.52%0.52%  cpp-inlining  libm-2.25.so  [.] cabs
~

Signed-off-by: Milian Wolff 
Reviewed-by: Jiri Olsa 
Reviewed-by: Namhyung Kim 
Cc: David Ahern 
Cc: Peter Zijlstra 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20171009203310.17362-11-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/sort.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index acb9210fd18a..006d10a0dc96 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -225,6 +225,9 @@ static int64_t _sort__sym_cmp(struct symbol *sym_l, struct 
symbol *sym_r)
if (sym_l == sym_r)
return 0;
 
+   if (sym_l->inlined || sym_r->inlined)
+   return strcmp(sym_l->name, sym_r->name);
+
if (sym_l->start != sym_r->start)
return (int64_t)(sym_r->start - sym_l->start);
 
-- 
2.13.6



[PATCH 10/15] perf report: Compare symbol name for inlined frames when sorting

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

Similar to the callstack frame matching, we also have to compare the
symbol name when sorting hist entries. The reason is twofold: On one
hand, multiple inlined functions will use the same symbol start/end
values of the parent, non-inlined symbol.

As such, all of these symbols often end up missing from top-level
report, as they get merged with the non-inlined frame. On the other
hand, multiple different functions may end up inlining the same
function, and we need to aggregate these values properly.

Before:

~
  perf report --stdio --inline -g none
  # Children Self  Command   Shared Object Symbol
  #      . 
...
  #
 100.00%   39.69%  cpp-inlining  cpp-inlining  [.] main
 100.00%0.00%  cpp-inlining  cpp-inlining  [.] _start
 100.00%0.00%  cpp-inlining  libc-2.25.so  [.] __libc_start_main
  97.03%0.00%  cpp-inlining  cpp-inlining  [.] std::norm 
(inlined)
  59.53%4.26%  cpp-inlining  libm-2.25.so  [.] hypot
  55.21%   55.08%  cpp-inlining  libm-2.25.so  [.] __hypot_finite
   0.52%0.52%  cpp-inlining  libm-2.25.so  [.] cabs
~

After:

~
  perf report --stdio --inline -g none
  # Children Self  Command   Shared Object Symbol
  #      . 
...
  #
 100.00%   39.69%  cpp-inlining  cpp-inlining  [.] main
 100.00%0.00%  cpp-inlining  cpp-inlining  [.] _start
 100.00%0.00%  cpp-inlining  libc-2.25.so  [.] __libc_start_main
  62.57%0.00%  cpp-inlining  cpp-inlining  [.] 
std::_Norm_helper::_S_do_it (inlined)
  62.57%0.00%  cpp-inlining  cpp-inlining  [.] std::__complex_abs 
(inlined)
  62.57%0.00%  cpp-inlining  cpp-inlining  [.] std::abs 
(inlined)
  62.57%0.00%  cpp-inlining  cpp-inlining  [.] std::norm 
(inlined)
  59.53%4.26%  cpp-inlining  libm-2.25.so  [.] hypot
  55.21%   55.08%  cpp-inlining  libm-2.25.so  [.] __hypot_finite
  34.46%0.00%  cpp-inlining  cpp-inlining  [.] 
std::uniform_real_distribution::operator() > (inlined)
  32.39%0.00%  cpp-inlining  cpp-inlining  [.] 
std::__detail::_Adaptor, double>::operator() (inlined)
  32.39%0.00%  cpp-inlining  cpp-inlining  [.] 
std::generate_canonical > (inlined)
  12.29%0.00%  cpp-inlining  cpp-inlining  [.] 
std::__detail::_Mod::__calc (inlined)
  12.29%0.00%  cpp-inlining  cpp-inlining  [.] 
std::__detail::__mod (inlined)
  12.29%0.00%  cpp-inlining  cpp-inlining  [.] 
std::linear_congruential_engine::operator() (inlined)
   0.52%0.52%  cpp-inlining  libm-2.25.so  [.] cabs
~

Signed-off-by: Milian Wolff 
Reviewed-by: Jiri Olsa 
Reviewed-by: Namhyung Kim 
Cc: David Ahern 
Cc: Peter Zijlstra 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20171009203310.17362-11-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/sort.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index acb9210fd18a..006d10a0dc96 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -225,6 +225,9 @@ static int64_t _sort__sym_cmp(struct symbol *sym_l, struct 
symbol *sym_r)
if (sym_l == sym_r)
return 0;
 
+   if (sym_l->inlined || sym_r->inlined)
+   return strcmp(sym_l->name, sym_r->name);
+
if (sym_l->start != sym_r->start)
return (int64_t)(sym_r->start - sym_l->start);
 
-- 
2.13.6



[PATCH 01/15] perf report: Remove code to handle inline frames from browsers

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

The follow-up commits will make inline frames first-class citizens in
the callchain, thereby obsoleting all of this special code.

Signed-off-by: Milian Wolff 
Reviewed-by: Jiri Olsa 
Reviewed-by: Namhyung Kim 
Cc: David Ahern 
Cc: Peter Zijlstra 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20171009203310.17362-2-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/ui/browsers/hists.c  | 180 +++-
 tools/perf/ui/stdio/hist.c  |  77 +
 tools/perf/util/evsel_fprintf.c |  32 ---
 tools/perf/util/hist.c  |   5 --
 tools/perf/util/sort.h  |   1 -
 5 files changed, 13 insertions(+), 282 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 13dfb0a0bdeb..3a433f370e7f 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -154,57 +154,9 @@ static void callchain_list__set_folding(struct 
callchain_list *cl, bool unfold)
cl->unfolded = unfold ? cl->has_children : false;
 }
 
-static struct inline_node *inline_node__create(struct map *map, u64 ip)
-{
-   struct dso *dso;
-   struct inline_node *node;
-
-   if (map == NULL)
-   return NULL;
-
-   dso = map->dso;
-   if (dso == NULL)
-   return NULL;
-
-   node = dso__parse_addr_inlines(dso,
-  map__rip_2objdump(map, ip));
-
-   return node;
-}
-
-static int inline__count_rows(struct inline_node *node)
-{
-   struct inline_list *ilist;
-   int i = 0;
-
-   if (node == NULL)
-   return 0;
-
-   list_for_each_entry(ilist, &node->val, list) {
-   if ((ilist->filename != NULL) || (ilist->funcname != NULL))
-   i++;
-   }
-
-   return i;
-}
-
-static int callchain_list__inline_rows(struct callchain_list *chain)
-{
-   struct inline_node *node;
-   int rows;
-
-   node = inline_node__create(chain->ms.map, chain->ip);
-   if (node == NULL)
-   return 0;
-
-   rows = inline__count_rows(node);
-   inline_node__delete(node);
-   return rows;
-}
-
 static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
 {
-   int n = 0, inline_rows;
+   int n = 0;
struct rb_node *nd;
 
for (nd = rb_first(&node->rb_root); nd; nd = rb_next(nd)) {
@@ -215,12 +167,6 @@ static int callchain_node__count_rows_rb_tree(struct 
callchain_node *node)
list_for_each_entry(chain, &child->val, list) {
++n;
 
-   if (symbol_conf.inline_name) {
-   inline_rows =
-   callchain_list__inline_rows(chain);
-   n += inline_rows;
-   }
-
/* We need this because we may not have children */
folded_sign = callchain_list__folded(chain);
if (folded_sign == '+')
@@ -272,7 +218,7 @@ static int callchain_node__count_rows(struct callchain_node 
*node)
 {
struct callchain_list *chain;
bool unfolded = false;
-   int n = 0, inline_rows;
+   int n = 0;
 
if (callchain_param.mode == CHAIN_FLAT)
return callchain_node__count_flat_rows(node);
@@ -281,10 +227,6 @@ static int callchain_node__count_rows(struct 
callchain_node *node)
 
list_for_each_entry(chain, &node->val, list) {
++n;
-   if (symbol_conf.inline_name) {
-   inline_rows = callchain_list__inline_rows(chain);
-   n += inline_rows;
-   }
 
unfolded = chain->unfolded;
}
@@ -432,19 +374,6 @@ static void hist_entry__init_have_children(struct 
hist_entry *he)
he->init_have_children = true;
 }
 
-static void hist_entry_init_inline_node(struct hist_entry *he)
-{
-   if (he->inline_node)
-   return;
-
-   he->inline_node = inline_node__create(he->ms.map, he->ip);
-
-   if (he->inline_node == NULL)
-   return;
-
-   he->has_children = true;
-}
-
 static bool hist_browser__toggle_fold(struct hist_browser *browser)
 {
struct hist_entry *he = browser->he_selection;
@@ -476,12 +405,8 @@ static bool hist_browser__toggle_fold(struct hist_browser 
*browser)
 
if (he->unfolded) {
if (he->leaf)
-   if (he->inline_node)
-   he->nr_rows = inline__count_rows(
-   he->inline_node);
-   else
-   he->nr_rows = callchain__count_rows(
- 

[PATCH 01/15] perf report: Remove code to handle inline frames from browsers

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

The follow-up commits will make inline frames first-class citizens in
the callchain, thereby obsoleting all of this special code.

Signed-off-by: Milian Wolff 
Reviewed-by: Jiri Olsa 
Reviewed-by: Namhyung Kim 
Cc: David Ahern 
Cc: Peter Zijlstra 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20171009203310.17362-2-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/ui/browsers/hists.c  | 180 +++-
 tools/perf/ui/stdio/hist.c  |  77 +
 tools/perf/util/evsel_fprintf.c |  32 ---
 tools/perf/util/hist.c  |   5 --
 tools/perf/util/sort.h  |   1 -
 5 files changed, 13 insertions(+), 282 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 13dfb0a0bdeb..3a433f370e7f 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -154,57 +154,9 @@ static void callchain_list__set_folding(struct 
callchain_list *cl, bool unfold)
cl->unfolded = unfold ? cl->has_children : false;
 }
 
-static struct inline_node *inline_node__create(struct map *map, u64 ip)
-{
-   struct dso *dso;
-   struct inline_node *node;
-
-   if (map == NULL)
-   return NULL;
-
-   dso = map->dso;
-   if (dso == NULL)
-   return NULL;
-
-   node = dso__parse_addr_inlines(dso,
-  map__rip_2objdump(map, ip));
-
-   return node;
-}
-
-static int inline__count_rows(struct inline_node *node)
-{
-   struct inline_list *ilist;
-   int i = 0;
-
-   if (node == NULL)
-   return 0;
-
-   list_for_each_entry(ilist, >val, list) {
-   if ((ilist->filename != NULL) || (ilist->funcname != NULL))
-   i++;
-   }
-
-   return i;
-}
-
-static int callchain_list__inline_rows(struct callchain_list *chain)
-{
-   struct inline_node *node;
-   int rows;
-
-   node = inline_node__create(chain->ms.map, chain->ip);
-   if (node == NULL)
-   return 0;
-
-   rows = inline__count_rows(node);
-   inline_node__delete(node);
-   return rows;
-}
-
 static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
 {
-   int n = 0, inline_rows;
+   int n = 0;
struct rb_node *nd;
 
for (nd = rb_first(>rb_root); nd; nd = rb_next(nd)) {
@@ -215,12 +167,6 @@ static int callchain_node__count_rows_rb_tree(struct 
callchain_node *node)
list_for_each_entry(chain, >val, list) {
++n;
 
-   if (symbol_conf.inline_name) {
-   inline_rows =
-   callchain_list__inline_rows(chain);
-   n += inline_rows;
-   }
-
/* We need this because we may not have children */
folded_sign = callchain_list__folded(chain);
if (folded_sign == '+')
@@ -272,7 +218,7 @@ static int callchain_node__count_rows(struct callchain_node 
*node)
 {
struct callchain_list *chain;
bool unfolded = false;
-   int n = 0, inline_rows;
+   int n = 0;
 
if (callchain_param.mode == CHAIN_FLAT)
return callchain_node__count_flat_rows(node);
@@ -281,10 +227,6 @@ static int callchain_node__count_rows(struct 
callchain_node *node)
 
list_for_each_entry(chain, >val, list) {
++n;
-   if (symbol_conf.inline_name) {
-   inline_rows = callchain_list__inline_rows(chain);
-   n += inline_rows;
-   }
 
unfolded = chain->unfolded;
}
@@ -432,19 +374,6 @@ static void hist_entry__init_have_children(struct 
hist_entry *he)
he->init_have_children = true;
 }
 
-static void hist_entry_init_inline_node(struct hist_entry *he)
-{
-   if (he->inline_node)
-   return;
-
-   he->inline_node = inline_node__create(he->ms.map, he->ip);
-
-   if (he->inline_node == NULL)
-   return;
-
-   he->has_children = true;
-}
-
 static bool hist_browser__toggle_fold(struct hist_browser *browser)
 {
struct hist_entry *he = browser->he_selection;
@@ -476,12 +405,8 @@ static bool hist_browser__toggle_fold(struct hist_browser 
*browser)
 
if (he->unfolded) {
if (he->leaf)
-   if (he->inline_node)
-   he->nr_rows = inline__count_rows(
-   he->inline_node);
-   else
-   he->nr_rows = callchain__count_rows(
-   >sorted_chain);
+   he->nr_rows = callchain__count_rows(
+   

[PATCH 06/15] perf report: Fall-back to function name comparison for -g srcline

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

When a callchain entry has no srcline available, we ended up comparing
the instruction pointer. I consider this to be not too useful. Rather, I
think we should group the entries by function name, which this patch
adds. For people who want to split the data on the IP boundary, using
`-g address` is the correct choice.

Before:

~
   100.00%38.86%  [.] main
|
|--61.14%--main inlining.cpp:14
|  std::norm complex:664
|  std::_Norm_helper::_S_do_it complex:654
|  std::abs complex:597
|  std::__complex_abs complex:589
|  |
|  |--56.03%--hypot
|  |  |
|  |  |--8.45%--__hypot_finite
|  |  |
|  |  |--7.62%--__hypot_finite
|  |  |
|  |  |--2.29%--__hypot_finite
|  |  |
|  |  |--2.24%--__hypot_finite
|  |  |
|  |  |--2.06%--__hypot_finite
|  |  |
|  |  |--1.81%--__hypot_finite
...
~

After:

~
   100.00%38.86%  [.] main
|
|--61.14%--main inlining.cpp:14
|  std::norm complex:664
|  std::_Norm_helper::_S_do_it complex:654
|  std::abs complex:597
|  std::__complex_abs complex:589
|  |
|  |--60.29%--hypot
|  |  |
|  |   --56.03%--__hypot_finite
|  |
|   --0.85%--cabs
~

Signed-off-by: Milian Wolff 
Reviewed-by: Jiri Olsa 
Reviewed-by: Namhyung Kim 
Cc: David Ahern 
Cc: Peter Zijlstra 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20171009203310.17362-7-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/callchain.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index e7ee794d1e5b..0f2ba493a7a3 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -645,11 +645,9 @@ enum match_result {
MATCH_GT,
 };
 
-static enum match_result match_chain_srcline(struct callchain_cursor_node 
*node,
-struct callchain_list *cnode)
+static enum match_result match_chain_strings(const char *left,
+const char *right)
 {
-   const char *left = cnode->srcline;
-   const char *right = node->srcline;
enum match_result ret = MATCH_EQ;
int cmp;
 
@@ -659,10 +657,8 @@ static enum match_result match_chain_srcline(struct 
callchain_cursor_node *node,
cmp = 1;
else if (left && !right)
cmp = -1;
-   else if (cnode->ip == node->ip)
-   cmp = 0;
else
-   cmp = (cnode->ip < node->ip) ? -1 : 1;
+   return MATCH_ERROR;
 
if (cmp != 0)
ret = cmp < 0 ? MATCH_LT : MATCH_GT;
@@ -679,10 +675,18 @@ static enum match_result match_chain(struct 
callchain_cursor_node *node,
struct dso *right_dso = NULL;
 
if (callchain_param.key == CCKEY_SRCLINE) {
-   enum match_result match = match_chain_srcline(node, cnode);
+   enum match_result match = match_chain_strings(cnode->srcline,
+ node->srcline);
+
+   /* if no srcline is available, fallback to symbol name */
+   if (match == MATCH_ERROR && cnode->ms.sym && node->sym)
+   match = match_chain_strings(cnode->ms.sym->name,
+   node->sym->name);
 
if (match != MATCH_ERROR)
return match;
+
+   /* otherwise fall-back to IP-based comparison below */
}
 
if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) {
-- 
2.13.6



[PATCH 06/15] perf report: Fall-back to function name comparison for -g srcline

2017-10-25 Thread Arnaldo Carvalho de Melo
From: Milian Wolff 

When a callchain entry has no srcline available, we ended up comparing
the instruction pointer. I consider this to be not too useful. Rather, I
think we should group the entries by function name, which this patch
adds. For people who want to split the data on the IP boundary, using
`-g address` is the correct choice.

Before:

~
   100.00%38.86%  [.] main
|
|--61.14%--main inlining.cpp:14
|  std::norm complex:664
|  std::_Norm_helper::_S_do_it complex:654
|  std::abs complex:597
|  std::__complex_abs complex:589
|  |
|  |--56.03%--hypot
|  |  |
|  |  |--8.45%--__hypot_finite
|  |  |
|  |  |--7.62%--__hypot_finite
|  |  |
|  |  |--2.29%--__hypot_finite
|  |  |
|  |  |--2.24%--__hypot_finite
|  |  |
|  |  |--2.06%--__hypot_finite
|  |  |
|  |  |--1.81%--__hypot_finite
...
~

After:

~
   100.00%38.86%  [.] main
|
|--61.14%--main inlining.cpp:14
|  std::norm complex:664
|  std::_Norm_helper::_S_do_it complex:654
|  std::abs complex:597
|  std::__complex_abs complex:589
|  |
|  |--60.29%--hypot
|  |  |
|  |   --56.03%--__hypot_finite
|  |
|   --0.85%--cabs
~

Signed-off-by: Milian Wolff 
Reviewed-by: Jiri Olsa 
Reviewed-by: Namhyung Kim 
Cc: David Ahern 
Cc: Peter Zijlstra 
Cc: Yao Jin 
Link: http://lkml.kernel.org/r/20171009203310.17362-7-milian.wo...@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/callchain.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index e7ee794d1e5b..0f2ba493a7a3 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -645,11 +645,9 @@ enum match_result {
MATCH_GT,
 };
 
-static enum match_result match_chain_srcline(struct callchain_cursor_node 
*node,
-struct callchain_list *cnode)
+static enum match_result match_chain_strings(const char *left,
+const char *right)
 {
-   const char *left = cnode->srcline;
-   const char *right = node->srcline;
enum match_result ret = MATCH_EQ;
int cmp;
 
@@ -659,10 +657,8 @@ static enum match_result match_chain_srcline(struct 
callchain_cursor_node *node,
cmp = 1;
else if (left && !right)
cmp = -1;
-   else if (cnode->ip == node->ip)
-   cmp = 0;
else
-   cmp = (cnode->ip < node->ip) ? -1 : 1;
+   return MATCH_ERROR;
 
if (cmp != 0)
ret = cmp < 0 ? MATCH_LT : MATCH_GT;
@@ -679,10 +675,18 @@ static enum match_result match_chain(struct 
callchain_cursor_node *node,
struct dso *right_dso = NULL;
 
if (callchain_param.key == CCKEY_SRCLINE) {
-   enum match_result match = match_chain_srcline(node, cnode);
+   enum match_result match = match_chain_strings(cnode->srcline,
+ node->srcline);
+
+   /* if no srcline is available, fallback to symbol name */
+   if (match == MATCH_ERROR && cnode->ms.sym && node->sym)
+   match = match_chain_strings(cnode->ms.sym->name,
+   node->sym->name);
 
if (match != MATCH_ERROR)
return match;
+
+   /* otherwise fall-back to IP-based comparison below */
}
 
if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) {
-- 
2.13.6



[GIT PULL 00/15] perf/core inlining improvements

2017-10-25 Thread Arnaldo Carvalho de Melo
Hi Ingo,

Please consider pulling, this is Milian's v7 plus some fixes
acked by Namhyung after some discussion among the three of us, I
probably need to pick some more patches that are related to this area,
but let's make some progress and merge this kit,

- Arnaldo

Test results at the end of this message, as usual.

The following changes since commit 9b7c85473cc2fa6fc4a7f87636ff2b69742b82b7:

  Merge tag 'perf-core-for-mingo-4.15-20171023' of 
git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core 
(2017-10-24 10:53:04 +0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git 
tags/perf-core-for-mingo-4.15-20171025

for you to fetch changes up to d8a88dd243a170a226aba33e7c53704db2f82aa6:

  perf util: Enable handling of inlined frames by default (2017-10-25 10:50:47 
-0300)


perf/core inline improvements:

From Milian's cover letter: (Milian Wolff)

This series of patches completely reworks the way inline frames are
handled.  Instead of querying for the inline nodes on-demand in the
individual tools, we now create proper callchain nodes for inlined
frames. The advantages this approach brings are numerous:

- Less duplicated code in the individual browser

- Aggregated cost for inlined frames for the --children top-down list

- Various bug fixes that arose from querying for a srcline/symbol based on
  the IP of a sample, which will always point to the last inlined frame
  instead of the corresponding non-inlined frame

- Overall much better support for visualizing cost for heavily-inlined C++
  code, which simply was confusing and unreliable before

- srcline honors the global setting as to whether full paths or basenames
  should be shown

- Caches for inlined frames and srcline information, which allow us to
  enable inline frame handling by default

Signed-off-by: Arnaldo Carvalho de Melo <a...@redhat.com>


Milian Wolff (15):
  perf report: Remove code to handle inline frames from browsers
  perf callchain: Store srcline in callchain_cursor_node
  perf callchain: Refactor inline_list to operate on symbols
  perf callchain: Refactor inline_list to store srcline string directly
  perf callchain: Create real callchain entries for inlined frames
  perf report: Fall-back to function name comparison for -g srcline
  perf callchain: Mark inlined frames in output by " (inlined)" suffix
  perf script: Mark inlined frames and do not print DSO for them
  perf callchain: Compare symbol name for inlined frames when matching
  perf report: Compare symbol name for inlined frames when sorting
  perf report: Properly handle branch count in match_chain()
  perf report: Cache failed lookups of inlined frames
  perf report: Cache srclines for callchain nodes
  perf report: Use srcline from callchain for hist entries
  perf util: Enable handling of inlined frames by default

 tools/perf/Documentation/perf-report.txt |   3 +-
 tools/perf/Documentation/perf-script.txt |   3 +-
 tools/perf/ui/browsers/hists.c   | 180 ++---
 tools/perf/ui/stdio/hist.c   |  77 +
 tools/perf/util/callchain.c  | 174 +++-
 tools/perf/util/callchain.h  |   6 +-
 tools/perf/util/dso.c|   7 +
 tools/perf/util/dso.h|   2 +
 tools/perf/util/event.c  |   1 +
 tools/perf/util/evsel_fprintf.c  |  37 +
 tools/perf/util/hist.c   |   7 +-
 tools/perf/util/machine.c|  65 +++-
 tools/perf/util/sort.c   |   6 +
 tools/perf/util/sort.h   |   1 -
 tools/perf/util/srcline.c| 268 +--
 tools/perf/util/srcline.h|  26 ++-
 tools/perf/util/symbol.c |   1 +
 tools/perf/util/symbol.h |   2 +
 18 files changed, 443 insertions(+), 423 deletions(-)

Test results:

The first ones are container (docker) based builds of tools/perf with and
without libelf support.  Where clang is available, it is also used to build
perf with/without libelf.

The objtool and samples/bpf/ builds are disabled now that I'm switching from
using the sources in a local volume to fetching them from an HTTP server to
build them inside the container, to make it easier to build in a container
cluster.
Those will come back later.

Several are cross builds, the ones with -x-ARCH and the android one, and those
may not have all the features built, due to lack of multi-arch devel packages,
available and being used so far on just a few, like
debian:experimental-x-{arm64,mipsel}.

The 'perf test' one will perform a variety of tests exercising
tools/perf/util/, tools/lib/{bpf,traceevent,etc}, as well as run perf comm

[GIT PULL 00/15] perf/core inlining improvements

2017-10-25 Thread Arnaldo Carvalho de Melo
Hi Ingo,

Please consider pulling, this is Milian's v7 plus some fixes
acked by Namhyung after some discussion among the three of us, I
probably need to pick some more patches that are related to this area,
but let's make some progress and merge this kit,

- Arnaldo

Test results at the end of this message, as usual.

The following changes since commit 9b7c85473cc2fa6fc4a7f87636ff2b69742b82b7:

  Merge tag 'perf-core-for-mingo-4.15-20171023' of 
git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core 
(2017-10-24 10:53:04 +0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git 
tags/perf-core-for-mingo-4.15-20171025

for you to fetch changes up to d8a88dd243a170a226aba33e7c53704db2f82aa6:

  perf util: Enable handling of inlined frames by default (2017-10-25 10:50:47 
-0300)


perf/core inline improvements:

From Milian's cover letter: (Milian Wolff)

This series of patches completely reworks the way inline frames are
handled.  Instead of querying for the inline nodes on-demand in the
individual tools, we now create proper callchain nodes for inlined
frames. The advantages this approach brings are numerous:

- Less duplicated code in the individual browser

- Aggregated cost for inlined frames for the --children top-down list

- Various bug fixes that arose from querying for a srcline/symbol based on
  the IP of a sample, which will always point to the last inlined frame
  instead of the corresponding non-inlined frame

- Overall much better support for visualizing cost for heavily-inlined C++
  code, which simply was confusing and unreliable before

- srcline honors the global setting as to whether full paths or basenames
  should be shown

- Caches for inlined frames and srcline information, which allow us to
  enable inline frame handling by default

Signed-off-by: Arnaldo Carvalho de Melo 


Milian Wolff (15):
  perf report: Remove code to handle inline frames from browsers
  perf callchain: Store srcline in callchain_cursor_node
  perf callchain: Refactor inline_list to operate on symbols
  perf callchain: Refactor inline_list to store srcline string directly
  perf callchain: Create real callchain entries for inlined frames
  perf report: Fall-back to function name comparison for -g srcline
  perf callchain: Mark inlined frames in output by " (inlined)" suffix
  perf script: Mark inlined frames and do not print DSO for them
  perf callchain: Compare symbol name for inlined frames when matching
  perf report: Compare symbol name for inlined frames when sorting
  perf report: Properly handle branch count in match_chain()
  perf report: Cache failed lookups of inlined frames
  perf report: Cache srclines for callchain nodes
  perf report: Use srcline from callchain for hist entries
  perf util: Enable handling of inlined frames by default

 tools/perf/Documentation/perf-report.txt |   3 +-
 tools/perf/Documentation/perf-script.txt |   3 +-
 tools/perf/ui/browsers/hists.c   | 180 ++---
 tools/perf/ui/stdio/hist.c   |  77 +
 tools/perf/util/callchain.c  | 174 +++-
 tools/perf/util/callchain.h  |   6 +-
 tools/perf/util/dso.c|   7 +
 tools/perf/util/dso.h|   2 +
 tools/perf/util/event.c  |   1 +
 tools/perf/util/evsel_fprintf.c  |  37 +
 tools/perf/util/hist.c   |   7 +-
 tools/perf/util/machine.c|  65 +++-
 tools/perf/util/sort.c   |   6 +
 tools/perf/util/sort.h   |   1 -
 tools/perf/util/srcline.c| 268 +--
 tools/perf/util/srcline.h|  26 ++-
 tools/perf/util/symbol.c |   1 +
 tools/perf/util/symbol.h |   2 +
 18 files changed, 443 insertions(+), 423 deletions(-)

Test results:

The first ones are container (docker) based builds of tools/perf with and
without libelf support.  Where clang is available, it is also used to build
perf with/without libelf.

The objtool and samples/bpf/ builds are disabled now that I'm switching from
using the sources in a local volume to fetching them from an HTTP server to
build them inside the container, to make it easier to build in a container
cluster.
Those will come back later.

Several are cross builds, the ones with -x-ARCH and the android one, and those
may not have all the features built, due to lack of multi-arch devel packages,
available and being used so far on just a few, like
debian:experimental-x-{arm64,mipsel}.

The 'perf test' one will perform a variety of tests exercising
tools/perf/util/, tools/lib/{bpf,traceevent,etc}, as well as run perf commands
with a variety of comma

Re: [PATCH 4/8] PCI: host: brcmstb: add dma-ranges for inbound traffic

2017-10-25 Thread Jim Quinlan
On Wed, Oct 25, 2017 at 5:46 AM, David Laight  wrote:
> From: Jim QuinlanPCIE_IPROC_MSI
>> Sent: 24 October 2017 19:16
>> The Broadcom STB PCIe host controller is intimately related to the
>> memory subsystem.  This close relationship adds complexity to how cpu
>> system memory is mapped to PCIe memory.  Ideally, this mapping is an
>> identity mapping, or an identity mapping off by a constant.  Not so in
>> this case.
>>
>> Consider the Broadcom reference board BCM97445LCC_4X8 which has 6 GB
>> of system memory.  Here is how the PCIe controller maps the
>> system memory to PCIe memory:
>>
>>   memc0-a@[03fff] <=> pci@[03fff]
>>   memc0-b@[1...13fff] <=> pci@[ 40007fff]
>>   memc1-a@[ 40007fff] <=> pci@[ 8000bfff]
>>   memc1-b@[3...33fff] <=> pci@[ c000]
>>   memc2-a@[ 8000bfff] <=> pci@[1...13fff]
>>   memc2-b@[c...c3fff] <=> pci@[14000...17fff]
>
> I presume the first column is the 'cpu physical address'
> and the second the 'pci' address?
>
Yes.  I probably made this more difficult to read because I ordered
the rows by PCI addresses.

> ...
>
> Isn't this just the same as having an iommu that converts 'bus'
> addresses into 'physical' ones?

Pretty much, but for PCIe devices only.  This could be done by somehow
overriding the arch specific phys_to_dma() and dma_to_phys() calls.

>
> A simple table lookup of the high address bits will do the
> conversion.
True, but this table could be passed something like ARM_MAPPING_ERROR,
which may be outside the table (the driver is not privy to
ARM_MAPPING_ERROR's definition).

Thanks,
Jim

>
> David
>


Re: [PATCH 4/8] PCI: host: brcmstb: add dma-ranges for inbound traffic

2017-10-25 Thread Jim Quinlan
On Wed, Oct 25, 2017 at 5:46 AM, David Laight  wrote:
> From: Jim QuinlanPCIE_IPROC_MSI
>> Sent: 24 October 2017 19:16
>> The Broadcom STB PCIe host controller is intimately related to the
>> memory subsystem.  This close relationship adds complexity to how cpu
>> system memory is mapped to PCIe memory.  Ideally, this mapping is an
>> identity mapping, or an identity mapping off by a constant.  Not so in
>> this case.
>>
>> Consider the Broadcom reference board BCM97445LCC_4X8 which has 6 GB
>> of system memory.  Here is how the PCIe controller maps the
>> system memory to PCIe memory:
>>
>>   memc0-a@[03fff] <=> pci@[03fff]
>>   memc0-b@[1...13fff] <=> pci@[ 40007fff]
>>   memc1-a@[ 40007fff] <=> pci@[ 8000bfff]
>>   memc1-b@[3...33fff] <=> pci@[ c000]
>>   memc2-a@[ 8000bfff] <=> pci@[1...13fff]
>>   memc2-b@[c...c3fff] <=> pci@[14000...17fff]
>
> I presume the first column is the 'cpu physical address'
> and the second the 'pci' address?
>
Yes.  I probably made this more difficult to read because I ordered
the rows by PCI addresses.

> ...
>
> Isn't this just the same as having an iommu that converts 'bus'
> addresses into 'physical' ones?

Pretty much, but for PCIe devices only.  This could be done by somehow
overriding the arch specific phys_to_dma() and dma_to_phys() calls.

>
> A simple table lookup of the high address bits will do the
> conversion.
True, but this table could be passed something like ARM_MAPPING_ERROR,
which may be outside the table (the driver is not privy to
ARM_MAPPING_ERROR's definition).

Thanks,
Jim

>
> David
>


RE: [patch v9 1/4] drivers: jtag: Add JTAG core driver

2017-10-25 Thread Oleksandr Shamray
> -Original Message-
> From: Greg KH [mailto:gre...@linuxfoundation.org]
> Sent: Friday, October 20, 2017 5:54 PM
> To: Oleksandr Shamray 
> Cc: a...@arndb.de; linux-kernel@vger.kernel.org; linux-arm-
> ker...@lists.infradead.org; devicet...@vger.kernel.org;
> open...@lists.ozlabs.org; j...@jms.id.au; j...@resnulli.us;
> tklau...@distanz.ch; linux-ser...@vger.kernel.org; m...@shout.net; Vadim
> Pasternak ; system-sw-low-level  le...@mellanox.com>; robh...@kernel.org; openocd-devel-
> ow...@lists.sourceforge.net; linux-...@vger.kernel.org;
> da...@davemloft.net; mche...@kernel.org; Jiri Pirko 
> Subject: Re: [patch v9 1/4] drivers: jtag: Add JTAG core driver
> 
> On Fri, Oct 20, 2017 at 02:34:00PM +, Oleksandr Shamray wrote:
> > Hi Greg.
> >
> > > -Original Message-
> > > From: Greg KH [mailto:gre...@linuxfoundation.org]
> > > Sent: Friday, October 20, 2017 2:55 PM
> > > To: Oleksandr Shamray 
> > > Cc: a...@arndb.de; linux-kernel@vger.kernel.org; linux-arm-
> > > ker...@lists.infradead.org; devicet...@vger.kernel.org;
> > > open...@lists.ozlabs.org; j...@jms.id.au; j...@resnulli.us;
> > > tklau...@distanz.ch; linux-ser...@vger.kernel.org; m...@shout.net;
> > > Vadim Pasternak ; system-sw-low-level
> > > ; robh...@kernel.org;
> > > openocd-devel- ow...@lists.sourceforge.net;
> > > linux-...@vger.kernel.org; da...@davemloft.net; mche...@kernel.org;
> > > Jiri Pirko 
> > > Subject: Re: [patch v9 1/4] drivers: jtag: Add JTAG core driver
> > >
> > > On Thu, Sep 21, 2017 at 12:25:29PM +0300, Oleksandr Shamray wrote:
> > > > +struct jtag {
> > > > +   struct device *dev;
> > > > +   struct cdev cdev;
> > >
> > > Why are you using a cdev here and not just a normal misc device?
> >
> > What the benefits to use misc instead of cdev?
> 
> Less code, simpler logic, easier to review and understand, etc.
> 
> Let me ask you, why use a cdev instead of a misc?

As far as I know, a misc device is more applicable if we want to create a
single device, e.g. /dev/jtag.
But in the current case we can have more than one jtag device: /dev/jtag0 ...
/dev/jtagN.
So I decided to use cdev.

> 
> > > I forgot if this is what you were doing before, sorry...
> > >
> > > > +   int id;
> > > > +   atomic_t open;
> > >
> > > Why do you need this?
> >
> > This counter used to avoid open at the same time by 2 or more users.
> 
> But it isn't working :)
> 
> And why do you care?
> 
> > > > +   const struct jtag_ops *ops;
> > > > +   unsigned long priv[0] __aligned(ARCH_DMA_MINALIGN);
> > >
> > > Huh?  Why is this needed to be dma aligned?  Why not just use the
> > > private pointer in struct device?
> > >
> >
> > It is critical?
> 
> You are saying it is, so you have to justify it.  There is a pointer for you 
> to use,
> don't make new ones for no reason, right?
> 

You are right. Will remove.

> > > > +};
> > > > +
> > > > +static dev_t jtag_devt;
> > > > +static DEFINE_IDA(jtag_ida);
> > > > +
> > > > +void *jtag_priv(struct jtag *jtag) {
> > > > +   return jtag->priv;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(jtag_priv);
> > > > +
> > > > +static u8 *jtag_copy_from_user(__u64 udata, unsigned long bit_size) {
> > > > +   unsigned long size;
> > > > +   void *kdata;
> > > > +
> > > > +   size = DIV_ROUND_UP(bit_size, BITS_PER_BYTE);
> > > > +   kdata = memdup_user(u64_to_user_ptr(udata), size);
> > >
> > > You only use this once, why not just open-code it?
> >
> > I think it makes code more understandable.
> 
> As a reviewer, I don't :)

Ok, I will fix :)

> 
> > > > +
> > > > +   return kdata;
> > > > +}
> > > > +
> > > > +static unsigned long jtag_copy_to_user(__u64 udata, u8 *kdata,
> > > > +  unsigned long bit_size) {
> > > > +   unsigned long size;
> > > > +
> > > > +   size = DIV_ROUND_UP(bit_size, BITS_PER_BYTE);
> > > > +
> > > > +   return copy_to_user(u64_to_user_ptr(udata), (void *)(kdata),
> > > > +size);
> > >
> > > Same here, making this a separate function seems odd.
> >
> > Same, I think it makes code more understandable.
> 
> But it doesn't.
> 

Ok, I will fix :)

> > > > +
> > > > +   if (jtag->ops->freq_set)
> > > > +   err = jtag->ops->freq_set(jtag, value);
> > > > +   else
> > > > +   err = -EOPNOTSUPP;
> > > > +   break;
> > > > +
> > > > +   case JTAG_IOCRUNTEST:
> > > > +   if (copy_from_user(, (void *)arg,
> > > > +  sizeof(struct jtag_run_test_idle)))
> > > > +   return -ENOMEM;
> > > > +   err = jtag_run_test_idle_op(jtag, );
> > >
> > > Who validates the structure fields?  Is that up to the individual
> > > jtag driver?  Why not do it in the core correctly so that it only
> > > has to be done in one place and you do not have to audit 

RE: [patch v9 1/4] drivers: jtag: Add JTAG core driver

2017-10-25 Thread Oleksandr Shamray
> -Original Message-
> From: Greg KH [mailto:gre...@linuxfoundation.org]
> Sent: Friday, October 20, 2017 5:54 PM
> To: Oleksandr Shamray 
> Cc: a...@arndb.de; linux-kernel@vger.kernel.org; linux-arm-
> ker...@lists.infradead.org; devicet...@vger.kernel.org;
> open...@lists.ozlabs.org; j...@jms.id.au; j...@resnulli.us;
> tklau...@distanz.ch; linux-ser...@vger.kernel.org; m...@shout.net; Vadim
> Pasternak ; system-sw-low-level  le...@mellanox.com>; robh...@kernel.org; openocd-devel-
> ow...@lists.sourceforge.net; linux-...@vger.kernel.org;
> da...@davemloft.net; mche...@kernel.org; Jiri Pirko 
> Subject: Re: [patch v9 1/4] drivers: jtag: Add JTAG core driver
> 
> On Fri, Oct 20, 2017 at 02:34:00PM +, Oleksandr Shamray wrote:
> > Hi Greg.
> >
> > > -Original Message-
> > > From: Greg KH [mailto:gre...@linuxfoundation.org]
> > > Sent: Friday, October 20, 2017 2:55 PM
> > > To: Oleksandr Shamray 
> > > Cc: a...@arndb.de; linux-kernel@vger.kernel.org; linux-arm-
> > > ker...@lists.infradead.org; devicet...@vger.kernel.org;
> > > open...@lists.ozlabs.org; j...@jms.id.au; j...@resnulli.us;
> > > tklau...@distanz.ch; linux-ser...@vger.kernel.org; m...@shout.net;
> > > Vadim Pasternak ; system-sw-low-level
> > > ; robh...@kernel.org;
> > > openocd-devel- ow...@lists.sourceforge.net;
> > > linux-...@vger.kernel.org; da...@davemloft.net; mche...@kernel.org;
> > > Jiri Pirko 
> > > Subject: Re: [patch v9 1/4] drivers: jtag: Add JTAG core driver
> > >
> > > On Thu, Sep 21, 2017 at 12:25:29PM +0300, Oleksandr Shamray wrote:
> > > > +struct jtag {
> > > > +   struct device *dev;
> > > > +   struct cdev cdev;
> > >
> > > Why are you using a cdev here and not just a normal misc device?
> >
> > What the benefits to use misc instead of cdev?
> 
> Less code, simpler logic, easier to review and understand, etc.
> 
> Let me ask you, why use a cdev instead of a misc?

As far as I know, a misc device is more applicable if we want to create a
single device, e.g. /dev/jtag.
But in the current case we can have more than one jtag device: /dev/jtag0 ...
/dev/jtagN.
So I decided to use cdev.

> 
> > > I forgot if this is what you were doing before, sorry...
> > >
> > > > +   int id;
> > > > +   atomic_t open;
> > >
> > > Why do you need this?
> >
> > This counter used to avoid open at the same time by 2 or more users.
> 
> But it isn't working :)
> 
> And why do you care?
> 
> > > > +   const struct jtag_ops *ops;
> > > > +   unsigned long priv[0] __aligned(ARCH_DMA_MINALIGN);
> > >
> > > Huh?  Why is this needed to be dma aligned?  Why not just use the
> > > private pointer in struct device?
> > >
> >
> > It is critical?
> 
> You are saying it is, so you have to justify it.  There is a pointer for you 
> to use,
> don't make new ones for no reason, right?
> 

You are right. Will remove.

> > > > +};
> > > > +
> > > > +static dev_t jtag_devt;
> > > > +static DEFINE_IDA(jtag_ida);
> > > > +
> > > > +void *jtag_priv(struct jtag *jtag) {
> > > > +   return jtag->priv;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(jtag_priv);
> > > > +
> > > > +static u8 *jtag_copy_from_user(__u64 udata, unsigned long bit_size) {
> > > > +   unsigned long size;
> > > > +   void *kdata;
> > > > +
> > > > +   size = DIV_ROUND_UP(bit_size, BITS_PER_BYTE);
> > > > +   kdata = memdup_user(u64_to_user_ptr(udata), size);
> > >
> > > You only use this once, why not just open-code it?
> >
> > I think it makes code more understandable.
> 
> As a reviewer, I don't :)

Ok, I will fix :)

> 
> > > > +
> > > > +   return kdata;
> > > > +}
> > > > +
> > > > +static unsigned long jtag_copy_to_user(__u64 udata, u8 *kdata,
> > > > +  unsigned long bit_size) {
> > > > +   unsigned long size;
> > > > +
> > > > +   size = DIV_ROUND_UP(bit_size, BITS_PER_BYTE);
> > > > +
> > > > +   return copy_to_user(u64_to_user_ptr(udata), (void *)(kdata),
> > > > +size);
> > >
> > > Same here, making this a separate function seems odd.
> >
> > Same, I think it makes code more understandable.
> 
> But it doesn't.
> 

Ok, I will fix :)

> > > > +
> > > > +   if (jtag->ops->freq_set)
> > > > +   err = jtag->ops->freq_set(jtag, value);
> > > > +   else
> > > > +   err = -EOPNOTSUPP;
> > > > +   break;
> > > > +
> > > > +   case JTAG_IOCRUNTEST:
> > > > +   if (copy_from_user(, (void *)arg,
> > > > +  sizeof(struct jtag_run_test_idle)))
> > > > +   return -ENOMEM;
> > > > +   err = jtag_run_test_idle_op(jtag, );
> > >
> > > Who validates the structure fields?  Is that up to the individual
> > > jtag driver?  Why not do it in the core correctly so that it only
> > > has to be done in one place and you do not have to audit every individual
> driver?
> >
> > Input parameters validated by jtag  platform driver. I think it not 
> > critical.
> 
> Not true at 

[PATCH] staging: rtl8723bs: Fix space before '[' error.

2017-10-25 Thread Arvind Yadav
Fix checkpatch.pl error:
ERROR: space prohibited before open square bracket '['.

Signed-off-by: Arvind Yadav 
---
 drivers/staging/rtl8723bs/os_dep/rtw_proc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/rtl8723bs/os_dep/rtw_proc.c 
b/drivers/staging/rtl8723bs/os_dep/rtw_proc.c
index 9227745..3f18cb1 100644
--- a/drivers/staging/rtl8723bs/os_dep/rtw_proc.c
+++ b/drivers/staging/rtl8723bs/os_dep/rtw_proc.c
@@ -87,7 +87,7 @@ static ssize_t proc_set_log_level(struct file *file, const 
char __user *buffer,
 * rtw_drv_proc:
 * init/deinit when register/unregister driver
 */
-static const struct rtw_proc_hdl drv_proc_hdls [] = {
+static const struct rtw_proc_hdl drv_proc_hdls[] = {
{"ver_info", proc_get_drv_version, NULL},
{"log_level", proc_get_log_level, proc_set_log_level},
 };
@@ -363,7 +363,7 @@ static int proc_get_cam_cache(struct seq_file *m, void *v)
 * rtw_adapter_proc:
 * init/deinit when register/unregister net_device
 */
-static const struct rtw_proc_hdl adapter_proc_hdls [] = {
+static const struct rtw_proc_hdl adapter_proc_hdls[] = {
{"write_reg", proc_get_dummy, proc_set_write_reg},
{"read_reg", proc_get_read_reg, proc_set_read_reg},
{"fwstate", proc_get_fwstate, NULL},
@@ -598,7 +598,7 @@ ssize_t proc_set_odm_adaptivity(struct file *file, const 
char __user *buffer, si
 * rtw_odm_proc:
 * init/deinit when register/unregister net_device, along with rtw_adapter_proc
 */
-static const struct rtw_proc_hdl odm_proc_hdls [] = {
+static const struct rtw_proc_hdl odm_proc_hdls[] = {
{"dbg_comp", proc_get_odm_dbg_comp, proc_set_odm_dbg_comp},
{"dbg_level", proc_get_odm_dbg_level, proc_set_odm_dbg_level},
{"ability", proc_get_odm_ability, proc_set_odm_ability},
-- 
2.7.4



[PATCH] staging: rtl8723bs: Fix space before '[' error.

2017-10-25 Thread Arvind Yadav
Fix checkpatch.pl error:
ERROR: space prohibited before open square bracket '['.

Signed-off-by: Arvind Yadav 
---
 drivers/staging/rtl8723bs/os_dep/rtw_proc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/rtl8723bs/os_dep/rtw_proc.c 
b/drivers/staging/rtl8723bs/os_dep/rtw_proc.c
index 9227745..3f18cb1 100644
--- a/drivers/staging/rtl8723bs/os_dep/rtw_proc.c
+++ b/drivers/staging/rtl8723bs/os_dep/rtw_proc.c
@@ -87,7 +87,7 @@ static ssize_t proc_set_log_level(struct file *file, const 
char __user *buffer,
 * rtw_drv_proc:
 * init/deinit when register/unregister driver
 */
-static const struct rtw_proc_hdl drv_proc_hdls [] = {
+static const struct rtw_proc_hdl drv_proc_hdls[] = {
{"ver_info", proc_get_drv_version, NULL},
{"log_level", proc_get_log_level, proc_set_log_level},
 };
@@ -363,7 +363,7 @@ static int proc_get_cam_cache(struct seq_file *m, void *v)
 * rtw_adapter_proc:
 * init/deinit when register/unregister net_device
 */
-static const struct rtw_proc_hdl adapter_proc_hdls [] = {
+static const struct rtw_proc_hdl adapter_proc_hdls[] = {
{"write_reg", proc_get_dummy, proc_set_write_reg},
{"read_reg", proc_get_read_reg, proc_set_read_reg},
{"fwstate", proc_get_fwstate, NULL},
@@ -598,7 +598,7 @@ ssize_t proc_set_odm_adaptivity(struct file *file, const 
char __user *buffer, si
 * rtw_odm_proc:
 * init/deinit when register/unregister net_device, along with rtw_adapter_proc
 */
-static const struct rtw_proc_hdl odm_proc_hdls [] = {
+static const struct rtw_proc_hdl odm_proc_hdls[] = {
{"dbg_comp", proc_get_odm_dbg_comp, proc_set_odm_dbg_comp},
{"dbg_level", proc_get_odm_dbg_level, proc_set_odm_dbg_level},
{"ability", proc_get_odm_ability, proc_set_odm_ability},
-- 
2.7.4



Re: [PATCH] sound: Convert timers to use timer_setup()

2017-10-25 Thread Takashi Iwai
On Wed, 25 Oct 2017 17:09:27 +0200,
Kees Cook wrote:
> 
> In preparation for unconditionally passing the struct timer_list
> pointer to all timer callbacks, switch to using the new timer_setup()
> and from_timer() to pass the timer pointer explicitly. These are all the
> "mechanical" changes remaining in the sound subsystem.
> 
> Cc: Jaroslav Kysela 
> Cc: Takashi Iwai 
> Cc: Bard Liao 
> Cc: Oder Chiou 
> Cc: Liam Girdwood 
> Cc: Mark Brown 
> Cc: Peter Ujfalusi 
> Cc: Jarkko Nikula 
> Cc: Takashi Sakamoto 
> Cc: alsa-de...@alsa-project.org
> Signed-off-by: Kees Cook 
> ---
>  sound/i2c/other/ak4117.c  |  8 
>  sound/pci/ctxfi/cttimer.c |  7 +++
>  sound/pci/echoaudio/midi.c| 10 +-
>  sound/pci/korg1212/korg1212.c |  7 +++
>  sound/pci/rme9652/hdsp.c  |  8 
>  sound/pci/rme9652/hdspm.c |  8 
>  sound/soc/codecs/rt5645.c |  7 +++
>  sound/soc/omap/ams-delta.c|  4 ++--

This contains a couple of ASoC fixes.
Mark, is it OK to take this through my tree directly?
It's a trivial change and is not supposed to conflict with anything else.


thanks,

Takashi


>  8 files changed, 28 insertions(+), 31 deletions(-)
> 
> diff --git a/sound/i2c/other/ak4117.c b/sound/i2c/other/ak4117.c
> index 3ab099fb8c15..b923342cadf4 100644
> --- a/sound/i2c/other/ak4117.c
> +++ b/sound/i2c/other/ak4117.c
> @@ -35,7 +35,7 @@ MODULE_LICENSE("GPL");
>  
>  #define AK4117_ADDR  0x00 /* fixed address */
>  
> -static void snd_ak4117_timer(unsigned long data);
> +static void snd_ak4117_timer(struct timer_list *t);
>  
>  static void reg_write(struct ak4117 *ak4117, unsigned char reg, unsigned 
> char val)
>  {
> @@ -91,7 +91,7 @@ int snd_ak4117_create(struct snd_card *card, ak4117_read_t 
> *read, ak4117_write_t
>   chip->read = read;
>   chip->write = write;
>   chip->private_data = private_data;
> -	setup_timer(&chip->timer, snd_ak4117_timer, (unsigned long)chip);
> +	timer_setup(&chip->timer, snd_ak4117_timer, 0);
>  
>   for (reg = 0; reg < 5; reg++)
>   chip->regmap[reg] = pgm[reg];
> @@ -529,9 +529,9 @@ int snd_ak4117_check_rate_and_errors(struct ak4117 
> *ak4117, unsigned int flags)
>   return res;
>  }
>  
> -static void snd_ak4117_timer(unsigned long data)
> +static void snd_ak4117_timer(struct timer_list *t)
>  {
> - struct ak4117 *chip = (struct ak4117 *)data;
> + struct ak4117 *chip = from_timer(chip, t, timer);
>  
>   if (chip->init)
>   return;
> diff --git a/sound/pci/ctxfi/cttimer.c b/sound/pci/ctxfi/cttimer.c
> index 8f945341720b..08e874e9a7f6 100644
> --- a/sound/pci/ctxfi/cttimer.c
> +++ b/sound/pci/ctxfi/cttimer.c
> @@ -63,9 +63,9 @@ struct ct_timer {
>   * system-timer-based updates
>   */
>  
> -static void ct_systimer_callback(unsigned long data)
> +static void ct_systimer_callback(struct timer_list *t)
>  {
> - struct ct_timer_instance *ti = (struct ct_timer_instance *)data;
> + struct ct_timer_instance *ti = from_timer(ti, t, timer);
>   struct snd_pcm_substream *substream = ti->substream;
>   struct snd_pcm_runtime *runtime = substream->runtime;
>   struct ct_atc_pcm *apcm = ti->apcm;
> @@ -93,8 +93,7 @@ static void ct_systimer_callback(unsigned long data)
>  
>  static void ct_systimer_init(struct ct_timer_instance *ti)
>  {
> -	setup_timer(&ti->timer, ct_systimer_callback,
> -		    (unsigned long)ti);
> +	timer_setup(&ti->timer, ct_systimer_callback, 0);
>  }
>  
>  static void ct_systimer_start(struct ct_timer_instance *ti)
> diff --git a/sound/pci/echoaudio/midi.c b/sound/pci/echoaudio/midi.c
> index 8c685ddb1a41..6045a115cffe 100644
> --- a/sound/pci/echoaudio/midi.c
> +++ b/sound/pci/echoaudio/midi.c
> @@ -199,9 +199,9 @@ static int snd_echo_midi_output_open(struct 
> snd_rawmidi_substream *substream)
>  
>  
>  
> -static void snd_echo_midi_output_write(unsigned long data)
> +static void snd_echo_midi_output_write(struct timer_list *t)
>  {
> - struct echoaudio *chip = (struct echoaudio *)data;
> + struct echoaudio *chip = from_timer(chip, t, timer);
>   unsigned long flags;
>   int bytes, sent, time;
>   unsigned char buf[MIDI_OUT_BUFFER_SIZE - 1];
> @@ -257,8 +257,8 @@ static void snd_echo_midi_output_trigger(struct 
> snd_rawmidi_substream *substream
>   spin_lock_irq(>lock);
>   if (up) {
>   if (!chip->tinuse) {
> -	setup_timer(&chip->timer, snd_echo_midi_output_write,
> -		    (unsigned long)chip);
> +	timer_setup(&chip->timer, snd_echo_midi_output_write,
> + 0);
>   chip->tinuse = 1;
>   }
>   } else {
> @@ -273,7 +273,7 @@ static void 

Re: [PATCH] sound: Convert timers to use timer_setup()

2017-10-25 Thread Takashi Iwai
On Wed, 25 Oct 2017 17:09:27 +0200,
Kees Cook wrote:
> 
> In preparation for unconditionally passing the struct timer_list
> pointer to all timer callbacks, switch to using the new timer_setup()
> and from_timer() to pass the timer pointer explicitly. These are all the
> "mechanical" changes remaining in the sound subsystem.
> 
> Cc: Jaroslav Kysela 
> Cc: Takashi Iwai 
> Cc: Bard Liao 
> Cc: Oder Chiou 
> Cc: Liam Girdwood 
> Cc: Mark Brown 
> Cc: Peter Ujfalusi 
> Cc: Jarkko Nikula 
> Cc: Takashi Sakamoto 
> Cc: alsa-de...@alsa-project.org
> Signed-off-by: Kees Cook 
> ---
>  sound/i2c/other/ak4117.c  |  8 
>  sound/pci/ctxfi/cttimer.c |  7 +++
>  sound/pci/echoaudio/midi.c| 10 +-
>  sound/pci/korg1212/korg1212.c |  7 +++
>  sound/pci/rme9652/hdsp.c  |  8 
>  sound/pci/rme9652/hdspm.c |  8 
>  sound/soc/codecs/rt5645.c |  7 +++
>  sound/soc/omap/ams-delta.c|  4 ++--

This contains a couple of ASoC fixes.
Mark, is it OK to take this through my tree directly?
It's a trivial change and is not supposed to conflict with anything else.


thanks,

Takashi


>  8 files changed, 28 insertions(+), 31 deletions(-)
> 
> diff --git a/sound/i2c/other/ak4117.c b/sound/i2c/other/ak4117.c
> index 3ab099fb8c15..b923342cadf4 100644
> --- a/sound/i2c/other/ak4117.c
> +++ b/sound/i2c/other/ak4117.c
> @@ -35,7 +35,7 @@ MODULE_LICENSE("GPL");
>  
>  #define AK4117_ADDR  0x00 /* fixed address */
>  
> -static void snd_ak4117_timer(unsigned long data);
> +static void snd_ak4117_timer(struct timer_list *t);
>  
>  static void reg_write(struct ak4117 *ak4117, unsigned char reg, unsigned 
> char val)
>  {
> @@ -91,7 +91,7 @@ int snd_ak4117_create(struct snd_card *card, ak4117_read_t 
> *read, ak4117_write_t
>   chip->read = read;
>   chip->write = write;
>   chip->private_data = private_data;
> -	setup_timer(&chip->timer, snd_ak4117_timer, (unsigned long)chip);
> +	timer_setup(&chip->timer, snd_ak4117_timer, 0);
>  
>   for (reg = 0; reg < 5; reg++)
>   chip->regmap[reg] = pgm[reg];
> @@ -529,9 +529,9 @@ int snd_ak4117_check_rate_and_errors(struct ak4117 
> *ak4117, unsigned int flags)
>   return res;
>  }
>  
> -static void snd_ak4117_timer(unsigned long data)
> +static void snd_ak4117_timer(struct timer_list *t)
>  {
> - struct ak4117 *chip = (struct ak4117 *)data;
> + struct ak4117 *chip = from_timer(chip, t, timer);
>  
>   if (chip->init)
>   return;
> diff --git a/sound/pci/ctxfi/cttimer.c b/sound/pci/ctxfi/cttimer.c
> index 8f945341720b..08e874e9a7f6 100644
> --- a/sound/pci/ctxfi/cttimer.c
> +++ b/sound/pci/ctxfi/cttimer.c
> @@ -63,9 +63,9 @@ struct ct_timer {
>   * system-timer-based updates
>   */
>  
> -static void ct_systimer_callback(unsigned long data)
> +static void ct_systimer_callback(struct timer_list *t)
>  {
> - struct ct_timer_instance *ti = (struct ct_timer_instance *)data;
> + struct ct_timer_instance *ti = from_timer(ti, t, timer);
>   struct snd_pcm_substream *substream = ti->substream;
>   struct snd_pcm_runtime *runtime = substream->runtime;
>   struct ct_atc_pcm *apcm = ti->apcm;
> @@ -93,8 +93,7 @@ static void ct_systimer_callback(unsigned long data)
>  
>  static void ct_systimer_init(struct ct_timer_instance *ti)
>  {
> -	setup_timer(&ti->timer, ct_systimer_callback,
> -		    (unsigned long)ti);
> +	timer_setup(&ti->timer, ct_systimer_callback, 0);
>  }
>  
>  static void ct_systimer_start(struct ct_timer_instance *ti)
> diff --git a/sound/pci/echoaudio/midi.c b/sound/pci/echoaudio/midi.c
> index 8c685ddb1a41..6045a115cffe 100644
> --- a/sound/pci/echoaudio/midi.c
> +++ b/sound/pci/echoaudio/midi.c
> @@ -199,9 +199,9 @@ static int snd_echo_midi_output_open(struct 
> snd_rawmidi_substream *substream)
>  
>  
>  
> -static void snd_echo_midi_output_write(unsigned long data)
> +static void snd_echo_midi_output_write(struct timer_list *t)
>  {
> - struct echoaudio *chip = (struct echoaudio *)data;
> + struct echoaudio *chip = from_timer(chip, t, timer);
>   unsigned long flags;
>   int bytes, sent, time;
>   unsigned char buf[MIDI_OUT_BUFFER_SIZE - 1];
> @@ -257,8 +257,8 @@ static void snd_echo_midi_output_trigger(struct 
> snd_rawmidi_substream *substream
>   spin_lock_irq(>lock);
>   if (up) {
>   if (!chip->tinuse) {
> -	setup_timer(&chip->timer, snd_echo_midi_output_write,
> -		    (unsigned long)chip);
> +	timer_setup(&chip->timer, snd_echo_midi_output_write,
> + 0);
>   chip->tinuse = 1;
>   }
>   } else {
> @@ -273,7 +273,7 @@ static void snd_echo_midi_output_trigger(struct 
> snd_rawmidi_substream *substream
>   spin_unlock_irq(>lock);
>  
>   if (up && !chip->midi_full)
> - snd_echo_midi_output_write((unsigned long)chip);
> + 

Re: [PATCH] wimax/i2400m: Remove VLAIS

2017-10-25 Thread Dan Williams
On Tue, 2017-10-24 at 21:00 +, Perez-Gonzalez, Inaky wrote:
> > ping
> > any comments on this?
> 
> None from me; I don't have access to this HW anymore, so I can't
> validate
> if the change would work or not.

I still have a 5350 around somewhere, I can make sure firmware loads if
you like.

Dan


Re: [PATCH] iio/accel/bmc150: Improve unlocking of a mutex in two functions

2017-10-25 Thread Hans de Goede

Hi,

On 25-10-17 16:33, SF Markus Elfring wrote:

From: Markus Elfring 
Date: Wed, 25 Oct 2017 16:26:29 +0200

Add a jump target so that a call of the function "mutex_unlock" is mostly
stored at the end of these function implementations.
Replace five calls by goto statements.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring 
---
  drivers/iio/accel/bmc150-accel-core.c | 32 ++--
  1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/iio/accel/bmc150-accel-core.c 
b/drivers/iio/accel/bmc150-accel-core.c
index 870f92ef61c2..f2a85a11a5e4 100644
--- a/drivers/iio/accel/bmc150-accel-core.c
+++ b/drivers/iio/accel/bmc150-accel-core.c
@@ -554,18 +554,15 @@ static int bmc150_accel_get_axis(struct bmc150_accel_data 
*data,
  
  	mutex_lock(&data->mutex);

ret = bmc150_accel_set_power_state(data, true);
-   if (ret < 0) {
-   mutex_unlock(&data->mutex);
-   return ret;
-   }
+   if (ret < 0)
+   goto unlock_after_failure;
  
  	ret = regmap_bulk_read(data->regmap, BMC150_ACCEL_AXIS_TO_REG(axis),

   &raw_val, sizeof(raw_val));
if (ret < 0) {
dev_err(dev, "Error reading axis %d\n", axis);
bmc150_accel_set_power_state(data, false);
-   mutex_unlock(&data->mutex);
-   return ret;
+   goto unlock_after_failure;
}
*val = sign_extend32(le16_to_cpu(raw_val) >> chan->scan_type.shift,
 chan->scan_type.realbits - 1);
@@ -575,6 +572,10 @@ static int bmc150_accel_get_axis(struct bmc150_accel_data 
*data,
return ret;
  
  	return IIO_VAL_INT;

+
+unlock_after_failure:
+   mutex_unlock(&data->mutex);
+   return ret;
  }
  
  static int bmc150_accel_read_raw(struct iio_dev *indio_dev,


IMHO, if you do this, you should rework the function so that there is a single
unlock call at the end, not a separate one in an error label.

Could e.g. change this:

	ret = bmc150_accel_set_power_state(data, false);
	mutex_unlock(&data->mutex);
	if (ret < 0)
		return ret;

	return IIO_VAL_INT;
}

To:

	ret = bmc150_accel_set_power_state(data, false);
	if (ret < 0)
		goto unlock;

	ret = IIO_VAL_INT;
unlock:
	mutex_unlock(&data->mutex);

	return ret;
}

And also use the unlock label in the other cases, this is actually
quite a normal pattern. I see little use in a patch like this if there
are still 2 unlock paths after the patch.

Regards,

Hans






@@ -1170,28 +1171,23 @@ static int bmc150_accel_trigger_set_state(struct 
iio_trigger *trig,
mutex_lock(>mutex);
  
  	if (t->enabled == state) {

-   mutex_unlock(>mutex);
-   return 0;
+   ret = 0;
+   goto unlock;
}
  
  	if (t->setup) {

ret = t->setup(t, state);
-   if (ret < 0) {
-   mutex_unlock(>mutex);
-   return ret;
-   }
+   if (ret < 0)
+   goto unlock;
}
  
  	ret = bmc150_accel_set_interrupt(data, t->intr, state);

-   if (ret < 0) {
-   mutex_unlock(>mutex);
-   return ret;
-   }
+   if (ret < 0)
+   goto unlock;
  
  	t->enabled = state;

-
+unlock:
mutex_unlock(>mutex);
-
return ret;
  }
  



Re: [PATCH] wimax/i2400m: Remove VLAIS

2017-10-25 Thread Dan Williams
On Tue, 2017-10-24 at 21:00 +, Perez-Gonzalez, Inaky wrote:
> > ping
> > any comments on this?
> 
> None from me; I don't have access to this HW anymore, so I can't
> validate
> if the change would work or not.

I still have a 5350 around somewhere, I can make sure firmware loads if
you like.

Dan


Re: [PATCH] iio/accel/bmc150: Improve unlocking of a mutex in two functions

2017-10-25 Thread Hans de Goede

Hi,

On 25-10-17 16:33, SF Markus Elfring wrote:

From: Markus Elfring 
Date: Wed, 25 Oct 2017 16:26:29 +0200

Add a jump target so that a call of the function "mutex_unlock" is mostly
stored at the end of these function implementations.
Replace five calls by goto statements.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring 
---
  drivers/iio/accel/bmc150-accel-core.c | 32 ++--
  1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/iio/accel/bmc150-accel-core.c 
b/drivers/iio/accel/bmc150-accel-core.c
index 870f92ef61c2..f2a85a11a5e4 100644
--- a/drivers/iio/accel/bmc150-accel-core.c
+++ b/drivers/iio/accel/bmc150-accel-core.c
@@ -554,18 +554,15 @@ static int bmc150_accel_get_axis(struct bmc150_accel_data 
*data,
  
  	mutex_lock(&data->mutex);

ret = bmc150_accel_set_power_state(data, true);
-   if (ret < 0) {
-   mutex_unlock(&data->mutex);
-   return ret;
-   }
+   if (ret < 0)
+   goto unlock_after_failure;
  
  	ret = regmap_bulk_read(data->regmap, BMC150_ACCEL_AXIS_TO_REG(axis),

   &raw_val, sizeof(raw_val));
if (ret < 0) {
dev_err(dev, "Error reading axis %d\n", axis);
bmc150_accel_set_power_state(data, false);
-   mutex_unlock(&data->mutex);
-   return ret;
+   goto unlock_after_failure;
}
*val = sign_extend32(le16_to_cpu(raw_val) >> chan->scan_type.shift,
 chan->scan_type.realbits - 1);
@@ -575,6 +572,10 @@ static int bmc150_accel_get_axis(struct bmc150_accel_data 
*data,
return ret;
  
  	return IIO_VAL_INT;

+
+unlock_after_failure:
+   mutex_unlock(&data->mutex);
+   return ret;
  }
  
  static int bmc150_accel_read_raw(struct iio_dev *indio_dev,


IMHO, if you do this, you should rework the function so that there is a single
unlock call at the end, not a separate one in an error label.

Could e.g. change this:

	ret = bmc150_accel_set_power_state(data, false);
	mutex_unlock(&data->mutex);
	if (ret < 0)
		return ret;

	return IIO_VAL_INT;
}

To:

	ret = bmc150_accel_set_power_state(data, false);
	if (ret < 0)
		goto unlock;

	ret = IIO_VAL_INT;
unlock:
	mutex_unlock(&data->mutex);

	return ret;
}

And also use the unlock label in the other cases, this is actually
quite a normal pattern. I see little use in a patch like this if there
are still 2 unlock paths after the patch.

Regards,

Hans






@@ -1170,28 +1171,23 @@ static int bmc150_accel_trigger_set_state(struct 
iio_trigger *trig,
mutex_lock(>mutex);
  
  	if (t->enabled == state) {

-   mutex_unlock(>mutex);
-   return 0;
+   ret = 0;
+   goto unlock;
}
  
  	if (t->setup) {

ret = t->setup(t, state);
-   if (ret < 0) {
-   mutex_unlock(>mutex);
-   return ret;
-   }
+   if (ret < 0)
+   goto unlock;
}
  
  	ret = bmc150_accel_set_interrupt(data, t->intr, state);

-   if (ret < 0) {
-   mutex_unlock(>mutex);
-   return ret;
-   }
+   if (ret < 0)
+   goto unlock;
  
  	t->enabled = state;

-
+unlock:
mutex_unlock(>mutex);
-
return ret;
  }
  



Re: [PATCH 6/8] PCI: host: brcmstb: add MSI capability

2017-10-25 Thread Jim Quinlan
On Wed, Oct 25, 2017 at 9:22 AM, Bjorn Helgaas  wrote:
> On Tue, Oct 24, 2017 at 02:15:47PM -0400, Jim Quinlan wrote:
>> This commit adds MSI to the Broadcom STB PCIe host controller. It does
>> not add MSIX since that functionality is not in the HW.  The MSI
>> controller is physically located within the PCIe block, however, there
>> is no reason why the MSI controller could not be moved elsewhere in
>> the future.
>>
>> Since the internal Brcmstb MSI controller is intertwined with the PCIe
>> controller, it is not its own platform device but rather part of the
>> PCIe platform device.
>>
>> Signed-off-by: Jim Quinlan 
>> ---
>>  drivers/pci/host/Kconfig   |  12 ++
>>  drivers/pci/host/Makefile  |   1 +
>>  drivers/pci/host/pci-brcmstb-msi.c | 318 
>> +
>>  drivers/pci/host/pci-brcmstb.c |  72 +++--
>>  drivers/pci/host/pci-brcmstb.h |  26 +++
>>  5 files changed, 419 insertions(+), 10 deletions(-)
>>  create mode 100644 drivers/pci/host/pci-brcmstb-msi.c
>>
>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>> index b9b4f11..54aa5d2 100644
>> --- a/drivers/pci/host/Kconfig
>> +++ b/drivers/pci/host/Kconfig
>> @@ -228,4 +228,16 @@ config PCI_BRCMSTB
>>   default ARCH_BRCMSTB || BMIPS_GENERIC
>>   help
>> Adds support for Broadcom Settop Box PCIe host controller.
>> +   To compile this driver as a module, choose m here.
>> +
>> +config PCI_BRCMSTB_MSI
>> + bool "Broadcom Brcmstb PCIe MSI support"
>> + depends on ARCH_BRCMSTB || BMIPS_GENERIC
>> + depends on OF
>> + depends on PCI_MSI
>> + default PCI_BRCMSTB
>> + help
>> +   Say Y here if you want to enable MSI support for Broadcom's iProc
>> +   PCIe controller
>
> Is there any reason *not* to enable MSI?  Most drivers just include
> MSI support in the base driver, in the same file and under the same
> config symbol.
>
CONFIG_PCI_BRCMSTB_MSI is bool, CONFIG_PCI_BRCMSTB is tri-state.  The
reason for this -- and also for the separate msi file -- is that there
is an irq call  that is not exported (I can dig it up if you'd like).
We have had requests that the PCIe RC driver doesn't run at boot, so
we consider the idea of it being a module valuable.  You can see the
same thing going on with PCIE_IPROC_MSI.

>> +
>>  endmenu
>> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
>> index c283321..1026d6f 100644
>> --- a/drivers/pci/host/Makefile
>> +++ b/drivers/pci/host/Makefile
>> @@ -23,6 +23,7 @@ obj-$(CONFIG_PCIE_TANGO_SMP8759) += pcie-tango.o
>>  obj-$(CONFIG_VMD) += vmd.o
>>  obj-$(CONFIG_PCI_BRCMSTB) += brcmstb-pci.o
>>  brcmstb-pci-objs := pci-brcmstb.o pci-brcmstb-dma.o
>> +obj-$(CONFIG_PCI_BRCMSTB_MSI) += pci-brcmstb-msi.o
>>
>>  # The following drivers are for devices that use the generic ACPI
>>  # pci_root.c driver but don't support standard ECAM config access.
>> diff --git a/drivers/pci/host/pci-brcmstb-msi.c 
>> b/drivers/pci/host/pci-brcmstb-msi.c
>> new file mode 100644
>> index 000..c805e2f
>> --- /dev/null
>> +++ b/drivers/pci/host/pci-brcmstb-msi.c
>> @@ -0,0 +1,318 @@
>> +/*
>> + * Copyright (C) 2015-2017 Broadcom
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> + * GNU General Public License for more details.
>> + *
>> + */
>
> Remove last blank line of comment and add a blank line between comment
> and #includes.
Will fix.

>
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#include "pci-brcmstb.h"
>> +
>> +#define PCIE_MISC_MSI_DATA_CONFIG0x404c
>> +#define PCIE_MSI_INTR2_BASE  0x4500
>> +#define PCIE_MISC_MSI_BAR_CONFIG_LO  0x4044
>> +#define PCIE_MISC_MSI_BAR_CONFIG_HI  0x4048
>> +
>> +/* Offsets from PCIE_INTR2_CPU_BASE and PCIE_MSI_INTR2_BASE */
>> +#define STATUS   0x0
>> +#define SET  0x4
>> +#define CLR  0x8
>> +#define MASK_STATUS  0xc
>> +#define MASK_SET 0x10
>> +#define MASK_CLR 0x14
>> +
>> +struct brcm_msi {
>> + struct irq_domain *msi_domain;
>> + struct irq_domain *inner_domain;
>> + struct mutex lock; /* guards the alloc/free operations */
>> + u64 target_addr;
>> + int irq;
>> + /* intr_base is the base pointer for interrupt status/set/clr regs */
>> + 

Re: [PATCH 6/8] PCI: host: brcmstb: add MSI capability

2017-10-25 Thread Jim Quinlan
On Wed, Oct 25, 2017 at 9:22 AM, Bjorn Helgaas  wrote:
> On Tue, Oct 24, 2017 at 02:15:47PM -0400, Jim Quinlan wrote:
>> This commit adds MSI to the Broadcom STB PCIe host controller. It does
>> not add MSIX since that functionality is not in the HW.  The MSI
>> controller is physically located within the PCIe block, however, there
>> is no reason why the MSI controller could not be moved elsewhere in
>> the future.
>>
>> Since the internal Brcmstb MSI controller is intertwined with the PCIe
>> controller, it is not its own platform device but rather part of the
>> PCIe platform device.
>>
>> Signed-off-by: Jim Quinlan 
>> ---
>>  drivers/pci/host/Kconfig   |  12 ++
>>  drivers/pci/host/Makefile  |   1 +
>>  drivers/pci/host/pci-brcmstb-msi.c | 318 
>> +
>>  drivers/pci/host/pci-brcmstb.c |  72 +++--
>>  drivers/pci/host/pci-brcmstb.h |  26 +++
>>  5 files changed, 419 insertions(+), 10 deletions(-)
>>  create mode 100644 drivers/pci/host/pci-brcmstb-msi.c
>>
>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>> index b9b4f11..54aa5d2 100644
>> --- a/drivers/pci/host/Kconfig
>> +++ b/drivers/pci/host/Kconfig
>> @@ -228,4 +228,16 @@ config PCI_BRCMSTB
>>   default ARCH_BRCMSTB || BMIPS_GENERIC
>>   help
>> Adds support for Broadcom Settop Box PCIe host controller.
>> +   To compile this driver as a module, choose m here.
>> +
>> +config PCI_BRCMSTB_MSI
>> + bool "Broadcom Brcmstb PCIe MSI support"
>> + depends on ARCH_BRCMSTB || BMIPS_GENERIC
>> + depends on OF
>> + depends on PCI_MSI
>> + default PCI_BRCMSTB
>> + help
>> +   Say Y here if you want to enable MSI support for Broadcom's iProc
>> +   PCIe controller
>
> Is there any reason *not* to enable MSI?  Most drivers just include
> MSI support in the base driver, in the same file and under the same
> config symbol.
>
CONFIG_PCI_BRCMSTB_MSI is bool, CONFIG_PCI_BRCMSTB is tri-state.  The
reason for this -- and also for the separate msi file -- is that there
is an irq call  that is not exported (I can dig it up if you'd like).
We have had requests that the PCIe RC driver doesn't run at boot, so
we consider the idea of it being a module valuable.  You can see the
same thing going on with PCIE_IPROC_MSI.

>> +
>>  endmenu
>> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
>> index c283321..1026d6f 100644
>> --- a/drivers/pci/host/Makefile
>> +++ b/drivers/pci/host/Makefile
>> @@ -23,6 +23,7 @@ obj-$(CONFIG_PCIE_TANGO_SMP8759) += pcie-tango.o
>>  obj-$(CONFIG_VMD) += vmd.o
>>  obj-$(CONFIG_PCI_BRCMSTB) += brcmstb-pci.o
>>  brcmstb-pci-objs := pci-brcmstb.o pci-brcmstb-dma.o
>> +obj-$(CONFIG_PCI_BRCMSTB_MSI) += pci-brcmstb-msi.o
>>
>>  # The following drivers are for devices that use the generic ACPI
>>  # pci_root.c driver but don't support standard ECAM config access.
>> diff --git a/drivers/pci/host/pci-brcmstb-msi.c 
>> b/drivers/pci/host/pci-brcmstb-msi.c
>> new file mode 100644
>> index 000..c805e2f
>> --- /dev/null
>> +++ b/drivers/pci/host/pci-brcmstb-msi.c
>> @@ -0,0 +1,318 @@
>> +/*
>> + * Copyright (C) 2015-2017 Broadcom
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> + * GNU General Public License for more details.
>> + *
>> + */
>
> Remove last blank line of comment and add a blank line between comment
> and #includes.
Will fix.

>
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#include "pci-brcmstb.h"
>> +
>> +#define PCIE_MISC_MSI_DATA_CONFIG0x404c
>> +#define PCIE_MSI_INTR2_BASE  0x4500
>> +#define PCIE_MISC_MSI_BAR_CONFIG_LO  0x4044
>> +#define PCIE_MISC_MSI_BAR_CONFIG_HI  0x4048
>> +
>> +/* Offsets from PCIE_INTR2_CPU_BASE and PCIE_MSI_INTR2_BASE */
>> +#define STATUS   0x0
>> +#define SET  0x4
>> +#define CLR  0x8
>> +#define MASK_STATUS  0xc
>> +#define MASK_SET 0x10
>> +#define MASK_CLR 0x14
>> +
>> +struct brcm_msi {
>> + struct irq_domain *msi_domain;
>> + struct irq_domain *inner_domain;
>> + struct mutex lock; /* guards the alloc/free operations */
>> + u64 target_addr;
>> + int irq;
>> + /* intr_base is the base pointer for interrupt status/set/clr regs */
>> + void __iomem *intr_base;
>> + /* 

Re: alpha boot hang - 4.14-rc* regression

2017-10-25 Thread Meelis Roos
> > > > > > removing libata modules and rebooting fixes it - so it seems to be 
> > > > > > loading of libata.
> > > > > 
> > > > > Can you please cherry-pick:
> > > > > 
> > > > > commit b1f9e5e355e9 ("ide: fix IRQ assignment for PCI bus order 
> > > > > probing")
> > > > > 
> > > > > from mainline and let us know if that solves the issue ?
> > > > 
> > > > No, still breaks the same way (b1f9e5e355e9 patched on top of 
> > > > 0e4c2eeb758a).
> > > > 
> > > > 4.14.0-rc5-00095-g1c9fec470b81 was also still broken the same way 
> > > > (tried 
> > > > on Sunday).
> > > 
> > > I am not sure I patched the right sys file but if I did, does the patch
> > > below help ?
> > > 
> > > I think that at sata driver binding time the kernel finds a freed
> > > pointer in the host bridge map_irq() hook and that's where things
> > > go wrong.
> > > 
> > > Please let me know if that's the right sys file, it is a mechanical
> > > change and making it for other sys file should be reasonably simple.
> > > 
> > > Lorenzo
> > > 
> > > -- >8 --
> > > diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
> > 
> > "Booting GENERIC on Tsunami variation Webbrick using machine vector 
> > Webbrick from SRM"
> > 
> > Seems to be the correct file - tsunami is referenced from this file and 
> > the IRQ-s are DP264.
> > 
> > But the patch does not make a difference :(
> 
> It is probably because I patched the wrong map_irq() function,
> I am trying to detect which one you are _actually_ using, if
> the patch below fails I will patch them all (which is what I
> have to do anyway).
> 
> Please give this a go - this _has_ to make a difference, it is not
> correct to leave map_irq() pointers as __init memory, IRQ routing
> for modules can't work.

Yes, the Webbrick entry seems to be the correct one for DS10L. It works fine
on top of the cherry-picked ATA IRQ patch.

Will try it on top of current mainline git.

-- 
Meelis Roos (mr...@linux.ee)


Re: alpha boot hang - 4.14-rc* regression

2017-10-25 Thread Meelis Roos
> > > > > > removing libata modules and rebooting fixes it - so it seems to be 
> > > > > > loading of libata.
> > > > > 
> > > > > Can you please cherry-pick:
> > > > > 
> > > > > commit b1f9e5e355e9 ("ide: fix IRQ assignment for PCI bus order 
> > > > > probing")
> > > > > 
> > > > > from mainline and let us know if that solves the issue ?
> > > > 
> > > > No, still breaks the same way (b1f9e5e355e9 patched on top of 
> > > > 0e4c2eeb758a).
> > > > 
> > > > 4.14.0-rc5-00095-g1c9fec470b81 was also still broken the same way 
> > > > (tried 
> > > > on Sunday).
> > > 
> > > I am not sure I patched the right sys file but if I did, does the patch
> > > below help ?
> > > 
> > > I think that at sata driver binding time the kernel finds a freed
> > > pointer in the host bridge map_irq() hook and that's where things
> > > go wrong.
> > > 
> > > Please let me know if that's the right sys file, it is a mechanical
> > > change and making it for other sys file should be reasonably simple.
> > > 
> > > Lorenzo
> > > 
> > > -- >8 --
> > > diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
> > 
> > "Booting GENERIC on Tsunami variation Webbrick using machine vector 
> > Webbrick from SRM"
> > 
> > Seems to be the correct file - tsunami is referenced from this file and 
> > the IRQ-s are DP264.
> > 
> > But the patch does not make a difference :(
> 
> It is probably because I patched the wrong map_irq() function,
> I am trying to detect which one you are _actually_ using, if
> the patch below fails I will patch them all (which is what I
> have to do anyway).
> 
> Please give this a go - this _has_ to make a difference, it is not
> correct to leave map_irq() pointers as __init memory, IRQ routing
> for modules can't work.

Yes, the Webbrick entry seems to be the correct one for DS10L. It works fine
on top of the cherry-picked ATA IRQ patch.

Will try it on top of current mainline git.

-- 
Meelis Roos (mr...@linux.ee)


Re: [PATCH] I2C-Octeon: Use common error handling code in octeon_i2c_probe()

2017-10-25 Thread David Daney

On 10/25/2017 02:52 AM, SF Markus Elfring wrote:

From: Markus Elfring 


NAK.  We don't need any more Markus Elfring spam.



Date: Wed, 25 Oct 2017 11:45:48 +0200

Add a jump target so that a specific error message is stored only once
at the end of this function implementation.


Please read C standard about duplicate literal strings to see why this 
is a completely misleading and false statement.




Replace two calls of the function "dev_err" by goto statements.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring 
---
  drivers/i2c/busses/i2c-octeon-platdrv.c | 14 ++
  1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/i2c/busses/i2c-octeon-platdrv.c 
b/drivers/i2c/busses/i2c-octeon-platdrv.c
index 64bda83e65ac..ac8a1099c523 100644
--- a/drivers/i2c/busses/i2c-octeon-platdrv.c
+++ b/drivers/i2c/busses/i2c-octeon-platdrv.c
@@ -207,10 +207,8 @@ static int octeon_i2c_probe(struct platform_device *pdev)
result = devm_request_irq(&pdev->dev, i2c->hlc_irq,
  octeon_i2c_hlc_isr78, 0,
  DRV_NAME, i2c);
-   if (result < 0) {
-   dev_err(i2c->dev, "failed to attach interrupt\n");
-   goto out;
-   }
+   if (result < 0)
+   goto report_failure;
} else {
i2c->int_enable = octeon_i2c_int_enable;
i2c->int_disable = octeon_i2c_int_disable;
@@ -220,10 +218,8 @@ static int octeon_i2c_probe(struct platform_device *pdev)
  
  	result = devm_request_irq(&pdev->dev, i2c->irq,

  octeon_i2c_isr, 0, DRV_NAME, i2c);
-   if (result < 0) {
-   dev_err(i2c->dev, "failed to attach interrupt\n");
-   goto out;
-   }
+   if (result < 0)
+   goto report_failure;
  
  	if (OCTEON_IS_MODEL(OCTEON_CN38XX))

i2c->broken_irq_check = true;
@@ -251,6 +247,8 @@ static int octeon_i2c_probe(struct platform_device *pdev)
dev_info(i2c->dev, "probed\n");
return 0;
  
+report_failure:

+   dev_err(i2c->dev, "failed to attach interrupt\n");
  out:
return result;
  };





Re: [PATCH] I2C-Octeon: Use common error handling code in octeon_i2c_probe()

2017-10-25 Thread David Daney

On 10/25/2017 02:52 AM, SF Markus Elfring wrote:

From: Markus Elfring 


NAK.  We don't need any more Markus Elfring spam.



Date: Wed, 25 Oct 2017 11:45:48 +0200

Add a jump target so that a specific error message is stored only once
at the end of this function implementation.


Please read C standard about duplicate literal strings to see why this 
is a completely misleading and false statement.




Replace two calls of the function "dev_err" by goto statements.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring 
---
  drivers/i2c/busses/i2c-octeon-platdrv.c | 14 ++
  1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/i2c/busses/i2c-octeon-platdrv.c 
b/drivers/i2c/busses/i2c-octeon-platdrv.c
index 64bda83e65ac..ac8a1099c523 100644
--- a/drivers/i2c/busses/i2c-octeon-platdrv.c
+++ b/drivers/i2c/busses/i2c-octeon-platdrv.c
@@ -207,10 +207,8 @@ static int octeon_i2c_probe(struct platform_device *pdev)
result = devm_request_irq(&pdev->dev, i2c->hlc_irq,
  octeon_i2c_hlc_isr78, 0,
  DRV_NAME, i2c);
-   if (result < 0) {
-   dev_err(i2c->dev, "failed to attach interrupt\n");
-   goto out;
-   }
+   if (result < 0)
+   goto report_failure;
} else {
i2c->int_enable = octeon_i2c_int_enable;
i2c->int_disable = octeon_i2c_int_disable;
@@ -220,10 +218,8 @@ static int octeon_i2c_probe(struct platform_device *pdev)
  
  	result = devm_request_irq(&pdev->dev, i2c->irq,

  octeon_i2c_isr, 0, DRV_NAME, i2c);
-   if (result < 0) {
-   dev_err(i2c->dev, "failed to attach interrupt\n");
-   goto out;
-   }
+   if (result < 0)
+   goto report_failure;
  
  	if (OCTEON_IS_MODEL(OCTEON_CN38XX))

i2c->broken_irq_check = true;
@@ -251,6 +247,8 @@ static int octeon_i2c_probe(struct platform_device *pdev)
dev_info(i2c->dev, "probed\n");
return 0;
  
+report_failure:

+   dev_err(i2c->dev, "failed to attach interrupt\n");
  out:
return result;
  };





Re: [PATCH v3] cpuset: Enable cpuset controller in default hierarchy

2017-10-25 Thread Waiman Long
On 10/06/2017 05:10 PM, Waiman Long wrote:
> Given the fact that thread mode had been merged into 4.14, it is now
> time to enable cpuset to be used in the default hierarchy (cgroup v2)
> as it is clearly threaded.
>
> The cpuset controller had experienced feature creep since its
> introduction more than a decade ago. Besides the core cpus and mems
> control files to limit cpus and memory nodes, there are a bunch of
> additional features that can be controlled from the userspace. Some of
> the features are of doubtful usefulness and may not be actively used.
>
> After examining the source code of some sample users like systemd,
> libvirt and lxc for their use of those additional features, only
> memory_migrate is used by libvirt.
>
> This patch enables cpuset controller in the default hierarchy with a
> minimal set of features. Currently, only memory_migrate is supported.
> We can certainly add more features to the default hierarchy if there
> is a real user need for them later on.
>
> For features that are actually flags which are set internally, they are
> being combined into a single "cpuset.flags" control file. That includes
> the memory_migrate feature which is the only flag that is currently
> supported. When the "cpuset.flags" file is read, it contains either
> "+mem_migrate" (enabled) or "-mem_migrate" (disabled).
>
> To enable it, use
>
>   # echo +mem_migrate > cpuset.flags
>
> To disable it, use
>
>   # echo -mem_migrate > cpuset.flags
>
> Note that the flag name is changed to "mem_migrate" for better naming
> consistency.
>
> v3:
>  - Further trim the additional features down to just memory_migrate.
>  - Update Documentation/cgroup-v2.txt.
>
> Signed-off-by: Waiman Long 

Ping! Any comment on this patch?

Cheers,
Longman



Re: [PATCH v3] cpuset: Enable cpuset controller in default hierarchy

2017-10-25 Thread Waiman Long
On 10/06/2017 05:10 PM, Waiman Long wrote:
> Given the fact that thread mode had been merged into 4.14, it is now
> time to enable cpuset to be used in the default hierarchy (cgroup v2)
> as it is clearly threaded.
>
> The cpuset controller had experienced feature creep since its
> introduction more than a decade ago. Besides the core cpus and mems
> control files to limit cpus and memory nodes, there are a bunch of
> additional features that can be controlled from the userspace. Some of
> the features are of doubtful usefulness and may not be actively used.
>
> After examining the source code of some sample users like systemd,
> libvirt and lxc for their use of those additional features, only
> memory_migrate is used by libvirt.
>
> This patch enables cpuset controller in the default hierarchy with a
> minimal set of features. Currently, only memory_migrate is supported.
> We can certainly add more features to the default hierarchy if there
> is a real user need for them later on.
>
> For features that are actually flags which are set internally, they are
> being combined into a single "cpuset.flags" control file. That includes
> the memory_migrate feature which is the only flag that is currently
> supported. When the "cpuset.flags" file is read, it contains either
> "+mem_migrate" (enabled) or "-mem_migrate" (disabled).
>
> To enable it, use
>
>   # echo +mem_migrate > cpuset.flags
>
> To disable it, use
>
>   # echo -mem_migrate > cpuset.flags
>
> Note that the flag name is changed to "mem_migrate" for better naming
> consistency.
>
> v3:
>  - Further trim the additional features down to just memory_migrate.
>  - Update Documentation/cgroup-v2.txt.
>
> Signed-off-by: Waiman Long 

Ping! Any comment on this patch?

Cheers,
Longman



RE: [PATCH net-next 2/2] net: dsa: lan9303: Learn addresses on CPU port when bridged

2017-10-25 Thread Woojung.Huh
Hi Egil,

> >> @@ -62,7 +80,10 @@ static struct sk_buff *lan9303_xmit(struct sk_buff
> *skb,
> >> struct net_device *dev)
> >>
> >>lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN);
> >>lan9303_tag[0] = htons(ETH_P_8021Q);
> >> -  lan9303_tag[1] = htons(dp->index | BIT(4));
> >> +  lan9303_tag[1] = lan9303_tx_use_arl(dp, skb->data) ?
> >
> > How about using skb_mac_header(skb) than skb->data?
> >
> >> +  LAN9303_TAG_TX_USE_ALR :
> >> +  dp->index |
> >
> 
> I am not the expert here.
> 
> I see that skb_mac_header() is (skb->head + skb->mac_header). So it will
> cost a few nano seconds per packet. Not the end of the world though.
> But I see that other net/dsa/tag_*.c use skb->data, assuming that
> skb->data point to mac header.
> 

I revisited skb_mac_header(). It is basically skb->data after some pointer math.
I understand that it would take extra steps compared to referring to skb->data directly.
Unless anyone objects, please keep the first patch.

Thanks.
Woojung


RE: [PATCH net-next 2/2] net: dsa: lan9303: Learn addresses on CPU port when bridged

2017-10-25 Thread Woojung.Huh
Hi Egil,

> >> @@ -62,7 +80,10 @@ static struct sk_buff *lan9303_xmit(struct sk_buff
> *skb,
> >> struct net_device *dev)
> >>
> >>lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN);
> >>lan9303_tag[0] = htons(ETH_P_8021Q);
> >> -  lan9303_tag[1] = htons(dp->index | BIT(4));
> >> +  lan9303_tag[1] = lan9303_tx_use_arl(dp, skb->data) ?
> >
> > How about using skb_mac_header(skb) than skb->data?
> >
> >> +  LAN9303_TAG_TX_USE_ALR :
> >> +  dp->index |
> >
> 
> I am not the expert here.
> 
> I see that skb_mac_header() is (skb->head + skb->mac_header). So it will
> cost a few nano seconds per packet. Not the end of the world though.
> But I see that other net/dsa/tag_*.c use skb->data, assuming that
> skb->data point to mac header.
> 

I revisited skb_mac_header(). It is basically skb->data after some pointer math.
I understand that it would take extra steps compared to referring to skb->data directly.
Unless anyone objects, please keep the first patch.

Thanks.
Woojung


Re: Re [PATCH v2] lib: optimize cpumask_next_and()

2017-10-25 Thread Yury Norov
On Wed, Oct 25, 2017 at 05:28:41PM +0200, Clement Courbet wrote:
> Thanks for the comments Yury.
> 
> > But I'd like also to keep _find_next_bit() consistent with
> > _find_next_bit_le()
> 
> Not sure I understand what you're suggesting here: Do you want a
> find_next_and_bit_le() or do you want to make _find_next_bit_le() more
> like _find_next_bit() ? In the latter case we might just want to merge
> it with _find_next_bit() and end up with an extra is_le parameter :)

Both ways will work, but I think that extra is_le is too much.
_find_next_bit_le() should be the copy of _find_next_bit() with the
addition of swapping code.

If you don't need find_next_and_bit_le(), don't add it.
find_{first,last}_bit() doesn't have LE version, for example.

Yury


Re: Re [PATCH v2] lib: optimize cpumask_next_and()

2017-10-25 Thread Yury Norov
On Wed, Oct 25, 2017 at 05:28:41PM +0200, Clement Courbet wrote:
> Thanks for the comments Yury.
> 
> > But I'd like also to keep _find_next_bit() consistent with
> > _find_next_bit_le()
> 
> Not sure I understand what you're suggesting here: Do you want a
> find_next_and_bit_le() or do you want to make _find_next_bit_le() more
> like _find_next_bit() ? In the latter case we might just want to merge
> it with _find_next_bit() and end up with an extra is_le parameter :)

Both ways will work, but I think that extra is_le is too much.
_find_next_bit_le() should be the copy of _find_next_bit() with the
addition of swapping code.

If you don't need find_next_and_bit_le(), don't add it.
find_{first,last}_bit() doesn't have LE version, for example.

Yury


Re: [RFC PATCH 0/9] V4L2 Jobs API WIP

2017-10-25 Thread Laurent Pinchart
Hello,

On Monday, 23 October 2017 11:45:01 EEST Alexandre Courbot wrote:
> On Thu, Oct 19, 2017 at 11:43 PM, Sakari Ailus  wrote:
> > On Thu, Sep 28, 2017 at 06:50:18PM +0900, Alexandre Courbot wrote:
> >> Hi everyone,
> >> 
> >> Here is a new attempt at the "request" (which I propose to rename "jobs")
> >> API for V4L2, hopefully in a manner that can converge to something that
> >> will be merged. The core ideas should be easy to grasp for those
> >> familiar with the previous attempts, yet there are a few important
> >> differences.
> >> 
> >> Most notably, user-space does not need to explicitly allocate and manage
> >> requests/jobs (but still can if this makes sense). We noticed that only
> >> specific use-cases require such an explicit management, and opted for a
> >> jobs queue that controls the flow of work over a set of opened devices.
> >> This should simplify user-space code quite a bit, while still retaining
> >> the ability to manage states explicitly like the previous request API
> >> proposals allowed to do.
> >> 
> >> The jobs API defines a few new concepts that user-space can use to
> >> control the workflow on a set of opened V4L2 devices:
> >> 
> >> A JOB QUEUE can be created from a set of opened FDs that are part of a
> >> pipeline and need to cooperate (be it capture, m2m, or media controller
> >> devices).
> >> 
> >> A JOB can then be set up with regular (if slightly modified) V4L2 ioctls,
> >> and then submitted to the job queue. Once the job queue schedules the
> >> job, its parameters (controls, etc) are applied to the devices of the
> >> queue, and its buffers are processed. Immediately after a job is
> >> submitted, the next job is ready to be set up without further user
> >> action.
> >> 
> >> Once a job completes, it must be dequeued and user-space can then read
> >> back its properties (notably controls) at completion time.
> >> 
> >> Internally, the state of jobs is managed through STATE HANDLERS. Each
> >> driver supporting the jobs API needs to specify an implementation of a
> >> state handler. Fortunately, most drivers can rely on the generic state
> >> handler implementation that simply records and replays a job's parameter
> >> using standard V4L2 functions. Thanks to this, adding jobs API support
> >> to a driver relying on the control framework and vb2 only requires a
> >> dozen lines of codes.
> >> 
> >> Drivers with specific needs or opportunities for optimization can however
> >> provide their own implementation of a state handler. This may in
> >> particular be beneficial for hardware that supports configuration or
> >> command buffers (thinking about VSP1 here).
> >> 
> >> This is still very early work, and focus has been on the following
> >> points:
> >> 
> >> * Provide something that anybody can test (currently using vim2m and
> >> vivid),
> >> * Reuse the current V4L2 APIs as much as possible,
> >> * Remain flexible enough to accommodate the inevitable changes that will
> >> be requested,
> >> * Keep line count low, even if functionality is missing at the moment.
> >> 
> >> Please keep this in mind while going through the patches. In particular,
> >> at the moment the parameters of a job are limited to integer controls. I
> >> know that much more is expected, but V4L2 has quite a learning curve and
> >> I preferred to focus on the general concepts for now. More is coming
> >> though! :)
> >> 
> >> I have written two small example programs that demonstrate the use of
> >> this API:
> >> 
> >> - With a codec device (vim2m):
> >> https://gist.github.com/Gnurou/34c35f1f8e278dad454b51578d239a42
> >> 
> >> - With a capture device (vivid):
> >> https://gist.github.com/Gnurou/5052e6ab41e7c55164b75d2970bc5a04
> >> 
> >> Considering the history with the request API, I don't expect everything
> >> proposed here to be welcome or understood immediately. In particular I
> >> apologize for not reusing any of the previous attempts - I was just more
> >> comfortable laying down my ideas from scratch.
> >> 
> >> If this proposal is not dismissed as complete garbage I will also be
> >> happy to discuss it in-person at the mini-summit in Prague. :)
> > 
> > Thank you for the initiative and the patchset.
> > 
> > While reviewing this patchset, I'm concentrating primarily on the approach
> > taken and the design, not so much in the actual implementation which I
> > don't think matters much at this moment.
> 
> Thanks, that is exactly how I hoped things would go for the moment.
> 
> > It's difficult to avoid seeing many similarities with the Request API
> > patches posted earlier on. And not only that, rather you have to start
> > looking for the differences in what I could call details, while important
> > design decisions could sometimes be only visible in what appear details at
> > this point.
> 
> I was not quite sure whether I should base this work on one of the
> existing patchsets (and in this case, which one) or start from
> scratch. This being my first 

Re: [f2fs-dev] [PATCH 1/2] f2fs: add missing quota_initialize in f2fs_set_acl

2017-10-25 Thread Chao Yu
On 2017/10/25 14:30, Jaegeuk Kim wrote:
> On 10/25, Chao Yu wrote:
>> On 2017/10/25 13:44, Jaegeuk Kim wrote:
>>> On 10/24, Chao Yu wrote:
 On 2017/10/24 6:14, Jaegeuk Kim wrote:
> This patch adds a call to quota_initialize in f2fs_set_acl.
>
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/acl.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
> index 436b3a1464d9..f6471f9d707e 100644
> --- a/fs/f2fs/acl.c
> +++ b/fs/f2fs/acl.c
> @@ -209,6 +209,10 @@ static int __f2fs_set_acl(struct inode *inode, int 
> type,
>   int error;
>   umode_t mode = inode->i_mode;
>  
> + error = dquot_initialize(inode);
> + if (error)
> + return error;

 Could you move this to f2fs_setxattr, and also add missing 
 dquot_initialize in
 unlink and rename like ext4?
>>>
>>> I've checked that f2fs_unlink and f2fs_rename are calling 
>>> dquot_initialize().
>>
>> ext4_unlink:
>>
>>  retval = dquot_initialize(dir);
>>  if (retval)
>>  return retval;
>>  retval = dquot_initialize(d_inode(dentry));
>>  if (retval)
>>  return retval;
>>
>> f2fs_unlink:
>>
>>  err = dquot_initialize(dir);
>>  if (err)
>>  return err;
>>
>> ext4_rename
>>
>>  retval = dquot_initialize(old.dir);
>>  if (retval)
>>  return retval;
>>  retval = dquot_initialize(new.dir);
>>  if (retval)
>>  return retval;
>>
>>  /* Initialize quotas before so that eventual writes go
>>   * in separate transaction */
>>  if (new.inode) {
>>  retval = dquot_initialize(new.inode);
>>  if (retval)
>>  return retval;
>>  }
>>
>> f2fs_rename
>>
>>  err = dquot_initialize(old_dir);
>>  if (err)
>>  goto out;
>>
>>  err = dquot_initialize(new_dir);
>>  if (err)
>>  goto out;
>>
>> ext4 calls one more dquot_initialize than f2fs. I didn't look into this in
>> detail, but it's better to check that. :)
> 
> Ah, okay. :)
> 
> This patch adds calls to quota_initialize in f2fs_set_acl, f2fs_unlink,
> and f2fs_rename.
> 
> Signed-off-by: Jaegeuk Kim 

Reviewed-by: Chao Yu 

Thanks,

> ---
>  fs/f2fs/namei.c | 9 +
>  fs/f2fs/xattr.c | 4 
>  2 files changed, 13 insertions(+)
> 
> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
> index 944f7a6940b6..35d982a475b1 100644
> --- a/fs/f2fs/namei.c
> +++ b/fs/f2fs/namei.c
> @@ -436,6 +436,9 @@ static int f2fs_unlink(struct inode *dir, struct dentry 
> *dentry)
>   return -EIO;
>  
>   err = dquot_initialize(dir);
> + if (err)
> + return err;
> + err = dquot_initialize(inode);
>   if (err)
>   return err;
>  
> @@ -815,6 +818,12 @@ static int f2fs_rename(struct inode *old_dir, struct 
> dentry *old_dentry,
>   if (err)
>   goto out;
>  
> + if (new_inode) {
> + err = dquot_initialize(new_inode);
> + if (err)
> + goto out;
> + }
> +
>   old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
>   if (!old_entry) {
>   if (IS_ERR(old_page))
> diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
> index 147b481c6902..8801db019892 100644
> --- a/fs/f2fs/xattr.c
> +++ b/fs/f2fs/xattr.c
> @@ -686,6 +686,10 @@ int f2fs_setxattr(struct inode *inode, int index, const 
> char *name,
>   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>   int err;
>  
> + err = dquot_initialize(inode);
> + if (err)
> + return err;
> +
>   /* this case is only from init_inode_metadata */
>   if (ipage)
>   return __f2fs_setxattr(inode, index, name, value,
> 


Re: [RFC PATCH 0/9] V4L2 Jobs API WIP

2017-10-25 Thread Laurent Pinchart
Hello,

On Monday, 23 October 2017 11:45:01 EEST Alexandre Courbot wrote:
> On Thu, Oct 19, 2017 at 11:43 PM, Sakari Ailus  wrote:
> > On Thu, Sep 28, 2017 at 06:50:18PM +0900, Alexandre Courbot wrote:
> >> Hi everyone,
> >> 
> >> Here is a new attempt at the "request" (which I propose to rename "jobs")
> >> API for V4L2, hopefully in a manner that can converge to something that
> >> will be merged. The core ideas should be easy to grasp for those
> >> familiar with the previous attempts, yet there are a few important
> >> differences.
> >> 
> >> Most notably, user-space does not need to explicitly allocate and manage
> >> requests/jobs (but still can if this makes sense). We noticed that only
> >> specific use-cases require such an explicit management, and opted for a
> >> jobs queue that controls the flow of work over a set of opened devices.
> >> This should simplify user-space code quite a bit, while still retaining
> >> the ability to manage states explicitly like the previous request API
> >> proposals allowed to do.
> >> 
> >> The jobs API defines a few new concepts that user-space can use to
> >> control the workflow on a set of opened V4L2 devices:
> >> 
> >> A JOB QUEUE can be created from a set of opened FDs that are part of a
> >> pipeline and need to cooperate (be it capture, m2m, or media controller
> >> devices).
> >> 
> >> A JOB can then be set up with regular (if slightly modified) V4L2 ioctls,
> >> and then submitted to the job queue. Once the job queue schedules the
> >> job, its parameters (controls, etc) are applied to the devices of the
> >> queue, and its buffers are processed. Immediately after a job is
> >> submitted, the next job is ready to be set up without further user
> >> action.
> >> 
> >> Once a job completes, it must be dequeued and user-space can then read
> >> back its properties (notably controls) at completion time.
> >> 
> >> Internally, the state of jobs is managed through STATE HANDLERS. Each
> >> driver supporting the jobs API needs to specify an implementation of a
> >> state handler. Fortunately, most drivers can rely on the generic state
> >> handler implementation that simply records and replays a job's parameter
> >> using standard V4L2 functions. Thanks to this, adding jobs API support
> >> to a driver relying on the control framework and vb2 only requires a
> >> dozen lines of codes.
> >> 
> >> Drivers with specific needs or opportunities for optimization can however
> >> provide their own implementation of a state handler. This may in
> >> particular be beneficial for hardware that supports configuration or
> >> command buffers (thinking about VSP1 here).
> >> 
> >> This is still very early work, and focus has been on the following
> >> points:
> >> 
> >> * Provide something that anybody can test (currently using vim2m and
> >> vivid),
> >> * Reuse the current V4L2 APIs as much as possible,
> >> * Remain flexible enough to accommodate the inevitable changes that will
> >> be requested,
> >> * Keep line count low, even if functionality is missing at the moment.
> >> 
> >> Please keep this in mind while going through the patches. In particular,
> >> at the moment the parameters of a job are limited to integer controls. I
> >> know that much more is expected, but V4L2 has quite a learning curve and
> >> I preferred to focus on the general concepts for now. More is coming
> >> though! :)
> >> 
> >> I have written two small example programs that demonstrate the use of
> >> this API:
> >> 
> >> - With a codec device (vim2m):
> >> https://gist.github.com/Gnurou/34c35f1f8e278dad454b51578d239a42
> >> 
> >> - With a capture device (vivid):
> >> https://gist.github.com/Gnurou/5052e6ab41e7c55164b75d2970bc5a04
> >> 
> >> Considering the history with the request API, I don't expect everything
> >> proposed here to be welcome or understood immediately. In particular I
> >> apologize for not reusing any of the previous attempts - I was just more
> >> comfortable laying down my ideas from scratch.
> >> 
> >> If this proposal is not dismissed as complete garbage I will also be
> >> happy to discuss it in-person at the mini-summit in Prague. :)
> > 
> > Thank you for the initiative and the patchset.
> > 
> > While reviewing this patchset, I'm concentrating primarily on the approach
> > taken and the design, not so much in the actual implementation which I
> > don't think matters much at this moment.
> 
> Thanks, that is exactly how I hoped things would go for the moment.
> 
> > It's difficult to avoid seeing many similarities with the Request API
> > patches posted earlier on. And not only that, rather you have to start
> > looking for the differences in what I could call details, while important
> > design decisions could sometimes be only visible in what appear details at
> > this point.
> 
> I was not quite sure whether I should base this work on one of the
> existing patchsets (and in this case, which one) or start from
> scratch. This being my first contribution to a new 

Re: [f2fs-dev] [PATCH 1/2] f2fs: add missing quota_initialize in f2fs_set_acl

2017-10-25 Thread Chao Yu
On 2017/10/25 14:30, Jaegeuk Kim wrote:
> On 10/25, Chao Yu wrote:
>> On 2017/10/25 13:44, Jaegeuk Kim wrote:
>>> On 10/24, Chao Yu wrote:
 On 2017/10/24 6:14, Jaegeuk Kim wrote:
> This patch adds a call to quota_initialize in f2fs_set_acl.
>
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/acl.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
> index 436b3a1464d9..f6471f9d707e 100644
> --- a/fs/f2fs/acl.c
> +++ b/fs/f2fs/acl.c
> @@ -209,6 +209,10 @@ static int __f2fs_set_acl(struct inode *inode, int 
> type,
>   int error;
>   umode_t mode = inode->i_mode;
>  
> + error = dquot_initialize(inode);
> + if (error)
> + return error;

 Could you move this to f2fs_setxattr, and also add missing 
 dquot_initialize in
 unlink and rename like ext4?
>>>
>>> I've checked that f2fs_unlink and f2fs_rename are calling 
>>> dquot_initialize().
>>
>> ext4_unlink:
>>
>>  retval = dquot_initialize(dir);
>>  if (retval)
>>  return retval;
>>  retval = dquot_initialize(d_inode(dentry));
>>  if (retval)
>>  return retval;
>>
>> f2fs_unlink:
>>
>>  err = dquot_initialize(dir);
>>  if (err)
>>  return err;
>>
>> ext4_rename
>>
>>  retval = dquot_initialize(old.dir);
>>  if (retval)
>>  return retval;
>>  retval = dquot_initialize(new.dir);
>>  if (retval)
>>  return retval;
>>
>>  /* Initialize quotas before so that eventual writes go
>>   * in separate transaction */
>>  if (new.inode) {
>>  retval = dquot_initialize(new.inode);
>>  if (retval)
>>  return retval;
>>  }
>>
>> f2fs_rename
>>
>>  err = dquot_initialize(old_dir);
>>  if (err)
>>  goto out;
>>
>>  err = dquot_initialize(new_dir);
>>  if (err)
>>  goto out;
>>
>> ext4 calls one more dquot_initialize than f2fs. I didn't look into this in
>> detail, but it's better to check that. :)
> 
> Ah, okay. :)
> 
> This patch adds calls to quota_initialize in f2fs_set_acl, f2fs_unlink,
> and f2fs_rename.
> 
> Signed-off-by: Jaegeuk Kim 

Reviewed-by: Chao Yu 

Thanks,

> ---
>  fs/f2fs/namei.c | 9 +
>  fs/f2fs/xattr.c | 4 
>  2 files changed, 13 insertions(+)
> 
> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
> index 944f7a6940b6..35d982a475b1 100644
> --- a/fs/f2fs/namei.c
> +++ b/fs/f2fs/namei.c
> @@ -436,6 +436,9 @@ static int f2fs_unlink(struct inode *dir, struct dentry 
> *dentry)
>   return -EIO;
>  
>   err = dquot_initialize(dir);
> + if (err)
> + return err;
> + err = dquot_initialize(inode);
>   if (err)
>   return err;
>  
> @@ -815,6 +818,12 @@ static int f2fs_rename(struct inode *old_dir, struct 
> dentry *old_dentry,
>   if (err)
>   goto out;
>  
> + if (new_inode) {
> + err = dquot_initialize(new_inode);
> + if (err)
> + goto out;
> + }
> +
>   old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
>   if (!old_entry) {
>   if (IS_ERR(old_page))
> diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
> index 147b481c6902..8801db019892 100644
> --- a/fs/f2fs/xattr.c
> +++ b/fs/f2fs/xattr.c
> @@ -686,6 +686,10 @@ int f2fs_setxattr(struct inode *inode, int index, const 
> char *name,
>   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>   int err;
>  
> + err = dquot_initialize(inode);
> + if (err)
> + return err;
> +
>   /* this case is only from init_inode_metadata */
>   if (ipage)
>   return __f2fs_setxattr(inode, index, name, value,
> 


Re: [PATCH] genalloc: Make the avail variable an atomic64_t

2017-10-25 Thread Logan Gunthorpe



On 25/10/17 09:32 AM, sba...@raithlin.com wrote:

From: Stephen Bates 

If the amount of resources allocated to a gen_pool exceeds 2^32 then
the avail atomic overflows and this causes problems when clients try
and borrow resources from the pool.

Add the <linux/atomic.h> header to pull in atomic64 operations on
platforms that do not support them natively.

Signed-off-by: Stephen Bates 


Reviewed-by: Logan Gunthorpe 

This looks pretty straightforward to me.

Logan


Re: [PATCH] genalloc: Make the avail variable an atomic64_t

2017-10-25 Thread Logan Gunthorpe



On 25/10/17 09:32 AM, sba...@raithlin.com wrote:

From: Stephen Bates 

If the amount of resources allocated to a gen_pool exceeds 2^32 then
the avail atomic overflows and this causes problems when clients try
and borrow resources from the pool.

Add the <linux/atomic.h> header to pull in atomic64 operations on
platforms that do not support them natively.

Signed-off-by: Stephen Bates 


Reviewed-by: Logan Gunthorpe 

This looks pretty straightforward to me.

Logan


Re: [PATCH v3] f2fs: add cur_reserved_blocks to support soft block reservation

2017-10-25 Thread Chao Yu
On 2017/10/25 22:06, Yunlong Song wrote:
> Hi, Chao,
>     Please see my comments below.
> 
> On 2017/10/25 20:26, Chao Yu wrote:
>> On 2017/10/25 18:02, Yunlong Song wrote:
>>> ping...
>> I've replied in this thread, check your email list please, or you can check 
>> the
>> comments in below link:
>>
>> https://patchwork.kernel.org/patch/9909407/
>>
>> Anyway, see comments below.
>>
>>> On 2017/8/18 23:09, Yunlong Song wrote:
 This patch adds cur_reserved_blocks to extend reserved_blocks sysfs
 interface to be soft threshold, which allows user configure it exceeding
 current available user space. To ensure there is enough space for
 supporting system's activation, this patch does not set the reserved space
 to the configured reserved_blocks value at once, instead, it safely
 increase cur_reserved_blocks in dev_valid_block(,node)_count to only take
 up the blocks which are just obsoleted.

 Signed-off-by: Yunlong Song 
 Signed-off-by: Chao Yu 
 ---
    Documentation/ABI/testing/sysfs-fs-f2fs |  3 ++-
    fs/f2fs/f2fs.h  | 13 +++--
    fs/f2fs/super.c |  3 ++-
    fs/f2fs/sysfs.c | 15 +--
    4 files changed, 28 insertions(+), 6 deletions(-)

 diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
 b/Documentation/ABI/testing/sysfs-fs-f2fs
 index 11b7f4e..ba282ca 100644
 --- a/Documentation/ABI/testing/sysfs-fs-f2fs
 +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
 @@ -138,7 +138,8 @@ What:    /sys/fs/f2fs//reserved_blocks
    Date:    June 2017
    Contact:    "Chao Yu" 
    Description:
 - Controls current reserved blocks in system.
 + Controls current reserved blocks in system, the threshold
 + is soft, it could exceed current available user space.
  What:    /sys/fs/f2fs//gc_urgent
    Date:    August 2017
 diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
 index 2f20b6b..84ccbdc 100644
 --- a/fs/f2fs/f2fs.h
 +++ b/fs/f2fs/f2fs.h
 @@ -1041,6 +1041,7 @@ struct f2fs_sb_info {
    block_t discard_blks;    /* discard command candidats */
    block_t last_valid_block_count;    /* for recovery */
    block_t reserved_blocks;    /* configurable reserved blocks */
 +    block_t cur_reserved_blocks;    /* current reserved blocks */
  u32 s_next_generation;    /* for NFS support */
    @@ -1515,7 +1516,8 @@ static inline int inc_valid_block_count(struct 
 f2fs_sb_info *sbi,
  spin_lock(>stat_lock);
    sbi->total_valid_block_count += (block_t)(*count);
 -    avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks;
 +    avail_user_block_count = sbi->user_block_count -
 +    sbi->cur_reserved_blocks;
    if (unlikely(sbi->total_valid_block_count > 
 avail_user_block_count)) {
    diff = sbi->total_valid_block_count - avail_user_block_count;
    *count -= diff;
 @@ -1549,6 +1551,10 @@ static inline void dec_valid_block_count(struct 
 f2fs_sb_info *sbi,
    f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
    f2fs_bug_on(sbi, inode->i_blocks < sectors);
    sbi->total_valid_block_count -= (block_t)count;
 +    if (sbi->reserved_blocks &&
 +    sbi->reserved_blocks != sbi->cur_reserved_blocks)
>> It's a redundant check here...
> I think in most cases, cur_reserved_blocks is equal to reserved_blocks, so we 
> do not need to calculate min any more, otherwise,
> if reserved_blocks is not 0, it will calculate min and set 
> current_reserved_blocks each time.

OK, IMO, under some conditions that check saves dirtying the cache line, which
reduces cache line misses.

>>
 +    sbi->cur_reserved_blocks = min(sbi->reserved_blocks,
 +    sbi->cur_reserved_blocks + count);
    spin_unlock(&sbi->stat_lock);
    f2fs_i_blocks_write(inode, count, false, true);
    }
 @@ -1695,7 +1701,7 @@ static inline int inc_valid_node_count(struct 
 f2fs_sb_info *sbi,
    spin_lock(&sbi->stat_lock);
  valid_block_count = sbi->total_valid_block_count + 1;
 -    if (unlikely(valid_block_count + sbi->reserved_blocks >
 +    if (unlikely(valid_block_count + sbi->cur_reserved_blocks >
    sbi->user_block_count)) {
    spin_unlock(&sbi->stat_lock);
    goto enospc;
 @@ -1738,6 +1744,9 @@ static inline void dec_valid_node_count(struct 
 f2fs_sb_info *sbi,
  sbi->total_valid_node_count--;
    sbi->total_valid_block_count--;
 +    if (sbi->reserved_blocks &&
 +    sbi->reserved_blocks != sbi->cur_reserved_blocks)
>> Checking low 

Re: [PATCH v3] f2fs: add cur_reserved_blocks to support soft block reservation

2017-10-25 Thread Chao Yu
On 2017/10/25 22:06, Yunlong Song wrote:
> Hi, Chao,
>     Please see my comments below.
> 
> On 2017/10/25 20:26, Chao Yu wrote:
>> On 2017/10/25 18:02, Yunlong Song wrote:
>>> ping...
>> I've replied in this thread, check your email list please, or you can check 
>> the
>> comments in below link:
>>
>> https://patchwork.kernel.org/patch/9909407/
>>
>> Anyway, see comments below.
>>
>>> On 2017/8/18 23:09, Yunlong Song wrote:
 This patch adds cur_reserved_blocks to extend reserved_blocks sysfs
 interface to be soft threshold, which allows user configure it exceeding
 current available user space. To ensure there is enough space for
 supporting system's activation, this patch does not set the reserved space
 to the configured reserved_blocks value at once; instead, it safely
 increases cur_reserved_blocks in dec_valid_block(,node)_count to only take
 up the blocks which are just obsoleted.

 Signed-off-by: Yunlong Song 
 Signed-off-by: Chao Yu 
 ---
    Documentation/ABI/testing/sysfs-fs-f2fs |  3 ++-
    fs/f2fs/f2fs.h  | 13 +++--
    fs/f2fs/super.c |  3 ++-
    fs/f2fs/sysfs.c | 15 +--
    4 files changed, 28 insertions(+), 6 deletions(-)

 diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
 b/Documentation/ABI/testing/sysfs-fs-f2fs
 index 11b7f4e..ba282ca 100644
 --- a/Documentation/ABI/testing/sysfs-fs-f2fs
 +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
 @@ -138,7 +138,8 @@ What:    /sys/fs/f2fs//reserved_blocks
    Date:    June 2017
    Contact:    "Chao Yu" 
    Description:
 - Controls current reserved blocks in system.
 + Controls current reserved blocks in system, the threshold
 + is soft, it could exceed current available user space.
  What:    /sys/fs/f2fs//gc_urgent
    Date:    August 2017
 diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
 index 2f20b6b..84ccbdc 100644
 --- a/fs/f2fs/f2fs.h
 +++ b/fs/f2fs/f2fs.h
 @@ -1041,6 +1041,7 @@ struct f2fs_sb_info {
    block_t discard_blks;    /* discard command candidats */
    block_t last_valid_block_count;    /* for recovery */
    block_t reserved_blocks;    /* configurable reserved blocks */
 +    block_t cur_reserved_blocks;    /* current reserved blocks */
  u32 s_next_generation;    /* for NFS support */
    @@ -1515,7 +1516,8 @@ static inline int inc_valid_block_count(struct 
 f2fs_sb_info *sbi,
  spin_lock(&sbi->stat_lock);
    sbi->total_valid_block_count += (block_t)(*count);
 -    avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks;
 +    avail_user_block_count = sbi->user_block_count -
 +    sbi->cur_reserved_blocks;
    if (unlikely(sbi->total_valid_block_count > 
 avail_user_block_count)) {
    diff = sbi->total_valid_block_count - avail_user_block_count;
    *count -= diff;
 @@ -1549,6 +1551,10 @@ static inline void dec_valid_block_count(struct 
 f2fs_sb_info *sbi,
    f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
    f2fs_bug_on(sbi, inode->i_blocks < sectors);
    sbi->total_valid_block_count -= (block_t)count;
 +    if (sbi->reserved_blocks &&
 +    sbi->reserved_blocks != sbi->cur_reserved_blocks)
>> It's a redundant check here...
> I think in most cases, cur_reserved_blocks is equal to reserved_blocks, so we 
> do not need to calculate min any more, otherwise,
> if reserved_blocks is not 0, it will calculate min and set 
> current_reserved_blocks each time.

OK, IMO, under some conditions that check saves dirtying the cache line, which
reduces cache line misses.

>>
 +    sbi->cur_reserved_blocks = min(sbi->reserved_blocks,
 +    sbi->cur_reserved_blocks + count);
    spin_unlock(&sbi->stat_lock);
    f2fs_i_blocks_write(inode, count, false, true);
    }
 @@ -1695,7 +1701,7 @@ static inline int inc_valid_node_count(struct 
 f2fs_sb_info *sbi,
    spin_lock(&sbi->stat_lock);
  valid_block_count = sbi->total_valid_block_count + 1;
 -    if (unlikely(valid_block_count + sbi->reserved_blocks >
 +    if (unlikely(valid_block_count + sbi->cur_reserved_blocks >
    sbi->user_block_count)) {
    spin_unlock(&sbi->stat_lock);
    goto enospc;
 @@ -1738,6 +1744,9 @@ static inline void dec_valid_node_count(struct 
 f2fs_sb_info *sbi,
  sbi->total_valid_node_count--;
    sbi->total_valid_block_count--;
 +    if (sbi->reserved_blocks &&
 +    sbi->reserved_blocks != sbi->cur_reserved_blocks)
>> Checking low boundary is more safe here.
> I think cur_reserved_blocks can 

Re: [1/2] drivers: firmware: psci: Add psci_is_available()

2017-10-25 Thread Lorenzo Pieralisi
On Wed, Oct 11, 2017 at 10:03:01AM +0200, Geert Uytterhoeven wrote:
> PSCI support may be disabled at build time (by configuration) or at
> run-time (PSCI firmware not present).  While CONFIG_ARM_PSCI_FW can be
> used to check for build time enablement, there is currently no simple
> way to check if PSCI is actually available and used.
> 
> Hence add a helper function to check if PSCI is available.

Hi Geert,

excuse us for the delay in responding. I think it would be better if the
check just carries out a DT/ACPI matching check rather than being based
on PSCI ops initialization but before doing that I would like first to
understand what this function can be actually used for (ie I do not
think the usage in the PSCI checker is relevant to this discussion).

Thanks,
Lorenzo

> This is useful for e.g. drivers that are used on platforms with and
> without PSCI.  Such drivers may need to take provisions for proper
> operation when PSCI is used, and/or to implement functionality that is
> usually provided by PSCI.
> 
> Signed-off-by: Geert Uytterhoeven 
> ---
>  drivers/firmware/psci.c | 5 +
>  include/linux/psci.h| 2 ++
>  2 files changed, 7 insertions(+)
> 
> diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
> index da469c972b503f83..a3a11e2d8aaa 100644
> --- a/drivers/firmware/psci.c
> +++ b/drivers/firmware/psci.c
> @@ -670,6 +670,11 @@ int __init psci_dt_init(void)
>   return init_fn(np);
>  }
>  
> +bool psci_is_available(void)
> +{
> + return psci_ops.cpu_off && psci_ops.cpu_on && psci_ops.cpu_suspend;
> +}
> +
>  #ifdef CONFIG_ACPI
>  /*
>   * We use PSCI 0.2+ when ACPI is deployed on ARM64 and it's
> diff --git a/include/linux/psci.h b/include/linux/psci.h
> index bdea1cb5e1db142b..2bdee325aeb80cf6 100644
> --- a/include/linux/psci.h
> +++ b/include/linux/psci.h
> @@ -39,8 +39,10 @@ extern struct psci_operations psci_ops;
>  
>  #if defined(CONFIG_ARM_PSCI_FW)
>  int __init psci_dt_init(void);
> +bool psci_is_available(void);
>  #else
>  static inline int psci_dt_init(void) { return 0; }
> +static inline bool psci_is_available(void) { return false; }
>  #endif
>  
>  #if defined(CONFIG_ARM_PSCI_FW) && defined(CONFIG_ACPI)


Re: [1/2] drivers: firmware: psci: Add psci_is_available()

2017-10-25 Thread Lorenzo Pieralisi
On Wed, Oct 11, 2017 at 10:03:01AM +0200, Geert Uytterhoeven wrote:
> PSCI support may be disabled at build time (by configuration) or at
> run-time (PSCI firmware not present).  While CONFIG_ARM_PSCI_FW can be
> used to check for build time enablement, there is currently no simple
> way to check if PSCI is actually available and used.
> 
> Hence add a helper function to check if PSCI is available.

Hi Geert,

excuse us for the delay in responding. I think it would be better if the
check just carries out a DT/ACPI matching check rather than being based
on PSCI ops initialization but before doing that I would like first to
understand what this function can be actually used for (ie I do not
think the usage in the PSCI checker is relevant to this discussion).

Thanks,
Lorenzo

> This is useful for e.g. drivers that are used on platforms with and
> without PSCI.  Such drivers may need to take provisions for proper
> operation when PSCI is used, and/or to implement functionality that is
> usually provided by PSCI.
> 
> Signed-off-by: Geert Uytterhoeven 
> ---
>  drivers/firmware/psci.c | 5 +
>  include/linux/psci.h| 2 ++
>  2 files changed, 7 insertions(+)
> 
> diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
> index da469c972b503f83..a3a11e2d8aaa 100644
> --- a/drivers/firmware/psci.c
> +++ b/drivers/firmware/psci.c
> @@ -670,6 +670,11 @@ int __init psci_dt_init(void)
>   return init_fn(np);
>  }
>  
> +bool psci_is_available(void)
> +{
> + return psci_ops.cpu_off && psci_ops.cpu_on && psci_ops.cpu_suspend;
> +}
> +
>  #ifdef CONFIG_ACPI
>  /*
>   * We use PSCI 0.2+ when ACPI is deployed on ARM64 and it's
> diff --git a/include/linux/psci.h b/include/linux/psci.h
> index bdea1cb5e1db142b..2bdee325aeb80cf6 100644
> --- a/include/linux/psci.h
> +++ b/include/linux/psci.h
> @@ -39,8 +39,10 @@ extern struct psci_operations psci_ops;
>  
>  #if defined(CONFIG_ARM_PSCI_FW)
>  int __init psci_dt_init(void);
> +bool psci_is_available(void);
>  #else
>  static inline int psci_dt_init(void) { return 0; }
> +static inline bool psci_is_available(void) { return false; }
>  #endif
>  
>  #if defined(CONFIG_ARM_PSCI_FW) && defined(CONFIG_ACPI)


[PATCH] genalloc: Make the avail variable an atomic64_t

2017-10-25 Thread sbates
From: Stephen Bates 

If the amount of resources allocated to a gen_pool exceeds 2^32 then
the avail atomic overflows and this causes problems when clients try
and borrow resources from the pool.

Add the <linux/atomic.h> header to pull in atomic64 operations on
platforms that do not support them natively.

Signed-off-by: Stephen Bates 
---
 include/linux/genalloc.h |  3 ++-
 lib/genalloc.c   | 10 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 6dfec4d..b327c31 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -32,6 +32,7 @@
 
 #include <linux/types.h>
 #include <linux/spinlock_types.h>
+#include <linux/atomic.h>
 
 struct device;
 struct device_node;
@@ -71,7 +72,7 @@ struct gen_pool {
  */
 struct gen_pool_chunk {
struct list_head next_chunk;/* next chunk in pool */
-   atomic_t avail;
+   atomic64_t avail;
phys_addr_t phys_addr;  /* physical starting address of memory 
chunk */
unsigned long start_addr;   /* start address of memory chunk */
unsigned long end_addr; /* end address of memory chunk 
(inclusive) */
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 144fe6b..a97df2b 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -194,7 +194,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long 
virt, phys_addr_t phy
chunk->phys_addr = phys;
chunk->start_addr = virt;
chunk->end_addr = virt + size - 1;
-   atomic_set(&chunk->avail, size);
+   atomic64_set(&chunk->avail, size);
 
spin_lock(&pool->lock);
list_add_rcu(&chunk->next_chunk, &pool->chunks);
@@ -304,7 +304,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, 
size_t size,
nbits = (size + (1UL << order) - 1) >> order;
rcu_read_lock();
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
-   if (size > atomic_read(&chunk->avail))
+   if (size > atomic64_read(&chunk->avail))
continue;
 
start_bit = 0;
@@ -324,7 +324,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, 
size_t size,
 
addr = chunk->start_addr + ((unsigned long)start_bit << order);
size = nbits << order;
-   atomic_sub(size, &chunk->avail);
+   atomic64_sub(size, &chunk->avail);
break;
}
rcu_read_unlock();
@@ -390,7 +390,7 @@ void gen_pool_free(struct gen_pool *pool, unsigned long 
addr, size_t size)
remain = bitmap_clear_ll(chunk->bits, start_bit, nbits);
BUG_ON(remain);
size = nbits << order;
-   atomic_add(size, &chunk->avail);
+   atomic64_add(size, &chunk->avail);
rcu_read_unlock();
return;
}
@@ -464,7 +464,7 @@ size_t gen_pool_avail(struct gen_pool *pool)
 
rcu_read_lock();
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
-   avail += atomic_read(&chunk->avail);
+   avail += atomic64_read(&chunk->avail);
rcu_read_unlock();
return avail;
 }
-- 
2.7.4



[PATCH] genalloc: Make the avail variable an atomic64_t

2017-10-25 Thread sbates
From: Stephen Bates 

If the amount of resources allocated to a gen_pool exceeds 2^32 then
the avail atomic overflows and this causes problems when clients try
and borrow resources from the pool.

Add the <linux/atomic.h> header to pull in atomic64 operations on
platforms that do not support them natively.

Signed-off-by: Stephen Bates 
---
 include/linux/genalloc.h |  3 ++-
 lib/genalloc.c   | 10 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 6dfec4d..b327c31 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -32,6 +32,7 @@
 
 #include <linux/types.h>
 #include <linux/spinlock_types.h>
+#include <linux/atomic.h>
 
 struct device;
 struct device_node;
@@ -71,7 +72,7 @@ struct gen_pool {
  */
 struct gen_pool_chunk {
struct list_head next_chunk;/* next chunk in pool */
-   atomic_t avail;
+   atomic64_t avail;
phys_addr_t phys_addr;  /* physical starting address of memory 
chunk */
unsigned long start_addr;   /* start address of memory chunk */
unsigned long end_addr; /* end address of memory chunk 
(inclusive) */
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 144fe6b..a97df2b 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -194,7 +194,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long 
virt, phys_addr_t phy
chunk->phys_addr = phys;
chunk->start_addr = virt;
chunk->end_addr = virt + size - 1;
-   atomic_set(&chunk->avail, size);
+   atomic64_set(&chunk->avail, size);
 
spin_lock(&pool->lock);
list_add_rcu(&chunk->next_chunk, &pool->chunks);
@@ -304,7 +304,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, 
size_t size,
nbits = (size + (1UL << order) - 1) >> order;
rcu_read_lock();
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
-   if (size > atomic_read(&chunk->avail))
+   if (size > atomic64_read(&chunk->avail))
continue;
 
start_bit = 0;
@@ -324,7 +324,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, 
size_t size,
 
addr = chunk->start_addr + ((unsigned long)start_bit << order);
size = nbits << order;
-   atomic_sub(size, &chunk->avail);
+   atomic64_sub(size, &chunk->avail);
break;
}
rcu_read_unlock();
@@ -390,7 +390,7 @@ void gen_pool_free(struct gen_pool *pool, unsigned long 
addr, size_t size)
remain = bitmap_clear_ll(chunk->bits, start_bit, nbits);
BUG_ON(remain);
size = nbits << order;
-   atomic_add(size, &chunk->avail);
+   atomic64_add(size, &chunk->avail);
rcu_read_unlock();
return;
}
@@ -464,7 +464,7 @@ size_t gen_pool_avail(struct gen_pool *pool)
 
rcu_read_lock();
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
-   avail += atomic_read(&chunk->avail);
+   avail += atomic64_read(&chunk->avail);
rcu_read_unlock();
return avail;
 }
-- 
2.7.4



Re: [PATCH 6/8] PCI: host: brcmstb: add MSI capability

2017-10-25 Thread Jim Quinlan
On Tue, Oct 24, 2017 at 2:57 PM, Florian Fainelli  wrote:
> Hi Jim,
>
> On 10/24/2017 11:15 AM, Jim Quinlan wrote:
>> This commit adds MSI to the Broadcom STB PCIe host controller. It does
>> not add MSIX since that functionality is not in the HW.  The MSI
>> controller is physically located within the PCIe block, however, there
>> is no reason why the MSI controller could not be moved elsewhere in
>> the future.
>>
>> Since the internal Brcmstb MSI controller is intertwined with the PCIe
>> controller, it is not its own platform device but rather part of the
>> PCIe platform device.
>>
>> Signed-off-by: Jim Quinlan 
>> ---
>>  drivers/pci/host/Kconfig   |  12 ++
>>  drivers/pci/host/Makefile  |   1 +
>>  drivers/pci/host/pci-brcmstb-msi.c | 318 
>> +
>>  drivers/pci/host/pci-brcmstb.c |  72 +++--
>>  drivers/pci/host/pci-brcmstb.h |  26 +++
>>  5 files changed, 419 insertions(+), 10 deletions(-)
>>  create mode 100644 drivers/pci/host/pci-brcmstb-msi.c
>>
>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>> index b9b4f11..54aa5d2 100644
>> --- a/drivers/pci/host/Kconfig
>> +++ b/drivers/pci/host/Kconfig
>> @@ -228,4 +228,16 @@ config PCI_BRCMSTB
>>   default ARCH_BRCMSTB || BMIPS_GENERIC
>>   help
>> Adds support for Broadcom Settop Box PCIe host controller.
>> +   To compile this driver as a module, choose m here.
>> +
>> +config PCI_BRCMSTB_MSI
>> + bool "Broadcom Brcmstb PCIe MSI support"
>> + depends on ARCH_BRCMSTB || BMIPS_GENERIC
>
> This could probably be depends on PCI_BRCMSTB, which would imply these
> two conditions. PCI_BRCMSTB_MSI on its own is probably not very useful
> without the parent RC driver.
>
>> + depends on OF
>> + depends on PCI_MSI
>> + default PCI_BRCMSTB
>> + help
>> +   Say Y here if you want to enable MSI support for Broadcom's iProc
>> +   PCIe controller
>> +
>>  endmenu
>> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
>> index c283321..1026d6f 100644
>> --- a/drivers/pci/host/Makefile
>> +++ b/drivers/pci/host/Makefile
>> @@ -23,6 +23,7 @@ obj-$(CONFIG_PCIE_TANGO_SMP8759) += pcie-tango.o
>>  obj-$(CONFIG_VMD) += vmd.o
>>  obj-$(CONFIG_PCI_BRCMSTB) += brcmstb-pci.o
>>  brcmstb-pci-objs := pci-brcmstb.o pci-brcmstb-dma.o
>> +obj-$(CONFIG_PCI_BRCMSTB_MSI) += pci-brcmstb-msi.o
>
> Should we combine this file with the brcmstb-pci.o? There is probably no
> functional difference, except that pci-brcmstb-msi.ko needs to be loaded
> first, right?
> --
> Florian

If you look at the pci/host/Kconfig you will see that other drivers
also have a separate MSI config (eg iproc, altera, xgene) so there is
precedent.  The reason that pci-brcmstb-msi.c is its own file is
because it depends on an irq function that is not exported.  That is
why CONFIG_PCI_BRCMSTB_MSI is bool, and CONFIG_PCI_BRCMSTB is
tristate.  -- Jim


Re: [PATCH 6/8] PCI: host: brcmstb: add MSI capability

2017-10-25 Thread Jim Quinlan
On Tue, Oct 24, 2017 at 2:57 PM, Florian Fainelli  wrote:
> Hi Jim,
>
> On 10/24/2017 11:15 AM, Jim Quinlan wrote:
>> This commit adds MSI to the Broadcom STB PCIe host controller. It does
>> not add MSIX since that functionality is not in the HW.  The MSI
>> controller is physically located within the PCIe block, however, there
>> is no reason why the MSI controller could not be moved elsewhere in
>> the future.
>>
>> Since the internal Brcmstb MSI controller is intertwined with the PCIe
>> controller, it is not its own platform device but rather part of the
>> PCIe platform device.
>>
>> Signed-off-by: Jim Quinlan 
>> ---
>>  drivers/pci/host/Kconfig   |  12 ++
>>  drivers/pci/host/Makefile  |   1 +
>>  drivers/pci/host/pci-brcmstb-msi.c | 318 
>> +
>>  drivers/pci/host/pci-brcmstb.c |  72 +++--
>>  drivers/pci/host/pci-brcmstb.h |  26 +++
>>  5 files changed, 419 insertions(+), 10 deletions(-)
>>  create mode 100644 drivers/pci/host/pci-brcmstb-msi.c
>>
>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>> index b9b4f11..54aa5d2 100644
>> --- a/drivers/pci/host/Kconfig
>> +++ b/drivers/pci/host/Kconfig
>> @@ -228,4 +228,16 @@ config PCI_BRCMSTB
>>   default ARCH_BRCMSTB || BMIPS_GENERIC
>>   help
>> Adds support for Broadcom Settop Box PCIe host controller.
>> +   To compile this driver as a module, choose m here.
>> +
>> +config PCI_BRCMSTB_MSI
>> + bool "Broadcom Brcmstb PCIe MSI support"
>> + depends on ARCH_BRCMSTB || BMIPS_GENERIC
>
> This could probably be depends on PCI_BRCMSTB, which would imply these
> two conditions. PCI_BRCMSTB_MSI on its own is probably not very useful
> without the parent RC driver.
>
>> + depends on OF
>> + depends on PCI_MSI
>> + default PCI_BRCMSTB
>> + help
>> +   Say Y here if you want to enable MSI support for Broadcom's iProc
>> +   PCIe controller
>> +
>>  endmenu
>> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
>> index c283321..1026d6f 100644
>> --- a/drivers/pci/host/Makefile
>> +++ b/drivers/pci/host/Makefile
>> @@ -23,6 +23,7 @@ obj-$(CONFIG_PCIE_TANGO_SMP8759) += pcie-tango.o
>>  obj-$(CONFIG_VMD) += vmd.o
>>  obj-$(CONFIG_PCI_BRCMSTB) += brcmstb-pci.o
>>  brcmstb-pci-objs := pci-brcmstb.o pci-brcmstb-dma.o
>> +obj-$(CONFIG_PCI_BRCMSTB_MSI) += pci-brcmstb-msi.o
>
> Should we combine this file with the brcmstb-pci.o? There is probably no
> functional difference, except that pci-brcmstb-msi.ko needs to be loaded
> first, right?
> --
> Florian

If you look at the pci/host/Kconfig you will see that other drivers
also have a separate MSI config (eg iproc, altera, xgene) so there is
precedent.  The reason that pci-brcmstb-msi.c is its own file is
because it depends on an irq function that is not exported.  That is
why CONFIG_PCI_BRCMSTB_MSI is bool, and CONFIG_PCI_BRCMSTB is
tristate.  -- Jim


Re: [PATCH] powerpc/powernv: Enable reset_devices parameter to issue a PHB reset

2017-10-25 Thread Guilherme G. Piccoli
V2 just sent to linuxppc-dev[0] list, with some simplifications.
This one is then officially dropped!

Thanks,


Guilherme

[0] http://patchwork.ozlabs.org/patch/830320



Re: [PATCH] powerpc/powernv: Enable reset_devices parameter to issue a PHB reset

2017-10-25 Thread Guilherme G. Piccoli
V2 just sent to linuxppc-dev[0] list, with some simplifications.
This one is then officially dropped!

Thanks,


Guilherme

[0] http://patchwork.ozlabs.org/patch/830320



Re [PATCH v2] lib: optimize cpumask_next_and()

2017-10-25 Thread Clement Courbet
Thanks for the comments Yury.

> But I'd like also to keep _find_next_bit() consistent with
> _find_next_bit_le()

Not sure I understand what you're suggesting here: Do you want a
find_next_and_bit_le() or do you want to make _find_next_bit_le() more
like _find_next_bit() ? In the latter case we might just want to merge
it with _find_next_bit() and end up with an extra is_le parameter :)



Re [PATCH v2] lib: optimize cpumask_next_and()

2017-10-25 Thread Clement Courbet
Thanks for the comments Yury.

> But I'd like also to keep _find_next_bit() consistent with
> _find_next_bit_le()

Not sure I understand what you're suggesting here: Do you want a
find_next_and_bit_le() or do you want to make _find_next_bit_le() more
like _find_next_bit() ? In the latter case we might just want to merge
it with _find_next_bit() and end up with an extra is_le parameter :)



[PATCH v8 1/2] crypto: s5p-sss: Change spaces to tabs

2017-10-25 Thread Kamil Konieczny
Change #define lines to use tabs consistently.

Acked-by: Vladimir Zapolskiy 
Reviewed-by: Krzysztof Kozlowski 
Signed-off-by: Kamil Konieczny 
---
 drivers/crypto/s5p-sss.c | 190 +++
 1 file changed, 95 insertions(+), 95 deletions(-)

diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index 7ac657f46d15..dfae1865c384 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -30,98 +30,98 @@
 #include 
 #include 
 
-#define _SBF(s, v)  ((v) << (s))
+#define _SBF(s, v) ((v) << (s))
 
 /* Feed control registers */
-#define SSS_REG_FCINTSTAT   0x0000
-#define SSS_FCINTSTAT_BRDMAINT  BIT(3)
-#define SSS_FCINTSTAT_BTDMAINT  BIT(2)
-#define SSS_FCINTSTAT_HRDMAINT  BIT(1)
-#define SSS_FCINTSTAT_PKDMAINT  BIT(0)
-
-#define SSS_REG_FCINTENSET  0x0004
-#define SSS_FCINTENSET_BRDMAINTENSETBIT(3)
-#define SSS_FCINTENSET_BTDMAINTENSETBIT(2)
-#define SSS_FCINTENSET_HRDMAINTENSETBIT(1)
-#define SSS_FCINTENSET_PKDMAINTENSETBIT(0)
-
-#define SSS_REG_FCINTENCLR  0x0008
-#define SSS_FCINTENCLR_BRDMAINTENCLRBIT(3)
-#define SSS_FCINTENCLR_BTDMAINTENCLRBIT(2)
-#define SSS_FCINTENCLR_HRDMAINTENCLRBIT(1)
-#define SSS_FCINTENCLR_PKDMAINTENCLRBIT(0)
-
-#define SSS_REG_FCINTPEND   0x000C
-#define SSS_FCINTPEND_BRDMAINTP BIT(3)
-#define SSS_FCINTPEND_BTDMAINTP BIT(2)
-#define SSS_FCINTPEND_HRDMAINTP BIT(1)
-#define SSS_FCINTPEND_PKDMAINTP BIT(0)
-
-#define SSS_REG_FCFIFOSTAT  0x0010
-#define SSS_FCFIFOSTAT_BRFIFOFULBIT(7)
-#define SSS_FCFIFOSTAT_BRFIFOEMPBIT(6)
-#define SSS_FCFIFOSTAT_BTFIFOFULBIT(5)
-#define SSS_FCFIFOSTAT_BTFIFOEMPBIT(4)
-#define SSS_FCFIFOSTAT_HRFIFOFULBIT(3)
-#define SSS_FCFIFOSTAT_HRFIFOEMPBIT(2)
-#define SSS_FCFIFOSTAT_PKFIFOFULBIT(1)
-#define SSS_FCFIFOSTAT_PKFIFOEMPBIT(0)
-
-#define SSS_REG_FCFIFOCTRL  0x0014
-#define SSS_FCFIFOCTRL_DESSEL   BIT(2)
-#define SSS_HASHIN_INDEPENDENT  _SBF(0, 0x00)
-#define SSS_HASHIN_CIPHER_INPUT _SBF(0, 0x01)
-#define SSS_HASHIN_CIPHER_OUTPUT_SBF(0, 0x02)
-
-#define SSS_REG_FCBRDMAS0x0020
-#define SSS_REG_FCBRDMAL0x0024
-#define SSS_REG_FCBRDMAC0x0028
-#define SSS_FCBRDMAC_BYTESWAP   BIT(1)
-#define SSS_FCBRDMAC_FLUSH  BIT(0)
-
-#define SSS_REG_FCBTDMAS0x0030
-#define SSS_REG_FCBTDMAL0x0034
-#define SSS_REG_FCBTDMAC0x0038
-#define SSS_FCBTDMAC_BYTESWAP   BIT(1)
-#define SSS_FCBTDMAC_FLUSH  BIT(0)
-
-#define SSS_REG_FCHRDMAS0x0040
-#define SSS_REG_FCHRDMAL0x0044
-#define SSS_REG_FCHRDMAC0x0048
-#define SSS_FCHRDMAC_BYTESWAP   BIT(1)
-#define SSS_FCHRDMAC_FLUSH  BIT(0)
-
-#define SSS_REG_FCPKDMAS0x0050
-#define SSS_REG_FCPKDMAL0x0054
-#define SSS_REG_FCPKDMAC0x0058
-#define SSS_FCPKDMAC_BYTESWAP   BIT(3)
-#define SSS_FCPKDMAC_DESCENDBIT(2)
-#define SSS_FCPKDMAC_TRANSMIT   BIT(1)
-#define SSS_FCPKDMAC_FLUSH  BIT(0)
-
-#define SSS_REG_FCPKDMAO0x005C
+#define SSS_REG_FCINTSTAT  0x0000
+#define SSS_FCINTSTAT_BRDMAINT BIT(3)
+#define SSS_FCINTSTAT_BTDMAINT BIT(2)
+#define SSS_FCINTSTAT_HRDMAINT BIT(1)
+#define SSS_FCINTSTAT_PKDMAINT BIT(0)
+
+#define SSS_REG_FCINTENSET 0x0004
+#define SSS_FCINTENSET_BRDMAINTENSET   BIT(3)
+#define SSS_FCINTENSET_BTDMAINTENSET   BIT(2)
+#define SSS_FCINTENSET_HRDMAINTENSET   BIT(1)
+#define SSS_FCINTENSET_PKDMAINTENSET   BIT(0)
+
+#define SSS_REG_FCINTENCLR 0x0008
+#define SSS_FCINTENCLR_BRDMAINTENCLR   BIT(3)
+#define SSS_FCINTENCLR_BTDMAINTENCLR   BIT(2)
+#define SSS_FCINTENCLR_HRDMAINTENCLR   BIT(1)
+#define SSS_FCINTENCLR_PKDMAINTENCLR   BIT(0)
+
+#define SSS_REG_FCINTPEND  0x000C
+#define SSS_FCINTPEND_BRDMAINTPBIT(3)
+#define SSS_FCINTPEND_BTDMAINTPBIT(2)
+#define SSS_FCINTPEND_HRDMAINTPBIT(1)
+#define SSS_FCINTPEND_PKDMAINTPBIT(0)
+
+#define SSS_REG_FCFIFOSTAT 0x0010
+#define SSS_FCFIFOSTAT_BRFIFOFUL   BIT(7)
+#define SSS_FCFIFOSTAT_BRFIFOEMP   BIT(6)
+#define SSS_FCFIFOSTAT_BTFIFOFUL   BIT(5)
+#define SSS_FCFIFOSTAT_BTFIFOEMP   BIT(4)
+#define SSS_FCFIFOSTAT_HRFIFOFUL   BIT(3)
+#define SSS_FCFIFOSTAT_HRFIFOEMP   BIT(2)
+#define SSS_FCFIFOSTAT_PKFIFOFUL   BIT(1)
+#define SSS_FCFIFOSTAT_PKFIFOEMP   BIT(0)
+
+#define SSS_REG_FCFIFOCTRL 0x0014
+#define SSS_FCFIFOCTRL_DESSEL  BIT(2)
+#define 

[PATCH v8 1/2] crypto: s5p-sss: Change spaces to tabs

2017-10-25 Thread Kamil Konieczny
Change #define lines to use tabs consistently.

Acked-by: Vladimir Zapolskiy 
Reviewed-by: Krzysztof Kozlowski 
Signed-off-by: Kamil Konieczny 
---
 drivers/crypto/s5p-sss.c | 190 +++
 1 file changed, 95 insertions(+), 95 deletions(-)

diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index 7ac657f46d15..dfae1865c384 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -30,98 +30,98 @@
 #include 
 #include 
 
-#define _SBF(s, v)  ((v) << (s))
+#define _SBF(s, v) ((v) << (s))
 
 /* Feed control registers */
-#define SSS_REG_FCINTSTAT   0x0000
-#define SSS_FCINTSTAT_BRDMAINT  BIT(3)
-#define SSS_FCINTSTAT_BTDMAINT  BIT(2)
-#define SSS_FCINTSTAT_HRDMAINT  BIT(1)
-#define SSS_FCINTSTAT_PKDMAINT  BIT(0)
-
-#define SSS_REG_FCINTENSET  0x0004
-#define SSS_FCINTENSET_BRDMAINTENSETBIT(3)
-#define SSS_FCINTENSET_BTDMAINTENSETBIT(2)
-#define SSS_FCINTENSET_HRDMAINTENSETBIT(1)
-#define SSS_FCINTENSET_PKDMAINTENSETBIT(0)
-
-#define SSS_REG_FCINTENCLR  0x0008
-#define SSS_FCINTENCLR_BRDMAINTENCLRBIT(3)
-#define SSS_FCINTENCLR_BTDMAINTENCLRBIT(2)
-#define SSS_FCINTENCLR_HRDMAINTENCLRBIT(1)
-#define SSS_FCINTENCLR_PKDMAINTENCLRBIT(0)
-
-#define SSS_REG_FCINTPEND   0x000C
-#define SSS_FCINTPEND_BRDMAINTP BIT(3)
-#define SSS_FCINTPEND_BTDMAINTP BIT(2)
-#define SSS_FCINTPEND_HRDMAINTP BIT(1)
-#define SSS_FCINTPEND_PKDMAINTP BIT(0)
-
-#define SSS_REG_FCFIFOSTAT  0x0010
-#define SSS_FCFIFOSTAT_BRFIFOFULBIT(7)
-#define SSS_FCFIFOSTAT_BRFIFOEMPBIT(6)
-#define SSS_FCFIFOSTAT_BTFIFOFULBIT(5)
-#define SSS_FCFIFOSTAT_BTFIFOEMPBIT(4)
-#define SSS_FCFIFOSTAT_HRFIFOFULBIT(3)
-#define SSS_FCFIFOSTAT_HRFIFOEMPBIT(2)
-#define SSS_FCFIFOSTAT_PKFIFOFULBIT(1)
-#define SSS_FCFIFOSTAT_PKFIFOEMPBIT(0)
-
-#define SSS_REG_FCFIFOCTRL  0x0014
-#define SSS_FCFIFOCTRL_DESSEL   BIT(2)
-#define SSS_HASHIN_INDEPENDENT  _SBF(0, 0x00)
-#define SSS_HASHIN_CIPHER_INPUT _SBF(0, 0x01)
-#define SSS_HASHIN_CIPHER_OUTPUT_SBF(0, 0x02)
-
-#define SSS_REG_FCBRDMAS0x0020
-#define SSS_REG_FCBRDMAL0x0024
-#define SSS_REG_FCBRDMAC0x0028
-#define SSS_FCBRDMAC_BYTESWAP   BIT(1)
-#define SSS_FCBRDMAC_FLUSH  BIT(0)
-
-#define SSS_REG_FCBTDMAS0x0030
-#define SSS_REG_FCBTDMAL0x0034
-#define SSS_REG_FCBTDMAC0x0038
-#define SSS_FCBTDMAC_BYTESWAP   BIT(1)
-#define SSS_FCBTDMAC_FLUSH  BIT(0)
-
-#define SSS_REG_FCHRDMAS0x0040
-#define SSS_REG_FCHRDMAL0x0044
-#define SSS_REG_FCHRDMAC0x0048
-#define SSS_FCHRDMAC_BYTESWAP   BIT(1)
-#define SSS_FCHRDMAC_FLUSH  BIT(0)
-
-#define SSS_REG_FCPKDMAS0x0050
-#define SSS_REG_FCPKDMAL0x0054
-#define SSS_REG_FCPKDMAC0x0058
-#define SSS_FCPKDMAC_BYTESWAP   BIT(3)
-#define SSS_FCPKDMAC_DESCENDBIT(2)
-#define SSS_FCPKDMAC_TRANSMIT   BIT(1)
-#define SSS_FCPKDMAC_FLUSH  BIT(0)
-
-#define SSS_REG_FCPKDMAO0x005C
+#define SSS_REG_FCINTSTAT  0x0000
+#define SSS_FCINTSTAT_BRDMAINT BIT(3)
+#define SSS_FCINTSTAT_BTDMAINT BIT(2)
+#define SSS_FCINTSTAT_HRDMAINT BIT(1)
+#define SSS_FCINTSTAT_PKDMAINT BIT(0)
+
+#define SSS_REG_FCINTENSET 0x0004
+#define SSS_FCINTENSET_BRDMAINTENSET   BIT(3)
+#define SSS_FCINTENSET_BTDMAINTENSET   BIT(2)
+#define SSS_FCINTENSET_HRDMAINTENSET   BIT(1)
+#define SSS_FCINTENSET_PKDMAINTENSET   BIT(0)
+
+#define SSS_REG_FCINTENCLR 0x0008
+#define SSS_FCINTENCLR_BRDMAINTENCLR   BIT(3)
+#define SSS_FCINTENCLR_BTDMAINTENCLR   BIT(2)
+#define SSS_FCINTENCLR_HRDMAINTENCLR   BIT(1)
+#define SSS_FCINTENCLR_PKDMAINTENCLR   BIT(0)
+
+#define SSS_REG_FCINTPEND  0x000C
+#define SSS_FCINTPEND_BRDMAINTPBIT(3)
+#define SSS_FCINTPEND_BTDMAINTPBIT(2)
+#define SSS_FCINTPEND_HRDMAINTPBIT(1)
+#define SSS_FCINTPEND_PKDMAINTPBIT(0)
+
+#define SSS_REG_FCFIFOSTAT 0x0010
+#define SSS_FCFIFOSTAT_BRFIFOFUL   BIT(7)
+#define SSS_FCFIFOSTAT_BRFIFOEMP   BIT(6)
+#define SSS_FCFIFOSTAT_BTFIFOFUL   BIT(5)
+#define SSS_FCFIFOSTAT_BTFIFOEMP   BIT(4)
+#define SSS_FCFIFOSTAT_HRFIFOFUL   BIT(3)
+#define SSS_FCFIFOSTAT_HRFIFOEMP   BIT(2)
+#define SSS_FCFIFOSTAT_PKFIFOFUL   BIT(1)
+#define SSS_FCFIFOSTAT_PKFIFOEMP   BIT(0)
+
+#define SSS_REG_FCFIFOCTRL 0x0014
+#define SSS_FCFIFOCTRL_DESSEL  BIT(2)
+#define SSS_HASHIN_INDEPENDENT _SBF(0, 0x00)
+#define SSS_HASHIN_CIPHER_INPUT   

[PATCH v8 0/2] crypto: s5p-sss: Add HASH support for Exynos

2017-10-25 Thread Kamil Konieczny
The first patch changes spaces to tabs; the second adds HASH support for Exynos.
Changes:

version 8:
- fixes suggested by Vladimir Zapolskiy: drop first condition check in 
  s5p_hash_import, delete unused include delay.h, fix typo in commit
  message, fix descriptions of struct s5p_hash_reqctx and function
  s5p_hash_final()

version 7:
- fix ifdef into if(IS_ENABLED()) as suggested by Krzysztof Kozlowski

version 6:
- fixes suggested by Vladimir Zapolskiy: change HASH_OP enum into bool, fix
  comments, change int into unsigned int in several functions, change some
  functions to return void, remove unnecessary parentheses in s5p_hash_import,
  replace rctx with ctx for request context, drop some dd vars and use tctx->dd
  instead, simplify s5p_hash_rx, s5p_hash_copy_result and s5p_hash_set_flow,
  change int final into bool final, reorder some declarations, split patch into
  two
- rewrite and fix while loop in s5p_hash_copy_sg_lists
- rewrite while loop in s5p_hash_prepare_sgs

version 5:
- fix suggested by Krzysztof Kozlowski: change defines HASH_OP into enum, fix
  comments

version 4:
- fixes suggested by Krzysztof Kozlowski: reformat comments, convert context
  flags into two bool vars, drop SSS_ALIGNED, change name of SSS_DMA_ALIGN and
  SSS_DMA_ALIGN_MASK, split assignments into separate lines, use IS_ENABLED in
  place of ifdef, remove sss_hash_algs_info and simplify register and deregister
  HASH algs

version 3:
- many fixes suggested by Krzysztof Kozlowski: comments, uppercases in const,
  remove unused defines, remove unused variable bs, constify aes_variant,
  remove global var use_hash, remove WARN_ON, improve hash_import(),
  change goto label into 'out' in s5p_hash_handle_queue(), reorder variable
  declarations, add spinlock to protect clearing HASH_FLAGS_BUSY
- simplify code: replace one-line functions s5p_hash_update_req(),
  s5p_hash_final_req() with call to s5p_hash_xmit_dma(), and delete them
- replace call to s5p_hash_hw_init() into s5p_ahash_dma_init() and delete it
- fix clearing shash flag CRYPTO_TFM_REQ_MAY_SLEEP
- fix s5p_hash_set_flow()

version 2:
- change patch format so number of lines drops
- change in Kconfig as suggested by Krzysztof Kozlowski, add
EXYNOS_HASH subsection
- change #ifndef EXYNOS_RNG into #ifdef CRYPTO_DEV_EXYNOS_HASH
- remove style fixups in aes, as they should go in separate patch
- remove FLOW_LOG, FLOW_DUMP macros and its uses
- remove #if 0 ... endif
- remove unused function hash_wait and its defines
- fix compiler warning in dev_dbg
- remove some comments
- other minor fixes in comments

Kamil Konieczny (2):
  crypto: s5p-sss: Change spaces to tabs
  crypto: s5p-sss: Add HASH support for Exynos

 drivers/crypto/Kconfig   |   14 +
 drivers/crypto/s5p-sss.c | 1596 +++---
 2 files changed, 1505 insertions(+), 105 deletions(-)

-- 
2.14.1.536.g6867272d5b56



[PATCH v8 0/2] crypto: s5p-sss: Add HASH support for Exynos

2017-10-25 Thread Kamil Konieczny
The first patch changes spaces to tabs; the second adds HASH support for Exynos.
Changes:

version 8:
- fixes suggested by Vladimir Zapolskiy: drop first condition check in 
  s5p_hash_import, delete unused include delay.h, fix typo in commit
  message, fix descriptions of struct s5p_hash_reqctx and function
  s5p_hash_final()

version 7:
- fix ifdef into if(IS_ENABLED()) as suggested by Krzysztof Kozlowski

version 6:
- fixes suggested by Vladimir Zapolskiy: change HASH_OP enum into bool, fix
  comments, change int into unsigned int in several functions, change some
  functions to return void, remove unnecessary parentheses in s5p_hash_import,
  replace rctx with ctx for request context, drop some dd vars and use tctx->dd
  instead, simplify s5p_hash_rx, s5p_hash_copy_result and s5p_hash_set_flow,
  change int final into bool final, reorder some declarations, split patch into
  two
- rewrite and fix while loop in s5p_hash_copy_sg_lists
- rewrite while loop in s5p_hash_prepare_sgs

version 5:
- fix suggested by Krzysztof Kozlowski: change defines HASH_OP into enum, fix
  comments

version 4:
- fixes suggested by Krzysztof Kozlowski: reformat comments, convert context
  flags into two bool vars, drop SSS_ALIGNED, change name of SSS_DMA_ALIGN and
  SSS_DMA_ALIGN_MASK, split assignments into separate lines, use IS_ENABLED in
  place of ifdef, remove sss_hash_algs_info and simplify register and deregister
  HASH algs

version 3:
- many fixes suggested by Krzysztof Kozlowski: comments, uppercases in const,
  remove unused defines, remove unused variable bs, constify aes_variant,
  remove global var use_hash, remove WARN_ON, improve hash_import(),
  change goto label into 'out' in s5p_hash_handle_queue(), reorder variable
  declarations, add spinlock to protect clearing HASH_FLAGS_BUSY
- simplify code: replace one-line functions s5p_hash_update_req(),
  s5p_hash_final_req() with call to s5p_hash_xmit_dma(), and delete them
- replace call to s5p_hash_hw_init() into s5p_ahash_dma_init() and delete it
- fix clearing shash flag CRYPTO_TFM_REQ_MAY_SLEEP
- fix s5p_hash_set_flow()

version 2:
- change patch format so number of lines drops
- change in Kconfig as suggested by Krzysztof Kozlowski, add
EXYNOS_HASH subsection
- change #ifndef EXYNOS_RNG into #ifdef CRYPTO_DEV_EXYNOS_HASH
- remove style fixups in aes, as they should go in separate patch
- remove FLOW_LOG, FLOW_DUMP macros and its uses
- remove #if 0 ... endif
- remove unused function hash_wait and its defines
- fix compiler warning in dev_dbg
- remove some comments
- other minor fixes in comments

Kamil Konieczny (2):
  crypto: s5p-sss: Change spaces to tabs
  crypto: s5p-sss: Add HASH support for Exynos

 drivers/crypto/Kconfig   |   14 +
 drivers/crypto/s5p-sss.c | 1596 +++---
 2 files changed, 1505 insertions(+), 105 deletions(-)

-- 
2.14.1.536.g6867272d5b56



[PATCH v2] iommu: qcom: wire up fault handler

2017-10-25 Thread Rob Clark
This is quite useful for debugging.  Currently, always TERMINATE the
translation when the fault handler returns (since this is all we need
for debugging drivers).  But I expect the SVM work should eventually
let us do something more clever.

Signed-off-by: Rob Clark 
---
v2: add back a hunk that was lost when rebasing

 drivers/iommu/qcom_iommu.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index c8a587d034b0..eeed1ce0cdde 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -66,6 +66,7 @@ struct qcom_iommu_ctx {
void __iomem*base;
bool secure_init;
u8   asid;  /* asid and ctx bank # are 1:1 */
+   struct iommu_domain *domain;
 };
 
 struct qcom_iommu_domain {
@@ -194,12 +195,15 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev)
fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0);
iova = iommu_readq(ctx, ARM_SMMU_CB_FAR);
 
-   dev_err_ratelimited(ctx->dev,
-   "Unhandled context fault: fsr=0x%x, "
-   "iova=0x%016llx, fsynr=0x%x, cb=%d\n",
-   fsr, iova, fsynr, ctx->asid);
+   if (!report_iommu_fault(ctx->domain, ctx->dev, iova, 0)) {
+   dev_err_ratelimited(ctx->dev,
+   "Unhandled context fault: fsr=0x%x, "
+   "iova=0x%016llx, fsynr=0x%x, cb=%d\n",
+   fsr, iova, fsynr, ctx->asid);
+   }
 
iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr);
+   iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE);
 
return IRQ_HANDLED;
 }
@@ -274,12 +278,14 @@ static int qcom_iommu_init_domain(struct iommu_domain 
*domain,
 
/* SCTLR */
reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE |
-   SCTLR_M | SCTLR_S1_ASIDPNE;
+   SCTLR_M | SCTLR_S1_ASIDPNE | SCTLR_CFCFG;
 
if (IS_ENABLED(CONFIG_BIG_ENDIAN))
reg |= SCTLR_E;
 
iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg);
+
+   ctx->domain = domain;
}
 
mutex_unlock(_domain->init_mutex);
@@ -395,6 +401,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain 
*domain, struct device *de
 
/* Disable the context bank: */
iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
+
+   ctx->domain = NULL;
}
pm_runtime_put_sync(qcom_iommu->dev);
 
-- 
2.13.6



[PATCH v2] iommu: qcom: wire up fault handler

2017-10-25 Thread Rob Clark
This is quite useful for debugging.  Currently, always TERMINATE the
translation when the fault handler returns (since this is all we need
for debugging drivers).  But I expect the SVM work should eventually
let us do something more clever.

Signed-off-by: Rob Clark 
---
v2: add back a hunk that was lost when rebasing

 drivers/iommu/qcom_iommu.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index c8a587d034b0..eeed1ce0cdde 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -66,6 +66,7 @@ struct qcom_iommu_ctx {
void __iomem*base;
bool secure_init;
u8   asid;  /* asid and ctx bank # are 1:1 */
+   struct iommu_domain *domain;
 };
 
 struct qcom_iommu_domain {
@@ -194,12 +195,15 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev)
fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0);
iova = iommu_readq(ctx, ARM_SMMU_CB_FAR);
 
-   dev_err_ratelimited(ctx->dev,
-   "Unhandled context fault: fsr=0x%x, "
-   "iova=0x%016llx, fsynr=0x%x, cb=%d\n",
-   fsr, iova, fsynr, ctx->asid);
+   if (!report_iommu_fault(ctx->domain, ctx->dev, iova, 0)) {
+   dev_err_ratelimited(ctx->dev,
+   "Unhandled context fault: fsr=0x%x, "
+   "iova=0x%016llx, fsynr=0x%x, cb=%d\n",
+   fsr, iova, fsynr, ctx->asid);
+   }
 
iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr);
+   iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE);
 
return IRQ_HANDLED;
 }
@@ -274,12 +278,14 @@ static int qcom_iommu_init_domain(struct iommu_domain 
*domain,
 
/* SCTLR */
reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE |
-   SCTLR_M | SCTLR_S1_ASIDPNE;
+   SCTLR_M | SCTLR_S1_ASIDPNE | SCTLR_CFCFG;
 
if (IS_ENABLED(CONFIG_BIG_ENDIAN))
reg |= SCTLR_E;
 
iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg);
+
+   ctx->domain = domain;
}
 
mutex_unlock(_domain->init_mutex);
@@ -395,6 +401,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain 
*domain, struct device *de
 
/* Disable the context bank: */
iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
+
+   ctx->domain = NULL;
}
pm_runtime_put_sync(qcom_iommu->dev);
 
-- 
2.13.6



[PATCH] iommu: qcom: wire up fault handler

2017-10-25 Thread Rob Clark
This is quite useful for debugging.  Currently, always TERMINATE the
translation when the fault handler returns (since this is all we need
for debugging drivers).  But I expect the SVM work should eventually
let us do something more clever.

Signed-off-by: Rob Clark 
---
 drivers/iommu/qcom_iommu.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index c8a587d034b0..3c6866d94e64 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -66,6 +66,7 @@ struct qcom_iommu_ctx {
void __iomem*base;
bool secure_init;
u8   asid;  /* asid and ctx bank # are 1:1 */
+   struct iommu_domain *domain;
 };
 
 struct qcom_iommu_domain {
@@ -194,12 +195,15 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev)
fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0);
iova = iommu_readq(ctx, ARM_SMMU_CB_FAR);
 
-   dev_err_ratelimited(ctx->dev,
-   "Unhandled context fault: fsr=0x%x, "
-   "iova=0x%016llx, fsynr=0x%x, cb=%d\n",
-   fsr, iova, fsynr, ctx->asid);
+   if (!report_iommu_fault(ctx->domain, ctx->dev, iova, 0)) {
+   dev_err_ratelimited(ctx->dev,
+   "Unhandled context fault: fsr=0x%x, "
+   "iova=0x%016llx, fsynr=0x%x, cb=%d\n",
+   fsr, iova, fsynr, ctx->asid);
+   }
 
iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr);
+   iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE);
 
return IRQ_HANDLED;
 }
@@ -280,6 +284,8 @@ static int qcom_iommu_init_domain(struct iommu_domain 
*domain,
reg |= SCTLR_E;
 
iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg);
+
+   ctx->domain = domain;
}
 
mutex_unlock(_domain->init_mutex);
@@ -395,6 +401,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain 
*domain, struct device *de
 
/* Disable the context bank: */
iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
+
+   ctx->domain = NULL;
}
pm_runtime_put_sync(qcom_iommu->dev);
 
-- 
2.13.6



[PATCH] iommu: qcom: wire up fault handler

2017-10-25 Thread Rob Clark
This is quite useful for debugging.  Currently, always TERMINATE the
translation when the fault handler returns (since this is all we need
for debugging drivers).  But I expect the SVM work should eventually
let us do something more clever.

Signed-off-by: Rob Clark 
---
 drivers/iommu/qcom_iommu.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index c8a587d034b0..3c6866d94e64 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -66,6 +66,7 @@ struct qcom_iommu_ctx {
void __iomem*base;
bool secure_init;
u8   asid;  /* asid and ctx bank # are 1:1 */
+   struct iommu_domain *domain;
 };
 
 struct qcom_iommu_domain {
@@ -194,12 +195,15 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev)
fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0);
iova = iommu_readq(ctx, ARM_SMMU_CB_FAR);
 
-   dev_err_ratelimited(ctx->dev,
-   "Unhandled context fault: fsr=0x%x, "
-   "iova=0x%016llx, fsynr=0x%x, cb=%d\n",
-   fsr, iova, fsynr, ctx->asid);
+   if (!report_iommu_fault(ctx->domain, ctx->dev, iova, 0)) {
+   dev_err_ratelimited(ctx->dev,
+   "Unhandled context fault: fsr=0x%x, "
+   "iova=0x%016llx, fsynr=0x%x, cb=%d\n",
+   fsr, iova, fsynr, ctx->asid);
+   }
 
iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr);
+   iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE);
 
return IRQ_HANDLED;
 }
@@ -280,6 +284,8 @@ static int qcom_iommu_init_domain(struct iommu_domain 
*domain,
reg |= SCTLR_E;
 
iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg);
+
+   ctx->domain = domain;
}
 
mutex_unlock(_domain->init_mutex);
@@ -395,6 +401,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain 
*domain, struct device *de
 
/* Disable the context bank: */
iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
+
+   ctx->domain = NULL;
}
pm_runtime_put_sync(qcom_iommu->dev);
 
-- 
2.13.6



[PATCH v8 2/2] crypto: s5p-sss: Add HASH support for Exynos

2017-10-25 Thread Kamil Konieczny
Add support for MD5, SHA1, SHA256 hash algorithms for Exynos HW.
It uses the crypto framework asynchronous hash api.
It is based on omap-sham.c driver.
S5P has some HW differences and is not implemented.

Modifications in s5p-sss:

- Add hash supporting structures and functions.

- Modify irq handler to handle both aes and hash signals.

- Resize resource end in probe if EXYNOS_HASH is enabled in
  Kconfig.

- Add new copyright line and new author.

- Tested on Odroid-U3 with Exynos 4412 CPU, kernel 4.13-rc6
  with crypto run-time self test testmgr
  and with tcrypt module with: modprobe tcrypt sec=1 mode=N
  where N=402, 403, 404 (MD5, SHA1, SHA256).

Modifications in drivers/crypto/Kconfig:

- Add new CRYPTO_DEV_EXYNOS_HASH, depend on !EXYNOS_RNG
  and CRYPTO_DEV_S5P

- Select sw algorithms MD5, SHA1 and SHA256 in EXYNOS_HASH
  as they are needed for fallback.

Acked-by: Vladimir Zapolskiy 
Reviewed-by: Krzysztof Kozlowski 
Signed-off-by: Kamil Konieczny 
---
 drivers/crypto/Kconfig   |   14 +
 drivers/crypto/s5p-sss.c | 1406 +-
 2 files changed, 1410 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 4b75084fabad..dea4d33d9c7f 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -439,6 +439,20 @@ config CRYPTO_DEV_S5P
  Select this to offload Samsung S5PV210 or S5PC110, Exynos from AES
  algorithms execution.
 
+config CRYPTO_DEV_EXYNOS_HASH
+   bool "Support for Samsung Exynos HASH accelerator"
+   depends on CRYPTO_DEV_S5P
+   depends on !CRYPTO_DEV_EXYNOS_RNG && CRYPTO_DEV_EXYNOS_RNG!=m
+   select CRYPTO_SHA1
+   select CRYPTO_MD5
+   select CRYPTO_SHA256
+   help
+ Select this to offload Exynos from HASH MD5/SHA1/SHA256.
+ This will select software SHA1, MD5 and SHA256 as they are
+ needed for small and zero-size messages.
+ HASH algorithms will be disabled if EXYNOS_RNG
+ is enabled due to hw conflict.
+
 config CRYPTO_DEV_NX
bool "Support for IBM PowerPC Nest (NX) cryptographic acceleration"
depends on PPC64
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index dfae1865c384..142c6020cec7 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -1,14 +1,16 @@
 /*
  * Cryptographic API.
  *
- * Support for Samsung S5PV210 HW acceleration.
+ * Support for Samsung S5PV210 and Exynos HW acceleration.
  *
  * Copyright (C) 2011 NetUP Inc. All rights reserved.
+ * Copyright (c) 2017 Samsung Electronics Co., Ltd. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as published
  * by the Free Software Foundation.
  *
+ * Hash part based on omap-sham.c driver.
  */
 
 #include 
@@ -30,28 +32,41 @@
 #include 
 #include 
 
+#include 
+#include 
+#include 
+#include 
+
 #define _SBF(s, v) ((v) << (s))
 
 /* Feed control registers */
 #define SSS_REG_FCINTSTAT  0x
+#define SSS_FCINTSTAT_HPARTINT BIT(7)
+#define SSS_FCINTSTAT_HDONEINT BIT(5)
 #define SSS_FCINTSTAT_BRDMAINT BIT(3)
 #define SSS_FCINTSTAT_BTDMAINT BIT(2)
 #define SSS_FCINTSTAT_HRDMAINT BIT(1)
 #define SSS_FCINTSTAT_PKDMAINT BIT(0)
 
 #define SSS_REG_FCINTENSET 0x0004
+#define SSS_FCINTENSET_HPARTINTENSET   BIT(7)
+#define SSS_FCINTENSET_HDONEINTENSET   BIT(5)
 #define SSS_FCINTENSET_BRDMAINTENSET   BIT(3)
 #define SSS_FCINTENSET_BTDMAINTENSET   BIT(2)
 #define SSS_FCINTENSET_HRDMAINTENSET   BIT(1)
 #define SSS_FCINTENSET_PKDMAINTENSET   BIT(0)
 
 #define SSS_REG_FCINTENCLR 0x0008
+#define SSS_FCINTENCLR_HPARTINTENCLR   BIT(7)
+#define SSS_FCINTENCLR_HDONEINTENCLR   BIT(5)
 #define SSS_FCINTENCLR_BRDMAINTENCLR   BIT(3)
 #define SSS_FCINTENCLR_BTDMAINTENCLR   BIT(2)
 #define SSS_FCINTENCLR_HRDMAINTENCLR   BIT(1)
 #define SSS_FCINTENCLR_PKDMAINTENCLR   BIT(0)
 
 #define SSS_REG_FCINTPEND  0x000C
+#define SSS_FCINTPEND_HPARTINTPBIT(7)
+#define SSS_FCINTPEND_HDONEINTPBIT(5)
 #define SSS_FCINTPEND_BRDMAINTPBIT(3)
 #define SSS_FCINTPEND_BTDMAINTPBIT(2)
 #define SSS_FCINTPEND_HRDMAINTPBIT(1)
@@ -72,6 +87,7 @@
 #define SSS_HASHIN_INDEPENDENT _SBF(0, 0x00)
 #define SSS_HASHIN_CIPHER_INPUT_SBF(0, 0x01)
 #define SSS_HASHIN_CIPHER_OUTPUT   _SBF(0, 0x02)
+#define SSS_HASHIN_MASK_SBF(0, 0x03)
 
 #define SSS_REG_FCBRDMAS   0x0020
 #define SSS_REG_FCBRDMAL   0x0024
@@ -146,9 +162,80 @@
 #define AES_KEY_LEN16
 #define CRYPTO_QUEUE_LEN   1
 
+/* HASH registers */
+#define SSS_REG_HASH_CTRL  0x00
+
+#define SSS_HASH_USER_IV_EN

[PATCH v8 2/2] crypto: s5p-sss: Add HASH support for Exynos

2017-10-25 Thread Kamil Konieczny
Add support for MD5, SHA1, SHA256 hash algorithms for Exynos HW.
It uses the crypto framework asynchronous hash api.
It is based on omap-sham.c driver.
S5P has some HW differencies and is not implemented.

Modifications in s5p-sss:

- Add hash supporting structures and functions.

- Modify irq handler to handle both aes and hash signals.

- Resize resource end in probe if EXYNOS_HASH is enabled in
  Kconfig.

- Add new copyright line and new author.

- Tested on Odroid-U3 with Exynos 4412 CPU, kernel 4.13-rc6
  with crypto run-time self test testmgr
  and with tcrypt module with: modprobe tcrypt sec=1 mode=N
  where N=402, 403, 404 (MD5, SHA1, SHA256).

Modifications in drivers/crypto/Kconfig:

- Add new CRYPTO_DEV_EXYNOS_HASH, depend on !EXYNOS_RNG
  and CRYPTO_DEV_S5P

- Select sw algorithms MD5, SHA1 and SHA256 in EXYNOS_HASH
  as they are needed for fallback.

Acked-by: Vladimir Zapolskiy 
Reviewed-by: Krzysztof Kozlowski 
Signed-off-by: Kamil Konieczny 
---
 drivers/crypto/Kconfig   |   14 +
 drivers/crypto/s5p-sss.c | 1406 +-
 2 files changed, 1410 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 4b75084fabad..dea4d33d9c7f 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -439,6 +439,20 @@ config CRYPTO_DEV_S5P
  Select this to offload Samsung S5PV210 or S5PC110, Exynos from AES
  algorithms execution.
 
+config CRYPTO_DEV_EXYNOS_HASH
+   bool "Support for Samsung Exynos HASH accelerator"
+   depends on CRYPTO_DEV_S5P
+   depends on !CRYPTO_DEV_EXYNOS_RNG && CRYPTO_DEV_EXYNOS_RNG!=m
+   select CRYPTO_SHA1
+   select CRYPTO_MD5
+   select CRYPTO_SHA256
+   help
+ Select this to offload Exynos from HASH MD5/SHA1/SHA256.
+ This will select software SHA1, MD5 and SHA256 as they are
+ needed for small and zero-size messages.
+ HASH algorithms will be disabled if EXYNOS_RNG
+ is enabled due to hw conflict.
+
 config CRYPTO_DEV_NX
bool "Support for IBM PowerPC Nest (NX) cryptographic acceleration"
depends on PPC64
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index dfae1865c384..142c6020cec7 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -1,14 +1,16 @@
 /*
  * Cryptographic API.
  *
- * Support for Samsung S5PV210 HW acceleration.
+ * Support for Samsung S5PV210 and Exynos HW acceleration.
  *
  * Copyright (C) 2011 NetUP Inc. All rights reserved.
+ * Copyright (c) 2017 Samsung Electronics Co., Ltd. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as published
  * by the Free Software Foundation.
  *
+ * Hash part based on omap-sham.c driver.
  */
 
 #include 
@@ -30,28 +32,41 @@
 #include 
 #include 
 
+#include 
+#include 
+#include 
+#include 
+
 #define _SBF(s, v) ((v) << (s))
 
 /* Feed control registers */
 #define SSS_REG_FCINTSTAT  0x
+#define SSS_FCINTSTAT_HPARTINT BIT(7)
+#define SSS_FCINTSTAT_HDONEINT BIT(5)
 #define SSS_FCINTSTAT_BRDMAINT BIT(3)
 #define SSS_FCINTSTAT_BTDMAINT BIT(2)
 #define SSS_FCINTSTAT_HRDMAINT BIT(1)
 #define SSS_FCINTSTAT_PKDMAINT BIT(0)
 
 #define SSS_REG_FCINTENSET 0x0004
+#define SSS_FCINTENSET_HPARTINTENSET   BIT(7)
+#define SSS_FCINTENSET_HDONEINTENSET   BIT(5)
 #define SSS_FCINTENSET_BRDMAINTENSET   BIT(3)
 #define SSS_FCINTENSET_BTDMAINTENSET   BIT(2)
 #define SSS_FCINTENSET_HRDMAINTENSET   BIT(1)
 #define SSS_FCINTENSET_PKDMAINTENSET   BIT(0)
 
 #define SSS_REG_FCINTENCLR 0x0008
+#define SSS_FCINTENCLR_HPARTINTENCLR   BIT(7)
+#define SSS_FCINTENCLR_HDONEINTENCLR   BIT(5)
 #define SSS_FCINTENCLR_BRDMAINTENCLR   BIT(3)
 #define SSS_FCINTENCLR_BTDMAINTENCLR   BIT(2)
 #define SSS_FCINTENCLR_HRDMAINTENCLR   BIT(1)
 #define SSS_FCINTENCLR_PKDMAINTENCLR   BIT(0)
 
 #define SSS_REG_FCINTPEND  0x000C
+#define SSS_FCINTPEND_HPARTINTPBIT(7)
+#define SSS_FCINTPEND_HDONEINTPBIT(5)
 #define SSS_FCINTPEND_BRDMAINTPBIT(3)
 #define SSS_FCINTPEND_BTDMAINTPBIT(2)
 #define SSS_FCINTPEND_HRDMAINTPBIT(1)
@@ -72,6 +87,7 @@
 #define SSS_HASHIN_INDEPENDENT _SBF(0, 0x00)
 #define SSS_HASHIN_CIPHER_INPUT_SBF(0, 0x01)
 #define SSS_HASHIN_CIPHER_OUTPUT   _SBF(0, 0x02)
+#define SSS_HASHIN_MASK_SBF(0, 0x03)
 
 #define SSS_REG_FCBRDMAS   0x0020
 #define SSS_REG_FCBRDMAL   0x0024
@@ -146,9 +162,80 @@
 #define AES_KEY_LEN16
 #define CRYPTO_QUEUE_LEN   1
 
+/* HASH registers */
+#define SSS_REG_HASH_CTRL  0x00
+
+#define SSS_HASH_USER_IV_ENBIT(5)
+#define SSS_HASH_INIT_BIT  BIT(4)
+#define 

Re: [PATCH 2/2] crypto: s5p-sss: Add HASH support for Exynos

2017-10-25 Thread Kamil Konieczny
Hi Vladimir,

On 25.10.2017 17:09, Vladimir Zapolskiy wrote:
> 
> thank you for updates, everything looks good from my point of view.
> 

Thank you for your work, I am still a newbie, I forgot 'v8'
and to: linux-crypto :( 
So I will resend this patch series with improved title 
and your Ack added.

> On 10/25/2017 05:57 PM, Kamil Konieczny wrote:
>> Add support for MD5, SHA1, SHA256 hash algorithms for Exynos HW.
>> It uses the crypto framework asynchronous hash api.
>> It is based on omap-sham.c driver.
>> S5P has some HW differences and is not implemented.
>>[...]
>> Reviewed-by: Krzysztof Kozlowski 
>> Signed-off-by: Kamil Konieczny 
>> ---
> 
> I won't linger this time :)
> 
> Please feel free to add 
> 
> Acked-by: Vladimir Zapolskiy 
> 
-- 
Best regards,
Kamil Konieczny
Samsung R&D Institute Poland



Re: [PATCH 2/2] crypto: s5p-sss: Add HASH support for Exynos

2017-10-25 Thread Kamil Konieczny
Hi Vladimir,

On 25.10.2017 17:09, Vladimir Zapolskiy wrote:
> 
> thank you for updates, everything looks good from my point of view.
> 

Thank you for your work, I am still a newbie, I forgot 'v8'
and to: linux-crypto :( 
So I will resend this patch series with improved title 
and your Ack added.

> On 10/25/2017 05:57 PM, Kamil Konieczny wrote:
>> Add support for MD5, SHA1, SHA256 hash algorithms for Exynos HW.
>> It uses the crypto framework asynchronous hash api.
>> It is based on omap-sham.c driver.
>> S5P has some HW differences and is not implemented.
>>[...]
>> Reviewed-by: Krzysztof Kozlowski 
>> Signed-off-by: Kamil Konieczny 
>> ---
> 
> I won't linger this time :)
> 
> Please feel free to add 
> 
> Acked-by: Vladimir Zapolskiy 
> 
-- 
Best regards,
Kamil Konieczny
Samsung R&D Institute Poland



Re: [PATCH] tpm: Move Linux RNG connection to hwrng

2017-10-25 Thread PrasannaKumar Muralidharan
Hi Jason,

On 25 October 2017 at 20:48, Jason Gunthorpe
 wrote:
> On Wed, Oct 25, 2017 at 08:15:09PM +0530, PrasannaKumar Muralidharan
> wrote:
>
>> > +static int tpm_add_hwrng(struct tpm_chip *chip)
>> > +{
>> > +   if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM))
>> > +   return 0;
>>
>> Can #ifndef CONFIG_HW_RANDOM_TPM be used instead? That way an if
>> condition can be avoided.
>
> Generally speaking IS_ENABLED is preferred over #ifdef as it reduces the
> set of compilation combinations.

Oh okay. No issues then.

Regards,
PrasannaKumar


Re: [PATCH] tpm: Move Linux RNG connection to hwrng

2017-10-25 Thread PrasannaKumar Muralidharan
Hi Jason,

On 25 October 2017 at 20:48, Jason Gunthorpe
 wrote:
> On Wed, Oct 25, 2017 at 08:15:09PM +0530, PrasannaKumar Muralidharan
> wrote:
>
>> > +static int tpm_add_hwrng(struct tpm_chip *chip)
>> > +{
>> > +   if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM))
>> > +   return 0;
>>
>> Can #ifndef CONFIG_HW_RANDOM_TPM be used instead? That way an if
>> condition can be avoided.
>
> Generally speaking IS_ENABLED is preferred over #ifdef as it reduces the
> set of compilation combinations.

Oh okay. No issues then.

Regards,
PrasannaKumar


Re: [PATCH v2] tpm: use struct tpm_chip for tpm_chip_find_get()

2017-10-25 Thread Jason Gunthorpe
On Wed, Oct 25, 2017 at 01:55:04PM +0200, Jarkko Sakkinen wrote:
> Device number (the character device index) is not a stable identifier
> for a TPM chip. That is the reason why every call site passes
> TPM_ANY_NUM to tpm_chip_find_get().
> 
> This commit changes the API in a way that instead a struct tpm_chip
> instance is given and NULL means the default chip. In addition, this
> commit refines the documentation to be up to date with the
> implementation.
> 
> Suggested-by: Jason Gunthorpe  (@chip_num -> 
> @chip)
> Signed-off-by: Jarkko Sakkinen 
> v2:
> * Further defined function documentation.
> * Changed @chip_num to @chip instead of removing the parameter as suggested by
>   Jason Gunthorpe.

Reviewed-by: Jason Gunthorpe 

Jason


Re: [PATCH v2] tpm: use struct tpm_chip for tpm_chip_find_get()

2017-10-25 Thread Jason Gunthorpe
On Wed, Oct 25, 2017 at 01:55:04PM +0200, Jarkko Sakkinen wrote:
> Device number (the character device index) is not a stable identifier
> for a TPM chip. That is the reason why every call site passes
> TPM_ANY_NUM to tpm_chip_find_get().
> 
> This commit changes the API in a way that instead a struct tpm_chip
> instance is given and NULL means the default chip. In addition, this
> commit refines the documentation to be up to date with the
> implementation.
> 
> Suggested-by: Jason Gunthorpe  (@chip_num -> 
> @chip)
> Signed-off-by: Jarkko Sakkinen 
> v2:
> * Further defined function documentation.
> * Changed @chip_num to @chip instead of removing the parameter as suggested by
>   Jason Gunthorpe.

Reviewed-by: Jason Gunthorpe 

Jason


Re: [PATCH] thermal: cpu_cooling: pr_err() strings should end with newlines

2017-10-25 Thread Javi Merino
On Tue, Oct 24, 2017 at 01:20:39PM +0530, Arvind Yadav wrote:
> pr_err() messages should end with a new-line to avoid other messages
> being concatenated.
> 
> Signed-off-by: Arvind Yadav 

FWIW,

Acked-by: Javi Merino 

> ---
>  drivers/thermal/cpu_cooling.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
> index 908a801..dc63aba 100644
> --- a/drivers/thermal/cpu_cooling.c
> +++ b/drivers/thermal/cpu_cooling.c
> @@ -696,7 +696,7 @@ static unsigned int find_next_max(struct 
> cpufreq_frequency_table *table,
>   bool first;
>  
>   if (IS_ERR_OR_NULL(policy)) {
> - pr_err("%s: cpufreq policy isn't valid: %p", __func__, policy);
> + pr_err("%s: cpufreq policy isn't valid: %p\n", __func__, 
> policy);
>   return ERR_PTR(-EINVAL);
>   }
>  
> -- 
> 1.9.1
> 


Re: [PATCH] thermal: cpu_cooling: pr_err() strings should end with newlines

2017-10-25 Thread Javi Merino
On Tue, Oct 24, 2017 at 01:20:39PM +0530, Arvind Yadav wrote:
> pr_err() messages should end with a new-line to avoid other messages
> being concatenated.
> 
> Signed-off-by: Arvind Yadav 

FWIW,

Acked-by: Javi Merino 

> ---
>  drivers/thermal/cpu_cooling.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
> index 908a801..dc63aba 100644
> --- a/drivers/thermal/cpu_cooling.c
> +++ b/drivers/thermal/cpu_cooling.c
> @@ -696,7 +696,7 @@ static unsigned int find_next_max(struct 
> cpufreq_frequency_table *table,
>   bool first;
>  
>   if (IS_ERR_OR_NULL(policy)) {
> - pr_err("%s: cpufreq policy isn't valid: %p", __func__, policy);
> + pr_err("%s: cpufreq policy isn't valid: %p\n", __func__, 
> policy);
>   return ERR_PTR(-EINVAL);
>   }
>  
> -- 
> 1.9.1
> 


Re: [PATCH] tpm: Move Linux RNG connection to hwrng

2017-10-25 Thread Jason Gunthorpe
On Wed, Oct 25, 2017 at 08:15:09PM +0530, PrasannaKumar Muralidharan
wrote:

> > +static int tpm_add_hwrng(struct tpm_chip *chip)
> > +{
> > +   if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM))
> > +   return 0;
> 
> Can #ifndef CONFIG_HW_RANDOM_TPM be used instead? That way an if
> condition can be avoided.

Generally speaking IS_ENABLED is preferred over #ifdef as it reduces the
set of compilation combinations.

Jason


Re: [PATCH] tpm: Move Linux RNG connection to hwrng

2017-10-25 Thread Jason Gunthorpe
On Wed, Oct 25, 2017 at 08:15:09PM +0530, PrasannaKumar Muralidharan
wrote:

> > +static int tpm_add_hwrng(struct tpm_chip *chip)
> > +{
> > +   if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM))
> > +   return 0;
> 
> Can #ifndef CONFIG_HW_RANDOM_TPM be used instead? That way an if
> condition can be avoided.

Generally speaking IS_ENABLED is preferred over #ifdef as it reduces the
set of compilation combinations.

Jason


Re: [PATCH v2] tpm: use struct tpm_chip for tpm_chip_find_get()

2017-10-25 Thread PrasannaKumar Muralidharan
Hi Jarkko,

On 25 October 2017 at 17:25, Jarkko Sakkinen
 wrote:
> Device number (the character device index) is not a stable identifier
> for a TPM chip. That is the reason why every call site passes
> TPM_ANY_NUM to tpm_chip_find_get().
>
> This commit changes the API in a way that instead a struct tpm_chip
> instance is given and NULL means the default chip. In addition, this
> commit refines the documentation to be up to date with the
> implementation.
>
> Suggested-by: Jason Gunthorpe  (@chip_num -> 
> @chip)
> Signed-off-by: Jarkko Sakkinen 
> ---
> v2:
> * Further defined function documentation.
> * Changed @chip_num to @chip instead of removing the parameter as suggested by
>   Jason Gunthorpe.
>  drivers/char/hw_random/tpm-rng.c|   2 +-
>  drivers/char/tpm/tpm-chip.c |  21 +++---
>  drivers/char/tpm/tpm-interface.c| 135 
> +++-
>  drivers/char/tpm/tpm.h  |   2 +-
>  include/linux/tpm.h |  38 +-
>  security/integrity/ima/ima_crypto.c |   2 +-
>  security/integrity/ima/ima_init.c   |   2 +-
>  security/integrity/ima/ima_queue.c  |   2 +-
>  security/keys/trusted.c |  35 +-
>  9 files changed, 125 insertions(+), 114 deletions(-)
>
> diff --git a/drivers/char/hw_random/tpm-rng.c 
> b/drivers/char/hw_random/tpm-rng.c
> index d6d448266f07..c5e363825af0 100644
> --- a/drivers/char/hw_random/tpm-rng.c
> +++ b/drivers/char/hw_random/tpm-rng.c
> @@ -25,7 +25,7 @@
>
>  static int tpm_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
>  {
> -   return tpm_get_random(TPM_ANY_NUM, data, max);
> +   return tpm_get_random(NULL, data, max);
>  }
>
>  static struct hwrng tpm_rng = {
> diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
> index a114e8f7fb90..c7a4e7fb424d 100644
> --- a/drivers/char/tpm/tpm-chip.c
> +++ b/drivers/char/tpm/tpm-chip.c
> @@ -81,21 +81,26 @@ void tpm_put_ops(struct tpm_chip *chip)
>  EXPORT_SYMBOL_GPL(tpm_put_ops);
>
>  /**
> - * tpm_chip_find_get() - return tpm_chip for a given chip number
> - * @chip_num: id to find
> + * tpm_chip_find_get() - find and reserve a TPM chip
> + * @chip:  a  tpm_chip instance, %NULL for the default chip
>   *
> - * The return'd chip has been tpm_try_get_ops'd and must be released via
> - * tpm_put_ops
> + * Finds a TPM chip and reserves its class device and operations. The chip 
> must
> + * be released with tpm_chip_put_ops() after use.
> + *
> + * Return:
> + * A reserved  tpm_chip instance.
> + * %NULL if a chip is not found.
> + * %NULL if the chip is not available.
>   */
> -struct tpm_chip *tpm_chip_find_get(int chip_num)
> +struct tpm_chip *tpm_chip_find_get(struct tpm_chip *chip)
>  {
> -   struct tpm_chip *chip, *res = NULL;
> +   struct tpm_chip *res = NULL;
> +   int chip_num = 0;
> int chip_prev;
>
> mutex_lock(&idr_lock);
>
> -   if (chip_num == TPM_ANY_NUM) {
> -   chip_num = 0;
> +   if (!chip) {
> do {
> chip_prev = chip_num;
> chip = idr_get_next(&dev_nums_idr, &chip_num);

When chip is not NULL just do tpm_try_get_ops(chip). Current code does
more things which are not required.

> diff --git a/drivers/char/tpm/tpm-interface.c 
> b/drivers/char/tpm/tpm-interface.c
> index ebe0a1d36d8c..19f820f775b5 100644
> --- a/drivers/char/tpm/tpm-interface.c
> +++ b/drivers/char/tpm/tpm-interface.c
> @@ -809,19 +809,20 @@ int tpm_pcr_read_dev(struct tpm_chip *chip, int 
> pcr_idx, u8 *res_buf)
>  }
>
>  /**
> - * tpm_is_tpm2 - is the chip a TPM2 chip?
> - * @chip_num:  tpm idx # or ANY
> + * tpm_is_tpm2 - do we a have a TPM2 chip?
> + * @chip:  a  tpm_chip instance, %NULL for the default chip
>   *
> - * Returns < 0 on error, and 1 or 0 on success depending whether the chip
> - * is a TPM2 chip.
> + * Return:
> + * 1 if we have a TPM2 chip.
> + * 0 if we don't have a TPM2 chip.
> + * A negative number for system errors (errno).
>   */
> -int tpm_is_tpm2(u32 chip_num)
> +int tpm_is_tpm2(struct tpm_chip *chip)
>  {
> -   struct tpm_chip *chip;
> int rc;
>
> -   chip = tpm_chip_find_get(chip_num);
> -   if (chip == NULL)
> +   chip = tpm_chip_find_get(chip);
> +   if (!chip)
> return -ENODEV;
>
> rc = (chip->flags & TPM_CHIP_FLAG_TPM2) != 0;
> @@ -833,23 +834,19 @@ int tpm_is_tpm2(u32 chip_num)
>  EXPORT_SYMBOL_GPL(tpm_is_tpm2);
>
>  /**
> - * tpm_pcr_read - read a pcr value
> - * @chip_num:  tpm idx # or ANY
> - * @pcr_idx:   pcr idx to retrieve
> - * @res_buf:   TPM_PCR value
> - * size of res_buf is 20 bytes (or NULL if you don't care)
> + * tpm_pcr_read - read a PCR value from SHA1 bank
> + * @chip:  a  tpm_chip instance, %NULL for the default chip
> + * @pcr_idx:   the PCR to be retrieved
> + * @res_buf:   the value of the PCR
>   *
> - * 

Re: [PATCH v2] tpm: use struct tpm_chip for tpm_chip_find_get()

2017-10-25 Thread PrasannaKumar Muralidharan
Hi Jarkko,

On 25 October 2017 at 17:25, Jarkko Sakkinen
 wrote:
> Device number (the character device index) is not a stable identifier
> for a TPM chip. That is the reason why every call site passes
> TPM_ANY_NUM to tpm_chip_find_get().
>
> This commit changes the API in a way that instead a struct tpm_chip
> instance is given and NULL means the default chip. In addition, this
> commit refines the documentation to be up to date with the
> implementation.
>
> Suggested-by: Jason Gunthorpe  (@chip_num -> 
> @chip)
> Signed-off-by: Jarkko Sakkinen 
> ---
> v2:
> * Further defined function documentation.
> * Changed @chip_num to @chip instead of removing the parameter as suggested by
>   Jason Gunthorpe.
>  drivers/char/hw_random/tpm-rng.c|   2 +-
>  drivers/char/tpm/tpm-chip.c |  21 +++---
>  drivers/char/tpm/tpm-interface.c| 135 
> +++-
>  drivers/char/tpm/tpm.h  |   2 +-
>  include/linux/tpm.h |  38 +-
>  security/integrity/ima/ima_crypto.c |   2 +-
>  security/integrity/ima/ima_init.c   |   2 +-
>  security/integrity/ima/ima_queue.c  |   2 +-
>  security/keys/trusted.c |  35 +-
>  9 files changed, 125 insertions(+), 114 deletions(-)
>
> diff --git a/drivers/char/hw_random/tpm-rng.c 
> b/drivers/char/hw_random/tpm-rng.c
> index d6d448266f07..c5e363825af0 100644
> --- a/drivers/char/hw_random/tpm-rng.c
> +++ b/drivers/char/hw_random/tpm-rng.c
> @@ -25,7 +25,7 @@
>
>  static int tpm_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
>  {
> -   return tpm_get_random(TPM_ANY_NUM, data, max);
> +   return tpm_get_random(NULL, data, max);
>  }
>
>  static struct hwrng tpm_rng = {
> diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
> index a114e8f7fb90..c7a4e7fb424d 100644
> --- a/drivers/char/tpm/tpm-chip.c
> +++ b/drivers/char/tpm/tpm-chip.c
> @@ -81,21 +81,26 @@ void tpm_put_ops(struct tpm_chip *chip)
>  EXPORT_SYMBOL_GPL(tpm_put_ops);
>
>  /**
> - * tpm_chip_find_get() - return tpm_chip for a given chip number
> - * @chip_num: id to find
> + * tpm_chip_find_get() - find and reserve a TPM chip
> + * @chip:  a  tpm_chip instance, %NULL for the default chip
>   *
> - * The return'd chip has been tpm_try_get_ops'd and must be released via
> - * tpm_put_ops
> + * Finds a TPM chip and reserves its class device and operations. The chip 
> must
> + * be released with tpm_chip_put_ops() after use.
> + *
> + * Return:
> + * A reserved  tpm_chip instance.
> + * %NULL if a chip is not found.
> + * %NULL if the chip is not available.
>   */
> -struct tpm_chip *tpm_chip_find_get(int chip_num)
> +struct tpm_chip *tpm_chip_find_get(struct tpm_chip *chip)
>  {
> -   struct tpm_chip *chip, *res = NULL;
> +   struct tpm_chip *res = NULL;
> +   int chip_num = 0;
> int chip_prev;
>
> mutex_lock(&idr_lock);
>
> -   if (chip_num == TPM_ANY_NUM) {
> -   chip_num = 0;
> +   if (!chip) {
> do {
> chip_prev = chip_num;
> chip = idr_get_next(&dev_nums_idr, &chip_num);

When chip is not NULL just do tpm_try_get_ops(chip). Current code does
more things which are not required.

> diff --git a/drivers/char/tpm/tpm-interface.c 
> b/drivers/char/tpm/tpm-interface.c
> index ebe0a1d36d8c..19f820f775b5 100644
> --- a/drivers/char/tpm/tpm-interface.c
> +++ b/drivers/char/tpm/tpm-interface.c
> @@ -809,19 +809,20 @@ int tpm_pcr_read_dev(struct tpm_chip *chip, int 
> pcr_idx, u8 *res_buf)
>  }
>
>  /**
> - * tpm_is_tpm2 - is the chip a TPM2 chip?
> - * @chip_num:  tpm idx # or ANY
> + * tpm_is_tpm2 - do we a have a TPM2 chip?
> + * @chip:  a  tpm_chip instance, %NULL for the default chip
>   *
> - * Returns < 0 on error, and 1 or 0 on success depending whether the chip
> - * is a TPM2 chip.
> + * Return:
> + * 1 if we have a TPM2 chip.
> + * 0 if we don't have a TPM2 chip.
> + * A negative number for system errors (errno).
>   */
> -int tpm_is_tpm2(u32 chip_num)
> +int tpm_is_tpm2(struct tpm_chip *chip)
>  {
> -   struct tpm_chip *chip;
> int rc;
>
> -   chip = tpm_chip_find_get(chip_num);
> -   if (chip == NULL)
> +   chip = tpm_chip_find_get(chip);
> +   if (!chip)
> return -ENODEV;
>
> rc = (chip->flags & TPM_CHIP_FLAG_TPM2) != 0;
> @@ -833,23 +834,19 @@ int tpm_is_tpm2(u32 chip_num)
>  EXPORT_SYMBOL_GPL(tpm_is_tpm2);
>
>  /**
> - * tpm_pcr_read - read a pcr value
> - * @chip_num:  tpm idx # or ANY
> - * @pcr_idx:   pcr idx to retrieve
> - * @res_buf:   TPM_PCR value
> - * size of res_buf is 20 bytes (or NULL if you don't care)
> + * tpm_pcr_read - read a PCR value from SHA1 bank
> + * @chip:  a  tpm_chip instance, %NULL for the default chip
> + * @pcr_idx:   the PCR to be retrieved
> + * @res_buf:   the value of the PCR
>   *
> - * The TPM driver should be built-in, but for whatever reason it
> - * isn't, protect against the chip 

Re: [PATCH v2 0/7] fix fanotify issues with the series in v4.12

2017-10-25 Thread Amir Goldstein
On Wed, Oct 25, 2017 at 5:31 PM, Miklos Szeredi  wrote:
> On Wed, Oct 25, 2017 at 10:41 AM, Miklos Szeredi  wrote:
>> We discovered some problems in the latest fsnotify/fanotify codebase with
>> the help of a stress test (Xiong Zhou is working on upstreaming it to
>> fstests).
>>
>> This series attempts to fix these.  With the patch applied the stress test
>> passes.
>>
>> Please review/test.
>>
>> Changes in v2 (only cosmetic fixes, no functional change):
>>  - split first patch into 3 parts to make it more readable
>>  - checkpatch fixes
>>  - added cleanup patch for fanotify
>
> Pushed v3 (again with just cosmetics) to:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git 
> fsnotify-fixes-v3
>

Reviewed-by: Amir Goldstein 
for the series


Re: [PATCH v2 0/7] fix fanotify issues with the series in v4.12

2017-10-25 Thread Amir Goldstein
On Wed, Oct 25, 2017 at 5:31 PM, Miklos Szeredi  wrote:
> On Wed, Oct 25, 2017 at 10:41 AM, Miklos Szeredi  wrote:
>> We discovered some problems in the latest fsnotify/fanotify codebase with
>> the help of a stress test (Xiong Zhou is working on upstreaming it to
>> fstests).
>>
>> This series attempts to fix these.  With the patch applied the stress test
>> passes.
>>
>> Please review/test.
>>
>> Changes in v2 (only cosmetic fixes, no functional change):
>>  - split first patch into 3 parts to make it more readable
>>  - checkpatch fixes
>>  - added cleanup patch for fanotify
>
> Pushed v3 (again with just cosmetics) to:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git 
> fsnotify-fixes-v3
>

Reviewed-by: Amir Goldstein 
for the series


Re: [PATCH 0/3] arm64: remove some unused defconfig options

2017-10-25 Thread Alex Elder
On 10/25/2017 02:02 AM, Andy Gross wrote:
> On Tue, Oct 24, 2017 at 05:20:20PM -0700, Stephen Boyd wrote:
>> On 10/20, Alex Elder wrote:
>>> This series deletes three config options related to USB on Qualcomm
>>> SoCs from the arm64 "defconfig", along with the code that they
>>> enable.  The code is no longer needed by any Qualcomm hardware.  
>>>
>>> -Alex
>>>
>>> Alex Elder (3):
>>>   arm64: defconfig: remove CONFIG_USB_EHCI_MSM
>>>   arm64: defconfig: remove CONFIG_USB_MSM_OTG
>>>   arm64: defconfig: remove CONFIG_USB_QCOM_8X16_PHY
>>
>> As I said off-list, I would split this into defconfig for arm-soc
>> and drivers/usb for usb subsystem to pick up.
> 
> I agree with splitting.  But carry my ACKs when you do.

I will.  And I'll send two series, one for defconfig and
one for the drivers.  Thanks.

-Alex



Re: [PATCH 0/3] arm64: remove some unused defconfig options

2017-10-25 Thread Alex Elder
On 10/25/2017 02:02 AM, Andy Gross wrote:
> On Tue, Oct 24, 2017 at 05:20:20PM -0700, Stephen Boyd wrote:
>> On 10/20, Alex Elder wrote:
>>> This series deletes three config options related to USB on Qualcomm
>>> SoCs from the arm64 "defconfig", along with the code that they
>>> enable.  The code is no longer needed by any Qualcomm hardware.  
>>>
>>> -Alex
>>>
>>> Alex Elder (3):
>>>   arm64: defconfig: remove CONFIG_USB_EHCI_MSM
>>>   arm64: defconfig: remove CONFIG_USB_MSM_OTG
>>>   arm64: defconfig: remove CONFIG_USB_QCOM_8X16_PHY
>>
>> As I said off-list, I would split this into defconfig for arm-soc
>> and drivers/usb for usb subsystem to pick up.
> 
> I agree with splitting.  But carry my ACKs when you do.

I will.  And I'll send two series, one for defconfig and
one for the drivers.  Thanks.

-Alex



[PATCH v6 0/6] Add MediaTek PMIC keys support

2017-10-25 Thread Chen Zhong
MediaTek PMICs are multi-function devices that can handle key interrupts.
Typically there are two keys attached to the PMIC, called pwrkey and homekey.
The pwrkey is usually used to wake the system up from sleep. The homekey can be
used as a volume-down key, depending on the board design. A long key press can
shut down the PMIC; this mode can be configured to require one key only or both
keys together.
This series adds support for the key functions of the MediaTek MT6397/MT6323 PMICs.

Changes since v5:
- use __maybe_unused annotation instead of #ifdef guard
- use of_* API instead of device_* API

Changes since v4:
- rebase to Linux 4.14-rc1
- add a common keyboard binding document
- use child device tree node to define each key

Changes since v3:
- make the naming to be consistent as mtk_pmic or MTK_PMIC
- add suspend/resume functions to enable/disable irq
- change binding properties to define wakeup sources

Changes since v2:
- use standard properties for keycodes and debounce time
- change to use platform_get_irq in leaf drivers
- use better ways to define IRQ resources

Changes since v1:
- create irq mappings in mfd core driver instead of leaf drivers
- remove some unused parts in mtk-pmic-keys driver

Chen Zhong (6):
  mfd: mt6397: create irq mappings in mfd core driver
  dt-bindings: input: Add common keyboard document bindings
  dt-bindings: input: Add document bindings for mtk-pmic-keys
  dt-bindings: mfd: Add bindings for the keys as subnode of PMIC
  input: Add MediaTek PMIC keys support
  mfd: mt6397: Add PMIC keys support to MT6397 driver

 Documentation/devicetree/bindings/input/keys.txt   |   8 +
 .../devicetree/bindings/input/mtk-pmic-keys.txt|  43 +++
 Documentation/devicetree/bindings/mfd/mt6397.txt   |   6 +
 drivers/input/keyboard/Kconfig |   9 +
 drivers/input/keyboard/Makefile|   1 +
 drivers/input/keyboard/mtk-pmic-keys.c | 339 +
 drivers/mfd/mt6397-core.c  |  26 +-
 drivers/rtc/rtc-mt6397.c   |   7 +-
 8 files changed, 432 insertions(+), 7 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/input/keys.txt
 create mode 100644 Documentation/devicetree/bindings/input/mtk-pmic-keys.txt
 create mode 100644 drivers/input/keyboard/mtk-pmic-keys.c

-- 
1.9.1




[PATCH v6 0/6] Add MediaTek PMIC keys support

2017-10-25 Thread Chen Zhong
MediaTek PMICs are multi-function devices that can handle key interrupts.
Typically there are two keys attached to the PMIC, called pwrkey and homekey.
The pwrkey is usually used to wake the system up from sleep. The homekey can be
used as a volume-down key, depending on the board design. A long key press can
shut down the PMIC; this mode can be configured to require one key only or both
keys together.
This series adds support for the key functions of the MediaTek MT6397/MT6323 PMICs.

Changes since v5:
- use __maybe_unused annotation instead of #ifdef guard
- use of_* API instead of device_* API

Changes since v4:
- rebase to Linux 4.14-rc1
- add a common keyboard binding document
- use child device tree node to define each key

Changes since v3:
- make the naming to be consistent as mtk_pmic or MTK_PMIC
- add suspend/resume functions to enable/disable irq
- change binding properties to define wakeup sources

Changes since v2:
- use standard properties for keycodes and debounce time
- change to use platform_get_irq in leaf drivers
- use better ways to define IRQ resources

Changes since v1:
- create irq mappings in mfd core driver instead of leaf drivers
- remove some unused parts in mtk-pmic-keys driver

Chen Zhong (6):
  mfd: mt6397: create irq mappings in mfd core driver
  dt-bindings: input: Add common keyboard document bindings
  dt-bindings: input: Add document bindings for mtk-pmic-keys
  dt-bindings: mfd: Add bindings for the keys as subnode of PMIC
  input: Add MediaTek PMIC keys support
  mfd: mt6397: Add PMIC keys support to MT6397 driver

 Documentation/devicetree/bindings/input/keys.txt   |   8 +
 .../devicetree/bindings/input/mtk-pmic-keys.txt|  43 +++
 Documentation/devicetree/bindings/mfd/mt6397.txt   |   6 +
 drivers/input/keyboard/Kconfig |   9 +
 drivers/input/keyboard/Makefile|   1 +
 drivers/input/keyboard/mtk-pmic-keys.c | 339 +
 drivers/mfd/mt6397-core.c  |  26 +-
 drivers/rtc/rtc-mt6397.c   |   7 +-
 8 files changed, 432 insertions(+), 7 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/input/keys.txt
 create mode 100644 Documentation/devicetree/bindings/input/mtk-pmic-keys.txt
 create mode 100644 drivers/input/keyboard/mtk-pmic-keys.c

-- 
1.9.1




Re: alpha boot hang - 4.14-rc* regression

2017-10-25 Thread Lorenzo Pieralisi
On Wed, Oct 25, 2017 at 05:49:54PM +0300, Meelis Roos wrote:
> > > > > removing libata modules and rebooting fixes it - so it seems to be 
> > > > > loading of libata.
> > > > 
> > > > Can you please cherry-pick:
> > > > 
> > > > commit b1f9e5e355e9 ("ide: fix IRQ assignment for PCI bus order 
> > > > probing")
> > > > 
> > > > from mainline and let us know if that solves the issue ?
> > > 
> > > No, still breaks the same way (b1f9e5e355e9 patched on top of 
> > > 0e4c2eeb758a).
> > > 
> > > 4.14.0-rc5-00095-g1c9fec470b81 was also still broken the same way (tried 
> > > on Sunday).
> > 
> > I am not sure I patched the right sys file but if I did, does the patch
> > below help ?
> > 
> > I think that at sata driver binding time the kernel finds a freed
> > pointer in the host bridge map_irq() hook and that's where things
> > go wrong.
> > 
> > Please let me know if that's the right sys file, it is a mechanical
> > change and making it for other sys file should be reasonably simple.
> > 
> > Lorenzo
> > 
> > -- >8 --
> > diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
> 
> "Booting GENERIC on Tsunami variation Webbrick using machine vector 
> Webbrick from SRM"
> 
> Seems to be the correct file - tsunami is referenced from this file and 
> the IRQ-s are DP264.
> 
> But the patch does not make a difference :(

It is probably because I patched the wrong map_irq() function,
I am trying to detect which one you are _actually_ using, if
the patch below fails I will patch them all (which is what I
have to do anyway).

Please give this a go - this _has_ to make a difference, it is not
correct to leave map_irq() pointers as __init memory, IRQ routing
for modules can't work.

-- >8 --
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 6c35159..62fd7f1 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -356,7 +356,7 @@ clipper_init_irq(void)
  *  10  64 bit PCI option slot 3 (not bus 0)
  */
 
-static int __init
+static int
 isa_irq_fixup(const struct pci_dev *dev, int irq)
 {
u8 irq8;
@@ -372,10 +372,10 @@ isa_irq_fixup(const struct pci_dev *dev, int irq)
return irq8 & 0xf;
 }
 
-static int __init
+static int
 dp264_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
-   static char irq_tab[6][5] __initdata = {
+   static char irq_tab[6][5] = {
/*INTINTA   INTB   INTC   INTD */
{-1,-1,-1,-1,-1}, /* IdSel 5 ISA Bridge */
{ 16+ 3, 16+ 3, 16+ 2, 16+ 2, 16+ 2}, /* IdSel 6 SCSI builtin*/
@@ -456,10 +456,10 @@ monet_swizzle(struct pci_dev *dev, u8 *pinp)
return slot;
 }
 
-static int __init
+static int
 webbrick_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
-   static char irq_tab[13][5] __initdata = {
+   static char irq_tab[13][5] = {
/*INTINTA   INTB   INTC   INTD */
{-1,-1,-1,-1,-1}, /* IdSel 7 ISA Bridge */
{-1,-1,-1,-1,-1}, /* IdSel 8 unused */




Re: alpha boot hang - 4.14-rc* regression

2017-10-25 Thread Lorenzo Pieralisi
On Wed, Oct 25, 2017 at 05:49:54PM +0300, Meelis Roos wrote:
> > > > > removing libata modules and rebooting fixes it - so it seems to be 
> > > > > loading of libata.
> > > > 
> > > > Can you please cherry-pick:
> > > > 
> > > > commit b1f9e5e355e9 ("ide: fix IRQ assignment for PCI bus order 
> > > > probing")
> > > > 
> > > > from mainline and let us know if that solves the issue ?
> > > 
> > > No, still breaks the same way (b1f9e5e355e9 patched on top of 
> > > 0e4c2eeb758a).
> > > 
> > > 4.14.0-rc5-00095-g1c9fec470b81 was also still broken the same way (tried 
> > > on Sunday).
> > 
> > I am not sure I patched the right sys file but if I did, does the patch
> > below help ?
> > 
> > I think that at sata driver binding time the kernel finds a freed
> > pointer in the host bridge map_irq() hook and that's where things
> > go wrong.
> > 
> > Please let me know if that's the right sys file, it is a mechanical
> > change and making it for other sys file should be reasonably simple.
> > 
> > Lorenzo
> > 
> > -- >8 --
> > diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
> 
> "Booting GENERIC on Tsunami variation Webbrick using machine vector 
> Webbrick from SRM"
> 
> Seems to be the correct file - tsunami is referenced from this file and 
> the IRQ-s are DP264.
> 
> But the patch does not make a difference :(

It is probably because I patched the wrong map_irq() function,
I am trying to detect which one you are _actually_ using, if
the patch below fails I will patch them all (which is what I
have to do anyway).

Please give this a go - this _has_ to make a difference, it is not
correct to leave map_irq() pointers as __init memory, IRQ routing
for modules can't work.

-- >8 --
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 6c35159..62fd7f1 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -356,7 +356,7 @@ clipper_init_irq(void)
  *  10  64 bit PCI option slot 3 (not bus 0)
  */
 
-static int __init
+static int
 isa_irq_fixup(const struct pci_dev *dev, int irq)
 {
u8 irq8;
@@ -372,10 +372,10 @@ isa_irq_fixup(const struct pci_dev *dev, int irq)
return irq8 & 0xf;
 }
 
-static int __init
+static int
 dp264_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
-   static char irq_tab[6][5] __initdata = {
+   static char irq_tab[6][5] = {
/*INTINTA   INTB   INTC   INTD */
{-1,-1,-1,-1,-1}, /* IdSel 5 ISA Bridge */
{ 16+ 3, 16+ 3, 16+ 2, 16+ 2, 16+ 2}, /* IdSel 6 SCSI builtin*/
@@ -456,10 +456,10 @@ monet_swizzle(struct pci_dev *dev, u8 *pinp)
return slot;
 }
 
-static int __init
+static int
 webbrick_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
-   static char irq_tab[13][5] __initdata = {
+   static char irq_tab[13][5] = {
/*INTINTA   INTB   INTC   INTD */
{-1,-1,-1,-1,-1}, /* IdSel 7 ISA Bridge */
{-1,-1,-1,-1,-1}, /* IdSel 8 unused */




Re: [PATCH v3 1/1] xen/time: do not decrease steal time after live migration on xen

2017-10-25 Thread Boris Ostrovsky
On 10/25/2017 02:45 AM, Dongli Zhang wrote:
> After guest live migration on xen, steal time in /proc/stat
> (cpustat[CPUTIME_STEAL]) might decrease because steal returned by
> xen_steal_clock() might be less than this_rq()->prev_steal_time which is
> derived from previous return value of xen_steal_clock().
>
> For instance, steal time of each vcpu is 335 before live migration.
>
> cpu  198 0 368 200064 1962 0 0 1340 0 0
> cpu0 38 0 81 50063 492 0 0 335 0 0
> cpu1 65 0 97 49763 634 0 0 335 0 0
> cpu2 38 0 81 50098 462 0 0 335 0 0
> cpu3 56 0 107 50138 374 0 0 335 0 0
>
> After live migration, steal time is reduced to 312.
>
> cpu  200 0 370 200330 1971 0 0 1248 0 0
> cpu0 38 0 82 50123 500 0 0 312 0 0
> cpu1 65 0 97 49832 634 0 0 312 0 0
> cpu2 39 0 82 50167 462 0 0 312 0 0
> cpu3 56 0 107 50207 374 0 0 312 0 0
>
> Since runstate times are cumulative and cleared during xen live migration
> by xen hypervisor, the idea of this patch is to accumulate runstate times
> to global percpu variables before live migration suspend. Once guest VM is
> resumed, xen_get_runstate_snapshot_cpu() would always return the sum of new
> runstate times and previously accumulated times stored in global percpu
> variables.
>
> Similar and more severe issue would impact prior linux 4.8-4.10 as
> discussed by Michael Las at
> https://0xstubs.org/debugging-a-flaky-cpu-steal-time-counter-on-a-paravirtualized-xen-guest,
> which would overflow steal time and lead to 100% st usage in top command
> for linux 4.8-4.10. A backport of this patch would fix that issue.
>
> References: 
> https://0xstubs.org/debugging-a-flaky-cpu-steal-time-counter-on-a-paravirtualized-xen-guest
> Signed-off-by: Dongli Zhang 
>
> ---
> Changed since v1:
>   * relocate modification to xen_get_runstate_snapshot_cpu
>
> Changed since v2:
>   * accumulate runstate times before live migration
>
> ---
>  drivers/xen/manage.c  |  1 +
>  drivers/xen/time.c| 19 +++
>  include/xen/xen-ops.h |  1 +
>  3 files changed, 21 insertions(+)
>
> diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
> index c425d03..9aa2955 100644
> --- a/drivers/xen/manage.c
> +++ b/drivers/xen/manage.c
> @@ -72,6 +72,7 @@ static int xen_suspend(void *data)
>   }
>  
>   gnttab_suspend();
> + xen_accumulate_runstate_time();
>   xen_arch_pre_suspend();
>  
>   /*
> diff --git a/drivers/xen/time.c b/drivers/xen/time.c
> index ac5f23f..6df3f82 100644
> --- a/drivers/xen/time.c
> +++ b/drivers/xen/time.c
> @@ -19,6 +19,8 @@
>  /* runstate info updated by Xen */
>  static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
>  
> +static DEFINE_PER_CPU(u64[4], old_runstate_time);
> +
>  /* return an consistent snapshot of 64-bit time/counter value */
>  static u64 get64(const u64 *p)
>  {
> @@ -52,6 +54,7 @@ static void xen_get_runstate_snapshot_cpu(struct 
> vcpu_runstate_info *res,
>  {
>   u64 state_time;
>   struct vcpu_runstate_info *state;
> + int i;
>  
>   BUG_ON(preemptible());
>  
> @@ -64,6 +67,22 @@ static void xen_get_runstate_snapshot_cpu(struct 
> vcpu_runstate_info *res,
>   rmb();  /* Hypervisor might update data. */
>   } while (get64(&state->state_entry_time) != state_time ||
>(state_time & XEN_RUNSTATE_UPDATE));
> +
> + for (i = 0; i < 4; i++)
> + res->time[i] += per_cpu(old_runstate_time, cpu)[i];
> +}
> +
> +void xen_accumulate_runstate_time(void)
> +{
> + struct vcpu_runstate_info state;
> + int cpu;
> +
> + for_each_possible_cpu(cpu) {
> + xen_get_runstate_snapshot_cpu(&state, cpu);
> + memcpy(per_cpu(old_runstate_time, cpu),
> + state.time,
> + 4 * sizeof(u64));

sizeof(old_runstate_time). (I think this should work for per_cpu variables)

> + }

Hmm.. This may not perform as intended if we are merely checkpointing
(or pausing) the guest (i.e. if HYPERVISOR_suspend() returns 1). We will
double-account for the last interval that the guest has run.

I'd rather not have yet another per-cpu variable but I can't think of
anything else. Perhaps you or others can come up with something better.

-boris

>  }
>  
>  /*
> diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> index 218e6aa..5680059 100644
> --- a/include/xen/xen-ops.h
> +++ b/include/xen/xen-ops.h
> @@ -32,6 +32,7 @@ void xen_resume_notifier_unregister(struct notifier_block 
> *nb);
>  bool xen_vcpu_stolen(int vcpu);
>  void xen_setup_runstate_info(int cpu);
>  void xen_time_setup_guest(void);
> +void xen_accumulate_runstate_time(void);
>  void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
>  u64 xen_steal_clock(int cpu);
>  



Re: [PATCH v3 1/1] xen/time: do not decrease steal time after live migration on xen

2017-10-25 Thread Boris Ostrovsky
On 10/25/2017 02:45 AM, Dongli Zhang wrote:
> After guest live migration on xen, steal time in /proc/stat
> (cpustat[CPUTIME_STEAL]) might decrease because steal returned by
> xen_steal_clock() might be less than this_rq()->prev_steal_time which is
> derived from previous return value of xen_steal_clock().
>
> For instance, steal time of each vcpu is 335 before live migration.
>
> cpu  198 0 368 200064 1962 0 0 1340 0 0
> cpu0 38 0 81 50063 492 0 0 335 0 0
> cpu1 65 0 97 49763 634 0 0 335 0 0
> cpu2 38 0 81 50098 462 0 0 335 0 0
> cpu3 56 0 107 50138 374 0 0 335 0 0
>
> After live migration, steal time is reduced to 312.
>
> cpu  200 0 370 200330 1971 0 0 1248 0 0
> cpu0 38 0 82 50123 500 0 0 312 0 0
> cpu1 65 0 97 49832 634 0 0 312 0 0
> cpu2 39 0 82 50167 462 0 0 312 0 0
> cpu3 56 0 107 50207 374 0 0 312 0 0
>
> Since runstate times are cumulative and cleared during xen live migration
> by xen hypervisor, the idea of this patch is to accumulate runstate times
> to global percpu variables before live migration suspend. Once guest VM is
> resumed, xen_get_runstate_snapshot_cpu() would always return the sum of new
> runstate times and previously accumulated times stored in global percpu
> variables.
>
> Similar and more severe issue would impact prior linux 4.8-4.10 as
> discussed by Michael Las at
> https://0xstubs.org/debugging-a-flaky-cpu-steal-time-counter-on-a-paravirtualized-xen-guest,
> which would overflow steal time and lead to 100% st usage in top command
> for linux 4.8-4.10. A backport of this patch would fix that issue.
>
> References: 
> https://0xstubs.org/debugging-a-flaky-cpu-steal-time-counter-on-a-paravirtualized-xen-guest
> Signed-off-by: Dongli Zhang 
>
> ---
> Changed since v1:
>   * relocate modification to xen_get_runstate_snapshot_cpu
>
> Changed since v2:
>   * accumulate runstate times before live migration
>
> ---
>  drivers/xen/manage.c  |  1 +
>  drivers/xen/time.c| 19 +++
>  include/xen/xen-ops.h |  1 +
>  3 files changed, 21 insertions(+)
>
> diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
> index c425d03..9aa2955 100644
> --- a/drivers/xen/manage.c
> +++ b/drivers/xen/manage.c
> @@ -72,6 +72,7 @@ static int xen_suspend(void *data)
>   }
>  
>   gnttab_suspend();
> + xen_accumulate_runstate_time();
>   xen_arch_pre_suspend();
>  
>   /*
> diff --git a/drivers/xen/time.c b/drivers/xen/time.c
> index ac5f23f..6df3f82 100644
> --- a/drivers/xen/time.c
> +++ b/drivers/xen/time.c
> @@ -19,6 +19,8 @@
>  /* runstate info updated by Xen */
>  static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
>  
> +static DEFINE_PER_CPU(u64[4], old_runstate_time);
> +
>  /* return an consistent snapshot of 64-bit time/counter value */
>  static u64 get64(const u64 *p)
>  {
> @@ -52,6 +54,7 @@ static void xen_get_runstate_snapshot_cpu(struct 
> vcpu_runstate_info *res,
>  {
>   u64 state_time;
>   struct vcpu_runstate_info *state;
> + int i;
>  
>   BUG_ON(preemptible());
>  
> @@ -64,6 +67,22 @@ static void xen_get_runstate_snapshot_cpu(struct 
> vcpu_runstate_info *res,
>   rmb();  /* Hypervisor might update data. */
>   } while (get64(&state->state_entry_time) != state_time ||
>(state_time & XEN_RUNSTATE_UPDATE));
> +
> + for (i = 0; i < 4; i++)
> + res->time[i] += per_cpu(old_runstate_time, cpu)[i];
> +}
> +
> +void xen_accumulate_runstate_time(void)
> +{
> + struct vcpu_runstate_info state;
> + int cpu;
> +
> + for_each_possible_cpu(cpu) {
> + xen_get_runstate_snapshot_cpu(&state, cpu);
> + memcpy(per_cpu(old_runstate_time, cpu),
> + state.time,
> + 4 * sizeof(u64));

sizeof(old_runstate_time). (I think this should work for per_cpu variables)

> + }

Hmm.. This may not perform as intended if we are merely checkpointing
(or pausing) the guest (i.e. if HYPERVISOR_suspend() returns 1). We will
double-account for the last interval that the guest has run.

I'd rather not have yet another per-cpu variable but I can't think of
anything else. Perhaps you or others can come up with something better.

-boris

>  }
>  
>  /*
> diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> index 218e6aa..5680059 100644
> --- a/include/xen/xen-ops.h
> +++ b/include/xen/xen-ops.h
> @@ -32,6 +32,7 @@ void xen_resume_notifier_unregister(struct notifier_block 
> *nb);
>  bool xen_vcpu_stolen(int vcpu);
>  void xen_setup_runstate_info(int cpu);
>  void xen_time_setup_guest(void);
> +void xen_accumulate_runstate_time(void);
>  void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
>  u64 xen_steal_clock(int cpu);
>  



[PATCH] xen/gntdev: avoid out of bounds access in case of partial gntdev_mmap()

2017-10-25 Thread Juergen Gross
In case gntdev_mmap() succeeds only partially in mapping grant pages,
it will leave uninitialized some vital information that is needed later
for cleanup. This will lead to an out-of-bounds array access when
unmapping the already mapped pages.

So just initialize the data needed for unmapping the pages a little bit
earlier.

Cc: 
Reported-by: Arthur Borsboom 
Signed-off-by: Juergen Gross 
---
 drivers/xen/gntdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 82360594fa8e..57efbd3b053b 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -1024,6 +1024,7 @@ static int gntdev_mmap(struct file *flip, struct 
vm_area_struct *vma)
mutex_unlock(&priv->lock);
 
if (use_ptemod) {
+   map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
  vma->vm_end - vma->vm_start,
  find_grant_ptes, map);
@@ -1061,7 +1062,6 @@ static int gntdev_mmap(struct file *flip, struct 
vm_area_struct *vma)
set_grant_ptes_as_special, NULL);
}
 #endif
-   map->pages_vm_start = vma->vm_start;
}
 
return 0;
-- 
2.12.3



[PATCH] xen/gntdev: avoid out of bounds access in case of partial gntdev_mmap()

2017-10-25 Thread Juergen Gross
In case gntdev_mmap() succeeds only partially in mapping grant pages,
it will leave uninitialized some vital information that is needed later
for cleanup. This will lead to an out-of-bounds array access when
unmapping the already mapped pages.

So just initialize the data needed for unmapping the pages a little bit
earlier.

Cc: 
Reported-by: Arthur Borsboom 
Signed-off-by: Juergen Gross 
---
 drivers/xen/gntdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 82360594fa8e..57efbd3b053b 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -1024,6 +1024,7 @@ static int gntdev_mmap(struct file *flip, struct 
vm_area_struct *vma)
mutex_unlock(&priv->lock);
 
if (use_ptemod) {
+   map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
  vma->vm_end - vma->vm_start,
  find_grant_ptes, map);
@@ -1061,7 +1062,6 @@ static int gntdev_mmap(struct file *flip, struct 
vm_area_struct *vma)
set_grant_ptes_as_special, NULL);
}
 #endif
-   map->pages_vm_start = vma->vm_start;
}
 
return 0;
-- 
2.12.3



<    2   3   4   5   6   7   8   9   10   11   >