Re: [PATCH v8 6/6] powerpc/fadump: use the new parse_args callback arguments

2017-09-29 Thread Hari Bathini

In case, someone wishes for a changelog:


With fadump_rework_cmdline_params() function, parse_args() callback 
function,


taking new arguments - current & next, use them to process 
'fadump_extra_args='


parmeter, in enforcing the parameters passed through it for fadump kernel.


On Tuesday 12 September 2017 09:31 PM, Michal Suchanek wrote:

Signed-off-by: Michal Suchanek 
---
  arch/powerpc/kernel/fadump.c | 47 
  1 file changed, 13 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8778e1cc0380..1678d99ea835 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -481,33 +481,19 @@ struct param_info {
  };

  static void __init fadump_update_params(struct param_info *param_info,
-   char *param, char *val)
+   char *param, char *val,
+   char *currant, char *next)
  {
-   ptrdiff_t param_offset = param - param_info->tmp_cmdline;
+   ptrdiff_t param_offset = currant - param_info->tmp_cmdline;
size_t vallen = val ? strlen(val) : 0;
char *tgt = param_info->cmdline + param_offset
- param_info->shortening;
-   int shortening = 0;
-   int quoted = 0;
+   int shortening = ((next - 1) - (currant))
+   - (FADUMP_EXTRA_ARGS_LEN + 1 + vallen);

if (!val)
return;

-   /* leading '"' removed from parameter */
-   if ((param > param_info->tmp_cmdline) && *(param - 1) == '"') {
-   quoted = 1;
-   shortening += 1;
-   tgt--;
-   }
-
-   /* next_arg removes one leading and one trailing '"' */
-   if ((*(tgt + FADUMP_EXTRA_ARGS_LEN + 1 + vallen + shortening) == '"') &&
-   (quoted || (*(tgt + FADUMP_EXTRA_ARGS_LEN + 1) == '"'))) {
-   shortening += 1;
-   if (!quoted)
-   shortening += 1;
-   }
-
/* remove one leading and one trailing quote if both are present */
if ((val[0] == '"') && (val[vallen - 1] == '"')) {
shortening += 2;
@@ -515,22 +501,15 @@ static void __init fadump_update_params(struct param_info 
*param_info,
val++;
}

-   /* some characters were removed - move the trailing part of cmdline */
-   if (shortening) {
-   char *src;
+   strncpy(tgt, FADUMP_EXTRA_ARGS_PARAM, FADUMP_EXTRA_ARGS_LEN);
+   tgt += FADUMP_EXTRA_ARGS_LEN;
+   *tgt++ = ' ';
+   strncpy(tgt, val, vallen);
+   tgt += vallen;

-   strncpy(tgt, FADUMP_EXTRA_ARGS_PARAM, FADUMP_EXTRA_ARGS_LEN);
-   tgt += FADUMP_EXTRA_ARGS_LEN;
-   *tgt++ = ' ';
-
-   strncpy(tgt, val, vallen);
-   tgt += vallen;
-
-   src = tgt + shortening;
+   if (shortening) {
+   char *src = tgt + shortening;
memmove(tgt, src, strlen(src) + 1);
-   } else {
-   /* remove the '=' */
-   *(tgt + FADUMP_EXTRA_ARGS_LEN) = ' ';
}

param_info->shortening += shortening;
@@ -550,7 +529,7 @@ static int __init fadump_rework_cmdline_params(char *param, 
char *val,
 strlen(FADUMP_EXTRA_ARGS_PARAM) - 1))
return 0;

-   fadump_update_params(param_info, param, val);
+   fadump_update_params(param_info, param, val, currant, next);

return 0;
  }



Thanks
Hari



Re: [PATCH v8 6/6] powerpc/fadump: use the new parse_args callback arguments

2017-09-29 Thread Hari Bathini

In case, someone wishes for a changelog:


With fadump_rework_cmdline_params() function, parse_args() callback 
function,


taking new arguments - current & next, use them to process 
'fadump_extra_args='


parmeter, in enforcing the parameters passed through it for fadump kernel.


On Tuesday 12 September 2017 09:31 PM, Michal Suchanek wrote:

Signed-off-by: Michal Suchanek 
---
  arch/powerpc/kernel/fadump.c | 47 
  1 file changed, 13 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8778e1cc0380..1678d99ea835 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -481,33 +481,19 @@ struct param_info {
  };

  static void __init fadump_update_params(struct param_info *param_info,
-   char *param, char *val)
+   char *param, char *val,
+   char *currant, char *next)
  {
-   ptrdiff_t param_offset = param - param_info->tmp_cmdline;
+   ptrdiff_t param_offset = currant - param_info->tmp_cmdline;
size_t vallen = val ? strlen(val) : 0;
char *tgt = param_info->cmdline + param_offset
- param_info->shortening;
-   int shortening = 0;
-   int quoted = 0;
+   int shortening = ((next - 1) - (currant))
+   - (FADUMP_EXTRA_ARGS_LEN + 1 + vallen);

if (!val)
return;

-   /* leading '"' removed from parameter */
-   if ((param > param_info->tmp_cmdline) && *(param - 1) == '"') {
-   quoted = 1;
-   shortening += 1;
-   tgt--;
-   }
-
-   /* next_arg removes one leading and one trailing '"' */
-   if ((*(tgt + FADUMP_EXTRA_ARGS_LEN + 1 + vallen + shortening) == '"') &&
-   (quoted || (*(tgt + FADUMP_EXTRA_ARGS_LEN + 1) == '"'))) {
-   shortening += 1;
-   if (!quoted)
-   shortening += 1;
-   }
-
/* remove one leading and one trailing quote if both are present */
if ((val[0] == '"') && (val[vallen - 1] == '"')) {
shortening += 2;
@@ -515,22 +501,15 @@ static void __init fadump_update_params(struct param_info 
*param_info,
val++;
}

-   /* some characters were removed - move the trailing part of cmdline */
-   if (shortening) {
-   char *src;
+   strncpy(tgt, FADUMP_EXTRA_ARGS_PARAM, FADUMP_EXTRA_ARGS_LEN);
+   tgt += FADUMP_EXTRA_ARGS_LEN;
+   *tgt++ = ' ';
+   strncpy(tgt, val, vallen);
+   tgt += vallen;

-   strncpy(tgt, FADUMP_EXTRA_ARGS_PARAM, FADUMP_EXTRA_ARGS_LEN);
-   tgt += FADUMP_EXTRA_ARGS_LEN;
-   *tgt++ = ' ';
-
-   strncpy(tgt, val, vallen);
-   tgt += vallen;
-
-   src = tgt + shortening;
+   if (shortening) {
+   char *src = tgt + shortening;
memmove(tgt, src, strlen(src) + 1);
-   } else {
-   /* remove the '=' */
-   *(tgt + FADUMP_EXTRA_ARGS_LEN) = ' ';
}

param_info->shortening += shortening;
@@ -550,7 +529,7 @@ static int __init fadump_rework_cmdline_params(char *param, 
char *val,
 strlen(FADUMP_EXTRA_ARGS_PARAM) - 1))
return 0;

-   fadump_update_params(param_info, param, val);
+   fadump_update_params(param_info, param, val, currant, next);

return 0;
  }



Thanks
Hari



Re: [PATCH -v2 03/18] sched/fair: Cure calc_cfs_shares() vs reweight_entity()

2017-09-29 Thread Morten Rasmussen
On Fri, Sep 29, 2017 at 01:38:53PM +0200, Peter Zijlstra wrote:
> On Fri, Sep 29, 2017 at 10:04:34AM +0100, Morten Rasmussen wrote:
> 
> > > - load = scale_load_down(cfs_rq->load.weight);
> > > + load = max(scale_load_down(cfs_rq->load.weight), cfs_rq->avg.load_avg);
> > 
> > We use cfs_rq->tg_load_avg_contrib (the filtered version of
> > cfs_rq->avg.load_avg) instead of cfs_rq->avg.load_avg further down, so I
> > think we should here too for consistency.
> > 
> > +   load = max(scale_load_down(cfs_rq->load.weight),
> > +  cfs_rq->tg_load_avg_contrib);
> > 
> 
> No; we must use tg_load_avg_contrib because that is what's inclded in
> tg_weight, but we want to add the most up-to-date value back in.

Agreed. Looking at it again it make sense.



Re: [PATCH -v2 03/18] sched/fair: Cure calc_cfs_shares() vs reweight_entity()

2017-09-29 Thread Morten Rasmussen
On Fri, Sep 29, 2017 at 01:38:53PM +0200, Peter Zijlstra wrote:
> On Fri, Sep 29, 2017 at 10:04:34AM +0100, Morten Rasmussen wrote:
> 
> > > - load = scale_load_down(cfs_rq->load.weight);
> > > + load = max(scale_load_down(cfs_rq->load.weight), cfs_rq->avg.load_avg);
> > 
> > We use cfs_rq->tg_load_avg_contrib (the filtered version of
> > cfs_rq->avg.load_avg) instead of cfs_rq->avg.load_avg further down, so I
> > think we should here too for consistency.
> > 
> > +   load = max(scale_load_down(cfs_rq->load.weight),
> > +  cfs_rq->tg_load_avg_contrib);
> > 
> 
> No; we must use tg_load_avg_contrib because that is what's inclded in
> tg_weight, but we want to add the most up-to-date value back in.

Agreed. Looking at it again it make sense.



Re: [PATCH 1/8] sched: Consistent task-state printing

2017-09-29 Thread Steven Rostedt
On Fri, 29 Sep 2017 13:50:16 +0200
Peter Zijlstra  wrote:

> On Mon, Sep 25, 2017 at 04:01:09PM -0400, Steven Rostedt wrote:
> > On Mon, 25 Sep 2017 14:07:48 +0200
> > Peter Zijlstra  wrote:
> >   
> > > +static inline char __task_state_to_char(unsigned int state)
> > > +{
> > > + static const char state_char[] = "RSDTtXZ";
> > > +
> > > + BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != sizeof(state_char) - 2);
> > >  
> > > - return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?';
> > > + return state_char[state];
> > > +}
> > > +
> > > +static inline char task_state_to_char(struct task_struct *tsk)
> > > +{
> > > + return __task_state_to_char(__get_task_state(tsk));
> > >  }
> > >
> > 
> > So far I'm fine with the patch set, but I hate the non descriptive "__"
> > prefix of __task_state_to_char(). Can we make this a bit more
> > descriptive, because every time I see it in other patches, I go back to
> > this patch to see if we are using the right function.
> > 
> > What about something like:
> > 
> > task_state_to_state_char(unsigned int state);
> > 
> > task_to_state_char(struct task_struct *tsk);
> > 
> > ?  
> 
> Hurmph.. naming.
> 
> How about something like so?
> 

I'm not picky, I just hate beginning underscores, and wish we can start
removing all functions that use them.

Acked-by: Steven Rostedt (VMware) 

-- Steve


Re: [PATCH 1/8] sched: Consistent task-state printing

2017-09-29 Thread Steven Rostedt
On Fri, 29 Sep 2017 13:50:16 +0200
Peter Zijlstra  wrote:

> On Mon, Sep 25, 2017 at 04:01:09PM -0400, Steven Rostedt wrote:
> > On Mon, 25 Sep 2017 14:07:48 +0200
> > Peter Zijlstra  wrote:
> >   
> > > +static inline char __task_state_to_char(unsigned int state)
> > > +{
> > > + static const char state_char[] = "RSDTtXZ";
> > > +
> > > + BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != sizeof(state_char) - 2);
> > >  
> > > - return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?';
> > > + return state_char[state];
> > > +}
> > > +
> > > +static inline char task_state_to_char(struct task_struct *tsk)
> > > +{
> > > + return __task_state_to_char(__get_task_state(tsk));
> > >  }
> > >
> > 
> > So far I'm fine with the patch set, but I hate the non descriptive "__"
> > prefix of __task_state_to_char(). Can we make this a bit more
> > descriptive, because every time I see it in other patches, I go back to
> > this patch to see if we are using the right function.
> > 
> > What about something like:
> > 
> > task_state_to_state_char(unsigned int state);
> > 
> > task_to_state_char(struct task_struct *tsk);
> > 
> > ?  
> 
> Hurmph.. naming.
> 
> How about something like so?
> 

I'm not picky, I just hate beginning underscores, and wish we can start
removing all functions that use them.

Acked-by: Steven Rostedt (VMware) 

-- Steve


[PATCH 1/3] drm/bridge: make drm_panel_bridge_remove more robust

2017-09-29 Thread Benjamin Gaignard
Make sure that bridge parameter is not NULL and can be safely
cast into a panel_bridge structure.

Signed-off-by: Benjamin Gaignard 
---
 drivers/gpu/drm/bridge/panel.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c
index e0cca19..6d99d4a 100644
--- a/drivers/gpu/drm/bridge/panel.c
+++ b/drivers/gpu/drm/bridge/panel.c
@@ -188,7 +188,15 @@ EXPORT_SYMBOL(drm_panel_bridge_add);
  */
 void drm_panel_bridge_remove(struct drm_bridge *bridge)
 {
-   struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge);
+   struct panel_bridge *panel_bridge;
+
+   if (!bridge)
+   return;
+
+   if (bridge->funcs != _bridge_bridge_funcs)
+   return;
+
+   panel_bridge = drm_bridge_to_panel_bridge(bridge);
 
drm_bridge_remove(bridge);
devm_kfree(panel_bridge->panel->dev, bridge);
-- 
2.7.4



[PATCH 1/3] drm/bridge: make drm_panel_bridge_remove more robust

2017-09-29 Thread Benjamin Gaignard
Make sure that bridge parameter is not NULL and can be safely
cast into a panel_bridge structure.

Signed-off-by: Benjamin Gaignard 
---
 drivers/gpu/drm/bridge/panel.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c
index e0cca19..6d99d4a 100644
--- a/drivers/gpu/drm/bridge/panel.c
+++ b/drivers/gpu/drm/bridge/panel.c
@@ -188,7 +188,15 @@ EXPORT_SYMBOL(drm_panel_bridge_add);
  */
 void drm_panel_bridge_remove(struct drm_bridge *bridge)
 {
-   struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge);
+   struct panel_bridge *panel_bridge;
+
+   if (!bridge)
+   return;
+
+   if (bridge->funcs != _bridge_bridge_funcs)
+   return;
+
+   panel_bridge = drm_bridge_to_panel_bridge(bridge);
 
drm_bridge_remove(bridge);
devm_kfree(panel_bridge->panel->dev, bridge);
-- 
2.7.4



[PATCH 2/3] drm/drm_of: add drm_of_panel_bridge_remove function

2017-09-29 Thread Benjamin Gaignard
This function is the pendant of drm_of_find_panel_or_bridge()
to remove a previously allocated panel_bridge.
Given a specific port and endpoint it remove the panel bridge.
Since drm_panel_bridge_remove() will check that bridge parameter
is not NULL and is a real drm_panel_bridge and no a simple bridge
it is safe to call it directly.

Signed-off-by: Benjamin Gaignard 
---
 drivers/gpu/drm/drm_of.c | 33 +
 include/drm/drm_of.h |  8 
 2 files changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c
index 8dafbdf..6b54f17 100644
--- a/drivers/gpu/drm/drm_of.c
+++ b/drivers/gpu/drm/drm_of.c
@@ -260,3 +260,36 @@ int drm_of_find_panel_or_bridge(const struct device_node 
*np,
return ret;
 }
 EXPORT_SYMBOL_GPL(drm_of_find_panel_or_bridge);
+
+#ifdef CONFIG_DRM_PANEL_BRIDGE
+/*
+ * drm_of_panel_bridge_remove - remove panel bridge
+ * @np: device tree node containing panel bridge output ports
+ *
+ * Remove the panel bridge of a given DT node's port and endpoint number
+ *
+ * Returns zero if successful, or one of the standard error codes if it fails.
+ */
+int drm_of_panel_bridge_remove(const struct device_node *np,
+  int port, int endpoint)
+{
+   struct drm_bridge *bridge;
+   struct device_node *remote;
+
+   remote = of_graph_get_remote_node(np, port, endpoint);
+   if (!remote)
+   return -ENODEV;
+
+   bridge = of_drm_find_bridge(remote);
+   drm_panel_bridge_remove(bridge);
+
+   return 0;
+}
+#else
+int drm_of_panel_bridge_remove(const struct device_node *np,
+  int port, int endpoint)
+{
+   return -EINVAL;
+}
+#endif
+EXPORT_SYMBOL_GPL(drm_of_panel_bridge_remove);
diff --git a/include/drm/drm_of.h b/include/drm/drm_of.h
index 104dd51..390966e 100644
--- a/include/drm/drm_of.h
+++ b/include/drm/drm_of.h
@@ -29,6 +29,8 @@ int drm_of_find_panel_or_bridge(const struct device_node *np,
int port, int endpoint,
struct drm_panel **panel,
struct drm_bridge **bridge);
+int drm_of_panel_bridge_remove(const struct device_node *np,
+  int port, int endpoint);
 #else
 static inline uint32_t drm_of_find_possible_crtcs(struct drm_device *dev,
  struct device_node *port)
@@ -65,6 +67,12 @@ static inline int drm_of_find_panel_or_bridge(const struct 
device_node *np,
 {
return -EINVAL;
 }
+
+static inline int drm_of_panel_bridge_remove(const struct device_node *np,
+int port, int endpoint)
+{
+   return -EINVAL;
+}
 #endif
 
 static inline int drm_of_encoder_active_endpoint_id(struct device_node *node,
-- 
2.7.4



[PATCH 2/3] drm/drm_of: add drm_of_panel_bridge_remove function

2017-09-29 Thread Benjamin Gaignard
This function is the pendant of drm_of_find_panel_or_bridge()
to remove a previously allocated panel_bridge.
Given a specific port and endpoint it remove the panel bridge.
Since drm_panel_bridge_remove() will check that bridge parameter
is not NULL and is a real drm_panel_bridge and no a simple bridge
it is safe to call it directly.

Signed-off-by: Benjamin Gaignard 
---
 drivers/gpu/drm/drm_of.c | 33 +
 include/drm/drm_of.h |  8 
 2 files changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c
index 8dafbdf..6b54f17 100644
--- a/drivers/gpu/drm/drm_of.c
+++ b/drivers/gpu/drm/drm_of.c
@@ -260,3 +260,36 @@ int drm_of_find_panel_or_bridge(const struct device_node 
*np,
return ret;
 }
 EXPORT_SYMBOL_GPL(drm_of_find_panel_or_bridge);
+
+#ifdef CONFIG_DRM_PANEL_BRIDGE
+/*
+ * drm_of_panel_bridge_remove - remove panel bridge
+ * @np: device tree node containing panel bridge output ports
+ *
+ * Remove the panel bridge of a given DT node's port and endpoint number
+ *
+ * Returns zero if successful, or one of the standard error codes if it fails.
+ */
+int drm_of_panel_bridge_remove(const struct device_node *np,
+  int port, int endpoint)
+{
+   struct drm_bridge *bridge;
+   struct device_node *remote;
+
+   remote = of_graph_get_remote_node(np, port, endpoint);
+   if (!remote)
+   return -ENODEV;
+
+   bridge = of_drm_find_bridge(remote);
+   drm_panel_bridge_remove(bridge);
+
+   return 0;
+}
+#else
+int drm_of_panel_bridge_remove(const struct device_node *np,
+  int port, int endpoint)
+{
+   return -EINVAL;
+}
+#endif
+EXPORT_SYMBOL_GPL(drm_of_panel_bridge_remove);
diff --git a/include/drm/drm_of.h b/include/drm/drm_of.h
index 104dd51..390966e 100644
--- a/include/drm/drm_of.h
+++ b/include/drm/drm_of.h
@@ -29,6 +29,8 @@ int drm_of_find_panel_or_bridge(const struct device_node *np,
int port, int endpoint,
struct drm_panel **panel,
struct drm_bridge **bridge);
+int drm_of_panel_bridge_remove(const struct device_node *np,
+  int port, int endpoint);
 #else
 static inline uint32_t drm_of_find_possible_crtcs(struct drm_device *dev,
  struct device_node *port)
@@ -65,6 +67,12 @@ static inline int drm_of_find_panel_or_bridge(const struct 
device_node *np,
 {
return -EINVAL;
 }
+
+static inline int drm_of_panel_bridge_remove(const struct device_node *np,
+int port, int endpoint)
+{
+   return -EINVAL;
+}
 #endif
 
 static inline int drm_of_encoder_active_endpoint_id(struct device_node *node,
-- 
2.7.4



[PATCH 3/3] drm/stm: ltdc: remove bridge from driver internal structure

2017-09-29 Thread Benjamin Gaignard
With a call to drm_of_panel_bridge_remove() we could remove the bridge
without store it in ldtc internal driver structure.

Signed-off-by: Benjamin Gaignard 
---
 drivers/gpu/drm/stm/ltdc.c | 16 +---
 drivers/gpu/drm/stm/ltdc.h |  2 --
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index d394a03..735c908 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -791,9 +791,8 @@ static const struct drm_encoder_funcs ltdc_encoder_funcs = {
.destroy = drm_encoder_cleanup,
 };
 
-static int ltdc_encoder_init(struct drm_device *ddev)
+static int ltdc_encoder_init(struct drm_device *ddev, struct drm_bridge 
*bridge)
 {
-   struct ltdc_device *ldev = ddev->dev_private;
struct drm_encoder *encoder;
int ret;
 
@@ -807,7 +806,7 @@ static int ltdc_encoder_init(struct drm_device *ddev)
drm_encoder_init(ddev, encoder, _encoder_funcs,
 DRM_MODE_ENCODER_DPI, NULL);
 
-   ret = drm_bridge_attach(encoder, ldev->bridge, NULL);
+   ret = drm_bridge_attach(encoder, bridge, NULL);
if (ret) {
drm_encoder_cleanup(encoder);
return -EINVAL;
@@ -936,12 +935,9 @@ int ltdc_load(struct drm_device *ddev)
ret = PTR_ERR(bridge);
goto err;
}
-   ldev->is_panel_bridge = true;
}
 
-   ldev->bridge = bridge;
-
-   ret = ltdc_encoder_init(ddev);
+   ret = ltdc_encoder_init(ddev, bridge);
if (ret) {
DRM_ERROR("Failed to init encoder\n");
goto err;
@@ -972,8 +968,7 @@ int ltdc_load(struct drm_device *ddev)
return 0;
 
 err:
-   if (ldev->is_panel_bridge)
-   drm_panel_bridge_remove(bridge);
+   drm_panel_bridge_remove(bridge);
 
clk_disable_unprepare(ldev->pixel_clk);
 
@@ -986,8 +981,7 @@ void ltdc_unload(struct drm_device *ddev)
 
DRM_DEBUG_DRIVER("\n");
 
-   if (ldev->is_panel_bridge)
-   drm_panel_bridge_remove(ldev->bridge);
+   drm_of_panel_bridge_remove(ddev->dev->of_node, 0, 0);
 
clk_disable_unprepare(ldev->pixel_clk);
 }
diff --git a/drivers/gpu/drm/stm/ltdc.h b/drivers/gpu/drm/stm/ltdc.h
index bc6d6f6..ae43755 100644
--- a/drivers/gpu/drm/stm/ltdc.h
+++ b/drivers/gpu/drm/stm/ltdc.h
@@ -24,8 +24,6 @@ struct ltdc_device {
struct drm_fbdev_cma *fbdev;
void __iomem *regs;
struct clk *pixel_clk;  /* lcd pixel clock */
-   struct drm_bridge *bridge;
-   bool is_panel_bridge;
struct mutex err_lock;  /* protecting error_status */
struct ltdc_caps caps;
u32 error_status;
-- 
2.7.4



[PATCH 3/3] drm/stm: ltdc: remove bridge from driver internal structure

2017-09-29 Thread Benjamin Gaignard
With a call to drm_of_panel_bridge_remove() we could remove the bridge
without store it in ldtc internal driver structure.

Signed-off-by: Benjamin Gaignard 
---
 drivers/gpu/drm/stm/ltdc.c | 16 +---
 drivers/gpu/drm/stm/ltdc.h |  2 --
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index d394a03..735c908 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -791,9 +791,8 @@ static const struct drm_encoder_funcs ltdc_encoder_funcs = {
.destroy = drm_encoder_cleanup,
 };
 
-static int ltdc_encoder_init(struct drm_device *ddev)
+static int ltdc_encoder_init(struct drm_device *ddev, struct drm_bridge 
*bridge)
 {
-   struct ltdc_device *ldev = ddev->dev_private;
struct drm_encoder *encoder;
int ret;
 
@@ -807,7 +806,7 @@ static int ltdc_encoder_init(struct drm_device *ddev)
drm_encoder_init(ddev, encoder, _encoder_funcs,
 DRM_MODE_ENCODER_DPI, NULL);
 
-   ret = drm_bridge_attach(encoder, ldev->bridge, NULL);
+   ret = drm_bridge_attach(encoder, bridge, NULL);
if (ret) {
drm_encoder_cleanup(encoder);
return -EINVAL;
@@ -936,12 +935,9 @@ int ltdc_load(struct drm_device *ddev)
ret = PTR_ERR(bridge);
goto err;
}
-   ldev->is_panel_bridge = true;
}
 
-   ldev->bridge = bridge;
-
-   ret = ltdc_encoder_init(ddev);
+   ret = ltdc_encoder_init(ddev, bridge);
if (ret) {
DRM_ERROR("Failed to init encoder\n");
goto err;
@@ -972,8 +968,7 @@ int ltdc_load(struct drm_device *ddev)
return 0;
 
 err:
-   if (ldev->is_panel_bridge)
-   drm_panel_bridge_remove(bridge);
+   drm_panel_bridge_remove(bridge);
 
clk_disable_unprepare(ldev->pixel_clk);
 
@@ -986,8 +981,7 @@ void ltdc_unload(struct drm_device *ddev)
 
DRM_DEBUG_DRIVER("\n");
 
-   if (ldev->is_panel_bridge)
-   drm_panel_bridge_remove(ldev->bridge);
+   drm_of_panel_bridge_remove(ddev->dev->of_node, 0, 0);
 
clk_disable_unprepare(ldev->pixel_clk);
 }
diff --git a/drivers/gpu/drm/stm/ltdc.h b/drivers/gpu/drm/stm/ltdc.h
index bc6d6f6..ae43755 100644
--- a/drivers/gpu/drm/stm/ltdc.h
+++ b/drivers/gpu/drm/stm/ltdc.h
@@ -24,8 +24,6 @@ struct ltdc_device {
struct drm_fbdev_cma *fbdev;
void __iomem *regs;
struct clk *pixel_clk;  /* lcd pixel clock */
-   struct drm_bridge *bridge;
-   bool is_panel_bridge;
struct mutex err_lock;  /* protecting error_status */
struct ltdc_caps caps;
u32 error_status;
-- 
2.7.4



[PATCH 0/3] Simplify panel bridge cleanup

2017-09-29 Thread Benjamin Gaignard
The goal of this series is to simplify driver code when they need to clean up
a previously allocated panel bridge.
Few drivers have "is_panel_bridge" flag to be able to distinguish a
drm_panel_bridge from "simple" drm_bridge.
To remove this flag I propose to
- let drm_panel_bridge_remove() check if the bridge provided in parameter is
  really a drm_panel_bridge.
- add drm_of_panel_bridge_remove() to remove a bridge given DT port and
  endpoint
Finally that allow to remove drm_bridge structure and "is_panel_bridge" flag
from stm driver internal structure.

Benjamin Gaignard (3):
  drm/bridge: make drm_panel_bridge_remove more robust
  drm/drm_of: add drm_of_panel_bridge_remove function
  drm/stm: ltdc: remove bridge from driver internal structure

 drivers/gpu/drm/bridge/panel.c | 10 +-
 drivers/gpu/drm/drm_of.c   | 33 +
 drivers/gpu/drm/stm/ltdc.c | 16 +---
 drivers/gpu/drm/stm/ltdc.h |  2 --
 include/drm/drm_of.h   |  8 
 5 files changed, 55 insertions(+), 14 deletions(-)

-- 
2.7.4



[PATCH 0/3] Simplify panel bridge cleanup

2017-09-29 Thread Benjamin Gaignard
The goal of this series is to simplify driver code when they need to clean up
a previously allocated panel bridge.
Few drivers have "is_panel_bridge" flag to be able to distinguish a
drm_panel_bridge from "simple" drm_bridge.
To remove this flag I propose to
- let drm_panel_bridge_remove() check if the bridge provided in parameter is
  really a drm_panel_bridge.
- add drm_of_panel_bridge_remove() to remove a bridge given DT port and
  endpoint
Finally that allow to remove drm_bridge structure and "is_panel_bridge" flag
from stm driver internal structure.

Benjamin Gaignard (3):
  drm/bridge: make drm_panel_bridge_remove more robust
  drm/drm_of: add drm_of_panel_bridge_remove function
  drm/stm: ltdc: remove bridge from driver internal structure

 drivers/gpu/drm/bridge/panel.c | 10 +-
 drivers/gpu/drm/drm_of.c   | 33 +
 drivers/gpu/drm/stm/ltdc.c | 16 +---
 drivers/gpu/drm/stm/ltdc.h |  2 --
 include/drm/drm_of.h   |  8 
 5 files changed, 55 insertions(+), 14 deletions(-)

-- 
2.7.4



Re: How to verify linux-next

2017-09-29 Thread valdis . kletnieks
On Fri, 29 Sep 2017 16:08:07 +0530, Pintu Kumar said:

> I have a general question.
> How do we normally verify linux-next tree?

The same exact way you "verify" any other Linux kernel, for whatever
definition of "verify" you plan to use.

> 1) For Oracle virtual box 5.1.26 with ubuntu-32 bit, the linux-next
> kernel is not booting.

Does an Ubuntu kernel boot correctly under VirtualBox?  If not, fix
that issue first.  Also, "is not booting" isn't detailed enough for anybody
to make even a guess as to what's wrong.

Also, note that 5.1.28 is out.


pgpRzDz1OOz_4.pgp
Description: PGP signature


Re: How to verify linux-next

2017-09-29 Thread valdis . kletnieks
On Fri, 29 Sep 2017 16:08:07 +0530, Pintu Kumar said:

> I have a general question.
> How do we normally verify linux-next tree?

The same exact way you "verify" any other Linux kernel, for whatever
definition of "verify" you plan to use.

> 1) For Oracle virtual box 5.1.26 with ubuntu-32 bit, the linux-next
> kernel is not booting.

Does an Ubuntu kernel boot correctly under VirtualBox?  If not, fix
that issue first.  Also, "is not booting" isn't detailed enough for anybody
to make even a guess as to what's wrong.

Also, note that 5.1.28 is out.


pgpRzDz1OOz_4.pgp
Description: PGP signature


Re: [PATCH v2 02/10] pinctrl: axp209: add pinctrl features (fwd)

2017-09-29 Thread Julia Lawall


On Fri, 29 Sep 2017, Quentin Schulz wrote:

> Hi Julia,
>
> On 29/09/2017 14:15, Julia Lawall wrote:
> > I'm not sure that it is allowed to do krealloc on devm allocated data.
> > See lins 468 and 485.
> >
>
> Indeed, from a glance at the code, it does not look like it is a good idea.
>
> For v3, this piece of code will be deleted anyway so it won't be a
> problem anymore.
>
> However, this logic is used in drivers/pinctrl/sunxi/pinctrl-sunxi.c[1][2]
>
> @Maxime, @Chen-Yu:
> We should check more thoroughly than what I did but I think Julia is right.
>
> The following is my understanding from a very quick look at the code.
>
> devm_kzalloc will register gpio->functions as a res of the device.
>
> However it's possible that the pointer is different after krealloc. In
> that case, krealloc will free the "old" gpio->functions[3] which is
> managed by devres.
>
> 1) We might be exposed to a free of a NULL pointer when devres takes
> care of unregistering the device.

I guess it would be a double free?  krealloc won't update the devres view
of the pointer.

> 2) The "new" gpio->functions would never be freed.

That too.

julia

>
> Is that correct? If so, we should get rid of devm_kzalloc in favor of a
> simple kzalloc and free the pointer in the remove function of the driver.
>
> [1]
> http://elixir.free-electrons.com/linux/latest/source/drivers/pinctrl/sunxi/pinctrl-sunxi.c#L1078
> [2]
> http://elixir.free-electrons.com/linux/latest/source/drivers/pinctrl/sunxi/pinctrl-sunxi.c#L1107
> [3]
> http://elixir.free-electrons.com/linux/latest/source/mm/slab_common.c#L1414
>
> Thanks,
> Quentin
>
> > julia
> >
> > -- Forwarded message --
> > Date: Fri, 29 Sep 2017 20:00:03 +0800
> > From: kbuild test robot <fengguang...@intel.com>
> > To: kbu...@01.org
> > Cc: Julia Lawall <julia.law...@lip6.fr>
> > Subject: Re: [PATCH v2 02/10] pinctrl: axp209: add pinctrl features
> >
> > Hi Quentin,
> >
> > [auto build test WARNING on ]
> >
> > url:
> > https://github.com/0day-ci/linux/commits/Quentin-Schulz/add-pinmuxing-support-for-pins-in-AXP209-and-AXP813-PMICs/20170929-162846
> > base:
> > :: branch date: 4 hours ago
> > :: commit date: 4 hours ago
> >
> >>> drivers/pinctrl/pinctrl-axp209.c:485:19-27: WARNING: invalid free of 
> >>> devm_ allocated data
> >
> > # 
> > https://github.com/0day-ci/linux/commit/1e016076fb841f90f047d2b001c9f8d9fd5e2953
> > git remote add linux-review https://github.com/0day-ci/linux
> > git remote update linux-review
> > git checkout 1e016076fb841f90f047d2b001c9f8d9fd5e2953
> > vim +485 drivers/pinctrl/pinctrl-axp209.c
> >
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  446
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  447  
> > static int axp20x_build_state(struct platform_device *pdev)
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  448  
> > {
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  449  
> > struct axp20x_gpio *gpio = platform_get_drvdata(pdev);
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  450  
> > unsigned int npins = gpio->desc->npins;
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  451  
> > const struct axp20x_desc_pin *pin;
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  452  
> > struct axp20x_desc_function *func;
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  453  
> > int i, ret;
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  454
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  455  
> > gpio->ngroups = npins;
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  456  
> > gpio->groups = devm_kzalloc(>dev,
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  457  
> > gpio->ngroups * sizeof(*gpio->groups),
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  458  
> > GFP_KERNEL);
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  459  
> > if (!gpio->groups)
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  460  
> > return -ENOMEM;
> > 1e016076fb drivers/pinctr

Re: [PATCH v2 02/10] pinctrl: axp209: add pinctrl features (fwd)

2017-09-29 Thread Julia Lawall


On Fri, 29 Sep 2017, Quentin Schulz wrote:

> Hi Julia,
>
> On 29/09/2017 14:15, Julia Lawall wrote:
> > I'm not sure that it is allowed to do krealloc on devm allocated data.
> > See lins 468 and 485.
> >
>
> Indeed, from a glance at the code, it does not look like it is a good idea.
>
> For v3, this piece of code will be deleted anyway so it won't be a
> problem anymore.
>
> However, this logic is used in drivers/pinctrl/sunxi/pinctrl-sunxi.c[1][2]
>
> @Maxime, @Chen-Yu:
> We should check more thoroughly than what I did but I think Julia is right.
>
> The following is my understanding from a very quick look at the code.
>
> devm_kzalloc will register gpio->functions as a res of the device.
>
> However it's possible that the pointer is different after krealloc. In
> that case, krealloc will free the "old" gpio->functions[3] which is
> managed by devres.
>
> 1) We might be exposed to a free of a NULL pointer when devres takes
> care of unregistering the device.

I guess it would be a double free?  krealloc won't update the devres view
of the pointer.

> 2) The "new" gpio->functions would never be freed.

That too.

julia

>
> Is that correct? If so, we should get rid of devm_kzalloc in favor of a
> simple kzalloc and free the pointer in the remove function of the driver.
>
> [1]
> http://elixir.free-electrons.com/linux/latest/source/drivers/pinctrl/sunxi/pinctrl-sunxi.c#L1078
> [2]
> http://elixir.free-electrons.com/linux/latest/source/drivers/pinctrl/sunxi/pinctrl-sunxi.c#L1107
> [3]
> http://elixir.free-electrons.com/linux/latest/source/mm/slab_common.c#L1414
>
> Thanks,
> Quentin
>
> > julia
> >
> > -- Forwarded message --
> > Date: Fri, 29 Sep 2017 20:00:03 +0800
> > From: kbuild test robot 
> > To: kbu...@01.org
> > Cc: Julia Lawall 
> > Subject: Re: [PATCH v2 02/10] pinctrl: axp209: add pinctrl features
> >
> > Hi Quentin,
> >
> > [auto build test WARNING on ]
> >
> > url:
> > https://github.com/0day-ci/linux/commits/Quentin-Schulz/add-pinmuxing-support-for-pins-in-AXP209-and-AXP813-PMICs/20170929-162846
> > base:
> > :: branch date: 4 hours ago
> > :: commit date: 4 hours ago
> >
> >>> drivers/pinctrl/pinctrl-axp209.c:485:19-27: WARNING: invalid free of 
> >>> devm_ allocated data
> >
> > # 
> > https://github.com/0day-ci/linux/commit/1e016076fb841f90f047d2b001c9f8d9fd5e2953
> > git remote add linux-review https://github.com/0day-ci/linux
> > git remote update linux-review
> > git checkout 1e016076fb841f90f047d2b001c9f8d9fd5e2953
> > vim +485 drivers/pinctrl/pinctrl-axp209.c
> >
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  446
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  447  
> > static int axp20x_build_state(struct platform_device *pdev)
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  448  
> > {
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  449  
> > struct axp20x_gpio *gpio = platform_get_drvdata(pdev);
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  450  
> > unsigned int npins = gpio->desc->npins;
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  451  
> > const struct axp20x_desc_pin *pin;
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  452  
> > struct axp20x_desc_function *func;
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  453  
> > int i, ret;
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  454
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  455  
> > gpio->ngroups = npins;
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  456  
> > gpio->groups = devm_kzalloc(>dev,
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  457  
> > gpio->ngroups * sizeof(*gpio->groups),
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  458  
> > GFP_KERNEL);
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  459  
> > if (!gpio->groups)
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  460  
> > return -ENOMEM;
> > 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  461
> >

Re: [PATCH 3/3 v12] printk: Add monotonic, boottime, and realtime timestamps

2017-09-29 Thread Pavel Machek
Hi!

> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index b19c491cbc4e..e21b0c002d0f 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -8,12 +8,58 @@ config PRINTK_TIME
> messages to be added to the output of the syslog() system
> call and at the console.
>  
> +choice
> + prompt "printk default clock timestamp" if PRINTK_TIME
> + default PRINTK_TIME_LOCAL if PRINTK_TIME
> + help
> +   This option is selected by setting one of
> +   PRINTK_TIME_[DISABLE|LOCAL|BOOT|MONO|REAL] and causes time stamps of
> +   the printk() messages to be added to the output of the syslog()
> +   system call and at the console.
> +
> The timestamp is always recorded internally, and exported
> to /dev/kmsg. This flag just specifies if the timestamp should
> be included, not that the timestamp is recorded.
>  
> The behavior is also controlled by the kernel command line
> -   parameter printk.time=1. See 
> Documentation/admin-guide/kernel-parameters.rst
> +   parameter printk.time. See
> +   Documentation/admin-guide/kernel-parameters.rst
> +
> +config PRINTK_TIME_LOCAL
> + bool "Local Clock"
> + help
> +   Selecting this option causes the time stamps of printk() to be
> +   stamped with the unadjusted hardware clock.
> +
> +config PRINTK_TIME_BOOT
> + bool "Boot Time Clock"
> + help
> +   Selecting this option causes the time stamps of printk() to be
> +   stamped with the adjusted boottime clock.
> +
> +config PRINTK_TIME_MONO
> + bool "Monotonic Clock"
> + help
> +   Selecting this option causes the time stamps of printk() to be
> +   stamped with the adjusted monotonic clock.
> +
> +config PRINTK_TIME_REAL
> + bool "Real Time Clock"
> + help
> +   Selecting this option causes the time stamps of printk() to be
> +   stamped with the adjusted realtime clock (UTC).
> +endchoice
> +
> +config PRINTK_TIME_TYPE
> + int
> + depends on PRINTK
> + range 0 4
> + default 0 if !PRINTK_TIME
> + default 1 if PRINTK_TIME_LOCAL
> + default 2 if PRINTK_TIME_BOOT
> + default 3 if PRINTK_TIME_MONO
> + default 4 if PRINTK_TIME_REAL
> +
>  
>  config CONSOLE_LOGLEVEL_DEFAULT
>   int "Default console loglevel (1-15)"

Come on, you don't need to ask user just to set default value to be
overriden by command line. This is not nearly important enough.

Drop it. You can still use CONFIG_CMDLINE="...".

Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html


signature.asc
Description: Digital signature


Re: [PATCH 3/3 v12] printk: Add monotonic, boottime, and realtime timestamps

2017-09-29 Thread Pavel Machek
Hi!

> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index b19c491cbc4e..e21b0c002d0f 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -8,12 +8,58 @@ config PRINTK_TIME
> messages to be added to the output of the syslog() system
> call and at the console.
>  
> +choice
> + prompt "printk default clock timestamp" if PRINTK_TIME
> + default PRINTK_TIME_LOCAL if PRINTK_TIME
> + help
> +   This option is selected by setting one of
> +   PRINTK_TIME_[DISABLE|LOCAL|BOOT|MONO|REAL] and causes time stamps of
> +   the printk() messages to be added to the output of the syslog()
> +   system call and at the console.
> +
> The timestamp is always recorded internally, and exported
> to /dev/kmsg. This flag just specifies if the timestamp should
> be included, not that the timestamp is recorded.
>  
> The behavior is also controlled by the kernel command line
> -   parameter printk.time=1. See 
> Documentation/admin-guide/kernel-parameters.rst
> +   parameter printk.time. See
> +   Documentation/admin-guide/kernel-parameters.rst
> +
> +config PRINTK_TIME_LOCAL
> + bool "Local Clock"
> + help
> +   Selecting this option causes the time stamps of printk() to be
> +   stamped with the unadjusted hardware clock.
> +
> +config PRINTK_TIME_BOOT
> + bool "Boot Time Clock"
> + help
> +   Selecting this option causes the time stamps of printk() to be
> +   stamped with the adjusted boottime clock.
> +
> +config PRINTK_TIME_MONO
> + bool "Monotonic Clock"
> + help
> +   Selecting this option causes the time stamps of printk() to be
> +   stamped with the adjusted monotonic clock.
> +
> +config PRINTK_TIME_REAL
> + bool "Real Time Clock"
> + help
> +   Selecting this option causes the time stamps of printk() to be
> +   stamped with the adjusted realtime clock (UTC).
> +endchoice
> +
> +config PRINTK_TIME_TYPE
> + int
> + depends on PRINTK
> + range 0 4
> + default 0 if !PRINTK_TIME
> + default 1 if PRINTK_TIME_LOCAL
> + default 2 if PRINTK_TIME_BOOT
> + default 3 if PRINTK_TIME_MONO
> + default 4 if PRINTK_TIME_REAL
> +
>  
>  config CONSOLE_LOGLEVEL_DEFAULT
>   int "Default console loglevel (1-15)"

Come on, you don't need to ask user just to set default value to be
overriden by command line. This is not nearly important enough.

Drop it. You can still use CONFIG_CMDLINE="...".

Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html


signature.asc
Description: Digital signature


Re: [PATCH v2 02/10] pinctrl: axp209: add pinctrl features (fwd)

2017-09-29 Thread Quentin Schulz
Hi Julia,

On 29/09/2017 14:15, Julia Lawall wrote:
> I'm not sure that it is allowed to do krealloc on devm allocated data.
> See lins 468 and 485.
> 

Indeed, from a glance at the code, it does not look like it is a good idea.

For v3, this piece of code will be deleted anyway so it won't be a
problem anymore.

However, this logic is used in drivers/pinctrl/sunxi/pinctrl-sunxi.c[1][2]

@Maxime, @Chen-Yu:
We should check more thoroughly than what I did but I think Julia is right.

The following is my understanding from a very quick look at the code.

devm_kzalloc will register gpio->functions as a res of the device.

However it's possible that the pointer is different after krealloc. In
that case, krealloc will free the "old" gpio->functions[3] which is
managed by devres.

1) We might be exposed to a free of a NULL pointer when devres takes
care of unregistering the device.
2) The "new" gpio->functions would never be freed.

Is that correct? If so, we should get rid of devm_kzalloc in favor of a
simple kzalloc and free the pointer in the remove function of the driver.

[1]
http://elixir.free-electrons.com/linux/latest/source/drivers/pinctrl/sunxi/pinctrl-sunxi.c#L1078
[2]
http://elixir.free-electrons.com/linux/latest/source/drivers/pinctrl/sunxi/pinctrl-sunxi.c#L1107
[3]
http://elixir.free-electrons.com/linux/latest/source/mm/slab_common.c#L1414

Thanks,
Quentin

> julia
> 
> -- Forwarded message --
> Date: Fri, 29 Sep 2017 20:00:03 +0800
> From: kbuild test robot <fengguang...@intel.com>
> To: kbu...@01.org
> Cc: Julia Lawall <julia.law...@lip6.fr>
> Subject: Re: [PATCH v2 02/10] pinctrl: axp209: add pinctrl features
> 
> Hi Quentin,
> 
> [auto build test WARNING on ]
> 
> url:
> https://github.com/0day-ci/linux/commits/Quentin-Schulz/add-pinmuxing-support-for-pins-in-AXP209-and-AXP813-PMICs/20170929-162846
> base:
> :: branch date: 4 hours ago
> :: commit date: 4 hours ago
> 
>>> drivers/pinctrl/pinctrl-axp209.c:485:19-27: WARNING: invalid free of devm_ 
>>> allocated data
> 
> # 
> https://github.com/0day-ci/linux/commit/1e016076fb841f90f047d2b001c9f8d9fd5e2953
> git remote add linux-review https://github.com/0day-ci/linux
> git remote update linux-review
> git checkout 1e016076fb841f90f047d2b001c9f8d9fd5e2953
> vim +485 drivers/pinctrl/pinctrl-axp209.c
> 
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  446
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  447  
> static int axp20x_build_state(struct platform_device *pdev)
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  448  {
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  449
> struct axp20x_gpio *gpio = platform_get_drvdata(pdev);
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  450
> unsigned int npins = gpio->desc->npins;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  451
> const struct axp20x_desc_pin *pin;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  452
> struct axp20x_desc_function *func;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  453
> int i, ret;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  454
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  455
> gpio->ngroups = npins;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  456
> gpio->groups = devm_kzalloc(>dev,
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  457
> gpio->ngroups * sizeof(*gpio->groups),
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  458
> GFP_KERNEL);
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  459
> if (!gpio->groups)
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  460
> return -ENOMEM;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  461
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  462
> for (i = 0; i < npins; i++) {
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  463
> gpio->groups[i].name = gpio->desc->pins[i].pin.name;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  464
> gpio->groups[i].pin = gpio->desc->pins[i].pin.number;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  465
> }
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  466
> 1e016076fb drivers/pinctrl/pinctrl-axp2

Re: [PATCH v2 02/10] pinctrl: axp209: add pinctrl features (fwd)

2017-09-29 Thread Quentin Schulz
Hi Julia,

On 29/09/2017 14:15, Julia Lawall wrote:
> I'm not sure that it is allowed to do krealloc on devm allocated data.
> See lins 468 and 485.
> 

Indeed, from a glance at the code, it does not look like it is a good idea.

For v3, this piece of code will be deleted anyway so it won't be a
problem anymore.

However, this logic is used in drivers/pinctrl/sunxi/pinctrl-sunxi.c[1][2]

@Maxime, @Chen-Yu:
We should check more thoroughly than what I did but I think Julia is right.

The following is my understanding from a very quick look at the code.

devm_kzalloc will register gpio->functions as a res of the device.

However it's possible that the pointer is different after krealloc. In
that case, krealloc will free the "old" gpio->functions[3] which is
managed by devres.

1) We might be exposed to a free of a NULL pointer when devres takes
care of unregistering the device.
2) The "new" gpio->functions would never be freed.

Is that correct? If so, we should get rid of devm_kzalloc in favor of a
simple kzalloc and free the pointer in the remove function of the driver.

[1]
http://elixir.free-electrons.com/linux/latest/source/drivers/pinctrl/sunxi/pinctrl-sunxi.c#L1078
[2]
http://elixir.free-electrons.com/linux/latest/source/drivers/pinctrl/sunxi/pinctrl-sunxi.c#L1107
[3]
http://elixir.free-electrons.com/linux/latest/source/mm/slab_common.c#L1414

Thanks,
Quentin

> julia
> 
> -- Forwarded message --
> Date: Fri, 29 Sep 2017 20:00:03 +0800
> From: kbuild test robot 
> To: kbu...@01.org
> Cc: Julia Lawall 
> Subject: Re: [PATCH v2 02/10] pinctrl: axp209: add pinctrl features
> 
> Hi Quentin,
> 
> [auto build test WARNING on ]
> 
> url:
> https://github.com/0day-ci/linux/commits/Quentin-Schulz/add-pinmuxing-support-for-pins-in-AXP209-and-AXP813-PMICs/20170929-162846
> base:
> :: branch date: 4 hours ago
> :: commit date: 4 hours ago
> 
>>> drivers/pinctrl/pinctrl-axp209.c:485:19-27: WARNING: invalid free of devm_ 
>>> allocated data
> 
> # 
> https://github.com/0day-ci/linux/commit/1e016076fb841f90f047d2b001c9f8d9fd5e2953
> git remote add linux-review https://github.com/0day-ci/linux
> git remote update linux-review
> git checkout 1e016076fb841f90f047d2b001c9f8d9fd5e2953
> vim +485 drivers/pinctrl/pinctrl-axp209.c
> 
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  446
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  447  
> static int axp20x_build_state(struct platform_device *pdev)
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  448  {
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  449
> struct axp20x_gpio *gpio = platform_get_drvdata(pdev);
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  450
> unsigned int npins = gpio->desc->npins;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  451
> const struct axp20x_desc_pin *pin;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  452
> struct axp20x_desc_function *func;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  453
> int i, ret;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  454
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  455
> gpio->ngroups = npins;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  456
> gpio->groups = devm_kzalloc(>dev,
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  457
> gpio->ngroups * sizeof(*gpio->groups),
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  458
> GFP_KERNEL);
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  459
> if (!gpio->groups)
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  460
> return -ENOMEM;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  461
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  462
> for (i = 0; i < npins; i++) {
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  463
> gpio->groups[i].name = gpio->desc->pins[i].pin.name;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  464
> gpio->groups[i].pin = gpio->desc->pins[i].pin.number;
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  465
> }
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  466
> 1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  467
> /* We assume 4

[RESEND PATCH] prctl: add PR_[GS]ET_PDEATHSIG_PROC

2017-09-29 Thread Jürg Billeter
PR_SET_PDEATHSIG sets a parent death signal that the calling process
will get when its parent thread dies, even when the result of getppid()
doesn't change because the calling process is reparented to a different
thread in the same parent process. When managing multiple processes, a
process-based parent death signal is much more useful. E.g., to avoid
stray child processes.

PR_SET_PDEATHSIG_PROC sets a process-based death signal. Unlike
PR_SET_PDEATHSIG, this is inherited across fork to allow killing a whole
subtree without race conditions.

This can be used for sandboxing when combined with a seccomp filter.

There have been previous attempts to support this by changing the
behavior of PR_SET_PDEATHSIG. However, that would break existing
applications. See https://marc.info/?l=linux-kernel=117621804801689
and https://bugzilla.kernel.org/show_bug.cgi?id=43300

Signed-off-by: Jürg Billeter 
---

Previous discussion: https://patchwork.kernel.org/patch/9945315/

 fs/exec.c|  1 +
 include/linux/sched/signal.h |  3 +++
 include/uapi/linux/prctl.h   |  4 
 kernel/cred.c|  1 +
 kernel/exit.c|  4 
 kernel/fork.c|  2 ++
 kernel/sys.c | 11 +++
 security/apparmor/lsm.c  |  1 +
 security/selinux/hooks.c |  1 +
 9 files changed, 28 insertions(+)

diff --git a/fs/exec.c b/fs/exec.c
index ac34d9724684..7045f0223140 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1334,6 +1334,7 @@ void setup_new_exec(struct linux_binprm * bprm)
if (bprm->secureexec) {
/* Make sure parent cannot signal privileged process. */
current->pdeath_signal = 0;
+   current->signal->pdeath_signal_proc = 0;
 
/*
 * For secureexec, reset the stack limit to sane default to
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 2a0dd40b15db..c5c137e5ef39 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -103,6 +103,9 @@ struct signal_struct {
int group_stop_count;
unsigned intflags; /* see SIGNAL_* flags below */
 
+   /* The signal sent when the parent dies: */
+   int pdeath_signal_proc;
+
/*
 * PR_SET_CHILD_SUBREAPER marks a process, like a service
 * manager, to re-parent orphan (double-forking) child processes
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a8d0759a9e40..04508e81d4f2 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -197,4 +197,8 @@ struct prctl_mm_map {
 # define PR_CAP_AMBIENT_LOWER  3
 # define PR_CAP_AMBIENT_CLEAR_ALL  4
 
+/* Process-based variant of PDEATHSIG */
+#define PR_SET_PDEATHSIG_PROC  48
+#define PR_GET_PDEATHSIG_PROC  49
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/cred.c b/kernel/cred.c
index ecf03657e71c..0192a94670e1 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -448,6 +448,7 @@ int commit_creds(struct cred *new)
if (task->mm)
set_dumpable(task->mm, suid_dumpable);
task->pdeath_signal = 0;
+   task->signal->pdeath_signal_proc = 0;
smp_wmb();
}
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 3481ababd06a..9b6fbb0128d7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -635,6 +635,10 @@ static void reparent_leader(struct task_struct *father, 
struct task_struct *p,
if (unlikely(p->exit_state == EXIT_DEAD))
return;
 
+   if (p->signal->pdeath_signal_proc)
+   group_send_sig_info(p->signal->pdeath_signal_proc,
+   SEND_SIG_NOINFO, p);
+
/* We don't want people slaying init. */
p->exit_signal = SIGCHLD;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 10646182440f..264936c367e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1415,6 +1415,8 @@ static int copy_signal(unsigned long clone_flags, struct 
task_struct *tsk)
 
mutex_init(>cred_guard_mutex);
 
+   sig->pdeath_signal_proc = current->signal->pdeath_signal_proc;
+
return 0;
 }
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 9aebc2935013..dcb9a535404e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2206,6 +2206,17 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, 
unsigned long, arg3,
case PR_GET_PDEATHSIG:
error = put_user(me->pdeath_signal, (int __user *)arg2);
break;
+   case PR_SET_PDEATHSIG_PROC:
+   if (!valid_signal(arg2)) {
+   error = -EINVAL;
+   break;
+   }
+   me->signal->pdeath_signal_proc = arg2;
+   break;
+   case PR_GET_PDEATHSIG_PROC:
+   error = put_user(me->signal->pdeath_signal_proc,
+(int __user *)arg2);
+ 

[RESEND PATCH] prctl: add PR_[GS]ET_PDEATHSIG_PROC

2017-09-29 Thread Jürg Billeter
PR_SET_PDEATHSIG sets a parent death signal that the calling process
will get when its parent thread dies, even when the result of getppid()
doesn't change because the calling process is reparented to a different
thread in the same parent process. When managing multiple processes, a
process-based parent death signal is much more useful. E.g., to avoid
stray child processes.

PR_SET_PDEATHSIG_PROC sets a process-based death signal. Unlike
PR_SET_PDEATHSIG, this is inherited across fork to allow killing a whole
subtree without race conditions.

This can be used for sandboxing when combined with a seccomp filter.

There have been previous attempts to support this by changing the
behavior of PR_SET_PDEATHSIG. However, that would break existing
applications. See https://marc.info/?l=linux-kernel=117621804801689
and https://bugzilla.kernel.org/show_bug.cgi?id=43300

Signed-off-by: Jürg Billeter 
---

Previous discussion: https://patchwork.kernel.org/patch/9945315/

 fs/exec.c|  1 +
 include/linux/sched/signal.h |  3 +++
 include/uapi/linux/prctl.h   |  4 
 kernel/cred.c|  1 +
 kernel/exit.c|  4 
 kernel/fork.c|  2 ++
 kernel/sys.c | 11 +++
 security/apparmor/lsm.c  |  1 +
 security/selinux/hooks.c |  1 +
 9 files changed, 28 insertions(+)

diff --git a/fs/exec.c b/fs/exec.c
index ac34d9724684..7045f0223140 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1334,6 +1334,7 @@ void setup_new_exec(struct linux_binprm * bprm)
if (bprm->secureexec) {
/* Make sure parent cannot signal privileged process. */
current->pdeath_signal = 0;
+   current->signal->pdeath_signal_proc = 0;
 
/*
 * For secureexec, reset the stack limit to sane default to
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 2a0dd40b15db..c5c137e5ef39 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -103,6 +103,9 @@ struct signal_struct {
int group_stop_count;
unsigned intflags; /* see SIGNAL_* flags below */
 
+   /* The signal sent when the parent dies: */
+   int pdeath_signal_proc;
+
/*
 * PR_SET_CHILD_SUBREAPER marks a process, like a service
 * manager, to re-parent orphan (double-forking) child processes
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a8d0759a9e40..04508e81d4f2 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -197,4 +197,8 @@ struct prctl_mm_map {
 # define PR_CAP_AMBIENT_LOWER  3
 # define PR_CAP_AMBIENT_CLEAR_ALL  4
 
+/* Process-based variant of PDEATHSIG */
+#define PR_SET_PDEATHSIG_PROC  48
+#define PR_GET_PDEATHSIG_PROC  49
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/cred.c b/kernel/cred.c
index ecf03657e71c..0192a94670e1 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -448,6 +448,7 @@ int commit_creds(struct cred *new)
if (task->mm)
set_dumpable(task->mm, suid_dumpable);
task->pdeath_signal = 0;
+   task->signal->pdeath_signal_proc = 0;
smp_wmb();
}
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 3481ababd06a..9b6fbb0128d7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -635,6 +635,10 @@ static void reparent_leader(struct task_struct *father, 
struct task_struct *p,
if (unlikely(p->exit_state == EXIT_DEAD))
return;
 
+   if (p->signal->pdeath_signal_proc)
+   group_send_sig_info(p->signal->pdeath_signal_proc,
+   SEND_SIG_NOINFO, p);
+
/* We don't want people slaying init. */
p->exit_signal = SIGCHLD;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 10646182440f..264936c367e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1415,6 +1415,8 @@ static int copy_signal(unsigned long clone_flags, struct 
task_struct *tsk)
 
mutex_init(>cred_guard_mutex);
 
+   sig->pdeath_signal_proc = current->signal->pdeath_signal_proc;
+
return 0;
 }
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 9aebc2935013..dcb9a535404e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2206,6 +2206,17 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, 
unsigned long, arg3,
case PR_GET_PDEATHSIG:
error = put_user(me->pdeath_signal, (int __user *)arg2);
break;
+   case PR_SET_PDEATHSIG_PROC:
+   if (!valid_signal(arg2)) {
+   error = -EINVAL;
+   break;
+   }
+   me->signal->pdeath_signal_proc = arg2;
+   break;
+   case PR_GET_PDEATHSIG_PROC:
+   error = put_user(me->signal->pdeath_signal_proc,
+(int __user *)arg2);
+   

Re: [Part1 PATCH v5 02/17] x86/mm: Add Secure Encrypted Virtualization (SEV) support

2017-09-29 Thread Brijesh Singh


On 9/28/17 2:23 PM, Borislav Petkov wrote:
...
> So actually we need chicken bits to be able to *enable* both when
> CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT is not set.
>
> I.e.,
>
> * mem_encrypt=on - both SME and SEV enabled
>
> * mem_encrypt=smeonly - only SME, no SEV on the host. This option will
> basically prevent from using any SEV guests and make the SEV part of the
> code inactive. I.e., sev_active() and sev_enabled should be false. As
> you say above, we should clear X86_FEATURE_SEV, yes.
>
> * mem_encrypt=off - neither SME/SEV are enabled.
>
> And =on and =off we already have.
>
> How does that sound?

if we are adding a chicken bits then I think we should do it for both
"smeonly" and "sevonly". We can boot host OS with SME disabled and SEV
enabled, and still be able to create the SEV guest from the hypervisor.

How about this ?

mem_encrypt=on both SME and SEV enabled
mem_encrypt=sev    only SEV enabled
mem_encrypt=sme   only SME enabled
mem_encrypt=off neither SME/SEV are enabled

-Brijesh



Re: [Part1 PATCH v5 02/17] x86/mm: Add Secure Encrypted Virtualization (SEV) support

2017-09-29 Thread Brijesh Singh


On 9/28/17 2:23 PM, Borislav Petkov wrote:
...
> So actually we need chicken bits to be able to *enable* both when
> CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT is not set.
>
> I.e.,
>
> * mem_encrypt=on - both SME and SEV enabled
>
> * mem_encrypt=smeonly - only SME, no SEV on the host. This option will
> basically prevent from using any SEV guests and make the SEV part of the
> code inactive. I.e., sev_active() and sev_enabled should be false. As
> you say above, we should clear X86_FEATURE_SEV, yes.
>
> * mem_encrypt=off - neither SME/SEV are enabled.
>
> And =on and =off we already have.
>
> How does that sound?

if we are adding a chicken bits then I think we should do it for both
"smeonly" and "sevonly". We can boot host OS with SME disabled and SEV
enabled, and still be able to create the SEV guest from the hypervisor.

How about this ?

mem_encrypt=on both SME and SEV enabled
mem_encrypt=sev    only SEV enabled
mem_encrypt=sme   only SME enabled
mem_encrypt=off neither SME/SEV are enabled

-Brijesh



[PATCH v2 net] net: mvpp2: Fix clock resource by adding an optional bus clock

2017-09-29 Thread Gregory CLEMENT
On Armada 7K/8K we need to explicitly enable the bus clock. The bus clock
is optional because not all the SoCs need them but at least for Armada
7K/8K it is actually mandatory.

The binding documentation is updating accordingly.

Signed-off-by: Gregory CLEMENT 
---
Changelog:
v1 -> v2:
 - manage the -EPROBE_DEFER case
 - fix typos in documentation
 - remove useless test before clk_disable_unprepare()

 Documentation/devicetree/bindings/net/marvell-pp2.txt | 10 ++
 drivers/net/ethernet/marvell/mvpp2.c  | 15 +++
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt 
b/Documentation/devicetree/bindings/net/marvell-pp2.txt
index 7e2dad08a12e..1814fa13f6ab 100644
--- a/Documentation/devicetree/bindings/net/marvell-pp2.txt
+++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt
@@ -21,8 +21,9 @@ Required properties:
- main controller clock (for both armada-375-pp2 and armada-7k-pp2)
- GOP clock (for both armada-375-pp2 and armada-7k-pp2)
- MG clock (only for armada-7k-pp2)
-- clock-names: names of used clocks, must be "pp_clk", "gop_clk" and
-  "mg_clk" (the latter only for armada-7k-pp2).
+   - AXI clock (only for armada-7k-pp2)
+- clock-names: names of used clocks, must be "pp_clk", "gop_clk", "mg_clk"
+  and "axi_clk" (the 2 latter only for armada-7k-pp2).
 
 The ethernet ports are represented by subnodes. At least one port is
 required.
@@ -78,8 +79,9 @@ Example for marvell,armada-7k-pp2:
 cpm_ethernet: ethernet@0 {
compatible = "marvell,armada-7k-pp22";
reg = <0x0 0x10>, <0x129000 0xb000>;
-   clocks = <_syscon0 1 3>, <_syscon0 1 9>, <_syscon0 1 5>;
-   clock-names = "pp_clk", "gop_clk", "gp_clk";
+   clocks = <_syscon0 1 3>, <_syscon0 1 9>,
+<_syscon0 1 5>, <_syscon0 1 18>;
+   clock-names = "pp_clk", "gop_clk", "gp_clk", "axi_clk";
 
eth0: eth0 {
interrupts = ,
diff --git a/drivers/net/ethernet/marvell/mvpp2.c 
b/drivers/net/ethernet/marvell/mvpp2.c
index dd0ee2691c86..f2656112986b 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -792,6 +792,7 @@ struct mvpp2 {
struct clk *pp_clk;
struct clk *gop_clk;
struct clk *mg_clk;
+   struct clk *axi_clk;
 
/* List of pointers to port structures */
struct mvpp2_port **port_list;
@@ -7963,6 +7964,18 @@ static int mvpp2_probe(struct platform_device *pdev)
err = clk_prepare_enable(priv->mg_clk);
if (err < 0)
goto err_gop_clk;
+
+   priv->axi_clk = devm_clk_get(>dev, "axi_clk");
+   if (IS_ERR(priv->axi_clk)) {
+   err = PTR_ERR(priv->axi_clk);
+   if (err == -EPROBE_DEFER)
+   goto err_gop_clk;
+   priv->axi_clk = NULL;
+   } else {
+   err = clk_prepare_enable(priv->axi_clk);
+   if (err < 0)
+   goto err_gop_clk;
+   }
}
 
/* Get system's tclk rate */
@@ -8015,6 +8028,7 @@ static int mvpp2_probe(struct platform_device *pdev)
return 0;
 
 err_mg_clk:
+   clk_disable_unprepare(priv->axi_clk);
if (priv->hw_version == MVPP22)
clk_disable_unprepare(priv->mg_clk);
 err_gop_clk:
@@ -8052,6 +8066,7 @@ static int mvpp2_remove(struct platform_device *pdev)
  aggr_txq->descs_dma);
}
 
+   clk_disable_unprepare(priv->axi_clk);
clk_disable_unprepare(priv->mg_clk);
clk_disable_unprepare(priv->pp_clk);
clk_disable_unprepare(priv->gop_clk);
-- 
2.14.1



[PATCH v2 net] net: mvpp2: Fix clock resource by adding an optional bus clock

2017-09-29 Thread Gregory CLEMENT
On Armada 7K/8K we need to explicitly enable the bus clock. The bus clock
is optional because not all the SoCs need them but at least for Armada
7K/8K it is actually mandatory.

The binding documentation is updating accordingly.

Signed-off-by: Gregory CLEMENT 
---
Changelog:
v1 -> v2:
 - manage the -EPROBE_DEFER case
 - fix typos in documentation
 - remove useless test before clk_disable_unprepare()

 Documentation/devicetree/bindings/net/marvell-pp2.txt | 10 ++
 drivers/net/ethernet/marvell/mvpp2.c  | 15 +++
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt 
b/Documentation/devicetree/bindings/net/marvell-pp2.txt
index 7e2dad08a12e..1814fa13f6ab 100644
--- a/Documentation/devicetree/bindings/net/marvell-pp2.txt
+++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt
@@ -21,8 +21,9 @@ Required properties:
- main controller clock (for both armada-375-pp2 and armada-7k-pp2)
- GOP clock (for both armada-375-pp2 and armada-7k-pp2)
- MG clock (only for armada-7k-pp2)
-- clock-names: names of used clocks, must be "pp_clk", "gop_clk" and
-  "mg_clk" (the latter only for armada-7k-pp2).
+   - AXI clock (only for armada-7k-pp2)
+- clock-names: names of used clocks, must be "pp_clk", "gop_clk", "mg_clk"
+  and "axi_clk" (the 2 latter only for armada-7k-pp2).
 
 The ethernet ports are represented by subnodes. At least one port is
 required.
@@ -78,8 +79,9 @@ Example for marvell,armada-7k-pp2:
 cpm_ethernet: ethernet@0 {
compatible = "marvell,armada-7k-pp22";
reg = <0x0 0x10>, <0x129000 0xb000>;
-   clocks = <_syscon0 1 3>, <_syscon0 1 9>, <_syscon0 1 5>;
-   clock-names = "pp_clk", "gop_clk", "gp_clk";
+   clocks = <_syscon0 1 3>, <_syscon0 1 9>,
+<_syscon0 1 5>, <_syscon0 1 18>;
+   clock-names = "pp_clk", "gop_clk", "gp_clk", "axi_clk";
 
eth0: eth0 {
interrupts = ,
diff --git a/drivers/net/ethernet/marvell/mvpp2.c 
b/drivers/net/ethernet/marvell/mvpp2.c
index dd0ee2691c86..f2656112986b 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -792,6 +792,7 @@ struct mvpp2 {
struct clk *pp_clk;
struct clk *gop_clk;
struct clk *mg_clk;
+   struct clk *axi_clk;
 
/* List of pointers to port structures */
struct mvpp2_port **port_list;
@@ -7963,6 +7964,18 @@ static int mvpp2_probe(struct platform_device *pdev)
err = clk_prepare_enable(priv->mg_clk);
if (err < 0)
goto err_gop_clk;
+
+   priv->axi_clk = devm_clk_get(>dev, "axi_clk");
+   if (IS_ERR(priv->axi_clk)) {
+   err = PTR_ERR(priv->axi_clk);
+   if (err == -EPROBE_DEFER)
+   goto err_gop_clk;
+   priv->axi_clk = NULL;
+   } else {
+   err = clk_prepare_enable(priv->axi_clk);
+   if (err < 0)
+   goto err_gop_clk;
+   }
}
 
/* Get system's tclk rate */
@@ -8015,6 +8028,7 @@ static int mvpp2_probe(struct platform_device *pdev)
return 0;
 
 err_mg_clk:
+   clk_disable_unprepare(priv->axi_clk);
if (priv->hw_version == MVPP22)
clk_disable_unprepare(priv->mg_clk);
 err_gop_clk:
@@ -8052,6 +8066,7 @@ static int mvpp2_remove(struct platform_device *pdev)
  aggr_txq->descs_dma);
}
 
+   clk_disable_unprepare(priv->axi_clk);
clk_disable_unprepare(priv->mg_clk);
clk_disable_unprepare(priv->pp_clk);
clk_disable_unprepare(priv->gop_clk);
-- 
2.14.1



Re: [PATCH v2 0/2] mtd: nand: wait for tWHR, and fix the setup_data_interface of Denali

2017-09-29 Thread Boris Brezillon
On Fri, 29 Sep 2017 19:38:38 +0900
Masahiro Yamada  wrote:

> 1/2 : add NAND_WAIT_TWHR and nand_whr_delay().
>   You can set this new flag if you want nand_command(_lp)
>   to insert tWHR delay where needed.
> 
> 2/2 : Fix Denali setup_data_interface.
>   Boris' suggestion in v1 was a good reminder that
>   made me realize tCCS was missing in the driver.  Fix it now.
> 
> 
> Changes in v2:
>   - Add nand_whr_delay() helper
> Wait for tWHR only for drivers that explicitly set NAND_WAIT_TWHR flag
>   - newly added
> 
> Masahiro Yamada (2):
>   mtd: nand: wait for tWHR after NAND_CMD_STATUS / NAND_CMD_READID

Hm, I thought you were introducing this to then use it in the denali
driver. Sorry, but I don't want to apply something that nobody needs.
If someone ever complains about a missing delay I'll point him to your
patch, but until then I'll keep the core unchanged.

>   mtd: nand: denali: fix setup_data_interface to meet tCCS delay

This one is valid. I'll queue it to nand/next soon.

Thanks,

Boris

> 
>  drivers/mtd/nand/denali.c| 10 --
>  drivers/mtd/nand/nand_base.c | 21 +++--
>  include/linux/mtd/rawnand.h  | 13 -
>  3 files changed, 35 insertions(+), 9 deletions(-)
> 



Re: [PATCH v2 0/2] mtd: nand: wait for tWHR, and fix the setup_data_interface of Denali

2017-09-29 Thread Boris Brezillon
On Fri, 29 Sep 2017 19:38:38 +0900
Masahiro Yamada  wrote:

> 1/2 : add NAND_WAIT_TWHR and nand_whr_delay().
>   You can set this new flag if you want nand_command(_lp)
>   to insert tWHR delay where needed.
> 
> 2/2 : Fix Denali setup_data_interface.
>   Boris' suggestion in v1 was a good reminder that
>   made me realize tCCS was missing in the driver.  Fix it now.
> 
> 
> Changes in v2:
>   - Add nand_whr_delay() helper
> Wait for tWHR only for drivers that explicitly set NAND_WAIT_TWHR flag
>   - newly added
> 
> Masahiro Yamada (2):
>   mtd: nand: wait for tWHR after NAND_CMD_STATUS / NAND_CMD_READID

Hm, I thought you were introducing this to then use it in the denali
driver. Sorry, but I don't want to apply something that nobody needs.
If someone ever complains about a missing delay I'll point him to your
patch, but until then I'll keep the core unchanged.

>   mtd: nand: denali: fix setup_data_interface to meet tCCS delay

This one is valid. I'll queue it to nand/next soon.

Thanks,

Boris

> 
>  drivers/mtd/nand/denali.c| 10 --
>  drivers/mtd/nand/nand_base.c | 21 +++--
>  include/linux/mtd/rawnand.h  | 13 -
>  3 files changed, 35 insertions(+), 9 deletions(-)
> 



Re: [lkp-robot] [x86/mm] 9e52fc2b50: will-it-scale.per_thread_ops -16% regression

2017-09-29 Thread Vitaly Kuznetsov
kernel test robot  writes:

> Greeting,
>
> FYI, we noticed a -16% regression of will-it-scale.per_thread_ops due to 
> commit:
>
> commit: 9e52fc2b50de3a1c08b44f94c610fbe998c0031a ("x86/mm: Enable RCU based 
> page table freeing (CONFIG_HAVE_RCU_TABLE_FREE=y)")
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git master
>
> in testcase: will-it-scale
> on test machine: 32 threads Intel(R) Xeon(R) CPU E5-2680 0 @ 2.70GHz with 64G 
> memory
> with following parameters:
>
>   test: malloc1
>   cpufreq_governor: performance
>
> test-description: Will It Scale takes a testcase and runs it from 1 through 
> to n parallel copies to see if the testcase will scale. It builds both a 
> process and threads based test in order to see any differences between the 
> two.
> test-url: https://github.com/antonblanchard/will-it-scale
>
> Details are as below:
> -->
>
> To reproduce:
>
> git clone https://github.com/intel/lkp-tests.git
> cd lkp-tests
> bin/lkp install job.yaml  # job file is attached in this email
> bin/lkp run job.yaml
>
> testcase/path_params/tbox_group/run: 
> will-it-scale/malloc1-performance/lkp-sb03
>
> 39e48d9b128abbd2  9e52fc2b50de3a1c08b44f94c6  
>   --  
>  %stddev  change %stddev
>  \  |\  
>  52686 ±  4%   -16%  44404will-it-scale.per_thread_ops
>   2351 216%   7432 ±  9%  
> will-it-scale.time.involuntary_context_switches

[snip]

Thank you for the report,

I tried reproducing this on a smaller system (16 threads E5-2640, 32 Gb
RAM) but I'm not seeing this:

4.14-rc2 with 9e52fc2b50de3a1c08b44f94c6 included:

time ./runtest.py malloc1
tasks,processes,processes_idle,threads,threads_idle,linear
0,0,100,0,100,0
1,282785,93.75,253402,93.74,282785
2,453749,87.50,123048,92.69,565570
3,654495,81.25,121974,91.04,848355
4,821504,75.01,120409,90.16,1131140
5,958374,68.76,133752,90.18,1413925
6,1078434,62.53,138999,90.37,1696710
7,1165645,56.27,134086,90.45,1979495
8,1257750,50.03,139918,90.39,2262280
9,870393,43.78,120765,89.20,2545065
10,695333,37.54,125554,87.90,2827850
11,533409,31.28,121283,87.57,3110635
12,458691,25.06,119839,87.23,3393420
13,432307,18.79,121203,86.22,3676205
14,428379,12.58,122107,86.16,3958990
15,424319,6.32,121789,86.34,4241775
16,426072,0.12,121244,86.44,4524560

real5m52.363s
user0m18.204s
sys 5m7.249s

4.14-rc2 with 9e52fc2b50de3a1c08b44f94c6 reverted:

time ./runtest.py malloc1
tasks,processes,processes_idle,threads,threads_idle,linear
0,0,100,0,100,0
1,290971,93.78,316790,93.76,316790
2,478501,87.48,122081,93.11,633580
3,722748,81.25,117410,92.28,950370
4,945460,75.01,123084,91.30,1267160
5,1145372,68.76,128113,91.71,1583950
6,1332411,62.51,132994,92.08,1900740
7,1479931,56.27,129479,92.24,2217530
8,1579569,50.03,133241,91.68,2534320
9,1272772,43.79,131393,89.87,2851110
10,1105981,37.54,126218,88.76,3167900
11,892427,31.29,127651,87.48,3484690
12,703695,25.06,125056,86.97,3801480
13,642629,18.82,123492,86.68,4118270
14,625952,12.58,121581,87.02,4435060
15,617222,6.34,121273,87.47,4751850
16,611371,0.11,125548,86.74,5068640

real5m52.406s
user0m27.973s
sys 5m8.169s

I have a couple of guesses why we may be seeing significantly increased
number of context switches in some very specific workloads:

1) In case the system is under extreme memory pressure and
__get_free_page() is failing in tlb_remove_table() we'll be doing
smp_call_function() for _each_ call (avoiding batching). We may want to
have a pre-allocated pool.

2) The default MAX_TABLE_BATCH is static (it is equal to the number of
pointer we can fit into one page - sizeof(struct mmu_table_batch) ==
509), we may want to adjust it for very big systems.

I'd love to work on these but with a good reproducible case I'm afraid
I'm stuck :-(

-- 
  Vitaly


Re: [lkp-robot] [x86/mm] 9e52fc2b50: will-it-scale.per_thread_ops -16% regression

2017-09-29 Thread Vitaly Kuznetsov
kernel test robot  writes:

> Greeting,
>
> FYI, we noticed a -16% regression of will-it-scale.per_thread_ops due to 
> commit:
>
> commit: 9e52fc2b50de3a1c08b44f94c610fbe998c0031a ("x86/mm: Enable RCU based 
> page table freeing (CONFIG_HAVE_RCU_TABLE_FREE=y)")
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git master
>
> in testcase: will-it-scale
> on test machine: 32 threads Intel(R) Xeon(R) CPU E5-2680 0 @ 2.70GHz with 64G 
> memory
> with following parameters:
>
>   test: malloc1
>   cpufreq_governor: performance
>
> test-description: Will It Scale takes a testcase and runs it from 1 through 
> to n parallel copies to see if the testcase will scale. It builds both a 
> process and threads based test in order to see any differences between the 
> two.
> test-url: https://github.com/antonblanchard/will-it-scale
>
> Details are as below:
> -->
>
> To reproduce:
>
> git clone https://github.com/intel/lkp-tests.git
> cd lkp-tests
> bin/lkp install job.yaml  # job file is attached in this email
> bin/lkp run job.yaml
>
> testcase/path_params/tbox_group/run: 
> will-it-scale/malloc1-performance/lkp-sb03
>
> 39e48d9b128abbd2  9e52fc2b50de3a1c08b44f94c6  
>   --  
>  %stddev  change %stddev
>  \  |\  
>  52686 ±  4%   -16%  44404will-it-scale.per_thread_ops
>   2351 216%   7432 ±  9%  
> will-it-scale.time.involuntary_context_switches

[snip]

Thank you for the report,

I tried reproducing this on a smaller system (16 threads E5-2640, 32 Gb
RAM) but I'm not seeing this:

4.14-rc2 with 9e52fc2b50de3a1c08b44f94c6 included:

time ./runtest.py malloc1
tasks,processes,processes_idle,threads,threads_idle,linear
0,0,100,0,100,0
1,282785,93.75,253402,93.74,282785
2,453749,87.50,123048,92.69,565570
3,654495,81.25,121974,91.04,848355
4,821504,75.01,120409,90.16,1131140
5,958374,68.76,133752,90.18,1413925
6,1078434,62.53,138999,90.37,1696710
7,1165645,56.27,134086,90.45,1979495
8,1257750,50.03,139918,90.39,2262280
9,870393,43.78,120765,89.20,2545065
10,695333,37.54,125554,87.90,2827850
11,533409,31.28,121283,87.57,3110635
12,458691,25.06,119839,87.23,3393420
13,432307,18.79,121203,86.22,3676205
14,428379,12.58,122107,86.16,3958990
15,424319,6.32,121789,86.34,4241775
16,426072,0.12,121244,86.44,4524560

real5m52.363s
user0m18.204s
sys 5m7.249s

4.14-rc2 with 9e52fc2b50de3a1c08b44f94c6 reverted:

time ./runtest.py malloc1
tasks,processes,processes_idle,threads,threads_idle,linear
0,0,100,0,100,0
1,290971,93.78,316790,93.76,316790
2,478501,87.48,122081,93.11,633580
3,722748,81.25,117410,92.28,950370
4,945460,75.01,123084,91.30,1267160
5,1145372,68.76,128113,91.71,1583950
6,1332411,62.51,132994,92.08,1900740
7,1479931,56.27,129479,92.24,2217530
8,1579569,50.03,133241,91.68,2534320
9,1272772,43.79,131393,89.87,2851110
10,1105981,37.54,126218,88.76,3167900
11,892427,31.29,127651,87.48,3484690
12,703695,25.06,125056,86.97,3801480
13,642629,18.82,123492,86.68,4118270
14,625952,12.58,121581,87.02,4435060
15,617222,6.34,121273,87.47,4751850
16,611371,0.11,125548,86.74,5068640

real5m52.406s
user0m27.973s
sys 5m8.169s

I have a couple of guesses why we may be seeing significantly increased
number of context switches in some very specific workloads:

1) In case the system is under extreme memory pressure and
__get_free_page() is failing in tlb_remove_table() we'll be doing
smp_call_function() for _each_ call (avoiding batching). We may want to
have a pre-allocated pool.

2) The default MAX_TABLE_BATCH is static (it is equal to the number of
pointer we can fit into one page - sizeof(struct mmu_table_batch) ==
509), we may want to adjust it for very big systems.

I'd love to work on these but with a good reproducible case I'm afraid
I'm stuck :-(

-- 
  Vitaly


Re: [PATCH v6] PCI: quirks: update Cavium ThunderX ACS quirk implementation

2017-09-29 Thread Vadim Lomovtsev
Hi Bjorn,

On Wed, Sep 27, 2017 at 02:18:54PM -0600, Alex Williamson wrote:
> On Wed, 27 Sep 2017 11:20:39 -0700
> Vadim Lomovtsev  wrote:
> 
> > This commit makes Cavium PCI ACS quirk applicable only to Cavium
> > ThunderX (CN8XXX) family PCIE Root Ports which has limited PCI capabilities
> > in terms of no ACS support advertisement. However, the RTL internally
> > implements similar protection as if ACS had completion/request redirection,
> > upstream forwarding and validation features enabled.
> > 
> > Current quirk implementation doesn't take into account PCIERCs which
> > also needs to be quirked. So the pci device id check mask is updated
> > and check of device ID moved into separate function.
> > 
> > Signed-off-by: Vadim Lomovtsev 
> > ---
> > v5 -> v6: comment typo fix: change 0xa00 to 0xa000
> 
> Reviewed-by: Alex Williamson 
>

If there is no any objections, could you please take this patch ?

WBR,
Vadim

>  
> >  drivers/pci/quirks.c | 29 +
> >  1 file changed, 21 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> > index a4d3361..ed6c76d 100644
> > --- a/drivers/pci/quirks.c
> > +++ b/drivers/pci/quirks.c
> > @@ -4211,20 +4211,33 @@ static int pci_quirk_amd_sb_acs(struct pci_dev 
> > *dev, u16 acs_flags)
> >  #endif
> >  }
> >  
> > -static int pci_quirk_cavium_acs(struct pci_dev *dev, u16 acs_flags)
> > +/*
> > + * The Cavium downstream ports doesn't advertise their ACS capability 
> > registers.
> > + * However, the RTL internally implements similar protection as if
> > + * ACS had completion redirection, forwarding and validation features 
> > enabled.
> > + * So by this flags we're asserting that the hardware implements and
> > + * enables equivalent ACS functionality for these flags.
> > + */
> > +#define CAVIUM_CN8XXX_ACS_FLAGS (PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_SV | 
> > PCI_ACS_UF)
> > +
> > +static __inline__  bool pci_quirk_cavium_acs_match(struct pci_dev *dev)
> >  {
> > /*
> > -* Cavium devices matching this quirk do not perform peer-to-peer
> > -* with other functions, allowing masking out these bits as if they
> > -* were unimplemented in the ACS capability.
> > +* Effectively selects all downstream ports for whole ThunderX 1 family
> > +* by 0xa000 mask (which represents 8 SoCs), while the lower bits of 
> > device ID
> > +* are used to indicate which subdevice is used within the SoC.
> >  */
> > -   acs_flags &= ~(PCI_ACS_SV | PCI_ACS_TB | PCI_ACS_RR |
> > -  PCI_ACS_CR | PCI_ACS_UF | PCI_ACS_DT);
> > +   return (pci_is_pcie(dev) &&
> > +   (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) &&
> > +   ((dev->device & 0xf800) == 0xa000));
> > +}
> >  
> > -   if (!((dev->device >= 0xa000) && (dev->device <= 0xa0ff)))
> > +static int pci_quirk_cavium_acs(struct pci_dev *dev, u16 acs_flags)
> > +{
> > +   if (!pci_quirk_cavium_acs_match(dev))
> > return -ENOTTY;
> >  
> > -   return acs_flags ? 0 : 1;
> > +   return acs_flags & ~(CAVIUM_CN8XXX_ACS_FLAGS) ? 0 : 1;
> >  }
> >  
> >  static int pci_quirk_xgene_acs(struct pci_dev *dev, u16 acs_flags)
> 


Re: [PATCH v6] PCI: quirks: update Cavium ThunderX ACS quirk implementation

2017-09-29 Thread Vadim Lomovtsev
Hi Bjorn,

On Wed, Sep 27, 2017 at 02:18:54PM -0600, Alex Williamson wrote:
> On Wed, 27 Sep 2017 11:20:39 -0700
> Vadim Lomovtsev  wrote:
> 
> > This commit makes Cavium PCI ACS quirk applicable only to Cavium
> > ThunderX (CN8XXX) family PCIE Root Ports which has limited PCI capabilities
> > in terms of no ACS support advertisement. However, the RTL internally
> > implements similar protection as if ACS had completion/request redirection,
> > upstream forwarding and validation features enabled.
> > 
> > Current quirk implementation doesn't take into account PCIERCs which
> > also needs to be quirked. So the pci device id check mask is updated
> > and check of device ID moved into separate function.
> > 
> > Signed-off-by: Vadim Lomovtsev 
> > ---
> > v5 -> v6: comment typo fix: change 0xa00 to 0xa000
> 
> Reviewed-by: Alex Williamson 
>

If there is no any objections, could you please take this patch ?

WBR,
Vadim

>  
> >  drivers/pci/quirks.c | 29 +
> >  1 file changed, 21 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> > index a4d3361..ed6c76d 100644
> > --- a/drivers/pci/quirks.c
> > +++ b/drivers/pci/quirks.c
> > @@ -4211,20 +4211,33 @@ static int pci_quirk_amd_sb_acs(struct pci_dev 
> > *dev, u16 acs_flags)
> >  #endif
> >  }
> >  
> > -static int pci_quirk_cavium_acs(struct pci_dev *dev, u16 acs_flags)
> > +/*
> > + * The Cavium downstream ports doesn't advertise their ACS capability 
> > registers.
> > + * However, the RTL internally implements similar protection as if
> > + * ACS had completion redirection, forwarding and validation features 
> > enabled.
> > + * So by this flags we're asserting that the hardware implements and
> > + * enables equivalent ACS functionality for these flags.
> > + */
> > +#define CAVIUM_CN8XXX_ACS_FLAGS (PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_SV | 
> > PCI_ACS_UF)
> > +
> > +static __inline__  bool pci_quirk_cavium_acs_match(struct pci_dev *dev)
> >  {
> > /*
> > -* Cavium devices matching this quirk do not perform peer-to-peer
> > -* with other functions, allowing masking out these bits as if they
> > -* were unimplemented in the ACS capability.
> > +* Effectively selects all downstream ports for whole ThunderX 1 family
> > +* by 0xa000 mask (which represents 8 SoCs), while the lower bits of 
> > device ID
> > +* are used to indicate which subdevice is used within the SoC.
> >  */
> > -   acs_flags &= ~(PCI_ACS_SV | PCI_ACS_TB | PCI_ACS_RR |
> > -  PCI_ACS_CR | PCI_ACS_UF | PCI_ACS_DT);
> > +   return (pci_is_pcie(dev) &&
> > +   (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) &&
> > +   ((dev->device & 0xf800) == 0xa000));
> > +}
> >  
> > -   if (!((dev->device >= 0xa000) && (dev->device <= 0xa0ff)))
> > +static int pci_quirk_cavium_acs(struct pci_dev *dev, u16 acs_flags)
> > +{
> > +   if (!pci_quirk_cavium_acs_match(dev))
> > return -ENOTTY;
> >  
> > -   return acs_flags ? 0 : 1;
> > +   return acs_flags & ~(CAVIUM_CN8XXX_ACS_FLAGS) ? 0 : 1;
> >  }
> >  
> >  static int pci_quirk_xgene_acs(struct pci_dev *dev, u16 acs_flags)
> 


Re: [Part2 PATCH v4 02/29] x86/CPU/AMD: Add the Secure Encrypted Virtualization CPU feature

2017-09-29 Thread Borislav Petkov
On Tue, Sep 19, 2017 at 03:46:00PM -0500, Brijesh Singh wrote:
> From: Tom Lendacky 
> 
> Update the CPU features to include identifying and reporting on the
> Secure Encrypted Virtualization (SEV) feature.  SEV is identified by
> CPUID 0x801f, but requires BIOS support to enable it (set bit 23 of
> MSR_K8_SYSCFG and set bit 0 of MSR_K7_HWCR).  Only show the SEV feature
> as available if reported by CPUID and enabled by BIOS.
> 
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: "H. Peter Anvin" 
> Cc: Paolo Bonzini 
> Cc: "Radim Krčmář" 
> Cc: Borislav Petkov 
> Cc: k...@vger.kernel.org
> Cc: x...@kernel.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Tom Lendacky 
> Signed-off-by: Brijesh Singh 
> ---
>  arch/x86/include/asm/cpufeatures.h |  1 +
>  arch/x86/include/asm/msr-index.h   |  2 ++
>  arch/x86/kernel/cpu/amd.c  | 66 
> ++
>  arch/x86/kernel/cpu/scattered.c|  1 +
>  4 files changed, 50 insertions(+), 20 deletions(-)

This one was in the Part1 set, right? It landed here for whatever
reason...

In any case:

Reviewed-by: Borislav Petkov 

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


Re: [Part2 PATCH v4 02/29] x86/CPU/AMD: Add the Secure Encrypted Virtualization CPU feature

2017-09-29 Thread Borislav Petkov
On Tue, Sep 19, 2017 at 03:46:00PM -0500, Brijesh Singh wrote:
> From: Tom Lendacky 
> 
> Update the CPU features to include identifying and reporting on the
> Secure Encrypted Virtualization (SEV) feature.  SEV is identified by
> CPUID 0x801f, but requires BIOS support to enable it (set bit 23 of
> MSR_K8_SYSCFG and set bit 0 of MSR_K7_HWCR).  Only show the SEV feature
> as available if reported by CPUID and enabled by BIOS.
> 
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: "H. Peter Anvin" 
> Cc: Paolo Bonzini 
> Cc: "Radim Krčmář" 
> Cc: Borislav Petkov 
> Cc: k...@vger.kernel.org
> Cc: x...@kernel.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Tom Lendacky 
> Signed-off-by: Brijesh Singh 
> ---
>  arch/x86/include/asm/cpufeatures.h |  1 +
>  arch/x86/include/asm/msr-index.h   |  2 ++
>  arch/x86/kernel/cpu/amd.c  | 66 
> ++
>  arch/x86/kernel/cpu/scattered.c|  1 +
>  4 files changed, 50 insertions(+), 20 deletions(-)

This one was in the Part1 set, right? It landed here for whatever
reason...

In any case:

Reviewed-by: Borislav Petkov 

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


Re: [PATCH v2 02/10] pinctrl: axp209: add pinctrl features (fwd)

2017-09-29 Thread Julia Lawall
I'm not sure that it is allowed to do krealloc on devm allocated data.
See lins 468 and 485.

julia

-- Forwarded message --
Date: Fri, 29 Sep 2017 20:00:03 +0800
From: kbuild test robot <fengguang...@intel.com>
To: kbu...@01.org
Cc: Julia Lawall <julia.law...@lip6.fr>
Subject: Re: [PATCH v2 02/10] pinctrl: axp209: add pinctrl features

Hi Quentin,

[auto build test WARNING on ]

url:
https://github.com/0day-ci/linux/commits/Quentin-Schulz/add-pinmuxing-support-for-pins-in-AXP209-and-AXP813-PMICs/20170929-162846
base:
:: branch date: 4 hours ago
:: commit date: 4 hours ago

>> drivers/pinctrl/pinctrl-axp209.c:485:19-27: WARNING: invalid free of devm_ 
>> allocated data

# 
https://github.com/0day-ci/linux/commit/1e016076fb841f90f047d2b001c9f8d9fd5e2953
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout 1e016076fb841f90f047d2b001c9f8d9fd5e2953
vim +485 drivers/pinctrl/pinctrl-axp209.c

1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  446
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  447  
static int axp20x_build_state(struct platform_device *pdev)
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  448  {
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  449  
struct axp20x_gpio *gpio = platform_get_drvdata(pdev);
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  450  
unsigned int npins = gpio->desc->npins;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  451  
const struct axp20x_desc_pin *pin;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  452  
struct axp20x_desc_function *func;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  453  
int i, ret;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  454
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  455  
gpio->ngroups = npins;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  456  
gpio->groups = devm_kzalloc(>dev,
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  457  
gpio->ngroups * sizeof(*gpio->groups),
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  458  
GFP_KERNEL);
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  459  
if (!gpio->groups)
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  460  
return -ENOMEM;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  461
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  462  
for (i = 0; i < npins; i++) {
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  463  
gpio->groups[i].name = gpio->desc->pins[i].pin.name;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  464  
gpio->groups[i].pin = gpio->desc->pins[i].pin.number;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  465  
}
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  466
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  467  
/* We assume 4 functions per pin should be enough as a default max */
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  468  
gpio->functions = devm_kzalloc(>dev,
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  469  
   npins * 4 * sizeof(*gpio->functions),
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  470  
   GFP_KERNEL);
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  471  
if (!gpio->functions)
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  472  
return -ENOMEM;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  473
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  474  
/* Create a list of uniquely named functions */
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  475  
for (i = 0; i < npins; i++) {
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  476  
pin = >desc->pins[i];
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  477  
func = pin->functions;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  478
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  479  
while (func->name) {
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  480  
axp20x_pinctrl_add_function(gpio, func->name);
1e016076fb drivers/pinct

Re: [PATCH v2 02/10] pinctrl: axp209: add pinctrl features (fwd)

2017-09-29 Thread Julia Lawall
I'm not sure that it is allowed to do krealloc on devm allocated data.
See lins 468 and 485.

julia

-- Forwarded message --
Date: Fri, 29 Sep 2017 20:00:03 +0800
From: kbuild test robot 
To: kbu...@01.org
Cc: Julia Lawall 
Subject: Re: [PATCH v2 02/10] pinctrl: axp209: add pinctrl features

Hi Quentin,

[auto build test WARNING on ]

url:
https://github.com/0day-ci/linux/commits/Quentin-Schulz/add-pinmuxing-support-for-pins-in-AXP209-and-AXP813-PMICs/20170929-162846
base:
:: branch date: 4 hours ago
:: commit date: 4 hours ago

>> drivers/pinctrl/pinctrl-axp209.c:485:19-27: WARNING: invalid free of devm_ 
>> allocated data

# 
https://github.com/0day-ci/linux/commit/1e016076fb841f90f047d2b001c9f8d9fd5e2953
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout 1e016076fb841f90f047d2b001c9f8d9fd5e2953
vim +485 drivers/pinctrl/pinctrl-axp209.c

1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  446
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  447  
static int axp20x_build_state(struct platform_device *pdev)
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  448  {
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  449  
struct axp20x_gpio *gpio = platform_get_drvdata(pdev);
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  450  
unsigned int npins = gpio->desc->npins;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  451  
const struct axp20x_desc_pin *pin;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  452  
struct axp20x_desc_function *func;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  453  
int i, ret;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  454
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  455  
gpio->ngroups = npins;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  456  
gpio->groups = devm_kzalloc(>dev,
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  457  
gpio->ngroups * sizeof(*gpio->groups),
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  458  
GFP_KERNEL);
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  459  
if (!gpio->groups)
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  460  
return -ENOMEM;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  461
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  462  
for (i = 0; i < npins; i++) {
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  463  
gpio->groups[i].name = gpio->desc->pins[i].pin.name;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  464  
gpio->groups[i].pin = gpio->desc->pins[i].pin.number;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  465  
}
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  466
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  467  
/* We assume 4 functions per pin should be enough as a default max */
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  468  
gpio->functions = devm_kzalloc(>dev,
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  469  
   npins * 4 * sizeof(*gpio->functions),
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  470  
   GFP_KERNEL);
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  471  
if (!gpio->functions)
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  472  
return -ENOMEM;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  473
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  474  
/* Create a list of uniquely named functions */
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  475  
for (i = 0; i < npins; i++) {
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  476  
pin = >desc->pins[i];
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  477  
func = pin->functions;
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  478
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  479  
while (func->name) {
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  480  
axp20x_pinctrl_add_function(gpio, func->name);
1e016076fb drivers/pinctrl/pinctrl-axp209.c Quentin Schulz 2017-09-26  481  
  

[PATCH v1] ARM: configs: stm32: Add I2C support in STM32 defconfig

2017-09-29 Thread Pierre-Yves MORDRET
This patch adds I2C support for STM32

Signed-off-by: Pierre-Yves MORDRET 
---
 Version history:
v1:
* Initial
---
---
 arch/arm/configs/stm32_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index 3ed3158..80c7406 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -49,6 +49,7 @@ CONFIG_SERIAL_STM32_CONSOLE=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_STM32F4=y
+CONFIG_I2C_STM32F7=y
 CONFIG_GPIO_STMPE=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
-- 
2.7.4



[PATCH v1] ARM: configs: stm32: Add I2C support in STM32 defconfig

2017-09-29 Thread Pierre-Yves MORDRET
This patch adds I2C support for STM32

Signed-off-by: Pierre-Yves MORDRET 
---
 Version history:
v1:
* Initial
---
---
 arch/arm/configs/stm32_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index 3ed3158..80c7406 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -49,6 +49,7 @@ CONFIG_SERIAL_STM32_CONSOLE=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_STM32F4=y
+CONFIG_I2C_STM32F7=y
 CONFIG_GPIO_STMPE=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
-- 
2.7.4



Re: [PATCH 3/4] iommu/arm-smmu-v3: Use NUMA memory allocations for stream tables and comamnd queues

2017-09-29 Thread Marek Szyprowski

Hi Robin,

On 2017-09-21 13:58, Robin Murphy wrote:

[+Christoph and Marek]

On 21/09/17 09:59, Ganapatrao Kulkarni wrote:

Introduce smmu_alloc_coherent and smmu_free_coherent functions to
allocate/free dma coherent memory from NUMA node associated with SMMU.
Replace all calls of dmam_alloc_coherent with smmu_alloc_coherent
for SMMU stream tables and command queues.

This doesn't work - not only do you lose the 'managed' aspect and risk
leaking various tables on probe failure or device removal, but more
importantly, unless you add DMA syncs around all the CPU accesses to the
tables, you lose the critical 'coherent' aspect, and that's a horribly
invasive change that I really don't want to make.

Christoph, Marek; how reasonable do you think it is to expect
dma_alloc_coherent() to be inherently NUMA-aware on NUMA-capable
systems? SWIOTLB looks fairly straightforward to fix up (for the simple
allocation case; I'm not sure it's even worth it for bounce-buffering),
but the likes of CMA might be a little trickier...


I'm not sure if there is any dma-coherent implementation that is NUMA aware.

Maybe author should provide some benchmarks, which show that those 
structures

should be allocated in NUMA-aware way?

On the other hand it is not that hard to add required dma_sync_* calls 
around

all the code which updated those tables.

> ...

Best regards
--
Marek Szyprowski, PhD
Samsung R Institute Poland



Re: [PATCH 3/4] iommu/arm-smmu-v3: Use NUMA memory allocations for stream tables and comamnd queues

2017-09-29 Thread Marek Szyprowski

Hi Robin,

On 2017-09-21 13:58, Robin Murphy wrote:

[+Christoph and Marek]

On 21/09/17 09:59, Ganapatrao Kulkarni wrote:

Introduce smmu_alloc_coherent and smmu_free_coherent functions to
allocate/free dma coherent memory from NUMA node associated with SMMU.
Replace all calls of dmam_alloc_coherent with smmu_alloc_coherent
for SMMU stream tables and command queues.

This doesn't work - not only do you lose the 'managed' aspect and risk
leaking various tables on probe failure or device removal, but more
importantly, unless you add DMA syncs around all the CPU accesses to the
tables, you lose the critical 'coherent' aspect, and that's a horribly
invasive change that I really don't want to make.

Christoph, Marek; how reasonable do you think it is to expect
dma_alloc_coherent() to be inherently NUMA-aware on NUMA-capable
systems? SWIOTLB looks fairly straightforward to fix up (for the simple
allocation case; I'm not sure it's even worth it for bounce-buffering),
but the likes of CMA might be a little trickier...


I'm not sure if there is any dma-coherent implementation that is NUMA aware.

Maybe author should provide some benchmarks, which show that those 
structures

should be allocated in NUMA-aware way?

On the other hand it is not that hard to add required dma_sync_* calls 
around

all the code which updated those tables.

> ...

Best regards
--
Marek Szyprowski, PhD
Samsung R Institute Poland



Re: [PATCH RFC 3/5] Add KSZ8795 switch driver

2017-09-29 Thread Andrew Lunn
On Fri, Sep 29, 2017 at 09:14:26AM +, David Laight wrote:
> From: Andrew Lunn
> > Sent: 28 September 2017 20:34
> ...
> > > There are 34 counters.  In normal case using generic bus I/O or PCI to 
> > > read them
> > > is very quick, but the switch is mostly accessed using SPI, or even I2C.  
> > > As the SPI
> > > access is very slow.
> > 
> > How slow is it? The Marvell switches all use MDIO. It is probably a
> > bit faster than I2C, but it is a lot slower than MMIO or PCI.
> > 
> > ethtool -S lan0 takes about 25ms.
> 
> Is the SPI access software bit-banged?

That will depend on the board design. I've used mdio bit banging, and
that was painfully slow for stats.

But we should primarily think about average hardware. It is going to
have hardware SPI or I2C. If statistics reading with hardware I2C is
reasonable, i would avoid caching, and just ensure other accesses are
permitted between individual statistic reads.

It also requires Microchip also post new code. They have been very
silent for quite a while

  Andrew


Re: [PATCH RFC 3/5] Add KSZ8795 switch driver

2017-09-29 Thread Andrew Lunn
On Fri, Sep 29, 2017 at 09:14:26AM +, David Laight wrote:
> From: Andrew Lunn
> > Sent: 28 September 2017 20:34
> ...
> > > There are 34 counters.  In normal case using generic bus I/O or PCI to 
> > > read them
> > > is very quick, but the switch is mostly accessed using SPI, or even I2C.  
> > > As the SPI
> > > access is very slow.
> > 
> > How slow is it? The Marvell switches all use MDIO. It is probably a
> > bit faster than I2C, but it is a lot slower than MMIO or PCI.
> > 
> > ethtool -S lan0 takes about 25ms.
> 
> Is the SPI access software bit-banged?

That will depend on the board design. I've used mdio bit banging, and
that was painfully slow for stats.

But we should primarily think about average hardware. It is going to
have hardware SPI or I2C. If statistics reading with hardware I2C is
reasonable, i would avoid caching, and just ensure other accesses are
permitted between individual statistic reads.

It also requires Microchip also post new code. They have been very
silent for quite a while

  Andrew


[RESEND PATCH] fs: add RWF_APPEND

2017-09-29 Thread Jürg Billeter
This is the per-I/O equivalent of O_APPEND to support atomic append
operations on any open file.

If a file is opened with O_APPEND, pwrite() ignores the offset and
always appends data to the end of the file. RWF_APPEND enables atomic
append and pwrite() with offset on a single file descriptor.

Signed-off-by: Jürg Billeter 
---
 include/linux/fs.h  | 2 ++
 include/uapi/linux/fs.h | 6 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 339e73742e73..fee24eae7523 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3204,6 +3204,8 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, 
rwf_t flags)
ki->ki_flags |= IOCB_DSYNC;
if (flags & RWF_SYNC)
ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+   if (flags & RWF_APPEND)
+   ki->ki_flags |= IOCB_APPEND;
return 0;
 }
 
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 56235dddea7d..ac145430bcd8 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -376,7 +376,11 @@ typedef int __bitwise __kernel_rwf_t;
 /* per-IO, return -EAGAIN if operation would block */
 #define RWF_NOWAIT ((__force __kernel_rwf_t)0x0008)
 
+/* per-IO O_APPEND */
+#define RWF_APPEND ((__force __kernel_rwf_t)0x0010)
+
 /* mask of flags supported by the kernel */
-#define RWF_SUPPORTED  (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT)
+#define RWF_SUPPORTED  (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
+RWF_APPEND)
 
 #endif /* _UAPI_LINUX_FS_H */
-- 
2.14.1



[RESEND PATCH] fs: add RWF_APPEND

2017-09-29 Thread Jürg Billeter
This is the per-I/O equivalent of O_APPEND to support atomic append
operations on any open file.

If a file is opened with O_APPEND, pwrite() ignores the offset and
always appends data to the end of the file. RWF_APPEND enables atomic
append and pwrite() with offset on a single file descriptor.

Signed-off-by: Jürg Billeter 
---
 include/linux/fs.h  | 2 ++
 include/uapi/linux/fs.h | 6 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 339e73742e73..fee24eae7523 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3204,6 +3204,8 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, 
rwf_t flags)
ki->ki_flags |= IOCB_DSYNC;
if (flags & RWF_SYNC)
ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+   if (flags & RWF_APPEND)
+   ki->ki_flags |= IOCB_APPEND;
return 0;
 }
 
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 56235dddea7d..ac145430bcd8 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -376,7 +376,11 @@ typedef int __bitwise __kernel_rwf_t;
 /* per-IO, return -EAGAIN if operation would block */
 #define RWF_NOWAIT ((__force __kernel_rwf_t)0x0008)
 
+/* per-IO O_APPEND */
+#define RWF_APPEND ((__force __kernel_rwf_t)0x0010)
+
 /* mask of flags supported by the kernel */
-#define RWF_SUPPORTED  (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT)
+#define RWF_SUPPORTED  (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
+RWF_APPEND)
 
 #endif /* _UAPI_LINUX_FS_H */
-- 
2.14.1



Re: [PATCH 1/6] mm: add kmalloc_array_node and kcalloc_node

2017-09-29 Thread Vlastimil Babka
On 09/27/2017 10:20 AM, Johannes Thumshirn wrote:
> We have kmalloc_array() and kcalloc() wrappers on top of kmalloc() which
> ensure us overflow free multiplication for the size of a memory
> allocation but these implementations are not NUMA-aware.
> 
> Likewise we have kmalloc_node() which is a NUMA-aware version of
> kmalloc() but the implementation is not aware of any possible overflows in
> eventual size calculations.
> 
> Introduce a combination of the two above cases to have a NUMA-node aware
> version of kmalloc_array() and kcalloc().
> 
> Signed-off-by: Johannes Thumshirn 

Sounds better than custom open-coded stuff indeed.

Acked-by: Vlastimil Babka 

> ---
>  include/linux/slab.h | 16 
>  1 file changed, 16 insertions(+)
> 
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 41473df6dfb0..aaf4723e41b3 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -635,6 +635,22 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, 
> unsigned long);
>  #define kmalloc_track_caller(size, flags) \
>   __kmalloc_track_caller(size, flags, _RET_IP_)
>  
> +static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
> +int node)
> +{
> + if (size != 0 && n > SIZE_MAX / size)
> + return NULL;
> + if (__builtin_constant_p(n) && __builtin_constant_p(size))
> + return kmalloc_node(n * size, flags, node);
> + return __kmalloc_node(n * size, flags, node);
> +}
> +
> +static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int 
> node)
> +{
> + return kmalloc_array_node(n, size, flags | __GFP_ZERO, node);
> +}
> +
> +
>  #ifdef CONFIG_NUMA
>  extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
>  #define kmalloc_node_track_caller(size, flags, node) \
> 



Re: [PATCH 1/6] mm: add kmalloc_array_node and kcalloc_node

2017-09-29 Thread Vlastimil Babka
On 09/27/2017 10:20 AM, Johannes Thumshirn wrote:
> We have kmalloc_array() and kcalloc() wrappers on top of kmalloc() which
> ensure us overflow free multiplication for the size of a memory
> allocation but these implementations are not NUMA-aware.
> 
> Likewise we have kmalloc_node() which is a NUMA-aware version of
> kmalloc() but the implementation is not aware of any possible overflows in
> eventual size calculations.
> 
> Introduce a combination of the two above cases to have a NUMA-node aware
> version of kmalloc_array() and kcalloc().
> 
> Signed-off-by: Johannes Thumshirn 

Sounds better than custom open-coded stuff indeed.

Acked-by: Vlastimil Babka 

> ---
>  include/linux/slab.h | 16 
>  1 file changed, 16 insertions(+)
> 
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 41473df6dfb0..aaf4723e41b3 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -635,6 +635,22 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, 
> unsigned long);
>  #define kmalloc_track_caller(size, flags) \
>   __kmalloc_track_caller(size, flags, _RET_IP_)
>  
> +static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
> +int node)
> +{
> + if (size != 0 && n > SIZE_MAX / size)
> + return NULL;
> + if (__builtin_constant_p(n) && __builtin_constant_p(size))
> + return kmalloc_node(n * size, flags, node);
> + return __kmalloc_node(n * size, flags, node);
> +}
> +
> +static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int 
> node)
> +{
> + return kmalloc_array_node(n, size, flags | __GFP_ZERO, node);
> +}
> +
> +
>  #ifdef CONFIG_NUMA
>  extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
>  #define kmalloc_node_track_caller(size, flags, node) \
> 



Re: [PATCH v8 12/28] x86/insn-eval: Add utility functions to get segment selector

2017-09-29 Thread Borislav Petkov
On Thu, Sep 28, 2017 at 11:06:42PM -0700, Ricardo Neri wrote:
> I agree. In fact, insn_get_seg_base() does not need insn at all. All it needs 
> is
> a INAT_SEG_REG_* index. This would make things clear. UMIP (and callers that
> need to copy_from_user code can do insn_get_seg_base(regs, INAT_SEG_REG_CS). 
> No
> insn needed.

Yap.

> In fact, it is only the insn_get_addr_ref_xx() family of functions that does

I think you mean get_addr_ref_xx() here.

> Do you think the pseudocode above addresses your concerns?
> 
> *insn_get_seg_base() will take a INAT_SEG_REG_* index
> *insn_get_ref_xx() receives an initialized insn that can check for NULL value.
> *a reworked resolve_seg_reg_idx will clearly check if it can use segment
> override prefixes and obtain them. If not, it will use default values.

Makes sense, but send me the final version to take a look at it too.

Thanks.

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


Re: [PATCH v8 12/28] x86/insn-eval: Add utility functions to get segment selector

2017-09-29 Thread Borislav Petkov
On Thu, Sep 28, 2017 at 11:06:42PM -0700, Ricardo Neri wrote:
> I agree. In fact, insn_get_seg_base() does not need insn at all. All it needs 
> is
> a INAT_SEG_REG_* index. This would make things clear. UMIP (and callers that
> need to copy_from_user code can do insn_get_seg_base(regs, INAT_SEG_REG_CS). 
> No
> insn needed.

Yap.

> In fact, it is only the insn_get_addr_ref_xx() family of functions that does

I think you mean get_addr_ref_xx() here.

> Do you think the pseudocode above addresses your concerns?
> 
> *insn_get_seg_base() will take a INAT_SEG_REG_* index
> *insn_get_ref_xx() receives an initialized insn that can check for NULL value.
> *a reworked resolve_seg_reg_idx will clearly check if it can use segment
> override prefixes and obtain them. If not, it will use default values.

Makes sense, but send me the final version to take a look at it too.

Thanks.

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
-- 


Re: [PATCH] staging: wlan-ng: resolve sparse Endianness issue.

2017-09-29 Thread Dan Carpenter
Someone already sent this.  Work against linux-next or staging-next.

regards,
dan carpenter



Re: [PATCH] staging: wlan-ng: resolve sparse Endianness issue.

2017-09-29 Thread Dan Carpenter
Someone already sent this.  Work against linux-next or staging-next.

regards,
dan carpenter



Re: [PATCH v3] mmc: dt-bindings: Add reg/source_cg/latch-ck for Mediatek MMC bindings

2017-09-29 Thread Chaotian Jing
On Fri, 2017-09-29 at 13:11 +0200, Matthias Brugger wrote:
> 
> On 09/22/2017 02:03 PM, Chaotian Jing wrote:
> > Change the comptiable for support of multi-platform
> > Add description for reg
> > Add description for source_cg
> > Add description for mediatek,latch-ck
> > Note that source_cg and mediatek,latch-ck are optional for some projects,
> > eg, MT2701 do not have source_cg, and MT2712 do not need
> > mediatek,latch-ck
> > 
> > Signed-off-by: Chaotian Jing 
> > ---
> >   Documentation/devicetree/bindings/mmc/mtk-sd.txt | 13 ++---
> >   1 file changed, 10 insertions(+), 3 deletions(-)
> > 
> > diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.txt 
> > b/Documentation/devicetree/bindings/mmc/mtk-sd.txt
> > index 4182ea3..b934a29 100644
> > --- a/Documentation/devicetree/bindings/mmc/mtk-sd.txt
> > +++ b/Documentation/devicetree/bindings/mmc/mtk-sd.txt
> > @@ -7,10 +7,15 @@ This file documents differences between the core 
> > properties in mmc.txt
> >   and the properties used by the msdc driver.
> >   
> >   Required properties:
> > -- compatible: Should be "mediatek,mt8173-mmc","mediatek,mt8135-mmc"
> > +- compatible: value should be either of the following.
> > +   "mediatek,mt8135-mmc": for mmc host ip compatible with mt8135
> > +   "mediatek,mt8173-mmc": for mmc host ip compatible with mt8173
> > +   "mediatek,mt2701-mmc": for mmc host ip compatible with mt2701
> > +   "mediatek,mt2712-mmc": for mmc host ip compatible with mt2712
> 
> NAK, this has to be:
> You have to add the fallback compatible ("mediatek,mt8135-mmc") to the 
> binding 
> description as otherwise the driver does not get probed.
> 
> Regards,
> Matthias

Thanks for your comments, will add the fallback compatible
("mediatek,mt8135-mmc") to the binding in this patch.
but, is possible to remove the fallback compatible at next patch ?
eg, following patches will modify the driver like below:
static const struct of_device_id msdc_of_ids[] = {
{ .compatible = "mediatek,mt8135-mmc", .data = _compat},
{ .compatible = "mediatek,mt8173-mmc", .data = _compat},
};
then, there is no probe issue and remove the fallback compatible.
Thx!
> 
> > +- reg: physical base address of the controller and length
> >   - interrupts: Should contain MSDC interrupt number
> > -- clocks: MSDC source clock, HCLK
> > -- clock-names: "source", "hclk"
> > +- clocks: MSDC source clock, HCLK, source_cg
> > +- clock-names: "source", "hclk", "source_cg"
> >   - pinctrl-names: should be "default", "state_uhs"
> >   - pinctrl-0: should contain default/high speed pin ctrl
> >   - pinctrl-1: should contain uhs mode pin ctrl
> > @@ -30,6 +35,8 @@ Optional properties:
> >   - mediatek,hs400-cmd-resp-sel-rising:  HS400 command response sample 
> > selection
> >If present,HS400 command responses are 
> > sampled on rising edges.
> >If not present,HS400 command responses 
> > are sampled on falling edges.
> > +- mediatek,latch-ck: Some SoCs do not support enhance_rx, need set correct 
> > latch-ck to avoid data crc
> > +error caused by stop clock(fifo full)
> >   
> >   Examples:
> >   mmc0: mmc@1123 {
> > 




Re: [PATCH v3] mmc: dt-bindings: Add reg/source_cg/latch-ck for Mediatek MMC bindings

2017-09-29 Thread Chaotian Jing
On Fri, 2017-09-29 at 13:11 +0200, Matthias Brugger wrote:
> 
> On 09/22/2017 02:03 PM, Chaotian Jing wrote:
> > Change the comptiable for support of multi-platform
> > Add description for reg
> > Add description for source_cg
> > Add description for mediatek,latch-ck
> > Note that source_cg and mediatek,latch-ck are optional for some projects,
> > eg, MT2701 do not have source_cg, and MT2712 do not need
> > mediatek,latch-ck
> > 
> > Signed-off-by: Chaotian Jing 
> > ---
> >   Documentation/devicetree/bindings/mmc/mtk-sd.txt | 13 ++---
> >   1 file changed, 10 insertions(+), 3 deletions(-)
> > 
> > diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.txt 
> > b/Documentation/devicetree/bindings/mmc/mtk-sd.txt
> > index 4182ea3..b934a29 100644
> > --- a/Documentation/devicetree/bindings/mmc/mtk-sd.txt
> > +++ b/Documentation/devicetree/bindings/mmc/mtk-sd.txt
> > @@ -7,10 +7,15 @@ This file documents differences between the core 
> > properties in mmc.txt
> >   and the properties used by the msdc driver.
> >   
> >   Required properties:
> > -- compatible: Should be "mediatek,mt8173-mmc","mediatek,mt8135-mmc"
> > +- compatible: value should be either of the following.
> > +   "mediatek,mt8135-mmc": for mmc host ip compatible with mt8135
> > +   "mediatek,mt8173-mmc": for mmc host ip compatible with mt8173
> > +   "mediatek,mt2701-mmc": for mmc host ip compatible with mt2701
> > +   "mediatek,mt2712-mmc": for mmc host ip compatible with mt2712
> 
> NAK, this has to be:
> You have to add the fallback compatible ("mediatek,mt8135-mmc") to the 
> binding 
> description as otherwise the driver does not get probed.
> 
> Regards,
> Matthias

Thanks for your comments, will add the fallback compatible
("mediatek,mt8135-mmc") to the binding in this patch.
but, is possible to remove the fallback compatible at next patch ?
eg, following patches will modify the driver like below:
static const struct of_device_id msdc_of_ids[] = {
{ .compatible = "mediatek,mt8135-mmc", .data = _compat},
{ .compatible = "mediatek,mt8173-mmc", .data = _compat},
};
then, there is no probe issue and remove the fallback compatible.
Thx!
> 
> > +- reg: physical base address of the controller and length
> >   - interrupts: Should contain MSDC interrupt number
> > -- clocks: MSDC source clock, HCLK
> > -- clock-names: "source", "hclk"
> > +- clocks: MSDC source clock, HCLK, source_cg
> > +- clock-names: "source", "hclk", "source_cg"
> >   - pinctrl-names: should be "default", "state_uhs"
> >   - pinctrl-0: should contain default/high speed pin ctrl
> >   - pinctrl-1: should contain uhs mode pin ctrl
> > @@ -30,6 +35,8 @@ Optional properties:
> >   - mediatek,hs400-cmd-resp-sel-rising:  HS400 command response sample 
> > selection
> >If present,HS400 command responses are 
> > sampled on rising edges.
> >If not present,HS400 command responses 
> > are sampled on falling edges.
> > +- mediatek,latch-ck: Some SoCs do not support enhance_rx, need set correct 
> > latch-ck to avoid data crc
> > +error caused by stop clock(fifo full)
> >   
> >   Examples:
> >   mmc0: mmc@1123 {
> > 




Re: [PATCH 1/8] sched: Consistent task-state printing

2017-09-29 Thread Peter Zijlstra
On Mon, Sep 25, 2017 at 04:01:09PM -0400, Steven Rostedt wrote:
> On Mon, 25 Sep 2017 14:07:48 +0200
> Peter Zijlstra  wrote:
> 
> > +static inline char __task_state_to_char(unsigned int state)
> > +{
> > +   static const char state_char[] = "RSDTtXZ";
> > +
> > +   BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != sizeof(state_char) - 2);
> >  
> > -   return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?';
> > +   return state_char[state];
> > +}
> > +
> > +static inline char task_state_to_char(struct task_struct *tsk)
> > +{
> > +   return __task_state_to_char(__get_task_state(tsk));
> >  }
> >  
> 
> So far I'm fine with the patch set, but I hate the non descriptive "__"
> prefix of __task_state_to_char(). Can we make this a bit more
> descriptive, because every time I see it in other patches, I go back to
> this patch to see if we are using the right function.
> 
> What about something like:
> 
> task_state_to_state_char(unsigned int state);
> 
> task_to_state_char(struct task_struct *tsk);
> 
> ?

Hurmph.. naming.

How about something like so?


--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -137,7 +137,7 @@ static const char * const task_state_arr
 static inline const char *get_task_state(struct task_struct *tsk)
 {
BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != 
ARRAY_SIZE(task_state_array));
-   return task_state_array[__get_task_state(tsk)];
+   return task_state_array[task_state_index(tsk)];
 }
 
 static inline int get_task_umask(struct task_struct *tsk)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1245,7 +1245,7 @@ static inline pid_t task_pgrp_nr(struct
 #define TASK_REPORT_IDLE   (TASK_REPORT + 1)
 #define TASK_REPORT_MAX(TASK_REPORT_IDLE << 1)
 
-static inline unsigned int __get_task_state(struct task_struct *tsk)
+static inline unsigned int task_state_index(struct task_struct *tsk)
 {
unsigned int tsk_state = READ_ONCE(tsk->state);
unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
@@ -1258,7 +1258,7 @@ static inline unsigned int __get_task_st
return fls(state);
 }
 
-static inline char __task_state_to_char(unsigned int state)
+static inline char task_index_to_char(unsigned int state)
 {
static const char state_char[] = "RSDTtXZPI";
 
@@ -1269,7 +1269,7 @@ static inline char __task_state_to_char(
 
 static inline char task_state_to_char(struct task_struct *tsk)
 {
-   return __task_state_to_char(__get_task_state(tsk));
+   return task_index_to_char(task_state_index(tsk));
 }
 
 /**
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -117,7 +117,7 @@ static inline long __trace_sched_switch_
if (preempt)
return TASK_STATE_MAX;
 
-   return __get_task_state(p);
+   return task_state_index(p);
 }
 #endif /* CREATE_TRACE_POINTS */
 
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -921,8 +921,8 @@ static enum print_line_t trace_ctxwake_p
 
trace_assign_type(field, iter->ent);
 
-   T = __task_state_to_char(field->next_state);
-   S = __task_state_to_char(field->prev_state);
+   T = task_index_to_char(field->next_state);
+   S = task_index_to_char(field->prev_state);
trace_find_cmdline(field->next_pid, comm);
trace_seq_printf(>seq,
 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
@@ -957,8 +957,8 @@ static int trace_ctxwake_raw(struct trac
trace_assign_type(field, iter->ent);
 
if (!S)
-   S = __task_state_to_char(field->prev_state);
-   T = __task_state_to_char(field->next_state);
+   S = task_index_to_char(field->prev_state);
+   T = task_index_to_char(field->next_state);
trace_seq_printf(>seq, "%d %d %c %d %d %d %c\n",
 field->prev_pid,
 field->prev_prio,
@@ -993,8 +993,8 @@ static int trace_ctxwake_hex(struct trac
trace_assign_type(field, iter->ent);
 
if (!S)
-   S = __task_state_to_char(field->prev_state);
-   T = __task_state_to_char(field->next_state);
+   S = task_index_to_char(field->prev_state);
+   T = task_index_to_char(field->next_state);
 
SEQ_PUT_HEX_FIELD(s, field->prev_pid);
SEQ_PUT_HEX_FIELD(s, field->prev_prio);
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -397,10 +397,10 @@ tracing_sched_switch_trace(struct trace_
entry   = ring_buffer_event_data(event);
entry->prev_pid = prev->pid;
entry->prev_prio= prev->prio;
-   entry->prev_state   = __get_task_state(prev);
+   entry->prev_state   = task_state_index(prev);
entry->next_pid = next->pid;
entry->next_prio= next->prio;
-   entry->next_state   = __get_task_state(next);
+   entry->next_state   = 

Re: [PATCH 1/8] sched: Consistent task-state printing

2017-09-29 Thread Peter Zijlstra
On Mon, Sep 25, 2017 at 04:01:09PM -0400, Steven Rostedt wrote:
> On Mon, 25 Sep 2017 14:07:48 +0200
> Peter Zijlstra  wrote:
> 
> > +static inline char __task_state_to_char(unsigned int state)
> > +{
> > +   static const char state_char[] = "RSDTtXZ";
> > +
> > +   BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != sizeof(state_char) - 2);
> >  
> > -   return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?';
> > +   return state_char[state];
> > +}
> > +
> > +static inline char task_state_to_char(struct task_struct *tsk)
> > +{
> > +   return __task_state_to_char(__get_task_state(tsk));
> >  }
> >  
> 
> So far I'm fine with the patch set, but I hate the non descriptive "__"
> prefix of __task_state_to_char(). Can we make this a bit more
> descriptive, because every time I see it in other patches, I go back to
> this patch to see if we are using the right function.
> 
> What about something like:
> 
> task_state_to_state_char(unsigned int state);
> 
> task_to_state_char(struct task_struct *tsk);
> 
> ?

Hurmph.. naming.

How about something like so?


--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -137,7 +137,7 @@ static const char * const task_state_arr
 static inline const char *get_task_state(struct task_struct *tsk)
 {
BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != 
ARRAY_SIZE(task_state_array));
-   return task_state_array[__get_task_state(tsk)];
+   return task_state_array[task_state_index(tsk)];
 }
 
 static inline int get_task_umask(struct task_struct *tsk)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1245,7 +1245,7 @@ static inline pid_t task_pgrp_nr(struct
 #define TASK_REPORT_IDLE   (TASK_REPORT + 1)
 #define TASK_REPORT_MAX(TASK_REPORT_IDLE << 1)
 
-static inline unsigned int __get_task_state(struct task_struct *tsk)
+static inline unsigned int task_state_index(struct task_struct *tsk)
 {
unsigned int tsk_state = READ_ONCE(tsk->state);
unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
@@ -1258,7 +1258,7 @@ static inline unsigned int __get_task_st
return fls(state);
 }
 
-static inline char __task_state_to_char(unsigned int state)
+static inline char task_index_to_char(unsigned int state)
 {
static const char state_char[] = "RSDTtXZPI";
 
@@ -1269,7 +1269,7 @@ static inline char __task_state_to_char(
 
 static inline char task_state_to_char(struct task_struct *tsk)
 {
-   return __task_state_to_char(__get_task_state(tsk));
+   return task_index_to_char(task_state_index(tsk));
 }
 
 /**
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -117,7 +117,7 @@ static inline long __trace_sched_switch_
if (preempt)
return TASK_STATE_MAX;
 
-   return __get_task_state(p);
+   return task_state_index(p);
 }
 #endif /* CREATE_TRACE_POINTS */
 
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -921,8 +921,8 @@ static enum print_line_t trace_ctxwake_p
 
trace_assign_type(field, iter->ent);
 
-   T = __task_state_to_char(field->next_state);
-   S = __task_state_to_char(field->prev_state);
+   T = task_index_to_char(field->next_state);
+   S = task_index_to_char(field->prev_state);
trace_find_cmdline(field->next_pid, comm);
trace_seq_printf(>seq,
 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
@@ -957,8 +957,8 @@ static int trace_ctxwake_raw(struct trac
trace_assign_type(field, iter->ent);
 
if (!S)
-   S = __task_state_to_char(field->prev_state);
-   T = __task_state_to_char(field->next_state);
+   S = task_index_to_char(field->prev_state);
+   T = task_index_to_char(field->next_state);
trace_seq_printf(>seq, "%d %d %c %d %d %d %c\n",
 field->prev_pid,
 field->prev_prio,
@@ -993,8 +993,8 @@ static int trace_ctxwake_hex(struct trac
trace_assign_type(field, iter->ent);
 
if (!S)
-   S = __task_state_to_char(field->prev_state);
-   T = __task_state_to_char(field->next_state);
+   S = task_index_to_char(field->prev_state);
+   T = task_index_to_char(field->next_state);
 
SEQ_PUT_HEX_FIELD(s, field->prev_pid);
SEQ_PUT_HEX_FIELD(s, field->prev_prio);
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -397,10 +397,10 @@ tracing_sched_switch_trace(struct trace_
entry   = ring_buffer_event_data(event);
entry->prev_pid = prev->pid;
entry->prev_prio= prev->prio;
-   entry->prev_state   = __get_task_state(prev);
+   entry->prev_state   = task_state_index(prev);
entry->next_pid = next->pid;
entry->next_prio= next->prio;
-   entry->next_state   = __get_task_state(next);
+   entry->next_state   = task_state_index(next);

Re: [PATCH][trace-cmd] trace-view: Pick up sched_wakeup_new event

2017-09-29 Thread Jan Kiszka
On 2017-09-29 13:29, Jan Kiszka wrote:
> From: Jan Kiszka 
> 
> Seems like a copy bug of the initial commit 7b0139ebb1cf.
> 
> Signed-off-by: Jan Kiszka 
> ---
> 
> I didn't manage to test this so far, but this looked strange while
> reading the code.

Now I found out how it is supposed to work and can confirm that this
patch fixes a real bug.

Jan

> 
>  trace-view-store.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/trace-view-store.c b/trace-view-store.c
> index 2ec52c8..f5d0a04 100644
> --- a/trace-view-store.c
> +++ b/trace-view-store.c
> @@ -1254,7 +1254,7 @@ static void update_filter_tasks(TraceViewStore *store)
> "pid");
>  
>   store->sched_wakeup_new_event =
> - pevent_find_event_by_name(pevent, "sched", 
> "sched_wakeup");
> + pevent_find_event_by_name(pevent, "sched", 
> "sched_wakeup_new");
>   if (store->sched_wakeup_new_event)
>   store->sched_wakeup_new_pid_field =
>   
> pevent_find_any_field(store->sched_wakeup_new_event,
> 

-- 
Siemens AG, Corporate Technology, CT RDA ITP SES-DE
Corporate Competence Center Embedded Linux


Re: [PATCH][trace-cmd] trace-view: Pick up sched_wakeup_new event

2017-09-29 Thread Jan Kiszka
On 2017-09-29 13:29, Jan Kiszka wrote:
> From: Jan Kiszka 
> 
> Seems like a copy bug of the initial commit 7b0139ebb1cf.
> 
> Signed-off-by: Jan Kiszka 
> ---
> 
> I didn't manage to test this so far, but this looked strange while
> reading the code.

Now I found out how it is supposed to work and can confirm that this
patch fixes a real bug.

Jan

> 
>  trace-view-store.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/trace-view-store.c b/trace-view-store.c
> index 2ec52c8..f5d0a04 100644
> --- a/trace-view-store.c
> +++ b/trace-view-store.c
> @@ -1254,7 +1254,7 @@ static void update_filter_tasks(TraceViewStore *store)
> "pid");
>  
>   store->sched_wakeup_new_event =
> - pevent_find_event_by_name(pevent, "sched", 
> "sched_wakeup");
> + pevent_find_event_by_name(pevent, "sched", 
> "sched_wakeup_new");
>   if (store->sched_wakeup_new_event)
>   store->sched_wakeup_new_pid_field =
>   
> pevent_find_any_field(store->sched_wakeup_new_event,
> 

-- 
Siemens AG, Corporate Technology, CT RDA ITP SES-DE
Corporate Competence Center Embedded Linux


Re: [PATCH v4 for 4.14 1/3] membarrier: Provide register expedited private command

2017-09-29 Thread Peter Zijlstra
On Fri, Sep 29, 2017 at 09:38:53PM +1000, Nicholas Piggin wrote:

> Not really. There is some ability to hold onto a line for a time, but
> there is no way to starve them, let alone starve hundreds of other
> CPUs. They will request the cacheline exclusive and eventually get it.

OK, hardware fairness there is nice.

> I would really prefer to go this way on powerpc first. We could add the
> the registration APIs as basically no-ops, but which would allow the
> locking approach to be changed if we find it causes issues. I'll try to
> find some time and a big system when I can.

Fair enough I suppose.

> > A semi related issue; I suppose we can do a arch upcall to flush_tlb_mm
> > and reset the mm_cpumask when we change cpuset groups.
> 
> For powerpc we have been looking at how mm_cpumask can be improved.
> It has real drawbacks even when you don't consider this new syscall.

What else do you use mm_cpumask for?


Re: [PATCH v4 for 4.14 1/3] membarrier: Provide register expedited private command

2017-09-29 Thread Peter Zijlstra
On Fri, Sep 29, 2017 at 09:38:53PM +1000, Nicholas Piggin wrote:

> Not really. There is some ability to hold onto a line for a time, but
> there is no way to starve them, let alone starve hundreds of other
> CPUs. They will request the cacheline exclusive and eventually get it.

OK, hardware fairness there is nice.

> I would really prefer to go this way on powerpc first. We could add the
> the registration APIs as basically no-ops, but which would allow the
> locking approach to be changed if we find it causes issues. I'll try to
> find some time and a big system when I can.

Fair enough I suppose.

> > A semi related issue; I suppose we can do a arch upcall to flush_tlb_mm
> > and reset the mm_cpumask when we change cpuset groups.
> 
> For powerpc we have been looking at how mm_cpumask can be improved.
> It has real drawbacks even when you don't consider this new syscall.

What else do you use mm_cpumask for?


Re: [PATCH v3 tip/core/rcu 40/40] rcu: Make non-preemptive schedule be Tasks RCU quiescent state

2017-09-29 Thread Paolo Bonzini
On 29/09/2017 12:34, Peter Zijlstra wrote:
> On Fri, Sep 29, 2017 at 12:01:24PM +0200, Paolo Bonzini wrote:
>>> Does this mean whenever we get a page fault in a RCU read-side critical
>>> section, we may hit this?
>>>
>>> Could we simply avoid to schedule() in kvm_async_pf_task_wait() if the
>>> fault process is in a RCU read-side critical section as follow?
>>>
>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>>> index aa60a08b65b1..291ea13b23d2 100644
>>> --- a/arch/x86/kernel/kvm.c
>>> +++ b/arch/x86/kernel/kvm.c
>>> @@ -140,7 +140,7 @@ void kvm_async_pf_task_wait(u32 token)
>>>  
>>> n.token = token;
>>> n.cpu = smp_processor_id();
>>> -   n.halted = is_idle_task(current) || preempt_count() > 1;
>>> +   n.halted = is_idle_task(current) || preempt_count() > 1 || 
>>> rcu_preempt_depth();
>>> init_swait_queue_head();
>>> hlist_add_head(, >list);
>>> raw_spin_unlock(>lock);
>>>
>>> (Add KVM folks and list Cced)
>>
>> Yes, that would work.  Mind to send it as a proper patch?
> 
> I'm confused, why would we do an ASYNC PF at all here? Thing is, a
> printk() shouldn't trigger a major fault _ever_. At worst it triggers
> something like a vmalloc minor fault. And I'm thinking we should not do
> the whole ASYNC machinery for minor faults.

Async page faults are page faults _on the host_ side, and you cannot
control what the host pages out.  Of course the hypervisor filters out
some cases itself (e.g. IF=0) but in general you could get one at any time.

Paolo



Re: [PATCH v3 tip/core/rcu 40/40] rcu: Make non-preemptive schedule be Tasks RCU quiescent state

2017-09-29 Thread Paolo Bonzini
On 29/09/2017 12:34, Peter Zijlstra wrote:
> On Fri, Sep 29, 2017 at 12:01:24PM +0200, Paolo Bonzini wrote:
>>> Does this mean whenever we get a page fault in a RCU read-side critical
>>> section, we may hit this?
>>>
>>> Could we simply avoid to schedule() in kvm_async_pf_task_wait() if the
>>> fault process is in a RCU read-side critical section as follow?
>>>
>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>>> index aa60a08b65b1..291ea13b23d2 100644
>>> --- a/arch/x86/kernel/kvm.c
>>> +++ b/arch/x86/kernel/kvm.c
>>> @@ -140,7 +140,7 @@ void kvm_async_pf_task_wait(u32 token)
>>>  
>>> n.token = token;
>>> n.cpu = smp_processor_id();
>>> -   n.halted = is_idle_task(current) || preempt_count() > 1;
>>> +   n.halted = is_idle_task(current) || preempt_count() > 1 || 
>>> rcu_preempt_depth();
>>> init_swait_queue_head();
>>> hlist_add_head(, >list);
>>> raw_spin_unlock(>lock);
>>>
>>> (Add KVM folks and list Cced)
>>
>> Yes, that would work.  Mind to send it as a proper patch?
> 
> I'm confused, why would we do an ASYNC PF at all here? Thing is, a
> printk() shouldn't trigger a major fault _ever_. At worst it triggers
> something like a vmalloc minor fault. And I'm thinking we should not do
> the whole ASYNC machinery for minor faults.

Async page faults are page faults _on the host_ side, and you cannot
control what the host pages out.  Of course the hypervisor filters out
some cases itself (e.g. IF=0) but in general you could get one at any time.

Paolo



Re: usb/misc/usbtest: null-ptr-deref in usbtest_probe/get_endpoints

2017-09-29 Thread Andrey Konovalov
On Thu, Sep 28, 2017 at 7:01 PM, Alan Stern  wrote:
> On Thu, 28 Sep 2017, Andrey Konovalov wrote:
>
>> Hi!
>>
>> I've got the following report while fuzzing the kernel with syzkaller.
>>
>> On commit dc972a67cc54585bd83ad811c4e9b6ab3dcd427e (4.14-rc2+).
>>
>> It seems that out pointer ends up being NULL and kernel crashes on
>> access to out->desc.bEndpointAddress.
>>
>> gadgetfs: bound to dummy_udc driver
>> usb 1-1: new full-speed USB device number 2 using dummy_hcd
>> gadgetfs: connected
>> gadgetfs: disconnected
>> gadgetfs: connected
>> usb 1-1: config 2 interface 0 altsetting 206 endpoint 0x81 has invalid
>> maxpacket 2040, setting to 64
>> usb 1-1: config 2 interface 0 altsetting 206 has an invalid endpoint
>> with address 0xFF, skipping
>> usb 1-1: config 2 interface 0 altsetting 206 endpoint 0x5 has an
>> invalid bInterval 255, changing to 4
>> usb 1-1: config 2 interface 0 has no altsetting 0
>> usb 1-1: New USB device found, idVendor=0525, idProduct=a4a0
>> usb 1-1: New USB device strings: Mfr=0, Product=185, SerialNumber=1
>> usb 1-1: Product: a
>> usb 1-1: SerialNumber: a
>> gadgetfs: configuration #2
>> kasan: CONFIG_KASAN_INLINE enabled
>> kasan: GPF could be caused by NULL-ptr deref or user memory access
>> general protection fault:  [#1] PREEMPT SMP KASAN
>> Modules linked in:
>> CPU: 1 PID: 1845 Comm: kworker/1:2 Not tainted
>> 4.14.0-rc2-42664-gaf7d1481b3cb #297
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>> Workqueue: usb_hub_wq hub_event
>> task: 880064328000 task.stack: 880064398000
>> RIP: 0010:get_endpoints drivers/usb/misc/usbtest.c:208
>> RIP: 0010:usbtest_probe+0x114f/0x1ef0 drivers/usb/misc/usbtest.c:2706
>
> This looks like a simple logic error.
>
> Alan Stern

Hi Alan,

This fixes it.

Tested-by: Andrey Konovalov 

Thanks!

>
>
>
> Index: usb-4.x/drivers/usb/misc/usbtest.c
> ===
> --- usb-4.x.orig/drivers/usb/misc/usbtest.c
> +++ usb-4.x/drivers/usb/misc/usbtest.c
> @@ -202,12 +202,13 @@ found:
> return tmp;
> }
>
> -   if (in) {
> +   if (in)
> dev->in_pipe = usb_rcvbulkpipe(udev,
> in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
> +   if (out)
> dev->out_pipe = usb_sndbulkpipe(udev,
> out->desc.bEndpointAddress & 
> USB_ENDPOINT_NUMBER_MASK);
> -   }
> +
> if (iso_in) {
> dev->iso_in = _in->desc;
> dev->in_iso_pipe = usb_rcvisocpipe(udev,
>


Re: usb/misc/usbtest: null-ptr-deref in usbtest_probe/get_endpoints

2017-09-29 Thread Andrey Konovalov
On Thu, Sep 28, 2017 at 7:01 PM, Alan Stern  wrote:
> On Thu, 28 Sep 2017, Andrey Konovalov wrote:
>
>> Hi!
>>
>> I've got the following report while fuzzing the kernel with syzkaller.
>>
>> On commit dc972a67cc54585bd83ad811c4e9b6ab3dcd427e (4.14-rc2+).
>>
>> It seems that out pointer ends up being NULL and kernel crashes on
>> access to out->desc.bEndpointAddress.
>>
>> gadgetfs: bound to dummy_udc driver
>> usb 1-1: new full-speed USB device number 2 using dummy_hcd
>> gadgetfs: connected
>> gadgetfs: disconnected
>> gadgetfs: connected
>> usb 1-1: config 2 interface 0 altsetting 206 endpoint 0x81 has invalid
>> maxpacket 2040, setting to 64
>> usb 1-1: config 2 interface 0 altsetting 206 has an invalid endpoint
>> with address 0xFF, skipping
>> usb 1-1: config 2 interface 0 altsetting 206 endpoint 0x5 has an
>> invalid bInterval 255, changing to 4
>> usb 1-1: config 2 interface 0 has no altsetting 0
>> usb 1-1: New USB device found, idVendor=0525, idProduct=a4a0
>> usb 1-1: New USB device strings: Mfr=0, Product=185, SerialNumber=1
>> usb 1-1: Product: a
>> usb 1-1: SerialNumber: a
>> gadgetfs: configuration #2
>> kasan: CONFIG_KASAN_INLINE enabled
>> kasan: GPF could be caused by NULL-ptr deref or user memory access
>> general protection fault:  [#1] PREEMPT SMP KASAN
>> Modules linked in:
>> CPU: 1 PID: 1845 Comm: kworker/1:2 Not tainted
>> 4.14.0-rc2-42664-gaf7d1481b3cb #297
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>> Workqueue: usb_hub_wq hub_event
>> task: 880064328000 task.stack: 880064398000
>> RIP: 0010:get_endpoints drivers/usb/misc/usbtest.c:208
>> RIP: 0010:usbtest_probe+0x114f/0x1ef0 drivers/usb/misc/usbtest.c:2706
>
> This looks like a simple logic error.
>
> Alan Stern

Hi Alan,

This fixes it.

Tested-by: Andrey Konovalov 

Thanks!

>
>
>
> Index: usb-4.x/drivers/usb/misc/usbtest.c
> ===
> --- usb-4.x.orig/drivers/usb/misc/usbtest.c
> +++ usb-4.x/drivers/usb/misc/usbtest.c
> @@ -202,12 +202,13 @@ found:
> return tmp;
> }
>
> -   if (in) {
> +   if (in)
> dev->in_pipe = usb_rcvbulkpipe(udev,
> in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
> +   if (out)
> dev->out_pipe = usb_sndbulkpipe(udev,
> out->desc.bEndpointAddress & 
> USB_ENDPOINT_NUMBER_MASK);
> -   }
> +
> if (iso_in) {
> dev->iso_in = _in->desc;
> dev->in_iso_pipe = usb_rcvisocpipe(udev,
>


Re: [PATCH v4 for 4.14 1/3] membarrier: Provide register expedited private command

2017-09-29 Thread Nicholas Piggin
On Fri, 29 Sep 2017 12:31:31 +0200
Peter Zijlstra  wrote:

> On Fri, Sep 29, 2017 at 02:27:57AM +1000, Nicholas Piggin wrote:
> 
> > The biggest power boxes are more tightly coupled than those big
> > SGI systems, but even so just plodding along taking and releasing
> > locks in turn would be fine on those SGI ones as well really. Not DoS
> > level. This is not a single mega hot cache line or lock that is
> > bouncing over the entire machine, but one process grabbing a line and
> > lock from each of 1000 CPUs.
> > 
> > Slight disturbance sure, but each individual CPU will see it as 1/1000th
> > of a disturbance, most of the cost will be concentrated in the syscall
> > caller.  
> 
> But once the:
> 
>   while (1)
>   sys_membarrier()
> 
> thread has all those (lock) lines in M state locally, it will become
> very hard for the remote CPUs to claim them back, because its constantly

Not really. There is some ability to hold onto a line for a time, but
there is no way to starve them, let alone starve hundreds of other
CPUs. They will request the cacheline exclusive and eventually get it.
Then the membarrier CPU has to pay to get it back. If there is a lot of
activity on the locks, the membarrier will have a difficult time to take
each one.

I don't say there is zero cost or can't interfere with others, only that
it does not seem particularly bad compared with other things. Once you
restrict it to mm_cpumask, then it's quite partitionable.

I would really prefer to go this way on powerpc first. We could add the
the registration APIs as basically no-ops, but which would allow the
locking approach to be changed if we find it causes issues. I'll try to
find some time and a big system when I can.

> touching them. Sure it will touch a 1000 other lines before its back to
> this one, but if they're all local that's fairly quick.
> 
> But you're right, your big machines have far smaller NUMA factors.
> 
> > > Bouncing that lock across the machine is *painful*, I have vague
> > > memories of cases where the lock ping-pong was most the time spend.
> > > 
> > > But only Power needs this, all the other architectures are fine with the
> > > lockless approach for MEMBAR_EXPEDITED_PRIVATE.  
> > 
> > Yes, we can add an iterator function that power can override in a few
> > lines. Less arch specific code than this proposal.  
> 
> A semi related issue; I suppose we can do a arch upcall to flush_tlb_mm
> and reset the mm_cpumask when we change cpuset groups.

For powerpc we have been looking at how mm_cpumask can be improved.
It has real drawbacks even when you don't consider this new syscall.

Thanks,
Nick


Re: [PATCH v4 for 4.14 1/3] membarrier: Provide register expedited private command

2017-09-29 Thread Nicholas Piggin
On Fri, 29 Sep 2017 12:31:31 +0200
Peter Zijlstra  wrote:

> On Fri, Sep 29, 2017 at 02:27:57AM +1000, Nicholas Piggin wrote:
> 
> > The biggest power boxes are more tightly coupled than those big
> > SGI systems, but even so just plodding along taking and releasing
> > locks in turn would be fine on those SGI ones as well really. Not DoS
> > level. This is not a single mega hot cache line or lock that is
> > bouncing over the entire machine, but one process grabbing a line and
> > lock from each of 1000 CPUs.
> > 
> > Slight disturbance sure, but each individual CPU will see it as 1/1000th
> > of a disturbance, most of the cost will be concentrated in the syscall
> > caller.  
> 
> But once the:
> 
>   while (1)
>   sys_membarrier()
> 
> thread has all those (lock) lines in M state locally, it will become
> very hard for the remote CPUs to claim them back, because its constantly

Not really. There is some ability to hold onto a line for a time, but
there is no way to starve them, let alone starve hundreds of other
CPUs. They will request the cacheline exclusive and eventually get it.
Then the membarrier CPU has to pay to get it back. If there is a lot of
activity on the locks, the membarrier will have a difficult time to take
each one.

I don't say there is zero cost or can't interfere with others, only that
it does not seem particularly bad compared with other things. Once you
restrict it to mm_cpumask, then it's quite partitionable.

I would really prefer to go this way on powerpc first. We could add the
the registration APIs as basically no-ops, but which would allow the
locking approach to be changed if we find it causes issues. I'll try to
find some time and a big system when I can.

> touching them. Sure it will touch a 1000 other lines before its back to
> this one, but if they're all local that's fairly quick.
> 
> But you're right, your big machines have far smaller NUMA factors.
> 
> > > Bouncing that lock across the machine is *painful*, I have vague
> > > memories of cases where the lock ping-pong was most the time spend.
> > > 
> > > But only Power needs this, all the other architectures are fine with the
> > > lockless approach for MEMBAR_EXPEDITED_PRIVATE.  
> > 
> > Yes, we can add an iterator function that power can override in a few
> > lines. Less arch specific code than this proposal.  
> 
> A semi related issue; I suppose we can do a arch upcall to flush_tlb_mm
> and reset the mm_cpumask when we change cpuset groups.

For powerpc we have been looking at how mm_cpumask can be improved.
It has real drawbacks even when you don't consider this new syscall.

Thanks,
Nick


Re: [PATCH -v2 03/18] sched/fair: Cure calc_cfs_shares() vs reweight_entity()

2017-09-29 Thread Peter Zijlstra
On Fri, Sep 29, 2017 at 10:04:34AM +0100, Morten Rasmussen wrote:

> > -   load = scale_load_down(cfs_rq->load.weight);
> > +   load = max(scale_load_down(cfs_rq->load.weight), cfs_rq->avg.load_avg);
> 
> We use cfs_rq->tg_load_avg_contrib (the filtered version of
> cfs_rq->avg.load_avg) instead of cfs_rq->avg.load_avg further down, so I
> think we should here too for consistency.
> 
> + load = max(scale_load_down(cfs_rq->load.weight),
> +cfs_rq->tg_load_avg_contrib);
> 

No; we must use tg_load_avg_contrib because that is what's inclded in
tg_weight, but we want to add the most up-to-date value back in.


Re: [PATCH -v2 03/18] sched/fair: Cure calc_cfs_shares() vs reweight_entity()

2017-09-29 Thread Peter Zijlstra
On Fri, Sep 29, 2017 at 10:04:34AM +0100, Morten Rasmussen wrote:

> > -   load = scale_load_down(cfs_rq->load.weight);
> > +   load = max(scale_load_down(cfs_rq->load.weight), cfs_rq->avg.load_avg);
> 
> We use cfs_rq->tg_load_avg_contrib (the filtered version of
> cfs_rq->avg.load_avg) instead of cfs_rq->avg.load_avg further down, so I
> think we should here too for consistency.
> 
> + load = max(scale_load_down(cfs_rq->load.weight),
> +cfs_rq->tg_load_avg_contrib);
> 

No; we must use tg_load_avg_contrib because that is what's inclded in
tg_weight, but we want to add the most up-to-date value back in.


Re: usb/serial/visor: slab-out-of-bounds in palm_os_3_probe

2017-09-29 Thread Andrey Konovalov
On Fri, Sep 29, 2017 at 10:37 AM, Greg Kroah-Hartman
 wrote:
> On Thu, Sep 28, 2017 at 07:57:46PM +0200, Andrey Konovalov wrote:
>> Hi!
>>
>> I've got the following report while fuzzing the kernel with syzkaller.
>>
>> On commit dc972a67cc54585bd83ad811c4e9b6ab3dcd427e (4.14-rc2+).
>>
>> There's no check on the connection_info->num_ports value when
>> iterating over ports.
>>
>> usb 1-1: Handspring Visor / Palm OS: port 162, is for unknown use
>> usb 1-1: Handspring Visor / Palm OS: port 81, is for unknown use
>> ==
>> BUG: KASAN: slab-out-of-bounds in palm_os_3_probe+0x4e4/0x570
>> Read of size 1 at addr 8800686daa26 by task kworker/0:1/24
>>
>> CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc2-42748-gcd3da2fe6c25 
>> #324
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>> Workqueue: usb_hub_wq hub_event
>> Call Trace:
>>  __dump_stack lib/dump_stack.c:16
>>  dump_stack+0x292/0x395 lib/dump_stack.c:52
>>  print_address_description+0x78/0x280 mm/kasan/report.c:252
>>  kasan_report_error mm/kasan/report.c:351
>>  kasan_report+0x23d/0x350 mm/kasan/report.c:409
>>  __asan_report_load1_noabort+0x19/0x20 mm/kasan/report.c:427
>>  palm_os_3_probe+0x4e4/0x570 drivers/usb/serial/visor.c:348
>>  visor_probe+0x11c/0x1e0 drivers/usb/serial/visor.c:463
>>  usb_serial_probe+0x540/0x4090 drivers/usb/serial/usb-serial.c:903
>>  usb_probe_interface+0x35d/0x8e0 drivers/usb/core/driver.c:361
>>  really_probe drivers/base/dd.c:413
>>  driver_probe_device+0x610/0xa00 drivers/base/dd.c:557
>>  __device_attach_driver+0x230/0x290 drivers/base/dd.c:653
>>  bus_for_each_drv+0x161/0x210 drivers/base/bus.c:463
>>  __device_attach+0x26e/0x3d0 drivers/base/dd.c:710
>>  device_initial_probe+0x1f/0x30 drivers/base/dd.c:757
>>  bus_probe_device+0x1eb/0x290 drivers/base/bus.c:523
>>  device_add+0xd0b/0x1660 drivers/base/core.c:1835
>>  usb_set_configuration+0x104e/0x1870 drivers/usb/core/message.c:1932
>>  generic_probe+0x73/0xe0 drivers/usb/core/generic.c:174
>>  usb_probe_device+0xaf/0xe0 drivers/usb/core/driver.c:266
>>  really_probe drivers/base/dd.c:413
>>  driver_probe_device+0x610/0xa00 drivers/base/dd.c:557
>>  __device_attach_driver+0x230/0x290 drivers/base/dd.c:653
>>  bus_for_each_drv+0x161/0x210 drivers/base/bus.c:463
>>  __device_attach+0x26e/0x3d0 drivers/base/dd.c:710
>>  device_initial_probe+0x1f/0x30 drivers/base/dd.c:757
>>  bus_probe_device+0x1eb/0x290 drivers/base/bus.c:523
>>  device_add+0xd0b/0x1660 drivers/base/core.c:1835
>>  usb_new_device+0x7b8/0x1020 drivers/usb/core/hub.c:2538
>>  hub_port_connect drivers/usb/core/hub.c:4984
>>  hub_port_connect_change drivers/usb/core/hub.c:5090
>>  port_event drivers/usb/core/hub.c:5196
>>  hub_event_impl+0x1971/0x3760 drivers/usb/core/hub.c:5310
>>  hub_event+0x11b/0x3f0 drivers/usb/core/hub.c:5206
>>  process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119
>>  worker_thread+0x221/0x1850 kernel/workqueue.c:2253
>>  kthread+0x3a1/0x470 kernel/kthread.c:231
>>  ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431
>>
>> Allocated by task 24:
>>  save_stack_trace+0x1b/0x20 arch/x86/kernel/stacktrace.c:59
>>  save_stack+0x43/0xd0 mm/kasan/kasan.c:447
>>  set_track mm/kasan/kasan.c:459
>>  kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
>>  kmem_cache_alloc_trace+0x11e/0x2d0 mm/slub.c:2772
>>  kmalloc ./include/linux/slab.h:493
>>  palm_os_3_probe+0x7c/0x570 drivers/usb/serial/visor.c:325
>>  visor_probe+0x11c/0x1e0 drivers/usb/serial/visor.c:463
>>  usb_serial_probe+0x540/0x4090 drivers/usb/serial/usb-serial.c:903
>>  usb_probe_interface+0x35d/0x8e0 drivers/usb/core/driver.c:361
>>  really_probe drivers/base/dd.c:413
>>  driver_probe_device+0x610/0xa00 drivers/base/dd.c:557
>>  __device_attach_driver+0x230/0x290 drivers/base/dd.c:653
>>  bus_for_each_drv+0x161/0x210 drivers/base/bus.c:463
>>  __device_attach+0x26e/0x3d0 drivers/base/dd.c:710
>>  device_initial_probe+0x1f/0x30 drivers/base/dd.c:757
>>  bus_probe_device+0x1eb/0x290 drivers/base/bus.c:523
>>  device_add+0xd0b/0x1660 drivers/base/core.c:1835
>>  usb_set_configuration+0x104e/0x1870 drivers/usb/core/message.c:1932
>>  generic_probe+0x73/0xe0 drivers/usb/core/generic.c:174
>>  usb_probe_device+0xaf/0xe0 drivers/usb/core/driver.c:266
>>  really_probe drivers/base/dd.c:413
>>  driver_probe_device+0x610/0xa00 drivers/base/dd.c:557
>>  __device_attach_driver+0x230/0x290 drivers/base/dd.c:653
>>  bus_for_each_drv+0x161/0x210 drivers/base/bus.c:463
>>  __device_attach+0x26e/0x3d0 drivers/base/dd.c:710
>>  device_initial_probe+0x1f/0x30 drivers/base/dd.c:757
>>  bus_probe_device+0x1eb/0x290 drivers/base/bus.c:523
>>  device_add+0xd0b/0x1660 drivers/base/core.c:1835
>>  usb_new_device+0x7b8/0x1020 drivers/usb/core/hub.c:2538
>>  hub_port_connect drivers/usb/core/hub.c:4984
>>  hub_port_connect_change drivers/usb/core/hub.c:5090
>>  port_event drivers/usb/core/hub.c:5196
>>  

Re: usb/serial/visor: slab-out-of-bounds in palm_os_3_probe

2017-09-29 Thread Andrey Konovalov
On Fri, Sep 29, 2017 at 10:37 AM, Greg Kroah-Hartman
 wrote:
> On Thu, Sep 28, 2017 at 07:57:46PM +0200, Andrey Konovalov wrote:
>> Hi!
>>
>> I've got the following report while fuzzing the kernel with syzkaller.
>>
>> On commit dc972a67cc54585bd83ad811c4e9b6ab3dcd427e (4.14-rc2+).
>>
>> There's no check on the connection_info->num_ports value when
>> iterating over ports.
>>
>> usb 1-1: Handspring Visor / Palm OS: port 162, is for unknown use
>> usb 1-1: Handspring Visor / Palm OS: port 81, is for unknown use
>> ==
>> BUG: KASAN: slab-out-of-bounds in palm_os_3_probe+0x4e4/0x570
>> Read of size 1 at addr 8800686daa26 by task kworker/0:1/24
>>
>> CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc2-42748-gcd3da2fe6c25 
>> #324
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>> Workqueue: usb_hub_wq hub_event
>> Call Trace:
>>  __dump_stack lib/dump_stack.c:16
>>  dump_stack+0x292/0x395 lib/dump_stack.c:52
>>  print_address_description+0x78/0x280 mm/kasan/report.c:252
>>  kasan_report_error mm/kasan/report.c:351
>>  kasan_report+0x23d/0x350 mm/kasan/report.c:409
>>  __asan_report_load1_noabort+0x19/0x20 mm/kasan/report.c:427
>>  palm_os_3_probe+0x4e4/0x570 drivers/usb/serial/visor.c:348
>>  visor_probe+0x11c/0x1e0 drivers/usb/serial/visor.c:463
>>  usb_serial_probe+0x540/0x4090 drivers/usb/serial/usb-serial.c:903
>>  usb_probe_interface+0x35d/0x8e0 drivers/usb/core/driver.c:361
>>  really_probe drivers/base/dd.c:413
>>  driver_probe_device+0x610/0xa00 drivers/base/dd.c:557
>>  __device_attach_driver+0x230/0x290 drivers/base/dd.c:653
>>  bus_for_each_drv+0x161/0x210 drivers/base/bus.c:463
>>  __device_attach+0x26e/0x3d0 drivers/base/dd.c:710
>>  device_initial_probe+0x1f/0x30 drivers/base/dd.c:757
>>  bus_probe_device+0x1eb/0x290 drivers/base/bus.c:523
>>  device_add+0xd0b/0x1660 drivers/base/core.c:1835
>>  usb_set_configuration+0x104e/0x1870 drivers/usb/core/message.c:1932
>>  generic_probe+0x73/0xe0 drivers/usb/core/generic.c:174
>>  usb_probe_device+0xaf/0xe0 drivers/usb/core/driver.c:266
>>  really_probe drivers/base/dd.c:413
>>  driver_probe_device+0x610/0xa00 drivers/base/dd.c:557
>>  __device_attach_driver+0x230/0x290 drivers/base/dd.c:653
>>  bus_for_each_drv+0x161/0x210 drivers/base/bus.c:463
>>  __device_attach+0x26e/0x3d0 drivers/base/dd.c:710
>>  device_initial_probe+0x1f/0x30 drivers/base/dd.c:757
>>  bus_probe_device+0x1eb/0x290 drivers/base/bus.c:523
>>  device_add+0xd0b/0x1660 drivers/base/core.c:1835
>>  usb_new_device+0x7b8/0x1020 drivers/usb/core/hub.c:2538
>>  hub_port_connect drivers/usb/core/hub.c:4984
>>  hub_port_connect_change drivers/usb/core/hub.c:5090
>>  port_event drivers/usb/core/hub.c:5196
>>  hub_event_impl+0x1971/0x3760 drivers/usb/core/hub.c:5310
>>  hub_event+0x11b/0x3f0 drivers/usb/core/hub.c:5206
>>  process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119
>>  worker_thread+0x221/0x1850 kernel/workqueue.c:2253
>>  kthread+0x3a1/0x470 kernel/kthread.c:231
>>  ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431
>>
>> Allocated by task 24:
>>  save_stack_trace+0x1b/0x20 arch/x86/kernel/stacktrace.c:59
>>  save_stack+0x43/0xd0 mm/kasan/kasan.c:447
>>  set_track mm/kasan/kasan.c:459
>>  kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
>>  kmem_cache_alloc_trace+0x11e/0x2d0 mm/slub.c:2772
>>  kmalloc ./include/linux/slab.h:493
>>  palm_os_3_probe+0x7c/0x570 drivers/usb/serial/visor.c:325
>>  visor_probe+0x11c/0x1e0 drivers/usb/serial/visor.c:463
>>  usb_serial_probe+0x540/0x4090 drivers/usb/serial/usb-serial.c:903
>>  usb_probe_interface+0x35d/0x8e0 drivers/usb/core/driver.c:361
>>  really_probe drivers/base/dd.c:413
>>  driver_probe_device+0x610/0xa00 drivers/base/dd.c:557
>>  __device_attach_driver+0x230/0x290 drivers/base/dd.c:653
>>  bus_for_each_drv+0x161/0x210 drivers/base/bus.c:463
>>  __device_attach+0x26e/0x3d0 drivers/base/dd.c:710
>>  device_initial_probe+0x1f/0x30 drivers/base/dd.c:757
>>  bus_probe_device+0x1eb/0x290 drivers/base/bus.c:523
>>  device_add+0xd0b/0x1660 drivers/base/core.c:1835
>>  usb_set_configuration+0x104e/0x1870 drivers/usb/core/message.c:1932
>>  generic_probe+0x73/0xe0 drivers/usb/core/generic.c:174
>>  usb_probe_device+0xaf/0xe0 drivers/usb/core/driver.c:266
>>  really_probe drivers/base/dd.c:413
>>  driver_probe_device+0x610/0xa00 drivers/base/dd.c:557
>>  __device_attach_driver+0x230/0x290 drivers/base/dd.c:653
>>  bus_for_each_drv+0x161/0x210 drivers/base/bus.c:463
>>  __device_attach+0x26e/0x3d0 drivers/base/dd.c:710
>>  device_initial_probe+0x1f/0x30 drivers/base/dd.c:757
>>  bus_probe_device+0x1eb/0x290 drivers/base/bus.c:523
>>  device_add+0xd0b/0x1660 drivers/base/core.c:1835
>>  usb_new_device+0x7b8/0x1020 drivers/usb/core/hub.c:2538
>>  hub_port_connect drivers/usb/core/hub.c:4984
>>  hub_port_connect_change drivers/usb/core/hub.c:5090
>>  port_event drivers/usb/core/hub.c:5196
>>  hub_event_impl+0x1971/0x3760 

Re: [PATCH -v2 02/18] sched/fair: Add comment to calc_cfs_shares()

2017-09-29 Thread Peter Zijlstra
On Thu, Sep 28, 2017 at 11:03:03AM +0100, Morten Rasmussen wrote:

> > +/*
> > + * All this does is approximate the hierarchical proportion which includes 
> > that
> > + * global sum we all love to hate.
> > + *
> > + * That is, the weight of a group entity, is the proportional share of the
> > + * group weight based on the group runqueue weights. That is:
> > + *
> > + * tg->weight * grq->load.weight
> > + *   ge->load.weight = -   (1)
> > + *   \Sum grq->load.weight
> > + *
> > + * Now, because computing that sum is prohibitively expensive to compute 
> > (been
> > + * there, done that) we approximate it with this average stuff. The average
> > + * moves slower and therefore the approximation is cheaper and more stable.
> > + *
> > + * So instead of the above, we substitute:
> > + *
> > + *   grq->load.weight -> grq->avg.load_avg (2)
> > + *
> > + * which yields the following:
> > + *
> > + * tg->weight * grq->avg.load_avg
> > + *   ge->load.weight = --  (3)
> > + * tg->load_avg
> > + *
> > + * Where: tg->load_avg ~= \Sum grq->avg.load_avg
> > + *
> > + * That is shares_avg, and it is right (given the approximation (2)).
> > + *
> > + * The problem with it is that because the average is slow -- it was 
> > designed
> > + * to be exactly that of course -- this leads to transients in boundary
> > + * conditions. In specific, the case where the group was idle and we start 
> > the
> > + * one task. It takes time for our CPU's grq->avg.load_avg to build up,
> > + * yielding bad latency etc..
> > + *
> > + * Now, in that special case (1) reduces to:
> > + *
> > + * tg->weight * grq->load.weight
> > + *   ge->load.weight = - = tg>weight   (4)
> > + * grp->load.weight
> 
> Should it be "grq->load.weight" in the denominator of (4)?
> And "tg->weight" at the end?

Yes, otherwise its all doesn't really make sense :-) Typing is hard it
seems.

> > + *
> > + * That is, the sum collapses because all other CPUs are idle; the UP 
> > scenario.
> 
> Shouldn't (3) collapse in the same way too in this special case?

That's more difficult to see and (1) is the canonical form.

> In theory it should reduce to:
> 
>  tg->weight * grq->avg.load_avg
>ge->load.weight = --
>   grq->avg.load_avg

Yes, agreed.

> But I can see many reasons why it won't happen in practice if things
> aren't perfectly up-to-date. If tg->load_avg and grq->avg.load_avg in
> (3) aren't in sync, or there are stale contributions to tg->load_avg
> from other cpus then (3) can return anything between 0 and tg->weight.

Just so.

> > + *
> > + * So what we do is modify our approximation (3) to approach (4) in the 
> > (near)
> > + * UP case, like:
> > + *
> > + *   ge->load.weight =
> > + *
> > + *  tg->weight * grq->load.weight
> > + * --- (5)
> > + * tg->load_avg - grq->avg.load_avg + grq->load.weight
> > + *
> > + *
> > + * And that is shares_weight and is icky. In the (near) UP case it 
> > approaches
> > + * (4) while in the normal case it approaches (3). It consistently
> > + * overestimates the ge->load.weight and therefore:
> > + *
> > + *   \Sum ge->load.weight >= tg->weight
> > + *
> > + * hence icky!
> 
> IIUC, if grq->avg.load_avg > grq->load.weight, i.e. you have blocked
> tasks, you can end up with underestimating the ge->load.weight for some
> of the group entities lead to \Sum ge->load.weight < tg->weight.

Ah yes, you're right. However, if you look at the end of the series we
actually end up with using:

max(grq->load.weight, grq->avg.load_avg)

Which I suppose makes it true again.


Re: [PATCH -v2 02/18] sched/fair: Add comment to calc_cfs_shares()

2017-09-29 Thread Peter Zijlstra
On Thu, Sep 28, 2017 at 11:03:03AM +0100, Morten Rasmussen wrote:

> > +/*
> > + * All this does is approximate the hierarchical proportion which includes 
> > that
> > + * global sum we all love to hate.
> > + *
> > + * That is, the weight of a group entity, is the proportional share of the
> > + * group weight based on the group runqueue weights. That is:
> > + *
> > + * tg->weight * grq->load.weight
> > + *   ge->load.weight = -   (1)
> > + *   \Sum grq->load.weight
> > + *
> > + * Now, because computing that sum is prohibitively expensive to compute 
> > (been
> > + * there, done that) we approximate it with this average stuff. The average
> > + * moves slower and therefore the approximation is cheaper and more stable.
> > + *
> > + * So instead of the above, we substitute:
> > + *
> > + *   grq->load.weight -> grq->avg.load_avg (2)
> > + *
> > + * which yields the following:
> > + *
> > + * tg->weight * grq->avg.load_avg
> > + *   ge->load.weight = --  (3)
> > + * tg->load_avg
> > + *
> > + * Where: tg->load_avg ~= \Sum grq->avg.load_avg
> > + *
> > + * That is shares_avg, and it is right (given the approximation (2)).
> > + *
> > + * The problem with it is that because the average is slow -- it was 
> > designed
> > + * to be exactly that of course -- this leads to transients in boundary
> > + * conditions. In specific, the case where the group was idle and we start 
> > the
> > + * one task. It takes time for our CPU's grq->avg.load_avg to build up,
> > + * yielding bad latency etc..
> > + *
> > + * Now, in that special case (1) reduces to:
> > + *
> > + * tg->weight * grq->load.weight
> > + *   ge->load.weight = - = tg>weight   (4)
> > + * grp->load.weight
> 
> Should it be "grq->load.weight" in the denominator of (4)?
> And "tg->weight" at the end?

Yes, otherwise its all doesn't really make sense :-) Typing is hard it
seems.

> > + *
> > + * That is, the sum collapses because all other CPUs are idle; the UP 
> > scenario.
> 
> Shouldn't (3) collapse in the same way too in this special case?

That's more difficult to see and (1) is the canonical form.

> In theory it should reduce to:
> 
>  tg->weight * grq->avg.load_avg
>ge->load.weight = --
>   grq->avg.load_avg

Yes, agreed.

> But I can see many reasons why it won't happen in practice if things
> aren't perfectly up-to-date. If tg->load_avg and grq->avg.load_avg in
> (3) aren't in sync, or there are stale contributions to tg->load_avg
> from other cpus then (3) can return anything between 0 and tg->weight.

Just so.

> > + *
> > + * So what we do is modify our approximation (3) to approach (4) in the 
> > (near)
> > + * UP case, like:
> > + *
> > + *   ge->load.weight =
> > + *
> > + *  tg->weight * grq->load.weight
> > + * --- (5)
> > + * tg->load_avg - grq->avg.load_avg + grq->load.weight
> > + *
> > + *
> > + * And that is shares_weight and is icky. In the (near) UP case it 
> > approaches
> > + * (4) while in the normal case it approaches (3). It consistently
> > + * overestimates the ge->load.weight and therefore:
> > + *
> > + *   \Sum ge->load.weight >= tg->weight
> > + *
> > + * hence icky!
> 
> IIUC, if grq->avg.load_avg > grq->load.weight, i.e. you have blocked
> tasks, you can end up with underestimating the ge->load.weight for some
> of the group entities lead to \Sum ge->load.weight < tg->weight.

Ah yes, you're right. However, if you look at the end of the series we
actually end up with using:

max(grq->load.weight, grq->avg.load_avg)

Which I suppose makes it true again.


Re: [PATCH v3] ebtables: fix race condition in frame_filter_net_init()

2017-09-29 Thread Pablo Neira Ayuso
On Tue, Sep 26, 2017 at 06:35:45PM +0200, Artem Savkov wrote:
> It is possible for ebt_in_hook to be triggered before ebt_table is assigned
> resulting in a NULL-pointer dereference. Make sure hooks are
> registered as the last step.

Applied, thanks.


Re: [PATCH v3] ebtables: fix race condition in frame_filter_net_init()

2017-09-29 Thread Pablo Neira Ayuso
On Tue, Sep 26, 2017 at 06:35:45PM +0200, Artem Savkov wrote:
> It is possible for ebt_in_hook to be triggered before ebt_table is assigned
> resulting in a NULL-pointer dereference. Make sure hooks are
> registered as the last step.

Applied, thanks.


[PATCH][trace-cmd] trace-view: Pick up sched_wakeup_new event

2017-09-29 Thread Jan Kiszka
From: Jan Kiszka 

Seems like a copy bug of the initial commit 7b0139ebb1cf.

Signed-off-by: Jan Kiszka 
---

I didn't manage to test this so far, but this looked strange while
reading the code.

 trace-view-store.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/trace-view-store.c b/trace-view-store.c
index 2ec52c8..f5d0a04 100644
--- a/trace-view-store.c
+++ b/trace-view-store.c
@@ -1254,7 +1254,7 @@ static void update_filter_tasks(TraceViewStore *store)
  "pid");
 
store->sched_wakeup_new_event =
-   pevent_find_event_by_name(pevent, "sched", 
"sched_wakeup");
+   pevent_find_event_by_name(pevent, "sched", 
"sched_wakeup_new");
if (store->sched_wakeup_new_event)
store->sched_wakeup_new_pid_field =

pevent_find_any_field(store->sched_wakeup_new_event,
-- 
2.12.3


[PATCH][trace-cmd] trace-view: Pick up sched_wakeup_new event

2017-09-29 Thread Jan Kiszka
From: Jan Kiszka 

Seems like a copy bug of the initial commit 7b0139ebb1cf.

Signed-off-by: Jan Kiszka 
---

I didn't manage to test this so far, but this looked strange while
reading the code.

 trace-view-store.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/trace-view-store.c b/trace-view-store.c
index 2ec52c8..f5d0a04 100644
--- a/trace-view-store.c
+++ b/trace-view-store.c
@@ -1254,7 +1254,7 @@ static void update_filter_tasks(TraceViewStore *store)
  "pid");
 
store->sched_wakeup_new_event =
-   pevent_find_event_by_name(pevent, "sched", 
"sched_wakeup");
+   pevent_find_event_by_name(pevent, "sched", 
"sched_wakeup_new");
if (store->sched_wakeup_new_event)
store->sched_wakeup_new_pid_field =

pevent_find_any_field(store->sched_wakeup_new_event,
-- 
2.12.3


[tip:x86/urgent] x86/asm: Fix inline asm call constraints for GCC 4.4

2017-09-29 Thread tip-bot for Josh Poimboeuf
Commit-ID:  520a13c530aeb5f63e011d668c42db1af19ed349
Gitweb: https://git.kernel.org/tip/520a13c530aeb5f63e011d668c42db1af19ed349
Author: Josh Poimboeuf 
AuthorDate: Thu, 28 Sep 2017 16:58:26 -0500
Committer:  Ingo Molnar 
CommitDate: Fri, 29 Sep 2017 13:15:44 +0200

x86/asm: Fix inline asm call constraints for GCC 4.4

The kernel test bot (run by Xiaolong Ye) reported that the following commit:

  f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang")

is causing double faults in a kernel compiled with GCC 4.4.

Linus subsequently diagnosed the crash pattern and the buggy commit and found 
that
the issue is with this code:

  register unsigned int __asm_call_sp asm("esp");
  #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)

Even on a 64-bit kernel, it's using ESP instead of RSP.  That causes GCC
to produce the following bogus code:

  8147461d:   89 e0   mov%esp,%eax
  8147461f:   4c 89 f7mov%r14,%rdi
  81474622:   4c 89 femov%r15,%rsi
  81474625:   ba 20 00 00 00  mov$0x20,%edx
  8147462a:   89 c4   mov%eax,%esp
  8147462c:   e8 bf 52 05 00  callq  814c98f0 


Despite the absurdity of it backing up and restoring the stack pointer
for no reason, the bug is actually the fact that it's only backing up
and restoring the lower 32 bits of the stack pointer.  The upper 32 bits
are getting cleared out, corrupting the stack pointer.

So change the '__asm_call_sp' register variable to be associated with
the actual full-size stack pointer.

This also requires changing the __ASM_SEL() macro to be based on the
actual compiled arch size, rather than the CONFIG value, because
CONFIG_X86_64 compiles some files with '-m32' (e.g., realmode and vdso).
Otherwise Clang fails to build the kernel because it complains about the
use of a 64-bit register (RSP) in a 32-bit file.

Reported-and-Bisected-and-Tested-by: kernel test robot 
Diagnosed-by: Linus Torvalds 
Signed-off-by: Josh Poimboeuf 
Cc: Alexander Potapenko 
Cc: Andrey Ryabinin 
Cc: Andy Lutomirski 
Cc: Arnd Bergmann 
Cc: Dmitriy Vyukov 
Cc: LKP 
Cc: Linus Torvalds 
Cc: Matthias Kaehlcke 
Cc: Miguel Bernal Marin 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Fixes: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang")
Link: http://lkml.kernel.org/r/20170928215826.6sdpmwtkiydiytim@treble
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/asm.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index c1eadba..30c3c9a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -11,10 +11,12 @@
 # define __ASM_FORM_COMMA(x) " " #x ","
 #endif
 
-#ifdef CONFIG_X86_32
+#ifndef __x86_64__
+/* 32 bit */
 # define __ASM_SEL(a,b) __ASM_FORM(a)
 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
 #else
+/* 64 bit */
 # define __ASM_SEL(a,b) __ASM_FORM(b)
 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
 #endif
@@ -139,7 +141,7 @@
  * gets set up by the containing function.  If you forget to do this, objtool
  * may print a "call without frame pointer save/setup" warning.
  */
-register unsigned int __asm_call_sp asm("esp");
+register unsigned long __asm_call_sp asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
 #endif
 


[tip:x86/urgent] x86/asm: Fix inline asm call constraints for GCC 4.4

2017-09-29 Thread tip-bot for Josh Poimboeuf
Commit-ID:  520a13c530aeb5f63e011d668c42db1af19ed349
Gitweb: https://git.kernel.org/tip/520a13c530aeb5f63e011d668c42db1af19ed349
Author: Josh Poimboeuf 
AuthorDate: Thu, 28 Sep 2017 16:58:26 -0500
Committer:  Ingo Molnar 
CommitDate: Fri, 29 Sep 2017 13:15:44 +0200

x86/asm: Fix inline asm call constraints for GCC 4.4

The kernel test bot (run by Xiaolong Ye) reported that the following commit:

  f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang")

is causing double faults in a kernel compiled with GCC 4.4.

Linus subsequently diagnosed the crash pattern and the buggy commit and found 
that
the issue is with this code:

  register unsigned int __asm_call_sp asm("esp");
  #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)

Even on a 64-bit kernel, it's using ESP instead of RSP.  That causes GCC
to produce the following bogus code:

  8147461d:   89 e0   mov%esp,%eax
  8147461f:   4c 89 f7mov%r14,%rdi
  81474622:   4c 89 femov%r15,%rsi
  81474625:   ba 20 00 00 00  mov$0x20,%edx
  8147462a:   89 c4   mov%eax,%esp
  8147462c:   e8 bf 52 05 00  callq  814c98f0 


Despite the absurdity of it backing up and restoring the stack pointer
for no reason, the bug is actually the fact that it's only backing up
and restoring the lower 32 bits of the stack pointer.  The upper 32 bits
are getting cleared out, corrupting the stack pointer.

So change the '__asm_call_sp' register variable to be associated with
the actual full-size stack pointer.

This also requires changing the __ASM_SEL() macro to be based on the
actual compiled arch size, rather than the CONFIG value, because
CONFIG_X86_64 compiles some files with '-m32' (e.g., realmode and vdso).
Otherwise Clang fails to build the kernel because it complains about the
use of a 64-bit register (RSP) in a 32-bit file.

Reported-and-Bisected-and-Tested-by: kernel test robot 
Diagnosed-by: Linus Torvalds 
Signed-off-by: Josh Poimboeuf 
Cc: Alexander Potapenko 
Cc: Andrey Ryabinin 
Cc: Andy Lutomirski 
Cc: Arnd Bergmann 
Cc: Dmitriy Vyukov 
Cc: LKP 
Cc: Linus Torvalds 
Cc: Matthias Kaehlcke 
Cc: Miguel Bernal Marin 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Fixes: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang")
Link: http://lkml.kernel.org/r/20170928215826.6sdpmwtkiydiytim@treble
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/asm.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index c1eadba..30c3c9a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -11,10 +11,12 @@
 # define __ASM_FORM_COMMA(x) " " #x ","
 #endif
 
-#ifdef CONFIG_X86_32
+#ifndef __x86_64__
+/* 32 bit */
 # define __ASM_SEL(a,b) __ASM_FORM(a)
 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
 #else
+/* 64 bit */
 # define __ASM_SEL(a,b) __ASM_FORM(b)
 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
 #endif
@@ -139,7 +141,7 @@
  * gets set up by the containing function.  If you forget to do this, objtool
  * may print a "call without frame pointer save/setup" warning.
  */
-register unsigned int __asm_call_sp asm("esp");
+register unsigned long __asm_call_sp asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
 #endif
 


Re: [PATCH v3] mmc: dt-bindings: Add reg/source_cg/latch-ck for Mediatek MMC bindings

2017-09-29 Thread Matthias Brugger



On 09/22/2017 02:03 PM, Chaotian Jing wrote:

Change the comptiable for support of multi-platform
Add description for reg
Add description for source_cg
Add description for mediatek,latch-ck
Note that source_cg and mediatek,latch-ck are optional for some projects,
eg, MT2701 do not have source_cg, and MT2712 do not need
mediatek,latch-ck

Signed-off-by: Chaotian Jing 
---
  Documentation/devicetree/bindings/mmc/mtk-sd.txt | 13 ++---
  1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.txt 
b/Documentation/devicetree/bindings/mmc/mtk-sd.txt
index 4182ea3..b934a29 100644
--- a/Documentation/devicetree/bindings/mmc/mtk-sd.txt
+++ b/Documentation/devicetree/bindings/mmc/mtk-sd.txt
@@ -7,10 +7,15 @@ This file documents differences between the core properties 
in mmc.txt
  and the properties used by the msdc driver.
  
  Required properties:

-- compatible: Should be "mediatek,mt8173-mmc","mediatek,mt8135-mmc"
+- compatible: value should be either of the following.
+   "mediatek,mt8135-mmc": for mmc host ip compatible with mt8135
+   "mediatek,mt8173-mmc": for mmc host ip compatible with mt8173
+   "mediatek,mt2701-mmc": for mmc host ip compatible with mt2701
+   "mediatek,mt2712-mmc": for mmc host ip compatible with mt2712


NAK, this has to be:
You have to add the fallback comatible ("mediatek,mt8135-mmc") to the binding 
description as otherwise the driver does not get probed.


Regards,
Matthias


+- reg: physical base address of the controller and length
  - interrupts: Should contain MSDC interrupt number
-- clocks: MSDC source clock, HCLK
-- clock-names: "source", "hclk"
+- clocks: MSDC source clock, HCLK, source_cg
+- clock-names: "source", "hclk", "source_cg"
  - pinctrl-names: should be "default", "state_uhs"
  - pinctrl-0: should contain default/high speed pin ctrl
  - pinctrl-1: should contain uhs mode pin ctrl
@@ -30,6 +35,8 @@ Optional properties:
  - mediatek,hs400-cmd-resp-sel-rising:  HS400 command response sample selection
   If present,HS400 command responses are 
sampled on rising edges.
   If not present,HS400 command responses 
are sampled on falling edges.
+- mediatek,latch-ck: Some SoCs do not support enhance_rx, need set correct 
latch-ck to avoid data crc
+error caused by stop clock(fifo full)
  
  Examples:

  mmc0: mmc@1123 {



Re: [PATCH v3] mmc: dt-bindings: Add reg/source_cg/latch-ck for Mediatek MMC bindings

2017-09-29 Thread Matthias Brugger



On 09/22/2017 02:03 PM, Chaotian Jing wrote:

Change the comptiable for support of multi-platform
Add description for reg
Add description for source_cg
Add description for mediatek,latch-ck
Note that source_cg and mediatek,latch-ck are optional for some projects,
eg, MT2701 do not have source_cg, and MT2712 do not need
mediatek,latch-ck

Signed-off-by: Chaotian Jing 
---
  Documentation/devicetree/bindings/mmc/mtk-sd.txt | 13 ++---
  1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.txt 
b/Documentation/devicetree/bindings/mmc/mtk-sd.txt
index 4182ea3..b934a29 100644
--- a/Documentation/devicetree/bindings/mmc/mtk-sd.txt
+++ b/Documentation/devicetree/bindings/mmc/mtk-sd.txt
@@ -7,10 +7,15 @@ This file documents differences between the core properties 
in mmc.txt
  and the properties used by the msdc driver.
  
  Required properties:

-- compatible: Should be "mediatek,mt8173-mmc","mediatek,mt8135-mmc"
+- compatible: value should be either of the following.
+   "mediatek,mt8135-mmc": for mmc host ip compatible with mt8135
+   "mediatek,mt8173-mmc": for mmc host ip compatible with mt8173
+   "mediatek,mt2701-mmc": for mmc host ip compatible with mt2701
+   "mediatek,mt2712-mmc": for mmc host ip compatible with mt2712


NAK, this has to be:
You have to add the fallback comatible ("mediatek,mt8135-mmc") to the binding 
description as otherwise the driver does not get probed.


Regards,
Matthias


+- reg: physical base address of the controller and length
  - interrupts: Should contain MSDC interrupt number
-- clocks: MSDC source clock, HCLK
-- clock-names: "source", "hclk"
+- clocks: MSDC source clock, HCLK, source_cg
+- clock-names: "source", "hclk", "source_cg"
  - pinctrl-names: should be "default", "state_uhs"
  - pinctrl-0: should contain default/high speed pin ctrl
  - pinctrl-1: should contain uhs mode pin ctrl
@@ -30,6 +35,8 @@ Optional properties:
  - mediatek,hs400-cmd-resp-sel-rising:  HS400 command response sample selection
   If present,HS400 command responses are 
sampled on rising edges.
   If not present,HS400 command responses 
are sampled on falling edges.
+- mediatek,latch-ck: Some SoCs do not support enhance_rx, need set correct 
latch-ck to avoid data crc
+error caused by stop clock(fifo full)
  
  Examples:

  mmc0: mmc@1123 {



Re: [PATCH 3/4] sched: WARN when migrating to an offline CPU

2017-09-29 Thread Peter Zijlstra
On Thu, Sep 28, 2017 at 01:42:49PM +0200, Peter Zijlstra wrote:
> On Thu, Sep 28, 2017 at 11:03:10AM +, Levin, Alexander (Sasha Levin) 
> wrote:
> > On Thu, Sep 28, 2017 at 12:35:41PM +0200, Peter Zijlstra wrote:
> > >On Thu, Sep 28, 2017 at 02:14:15AM -0700, Sasha Levin wrote:
> 
> > >> [2035565360.446794] Unregister pv shared memory for cpu 2
> > >> [2035565360.467930] numa_remove_cpu cpu 2 node 2: mask now 6
> > >> [2035565360.471431] [ cut here ]
> > >> [2035565360.472548] WARNING: CPU: 2 PID: 24 at > 
> > >> kernel/sched/core.c:1178 set_task_cpu (kernel/sched/core.c:1157)
> > >> [2035565360.473840] Modules linked in:
> > >> [2035565360.474632] CPU: 2 PID: 24 Comm: migration/2 Not tainted > 
> > >> 4.14.0-rc2-next-20170927+ #252
> > >
> > >Urgh, weird. That really shouldn't happen. Can you easily reproduce?
> > 
> > Looks like yes. Seems like it's enough to stress CPU hotplug + trinity.
> 
> OK, I'll see if I can reproduce building kernels and hotplug stress.
> Otherwise I'll try and cook up some debug patches for you.

I can't seem to trigger :-(

Can you please run with the below patch and:

  # echo 1 > /proc/sys/kernel/traceoff_on_warning

---
 kernel/sched/core.c |  3 +++
 kernel/sched/fair.c | 10 ++
 2 files changed, 13 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 18a6966567da..c613f7756981 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5592,6 +5592,7 @@ int sched_cpu_activate(unsigned int cpu)
struct rq_flags rf;
 
set_cpu_active(cpu, true);
+   trace_printk("active: %d mask: %*pbl\n", cpu, 
cpumask_pr_args(cpu_active_mask));
 
if (sched_smp_initialized) {
sched_domains_numa_masks_set(cpu);
@@ -5624,6 +5625,7 @@ int sched_cpu_deactivate(unsigned int cpu)
int ret;
 
set_cpu_active(cpu, false);
+   trace_printk("not-active: %d mask: %*pbl\n", cpu, 
cpumask_pr_args(cpu_active_mask));
/*
 * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
 * users of this state to go away such that all new such users will
@@ -5632,6 +5634,7 @@ int sched_cpu_deactivate(unsigned int cpu)
 * Do sync before park smpboot threads to take care the rcu boost case.
 */
synchronize_rcu_mult(call_rcu, call_rcu_sched);
+   trace_printk("rcu-sync: %d\n", cpu);
 
if (!sched_smp_initialized)
return 0;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 70ba32e08a23..cb8f43a59f33 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8150,8 +8150,11 @@ static int load_balance(int this_cpu, struct rq *this_rq,
.tasks  = LIST_HEAD_INIT(env.tasks),
};
 
+
cpumask_and(cpus, sched_domain_span(sd), cpu_active_mask);
 
+   trace_printk("dst_cpu: %d cpus: %*pbl\n", this_cpu, 
cpumask_pr_args(cpus));
+
schedstat_inc(sd->lb_count[idle]);
 
 redo:
@@ -8248,6 +8251,9 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
env.dst_rq   = cpu_rq(env.new_dst_cpu);
env.dst_cpu  = env.new_dst_cpu;
+
+   trace_printk("dst_cpu: %d\n", env.dst_cpu);
+
env.flags   &= ~LBF_DST_PINNED;
env.loop = 0;
env.loop_break   = sched_nr_migrate_break;
@@ -8465,6 +8471,7 @@ static int idle_balance(struct rq *this_rq, struct 
rq_flags *rf)
 
update_blocked_averages(this_cpu);
rcu_read_lock();
+   trace_printk("rcu-read-lock: %d\n", this_cpu);
for_each_domain(this_cpu, sd) {
int continue_balancing = 1;
u64 t0, domain_cost;
@@ -8500,6 +8507,7 @@ static int idle_balance(struct rq *this_rq, struct 
rq_flags *rf)
if (pulled_task || this_rq->nr_running > 0)
break;
}
+   trace_printk("rcu-read-unlock: %d\n", this_cpu);
rcu_read_unlock();
 
raw_spin_lock(_rq->lock);
@@ -8790,6 +8798,7 @@ static void rebalance_domains(struct rq *rq, enum 
cpu_idle_type idle)
update_blocked_averages(cpu);
 
rcu_read_lock();
+   trace_printk("rcu-read-lock: %d\n", cpu);
for_each_domain(cpu, sd) {
/*
 * Decay the newidle max times here because this is a regular
@@ -8853,6 +8862,7 @@ static void rebalance_domains(struct rq *rq, enum 
cpu_idle_type idle)
rq->max_idle_balance_cost =
max((u64)sysctl_sched_migration_cost, max_cost);
}
+   trace_printk("rcu-read-unlock: %d\n", cpu);
rcu_read_unlock();
 
/*


Re: [PATCH 3/4] sched: WARN when migrating to an offline CPU

2017-09-29 Thread Peter Zijlstra
On Thu, Sep 28, 2017 at 01:42:49PM +0200, Peter Zijlstra wrote:
> On Thu, Sep 28, 2017 at 11:03:10AM +, Levin, Alexander (Sasha Levin) 
> wrote:
> > On Thu, Sep 28, 2017 at 12:35:41PM +0200, Peter Zijlstra wrote:
> > >On Thu, Sep 28, 2017 at 02:14:15AM -0700, Sasha Levin wrote:
> 
> > >> [2035565360.446794] Unregister pv shared memory for cpu 2
> > >> [2035565360.467930] numa_remove_cpu cpu 2 node 2: mask now 6
> > >> [2035565360.471431] [ cut here ]
> > >> [2035565360.472548] WARNING: CPU: 2 PID: 24 at > 
> > >> kernel/sched/core.c:1178 set_task_cpu (kernel/sched/core.c:1157)
> > >> [2035565360.473840] Modules linked in:
> > >> [2035565360.474632] CPU: 2 PID: 24 Comm: migration/2 Not tainted > 
> > >> 4.14.0-rc2-next-20170927+ #252
> > >
> > >Urgh, weird. That really shouldn't happen. Can you easily reproduce?
> > 
> > Looks like yes. Seems like it's enough to stress CPU hotplug + trinity.
> 
> OK, I'll see if I can reproduce building kernels and hotplug stress.
> Otherwise I'll try and cook up some debug patches for you.

I can't seem to trigger :-(

Can you please run with the below patch and:

  # echo 1 > /proc/sys/kernel/traceoff_on_warning

---
 kernel/sched/core.c |  3 +++
 kernel/sched/fair.c | 10 ++
 2 files changed, 13 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 18a6966567da..c613f7756981 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5592,6 +5592,7 @@ int sched_cpu_activate(unsigned int cpu)
struct rq_flags rf;
 
set_cpu_active(cpu, true);
+   trace_printk("active: %d mask: %*pbl\n", cpu, 
cpumask_pr_args(cpu_active_mask));
 
if (sched_smp_initialized) {
sched_domains_numa_masks_set(cpu);
@@ -5624,6 +5625,7 @@ int sched_cpu_deactivate(unsigned int cpu)
int ret;
 
set_cpu_active(cpu, false);
+   trace_printk("not-active: %d mask: %*pbl\n", cpu, 
cpumask_pr_args(cpu_active_mask));
/*
 * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
 * users of this state to go away such that all new such users will
@@ -5632,6 +5634,7 @@ int sched_cpu_deactivate(unsigned int cpu)
 * Do sync before park smpboot threads to take care the rcu boost case.
 */
synchronize_rcu_mult(call_rcu, call_rcu_sched);
+   trace_printk("rcu-sync: %d\n", cpu);
 
if (!sched_smp_initialized)
return 0;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 70ba32e08a23..cb8f43a59f33 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8150,8 +8150,11 @@ static int load_balance(int this_cpu, struct rq *this_rq,
.tasks  = LIST_HEAD_INIT(env.tasks),
};
 
+
cpumask_and(cpus, sched_domain_span(sd), cpu_active_mask);
 
+   trace_printk("dst_cpu: %d cpus: %*pbl\n", this_cpu, 
cpumask_pr_args(cpus));
+
schedstat_inc(sd->lb_count[idle]);
 
 redo:
@@ -8248,6 +8251,9 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
env.dst_rq   = cpu_rq(env.new_dst_cpu);
env.dst_cpu  = env.new_dst_cpu;
+
+   trace_printk("dst_cpu: %d\n", env.dst_cpu);
+
env.flags   &= ~LBF_DST_PINNED;
env.loop = 0;
env.loop_break   = sched_nr_migrate_break;
@@ -8465,6 +8471,7 @@ static int idle_balance(struct rq *this_rq, struct 
rq_flags *rf)
 
update_blocked_averages(this_cpu);
rcu_read_lock();
+   trace_printk("rcu-read-lock: %d\n", this_cpu);
for_each_domain(this_cpu, sd) {
int continue_balancing = 1;
u64 t0, domain_cost;
@@ -8500,6 +8507,7 @@ static int idle_balance(struct rq *this_rq, struct 
rq_flags *rf)
if (pulled_task || this_rq->nr_running > 0)
break;
}
+   trace_printk("rcu-read-unlock: %d\n", this_cpu);
rcu_read_unlock();
 
raw_spin_lock(_rq->lock);
@@ -8790,6 +8798,7 @@ static void rebalance_domains(struct rq *rq, enum 
cpu_idle_type idle)
update_blocked_averages(cpu);
 
rcu_read_lock();
+   trace_printk("rcu-read-lock: %d\n", cpu);
for_each_domain(cpu, sd) {
/*
 * Decay the newidle max times here because this is a regular
@@ -8853,6 +8862,7 @@ static void rebalance_domains(struct rq *rq, enum 
cpu_idle_type idle)
rq->max_idle_balance_cost =
max((u64)sysctl_sched_migration_cost, max_cost);
}
+   trace_printk("rcu-read-unlock: %d\n", cpu);
rcu_read_unlock();
 
/*


[PATCH v2] xhci: Cleanup current_cmd in xhci_cleanup_command_queue()

2017-09-29 Thread Jeffy Chen
KASAN reported use-after-free bug when xhci host controller died:
[  176.952537] BUG: KASAN: use-after-free in 
xhci_handle_command_timeout+0x68/0x224
[  176.960846] Write of size 4 at addr ffc0cbb01608 by task kworker/3:3/1680
...
[  177.180644] Freed by task 0:
[  177.183882]  kasan_slab_free+0x90/0x15c
[  177.188194]  kfree+0x114/0x28c
[  177.191630]  xhci_cleanup_command_queue+0xc8/0xf8
[  177.196916]  xhci_hc_died+0x84/0x358

Problem here is that when the cmd_timer fired, it would try to access
current_cmd while the command queue is already freed by xhci_hc_died().

Cleanup current_cmd in xhci_cleanup_command_queue() to avoid that.

Fixes: d9f11ba9f107 ("xhci: Rework how we handle unresponsive or hoptlug 
removed hosts")
Signed-off-by: Jeffy Chen 
---

Changes in v2:
We cannot cancel cmd_timer in xhci_hc_died(), which would cause
might_sleep warning.

 drivers/usb/host/xhci-ring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index a9443651ce0f..48ae15afa59e 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1309,6 +1309,7 @@ static void xhci_complete_del_and_free_cmd(struct 
xhci_command *cmd, u32 status)
 void xhci_cleanup_command_queue(struct xhci_hcd *xhci)
 {
struct xhci_command *cur_cmd, *tmp_cmd;
+   xhci->current_cmd = NULL;
list_for_each_entry_safe(cur_cmd, tmp_cmd, >cmd_list, cmd_list)
xhci_complete_del_and_free_cmd(cur_cmd, COMP_COMMAND_ABORTED);
 }
-- 
2.11.0




[PATCH v2] xhci: Cleanup current_cmd in xhci_cleanup_command_queue()

2017-09-29 Thread Jeffy Chen
KASAN reported use-after-free bug when xhci host controller died:
[  176.952537] BUG: KASAN: use-after-free in 
xhci_handle_command_timeout+0x68/0x224
[  176.960846] Write of size 4 at addr ffc0cbb01608 by task kworker/3:3/1680
...
[  177.180644] Freed by task 0:
[  177.183882]  kasan_slab_free+0x90/0x15c
[  177.188194]  kfree+0x114/0x28c
[  177.191630]  xhci_cleanup_command_queue+0xc8/0xf8
[  177.196916]  xhci_hc_died+0x84/0x358

Problem here is that when the cmd_timer fired, it would try to access
current_cmd while the command queue is already freed by xhci_hc_died().

Cleanup current_cmd in xhci_cleanup_command_queue() to avoid that.

Fixes: d9f11ba9f107 ("xhci: Rework how we handle unresponsive or hoptlug 
removed hosts")
Signed-off-by: Jeffy Chen 
---

Changes in v2:
We cannot cancel cmd_timer in xhci_hc_died(), which would cause
might_sleep warning.

 drivers/usb/host/xhci-ring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index a9443651ce0f..48ae15afa59e 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1309,6 +1309,7 @@ static void xhci_complete_del_and_free_cmd(struct 
xhci_command *cmd, u32 status)
 void xhci_cleanup_command_queue(struct xhci_hcd *xhci)
 {
struct xhci_command *cur_cmd, *tmp_cmd;
+   xhci->current_cmd = NULL;
list_for_each_entry_safe(cur_cmd, tmp_cmd, >cmd_list, cmd_list)
xhci_complete_del_and_free_cmd(cur_cmd, COMP_COMMAND_ABORTED);
 }
-- 
2.11.0




[PATCH] kvm/x86: Handle async PF in RCU read-side critical sections

2017-09-29 Thread Boqun Feng
Sasha Levin reported a WARNING:

| WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329
| rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline]
| WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329
| rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458
...
| CPU: 0 PID: 6974 Comm: syz-fuzzer Not tainted 4.13.0-next-20170908+ #246
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
| 1.10.1-1ubuntu1 04/01/2014
| Call Trace:
...
| RIP: 0010:rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 
[inline]
| RIP: 0010:rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458
| RSP: 0018:88003b2debc8 EFLAGS: 00010002
| RAX: 0001 RBX: 11000765bd85 RCX: 
| RDX: 1100075d7882 RSI: b5c7da20 RDI: 88003aebc410
| RBP: 88003b2def30 R08: dc00 R09: 0001
| R10:  R11:  R12: 88003b2def08
| R13:  R14: 88003aebc040 R15: 88003aebc040
| __schedule+0x201/0x2240 kernel/sched/core.c:3292
| schedule+0x113/0x460 kernel/sched/core.c:3421
| kvm_async_pf_task_wait+0x43f/0x940 arch/x86/kernel/kvm.c:158
| do_async_page_fault+0x72/0x90 arch/x86/kernel/kvm.c:271
| async_page_fault+0x22/0x30 arch/x86/entry/entry_64.S:1069
| RIP: 0010:format_decode+0x240/0x830 lib/vsprintf.c:1996
| RSP: 0018:88003b2df520 EFLAGS: 00010283
| RAX: 003f RBX: b5d1e141 RCX: 88003b2df670
| RDX: 0001 RSI: dc00 RDI: b5d1e140
| RBP: 88003b2df560 R08: dc00 R09: 
| R10: 88003b2df718 R11:  R12: 88003b2df5d8
| R13: 0064 R14: b5d1e140 R15: 
| vsnprintf+0x173/0x1700 lib/vsprintf.c:2136
| sprintf+0xbe/0xf0 lib/vsprintf.c:2386
| proc_self_get_link+0xfb/0x1c0 fs/proc/self.c:23
| get_link fs/namei.c:1047 [inline]
| link_path_walk+0x1041/0x1490 fs/namei.c:2127
...

And this happened when we hit a page fault in an RCU read-side critical
section and then we tried to reschedule in kvm_async_pf_task_wait(),
this reschedule would hit the WARN in rcu_preempt_note_context_switch(),
and be treated as a sleep in RCU read-side critical section, which is
not allowed(even in preemptible RCU).

To cure this, make kvm_async_pf_task_wait() go to the halt path if the
PF happens in a RCU read-side critical section.

Reported-by: Sasha Levin 
Cc: "Paul E. McKenney" 
Cc: Peter Zijlstra 
Signed-off-by: Boqun Feng 
---
 arch/x86/kernel/kvm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index aa60a08b65b1..e675704fa6f7 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -140,7 +140,8 @@ void kvm_async_pf_task_wait(u32 token)
 
n.token = token;
n.cpu = smp_processor_id();
-   n.halted = is_idle_task(current) || preempt_count() > 1;
+   n.halted = is_idle_task(current) || preempt_count() > 1 ||
+  rcu_preempt_depth();
init_swait_queue_head();
hlist_add_head(, >list);
raw_spin_unlock(>lock);
-- 
2.14.1



[PATCH] kvm/x86: Handle async PF in RCU read-side critical sections

2017-09-29 Thread Boqun Feng
Sasha Levin reported a WARNING:

| WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329
| rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline]
| WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329
| rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458
...
| CPU: 0 PID: 6974 Comm: syz-fuzzer Not tainted 4.13.0-next-20170908+ #246
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
| 1.10.1-1ubuntu1 04/01/2014
| Call Trace:
...
| RIP: 0010:rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 
[inline]
| RIP: 0010:rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458
| RSP: 0018:88003b2debc8 EFLAGS: 00010002
| RAX: 0001 RBX: 11000765bd85 RCX: 
| RDX: 1100075d7882 RSI: b5c7da20 RDI: 88003aebc410
| RBP: 88003b2def30 R08: dc00 R09: 0001
| R10:  R11:  R12: 88003b2def08
| R13:  R14: 88003aebc040 R15: 88003aebc040
| __schedule+0x201/0x2240 kernel/sched/core.c:3292
| schedule+0x113/0x460 kernel/sched/core.c:3421
| kvm_async_pf_task_wait+0x43f/0x940 arch/x86/kernel/kvm.c:158
| do_async_page_fault+0x72/0x90 arch/x86/kernel/kvm.c:271
| async_page_fault+0x22/0x30 arch/x86/entry/entry_64.S:1069
| RIP: 0010:format_decode+0x240/0x830 lib/vsprintf.c:1996
| RSP: 0018:88003b2df520 EFLAGS: 00010283
| RAX: 003f RBX: b5d1e141 RCX: 88003b2df670
| RDX: 0001 RSI: dc00 RDI: b5d1e140
| RBP: 88003b2df560 R08: dc00 R09: 
| R10: 88003b2df718 R11:  R12: 88003b2df5d8
| R13: 0064 R14: b5d1e140 R15: 
| vsnprintf+0x173/0x1700 lib/vsprintf.c:2136
| sprintf+0xbe/0xf0 lib/vsprintf.c:2386
| proc_self_get_link+0xfb/0x1c0 fs/proc/self.c:23
| get_link fs/namei.c:1047 [inline]
| link_path_walk+0x1041/0x1490 fs/namei.c:2127
...

And this happened when we hit a page fault in an RCU read-side critical
section and then we tried to reschedule in kvm_async_pf_task_wait(),
this reschedule would hit the WARN in rcu_preempt_note_context_switch(),
and be treated as a sleep in RCU read-side critical section, which is
not allowed(even in preemptible RCU).

To cure this, make kvm_async_pf_task_wait() go to the halt path if the
PF happens in a RCU read-side critical section.

Reported-by: Sasha Levin 
Cc: "Paul E. McKenney" 
Cc: Peter Zijlstra 
Signed-off-by: Boqun Feng 
---
 arch/x86/kernel/kvm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index aa60a08b65b1..e675704fa6f7 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -140,7 +140,8 @@ void kvm_async_pf_task_wait(u32 token)
 
n.token = token;
n.cpu = smp_processor_id();
-   n.halted = is_idle_task(current) || preempt_count() > 1;
+   n.halted = is_idle_task(current) || preempt_count() > 1 ||
+  rcu_preempt_depth();
init_swait_queue_head();
hlist_add_head(, >list);
raw_spin_unlock(>lock);
-- 
2.14.1



Re: [PATCH v1 02/14] tee: add register user memory

2017-09-29 Thread Mark Rutland
On Thu, Sep 28, 2017 at 09:03:59PM +0300, Volodymyr Babchuk wrote:
> +static int
> +tee_ioctl_shm_register(struct tee_context *ctx,
> +struct tee_ioctl_shm_register_data __user *udata)
> +{
> + long ret;
> + struct tee_ioctl_shm_register_data data;
> + struct tee_shm *shm;
> +
> + if (copy_from_user(, udata, sizeof(data)))
> + return -EFAULT;
> +
> + /* Currently no input flags are supported */
> + if (data.flags)
> + return -EINVAL;
> +
> + shm = tee_shm_register(ctx, data.addr, data.length,
> +TEE_SHM_DMA_BUF | TEE_SHM_USER_MAPPED);
> + if (IS_ERR(shm))
> + return PTR_ERR(shm);
> +
> + data.id = shm->id;
> + data.flags = shm->flags;
> + data.length = shm->size;
> +
> + if (copy_to_user(udata, , sizeof(data)))
> + ret = -EFAULT;
> + else
> + ret = tee_shm_get_fd(shm);

Why do you need both the fd and an id? That seems redundant.

[...]

> +struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
> +  size_t length, u32 flags)
> +{
> + struct tee_device *teedev = ctx->teedev;
> + const u32 req_flags = TEE_SHM_DMA_BUF | TEE_SHM_USER_MAPPED;
> + struct tee_shm *shm;
> + void *ret;
> + int rc;
> + int num_pages;
> + unsigned long start;
> +
> + if (flags != req_flags) {
> + dev_err(teedev->dev.parent, "invliad shm flags %#x", flags);
> + return ERR_PTR(-EINVAL);
> + }
> +
> + if (!tee_device_get(teedev))
> + return ERR_PTR(-EINVAL);
> +
> + if (!teedev->desc->ops->shm_register ||
> + !teedev->desc->ops->shm_unregister) {
> + dev_err(teedev->dev.parent,
> + "register shared memory unspported by device");

I don't think this should be a dev_err. The user requested something
that the device did not support, but that's not a device-side error.

A user may legitmiately do this to probe whether the TEE supports
registering memory.

> + tee_device_put(teedev);
> + return ERR_PTR(-EINVAL);

Perhaps EOPNOTSUPP?

> + }
> +
> + shm = kzalloc(sizeof(*shm), GFP_KERNEL);
> + if (!shm) {
> + ret = ERR_PTR(-ENOMEM);
> + goto err;
> + }
> +
> + shm->flags = flags | TEE_SHM_REGISTER;
> + shm->teedev = teedev;
> + shm->ctx = ctx;
> + shm->id = -1;
> + start = rounddown(addr, PAGE_SIZE);
> + shm->offset = addr - start;
> + shm->size = length;
> + num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE;

Why not mandate that the user passes a buffer which has a start and end
aligned to PAGE_SIZE?

Otherwise, the buffer is size is silently upgraded without the user's
knowledge, which seems likely to result in bugs.

> + shm->pages = kcalloc(num_pages, sizeof(struct page), GFP_KERNEL);

I think you mean sizeof(struct page *) here.

Generally, for:

  lhs = some_alloc(sizeof(x))

... it's preferred that x is *lhs, so as to keep the types in sync. e.g.

  shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL);

> + if (!shm->pages) {
> + ret = ERR_PTR(-ENOMEM);
> + goto err;
> + }
> +
> + rc = get_user_pages_fast(start, num_pages, 1, shm->pages);
> + if (rc > 0)
> + shm->num_pages = rc;
> + if (rc != num_pages) {
> + if (rc > 0)
> + rc = -ENOMEM;
> + ret = ERR_PTR(rc);
> + goto err;
> + }
> +
> + mutex_lock(>mutex);
> + shm->id = idr_alloc(>idr, shm, 1, 0, GFP_KERNEL);
> + mutex_unlock(>mutex);

AFAICT, idr_alloc() can fail, so I beleive you're missing a sanity check
on the return value here.

THanks,
Mark.


Re: [PATCH v1 02/14] tee: add register user memory

2017-09-29 Thread Mark Rutland
On Thu, Sep 28, 2017 at 09:03:59PM +0300, Volodymyr Babchuk wrote:
> +static int
> +tee_ioctl_shm_register(struct tee_context *ctx,
> +struct tee_ioctl_shm_register_data __user *udata)
> +{
> + long ret;
> + struct tee_ioctl_shm_register_data data;
> + struct tee_shm *shm;
> +
> + if (copy_from_user(, udata, sizeof(data)))
> + return -EFAULT;
> +
> + /* Currently no input flags are supported */
> + if (data.flags)
> + return -EINVAL;
> +
> + shm = tee_shm_register(ctx, data.addr, data.length,
> +TEE_SHM_DMA_BUF | TEE_SHM_USER_MAPPED);
> + if (IS_ERR(shm))
> + return PTR_ERR(shm);
> +
> + data.id = shm->id;
> + data.flags = shm->flags;
> + data.length = shm->size;
> +
> + if (copy_to_user(udata, , sizeof(data)))
> + ret = -EFAULT;
> + else
> + ret = tee_shm_get_fd(shm);

Why do you need both the fd and an id? That seems redundant.

[...]

> +struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
> +  size_t length, u32 flags)
> +{
> + struct tee_device *teedev = ctx->teedev;
> + const u32 req_flags = TEE_SHM_DMA_BUF | TEE_SHM_USER_MAPPED;
> + struct tee_shm *shm;
> + void *ret;
> + int rc;
> + int num_pages;
> + unsigned long start;
> +
> + if (flags != req_flags) {
> + dev_err(teedev->dev.parent, "invliad shm flags %#x", flags);
> + return ERR_PTR(-EINVAL);
> + }
> +
> + if (!tee_device_get(teedev))
> + return ERR_PTR(-EINVAL);
> +
> + if (!teedev->desc->ops->shm_register ||
> + !teedev->desc->ops->shm_unregister) {
> + dev_err(teedev->dev.parent,
> + "register shared memory unspported by device");

I don't think this should be a dev_err. The user requested something
that the device did not support, but that's not a device-side error.

A user may legitmiately do this to probe whether the TEE supports
registering memory.

> + tee_device_put(teedev);
> + return ERR_PTR(-EINVAL);

Perhaps EOPNOTSUPP?

> + }
> +
> + shm = kzalloc(sizeof(*shm), GFP_KERNEL);
> + if (!shm) {
> + ret = ERR_PTR(-ENOMEM);
> + goto err;
> + }
> +
> + shm->flags = flags | TEE_SHM_REGISTER;
> + shm->teedev = teedev;
> + shm->ctx = ctx;
> + shm->id = -1;
> + start = rounddown(addr, PAGE_SIZE);
> + shm->offset = addr - start;
> + shm->size = length;
> + num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE;

Why not mandate that the user passes a buffer which has a start and end
aligned to PAGE_SIZE?

Otherwise, the buffer is size is silently upgraded without the user's
knowledge, which seems likely to result in bugs.

> + shm->pages = kcalloc(num_pages, sizeof(struct page), GFP_KERNEL);

I think you mean sizeof(struct page *) here.

Generally, for:

  lhs = some_alloc(sizeof(x))

... it's preferred that x is *lhs, so as to keep the types in sync. e.g.

  shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL);

> + if (!shm->pages) {
> + ret = ERR_PTR(-ENOMEM);
> + goto err;
> + }
> +
> + rc = get_user_pages_fast(start, num_pages, 1, shm->pages);
> + if (rc > 0)
> + shm->num_pages = rc;
> + if (rc != num_pages) {
> + if (rc > 0)
> + rc = -ENOMEM;
> + ret = ERR_PTR(rc);
> + goto err;
> + }
> +
> + mutex_lock(>mutex);
> + shm->id = idr_alloc(>idr, shm, 1, 0, GFP_KERNEL);
> + mutex_unlock(>mutex);

AFAICT, idr_alloc() can fail, so I beleive you're missing a sanity check
on the return value here.

THanks,
Mark.


Re: [PATCH v1 00/14] tee: optee: add dynamic shared memory support

2017-09-29 Thread Volodymyr Babchuk

Hello Mark,

On 29.09.17 13:31, Mark Rutland wrote:

Hi,

On Thu, Sep 28, 2017 at 09:03:57PM +0300, Volodymyr Babchuk wrote:

From: Volodymyr Babchuk 

This patch series enables dynamic shared memory support in the TEE
subsystem as a whole and in OP-TEE in particular.

Global Platform TEE specification [1] allows client applications
to register part of own memory as a shared buffer between
application and TEE. This allows fast zero-copy communication between
TEE and REE. But current implementation of TEE in Linux does not support
this feature.

Also, current implementation of OP-TEE transport uses fixed size
pre-shared buffer for all communications with OP-TEE OS. This is okay
in the most use cases. But this prevents use of OP-TEE in virtualized
environments, because:
  a) We can't share the same buffer between different virtual machines
  b) Physically contiguous memory as seen by VM can be non-contiguous
 in reality (and as seen by OP-TEE OS) due to second stage of
 MMU translation.
  c) Size of this pre-shared buffer is limited.


I'm afraid that I don't follow the arguments for virtualized OP-TEE
usage.
Yes, this is short summary. This is patches for Linux kernel, so I 
thought it is okay to mention only linux part.



In a virtualised environment, TEE access *must* be mediated via the
hypervisor, which can virtualise the interface, pin pages, etc.
Absolutely right. I had many discussions with XEN community on this 
topic there: [2]

Also I had discussions with OP-TEE guys there: [3]


Could you elaborate on how you expect TEE access to work in a
virtualised environment?
Hypervisor will trap all SMCs. SMCs that belong to TEE OS and TEE apps 
will be handled by corresponding TEE mediator in a hypervsior.


TEE mediator will:

1) check if this guest/domain have right to work with TEE at all.
2) Mangle pointers in command buffer (translate IPA to PA)
3) Add VMID to request
3) Forward mangled request to TEE
4) Mangle pointers in response buffer (if any)
5) Return response back to guest.

Besides this, TEE mediator will inform TEE about guest creation and 
destruction, so TEE can track opened sessions, shared buffers and such.


[2] http://xen.markmail.org/thread/6pwpa2j6sbqkxgge
[3] https://github.com/OP-TEE/optee_os/issues/1019


Re: [PATCH v1 00/14] tee: optee: add dynamic shared memory support

2017-09-29 Thread Volodymyr Babchuk

Hello Mark,

On 29.09.17 13:31, Mark Rutland wrote:

Hi,

On Thu, Sep 28, 2017 at 09:03:57PM +0300, Volodymyr Babchuk wrote:

From: Volodymyr Babchuk 

This patch series enables dynamic shared memory support in the TEE
subsystem as a whole and in OP-TEE in particular.

Global Platform TEE specification [1] allows client applications
to register part of own memory as a shared buffer between
application and TEE. This allows fast zero-copy communication between
TEE and REE. But current implementation of TEE in Linux does not support
this feature.

Also, current implementation of OP-TEE transport uses fixed size
pre-shared buffer for all communications with OP-TEE OS. This is okay
in the most use cases. But this prevents use of OP-TEE in virtualized
environments, because:
  a) We can't share the same buffer between different virtual machines
  b) Physically contiguous memory as seen by VM can be non-contiguous
 in reality (and as seen by OP-TEE OS) due to second stage of
 MMU translation.
  c) Size of this pre-shared buffer is limited.


I'm afraid that I don't follow the arguments for virtualized OP-TEE
usage.
Yes, this is short summary. This is patches for Linux kernel, so I 
thought it is okay to mention only linux part.



In a virtualised environment, TEE access *must* be mediated via the
hypervisor, which can virtualise the interface, pin pages, etc.
Absolutely right. I had many discussions with XEN community on this 
topic there: [2]

Also I had discussions with OP-TEE guys there: [3]


Could you elaborate on how you expect TEE access to work in a
virtualised environment?
Hypervisor will trap all SMCs. SMCs that belong to TEE OS and TEE apps 
will be handled by corresponding TEE mediator in a hypervsior.


TEE mediator will:

1) check if this guest/domain have right to work with TEE at all.
2) Mangle pointers in command buffer (translate IPA to PA)
3) Add VMID to request
3) Forward mangled request to TEE
4) Mangle pointers in response buffer (if any)
5) Return response back to guest.

Besides this, TEE mediator will inform TEE about guest creation and 
destruction, so TEE can track opened sessions, shared buffers and such.


[2] http://xen.markmail.org/thread/6pwpa2j6sbqkxgge
[3] https://github.com/OP-TEE/optee_os/issues/1019


[PATCH v2 1/2] mtd: nand: wait for tWHR after NAND_CMD_STATUS / NAND_CMD_READID

2017-09-29 Thread Masahiro Yamada
For commands such as Read Status, Read ID, etc. the controller needs
to wait for tWHR before it reads out the first data.  If the controller
does not take care of it, the driver has to wait explicitly to make
sure to meet the spec.

Introduce NAND_WAIT_TWHR, which works like NAND_WAIT_TCCS.  The
driver can set this flag to let the core to handle the tWHR delay.

Signed-off-by: Masahiro Yamada 
---

Changes in v2:
  - Add nand_whr_delay() helper
Wait for tWHR only for drivers that explicitly set NAND_WAIT_TWHR flag

 drivers/mtd/nand/nand_base.c | 21 +++--
 include/linux/mtd/rawnand.h  | 13 -
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index b1cf32c..455085d 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -676,6 +676,17 @@ static void nand_wait_status_ready(struct mtd_info *mtd, 
unsigned long timeo)
} while (time_before(jiffies, timeo));
 };
 
+static void nand_whr_delay(struct nand_chip *chip)
+{
+   if (!(chip->options & NAND_WAIT_TWHR))
+   return;
+
+   if (chip->data_interface && chip->data_interface->timings.sdr.tWHR_min)
+   ndelay(chip->data_interface->timings.sdr.tWHR_min / 1000);
+   else
+   ndelay(200);
+}
+
 /**
  * nand_command - [DEFAULT] Send command to NAND device
  * @mtd: MTD device structure
@@ -742,9 +753,12 @@ static void nand_command(struct mtd_info *mtd, unsigned 
int command,
case NAND_CMD_ERASE1:
case NAND_CMD_ERASE2:
case NAND_CMD_SEQIN:
+   case NAND_CMD_SET_FEATURES:
+   return;
+
case NAND_CMD_STATUS:
case NAND_CMD_READID:
-   case NAND_CMD_SET_FEATURES:
+   nand_whr_delay(chip);
return;
 
case NAND_CMD_RESET:
@@ -871,9 +885,12 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned 
int command,
case NAND_CMD_ERASE1:
case NAND_CMD_ERASE2:
case NAND_CMD_SEQIN:
+   case NAND_CMD_SET_FEATURES:
+   return;
+
case NAND_CMD_STATUS:
case NAND_CMD_READID:
-   case NAND_CMD_SET_FEATURES:
+   nand_whr_delay(chip);
return;
 
case NAND_CMD_RNDIN:
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 749bb08..bb0165b 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -213,13 +213,16 @@ enum nand_ecc_algo {
 
 /*
  * In case your controller is implementing ->cmd_ctrl() and is relying on the
- * default ->cmdfunc() implementation, you may want to let the core handle the
- * tCCS delay which is required when a column change (RNDIN or RNDOUT) is
- * requested.
- * If your controller already takes care of this delay, you don't need to set
- * this flag.
+ * default ->cmdfunc() implementation, you may want to let the core handle
+ * some delays to meet the timing specification.
+ * If your controller already takes care of these delays, you don't need to set
+ * the following flags.
  */
+
+/* tCCS for a column change (RNDIN or RNDOUT) */
 #define NAND_WAIT_TCCS 0x0020
+/* tWHR for STATUS, READID, etc. */
+#define NAND_WAIT_TWHR 0x0040
 
 /* Options set by nand scan */
 /* Nand scan has allocated controller struct */
-- 
2.7.4



[PATCH v2 2/2] mtd: nand: denali: fix setup_data_interface to meet tCCS delay

2017-09-29 Thread Masahiro Yamada
The WE_2_RE register specifies the number of clock cycles inserted
between the rising edge of #WE and the falling edge of #RE.

The current setup_data_interface implementation takes care of tWHR,
but tCCS is missing.  Wait max(tCSS, tWHR) to meet the spec.

With setup_data_interface() is properly programmed, the Denali NAND
controller can observe the timing, so NAND_WAIT_{TCCS,TWHR} flag is
unneeded.  Say this in the comment block.

Signed-off-by: Masahiro Yamada 
---

Changes in v2:
  - newly added

 drivers/mtd/nand/denali.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 0b268ec..332c022 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1004,8 +1004,14 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
tmp |= FIELD_PREP(RE_2_RE__VALUE, re_2_re);
iowrite32(tmp, denali->reg + RE_2_RE);
 
-   /* tWHR -> WE_2_RE */
-   we_2_re = DIV_ROUND_UP(timings->tWHR_min, t_clk);
+   /*
+* tCCS, tWHR -> WE_2_RE
+*
+* With WE_2_RE properly set, the Denali controller automatically takes
+* care of tCCS, tWHR; the driver need not set NAND_WAIT_{TCCS,TWHR}.
+*/
+   we_2_re = DIV_ROUND_UP(max(timings->tCCS_min, timings->tWHR_min),
+  t_clk);
we_2_re = min_t(int, we_2_re, TWHR2_AND_WE_2_RE__WE_2_RE);
 
tmp = ioread32(denali->reg + TWHR2_AND_WE_2_RE);
-- 
2.7.4



[PATCH v2 1/2] mtd: nand: wait for tWHR after NAND_CMD_STATUS / NAND_CMD_READID

2017-09-29 Thread Masahiro Yamada
For commands such as Read Status, Read ID, etc. the controller needs
to wait for tWHR before it reads out the first data.  If the controller
does not take care of it, the driver has to wait explicitly to make
sure to meet the spec.

Introduce NAND_WAIT_TWHR, which works like NAND_WAIT_TCCS.  The
driver can set this flag to let the core to handle the tWHR delay.

Signed-off-by: Masahiro Yamada 
---

Changes in v2:
  - Add nand_whr_delay() helper
Wait for tWHR only for drivers that explicitly set NAND_WAIT_TWHR flag

 drivers/mtd/nand/nand_base.c | 21 +++--
 include/linux/mtd/rawnand.h  | 13 -
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index b1cf32c..455085d 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -676,6 +676,17 @@ static void nand_wait_status_ready(struct mtd_info *mtd, 
unsigned long timeo)
} while (time_before(jiffies, timeo));
 };
 
+static void nand_whr_delay(struct nand_chip *chip)
+{
+   if (!(chip->options & NAND_WAIT_TWHR))
+   return;
+
+   if (chip->data_interface && chip->data_interface->timings.sdr.tWHR_min)
+   ndelay(chip->data_interface->timings.sdr.tWHR_min / 1000);
+   else
+   ndelay(200);
+}
+
 /**
  * nand_command - [DEFAULT] Send command to NAND device
  * @mtd: MTD device structure
@@ -742,9 +753,12 @@ static void nand_command(struct mtd_info *mtd, unsigned 
int command,
case NAND_CMD_ERASE1:
case NAND_CMD_ERASE2:
case NAND_CMD_SEQIN:
+   case NAND_CMD_SET_FEATURES:
+   return;
+
case NAND_CMD_STATUS:
case NAND_CMD_READID:
-   case NAND_CMD_SET_FEATURES:
+   nand_whr_delay(chip);
return;
 
case NAND_CMD_RESET:
@@ -871,9 +885,12 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned 
int command,
case NAND_CMD_ERASE1:
case NAND_CMD_ERASE2:
case NAND_CMD_SEQIN:
+   case NAND_CMD_SET_FEATURES:
+   return;
+
case NAND_CMD_STATUS:
case NAND_CMD_READID:
-   case NAND_CMD_SET_FEATURES:
+   nand_whr_delay(chip);
return;
 
case NAND_CMD_RNDIN:
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 749bb08..bb0165b 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -213,13 +213,16 @@ enum nand_ecc_algo {
 
 /*
  * In case your controller is implementing ->cmd_ctrl() and is relying on the
- * default ->cmdfunc() implementation, you may want to let the core handle the
- * tCCS delay which is required when a column change (RNDIN or RNDOUT) is
- * requested.
- * If your controller already takes care of this delay, you don't need to set
- * this flag.
+ * default ->cmdfunc() implementation, you may want to let the core handle
+ * some delays to meet the timing specification.
+ * If your controller already takes care of these delays, you don't need to set
+ * the following flags.
  */
+
+/* tCCS for a column change (RNDIN or RNDOUT) */
 #define NAND_WAIT_TCCS 0x0020
+/* tWHR for STATUS, READID, etc. */
+#define NAND_WAIT_TWHR 0x0040
 
 /* Options set by nand scan */
 /* Nand scan has allocated controller struct */
-- 
2.7.4



[PATCH v2 2/2] mtd: nand: denali: fix setup_data_interface to meet tCCS delay

2017-09-29 Thread Masahiro Yamada
The WE_2_RE register specifies the number of clock cycles inserted
between the rising edge of #WE and the falling edge of #RE.

The current setup_data_interface implementation takes care of tWHR,
but tCCS is missing.  Wait max(tCSS, tWHR) to meet the spec.

With setup_data_interface() is properly programmed, the Denali NAND
controller can observe the timing, so NAND_WAIT_{TCCS,TWHR} flag is
unneeded.  Say this in the comment block.

Signed-off-by: Masahiro Yamada 
---

Changes in v2:
  - newly added

 drivers/mtd/nand/denali.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 0b268ec..332c022 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1004,8 +1004,14 @@ static int denali_setup_data_interface(struct mtd_info 
*mtd, int chipnr,
tmp |= FIELD_PREP(RE_2_RE__VALUE, re_2_re);
iowrite32(tmp, denali->reg + RE_2_RE);
 
-   /* tWHR -> WE_2_RE */
-   we_2_re = DIV_ROUND_UP(timings->tWHR_min, t_clk);
+   /*
+* tCCS, tWHR -> WE_2_RE
+*
+* With WE_2_RE properly set, the Denali controller automatically takes
+* care of tCCS, tWHR; the driver need not set NAND_WAIT_{TCCS,TWHR}.
+*/
+   we_2_re = DIV_ROUND_UP(max(timings->tCCS_min, timings->tWHR_min),
+  t_clk);
we_2_re = min_t(int, we_2_re, TWHR2_AND_WE_2_RE__WE_2_RE);
 
tmp = ioread32(denali->reg + TWHR2_AND_WE_2_RE);
-- 
2.7.4



[PATCH v2 0/2] mtd: nand: wait for tWHR, and fix the setup_data_interface of Denali

2017-09-29 Thread Masahiro Yamada

1/2 : add NAND_WAIT_TWHR and nand_whr_delay().
  You can set this new flag if you want nand_command(_lp)
  to insert tWHR delay where needed.

2/2 : Fix Denali setup_data_interface.
  Boris' suggestion in v1 was a good reminder that
  made me realize tCCS was missing in the driver.  Fix it now.


Changes in v2:
  - Add nand_whr_delay() helper
Wait for tWHR only for drivers that explicitly set NAND_WAIT_TWHR flag
  - newly added

Masahiro Yamada (2):
  mtd: nand: wait for tWHR after NAND_CMD_STATUS / NAND_CMD_READID
  mtd: nand: denali: fix setup_data_interface to meet tCCS delay

 drivers/mtd/nand/denali.c| 10 --
 drivers/mtd/nand/nand_base.c | 21 +++--
 include/linux/mtd/rawnand.h  | 13 -
 3 files changed, 35 insertions(+), 9 deletions(-)

-- 
2.7.4



[PATCH v2 0/2] mtd: nand: wait for tWHR, and fix the setup_data_interface of Denali

2017-09-29 Thread Masahiro Yamada

1/2 : add NAND_WAIT_TWHR and nand_whr_delay().
  You can set this new flag if you want nand_command(_lp)
  to insert tWHR delay where needed.

2/2 : Fix Denali setup_data_interface.
  Boris' suggestion in v1 was a good reminder that
  made me realize tCCS was missing in the driver.  Fix it now.


Changes in v2:
  - Add nand_whr_delay() helper
Wait for tWHR only for drivers that explicitly set NAND_WAIT_TWHR flag
  - newly added

Masahiro Yamada (2):
  mtd: nand: wait for tWHR after NAND_CMD_STATUS / NAND_CMD_READID
  mtd: nand: denali: fix setup_data_interface to meet tCCS delay

 drivers/mtd/nand/denali.c| 10 --
 drivers/mtd/nand/nand_base.c | 21 +++--
 include/linux/mtd/rawnand.h  | 13 -
 3 files changed, 35 insertions(+), 9 deletions(-)

-- 
2.7.4



Re: [RFC PATCH v2 3/7] sched/idle: Add poll before enter real idle path

2017-09-29 Thread Quan Xu



On 2017/9/1 14:49, Quan Xu wrote:

on 2017/8/29 22:39, Borislav Petkov wrote:

On Tue, Aug 29, 2017 at 11:46:37AM +, Yang Zhang wrote:

Add poll in do_idle. For UP VM, if there are running task, it will not
goes into idle path, so we only enable poll in SMP VM.

Signed-off-by: Yang Zhang 
Signed-off-by: Quan Xu 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: "H. Peter Anvin" 
Cc: x...@kernel.org
Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Cc: Kyle Huey 
Cc: Andy Lutomirski 
Cc: Len Brown 
Cc: linux-kernel@vger.kernel.org
---
  arch/x86/kernel/process.c | 7 +++
  kernel/sched/idle.c   | 2 ++
  2 files changed, 9 insertions(+)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ca1980..def4113 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -332,6 +332,13 @@ void arch_cpu_idle(void)
  x86_idle();
  }
  +#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT)
+void arch_cpu_idle_poll(void)
+{
+    paravirt_idle_poll();
+}
+#endif

So this will get called on any system which has CONFIG_PARAVIRT enabled
*even* if they're not running any guests.

Huh?

 Borislav,
 think twice, it is better to run ander an IF condition, to make 
sure they are running any guests.


  Quan

Borislav ,
   yes, this will get called on any system which has CONFIG_PARAVIRT 
enabled.


   but if they are not running any guests,  the callback is 
paravirt_nop() ,
   IIUC which is as similar as the other paravirt_*, such as 
paravirt_pgd_free()..


 - Quan




Re: [RFC PATCH v2 3/7] sched/idle: Add poll before enter real idle path

2017-09-29 Thread Quan Xu



On 2017/9/1 14:49, Quan Xu wrote:

on 2017/8/29 22:39, Borislav Petkov wrote:

On Tue, Aug 29, 2017 at 11:46:37AM +, Yang Zhang wrote:

Add poll in do_idle. For UP VM, if there are running task, it will not
goes into idle path, so we only enable poll in SMP VM.

Signed-off-by: Yang Zhang 
Signed-off-by: Quan Xu 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: "H. Peter Anvin" 
Cc: x...@kernel.org
Cc: Peter Zijlstra 
Cc: Borislav Petkov 
Cc: Kyle Huey 
Cc: Andy Lutomirski 
Cc: Len Brown 
Cc: linux-kernel@vger.kernel.org
---
  arch/x86/kernel/process.c | 7 +++
  kernel/sched/idle.c   | 2 ++
  2 files changed, 9 insertions(+)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ca1980..def4113 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -332,6 +332,13 @@ void arch_cpu_idle(void)
  x86_idle();
  }
  +#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT)
+void arch_cpu_idle_poll(void)
+{
+    paravirt_idle_poll();
+}
+#endif

So this will get called on any system which has CONFIG_PARAVIRT enabled
*even* if they're not running any guests.

Huh?

 Borislav,
 think twice, it is better to run ander an IF condition, to make 
sure they are running any guests.


  Quan

Borislav ,
   yes, this will get called on any system which has CONFIG_PARAVIRT 
enabled.


   but if they are not running any guests,  the callback is 
paravirt_nop() ,
   IIUC which is as similar as the other paravirt_*, such as 
paravirt_pgd_free()..


 - Quan




Re: How to verify linux-next

2017-09-29 Thread Pintu Kumar
Hi,

I have a general question.
How do we normally verify linux-next tree?

I wanted to work on linux-next but I am facing some issues.
I could able to build linux-next for both x86 and arm, but I could not
verify it on any machine.
Currently I don't have a real Linux PC to boot with linux-next kernel.
So I am trying to find alternative ways, like using the virtual box or qemu-arm.
1) For Oracle virtual box 5.1.26 with ubuntu-32 bit, the linux-next
kernel is not booting.
2) For qemu-arm (versatilepb), I am able to build the kernel, but I
could not figure out which rootfs to use with it.
I tried creating minimal rootfs using busybox, but it does not contain
enough interface. I am not able to open multiple terminal and also
could not setup ssh to access it using PUTTY.

So, if you know of any better rootfs to use with qemu-arm please let me know.

Or, if you know of any better option to use linux-next please tell me.
It will be really helpful.


Thank You!
Regards,
Pintu


Re: How to verify linux-next

2017-09-29 Thread Pintu Kumar
Hi,

I have a general question.
How do we normally verify linux-next tree?

I wanted to work on linux-next but I am facing some issues.
I could able to build linux-next for both x86 and arm, but I could not
verify it on any machine.
Currently I don't have a real Linux PC to boot with linux-next kernel.
So I am trying to find alternative ways, like using the virtual box or qemu-arm.
1) For Oracle virtual box 5.1.26 with ubuntu-32 bit, the linux-next
kernel is not booting.
2) For qemu-arm (versatilepb), I am able to build the kernel, but I
could not figure out which rootfs to use with it.
I tried creating minimal rootfs using busybox, but it does not contain
enough interface. I am not able to open multiple terminal and also
could not setup ssh to access it using PUTTY.

So, if you know of any better rootfs to use with qemu-arm please let me know.

Or, if you know of any better option to use linux-next please tell me.
It will be really helpful.


Thank You!
Regards,
Pintu


<    5   6   7   8   9   10   11   12   13   >