Re: [PATCH v4 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info

2016-12-21 Thread Peter Zijlstra
On Thu, Dec 22, 2016 at 08:21:23PM +1300, Eric W. Biederman wrote:
> 
> And please make the array the last item in the structure so that
> expanding or contracting it does not affect the ability to read the rest
> of the structure.

Sorry, sample_id must be last, because hysterical crud :/

(basically because that was the only way to add a field to records like
PERF_RECORD_MMAP which used the record length to determine the
filename[] length, yes I know, we won't ever do that again).


Re: [PATCH 2/4] vfio-mdev: de-polute the namespace, rename parent_device & parent_ops

2016-12-21 Thread Jike Song
Not sure if this is appropriate, but if not having the Documentation considered,
for patch 2-4:

Reviewed-by: Jike Song 

--
Thanks,
Jike

On 12/22/2016 07:27 AM, Alex Williamson wrote:
> From: Alex Williamson 
> 
> Add an mdev_ prefix so we're not poluting the namespace so much.
> 
> Cc: Kirti Wankhede 
> Cc: Zhenyu Wang 
> Cc: Zhi Wang 
> Cc: Jike Song 
> Signed-off-by: Alex Williamson 
> ---
>  drivers/gpu/drm/i915/gvt/kvmgt.c |2 +-
>  drivers/vfio/mdev/mdev_core.c|   28 ++--
>  drivers/vfio/mdev/mdev_private.h |6 +++---
>  drivers/vfio/mdev/mdev_sysfs.c   |8 
>  drivers/vfio/mdev/vfio_mdev.c|   12 ++--
>  include/linux/mdev.h |   16 
>  samples/vfio-mdev/mtty.c |2 +-
>  7 files changed, 37 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c 
> b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index 4dd6722..081ada2 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -1089,7 +1089,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, 
> unsigned int cmd,
>   return 0;
>  }
>  
> -static const struct parent_ops intel_vgpu_ops = {
> +static const struct mdev_parent_ops intel_vgpu_ops = {
>   .supported_type_groups  = intel_vgpu_type_groups,
>   .create = intel_vgpu_create,
>   .remove = intel_vgpu_remove,
> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> index be1ee89..4a140e0 100644
> --- a/drivers/vfio/mdev/mdev_core.c
> +++ b/drivers/vfio/mdev/mdev_core.c
> @@ -42,7 +42,7 @@ static int _find_mdev_device(struct device *dev, void *data)
>   return 0;
>  }
>  
> -static bool mdev_device_exist(struct parent_device *parent, uuid_le uuid)
> +static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid)
>  {
>   struct device *dev;
>  
> @@ -56,9 +56,9 @@ static bool mdev_device_exist(struct parent_device *parent, 
> uuid_le uuid)
>  }
>  
>  /* Should be called holding parent_list_lock */
> -static struct parent_device *__find_parent_device(struct device *dev)
> +static struct mdev_parent *__find_parent_device(struct device *dev)
>  {
> - struct parent_device *parent;
> + struct mdev_parent *parent;
>  
>   list_for_each_entry(parent, _list, next) {
>   if (parent->dev == dev)
> @@ -69,8 +69,8 @@ static struct parent_device *__find_parent_device(struct 
> device *dev)
>  
>  static void mdev_release_parent(struct kref *kref)
>  {
> - struct parent_device *parent = container_of(kref, struct parent_device,
> - ref);
> + struct mdev_parent *parent = container_of(kref, struct mdev_parent,
> +   ref);
>   struct device *dev = parent->dev;
>  
>   kfree(parent);
> @@ -78,7 +78,7 @@ static void mdev_release_parent(struct kref *kref)
>  }
>  
>  static
> -inline struct parent_device *mdev_get_parent(struct parent_device *parent)
> +inline struct mdev_parent *mdev_get_parent(struct mdev_parent *parent)
>  {
>   if (parent)
>   kref_get(>ref);
> @@ -86,7 +86,7 @@ inline struct parent_device *mdev_get_parent(struct 
> parent_device *parent)
>   return parent;
>  }
>  
> -static inline void mdev_put_parent(struct parent_device *parent)
> +static inline void mdev_put_parent(struct mdev_parent *parent)
>  {
>   if (parent)
>   kref_put(>ref, mdev_release_parent);
> @@ -95,7 +95,7 @@ static inline void mdev_put_parent(struct parent_device 
> *parent)
>  static int mdev_device_create_ops(struct kobject *kobj,
> struct mdev_device *mdev)
>  {
> - struct parent_device *parent = mdev->parent;
> + struct mdev_parent *parent = mdev->parent;
>   int ret;
>  
>   ret = parent->ops->create(kobj, mdev);
> @@ -122,7 +122,7 @@ static int mdev_device_create_ops(struct kobject *kobj,
>   */
>  static int mdev_device_remove_ops(struct mdev_device *mdev, bool 
> force_remove)
>  {
> - struct parent_device *parent = mdev->parent;
> + struct mdev_parent *parent = mdev->parent;
>   int ret;
>  
>   /*
> @@ -153,10 +153,10 @@ static int mdev_device_remove_cb(struct device *dev, 
> void *data)
>   * Add device to list of registered parent devices.
>   * Returns a negative value on error, otherwise 0.
>   */
> -int mdev_register_device(struct device *dev, const struct parent_ops *ops)
> +int mdev_register_device(struct device *dev, const struct mdev_parent_ops 
> *ops)
>  {
>   int ret;
> - struct parent_device *parent;
> + struct mdev_parent *parent;
>  
>   /* check for mandatory ops */
>   if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
> @@ -229,7 +229,7 @@ 

Re: [PATCH v4 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info

2016-12-21 Thread Peter Zijlstra
On Thu, Dec 22, 2016 at 08:21:23PM +1300, Eric W. Biederman wrote:
> 
> And please make the array the last item in the structure so that
> expanding or contracting it does not affect the ability to read the rest
> of the structure.

Sorry, sample_id must be last, because hysterical crud :/

(basically because that was the only way to add a field to records like
PERF_RECORD_MMAP which used the record length to determine the
filename[] length, yes I know, we won't ever do that again).


Re: [PATCH 2/4] vfio-mdev: de-polute the namespace, rename parent_device & parent_ops

2016-12-21 Thread Jike Song
Not sure if this is appropriate, but if not having the Documentation considered,
for patch 2-4:

Reviewed-by: Jike Song 

--
Thanks,
Jike

On 12/22/2016 07:27 AM, Alex Williamson wrote:
> From: Alex Williamson 
> 
> Add an mdev_ prefix so we're not poluting the namespace so much.
> 
> Cc: Kirti Wankhede 
> Cc: Zhenyu Wang 
> Cc: Zhi Wang 
> Cc: Jike Song 
> Signed-off-by: Alex Williamson 
> ---
>  drivers/gpu/drm/i915/gvt/kvmgt.c |2 +-
>  drivers/vfio/mdev/mdev_core.c|   28 ++--
>  drivers/vfio/mdev/mdev_private.h |6 +++---
>  drivers/vfio/mdev/mdev_sysfs.c   |8 
>  drivers/vfio/mdev/vfio_mdev.c|   12 ++--
>  include/linux/mdev.h |   16 
>  samples/vfio-mdev/mtty.c |2 +-
>  7 files changed, 37 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c 
> b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index 4dd6722..081ada2 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -1089,7 +1089,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, 
> unsigned int cmd,
>   return 0;
>  }
>  
> -static const struct parent_ops intel_vgpu_ops = {
> +static const struct mdev_parent_ops intel_vgpu_ops = {
>   .supported_type_groups  = intel_vgpu_type_groups,
>   .create = intel_vgpu_create,
>   .remove = intel_vgpu_remove,
> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> index be1ee89..4a140e0 100644
> --- a/drivers/vfio/mdev/mdev_core.c
> +++ b/drivers/vfio/mdev/mdev_core.c
> @@ -42,7 +42,7 @@ static int _find_mdev_device(struct device *dev, void *data)
>   return 0;
>  }
>  
> -static bool mdev_device_exist(struct parent_device *parent, uuid_le uuid)
> +static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid)
>  {
>   struct device *dev;
>  
> @@ -56,9 +56,9 @@ static bool mdev_device_exist(struct parent_device *parent, 
> uuid_le uuid)
>  }
>  
>  /* Should be called holding parent_list_lock */
> -static struct parent_device *__find_parent_device(struct device *dev)
> +static struct mdev_parent *__find_parent_device(struct device *dev)
>  {
> - struct parent_device *parent;
> + struct mdev_parent *parent;
>  
>   list_for_each_entry(parent, _list, next) {
>   if (parent->dev == dev)
> @@ -69,8 +69,8 @@ static struct parent_device *__find_parent_device(struct 
> device *dev)
>  
>  static void mdev_release_parent(struct kref *kref)
>  {
> - struct parent_device *parent = container_of(kref, struct parent_device,
> - ref);
> + struct mdev_parent *parent = container_of(kref, struct mdev_parent,
> +   ref);
>   struct device *dev = parent->dev;
>  
>   kfree(parent);
> @@ -78,7 +78,7 @@ static void mdev_release_parent(struct kref *kref)
>  }
>  
>  static
> -inline struct parent_device *mdev_get_parent(struct parent_device *parent)
> +inline struct mdev_parent *mdev_get_parent(struct mdev_parent *parent)
>  {
>   if (parent)
>   kref_get(>ref);
> @@ -86,7 +86,7 @@ inline struct parent_device *mdev_get_parent(struct 
> parent_device *parent)
>   return parent;
>  }
>  
> -static inline void mdev_put_parent(struct parent_device *parent)
> +static inline void mdev_put_parent(struct mdev_parent *parent)
>  {
>   if (parent)
>   kref_put(>ref, mdev_release_parent);
> @@ -95,7 +95,7 @@ static inline void mdev_put_parent(struct parent_device 
> *parent)
>  static int mdev_device_create_ops(struct kobject *kobj,
> struct mdev_device *mdev)
>  {
> - struct parent_device *parent = mdev->parent;
> + struct mdev_parent *parent = mdev->parent;
>   int ret;
>  
>   ret = parent->ops->create(kobj, mdev);
> @@ -122,7 +122,7 @@ static int mdev_device_create_ops(struct kobject *kobj,
>   */
>  static int mdev_device_remove_ops(struct mdev_device *mdev, bool 
> force_remove)
>  {
> - struct parent_device *parent = mdev->parent;
> + struct mdev_parent *parent = mdev->parent;
>   int ret;
>  
>   /*
> @@ -153,10 +153,10 @@ static int mdev_device_remove_cb(struct device *dev, 
> void *data)
>   * Add device to list of registered parent devices.
>   * Returns a negative value on error, otherwise 0.
>   */
> -int mdev_register_device(struct device *dev, const struct parent_ops *ops)
> +int mdev_register_device(struct device *dev, const struct mdev_parent_ops 
> *ops)
>  {
>   int ret;
> - struct parent_device *parent;
> + struct mdev_parent *parent;
>  
>   /* check for mandatory ops */
>   if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
> @@ -229,7 +229,7 @@ int mdev_register_device(struct device *dev, const struct 
> parent_ops *ops)
>  
>  void mdev_unregister_device(struct device *dev)
>  {
> - struct 

Re: [PATCH] ARM64: zynqmp: Fix i2c node's compatible string

2016-12-21 Thread Michal Simek
On 22.12.2016 06:49, Moritz Fischer wrote:
> From: Moritz Fischer 
> 
> The Zynq Ultrascale MP uses version 1.4 of the Cadence IP core
> which fixes some silicon bugs that needed software workarounds
> in Version 1.0 that was used on Zynq systems.
> 
> Signed-off-by: Moritz Fischer 
> Cc: Michal Simek 
> Cc: Sören Brinkmann 
> Cc: U-Boot List 
> Cc: Rob Herring 
> ---
> 
> Hi Michal,
> 
> I think this is a slip up and should be r1p14 for
> Ultrascale ZynqMP. drivers/i2c/i2c-cadence.c already uses this.
> I Cc'd the u-boot list, because the same change would be required there.
> 
> Cheers,
> 
> Moritz
> 
> ---
>  arch/arm64/boot/dts/xilinx/zynqmp.dtsi | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi 
> b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
> index 68a90833..a5a5f91 100644
> --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
> +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
> @@ -175,7 +175,7 @@
>   };
>  
>   i2c0: i2c@ff02 {
> - compatible = "cdns,i2c-r1p10";
> + compatible = "cdns,i2c-r1p14";

I was checking this internally and p10 is doing something what p14
doesn't need to do. That's why this should be

compatible = "cdns,i2c-r1p14", "cdns,i2c-r1p10";

The same of course for u-boot where also p14 should be added to the driver.

Thanks,
Michal


Re: [PATCH] ARM64: zynqmp: Fix i2c node's compatible string

2016-12-21 Thread Michal Simek
On 22.12.2016 06:49, Moritz Fischer wrote:
> From: Moritz Fischer 
> 
> The Zynq Ultrascale MP uses version 1.4 of the Cadence IP core
> which fixes some silicon bugs that needed software workarounds
> in Version 1.0 that was used on Zynq systems.
> 
> Signed-off-by: Moritz Fischer 
> Cc: Michal Simek 
> Cc: Sören Brinkmann 
> Cc: U-Boot List 
> Cc: Rob Herring 
> ---
> 
> Hi Michal,
> 
> I think this is a slip up and should be r1p14 for
> Ultrascale ZynqMP. drivers/i2c/i2c-cadence.c already uses this.
> I Cc'd the u-boot list, because the same change would be required there.
> 
> Cheers,
> 
> Moritz
> 
> ---
>  arch/arm64/boot/dts/xilinx/zynqmp.dtsi | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi 
> b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
> index 68a90833..a5a5f91 100644
> --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
> +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
> @@ -175,7 +175,7 @@
>   };
>  
>   i2c0: i2c@ff02 {
> - compatible = "cdns,i2c-r1p10";
> + compatible = "cdns,i2c-r1p14";

I was checking this internally and p10 is doing something what p14
doesn't need to do. That's why this should be

compatible = "cdns,i2c-r1p14", "cdns,i2c-r1p10";

The same of course for u-boot where also p14 should be added to the driver.

Thanks,
Michal


Re: [PATCH 2/2] nsfs: Add an ioctl() to return creator UID of a userns

2016-12-21 Thread Eric W. Biederman
Andrei Vagin  writes:

> On Mon, Dec 19, 2016 at 03:38:35PM +0100, Michael Kerrisk (man-pages) wrote:
>> @@ -174,6 +175,11 @@ static long ns_ioctl(struct file *filp, unsigned int 
>> ioctl,
>>  return open_related_ns(ns, ns->ops->get_parent);
>>  case NS_GET_NSTYPE:
>>  return ns->ops->type;
>> +case NS_GET_CREATOR_UID:
>> +if (ns->ops->type != CLONE_NEWUSER)
>> +return -EINVAL;
>> +user_ns = container_of(ns, struct user_namespace, ns);
>> +return from_kuid_munged(current_user_ns(), user_ns->owner);
>
> uid_t is "unsigned int", ioctl() returns long, so it may be hard to
> distinguish user id-s from errors on x32.

Very good point.

> off-topic: What is about user_ns->group? I can't find where it is
> used...

Over design. I put it in because I thought it might be useful.  It turns
out it never was used so we can clean things up and remove it.  The
group has never been exposed to userspace so no one will care.

Eric


Re: [PATCH 2/2] nsfs: Add an ioctl() to return creator UID of a userns

2016-12-21 Thread Eric W. Biederman
Andrei Vagin  writes:

> On Mon, Dec 19, 2016 at 03:38:35PM +0100, Michael Kerrisk (man-pages) wrote:
>> @@ -174,6 +175,11 @@ static long ns_ioctl(struct file *filp, unsigned int 
>> ioctl,
>>  return open_related_ns(ns, ns->ops->get_parent);
>>  case NS_GET_NSTYPE:
>>  return ns->ops->type;
>> +case NS_GET_CREATOR_UID:
>> +if (ns->ops->type != CLONE_NEWUSER)
>> +return -EINVAL;
>> +user_ns = container_of(ns, struct user_namespace, ns);
>> +return from_kuid_munged(current_user_ns(), user_ns->owner);
>
> uid_t is "unsigned int", ioctl() returns long, so it may be hard to
> distinguish user id-s from errors on x32.

Very good point.

> off-topic: What is about user_ns->group? I can't find where it is
> used...

Over design. I put it in because I thought it might be useful.  It turns
out it never was used so we can clean things up and remove it.  The
group has never been exposed to userspace so no one will care.

Eric


Re: [PATCH v4 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info

2016-12-21 Thread Eric W. Biederman
Hari Bathini  writes:

> On Wednesday 21 December 2016 06:54 PM, Peter Zijlstra wrote:
>> On Wed, Dec 21, 2016 at 06:39:01PM +0530, Hari Bathini wrote:
>>> Hi Peter,
 I don't see how the tool can parse old records (with NAMESPACES_MAX ==
 7) if you set its NAMESPACES_MAX to say 10.

 Then it will expect the link_info array to be 10 entries and either read
 past the end of the record (if !sample_all) or try and interpret
 sample_id as link_info records.

>>> Right. There will be inconsistency with data the perf tool tries to read
>>> beyond
>>> what the kernel supports. IIUC, you mean, include nr_namespaces field in the
>>> record and warn the user if it doesn't match with the one perf-tool supports
>>> before proceeding..?
>> Yes, if you add a nr_namespaces field its always parsable. If an old
>> tool finds more namespace than it has 'names' for it can always display
>> the raw index number. If a new tool finds the array short, it will not
>> display the missing ones.
>>
>
> Sure, Peter. Will post the next version as soon as
> I am back from vacation..

And please make the array the last item in the structure so that
expanding or contracting it does not affect the ability to read the rest
of the structure.

Eric



Re: [PATCH v4 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info

2016-12-21 Thread Eric W. Biederman
Hari Bathini  writes:

> On Wednesday 21 December 2016 06:54 PM, Peter Zijlstra wrote:
>> On Wed, Dec 21, 2016 at 06:39:01PM +0530, Hari Bathini wrote:
>>> Hi Peter,
 I don't see how the tool can parse old records (with NAMESPACES_MAX ==
 7) if you set its NAMESPACES_MAX to say 10.

 Then it will expect the link_info array to be 10 entries and either read
 past the end of the record (if !sample_all) or try and interpret
 sample_id as link_info records.

>>> Right. There will be inconsistency with data the perf tool tries to read
>>> beyond
>>> what the kernel supports. IIUC, you mean, include nr_namespaces field in the
>>> record and warn the user if it doesn't match with the one perf-tool supports
>>> before proceeding..?
>> Yes, if you add a nr_namespaces field its always parsable. If an old
>> tool finds more namespace than it has 'names' for it can always display
>> the raw index number. If a new tool finds the array short, it will not
>> display the missing ones.
>>
>
> Sure, Peter. Will post the next version as soon as
> I am back from vacation..

And please make the array the last item in the structure so that
expanding or contracting it does not affect the ability to read the rest
of the structure.

Eric



[PATCH 2/3] xen: return xenstore command failures via response instead of rc

2016-12-21 Thread Juergen Gross
When the xenbus driver does some special handling for a Xenstore
command any error condition related to the command should be returned
via an error response instead of letting the related write operation
fail. Otherwise the user land handler might take wrong decisions
assuming the connection to Xenstore is broken.

While at it try to return the same error values xenstored would
return for those cases.

Signed-off-by: Juergen Gross 
---
 drivers/xen/xenbus/xenbus_dev_frontend.c | 47 ++--
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c 
b/drivers/xen/xenbus/xenbus_dev_frontend.c
index a068281..79130b3 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -302,6 +302,29 @@ static void watch_fired(struct xenbus_watch *watch,
mutex_unlock(>dev_data->reply_mutex);
 }
 
+static int xenbus_command_reply(struct xenbus_file_priv *u,
+   unsigned int msg_type, const char *reply)
+{
+   struct {
+   struct xsd_sockmsg hdr;
+   const char body[16];
+   } msg;
+   int rc;
+
+   msg.hdr = u->u.msg;
+   msg.hdr.type = msg_type;
+   msg.hdr.len = strlen(reply) + 1;
+   if (msg.hdr.len > sizeof(msg.body))
+   return -E2BIG;
+
+   mutex_lock(>reply_mutex);
+   rc = queue_reply(>read_buffers, , sizeof(msg.hdr) + msg.hdr.len);
+   wake_up(>read_waitq);
+   mutex_unlock(>reply_mutex);
+
+   return rc;
+}
+
 static int xenbus_write_transaction(unsigned msg_type,
struct xenbus_file_priv *u)
 {
@@ -321,7 +344,7 @@ static int xenbus_write_transaction(unsigned msg_type,
if (trans->handle.id == u->u.msg.tx_id)
break;
if (>list == >transactions)
-   return -ESRCH;
+   return xenbus_command_reply(u, XS_ERROR, "ENOENT");
}
 
reply = xenbus_dev_request_and_reply(>u.msg);
@@ -372,12 +395,12 @@ static int xenbus_write_watch(unsigned msg_type, struct 
xenbus_file_priv *u)
path = u->u.buffer + sizeof(u->u.msg);
token = memchr(path, 0, u->u.msg.len);
if (token == NULL) {
-   rc = -EILSEQ;
+   rc = xenbus_command_reply(u, XS_ERROR, "EINVAL");
goto out;
}
token++;
if (memchr(token, 0, u->u.msg.len - (token - path)) == NULL) {
-   rc = -EILSEQ;
+   rc = xenbus_command_reply(u, XS_ERROR, "EINVAL");
goto out;
}
 
@@ -411,23 +434,7 @@ static int xenbus_write_watch(unsigned msg_type, struct 
xenbus_file_priv *u)
}
 
/* Success.  Synthesize a reply to say all is OK. */
-   {
-   struct {
-   struct xsd_sockmsg hdr;
-   char body[3];
-   } __packed reply = {
-   {
-   .type = msg_type,
-   .len = sizeof(reply.body)
-   },
-   "OK"
-   };
-
-   mutex_lock(>reply_mutex);
-   rc = queue_reply(>read_buffers, , sizeof(reply));
-   wake_up(>read_waitq);
-   mutex_unlock(>reply_mutex);
-   }
+   rc = xenbus_command_reply(u, msg_type, "OK");
 
 out:
return rc;
-- 
2.10.2



[PATCH 3/3] xen: remove stale xs_input_avail() from header

2016-12-21 Thread Juergen Gross
In drivers/xen/xenbus/xenbus_comms.h there is a stale declaration of
xs_input_avail(). Remove it.

Signed-off-by: Juergen Gross 
---
 drivers/xen/xenbus/xenbus_comms.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/xen/xenbus/xenbus_comms.h 
b/drivers/xen/xenbus/xenbus_comms.h
index e74f9c1..867a2e4 100644
--- a/drivers/xen/xenbus/xenbus_comms.h
+++ b/drivers/xen/xenbus/xenbus_comms.h
@@ -42,7 +42,6 @@ int xb_write(const void *data, unsigned len);
 int xb_read(void *data, unsigned len);
 int xb_data_to_read(void);
 int xb_wait_for_data_to_read(void);
-int xs_input_avail(void);
 extern struct xenstore_domain_interface *xen_store_interface;
 extern int xen_store_evtchn;
 extern enum xenstore_init xen_store_domain_type;
-- 
2.10.2



[PATCH 1/3] xen: xenbus driver must not accept invalid transaction ids

2016-12-21 Thread Juergen Gross
When accessing Xenstore in a transaction the user is specifying a
transaction id which he normally obtained from Xenstore when starting
the transaction. Xenstore is validating a transaction id against all
known transaction ids of the connection the request came in. As all
requests of a domain not being the one where Xenstore lives share
one connection, validation of transaction ids of different users of
Xenstore in that domain should be done by the kernel of that domain
being the multiplexer between the Xenstore users in that domain and
Xenstore.

In order to prohibit one Xenstore user to be able to "hijack" a
transaction from another user the xenbus driver has to verify a
given transaction id against all known transaction ids of the user
before forwarding it to Xenstore.

Signed-off-by: Juergen Gross 
---
 drivers/xen/xenbus/xenbus_dev_frontend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c 
b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 6c0ead4..a068281 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -316,7 +316,7 @@ static int xenbus_write_transaction(unsigned msg_type,
rc = -ENOMEM;
goto out;
}
-   } else if (msg_type == XS_TRANSACTION_END) {
+   } else if (u->u.msg.tx_id != 0) {
list_for_each_entry(trans, >transactions, list)
if (trans->handle.id == u->u.msg.tx_id)
break;
-- 
2.10.2



[PATCH 2/3] xen: return xenstore command failures via response instead of rc

2016-12-21 Thread Juergen Gross
When the xenbus driver does some special handling for a Xenstore
command any error condition related to the command should be returned
via an error response instead of letting the related write operation
fail. Otherwise the user land handler might take wrong decisions
assuming the connection to Xenstore is broken.

While at it try to return the same error values xenstored would
return for those cases.

Signed-off-by: Juergen Gross 
---
 drivers/xen/xenbus/xenbus_dev_frontend.c | 47 ++--
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c 
b/drivers/xen/xenbus/xenbus_dev_frontend.c
index a068281..79130b3 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -302,6 +302,29 @@ static void watch_fired(struct xenbus_watch *watch,
mutex_unlock(>dev_data->reply_mutex);
 }
 
+static int xenbus_command_reply(struct xenbus_file_priv *u,
+   unsigned int msg_type, const char *reply)
+{
+   struct {
+   struct xsd_sockmsg hdr;
+   const char body[16];
+   } msg;
+   int rc;
+
+   msg.hdr = u->u.msg;
+   msg.hdr.type = msg_type;
+   msg.hdr.len = strlen(reply) + 1;
+   if (msg.hdr.len > sizeof(msg.body))
+   return -E2BIG;
+
+   mutex_lock(>reply_mutex);
+   rc = queue_reply(>read_buffers, , sizeof(msg.hdr) + msg.hdr.len);
+   wake_up(>read_waitq);
+   mutex_unlock(>reply_mutex);
+
+   return rc;
+}
+
 static int xenbus_write_transaction(unsigned msg_type,
struct xenbus_file_priv *u)
 {
@@ -321,7 +344,7 @@ static int xenbus_write_transaction(unsigned msg_type,
if (trans->handle.id == u->u.msg.tx_id)
break;
if (>list == >transactions)
-   return -ESRCH;
+   return xenbus_command_reply(u, XS_ERROR, "ENOENT");
}
 
reply = xenbus_dev_request_and_reply(>u.msg);
@@ -372,12 +395,12 @@ static int xenbus_write_watch(unsigned msg_type, struct 
xenbus_file_priv *u)
path = u->u.buffer + sizeof(u->u.msg);
token = memchr(path, 0, u->u.msg.len);
if (token == NULL) {
-   rc = -EILSEQ;
+   rc = xenbus_command_reply(u, XS_ERROR, "EINVAL");
goto out;
}
token++;
if (memchr(token, 0, u->u.msg.len - (token - path)) == NULL) {
-   rc = -EILSEQ;
+   rc = xenbus_command_reply(u, XS_ERROR, "EINVAL");
goto out;
}
 
@@ -411,23 +434,7 @@ static int xenbus_write_watch(unsigned msg_type, struct 
xenbus_file_priv *u)
}
 
/* Success.  Synthesize a reply to say all is OK. */
-   {
-   struct {
-   struct xsd_sockmsg hdr;
-   char body[3];
-   } __packed reply = {
-   {
-   .type = msg_type,
-   .len = sizeof(reply.body)
-   },
-   "OK"
-   };
-
-   mutex_lock(>reply_mutex);
-   rc = queue_reply(>read_buffers, , sizeof(reply));
-   wake_up(>read_waitq);
-   mutex_unlock(>reply_mutex);
-   }
+   rc = xenbus_command_reply(u, msg_type, "OK");
 
 out:
return rc;
-- 
2.10.2



[PATCH 3/3] xen: remove stale xs_input_avail() from header

2016-12-21 Thread Juergen Gross
In drivers/xen/xenbus/xenbus_comms.h there is a stale declaration of
xs_input_avail(). Remove it.

Signed-off-by: Juergen Gross 
---
 drivers/xen/xenbus/xenbus_comms.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/xen/xenbus/xenbus_comms.h 
b/drivers/xen/xenbus/xenbus_comms.h
index e74f9c1..867a2e4 100644
--- a/drivers/xen/xenbus/xenbus_comms.h
+++ b/drivers/xen/xenbus/xenbus_comms.h
@@ -42,7 +42,6 @@ int xb_write(const void *data, unsigned len);
 int xb_read(void *data, unsigned len);
 int xb_data_to_read(void);
 int xb_wait_for_data_to_read(void);
-int xs_input_avail(void);
 extern struct xenstore_domain_interface *xen_store_interface;
 extern int xen_store_evtchn;
 extern enum xenstore_init xen_store_domain_type;
-- 
2.10.2



[PATCH 1/3] xen: xenbus driver must not accept invalid transaction ids

2016-12-21 Thread Juergen Gross
When accessing Xenstore in a transaction the user is specifying a
transaction id which he normally obtained from Xenstore when starting
the transaction. Xenstore is validating a transaction id against all
known transaction ids of the connection the request came in. As all
requests of a domain not being the one where Xenstore lives share
one connection, validation of transaction ids of different users of
Xenstore in that domain should be done by the kernel of that domain
being the multiplexer between the Xenstore users in that domain and
Xenstore.

In order to prohibit one Xenstore user to be able to "hijack" a
transaction from another user the xenbus driver has to verify a
given transaction id against all known transaction ids of the user
before forwarding it to Xenstore.

Signed-off-by: Juergen Gross 
---
 drivers/xen/xenbus/xenbus_dev_frontend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c 
b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 6c0ead4..a068281 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -316,7 +316,7 @@ static int xenbus_write_transaction(unsigned msg_type,
rc = -ENOMEM;
goto out;
}
-   } else if (msg_type == XS_TRANSACTION_END) {
+   } else if (u->u.msg.tx_id != 0) {
list_for_each_entry(trans, >transactions, list)
if (trans->handle.id == u->u.msg.tx_id)
break;
-- 
2.10.2



[PATCH 0/3] xen: fix some minor bugs and cleanup of xenbus

2016-12-21 Thread Juergen Gross
Do some minor bug fixes and cleanup of xenbus driver.

Juergen Gross (3):
  xen: xenbus driver must not accept invalid transaction ids
  xen: return xenstore command failures via response instead of rc
  xen: remove stale xs_input_avail() from header

 drivers/xen/xenbus/xenbus_comms.h|  1 -
 drivers/xen/xenbus/xenbus_dev_frontend.c | 49 ++--
 2 files changed, 28 insertions(+), 22 deletions(-)

-- 
2.10.2



Re: [PATCH 0/2] Add further ioctl() operations for namespace discovery

2016-12-21 Thread Michael Kerrisk (man-pages)
Hi Eric,

On 12/22/2016 01:27 AM, Eric W. Biederman wrote:
> "Michael Kerrisk (man-pages)"  writes:
> 
>> Hi Eric,
>>
>> On 12/21/2016 01:17 AM, Eric W. Biederman wrote:
>>> "Michael Kerrisk (man-pages)"  writes:
>>>
 Hi Eric,

 On 12/20/2016 09:22 PM, Eric W. Biederman wrote:
> "Michael Kerrisk (man-pages)"  writes:
>
>> Hello Eric,
>>
>> On 12/19/2016 11:53 PM, Eric W. Biederman wrote:
>>> "Michael Kerrisk (man-pages)"  writes:
>>>
> 
>>> Now the question becomes who are the users of this?  Because it just
>>> occurred to me that we now have an interesting complication.  Userspace
>>> extending the meaning of the capability bits, and using to protect
>>> additional things.  Ugh.  That could be a maintenance problem of another
>>> flavor.  Definitely not my favorite.
>>
>> I don't follow you here. Could you say some more about what you mean?
> 
> I have seen user space userspace do thing such as extend CAP_SYS_REBOOT
> to things such as permission to invoke "shutdown -r now".  Which
> depending on what a clean reboot entails could be greately increasing
> the scope of CAP_SYS_REBOOT.
> 
> I am concerned for that and similar situations that userspace
> applications could lead us into situation that one wrong decision could
> wind up being an unfixable mistake because fixing the mistake would
> break userspsace.

Okay.

>>> So why are we asking the questions about what permissions a process has?
>>
>> My main interest here is monitoring/discovery/debugging on a running
>> system. NS_GET_PARENT, NS_GET_USERNS, NS_GET_CREATOR_UID, and 
>> NS_GET_NSTYPE provide most of what I'd like to see. Being able to ask
>> "does this process have permissions in that namespace?" would be nice 
>> to have in terms of understanding/debugging a system.
> 
> If we are just looking at explanations then I seem to have been
> over-engineering things.  So let's just aim at the two ioctls.
> Or at least the information in those ioctls.

Okay.

> With at least a comment on the ioctl returning the OWNER_UID that
> describes why it is not a problem to if the owners uid is something like
> ((uid_t)-3).  Which overlaps with the space for error return codes.
>
> I don't know if we are fine or not, but that review comment definitely
> deserves some consideration.


See my reply just sent to Andrei. We should instead then just return 
the UID via a buffer pointed to by the ioctl() argument:

ioctl(fd, NS_GET_OWNER_UID, );

Cheers,

Michael


-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/


[PATCH 0/3] xen: fix some minor bugs and cleanup of xenbus

2016-12-21 Thread Juergen Gross
Do some minor bug fixes and cleanup of xenbus driver.

Juergen Gross (3):
  xen: xenbus driver must not accept invalid transaction ids
  xen: return xenstore command failures via response instead of rc
  xen: remove stale xs_input_avail() from header

 drivers/xen/xenbus/xenbus_comms.h|  1 -
 drivers/xen/xenbus/xenbus_dev_frontend.c | 49 ++--
 2 files changed, 28 insertions(+), 22 deletions(-)

-- 
2.10.2



Re: [PATCH 0/2] Add further ioctl() operations for namespace discovery

2016-12-21 Thread Michael Kerrisk (man-pages)
Hi Eric,

On 12/22/2016 01:27 AM, Eric W. Biederman wrote:
> "Michael Kerrisk (man-pages)"  writes:
> 
>> Hi Eric,
>>
>> On 12/21/2016 01:17 AM, Eric W. Biederman wrote:
>>> "Michael Kerrisk (man-pages)"  writes:
>>>
 Hi Eric,

 On 12/20/2016 09:22 PM, Eric W. Biederman wrote:
> "Michael Kerrisk (man-pages)"  writes:
>
>> Hello Eric,
>>
>> On 12/19/2016 11:53 PM, Eric W. Biederman wrote:
>>> "Michael Kerrisk (man-pages)"  writes:
>>>
> 
>>> Now the question becomes who are the users of this?  Because it just
>>> occurred to me that we now have an interesting complication.  Userspace
>>> extending the meaning of the capability bits, and using to protect
>>> additional things.  Ugh.  That could be a maintenance problem of another
>>> flavor.  Definitely not my favorite.
>>
>> I don't follow you here. Could you say some more about what you mean?
> 
> I have seen user space userspace do thing such as extend CAP_SYS_REBOOT
> to things such as permission to invoke "shutdown -r now".  Which
> depending on what a clean reboot entails could be greately increasing
> the scope of CAP_SYS_REBOOT.
> 
> I am concerned for that and similar situations that userspace
> applications could lead us into situation that one wrong decision could
> wind up being an unfixable mistake because fixing the mistake would
> break userspsace.

Okay.

>>> So why are we asking the questions about what permissions a process has?
>>
>> My main interest here is monitoring/discovery/debugging on a running
>> system. NS_GET_PARENT, NS_GET_USERNS, NS_GET_CREATOR_UID, and 
>> NS_GET_NSTYPE provide most of what I'd like to see. Being able to ask
>> "does this process have permissions in that namespace?" would be nice 
>> to have in terms of understanding/debugging a system.
> 
> If we are just looking at explanations then I seem to have been
> over-engineering things.  So let's just aim at the two ioctls.
> Or at least the information in those ioctls.

Okay.

> With at least a comment on the ioctl returning the OWNER_UID that
> describes why it is not a problem to if the owners uid is something like
> ((uid_t)-3).  Which overlaps with the space for error return codes.
>
> I don't know if we are fine or not, but that review comment definitely
> deserves some consideration.


See my reply just sent to Andrei. We should instead then just return 
the UID via a buffer pointed to by the ioctl() argument:

ioctl(fd, NS_GET_OWNER_UID, );

Cheers,

Michael


-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/


RE: ATH9 driver issues on ARM64

2016-12-21 Thread Bharat Kumar Gogada
Hi All,

After further debugging we know the place it hangs.

In function:
static int ath_reset_internal (struct ath_softc *sc, struct ath9k_channel 
*hchan)
{
disable_irq(sc->irq);
tasklet_disable(>intr_tq);
tasklet_disable(>bcon_tasklet);
spin_lock_bh(>sc_pcu_lock);



if (!ath_complete_reset(sc, true))  -> This function enables 
hardware interrupts
r = -EIO;

out:
enable_irq(sc->irq);-> Here IRQ line state is 
changed to enable state
spin_unlock_bh(>sc_pcu_lock);
tasklet_enable(>bcon_tasklet);
tasklet_enable(>intr_tq);

}

static bool ath_complete_reset(struct ath_softc *sc, bool start)
{
struct ath_hw *ah = sc->sc_ah;
struct ath_common *common = ath9k_hw_common(ah);
unsigned long flags;

ath9k_calculate_summary_state(sc, sc->cur_chan);
ath_startrecv(sc);


  
sc->gtt_cnt = 0;

ath9k_hw_set_interrupts(ah);-> Here hardware interrupts are 
being enabled
ath9k_hw_enable_interrupts(ah); -> We see hang after this line
ieee80211_wake_queues(sc->hw);
ath9k_p2p_ps_timer(sc);

return true;
}

Before changing IRQ line to to enabled state, hardware interrupts are being 
enabled. 
Wont this cause a race condition where within this period of hardware raises an 
interrupt, but IRQ line state is disabled state, this will 
reach the following condition making EP handler not being invoked.

void handle_simple_irq(struct irq_desc *desc)
{
raw_spin_lock(>lock);
   ... 
if (unlikely(!desc->action || irqd_irq_disabled(>irq_data))) {
// This condition is reaching and becoming true.
desc->istate |= IRQS_PENDING;
goto out_unlock;
}

kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);

out_unlock:
raw_spin_unlock(>lock);
}

We see hang at that statement, without reaching back enable_irq, looks like by 
this time CPU is in stall.

Can any tell why hardware interrupts are being enabled before kernel changing 
IRQ line state?


Regards,
Bharat

> 
> > On Sat, Dec 10, 2016 at 02:40:48PM +, Bharat Kumar Gogada wrote:
> > > Hi,
> > >
> > > After taking some more lecroy traces, we see that after 2nd ASSERT
> > > from EP
> > on ARM64 we see continuous data movement of 32 dwords or 12 dwords and
> > never sign of DEASSERT.
> > > Comparatively on working traces (x86) after 2nd assert there are
> > > only BAR
> > register reads and writes and then DEASSERT, for almost most of the
> > interrupts and we haven't seen 12 or 32 dwords data movement on this trace.
> > >
> > > I did not work on EP wifi/network drivers, any help why EP needs
> > > those many
> > number of data at scan time ?
> >
> > The device doesn't know whether it's in an x86 or an arm64 system.  If
> > it works differently, it must be because the PCI core or the driver is
> > programming the device differently.
> >
> > You should be able to match up Memory transactions from the host in
> > the trace with things the driver does.  For example, if you see an
> > Assert_INTx message from the device, you should eventually see a
> > Memory Read from the host to get the ISR, i.e., some read done in the bowels
> of ath9k_hw_getisr().
> >
> > I don't know how the ath9k device works, but there must be some Memory
> > Read or Write done by the driver that tells the device "we've handled this
> interrupt".
> > The device should then send a Deassert_INTx; of course, if the device
> > still requires service, e.g., because it has received more packets, it
> > might leave the INTx asserted.
> >
> > I doubt you'd see exactly the same traces on x86 and arm64 because
> > they aren't seeing the same network packets and the driver is executing at
> different rates.
> > But you should at least be able to identify interrupt assertion and
> > the actions of the driver's interrupt service routine.
> 
> 
> Thanks Bjorn.
> 
> As you mentioned we did try to debug in that path. After we start scan after 
> 2nd
> ASSERT we see lots of 32 and 12 dword data, and in function void
> ath9k_hw_enable_interrupts(struct ath_hw *ah) {
>   ...
>   ..
>   REG_WRITE(ah, AR_IER, AR_IER_ENABLE);
>   // EP driver hangs at this
> position after 2nd ASSERT
>   // The following writes are not
> happening
> if (!AR_SREV_9100(ah)) {
> REG_WRITE(ah, AR_INTR_ASYNC_ENABLE, async_mask);
> REG_WRITE(ah, AR_INTR_ASYNC_MASK, async_mask);
> 
> REG_WRITE(ah, AR_INTR_SYNC_ENABLE, sync_default);
> REG_WRITE(ah, AR_INTR_SYNC_MASK, sync_default);
> }
> ath_dbg(common, INTERRUPT, "AR_IMR 0x%x IER 0x%x\n",
> REG_READ(ah, AR_IMR), REG_READ(ah, AR_IER)); } 

RE: ATH9 driver issues on ARM64

2016-12-21 Thread Bharat Kumar Gogada
Hi All,

After further debugging we know the place it hangs.

In function:
static int ath_reset_internal (struct ath_softc *sc, struct ath9k_channel 
*hchan)
{
disable_irq(sc->irq);
tasklet_disable(>intr_tq);
tasklet_disable(>bcon_tasklet);
spin_lock_bh(>sc_pcu_lock);



if (!ath_complete_reset(sc, true))  -> This function enables 
hardware interrupts
r = -EIO;

out:
enable_irq(sc->irq);-> Here IRQ line state is 
changed to enable state
spin_unlock_bh(>sc_pcu_lock);
tasklet_enable(>bcon_tasklet);
tasklet_enable(>intr_tq);

}

static bool ath_complete_reset(struct ath_softc *sc, bool start)
{
struct ath_hw *ah = sc->sc_ah;
struct ath_common *common = ath9k_hw_common(ah);
unsigned long flags;

ath9k_calculate_summary_state(sc, sc->cur_chan);
ath_startrecv(sc);


  
sc->gtt_cnt = 0;

ath9k_hw_set_interrupts(ah);-> Here hardware interrupts are 
being enabled
ath9k_hw_enable_interrupts(ah); -> We see hang after this line
ieee80211_wake_queues(sc->hw);
ath9k_p2p_ps_timer(sc);

return true;
}

Before changing IRQ line to to enabled state, hardware interrupts are being 
enabled. 
Wont this cause a race condition where within this period of hardware raises an 
interrupt, but IRQ line state is disabled state, this will 
reach the following condition making EP handler not being invoked.

void handle_simple_irq(struct irq_desc *desc)
{
raw_spin_lock(>lock);
   ... 
if (unlikely(!desc->action || irqd_irq_disabled(>irq_data))) {
// This condition is reaching and becoming true.
desc->istate |= IRQS_PENDING;
goto out_unlock;
}

kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);

out_unlock:
raw_spin_unlock(>lock);
}

We see hang at that statement, without reaching back enable_irq, looks like by 
this time CPU is in stall.

Can any tell why hardware interrupts are being enabled before kernel changing 
IRQ line state?


Regards,
Bharat

> 
> > On Sat, Dec 10, 2016 at 02:40:48PM +, Bharat Kumar Gogada wrote:
> > > Hi,
> > >
> > > After taking some more lecroy traces, we see that after 2nd ASSERT
> > > from EP
> > on ARM64 we see continuous data movement of 32 dwords or 12 dwords and
> > never sign of DEASSERT.
> > > Comparatively on working traces (x86) after 2nd assert there are
> > > only BAR
> > register reads and writes and then DEASSERT, for almost most of the
> > interrupts and we haven't seen 12 or 32 dwords data movement on this trace.
> > >
> > > I did not work on EP wifi/network drivers, any help why EP needs
> > > those many
> > number of data at scan time ?
> >
> > The device doesn't know whether it's in an x86 or an arm64 system.  If
> > it works differently, it must be because the PCI core or the driver is
> > programming the device differently.
> >
> > You should be able to match up Memory transactions from the host in
> > the trace with things the driver does.  For example, if you see an
> > Assert_INTx message from the device, you should eventually see a
> > Memory Read from the host to get the ISR, i.e., some read done in the bowels
> of ath9k_hw_getisr().
> >
> > I don't know how the ath9k device works, but there must be some Memory
> > Read or Write done by the driver that tells the device "we've handled this
> interrupt".
> > The device should then send a Deassert_INTx; of course, if the device
> > still requires service, e.g., because it has received more packets, it
> > might leave the INTx asserted.
> >
> > I doubt you'd see exactly the same traces on x86 and arm64 because
> > they aren't seeing the same network packets and the driver is executing at
> different rates.
> > But you should at least be able to identify interrupt assertion and
> > the actions of the driver's interrupt service routine.
> 
> 
> Thanks Bjorn.
> 
> As you mentioned we did try to debug in that path. After we start scan after 
> 2nd
> ASSERT we see lots of 32 and 12 dword data, and in function void
> ath9k_hw_enable_interrupts(struct ath_hw *ah) {
>   ...
>   ..
>   REG_WRITE(ah, AR_IER, AR_IER_ENABLE);
>   // EP driver hangs at this
> position after 2nd ASSERT
>   // The following writes are not
> happening
> if (!AR_SREV_9100(ah)) {
> REG_WRITE(ah, AR_INTR_ASYNC_ENABLE, async_mask);
> REG_WRITE(ah, AR_INTR_ASYNC_MASK, async_mask);
> 
> REG_WRITE(ah, AR_INTR_SYNC_ENABLE, sync_default);
> REG_WRITE(ah, AR_INTR_SYNC_MASK, sync_default);
> }
> ath_dbg(common, INTERRUPT, "AR_IMR 0x%x IER 0x%x\n",
> REG_READ(ah, AR_IMR), REG_READ(ah, AR_IER)); } 

Re: [PATCH 2/2] nsfs: Add an ioctl() to return creator UID of a userns

2016-12-21 Thread Michael Kerrisk (man-pages)
Hi Andrei,

On 12/21/2016 04:13 AM, Andrei Vagin wrote:
> On Mon, Dec 19, 2016 at 03:38:35PM +0100, Michael Kerrisk (man-pages) wrote:
>> # Some open questions about this patch below.
>> #
>> One of the rules regarding capabilities is:
>>
>> A process that resides in the parent of the user namespace and
>> whose effective user ID matches the owner of the namespace has
>> all capabilities in the namespace.
>>
>> Therefore, in order to write code that discovers whether process X has
>> capabilities in namespace Y, we need a way to find out who the creator
>> of a user namespace is. This patch adds an NS_GET_CREATOR_UID ioctl()
>> that returns the (munged) UID of the creator of the user namespace
>> referred to by the specified file descriptor.
>>
>> If the supplied file descriptor does not refer to a user namespace,
>> the operation fails with the error EINVAL.
>>
>> Signed-off-by: Michael Kerrisk 
>> ---
>>  fs/nsfs.c | 6 ++
>>  include/uapi/linux/nsfs.h | 8 +---
>>  2 files changed, 11 insertions(+), 3 deletions(-)
>>
>> Open questions:
>>
>> * Would it be preferabe to separate the logic for NS_GET_CREATOR_UID
>>   into a small helper function?
>> * Is this a correct use of container_of()? I did not immediately
>>   see another way to get to the user_namespace struct, but I
>>   may well have missed something.
>>
>> diff --git a/fs/nsfs.c b/fs/nsfs.c
>> index 5d53476..26f6d94 100644
>> --- a/fs/nsfs.c
>> +++ b/fs/nsfs.c
>> @@ -163,6 +163,7 @@ int open_related_ns(struct ns_common *ns,
>>  static long ns_ioctl(struct file *filp, unsigned int ioctl,
>>  unsigned long arg)
>>  {
>> +struct user_namespace *user_ns;
>>  struct ns_common *ns = get_proc_ns(file_inode(filp));
>>  
>>  switch (ioctl) {
>> @@ -174,6 +175,11 @@ static long ns_ioctl(struct file *filp, unsigned int 
>> ioctl,
>>  return open_related_ns(ns, ns->ops->get_parent);
>>  case NS_GET_NSTYPE:
>>  return ns->ops->type;
>> +case NS_GET_CREATOR_UID:
>> +if (ns->ops->type != CLONE_NEWUSER)
>> +return -EINVAL;
>> +user_ns = container_of(ns, struct user_namespace, ns);
>> +return from_kuid_munged(current_user_ns(), user_ns->owner);
> 
> uid_t is "unsigned int", ioctl() returns long, so it may be hard to
> distinguish user id-s from errors on x32.

Good point. So, we could instead return the UID via a buffer pointed to 
by the ioctl() arg. That would seem better, right?

> off-topic: What is about user_ns->group? I can't find where it is used...

I've no idea. Like you, I can't see any place where it's being used.

Cheers,

Michael


>>  default:
>>  return -ENOTTY;
>>  }
>> diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
>> index 2b48df1..b3c6c78 100644
>> --- a/include/uapi/linux/nsfs.h
>> +++ b/include/uapi/linux/nsfs.h
>> @@ -6,11 +6,13 @@
>>  #define NSIO0xb7
>>  
>>  /* Returns a file descriptor that refers to an owning user namespace */
>> -#define NS_GET_USERNS   _IO(NSIO, 0x1)
>> +#define NS_GET_USERNS   _IO(NSIO, 0x1)
>>  /* Returns a file descriptor that refers to a parent namespace */
>> -#define NS_GET_PARENT   _IO(NSIO, 0x2)
>> +#define NS_GET_PARENT   _IO(NSIO, 0x2)
>>  /* Returns the type of namespace (CLONE_NEW* value) referred to by
>> file descriptor */
>> -#define NS_GET_NSTYPE   _IO(NSIO, 0x3)
>> +#define NS_GET_NSTYPE   _IO(NSIO, 0x3)
>> +/* Get creator UID for a user namespace */
>> +#define NS_GET_CREATOR_UID  _IO(NSIO, 0x4)
>>  
>>  #endif /* __LINUX_NSFS_H */
>> -- 
>> 2.5.5
>>
> 


-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/


Re: [PATCH 2/2] nsfs: Add an ioctl() to return creator UID of a userns

2016-12-21 Thread Michael Kerrisk (man-pages)
Hi Andrei,

On 12/21/2016 04:13 AM, Andrei Vagin wrote:
> On Mon, Dec 19, 2016 at 03:38:35PM +0100, Michael Kerrisk (man-pages) wrote:
>> # Some open questions about this patch below.
>> #
>> One of the rules regarding capabilities is:
>>
>> A process that resides in the parent of the user namespace and
>> whose effective user ID matches the owner of the namespace has
>> all capabilities in the namespace.
>>
>> Therefore, in order to write code that discovers whether process X has
>> capabilities in namespace Y, we need a way to find out who the creator
>> of a user namespace is. This patch adds an NS_GET_CREATOR_UID ioctl()
>> that returns the (munged) UID of the creator of the user namespace
>> referred to by the specified file descriptor.
>>
>> If the supplied file descriptor does not refer to a user namespace,
>> the operation fails with the error EINVAL.
>>
>> Signed-off-by: Michael Kerrisk 
>> ---
>>  fs/nsfs.c | 6 ++
>>  include/uapi/linux/nsfs.h | 8 +---
>>  2 files changed, 11 insertions(+), 3 deletions(-)
>>
>> Open questions:
>>
>> * Would it be preferabe to separate the logic for NS_GET_CREATOR_UID
>>   into a small helper function?
>> * Is this a correct use of container_of()? I did not immediately
>>   see another way to get to the user_namespace struct, but I
>>   may well have missed something.
>>
>> diff --git a/fs/nsfs.c b/fs/nsfs.c
>> index 5d53476..26f6d94 100644
>> --- a/fs/nsfs.c
>> +++ b/fs/nsfs.c
>> @@ -163,6 +163,7 @@ int open_related_ns(struct ns_common *ns,
>>  static long ns_ioctl(struct file *filp, unsigned int ioctl,
>>  unsigned long arg)
>>  {
>> +struct user_namespace *user_ns;
>>  struct ns_common *ns = get_proc_ns(file_inode(filp));
>>  
>>  switch (ioctl) {
>> @@ -174,6 +175,11 @@ static long ns_ioctl(struct file *filp, unsigned int 
>> ioctl,
>>  return open_related_ns(ns, ns->ops->get_parent);
>>  case NS_GET_NSTYPE:
>>  return ns->ops->type;
>> +case NS_GET_CREATOR_UID:
>> +if (ns->ops->type != CLONE_NEWUSER)
>> +return -EINVAL;
>> +user_ns = container_of(ns, struct user_namespace, ns);
>> +return from_kuid_munged(current_user_ns(), user_ns->owner);
> 
> uid_t is "unsigned int", ioctl() returns long, so it may be hard to
> distinguish user id-s from errors on x32.

Good point. So, we could instead return the UID via a buffer pointed to 
by the ioctl() arg. That would seem better, right?

> off-topic: What is about user_ns->group? I can't find where it is used...

I've no idea. Like you, I can't see any place where it's being used.

Cheers,

Michael


>>  default:
>>  return -ENOTTY;
>>  }
>> diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
>> index 2b48df1..b3c6c78 100644
>> --- a/include/uapi/linux/nsfs.h
>> +++ b/include/uapi/linux/nsfs.h
>> @@ -6,11 +6,13 @@
>>  #define NSIO0xb7
>>  
>>  /* Returns a file descriptor that refers to an owning user namespace */
>> -#define NS_GET_USERNS   _IO(NSIO, 0x1)
>> +#define NS_GET_USERNS   _IO(NSIO, 0x1)
>>  /* Returns a file descriptor that refers to a parent namespace */
>> -#define NS_GET_PARENT   _IO(NSIO, 0x2)
>> +#define NS_GET_PARENT   _IO(NSIO, 0x2)
>>  /* Returns the type of namespace (CLONE_NEW* value) referred to by
>> file descriptor */
>> -#define NS_GET_NSTYPE   _IO(NSIO, 0x3)
>> +#define NS_GET_NSTYPE   _IO(NSIO, 0x3)
>> +/* Get creator UID for a user namespace */
>> +#define NS_GET_CREATOR_UID  _IO(NSIO, 0x4)
>>  
>>  #endif /* __LINUX_NSFS_H */
>> -- 
>> 2.5.5
>>
> 


-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/


Re: [PATCH v18 0/4] Introduce usb charger framework to deal with the usb gadget power negotation

2016-12-21 Thread Baolin Wang
On 22 December 2016 at 07:47, NeilBrown  wrote:
> On Wed, Dec 21 2016, Baolin Wang wrote:
>
>> On 21 December 2016 at 11:48, NeilBrown  wrote:
>>> On Wed, Dec 21 2016, Baolin Wang wrote:
>>>
 Hi,

 On 21 December 2016 at 06:07, NeilBrown  wrote:
> On Tue, Dec 20 2016, Baolin Wang wrote:
>
>> Hi Neil,
>>
>> On 3 November 2016 at 09:25, NeilBrown  wrote:
>>> On Tue, Nov 01 2016, Baolin Wang wrote:
>>>
>>>
> So I won't be responding on this topic any further until I see a 
> genuine
> attempt to understand and resolve the inconsistencies with
> usb_register_notifier().

 Any better solution?
>>>
>>> I'm not sure exactly what you are asking, so I'll assume you are asking
>>> the question I want to answer :-)
>>>
>>> 1/ Liase with the extcon developers to resolve the inconsistencies
>>>   with USB connector types.
>>>   e.g. current there is both "EXTCON_USB" and "EXTCON_CHG_USB_SDP"
>>>   which both seem to suggest a standard downstream port.  There is no
>>>   documentation describing how these relate, and no consistent practice
>>>   to copy.
>>>   I suspect the intention is that
>>> EXTCON_USB and EXTCON_USB_HOST indicated that data capabilities of
>>> the cable, while EXTCON_CHG_USB* indicate the power capabilities of
>>> the cable.
>>> So EXTCON_CHG_USB_SDP should always appear together with EXTCON_USB
>>> while EXTCON_CHG_USB_DCP would not, and EXTCON_CHG_USB_ACA
>>> would normally appear with EXTCON_USB_HOST (I think).
>>>   Some drivers follow this model, particularly extcon-max14577.c
>>>   but it is not consistent.
>>>
>>>   This policy should be well documented and possibly existing drivers
>>>   should be updated to follow it.
>>>
>>>   At the same time it would make sense to resolve EXTCON_CHG_USB_SLOW
>>>   and EXTCON_CHG_USB_FAST.  These names don't mean much.
>>>   They were recently removed from drivers/power/axp288_charger.c
>>>   which is good, but are still used in drivers/extcon/extcon-max*
>>>   Possibly they should be changed to names from the standard, or
>>>   possibly they should be renamed to identify the current they are
>>>   expected to provide. e.g. EXTCON_CHG_USB_500MA and EXTCON_CHG_USB_1A
>>
>> Now I am creating the new patchset with fixing and converting exist 
>> drivers.
>
> Thanks!
>
>>
>> I did some investigation about EXTCON subsystem. From your suggestion:
>> 1. EXTCON_CHG_USB_SDP should always appear together with EXTCON_USB.
>>  After checking, now all extcon drivers were following this rule.
>
> what about extcon-axp288.c ?
> axp288_handle_chrg_det_event() sets or clears EXTCON_CHG_USB_SDP but
> never sets EXTCON_USB.
> Similarly phy-rockchip-inno-usb2.c never sets EXTCON_USB.

 Ha, sorry, I missed these 2 files, and I will fix them.

>
>>
>> 2. EXTCON_CHG_USB_ACA would normally appear with EXTCON_USB_HOST.
>>  Now no extcon drivers used EXTCON_CHG_USB_ACA, then no need to
>> change.
>
> Agreed.
>
>>
>> 3. Change EXTCON_CHG_USB_SLOW/FAST to EXTCON_CHG_USB_500MA/1A.
>>  There are no model that shows the slow charger should be 500mA
>> and fast charger is 1A. (In extcon-max77693.c file, the fast charger
>> can be drawn 2A), so changing to EXTCON_CHG_USB_500MA/1A is not useful
>> I think.
>
> Leaving the names a SLOW/FAST is less useful as those names don't *mean*
> anything.
> The only place where the cable types are registered are in
>  extcon-max{14577,77693,77843,8997}.c
>
> In each case, the code strongly suggests that the meaning is that "slow"
> means "500mA" and that "fast" means "1A" (or sometimes 1A-2A).
>
> With names like "fast" and "slow", any common changer framework cannot
> make use of these cable types as the name doesn't mean anything.
> If the names were changed to 500MA/1A, then common code could reasonably
> assume how much current can safely be drawn from each.

 As I know, some fast charger can be drawn 5A, then do we need another
 macro named 5A? then will introduce more macros in future, I am not
 true this is helpful.
>>>
>>> It isn't really a question of what the charger can provide.  It is a
>>> question of what the cable reports to the phy.
>>
>> Yes, there is no spec to describe fast/slow charger type and how much
>> current fast/slow charger can provide. Maybe some fast charger can
>> provide 1A/2A, others can provide 5A, which depends on users'
>> platform. If we change to EXTCON_CHG_USB_500MA/1A and some fast
>> charger can provide 1.5A on user's platform, will it report the fast
>> charger type by EXTCON_CHG_USB_1A on user's platform 

Re: [PATCH v18 0/4] Introduce usb charger framework to deal with the usb gadget power negotation

2016-12-21 Thread Baolin Wang
On 22 December 2016 at 07:47, NeilBrown  wrote:
> On Wed, Dec 21 2016, Baolin Wang wrote:
>
>> On 21 December 2016 at 11:48, NeilBrown  wrote:
>>> On Wed, Dec 21 2016, Baolin Wang wrote:
>>>
 Hi,

 On 21 December 2016 at 06:07, NeilBrown  wrote:
> On Tue, Dec 20 2016, Baolin Wang wrote:
>
>> Hi Neil,
>>
>> On 3 November 2016 at 09:25, NeilBrown  wrote:
>>> On Tue, Nov 01 2016, Baolin Wang wrote:
>>>
>>>
> So I won't be responding on this topic any further until I see a 
> genuine
> attempt to understand and resolve the inconsistencies with
> usb_register_notifier().

 Any better solution?
>>>
>>> I'm not sure exactly what you are asking, so I'll assume you are asking
>>> the question I want to answer :-)
>>>
>>> 1/ Liase with the extcon developers to resolve the inconsistencies
>>>   with USB connector types.
>>>   e.g. current there is both "EXTCON_USB" and "EXTCON_CHG_USB_SDP"
>>>   which both seem to suggest a standard downstream port.  There is no
>>>   documentation describing how these relate, and no consistent practice
>>>   to copy.
>>>   I suspect the intention is that
>>> EXTCON_USB and EXTCON_USB_HOST indicated that data capabilities of
>>> the cable, while EXTCON_CHG_USB* indicate the power capabilities of
>>> the cable.
>>> So EXTCON_CHG_USB_SDP should always appear together with EXTCON_USB
>>> while EXTCON_CHG_USB_DCP would not, and EXTCON_CHG_USB_ACA
>>> would normally appear with EXTCON_USB_HOST (I think).
>>>   Some drivers follow this model, particularly extcon-max14577.c
>>>   but it is not consistent.
>>>
>>>   This policy should be well documented and possibly existing drivers
>>>   should be updated to follow it.
>>>
>>>   At the same time it would make sense to resolve EXTCON_CHG_USB_SLOW
>>>   and EXTCON_CHG_USB_FAST.  These names don't mean much.
>>>   They were recently removed from drivers/power/axp288_charger.c
>>>   which is good, but are still used in drivers/extcon/extcon-max*
>>>   Possibly they should be changed to names from the standard, or
>>>   possibly they should be renamed to identify the current they are
>>>   expected to provide. e.g. EXTCON_CHG_USB_500MA and EXTCON_CHG_USB_1A
>>
>> Now I am creating the new patchset with fixing and converting exist 
>> drivers.
>
> Thanks!
>
>>
>> I did some investigation about EXTCON subsystem. From your suggestion:
>> 1. EXTCON_CHG_USB_SDP should always appear together with EXTCON_USB.
>>  After checking, now all extcon drivers were following this rule.
>
> what about extcon-axp288.c ?
> axp288_handle_chrg_det_event() sets or clears EXTCON_CHG_USB_SDP but
> never sets EXTCON_USB.
> Similarly phy-rockchip-inno-usb2.c never sets EXTCON_USB.

 Ha, sorry, I missed these 2 files, and I will fix them.

>
>>
>> 2. EXTCON_CHG_USB_ACA would normally appear with EXTCON_USB_HOST.
>>  Now no extcon drivers used EXTCON_CHG_USB_ACA, then no need to
>> change.
>
> Agreed.
>
>>
>> 3. Change EXTCON_CHG_USB_SLOW/FAST to EXTCON_CHG_USB_500MA/1A.
>>  There are no model that shows the slow charger should be 500mA
>> and fast charger is 1A. (In extcon-max77693.c file, the fast charger
>> can be drawn 2A), so changing to EXTCON_CHG_USB_500MA/1A is not useful
>> I think.
>
> Leaving the names a SLOW/FAST is less useful as those names don't *mean*
> anything.
> The only place where the cable types are registered are in
>  extcon-max{14577,77693,77843,8997}.c
>
> In each case, the code strongly suggests that the meaning is that "slow"
> means "500mA" and that "fast" means "1A" (or sometimes 1A-2A).
>
> With names like "fast" and "slow", any common changer framework cannot
> make use of these cable types as the name doesn't mean anything.
> If the names were changed to 500MA/1A, then common code could reasonably
> assume how much current can safely be drawn from each.

 As I know, some fast charger can be drawn 5A, then do we need another
 macro named 5A? then will introduce more macros in future, I am not
 true this is helpful.
>>>
>>> It isn't really a question of what the charger can provide.  It is a
>>> question of what the cable reports to the phy.
>>
>> Yes, there is no spec to describe fast/slow charger type and how much
>> current fast/slow charger can provide. Maybe some fast charger can
>> provide 1A/2A, others can provide 5A, which depends on users'
>> platform. If we change to EXTCON_CHG_USB_500MA/1A and some fast
>> charger can provide 1.5A on user's platform, will it report the fast
>> charger type by EXTCON_CHG_USB_1A on user's platform (but it can
>> provide 1.5A)? So what I mean, can we keep 

Re: [PATCH 2/2] net: wireless: fix to uses struct

2016-12-21 Thread kbuild test robot
Hi Ozgur,

[auto build test ERROR on mac80211-next/master]
[also build test ERROR on v4.9 next-20161221]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Ozgur-Karatas/net-wireless-fixed-to-checkpatch-errors/20161222-125128
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git 
master
config: i386-allmodconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   net/wireless/reg.c: In function 'reg_query_builtin':
>> net/wireless/reg.c:493:28: error: 'reg_regdb_apply_request' undeclared 
>> (first use in this function)
 request = kzalloc(sizeof(*reg_regdb_apply_request), GFP_KERNEL);
   ^~~
   net/wireless/reg.c:493:28: note: each undeclared identifier is reported only 
once for each function it appears in
   net/wireless/reg.c: In function 'regulatory_hint_core':
   net/wireless/reg.c:2294:28: error: 'regulatory_request' undeclared (first 
use in this function)
 request = kzalloc(sizeof(*regulatory_request), GFP_KERNEL);
   ^~
   net/wireless/reg.c: In function 'regulatory_hint_user':
   net/wireless/reg.c:2316:28: error: 'regulatory_request' undeclared (first 
use in this function)
 request = kzalloc(sizeof(*regulatory_request), GFP_KERNEL);
   ^~
   net/wireless/reg.c: In function 'regulatory_hint':
   net/wireless/reg.c:2388:28: error: 'regulatory_request' undeclared (first 
use in this function)
 request = kzalloc(sizeof(*regulatory_request), GFP_KERNEL);
   ^~

vim +/reg_regdb_apply_request +493 net/wireless/reg.c

   487  }
   488  }
   489  
   490  if (!regdom)
   491  return -ENODATA;
   492  
 > 493  request = kzalloc(sizeof(*reg_regdb_apply_request), GFP_KERNEL);
   494  if (!request)
   495  return -ENOMEM;
   496  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH 2/2] net: wireless: fix to uses struct

2016-12-21 Thread kbuild test robot
Hi Ozgur,

[auto build test ERROR on mac80211-next/master]
[also build test ERROR on v4.9 next-20161221]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Ozgur-Karatas/net-wireless-fixed-to-checkpatch-errors/20161222-125128
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git 
master
config: i386-allmodconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   net/wireless/reg.c: In function 'reg_query_builtin':
>> net/wireless/reg.c:493:28: error: 'reg_regdb_apply_request' undeclared 
>> (first use in this function)
 request = kzalloc(sizeof(*reg_regdb_apply_request), GFP_KERNEL);
   ^~~
   net/wireless/reg.c:493:28: note: each undeclared identifier is reported only 
once for each function it appears in
   net/wireless/reg.c: In function 'regulatory_hint_core':
   net/wireless/reg.c:2294:28: error: 'regulatory_request' undeclared (first 
use in this function)
 request = kzalloc(sizeof(*regulatory_request), GFP_KERNEL);
   ^~
   net/wireless/reg.c: In function 'regulatory_hint_user':
   net/wireless/reg.c:2316:28: error: 'regulatory_request' undeclared (first 
use in this function)
 request = kzalloc(sizeof(*regulatory_request), GFP_KERNEL);
   ^~
   net/wireless/reg.c: In function 'regulatory_hint':
   net/wireless/reg.c:2388:28: error: 'regulatory_request' undeclared (first 
use in this function)
 request = kzalloc(sizeof(*regulatory_request), GFP_KERNEL);
   ^~

vim +/reg_regdb_apply_request +493 net/wireless/reg.c

   487  }
   488  }
   489  
   490  if (!regdom)
   491  return -ENODATA;
   492  
 > 493  request = kzalloc(sizeof(*reg_regdb_apply_request), GFP_KERNEL);
   494  if (!request)
   495  return -ENOMEM;
   496  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


RE: [PATCH] acpi: Fix format string type mistakes

2016-12-21 Thread Zheng, Lv
The original change doesn't handle ACPI_SIZE (which can be UINT64/UINT32 for 
different architectures) correctly.
I changed it when it is back ported.
This sounds like a different problem.

Thanks and best regards
Lv

> From: Moore, Robert
> Subject: RE: [PATCH] acpi: Fix format string type mistakes
> 
> These formatting changes will not compile under:
> 
> Gcc 4.4.5
> Gcc 5.4.0
> 
> The printf formatting stuff is very delicate, as ACPICA has to be compiled 
> under many different
> compilers.
> 
> Bob
> 
> > From: Zheng, Lv
> > Subject: RE: [PATCH] acpi: Fix format string type mistakes
> >
> > Hi, Kees and Emese
> >
> > The pull request is under rebasing.
> > So if you cannot reach the URL, find the commit here:
> > https://github.com/acpica/acpica/pull/196
> >
> > Thanks and best regards
> > Lv
> >
> > > From: Zheng, Lv
> > > Subject: RE: [PATCH] acpi: Fix format string type mistakes
> > >
> > > Hi, Kees and Emese
> > >
> > > I just helped to back port the commit here:
> > > https://github.com/acpica/acpica/pull/196/commits/5e64857f
> > > If you can see something wrong in it, please let me know.
> > >
> > > Thanks and best regards
> > > Lv
> > >
> > > > From: Devel [mailto:devel-boun...@acpica.org] On Behalf Of Zheng, Lv
> > > > Subject: Re: [Devel] [PATCH] acpi: Fix format string type mistakes
> > > >
> > > > Hi,
> > > >
> > > > > From: Kees Cook [mailto:keesc...@chromium.org]
> > > > > Subject: [PATCH] acpi: Fix format string type mistakes
> > > > >
> > > > > From: Emese Revfy 
> > > > >
> > > > > This adds the missing __printf attribute which allows compile time
> > > > > format string checking (and will be used by the coming initify gcc
> > > > > plugin). Additionally, this fixes the warnings exposed by the
> > attribute.
> > > > >
> > > > > Signed-off-by: Emese Revfy 
> > > > > [kees: split scsi/acpi, merged attr and fix, new commit messages]
> > > > > Signed-off-by: Kees Cook 
> > > > > ---
> > > > >  drivers/acpi/acpica/dbhistry.c |  2 +-
> > > > > drivers/acpi/acpica/dbinput.c  | 10 ++---
> > > > > drivers/acpi/acpica/dbstats.c  | 88
> > > > > +-
> > > > >  drivers/acpi/acpica/utdebug.c  |  2 +-
> > > > >  include/acpi/acpiosxf.h|  3 +-
> > > > >  5 files changed, 53 insertions(+), 52 deletions(-)
> > > > >
> > > > > diff --git a/drivers/acpi/acpica/dbhistry.c
> > > > > b/drivers/acpi/acpica/dbhistry.c index 46bd65d38df9..ec9da4830f6a
> > > > > 100644
> > > > > --- a/drivers/acpi/acpica/dbhistry.c
> > > > > +++ b/drivers/acpi/acpica/dbhistry.c
> > > > > @@ -155,7 +155,7 @@ void acpi_db_display_history(void)
> > > > >
> > > > >   for (i = 0; i < acpi_gbl_num_history; i++) {
> > > > >   if (acpi_gbl_history_buffer[history_index].command) {
> > > > > - acpi_os_printf("%3ld %s\n",
> > > > > + acpi_os_printf("%3u %s\n",
> > > > >
> > acpi_gbl_history_buffer[history_index].
> > > > >  cmd_num,
> > > > >
> > acpi_gbl_history_buffer[history_index].
> > > > > diff --git a/drivers/acpi/acpica/dbinput.c
> > > > > b/drivers/acpi/acpica/dbinput.c index 068214f9cc9d..43be06bdb790
> > > > > 100644
> > > > > --- a/drivers/acpi/acpica/dbinput.c
> > > > > +++ b/drivers/acpi/acpica/dbinput.c
> > > > > @@ -608,7 +608,7 @@ static u32 acpi_db_get_line(char
> > *input_buffer)
> > > > >   (acpi_gbl_db_parsed_buf, sizeof(acpi_gbl_db_parsed_buf),
> > > > >input_buffer)) {
> > > > >   acpi_os_printf
> > > > > - ("Buffer overflow while parsing input line (max %u
> > characters)\n",
> > > > > + ("Buffer overflow while parsing input line (max %lu
> > > > > +characters)\n",
> > > > >sizeof(acpi_gbl_db_parsed_buf));
> > > > >   return (0);
> > > > >   }
> > > > > @@ -864,24 +864,24 @@ acpi_db_command_dispatch(char *input_buffer,
> > > > >
> > > > >   if (param_count == 0) {
> > > > >   acpi_os_printf
> > > > > - ("Current debug level for file output is:
> > %8.8lX\n",
> > > > > + ("Current debug level for file output is:
> > %8.8X\n",
> > > > >acpi_gbl_db_debug_level);
> > > > >   acpi_os_printf
> > > > > - ("Current debug level for console output is:
> > %8.8lX\n",
> > > > > + ("Current debug level for console output is:
> > %8.8X\n",
> > > > >acpi_gbl_db_console_debug_level);
> > > > >   } else if (param_count == 2) {
> > > > >   temp = acpi_gbl_db_console_debug_level;
> > > > >   acpi_gbl_db_console_debug_level =
> > > > >   strtoul(acpi_gbl_db_args[1], NULL, 16);
> > > > >   acpi_os_printf
> > > > > - 

RE: [PATCH] acpi: Fix format string type mistakes

2016-12-21 Thread Zheng, Lv
The original change doesn't handle ACPI_SIZE (which can be UINT64/UINT32 for 
different architectures) correctly.
I changed it when it is back ported.
This sounds like a different problem.

Thanks and best regards
Lv

> From: Moore, Robert
> Subject: RE: [PATCH] acpi: Fix format string type mistakes
> 
> These formatting changes will not compile under:
> 
> Gcc 4.4.5
> Gcc 5.4.0
> 
> The printf formatting stuff is very delicate, as ACPICA has to be compiled 
> under many different
> compilers.
> 
> Bob
> 
> > From: Zheng, Lv
> > Subject: RE: [PATCH] acpi: Fix format string type mistakes
> >
> > Hi, Kees and Emese
> >
> > The pull request is under rebasing.
> > So if you cannot reach the URL, find the commit here:
> > https://github.com/acpica/acpica/pull/196
> >
> > Thanks and best regards
> > Lv
> >
> > > From: Zheng, Lv
> > > Subject: RE: [PATCH] acpi: Fix format string type mistakes
> > >
> > > Hi, Kees and Emese
> > >
> > > I just helped to back port the commit here:
> > > https://github.com/acpica/acpica/pull/196/commits/5e64857f
> > > If you can see something wrong in it, please let me know.
> > >
> > > Thanks and best regards
> > > Lv
> > >
> > > > From: Devel [mailto:devel-boun...@acpica.org] On Behalf Of Zheng, Lv
> > > > Subject: Re: [Devel] [PATCH] acpi: Fix format string type mistakes
> > > >
> > > > Hi,
> > > >
> > > > > From: Kees Cook [mailto:keesc...@chromium.org]
> > > > > Subject: [PATCH] acpi: Fix format string type mistakes
> > > > >
> > > > > From: Emese Revfy 
> > > > >
> > > > > This adds the missing __printf attribute which allows compile time
> > > > > format string checking (and will be used by the coming initify gcc
> > > > > plugin). Additionally, this fixes the warnings exposed by the
> > attribute.
> > > > >
> > > > > Signed-off-by: Emese Revfy 
> > > > > [kees: split scsi/acpi, merged attr and fix, new commit messages]
> > > > > Signed-off-by: Kees Cook 
> > > > > ---
> > > > >  drivers/acpi/acpica/dbhistry.c |  2 +-
> > > > > drivers/acpi/acpica/dbinput.c  | 10 ++---
> > > > > drivers/acpi/acpica/dbstats.c  | 88
> > > > > +-
> > > > >  drivers/acpi/acpica/utdebug.c  |  2 +-
> > > > >  include/acpi/acpiosxf.h|  3 +-
> > > > >  5 files changed, 53 insertions(+), 52 deletions(-)
> > > > >
> > > > > diff --git a/drivers/acpi/acpica/dbhistry.c
> > > > > b/drivers/acpi/acpica/dbhistry.c index 46bd65d38df9..ec9da4830f6a
> > > > > 100644
> > > > > --- a/drivers/acpi/acpica/dbhistry.c
> > > > > +++ b/drivers/acpi/acpica/dbhistry.c
> > > > > @@ -155,7 +155,7 @@ void acpi_db_display_history(void)
> > > > >
> > > > >   for (i = 0; i < acpi_gbl_num_history; i++) {
> > > > >   if (acpi_gbl_history_buffer[history_index].command) {
> > > > > - acpi_os_printf("%3ld %s\n",
> > > > > + acpi_os_printf("%3u %s\n",
> > > > >
> > acpi_gbl_history_buffer[history_index].
> > > > >  cmd_num,
> > > > >
> > acpi_gbl_history_buffer[history_index].
> > > > > diff --git a/drivers/acpi/acpica/dbinput.c
> > > > > b/drivers/acpi/acpica/dbinput.c index 068214f9cc9d..43be06bdb790
> > > > > 100644
> > > > > --- a/drivers/acpi/acpica/dbinput.c
> > > > > +++ b/drivers/acpi/acpica/dbinput.c
> > > > > @@ -608,7 +608,7 @@ static u32 acpi_db_get_line(char
> > *input_buffer)
> > > > >   (acpi_gbl_db_parsed_buf, sizeof(acpi_gbl_db_parsed_buf),
> > > > >input_buffer)) {
> > > > >   acpi_os_printf
> > > > > - ("Buffer overflow while parsing input line (max %u
> > characters)\n",
> > > > > + ("Buffer overflow while parsing input line (max %lu
> > > > > +characters)\n",
> > > > >sizeof(acpi_gbl_db_parsed_buf));
> > > > >   return (0);
> > > > >   }
> > > > > @@ -864,24 +864,24 @@ acpi_db_command_dispatch(char *input_buffer,
> > > > >
> > > > >   if (param_count == 0) {
> > > > >   acpi_os_printf
> > > > > - ("Current debug level for file output is:
> > %8.8lX\n",
> > > > > + ("Current debug level for file output is:
> > %8.8X\n",
> > > > >acpi_gbl_db_debug_level);
> > > > >   acpi_os_printf
> > > > > - ("Current debug level for console output is:
> > %8.8lX\n",
> > > > > + ("Current debug level for console output is:
> > %8.8X\n",
> > > > >acpi_gbl_db_console_debug_level);
> > > > >   } else if (param_count == 2) {
> > > > >   temp = acpi_gbl_db_console_debug_level;
> > > > >   acpi_gbl_db_console_debug_level =
> > > > >   strtoul(acpi_gbl_db_args[1], NULL, 16);
> > > > >   acpi_os_printf
> > > > > - ("Debug Level for console output was %8.8lX,
> > now %8.8lX\n",
> > > 

Re: [4.10, panic, regression] iscsi: null pointer deref at iscsi_tcp_segment_done+0x20d/0x2e0

2016-12-21 Thread Dave Chinner
On Wed, Dec 21, 2016 at 09:46:37PM -0800, Linus Torvalds wrote:
> On Wed, Dec 21, 2016 at 9:13 PM, Dave Chinner  wrote:
> >
> > There may be deeper issues. I just started running scalability tests
> > (e.g. 16-way fsmark create tests) and about a minute in I got a
> > directory corruption reported - something I hadn't seen in the dev
> > cycle at all.
> 
> By "in the dev cycle", do you mean your XFS changes, or have you been
> tracking the merge cycle at least for some testing?

I mean the three months leading up to the 4.10 merge, when all the
XFS changes were being tested against 4.9-rc kernels.

The iscsi problem showed up when I updated the base kernel from
4.9 to 4.10-current last week to test the pullreq I was going to
send you. I've been bust with other stuff until now, so I didn't
upgrade my working trees again until today in the hope the iscsi
problem had already been found and fixed.

> > I unmounted the fs, mkfs'd it again, ran the
> > workload again and about a minute in this fired:
> >
> > [628867.607417] [ cut here ]
> > [628867.608603] WARNING: CPU: 2 PID: 16925 at mm/workingset.c:461 
> > shadow_lru_isolate+0x171/0x220
> 
> Well, part of the changes during the merge window were the shadow
> entry tracking changes that came in through Andrew's tree. Adding
> Johannes Weiner to the participants.
> 
> > Now, this workload does not touch the page cache at all - it's
> > entirely an XFS metadata workload, so it should not really be
> > affecting the working set code.
> 
> Well, I suspect that anything that creates memory pressure will end up
> triggering the working set code, so ..
> 
> That said, obviously memory corruption could be involved and result in
> random issues too, but I wouldn't really expect that in this code.
> 
> It would probably be really useful to get more data points - is the
> problem reliably in this area, or is it going to be random and all
> over the place.

The iscsi problem is 100% reproducable. create a pair of iscsi luns,
mkfs, run xfstests on them. iscsi fails a second after xfstests mounts
the filesystems.

The test machine I'm having all these other problems on? stable and
steady as a rock using PMEM devices. Moment I go to use /dev/vdc
(i.e. run load/perf benchmarks) it starts falling over left, right
and center.

And I just smacked into this in the bulkstat phase of the benchmark
(mkfs, fsmark, xfs_repair, mount, bulkstat, find, grep, rm):

[ 2729.750563] BUG: Bad page state in process bstat  pfn:14945
[ 2729.751863] page:ea525140 count:-1 mapcount:0 mapping:  
(null) index:0x0
[ 2729.753763] flags: 0x4000()
[ 2729.754671] raw: 4000   

[ 2729.756469] raw: dead0100 dead0200  

[ 2729.758276] page dumped because: nonzero _refcount
[ 2729.759393] Modules linked in:
[ 2729.760137] CPU: 7 PID: 25902 Comm: bstat Tainted: GB   
4.9.0-dgc #18
[ 2729.761888] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Debian-1.8.2-1 04/01/2014
[ 2729.763943] Call Trace:
[ 2729.764523]  
[ 2729.765004]  dump_stack+0x63/0x83
[ 2729.765784]  bad_page+0xc4/0x130
[ 2729.766552]  free_pages_check_bad+0x4f/0x70
[ 2729.767531]  free_pcppages_bulk+0x3c5/0x3d0
[ 2729.768513]  ? page_alloc_cpu_dead+0x30/0x30
[ 2729.769510]  drain_pages_zone+0x41/0x60
[ 2729.770417]  drain_pages+0x3e/0x60
[ 2729.771215]  drain_local_pages+0x24/0x30
[ 2729.772138]  flush_smp_call_function_queue+0x88/0x160
[ 2729.773317]  generic_smp_call_function_single_interrupt+0x13/0x30
[ 2729.774742]  smp_call_function_single_interrupt+0x27/0x40
[ 2729.776000]  smp_call_function_interrupt+0xe/0x10
[ 2729.777102]  call_function_interrupt+0x8e/0xa0
[ 2729.778147] RIP: 0010:delay_tsc+0x41/0x90
[ 2729.779085] RSP: 0018:c9000f0cf500 EFLAGS: 0202 ORIG_RAX: 
ff03
[ 2729.780852] RAX: 77541291 RBX: 88008b5efe40 RCX: 002e
[ 2729.782514] RDX: 0577 RSI: 05541291 RDI: 0001
[ 2729.784167] RBP: c9000f0cf500 R08: 0007 R09: c9000f0cf678
[ 2729.785818] R10: 0006 R11: 1000 R12: 0061
[ 2729.787480] R13: 0001 R14: 83214e30 R15: 0080
[ 2729.789124]  
[ 2729.789626]  __delay+0xf/0x20
[ 2729.790333]  do_raw_spin_lock+0x8c/0x160
[ 2729.791255]  _raw_spin_lock+0x15/0x20
[ 2729.792112]  list_lru_add+0x1a/0x70
[ 2729.792932]  xfs_buf_rele+0x3e7/0x410
[ 2729.793792]  xfs_buftarg_shrink_scan+0x6b/0x80
[ 2729.794841]  shrink_slab.part.65.constprop.86+0x1dc/0x410
[ 2729.796099]  shrink_node+0x57/0x90
[ 2729.796905]  do_try_to_free_pages+0xdd/0x230
[ 2729.797914]  try_to_free_pages+0xce/0x1a0
[ 2729.798852]  __alloc_pages_slowpath+0x2df/0x960
[ 2729.799908]  __alloc_pages_nodemask+0x24b/0x290
[ 2729.800963]  new_slab+0x2ac/0x380
[ 2729.801743]  ___slab_alloc.constprop.82+0x336/0x440
[ 

Re: [4.10, panic, regression] iscsi: null pointer deref at iscsi_tcp_segment_done+0x20d/0x2e0

2016-12-21 Thread Dave Chinner
On Wed, Dec 21, 2016 at 09:46:37PM -0800, Linus Torvalds wrote:
> On Wed, Dec 21, 2016 at 9:13 PM, Dave Chinner  wrote:
> >
> > There may be deeper issues. I just started running scalability tests
> > (e.g. 16-way fsmark create tests) and about a minute in I got a
> > directory corruption reported - something I hadn't seen in the dev
> > cycle at all.
> 
> By "in the dev cycle", do you mean your XFS changes, or have you been
> tracking the merge cycle at least for some testing?

I mean the three months leading up to the 4.10 merge, when all the
XFS changes were being tested against 4.9-rc kernels.

The iscsi problem showed up when I updated the base kernel from
4.9 to 4.10-current last week to test the pullreq I was going to
send you. I've been bust with other stuff until now, so I didn't
upgrade my working trees again until today in the hope the iscsi
problem had already been found and fixed.

> > I unmounted the fs, mkfs'd it again, ran the
> > workload again and about a minute in this fired:
> >
> > [628867.607417] [ cut here ]
> > [628867.608603] WARNING: CPU: 2 PID: 16925 at mm/workingset.c:461 
> > shadow_lru_isolate+0x171/0x220
> 
> Well, part of the changes during the merge window were the shadow
> entry tracking changes that came in through Andrew's tree. Adding
> Johannes Weiner to the participants.
> 
> > Now, this workload does not touch the page cache at all - it's
> > entirely an XFS metadata workload, so it should not really be
> > affecting the working set code.
> 
> Well, I suspect that anything that creates memory pressure will end up
> triggering the working set code, so ..
> 
> That said, obviously memory corruption could be involved and result in
> random issues too, but I wouldn't really expect that in this code.
> 
> It would probably be really useful to get more data points - is the
> problem reliably in this area, or is it going to be random and all
> over the place.

The iscsi problem is 100% reproducable. create a pair of iscsi luns,
mkfs, run xfstests on them. iscsi fails a second after xfstests mounts
the filesystems.

The test machine I'm having all these other problems on? stable and
steady as a rock using PMEM devices. Moment I go to use /dev/vdc
(i.e. run load/perf benchmarks) it starts falling over left, right
and center.

And I just smacked into this in the bulkstat phase of the benchmark
(mkfs, fsmark, xfs_repair, mount, bulkstat, find, grep, rm):

[ 2729.750563] BUG: Bad page state in process bstat  pfn:14945
[ 2729.751863] page:ea525140 count:-1 mapcount:0 mapping:  
(null) index:0x0
[ 2729.753763] flags: 0x4000()
[ 2729.754671] raw: 4000   

[ 2729.756469] raw: dead0100 dead0200  

[ 2729.758276] page dumped because: nonzero _refcount
[ 2729.759393] Modules linked in:
[ 2729.760137] CPU: 7 PID: 25902 Comm: bstat Tainted: GB   
4.9.0-dgc #18
[ 2729.761888] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Debian-1.8.2-1 04/01/2014
[ 2729.763943] Call Trace:
[ 2729.764523]  
[ 2729.765004]  dump_stack+0x63/0x83
[ 2729.765784]  bad_page+0xc4/0x130
[ 2729.766552]  free_pages_check_bad+0x4f/0x70
[ 2729.767531]  free_pcppages_bulk+0x3c5/0x3d0
[ 2729.768513]  ? page_alloc_cpu_dead+0x30/0x30
[ 2729.769510]  drain_pages_zone+0x41/0x60
[ 2729.770417]  drain_pages+0x3e/0x60
[ 2729.771215]  drain_local_pages+0x24/0x30
[ 2729.772138]  flush_smp_call_function_queue+0x88/0x160
[ 2729.773317]  generic_smp_call_function_single_interrupt+0x13/0x30
[ 2729.774742]  smp_call_function_single_interrupt+0x27/0x40
[ 2729.776000]  smp_call_function_interrupt+0xe/0x10
[ 2729.777102]  call_function_interrupt+0x8e/0xa0
[ 2729.778147] RIP: 0010:delay_tsc+0x41/0x90
[ 2729.779085] RSP: 0018:c9000f0cf500 EFLAGS: 0202 ORIG_RAX: 
ff03
[ 2729.780852] RAX: 77541291 RBX: 88008b5efe40 RCX: 002e
[ 2729.782514] RDX: 0577 RSI: 05541291 RDI: 0001
[ 2729.784167] RBP: c9000f0cf500 R08: 0007 R09: c9000f0cf678
[ 2729.785818] R10: 0006 R11: 1000 R12: 0061
[ 2729.787480] R13: 0001 R14: 83214e30 R15: 0080
[ 2729.789124]  
[ 2729.789626]  __delay+0xf/0x20
[ 2729.790333]  do_raw_spin_lock+0x8c/0x160
[ 2729.791255]  _raw_spin_lock+0x15/0x20
[ 2729.792112]  list_lru_add+0x1a/0x70
[ 2729.792932]  xfs_buf_rele+0x3e7/0x410
[ 2729.793792]  xfs_buftarg_shrink_scan+0x6b/0x80
[ 2729.794841]  shrink_slab.part.65.constprop.86+0x1dc/0x410
[ 2729.796099]  shrink_node+0x57/0x90
[ 2729.796905]  do_try_to_free_pages+0xdd/0x230
[ 2729.797914]  try_to_free_pages+0xce/0x1a0
[ 2729.798852]  __alloc_pages_slowpath+0x2df/0x960
[ 2729.799908]  __alloc_pages_nodemask+0x24b/0x290
[ 2729.800963]  new_slab+0x2ac/0x380
[ 2729.801743]  ___slab_alloc.constprop.82+0x336/0x440
[ 2729.802890]  ? 

Detecting kprobes generated code addresses

2016-12-21 Thread Josh Poimboeuf
Hi Masami,

I would like to make __kernel_text_address() be able to detect whether
an address belongs to code which was generated by kprobes.  As far as I
can tell, that information seems to be in the 'pages' lists of
kprobe_insn_slots and kprobe_optinsn_slots.  But they seem to be
protected by mutexes.  Do you know if there's a sleep-free way to access
that
protected 

-- 
Josh


Detecting kprobes generated code addresses

2016-12-21 Thread Josh Poimboeuf
Hi Masami,

I would like to make __kernel_text_address() be able to detect whether
an address belongs to code which was generated by kprobes.  As far as I
can tell, that information seems to be in the 'pages' lists of
kprobe_insn_slots and kprobe_optinsn_slots.  But they seem to be
protected by mutexes.  Do you know if there's a sleep-free way to access
that
protected 

-- 
Josh


[PATCH] ib umem: bug: put pid back before return from error path

2016-12-21 Thread Kenneth Lee
I catched this bug when reading the code. I'm sorry I have no hardware to test
it. But it is abviously a bug.

Signed-off-by: Kenneth Lee 
---
 drivers/infiniband/core/umem.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 1e62a5f..4609b92 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -134,6 +134,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, 
unsigned long addr,
 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
 
if (access & IB_ACCESS_ON_DEMAND) {
+   put_pid(umem->pid);
ret = ib_umem_odp_get(context, umem);
if (ret) {
kfree(umem);
@@ -149,6 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, 
unsigned long addr,
 
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
+   put_pid(umem->pid);
kfree(umem);
return ERR_PTR(-ENOMEM);
}
-- 
1.9.1



[PATCH] ib umem: bug: put pid back before return from error path

2016-12-21 Thread Kenneth Lee
I catched this bug when reading the code. I'm sorry I have no hardware to test
it. But it is abviously a bug.

Signed-off-by: Kenneth Lee 
---
 drivers/infiniband/core/umem.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 1e62a5f..4609b92 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -134,6 +134,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, 
unsigned long addr,
 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
 
if (access & IB_ACCESS_ON_DEMAND) {
+   put_pid(umem->pid);
ret = ib_umem_odp_get(context, umem);
if (ret) {
kfree(umem);
@@ -149,6 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, 
unsigned long addr,
 
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
+   put_pid(umem->pid);
kfree(umem);
return ERR_PTR(-ENOMEM);
}
-- 
1.9.1



Detecting kprobes generated code addresses

2016-12-21 Thread Josh Poimboeuf
Hi Masami,

I would like to make __kernel_text_address() be able to detect whether
an address belongs to code which was generated by kprobes.  As far as I
can tell, that information seems to be in the 'pages' lists of
kprobe_insn_slots and kprobe_optinsn_slots.  But they seem to be
protected by mutexes.  Do you know if there's a sleep-free way to access
that information?

-- 
Josh


Detecting kprobes generated code addresses

2016-12-21 Thread Josh Poimboeuf
Hi Masami,

I would like to make __kernel_text_address() be able to detect whether
an address belongs to code which was generated by kprobes.  As far as I
can tell, that information seems to be in the 'pages' lists of
kprobe_insn_slots and kprobe_optinsn_slots.  But they seem to be
protected by mutexes.  Do you know if there's a sleep-free way to access
that information?

-- 
Josh


Re: [4.10, panic, regression] iscsi: null pointer deref at iscsi_tcp_segment_done+0x20d/0x2e0

2016-12-21 Thread Christoph Hellwig
On Thu, Dec 22, 2016 at 05:30:46PM +1100, Dave Chinner wrote:
> > For "normal" bios the for_each_segment loop iterates over bi_vcnt,
> > so it will be ignored anyway.  That being said both I and the lists
> > got CCed halfway through the thread and I haven't seen the original
> > report, so I'm not really sure what's going on here anyway.
> 
> http://www.gossamer-threads.com/lists/linux/kernel/2587485

This doesn't look like the discard changes, but if Chris wants to test
without them f9d03f96b988 reverts cleanly.


Re: [4.10, panic, regression] iscsi: null pointer deref at iscsi_tcp_segment_done+0x20d/0x2e0

2016-12-21 Thread Christoph Hellwig
On Thu, Dec 22, 2016 at 05:30:46PM +1100, Dave Chinner wrote:
> > For "normal" bios the for_each_segment loop iterates over bi_vcnt,
> > so it will be ignored anyway.  That being said both I and the lists
> > got CCed halfway through the thread and I haven't seen the original
> > report, so I'm not really sure what's going on here anyway.
> 
> http://www.gossamer-threads.com/lists/linux/kernel/2587485

This doesn't look like the discard changes, but if Chris wants to test
without them f9d03f96b988 reverts cleanly.


Re: [PATCH v5 0/6] inherit dma configuration from parent dev

2016-12-21 Thread Vivek Gautam
On Thu, Nov 17, 2016 at 5:13 PM, Sriram Dash  wrote:
> For xhci-hcd platform device, all the DMA parameters are not
> configured properly, notably dma ops for dwc3 devices.
>
> The idea here is that you pass in the parent of_node along
> with the child device pointer, so it would behave exactly
> like the parent already does. The difference is that it also
> handles all the other attributes besides the mask.
>
> Arnd Bergmann (6):
>   usb: separate out sysdev pointer from usb_bus
>   usb: chipidea: use bus->sysdev for DMA configuration
>   usb: ehci: fsl: use bus->sysdev for DMA configuration
>   usb: xhci: use bus->sysdev for DMA configuration
>   usb: dwc3: use bus->sysdev for DMA configuration
>   usb: dwc3: Do not set dma coherent mask

Tested patches 1, 4 & 5 on db820c platform with required set of patches [1] for
phy.

Tested-by: Vivek Gautam 
for the above mentioned patches.

[1] https://lkml.org/lkml/2016/12/20/392


-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH v5 0/6] inherit dma configuration from parent dev

2016-12-21 Thread Vivek Gautam
On Thu, Nov 17, 2016 at 5:13 PM, Sriram Dash  wrote:
> For xhci-hcd platform device, all the DMA parameters are not
> configured properly, notably dma ops for dwc3 devices.
>
> The idea here is that you pass in the parent of_node along
> with the child device pointer, so it would behave exactly
> like the parent already does. The difference is that it also
> handles all the other attributes besides the mask.
>
> Arnd Bergmann (6):
>   usb: separate out sysdev pointer from usb_bus
>   usb: chipidea: use bus->sysdev for DMA configuration
>   usb: ehci: fsl: use bus->sysdev for DMA configuration
>   usb: xhci: use bus->sysdev for DMA configuration
>   usb: dwc3: use bus->sysdev for DMA configuration
>   usb: dwc3: Do not set dma coherent mask

Tested patches 1, 4 & 5 on db820c platform with required set of patches [1] for
phy.

Tested-by: Vivek Gautam 
for the above mentioned patches.

[1] https://lkml.org/lkml/2016/12/20/392


-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


[GIT PULL] SELinux fix for 4.10

2016-12-21 Thread James Morris
Please pull.

>From Paul: "A small SELinux patch to fix some clang/llvm compiler warnings 
and ensure the tools under scripts work well in the face of kernel 
changes."


The following changes since commit 52bce91165e5f2db422b2b972e83d389e5e4725c:

  splice: reinstate SIGPIPE/EPIPE handling (2016-12-21 10:59:34 -0800)

are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git 
for-linus

James Morris (1):
  Merge branch 'stable-4.10' of 
git://git.infradead.org/users/pcmoore/selinux into for-linus

Paul Moore (1):
  selinux: use the kernel headers when building scripts/selinux

 scripts/selinux/genheaders/Makefile |4 +++-
 scripts/selinux/genheaders/genheaders.c |4 
 scripts/selinux/mdp/Makefile|4 +++-
 scripts/selinux/mdp/mdp.c   |4 
 security/selinux/include/classmap.h |2 ++
 5 files changed, 16 insertions(+), 2 deletions(-)

---

commit bfc5e3a6af397dcf9c99a6c1872458e7867c4680
Author: Paul Moore 
Date:   Wed Dec 21 10:39:25 2016 -0500

selinux: use the kernel headers when building scripts/selinux

Commit 3322d0d64f4e ("selinux: keep SELinux in sync with new capability
definitions") added a check on the defined capabilities without
explicitly including the capability header file which caused problems
when building genheaders for users of clang/llvm.  Resolve this by
using the kernel headers when building genheaders, which is arguably
the right thing to do regardless, and explicitly including the
kernel's capability.h header file in classmap.h.  We also update the
mdp build, even though it wasn't causing an error we really should
be using the headers from the kernel we are building.

Reported-by: Nicolas Iooss 
Signed-off-by: Paul Moore 

diff --git a/scripts/selinux/genheaders/Makefile 
b/scripts/selinux/genheaders/Makefile
index 1d1ac51..6fc2b87 100644
--- a/scripts/selinux/genheaders/Makefile
+++ b/scripts/selinux/genheaders/Makefile
@@ -1,4 +1,6 @@
 hostprogs-y:= genheaders
-HOST_EXTRACFLAGS += -Isecurity/selinux/include
+HOST_EXTRACFLAGS += \
+   -I$(srctree)/include/uapi -I$(srctree)/include \
+   -I$(srctree)/security/selinux/include
 
 always := $(hostprogs-y)
diff --git a/scripts/selinux/genheaders/genheaders.c 
b/scripts/selinux/genheaders/genheaders.c
index 539855f..f4dd41f 100644
--- a/scripts/selinux/genheaders/genheaders.c
+++ b/scripts/selinux/genheaders/genheaders.c
@@ -1,3 +1,7 @@
+
+/* NOTE: we really do want to use the kernel headers here */
+#define __EXPORTED_HEADERS__
+
 #include 
 #include 
 #include 
diff --git a/scripts/selinux/mdp/Makefile b/scripts/selinux/mdp/Makefile
index dba7eff..d6a83ca 100644
--- a/scripts/selinux/mdp/Makefile
+++ b/scripts/selinux/mdp/Makefile
@@ -1,5 +1,7 @@
 hostprogs-y:= mdp
-HOST_EXTRACFLAGS += -Isecurity/selinux/include
+HOST_EXTRACFLAGS += \
+   -I$(srctree)/include/uapi -I$(srctree)/include \
+   -I$(srctree)/security/selinux/include
 
 always := $(hostprogs-y)
 clean-files:= policy.* file_contexts
diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c
index e10beb1..c29fa4a 100644
--- a/scripts/selinux/mdp/mdp.c
+++ b/scripts/selinux/mdp/mdp.c
@@ -24,6 +24,10 @@
  * Authors: Serge E. Hallyn 
  */
 
+
+/* NOTE: we really do want to use the kernel headers here */
+#define __EXPORTED_HEADERS__
+
 #include 
 #include 
 #include 
diff --git a/security/selinux/include/classmap.h 
b/security/selinux/include/classmap.h
index e2d4ad3..13ae49b 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -1,3 +1,5 @@
+#include 
+
 #define COMMON_FILE_SOCK_PERMS "ioctl", "read", "write", "create", \
 "getattr", "setattr", "lock", "relabelfrom", "relabelto", "append"
 


[GIT PULL] SELinux fix for 4.10

2016-12-21 Thread James Morris
Please pull.

>From Paul: "A small SELinux patch to fix some clang/llvm compiler warnings 
and ensure the tools under scripts work well in the face of kernel 
changes."


The following changes since commit 52bce91165e5f2db422b2b972e83d389e5e4725c:

  splice: reinstate SIGPIPE/EPIPE handling (2016-12-21 10:59:34 -0800)

are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git 
for-linus

James Morris (1):
  Merge branch 'stable-4.10' of 
git://git.infradead.org/users/pcmoore/selinux into for-linus

Paul Moore (1):
  selinux: use the kernel headers when building scripts/selinux

 scripts/selinux/genheaders/Makefile |4 +++-
 scripts/selinux/genheaders/genheaders.c |4 
 scripts/selinux/mdp/Makefile|4 +++-
 scripts/selinux/mdp/mdp.c   |4 
 security/selinux/include/classmap.h |2 ++
 5 files changed, 16 insertions(+), 2 deletions(-)

---

commit bfc5e3a6af397dcf9c99a6c1872458e7867c4680
Author: Paul Moore 
Date:   Wed Dec 21 10:39:25 2016 -0500

selinux: use the kernel headers when building scripts/selinux

Commit 3322d0d64f4e ("selinux: keep SELinux in sync with new capability
definitions") added a check on the defined capabilities without
explicitly including the capability header file which caused problems
when building genheaders for users of clang/llvm.  Resolve this by
using the kernel headers when building genheaders, which is arguably
the right thing to do regardless, and explicitly including the
kernel's capability.h header file in classmap.h.  We also update the
mdp build, even though it wasn't causing an error we really should
be using the headers from the kernel we are building.

Reported-by: Nicolas Iooss 
Signed-off-by: Paul Moore 

diff --git a/scripts/selinux/genheaders/Makefile 
b/scripts/selinux/genheaders/Makefile
index 1d1ac51..6fc2b87 100644
--- a/scripts/selinux/genheaders/Makefile
+++ b/scripts/selinux/genheaders/Makefile
@@ -1,4 +1,6 @@
 hostprogs-y:= genheaders
-HOST_EXTRACFLAGS += -Isecurity/selinux/include
+HOST_EXTRACFLAGS += \
+   -I$(srctree)/include/uapi -I$(srctree)/include \
+   -I$(srctree)/security/selinux/include
 
 always := $(hostprogs-y)
diff --git a/scripts/selinux/genheaders/genheaders.c 
b/scripts/selinux/genheaders/genheaders.c
index 539855f..f4dd41f 100644
--- a/scripts/selinux/genheaders/genheaders.c
+++ b/scripts/selinux/genheaders/genheaders.c
@@ -1,3 +1,7 @@
+
+/* NOTE: we really do want to use the kernel headers here */
+#define __EXPORTED_HEADERS__
+
 #include 
 #include 
 #include 
diff --git a/scripts/selinux/mdp/Makefile b/scripts/selinux/mdp/Makefile
index dba7eff..d6a83ca 100644
--- a/scripts/selinux/mdp/Makefile
+++ b/scripts/selinux/mdp/Makefile
@@ -1,5 +1,7 @@
 hostprogs-y:= mdp
-HOST_EXTRACFLAGS += -Isecurity/selinux/include
+HOST_EXTRACFLAGS += \
+   -I$(srctree)/include/uapi -I$(srctree)/include \
+   -I$(srctree)/security/selinux/include
 
 always := $(hostprogs-y)
 clean-files:= policy.* file_contexts
diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c
index e10beb1..c29fa4a 100644
--- a/scripts/selinux/mdp/mdp.c
+++ b/scripts/selinux/mdp/mdp.c
@@ -24,6 +24,10 @@
  * Authors: Serge E. Hallyn 
  */
 
+
+/* NOTE: we really do want to use the kernel headers here */
+#define __EXPORTED_HEADERS__
+
 #include 
 #include 
 #include 
diff --git a/security/selinux/include/classmap.h 
b/security/selinux/include/classmap.h
index e2d4ad3..13ae49b 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -1,3 +1,5 @@
+#include 
+
 #define COMMON_FILE_SOCK_PERMS "ioctl", "read", "write", "create", \
 "getattr", "setattr", "lock", "relabelfrom", "relabelto", "append"
 


Re: [4.10, panic, regression] iscsi: null pointer deref at iscsi_tcp_segment_done+0x20d/0x2e0

2016-12-21 Thread Dave Chinner
On Thu, Dec 22, 2016 at 07:18:27AM +0100, Christoph Hellwig wrote:
> On Wed, Dec 21, 2016 at 03:19:15PM -0800, Linus Torvalds wrote:
> > Looking around a bit, the only even halfway suspicious scatterlist
> > initialization thing I see is commit f9d03f96b988 ("block: improve
> > handling of the magic discard payload") which used to have a magic
> > hack wrt !bio->bi_vcnt, and that got removed. See __blk_bios_map_sg(),
> > now it does __blk_bvec_map_sg() instead.
> 
> But that check was only for discard (and discard-like) bios which
> had the maic single page that sometimes was unused attached.
> 
> For "normal" bios the for_each_segment loop iterates over bi_vcnt,
> so it will be ignored anyway.  That being said both I and the lists
> got CCed halfway through the thread and I haven't seen the original
> report, so I'm not really sure what's going on here anyway.

http://www.gossamer-threads.com/lists/linux/kernel/2587485

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com


Re: Patch to include/linux/kernel.h breaks 3rd party modules.

2016-12-21 Thread Christoph Hellwig
On Wed, Dec 21, 2016 at 03:42:05PM -0500, Valdis Kletnieks wrote:
> Yes, I know that usually out-of-tree modules are on their own.
> However, this one may require a rethink..
> 
> (Sorry for not catching this sooner, I hadn't tried to deal with the
> affected module since this patch hit linux-next in next-20161128)

So fix your out of tree module.


Re: [4.10, panic, regression] iscsi: null pointer deref at iscsi_tcp_segment_done+0x20d/0x2e0

2016-12-21 Thread Dave Chinner
On Thu, Dec 22, 2016 at 07:18:27AM +0100, Christoph Hellwig wrote:
> On Wed, Dec 21, 2016 at 03:19:15PM -0800, Linus Torvalds wrote:
> > Looking around a bit, the only even halfway suspicious scatterlist
> > initialization thing I see is commit f9d03f96b988 ("block: improve
> > handling of the magic discard payload") which used to have a magic
> > hack wrt !bio->bi_vcnt, and that got removed. See __blk_bios_map_sg(),
> > now it does __blk_bvec_map_sg() instead.
> 
> But that check was only for discard (and discard-like) bios which
> had the maic single page that sometimes was unused attached.
> 
> For "normal" bios the for_each_segment loop iterates over bi_vcnt,
> so it will be ignored anyway.  That being said both I and the lists
> got CCed halfway through the thread and I haven't seen the original
> report, so I'm not really sure what's going on here anyway.

http://www.gossamer-threads.com/lists/linux/kernel/2587485

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com


Re: Patch to include/linux/kernel.h breaks 3rd party modules.

2016-12-21 Thread Christoph Hellwig
On Wed, Dec 21, 2016 at 03:42:05PM -0500, Valdis Kletnieks wrote:
> Yes, I know that usually out-of-tree modules are on their own.
> However, this one may require a rethink..
> 
> (Sorry for not catching this sooner, I hadn't tried to deal with the
> affected module since this patch hit linux-next in next-20161128)

So fix your out of tree module.


Re: [4.10, panic, regression] iscsi: null pointer deref at iscsi_tcp_segment_done+0x20d/0x2e0

2016-12-21 Thread Dave Chinner
On Thu, Dec 22, 2016 at 04:13:22PM +1100, Dave Chinner wrote:
> On Wed, Dec 21, 2016 at 04:13:03PM -0800, Chris Leech wrote:
> > On Wed, Dec 21, 2016 at 03:19:15PM -0800, Linus Torvalds wrote:
> > > Hi,
> > > 
> > > On Wed, Dec 21, 2016 at 2:16 PM, Dave Chinner  wrote:
> > > > On Fri, Dec 16, 2016 at 10:59:06AM -0800, Chris Leech wrote:
> > > >> Thanks Dave,
> > > >>
> > > >> I'm hitting a bug at scatterlist.h:140 before I even get any iSCSI
> > > >> modules loaded (virtio block) so there's something else going on in the
> > > >> current merge window.  I'll keep an eye on it and make sure there's
> > > >> nothing iSCSI needs fixing for.
> > > >
> > > > OK, so before this slips through the cracks.
> > > >
> > > > Linus - your tree as of a few minutes ago still panics immediately
> > > > when starting xfstests on iscsi devices. It appears to be a
> > > > scatterlist corruption and not an iscsi problem, so the iscsi guys
> > > > seem to have bounced it and no-one is looking at it.
> > > 
> > > Hmm. There's not much to go by.
> > > 
> > > Can somebody in iscsi-land please try to just bisect it - I'm not
> > > seeing a lot of clues to where this comes from otherwise.
> > 
> > Yeah, my hopes of this being quickly resolved by someone else didn't
> > work out and whatever is going on in that test VM is looking like a
> > different kind of odd.  I'm saving that off for later, and seeing if I
> > can't be a bisect on the iSCSI issue.
> 
> There may be deeper issues. I just started running scalability tests
> (e.g. 16-way fsmark create tests) and about a minute in I got a
> directory corruption reported - something I hadn't seen in the dev
> cycle at all. I unmounted the fs, mkfs'd it again, ran the
> workload again and about a minute in this fired:
> 
> [628867.607417] [ cut here ]
> [628867.608603] WARNING: CPU: 2 PID: 16925 at mm/workingset.c:461 
> shadow_lru_isolate+0x171/0x220
> [628867.610702] Modules linked in:
> [628867.611375] CPU: 2 PID: 16925 Comm: kworker/2:97 Tainted: GW  
>  4.9.0-dgc #18
> [628867.613382] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> Debian-1.8.2-1 04/01/2014
> [628867.616179] Workqueue: events rht_deferred_worker
> [628867.632422] Call Trace:
> [628867.634691]  dump_stack+0x63/0x83
> [628867.637937]  __warn+0xcb/0xf0
> [628867.641359]  warn_slowpath_null+0x1d/0x20
> [628867.643362]  shadow_lru_isolate+0x171/0x220
> [628867.644627]  __list_lru_walk_one.isra.11+0x79/0x110
> [628867.645780]  ? __list_lru_init+0x70/0x70
> [628867.646628]  list_lru_walk_one+0x17/0x20
> [628867.647488]  scan_shadow_nodes+0x34/0x50
> [628867.648358]  shrink_slab.part.65.constprop.86+0x1dc/0x410
> [628867.649506]  shrink_node+0x57/0x90
> [628867.650233]  do_try_to_free_pages+0xdd/0x230
> [628867.651157]  try_to_free_pages+0xce/0x1a0
> [628867.652342]  __alloc_pages_slowpath+0x2df/0x960
> [628867.653332]  ? __might_sleep+0x4a/0x80
> [628867.654148]  __alloc_pages_nodemask+0x24b/0x290
> [628867.655237]  kmalloc_order+0x21/0x50
> [628867.656016]  kmalloc_order_trace+0x24/0xc0
> [628867.656878]  __kmalloc+0x17d/0x1d0
> [628867.657644]  bucket_table_alloc+0x195/0x1d0
> [628867.658564]  ? __might_sleep+0x4a/0x80
> [628867.659449]  rht_deferred_worker+0x287/0x3c0
> [628867.660366]  ? _raw_spin_unlock_irq+0xe/0x30
> [628867.661294]  process_one_work+0x1de/0x4d0
> [628867.662208]  worker_thread+0x4b/0x4f0
> [628867.662990]  kthread+0x10c/0x140
> [628867.663687]  ? process_one_work+0x4d0/0x4d0
> [628867.664564]  ? kthread_create_on_node+0x40/0x40
> [628867.665523]  ret_from_fork+0x25/0x30
> [628867.666317] ---[ end trace 7c38634006a9955e ]---
> 
> Now, this workload does not touch the page cache at all - it's
> entirely an XFS metadata workload, so it should not really be
> affecting the working set code.

The system back up, and I haven't reproduced this problem yet.
However, benchmark results are way off where they should be, and at
times the performance is utterly abysmal. The XFS for-next tree
based on the 4.9 kernel shows none of these problems, so I don't
think there's an XFS problem here. Workload is the same 16-way
fsmark workload that I've been using for years as a performance
regression test.

The workload normally averages around 230k files/s - i'm seeing
and average of ~175k files/s on you current kernel. And there are
periods where performance just completely tanks:

#  ./fs_mark  -D  1  -S0  -n  10  -s  0  -L  32  -d  /mnt/scratch/0  -d 
 /mnt/scratch/1  -d  /mnt/scratch/2  -d  /mnt/scratch/3  -d  /mnt/scratch/4  -d 
 /mnt/scratch/5  -d  /mnt/scratch/6  -d  /mnt/scratch/7  -d  /mnt/scratch/8  -d 
 /mnt/scratch/9  -d  /mnt/scratch/10  -d  /mnt/scratch/11  -d  /mnt/scratch/12  
-d  /mnt/scratch/13  -d  /mnt/scratch/14  -d  /mnt/scratch/15
#   Version 3.3, 16 thread(s) starting at Thu Dec 22 16:29:20 2016
#   Sync method: NO SYNC: Test does not issue sync() or fsync() calls.
#   Directories:  Time based hash 

Re: [4.10, panic, regression] iscsi: null pointer deref at iscsi_tcp_segment_done+0x20d/0x2e0

2016-12-21 Thread Dave Chinner
On Thu, Dec 22, 2016 at 04:13:22PM +1100, Dave Chinner wrote:
> On Wed, Dec 21, 2016 at 04:13:03PM -0800, Chris Leech wrote:
> > On Wed, Dec 21, 2016 at 03:19:15PM -0800, Linus Torvalds wrote:
> > > Hi,
> > > 
> > > On Wed, Dec 21, 2016 at 2:16 PM, Dave Chinner  wrote:
> > > > On Fri, Dec 16, 2016 at 10:59:06AM -0800, Chris Leech wrote:
> > > >> Thanks Dave,
> > > >>
> > > >> I'm hitting a bug at scatterlist.h:140 before I even get any iSCSI
> > > >> modules loaded (virtio block) so there's something else going on in the
> > > >> current merge window.  I'll keep an eye on it and make sure there's
> > > >> nothing iSCSI needs fixing for.
> > > >
> > > > OK, so before this slips through the cracks.
> > > >
> > > > Linus - your tree as of a few minutes ago still panics immediately
> > > > when starting xfstests on iscsi devices. It appears to be a
> > > > scatterlist corruption and not an iscsi problem, so the iscsi guys
> > > > seem to have bounced it and no-one is looking at it.
> > > 
> > > Hmm. There's not much to go by.
> > > 
> > > Can somebody in iscsi-land please try to just bisect it - I'm not
> > > seeing a lot of clues to where this comes from otherwise.
> > 
> > Yeah, my hopes of this being quickly resolved by someone else didn't
> > work out and whatever is going on in that test VM is looking like a
> > different kind of odd.  I'm saving that off for later, and seeing if I
> > can't be a bisect on the iSCSI issue.
> 
> There may be deeper issues. I just started running scalability tests
> (e.g. 16-way fsmark create tests) and about a minute in I got a
> directory corruption reported - something I hadn't seen in the dev
> cycle at all. I unmounted the fs, mkfs'd it again, ran the
> workload again and about a minute in this fired:
> 
> [628867.607417] [ cut here ]
> [628867.608603] WARNING: CPU: 2 PID: 16925 at mm/workingset.c:461 
> shadow_lru_isolate+0x171/0x220
> [628867.610702] Modules linked in:
> [628867.611375] CPU: 2 PID: 16925 Comm: kworker/2:97 Tainted: GW  
>  4.9.0-dgc #18
> [628867.613382] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> Debian-1.8.2-1 04/01/2014
> [628867.616179] Workqueue: events rht_deferred_worker
> [628867.632422] Call Trace:
> [628867.634691]  dump_stack+0x63/0x83
> [628867.637937]  __warn+0xcb/0xf0
> [628867.641359]  warn_slowpath_null+0x1d/0x20
> [628867.643362]  shadow_lru_isolate+0x171/0x220
> [628867.644627]  __list_lru_walk_one.isra.11+0x79/0x110
> [628867.645780]  ? __list_lru_init+0x70/0x70
> [628867.646628]  list_lru_walk_one+0x17/0x20
> [628867.647488]  scan_shadow_nodes+0x34/0x50
> [628867.648358]  shrink_slab.part.65.constprop.86+0x1dc/0x410
> [628867.649506]  shrink_node+0x57/0x90
> [628867.650233]  do_try_to_free_pages+0xdd/0x230
> [628867.651157]  try_to_free_pages+0xce/0x1a0
> [628867.652342]  __alloc_pages_slowpath+0x2df/0x960
> [628867.653332]  ? __might_sleep+0x4a/0x80
> [628867.654148]  __alloc_pages_nodemask+0x24b/0x290
> [628867.655237]  kmalloc_order+0x21/0x50
> [628867.656016]  kmalloc_order_trace+0x24/0xc0
> [628867.656878]  __kmalloc+0x17d/0x1d0
> [628867.657644]  bucket_table_alloc+0x195/0x1d0
> [628867.658564]  ? __might_sleep+0x4a/0x80
> [628867.659449]  rht_deferred_worker+0x287/0x3c0
> [628867.660366]  ? _raw_spin_unlock_irq+0xe/0x30
> [628867.661294]  process_one_work+0x1de/0x4d0
> [628867.662208]  worker_thread+0x4b/0x4f0
> [628867.662990]  kthread+0x10c/0x140
> [628867.663687]  ? process_one_work+0x4d0/0x4d0
> [628867.664564]  ? kthread_create_on_node+0x40/0x40
> [628867.665523]  ret_from_fork+0x25/0x30
> [628867.666317] ---[ end trace 7c38634006a9955e ]---
> 
> Now, this workload does not touch the page cache at all - it's
> entirely an XFS metadata workload, so it should not really be
> affecting the working set code.

The system back up, and I haven't reproduced this problem yet.
However, benchmark results are way off where they should be, and at
times the performance is utterly abysmal. The XFS for-next tree
based on the 4.9 kernel shows none of these problems, so I don't
think there's an XFS problem here. Workload is the same 16-way
fsmark workload that I've been using for years as a performance
regression test.

The workload normally averages around 230k files/s - i'm seeing
and average of ~175k files/s on you current kernel. And there are
periods where performance just completely tanks:

#  ./fs_mark  -D  1  -S0  -n  10  -s  0  -L  32  -d  /mnt/scratch/0  -d 
 /mnt/scratch/1  -d  /mnt/scratch/2  -d  /mnt/scratch/3  -d  /mnt/scratch/4  -d 
 /mnt/scratch/5  -d  /mnt/scratch/6  -d  /mnt/scratch/7  -d  /mnt/scratch/8  -d 
 /mnt/scratch/9  -d  /mnt/scratch/10  -d  /mnt/scratch/11  -d  /mnt/scratch/12  
-d  /mnt/scratch/13  -d  /mnt/scratch/14  -d  /mnt/scratch/15
#   Version 3.3, 16 thread(s) starting at Thu Dec 22 16:29:20 2016
#   Sync method: NO SYNC: Test does not issue sync() or fsync() calls.
#   Directories:  Time based hash between directories 

Re: [PATCH v1] security: Add a new hook: inode_touch_atime

2016-12-21 Thread Christoph Hellwig
On Thu, Dec 22, 2016 at 12:15:06AM +0100, Mickaël Salaün wrote:
> Add a new LSM hook named inode_touch_atime which is needed to deny
> indirect update of extended file attributes (i.e. access time) which are
> not catched by the inode_setattr hook. By creating a new hook instead of
> calling inode_setattr, we avoid to simulate a useless struct iattr.
> 
> This hook allows to create read-only environments as with read-only
> mount points. It can also take care of anonymous inodes.

And LSM has absolutely no business doing that - that's what the mount
code is for.


Re: [PATCH v1] security: Add a new hook: inode_touch_atime

2016-12-21 Thread Christoph Hellwig
On Thu, Dec 22, 2016 at 12:15:06AM +0100, Mickaël Salaün wrote:
> Add a new LSM hook named inode_touch_atime which is needed to deny
> indirect update of extended file attributes (i.e. access time) which are
> not catched by the inode_setattr hook. By creating a new hook instead of
> calling inode_setattr, we avoid to simulate a useless struct iattr.
> 
> This hook allows to create read-only environments as with read-only
> mount points. It can also take care of anonymous inodes.

And LSM has absolutely no business doing that - that's what the mount
code is for.


Re: [PATCH] Btrfs: add another missing end_page_writeback on submit_extent_page failure

2016-12-21 Thread Liu Bo
On Fri, Dec 16, 2016 at 03:41:50PM +0900, Takafumi Kubota wrote:
> This is actually inspired by Filipe's patch(55e3bd2e0c2e1).
> 
> When submit_extent_page() in __extent_writepage_io() fails,
> Btrfs misses clearing a writeback bit of the failed page.
> This causes the false under-writeback page.
> Then, another sync task hangs in filemap_fdatawait_range(),
> because it waits the false under-writeback page.
> 
> CPU0CPU1
> 
> __extent_writepage_io()
>   ret = submit_extent_page() // fail
> 
>   if (ret)
> SetPageError(page)
> // miss clearing the writeback bit
> 
> sync()
>   ...
>   filemap_fdatawait_range()
> wait_on_page_writeback(page);
> // wait the false under-writeback page
> 
> Signed-off-by: Takafumi Kubota 
> ---
>  fs/btrfs/extent_io.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 1e67723..ef9793b 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -3443,8 +3443,10 @@ static noinline_for_stack int 
> __extent_writepage_io(struct inode *inode,
>bdev, >bio, max_nr,
>end_bio_extent_writepage,
>0, 0, 0, false);
> - if (ret)
> + if (ret) {
>   SetPageError(page);
> + end_page_writeback(page);
> + }

OK...this could be complex as we don't know which part in
submit_extent_page gets the error, if the page has been added into bio
and bio_end would call end_page_writepage(page) as well, so whichever
comes later, the BUG() in end_page_writeback() would complain.

Looks like commit 55e3bd2e0c2e1 also has the same problem although I
gave it my reviewed-by.

Thanks,

-liubo

>  
>   cur = cur + iosize;
>   pg_offset += iosize;
> -- 
> 1.9.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: add another missing end_page_writeback on submit_extent_page failure

2016-12-21 Thread Liu Bo
On Fri, Dec 16, 2016 at 03:41:50PM +0900, Takafumi Kubota wrote:
> This is actually inspired by Filipe's patch(55e3bd2e0c2e1).
> 
> When submit_extent_page() in __extent_writepage_io() fails,
> Btrfs misses clearing a writeback bit of the failed page.
> This causes the false under-writeback page.
> Then, another sync task hangs in filemap_fdatawait_range(),
> because it waits the false under-writeback page.
> 
> CPU0CPU1
> 
> __extent_writepage_io()
>   ret = submit_extent_page() // fail
> 
>   if (ret)
> SetPageError(page)
> // miss clearing the writeback bit
> 
> sync()
>   ...
>   filemap_fdatawait_range()
> wait_on_page_writeback(page);
> // wait the false under-writeback page
> 
> Signed-off-by: Takafumi Kubota 
> ---
>  fs/btrfs/extent_io.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 1e67723..ef9793b 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -3443,8 +3443,10 @@ static noinline_for_stack int 
> __extent_writepage_io(struct inode *inode,
>bdev, >bio, max_nr,
>end_bio_extent_writepage,
>0, 0, 0, false);
> - if (ret)
> + if (ret) {
>   SetPageError(page);
> + end_page_writeback(page);
> + }

OK...this could be complex as we don't know which part in
submit_extent_page gets the error, if the page has been added into bio
and bio_end would call end_page_writepage(page) as well, so whichever
comes later, the BUG() in end_page_writeback() would complain.

Looks like commit 55e3bd2e0c2e1 also has the same problem although I
gave it my reviewed-by.

Thanks,

-liubo

>  
>   cur = cur + iosize;
>   pg_offset += iosize;
> -- 
> 1.9.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] arm64: setup: introduce kaslr_offset()

2016-12-21 Thread Yury Norov
On Sun, Dec 11, 2016 at 03:50:55AM +0300, Alexander Popov wrote:
> Introduce kaslr_offset() similarly to x86_64 for fixing kcov.
> 
> Signed-off-by: Alexander Popov 
> ---
>  arch/arm64/include/asm/setup.h  | 19 +++
>  arch/arm64/include/uapi/asm/setup.h |  4 ++--
>  arch/arm64/kernel/setup.c   |  8 
>  3 files changed, 25 insertions(+), 6 deletions(-)
>  create mode 100644 arch/arm64/include/asm/setup.h
> 
> diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
> new file mode 100644
> index 000..e7b59b9
> --- /dev/null
> +++ b/arch/arm64/include/asm/setup.h
> @@ -0,0 +1,19 @@
> +/*
> + * arch/arm64/include/asm/setup.h
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef __ASM_SETUP_H
> +#define __ASM_SETUP_H
> +
> +#include 
> +
> +static inline unsigned long kaslr_offset(void)
> +{
> + return kimage_vaddr - KIMAGE_VADDR;
> +}
> +
> +#endif

Hi Alexander,

I found today's linux-next master broken:
In file included from init/main.c:88:0:
./arch/arm64/include/asm/setup.h:14:100: error: redefinition of ‘kaslr_offset’
In file included from ./arch/arm64/include/asm/page.h:54:0,
   from ./include/linux/mm_types.h:16,
   from ./include/linux/sched.h:27,
   from ./arch/arm64/include/asm/compat.h:25,
   from ./arch/arm64/include/asm/stat.h:23,
   from ./include/linux/stat.h:5,
   from ./include/linux/module.h:10,
   from init/main.c:15:
/arch/arm64/include/asm/memory.h:168:100: note: previous definition of 
‘kaslr_offset’ was here scripts/Makefile.build:293: recipe for target 
'init/main.o' failed
make[1]: *** [init/main.o] Error 1

It looks like you declare kaslr_offset() twice - in this patch, and in 7ede8665f
(arm64: setup: introduce kaslr_offset()). 

Yury


Re: [PATCH 1/2] arm64: setup: introduce kaslr_offset()

2016-12-21 Thread Yury Norov
On Sun, Dec 11, 2016 at 03:50:55AM +0300, Alexander Popov wrote:
> Introduce kaslr_offset() similarly to x86_64 for fixing kcov.
> 
> Signed-off-by: Alexander Popov 
> ---
>  arch/arm64/include/asm/setup.h  | 19 +++
>  arch/arm64/include/uapi/asm/setup.h |  4 ++--
>  arch/arm64/kernel/setup.c   |  8 
>  3 files changed, 25 insertions(+), 6 deletions(-)
>  create mode 100644 arch/arm64/include/asm/setup.h
> 
> diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
> new file mode 100644
> index 000..e7b59b9
> --- /dev/null
> +++ b/arch/arm64/include/asm/setup.h
> @@ -0,0 +1,19 @@
> +/*
> + * arch/arm64/include/asm/setup.h
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef __ASM_SETUP_H
> +#define __ASM_SETUP_H
> +
> +#include 
> +
> +static inline unsigned long kaslr_offset(void)
> +{
> + return kimage_vaddr - KIMAGE_VADDR;
> +}
> +
> +#endif

Hi Alexander,

I found today's linux-next master broken:
In file included from init/main.c:88:0:
./arch/arm64/include/asm/setup.h:14:100: error: redefinition of ‘kaslr_offset’
In file included from ./arch/arm64/include/asm/page.h:54:0,
   from ./include/linux/mm_types.h:16,
   from ./include/linux/sched.h:27,
   from ./arch/arm64/include/asm/compat.h:25,
   from ./arch/arm64/include/asm/stat.h:23,
   from ./include/linux/stat.h:5,
   from ./include/linux/module.h:10,
   from init/main.c:15:
/arch/arm64/include/asm/memory.h:168:100: note: previous definition of 
‘kaslr_offset’ was here scripts/Makefile.build:293: recipe for target 
'init/main.o' failed
make[1]: *** [init/main.o] Error 1

It looks like you declare kaslr_offset() twice - in this patch, and in 7ede8665f
(arm64: setup: introduce kaslr_offset()). 

Yury


Re: [4.10, panic, regression] iscsi: null pointer deref at iscsi_tcp_segment_done+0x20d/0x2e0

2016-12-21 Thread Christoph Hellwig
On Wed, Dec 21, 2016 at 03:19:15PM -0800, Linus Torvalds wrote:
> Looking around a bit, the only even halfway suspicious scatterlist
> initialization thing I see is commit f9d03f96b988 ("block: improve
> handling of the magic discard payload") which used to have a magic
> hack wrt !bio->bi_vcnt, and that got removed. See __blk_bios_map_sg(),
> now it does __blk_bvec_map_sg() instead.

But that check was only for discard (and discard-like) bios which
had the maic single page that sometimes was unused attached.

For "normal" bios the for_each_segment loop iterates over bi_vcnt,
so it will be ignored anyway.  That being said both I and the lists
got CCed halfway through the thread and I haven't seen the original
report, so I'm not really sure what's going on here anyway.


Re: [4.10, panic, regression] iscsi: null pointer deref at iscsi_tcp_segment_done+0x20d/0x2e0

2016-12-21 Thread Christoph Hellwig
On Wed, Dec 21, 2016 at 03:19:15PM -0800, Linus Torvalds wrote:
> Looking around a bit, the only even halfway suspicious scatterlist
> initialization thing I see is commit f9d03f96b988 ("block: improve
> handling of the magic discard payload") which used to have a magic
> hack wrt !bio->bi_vcnt, and that got removed. See __blk_bios_map_sg(),
> now it does __blk_bvec_map_sg() instead.

But that check was only for discard (and discard-like) bios which
had the maic single page that sometimes was unused attached.

For "normal" bios the for_each_segment loop iterates over bi_vcnt,
so it will be ignored anyway.  That being said both I and the lists
got CCed halfway through the thread and I haven't seen the original
report, so I'm not really sure what's going on here anyway.


Re: [kernel-hardening] Re: [PATCH v7 3/6] random: use SipHash in place of MD5

2016-12-21 Thread Jason A. Donenfeld
Hi Ted,

On Thu, Dec 22, 2016 at 6:41 AM, Theodore Ts'o  wrote:
> The bottom line is that I think we're really "pixel peeping" at this
> point --- which is what obsessed digital photographers will do when
> debating the quality of a Canon vs Nikon DSLR by blowing up a photo by
> a thousand times, and then trying to claim that this is visible to the
> human eye.  Or people who obsessing over the frequency response curves
> of TH-X00 headphones with Mahogony vs Purpleheart wood, when it's
> likely that in a blind head-to-head comparison, most people wouldn't
> be able to tell the difference

This is hilarious, thanks for the laugh. I believe you're right about this...

>
> I think the main argument for using the batched getrandom approach is
> that it, I would argue, simpler than introducing siphash into the
> picture.  On 64-bit platforms it is faster and more consistent, so
> it's basically that versus complexity of having to adding siphash to
> the things that people have to analyze when considering random number
> security on Linux.   But it's a close call either way, I think.

I find this compelling. We'll have one csprng for both
get_random_int/long and for /dev/urandom, and we'll be able to update
that silly warning on the comment of get_random_int/long to read
"gives output of either rdrand quality or of /dev/urandom quality",
which makes it more useful for other things. It introduces less error
prone code, and it lets the RNG analysis be spent on just one RNG, not
two.

So, with your blessing, I'm going to move ahead with implementing a
pretty version of this for v8.

Regards,
Jason


Re: [kernel-hardening] Re: [PATCH v7 3/6] random: use SipHash in place of MD5

2016-12-21 Thread Jason A. Donenfeld
Hi Ted,

On Thu, Dec 22, 2016 at 6:41 AM, Theodore Ts'o  wrote:
> The bottom line is that I think we're really "pixel peeping" at this
> point --- which is what obsessed digital photographers will do when
> debating the quality of a Canon vs Nikon DSLR by blowing up a photo by
> a thousand times, and then trying to claim that this is visible to the
> human eye.  Or people who obsessing over the frequency response curves
> of TH-X00 headphones with Mahogony vs Purpleheart wood, when it's
> likely that in a blind head-to-head comparison, most people wouldn't
> be able to tell the difference

This is hilarious, thanks for the laugh. I believe you're right about this...

>
> I think the main argument for using the batched getrandom approach is
> that it, I would argue, simpler than introducing siphash into the
> picture.  On 64-bit platforms it is faster and more consistent, so
> it's basically that versus complexity of having to adding siphash to
> the things that people have to analyze when considering random number
> security on Linux.   But it's a close call either way, I think.

I find this compelling. We'll have one csprng for both
get_random_int/long and for /dev/urandom, and we'll be able to update
that silly warning on the comment of get_random_int/long to read
"gives output of either rdrand quality or of /dev/urandom quality",
which makes it more useful for other things. It introduces less error
prone code, and it lets the RNG analysis be spent on just one RNG, not
two.

So, with your blessing, I'm going to move ahead with implementing a
pretty version of this for v8.

Regards,
Jason


[PATCH 3/3] perf sched timehist: Show total scheduling time

2016-12-21 Thread Namhyung Kim
Show length of analyzed sample time and rate of idle task running.
This also takes care of time range given by --time option.

  $ perf sched timehist -sI | tail
  Samples do not have callchains.
  Idle stats:
  CPU  0 idle for930.316  msec  ( 92.93%)
  CPU  1 idle for963.614  msec  ( 96.25%)
  CPU  2 idle for885.482  msec  ( 88.45%)
  CPU  3 idle for938.635  msec  ( 93.76%)

  Total number of unique tasks: 118
  Total number of context switches: 2337
 Total run time (msec): 3718.048
  Total scheduling time (msec): 1001.131  (x 4)

Suggested-by: David Ahern 
Signed-off-by: Namhyung Kim 
---
 tools/perf/builtin-sched.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index d53e706a6f17..5b134b0d1ff3 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -209,6 +209,7 @@ struct perf_sched {
u64 skipped_samples;
const char  *time_str;
struct perf_time_interval ptime;
+   struct perf_time_interval hist_time;
 };
 
 /* per thread run time data */
@@ -2460,6 +2461,11 @@ static int timehist_sched_change_event(struct perf_tool 
*tool,
timehist_print_sample(sched, sample, , thread, t);
 
 out:
+   if (sched->hist_time.start == 0 && t >= ptime->start)
+   sched->hist_time.start = t;
+   if (ptime->end == 0 || t <= ptime->end)
+   sched->hist_time.end = t;
+
if (tr) {
/* time of this sched_switch event becomes last time task seen 
*/
tr->last_time = sample->time;
@@ -2624,6 +2630,7 @@ static void timehist_print_summary(struct perf_sched 
*sched,
struct thread *t;
struct thread_runtime *r;
int i;
+   u64 hist_time = sched->hist_time.end - sched->hist_time.start;
 
memset(, 0, sizeof(totals));
 
@@ -2665,7 +2672,7 @@ static void timehist_print_summary(struct perf_sched 
*sched,
totals.sched_count += r->run_stats.n;
printf("CPU %2d idle for ", i);
print_sched_time(r->total_run_time, 6);
-   printf(" msec\n");
+   printf(" msec  (%6.2f%%)\n", 100.0 * r->total_run_time 
/ hist_time);
} else
printf("CPU %2d idle entire time window\n", i);
}
@@ -2701,12 +2708,16 @@ static void timehist_print_summary(struct perf_sched 
*sched,
 
printf("\n"
   "Total number of unique tasks: %" PRIu64 "\n"
-  "Total number of context switches: %" PRIu64 "\n"
-  "   Total run time (msec): ",
+  "Total number of context switches: %" PRIu64 "\n",
   totals.task_count, totals.sched_count);
 
+   printf("   Total run time (msec): ");
print_sched_time(totals.total_run_time, 2);
printf("\n");
+
+   printf("Total scheduling time (msec): ");
+   print_sched_time(hist_time, 2);
+   printf(" (x %d)\n", sched->max_cpu);
 }
 
 typedef int (*sched_handler)(struct perf_tool *tool,
-- 
2.10.0



[PATCH 2/3] perf sched timehist: Fix invalid period calculation

2016-12-21 Thread Namhyung Kim
When --time option is given with a value outside recorded time, the last
sample time (tprev) was set to that value and run time calculation might
be incorrect.  This is a problem of the first samples for each cpus
since it would skip the runtime update when tprev is 0.  But with --time
option it had non-zero (which is invalid) value so the calculation is
also incorrect.

For example, let's see the followging:

  $ perf sched timehist
 timecpu  task name   wait time  sch delay  
 run time
  [tid/pid]  (msec) (msec)  
   (msec)
  --- --  --  -  -  
-
  3195.968367 [0003]0.000  0.000  
0.000
  3195.968386 [0002]  Timer[4306/4277]0.000  0.000  
0.018
  3195.968397 [0002]  Web Content[4277]   0.000  0.000  
0.000
  3195.968595 [0001]  JS Helper[4302/4277]0.000  0.000  
0.000
  3195.969217 []0.000  0.000  
0.621
  3195.969251 [0001]  kworker/1:1H[291]   0.000  0.000  
0.033

The sample starts at 3195.968367 but when I gave a time interval from
3194 to 3196 (in sec) it will calculate the whole 2 second as runtime.
In below, 2 cpus accounted it as runtime, other 2 cpus accounted it as
idle time.

Before:

  $ perf sched timehist --time 3194,3196 -s | tail
  Idle stats:
  CPU  0 idle for   1995.991  msec
  CPU  1 idle for 20.793  msec
  CPU  2 idle for 30.191  msec
  CPU  3 idle for   1999.852  msec

  Total number of unique tasks: 23
  Total number of context switches: 128
 Total run time (msec): 3724.940

After:

  $ perf sched timehist --time 3194,3196 -s | tail
  Idle stats:
  CPU  0 idle for 10.811  msec
  CPU  1 idle for 20.793  msec
  CPU  2 idle for 30.191  msec
  CPU  3 idle for 18.337  msec

  Total number of unique tasks: 23
  Total number of context switches: 128
 Total run time (msec): 18.139

Cc: David Ahern 
Fixes: 853b74071110 ("perf sched timehist: Add option to specify time window of 
interest")
Signed-off-by: Namhyung Kim 
---
 tools/perf/builtin-sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 5052caa91caa..d53e706a6f17 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -2405,7 +2405,7 @@ static int timehist_sched_change_event(struct perf_tool 
*tool,
if (ptime->start && ptime->start > t)
goto out;
 
-   if (ptime->start > tprev)
+   if (tprev && ptime->start > tprev)
tprev = ptime->start;
 
/*
-- 
2.10.0



[PATCH 1/3] perf sched timehist: Enlarge default comm_width

2016-12-21 Thread Namhyung Kim
Current default value is 20 but it's easily changed to a bigger value as
task has a long name and different tid and pid.  And it makes the output
not aligned.  So change it to have a large value as summary shows.

Signed-off-by: Namhyung Kim 
---
 tools/perf/builtin-sched.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index c1c07bfe132c..5052caa91caa 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1775,7 +1775,7 @@ static u64 perf_evsel__get_time(struct perf_evsel *evsel, 
u32 cpu)
return r->last_time[cpu];
 }
 
-static int comm_width = 20;
+static int comm_width = 30;
 
 static char *timehist_get_commstr(struct thread *thread)
 {
@@ -1817,7 +1817,7 @@ static void timehist_header(struct perf_sched *sched)
printf(" ");
}
 
-   printf(" %-20s  %9s  %9s  %9s",
+   printf(" %-*s  %9s  %9s  %9s", comm_width,
"task name", "wait time", "sch delay", "run time");
 
printf("\n");
@@ -1830,7 +1830,8 @@ static void timehist_header(struct perf_sched *sched)
if (sched->show_cpu_visual)
printf(" %*s ", ncpus, "");
 
-   printf(" %-20s  %9s  %9s  %9s\n", "[tid/pid]", "(msec)", "(msec)", 
"(msec)");
+   printf(" %-*s  %9s  %9s  %9s\n", comm_width,
+  "[tid/pid]", "(msec)", "(msec)", "(msec)");
 
/*
 * separator
@@ -1840,7 +1841,7 @@ static void timehist_header(struct perf_sched *sched)
if (sched->show_cpu_visual)
printf(" %.*s ", ncpus, graph_dotted_line);
 
-   printf(" %.20s  %.9s  %.9s  %.9s",
+   printf(" %.*s  %.9s  %.9s  %.9s", comm_width,
graph_dotted_line, graph_dotted_line, graph_dotted_line,
graph_dotted_line);
 
@@ -2626,9 +2627,6 @@ static void timehist_print_summary(struct perf_sched 
*sched,
 
memset(, 0, sizeof(totals));
 
-   if (comm_width < 30)
-   comm_width = 30;
-
if (sched->idle_hist) {
printf("\nIdle-time summary\n");
printf("%*s  parent  sched-out  ", comm_width, "comm");
-- 
2.10.0



[PATCH 3/3] perf sched timehist: Show total scheduling time

2016-12-21 Thread Namhyung Kim
Show length of analyzed sample time and rate of idle task running.
This also takes care of time range given by --time option.

  $ perf sched timehist -sI | tail
  Samples do not have callchains.
  Idle stats:
  CPU  0 idle for930.316  msec  ( 92.93%)
  CPU  1 idle for963.614  msec  ( 96.25%)
  CPU  2 idle for885.482  msec  ( 88.45%)
  CPU  3 idle for938.635  msec  ( 93.76%)

  Total number of unique tasks: 118
  Total number of context switches: 2337
 Total run time (msec): 3718.048
  Total scheduling time (msec): 1001.131  (x 4)

Suggested-by: David Ahern 
Signed-off-by: Namhyung Kim 
---
 tools/perf/builtin-sched.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index d53e706a6f17..5b134b0d1ff3 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -209,6 +209,7 @@ struct perf_sched {
u64 skipped_samples;
const char  *time_str;
struct perf_time_interval ptime;
+   struct perf_time_interval hist_time;
 };
 
 /* per thread run time data */
@@ -2460,6 +2461,11 @@ static int timehist_sched_change_event(struct perf_tool 
*tool,
timehist_print_sample(sched, sample, , thread, t);
 
 out:
+   if (sched->hist_time.start == 0 && t >= ptime->start)
+   sched->hist_time.start = t;
+   if (ptime->end == 0 || t <= ptime->end)
+   sched->hist_time.end = t;
+
if (tr) {
/* time of this sched_switch event becomes last time task seen 
*/
tr->last_time = sample->time;
@@ -2624,6 +2630,7 @@ static void timehist_print_summary(struct perf_sched 
*sched,
struct thread *t;
struct thread_runtime *r;
int i;
+   u64 hist_time = sched->hist_time.end - sched->hist_time.start;
 
memset(, 0, sizeof(totals));
 
@@ -2665,7 +2672,7 @@ static void timehist_print_summary(struct perf_sched 
*sched,
totals.sched_count += r->run_stats.n;
printf("CPU %2d idle for ", i);
print_sched_time(r->total_run_time, 6);
-   printf(" msec\n");
+   printf(" msec  (%6.2f%%)\n", 100.0 * r->total_run_time 
/ hist_time);
} else
printf("CPU %2d idle entire time window\n", i);
}
@@ -2701,12 +2708,16 @@ static void timehist_print_summary(struct perf_sched 
*sched,
 
printf("\n"
   "Total number of unique tasks: %" PRIu64 "\n"
-  "Total number of context switches: %" PRIu64 "\n"
-  "   Total run time (msec): ",
+  "Total number of context switches: %" PRIu64 "\n",
   totals.task_count, totals.sched_count);
 
+   printf("   Total run time (msec): ");
print_sched_time(totals.total_run_time, 2);
printf("\n");
+
+   printf("Total scheduling time (msec): ");
+   print_sched_time(hist_time, 2);
+   printf(" (x %d)\n", sched->max_cpu);
 }
 
 typedef int (*sched_handler)(struct perf_tool *tool,
-- 
2.10.0



[PATCH 2/3] perf sched timehist: Fix invalid period calculation

2016-12-21 Thread Namhyung Kim
When --time option is given with a value outside recorded time, the last
sample time (tprev) was set to that value and run time calculation might
be incorrect.  This is a problem of the first samples for each cpus
since it would skip the runtime update when tprev is 0.  But with --time
option it had non-zero (which is invalid) value so the calculation is
also incorrect.

For example, let's see the followging:

  $ perf sched timehist
 timecpu  task name   wait time  sch delay  
 run time
  [tid/pid]  (msec) (msec)  
   (msec)
  --- --  --  -  -  
-
  3195.968367 [0003]0.000  0.000  
0.000
  3195.968386 [0002]  Timer[4306/4277]0.000  0.000  
0.018
  3195.968397 [0002]  Web Content[4277]   0.000  0.000  
0.000
  3195.968595 [0001]  JS Helper[4302/4277]0.000  0.000  
0.000
  3195.969217 []0.000  0.000  
0.621
  3195.969251 [0001]  kworker/1:1H[291]   0.000  0.000  
0.033

The sample starts at 3195.968367 but when I gave a time interval from
3194 to 3196 (in sec) it will calculate the whole 2 second as runtime.
In below, 2 cpus accounted it as runtime, other 2 cpus accounted it as
idle time.

Before:

  $ perf sched timehist --time 3194,3196 -s | tail
  Idle stats:
  CPU  0 idle for   1995.991  msec
  CPU  1 idle for 20.793  msec
  CPU  2 idle for 30.191  msec
  CPU  3 idle for   1999.852  msec

  Total number of unique tasks: 23
  Total number of context switches: 128
 Total run time (msec): 3724.940

After:

  $ perf sched timehist --time 3194,3196 -s | tail
  Idle stats:
  CPU  0 idle for 10.811  msec
  CPU  1 idle for 20.793  msec
  CPU  2 idle for 30.191  msec
  CPU  3 idle for 18.337  msec

  Total number of unique tasks: 23
  Total number of context switches: 128
 Total run time (msec): 18.139

Cc: David Ahern 
Fixes: 853b74071110 ("perf sched timehist: Add option to specify time window of 
interest")
Signed-off-by: Namhyung Kim 
---
 tools/perf/builtin-sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 5052caa91caa..d53e706a6f17 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -2405,7 +2405,7 @@ static int timehist_sched_change_event(struct perf_tool 
*tool,
if (ptime->start && ptime->start > t)
goto out;
 
-   if (ptime->start > tprev)
+   if (tprev && ptime->start > tprev)
tprev = ptime->start;
 
/*
-- 
2.10.0



[PATCH 1/3] perf sched timehist: Enlarge default comm_width

2016-12-21 Thread Namhyung Kim
Current default value is 20 but it's easily changed to a bigger value as
task has a long name and different tid and pid.  And it makes the output
not aligned.  So change it to have a large value as summary shows.

Signed-off-by: Namhyung Kim 
---
 tools/perf/builtin-sched.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index c1c07bfe132c..5052caa91caa 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1775,7 +1775,7 @@ static u64 perf_evsel__get_time(struct perf_evsel *evsel, 
u32 cpu)
return r->last_time[cpu];
 }
 
-static int comm_width = 20;
+static int comm_width = 30;
 
 static char *timehist_get_commstr(struct thread *thread)
 {
@@ -1817,7 +1817,7 @@ static void timehist_header(struct perf_sched *sched)
printf(" ");
}
 
-   printf(" %-20s  %9s  %9s  %9s",
+   printf(" %-*s  %9s  %9s  %9s", comm_width,
"task name", "wait time", "sch delay", "run time");
 
printf("\n");
@@ -1830,7 +1830,8 @@ static void timehist_header(struct perf_sched *sched)
if (sched->show_cpu_visual)
printf(" %*s ", ncpus, "");
 
-   printf(" %-20s  %9s  %9s  %9s\n", "[tid/pid]", "(msec)", "(msec)", 
"(msec)");
+   printf(" %-*s  %9s  %9s  %9s\n", comm_width,
+  "[tid/pid]", "(msec)", "(msec)", "(msec)");
 
/*
 * separator
@@ -1840,7 +1841,7 @@ static void timehist_header(struct perf_sched *sched)
if (sched->show_cpu_visual)
printf(" %.*s ", ncpus, graph_dotted_line);
 
-   printf(" %.20s  %.9s  %.9s  %.9s",
+   printf(" %.*s  %.9s  %.9s  %.9s", comm_width,
graph_dotted_line, graph_dotted_line, graph_dotted_line,
graph_dotted_line);
 
@@ -2626,9 +2627,6 @@ static void timehist_print_summary(struct perf_sched 
*sched,
 
memset(, 0, sizeof(totals));
 
-   if (comm_width < 30)
-   comm_width = 30;
-
if (sched->idle_hist) {
printf("\nIdle-time summary\n");
printf("%*s  parent  sched-out  ", comm_width, "comm");
-- 
2.10.0



Re: [PATCH] pci: add kernel config option for disabling common PCI quirks

2016-12-21 Thread John Crispin


On 21/12/2016 15:26, Christoph Hellwig wrote:
> On Wed, Dec 21, 2016 at 02:11:25PM +0100, John Crispin wrote:
>> I can turn it into an enable patch that is selected by default.
>>
>> The current patch disables all those quirks that are used for x86/PC
>> style machines and hence are not required in the embedded world.
> 
> Maybe we'll just need to reorganize the quirks so that most of them
> arch in arch code or the affected drivers?
> 

Hi Christoph

to be honest i have no opinion on this. I am currently trying to reduce
the amount of patches that we have inside the LEDE tree. the patches
were written by other people and then dumped on us. obviously i am
interested to get this upstream with the least amount of effort. I am
quite aware though that some patches will need an overhaul to be
applicable for upstream. its not really my call if it is enough to make
this an enable patch and review the quirks enabled by it or if the code
needs to be moved.

John


Re: [PATCH] pci: add kernel config option for disabling common PCI quirks

2016-12-21 Thread John Crispin


On 21/12/2016 15:26, Christoph Hellwig wrote:
> On Wed, Dec 21, 2016 at 02:11:25PM +0100, John Crispin wrote:
>> I can turn it into an enable patch that is selected by default.
>>
>> The current patch disables all those quirks that are used for x86/PC
>> style machines and hence are not required in the embedded world.
> 
> Maybe we'll just need to reorganize the quirks so that most of them
> arch in arch code or the affected drivers?
> 

Hi Christoph

to be honest i have no opinion on this. I am currently trying to reduce
the amount of patches that we have inside the LEDE tree. the patches
were written by other people and then dumped on us. obviously i am
interested to get this upstream with the least amount of effort. I am
quite aware though that some patches will need an overhaul to be
applicable for upstream. its not really my call if it is enough to make
this an enable patch and review the quirks enabled by it or if the code
needs to be moved.

John


Fwd: [PATCH 1/1] of/fdt: failed to mark hotplug range message

2016-12-21 Thread Heinrich Schuchardt
scripts/get_maintainers.pl did not show the people involved in creating
the code to be changed.

On 12/22/2016 06:34 AM, Heinrich Schuchardt wrote:
> If marking a hotplug range fails a message
> "failed to mark hotplug range" is written.
> 
> The end address is base + size - 1.
> 
> Signed-off-by: Heinrich Schuchardt 
> ---
>  drivers/of/fdt.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
> index c9b5cac03b36..fd129b6e5396 100644
> --- a/drivers/of/fdt.c
> +++ b/drivers/of/fdt.c
> @@ -1057,7 +1057,7 @@ int __init early_init_dt_scan_memory(unsigned long 
> node, const char *uname,
>  
>   if (early_init_dt_mark_hotplug_memory_arch(base, size))
>   pr_warn("failed to mark hotplug range 0x%llx - 
> 0x%llx\n",
> - base, base + size);
> + base, base + size - 1);
>   }
>  
>   return 0;
> 



Fwd: [PATCH 1/1] of/fdt: failed to mark hotplug range message

2016-12-21 Thread Heinrich Schuchardt
scripts/get_maintainers.pl did not show the people involved in creating
the code to be changed.

On 12/22/2016 06:34 AM, Heinrich Schuchardt wrote:
> If marking a hotplug range fails a message
> "failed to mark hotplug range" is written.
> 
> The end address is base + size - 1.
> 
> Signed-off-by: Heinrich Schuchardt 
> ---
>  drivers/of/fdt.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
> index c9b5cac03b36..fd129b6e5396 100644
> --- a/drivers/of/fdt.c
> +++ b/drivers/of/fdt.c
> @@ -1057,7 +1057,7 @@ int __init early_init_dt_scan_memory(unsigned long 
> node, const char *uname,
>  
>   if (early_init_dt_mark_hotplug_memory_arch(base, size))
>   pr_warn("failed to mark hotplug range 0x%llx - 
> 0x%llx\n",
> - base, base + size);
> + base, base + size - 1);
>   }
>  
>   return 0;
> 



[PATCH v5 14/14] irqchip: mbigen: Add ACPI support

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

With the preparation of platform msi support and interrupt producer
in DSDT, we can add mbigen ACPI support now.

We are using _PRS methd to indicate number of irq pins instead
of num_pins in DT to avoid _DSD usage in this case.

For mbi-gen,
Device(MBI0) {
  Name(_HID, "HISI0152")
  Name(_UID, Zero)
  Name(_CRS, ResourceTemplate() {
  Memory32Fixed(ReadWrite, 0xa008, 0x1)
  })

  Name (_PRS, ResourceTemplate() {
  Interrupt(ResourceProducer,...) {12,14,}
  })
}

For devices,

   Device(COM0) {
  Name(_HID, "ACPIIDxx")
  Name(_UID, Zero)
  Name(_CRS, ResourceTemplate() {
 Memory32Fixed(ReadWrite, 0xb003, 0x1)
 Interrupt(ResourceConsumer,..., "\_SB.MBI0") {12}
  })
}

With the helpe of platform msi and interrupt producer, then devices
will get the virq from mbi-gen's irqdomain.

Signed-off-by: Hanjun Guo 
Cc: Marc Zyngier 
Cc: Thomas Gleixner 
Cc: Ma Jun 
---
 drivers/irqchip/irq-mbigen.c | 70 ++--
 1 file changed, 67 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index 4e11da5..17d35fa 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -16,6 +16,7 @@
  * along with this program.  If not, see .
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -180,7 +181,7 @@ static int mbigen_domain_translate(struct irq_domain *d,
unsigned long *hwirq,
unsigned int *type)
 {
-   if (is_of_node(fwspec->fwnode)) {
+   if (is_of_node(fwspec->fwnode) || is_acpi_device_node(fwspec->fwnode)) {
if (fwspec->param_count != 2)
return -EINVAL;
 
@@ -271,6 +272,54 @@ static int mbigen_of_create_domain(struct platform_device 
*pdev,
return 0;
 }
 
+#ifdef CONFIG_ACPI
+static acpi_status mbigen_acpi_process_resource(struct acpi_resource *ares,
+void *context)
+{
+   struct acpi_resource_extended_irq *ext_irq;
+   u32 *num_irqs = context;
+
+   switch (ares->type) {
+   case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
+   ext_irq = >data.extended_irq;
+   *num_irqs += ext_irq->interrupt_count;
+   break;
+   default:
+   break;
+   }
+
+   return AE_OK;
+}
+
+static int mbigen_acpi_create_domain(struct platform_device *pdev,
+struct mbigen_device *mgn_chip)
+{
+   struct irq_domain *domain;
+   u32 num_msis = 0;
+   acpi_status status;
+
+   status = acpi_walk_resources(ACPI_HANDLE(>dev), METHOD_NAME__PRS,
+mbigen_acpi_process_resource, _msis);
+if (ACPI_FAILURE(status) || num_msis == 0)
+   return -EINVAL;
+
+   domain = platform_msi_create_device_domain(>dev, num_msis,
+  mbigen_write_msg,
+  _domain_ops,
+  mgn_chip);
+   if (!domain)
+   return -ENOMEM;
+
+   return 0;
+}
+#else
+static int mbigen_acpi_create_domain(struct platform_device *pdev,
+struct mbigen_device *mgn_chip)
+{
+   return -ENODEV;
+}
+#endif
+
 static int mbigen_device_probe(struct platform_device *pdev)
 {
struct mbigen_device *mgn_chip;
@@ -288,9 +337,17 @@ static int mbigen_device_probe(struct platform_device 
*pdev)
if (IS_ERR(mgn_chip->base))
return PTR_ERR(mgn_chip->base);
 
-   err = mbigen_of_create_domain(pdev, mgn_chip);
-   if (err)
+   if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node)
+   err = mbigen_of_create_domain(pdev, mgn_chip);
+   else if (ACPI_COMPANION(>dev))
+   err = mbigen_acpi_create_domain(pdev, mgn_chip);
+   else
+   err = -EINVAL;
+
+   if (err) {
+   dev_err(>dev, "Failed to create mbi-gen@%p irqdomain", 
mgn_chip->base);
return err;
+   }
 
platform_set_drvdata(pdev, mgn_chip);
return 0;
@@ -302,10 +359,17 @@ static int mbigen_device_probe(struct platform_device 
*pdev)
 };
 MODULE_DEVICE_TABLE(of, mbigen_of_match);
 
+static const struct acpi_device_id mbigen_acpi_match[] = {
+{ "HISI0152", 0 },
+   {}
+};
+MODULE_DEVICE_TABLE(acpi, mbigen_acpi_match);
+
 static struct platform_driver mbigen_platform_driver = {
.driver = {
.name   = "Hisilicon MBIGEN-V2",
.of_match_table = mbigen_of_match,
+   .acpi_match_table = 

[PATCH v5 14/14] irqchip: mbigen: Add ACPI support

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

With the preparation of platform msi support and interrupt producer
in DSDT, we can add mbigen ACPI support now.

We are using _PRS methd to indicate number of irq pins instead
of num_pins in DT to avoid _DSD usage in this case.

For mbi-gen,
Device(MBI0) {
  Name(_HID, "HISI0152")
  Name(_UID, Zero)
  Name(_CRS, ResourceTemplate() {
  Memory32Fixed(ReadWrite, 0xa008, 0x1)
  })

  Name (_PRS, ResourceTemplate() {
  Interrupt(ResourceProducer,...) {12,14,}
  })
}

For devices,

   Device(COM0) {
  Name(_HID, "ACPIIDxx")
  Name(_UID, Zero)
  Name(_CRS, ResourceTemplate() {
 Memory32Fixed(ReadWrite, 0xb003, 0x1)
 Interrupt(ResourceConsumer,..., "\_SB.MBI0") {12}
  })
}

With the helpe of platform msi and interrupt producer, then devices
will get the virq from mbi-gen's irqdomain.

Signed-off-by: Hanjun Guo 
Cc: Marc Zyngier 
Cc: Thomas Gleixner 
Cc: Ma Jun 
---
 drivers/irqchip/irq-mbigen.c | 70 ++--
 1 file changed, 67 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index 4e11da5..17d35fa 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -16,6 +16,7 @@
  * along with this program.  If not, see .
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -180,7 +181,7 @@ static int mbigen_domain_translate(struct irq_domain *d,
unsigned long *hwirq,
unsigned int *type)
 {
-   if (is_of_node(fwspec->fwnode)) {
+   if (is_of_node(fwspec->fwnode) || is_acpi_device_node(fwspec->fwnode)) {
if (fwspec->param_count != 2)
return -EINVAL;
 
@@ -271,6 +272,54 @@ static int mbigen_of_create_domain(struct platform_device 
*pdev,
return 0;
 }
 
+#ifdef CONFIG_ACPI
+static acpi_status mbigen_acpi_process_resource(struct acpi_resource *ares,
+void *context)
+{
+   struct acpi_resource_extended_irq *ext_irq;
+   u32 *num_irqs = context;
+
+   switch (ares->type) {
+   case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
+   ext_irq = >data.extended_irq;
+   *num_irqs += ext_irq->interrupt_count;
+   break;
+   default:
+   break;
+   }
+
+   return AE_OK;
+}
+
+static int mbigen_acpi_create_domain(struct platform_device *pdev,
+struct mbigen_device *mgn_chip)
+{
+   struct irq_domain *domain;
+   u32 num_msis = 0;
+   acpi_status status;
+
+   status = acpi_walk_resources(ACPI_HANDLE(>dev), METHOD_NAME__PRS,
+mbigen_acpi_process_resource, _msis);
+if (ACPI_FAILURE(status) || num_msis == 0)
+   return -EINVAL;
+
+   domain = platform_msi_create_device_domain(>dev, num_msis,
+  mbigen_write_msg,
+  _domain_ops,
+  mgn_chip);
+   if (!domain)
+   return -ENOMEM;
+
+   return 0;
+}
+#else
+static int mbigen_acpi_create_domain(struct platform_device *pdev,
+struct mbigen_device *mgn_chip)
+{
+   return -ENODEV;
+}
+#endif
+
 static int mbigen_device_probe(struct platform_device *pdev)
 {
struct mbigen_device *mgn_chip;
@@ -288,9 +337,17 @@ static int mbigen_device_probe(struct platform_device 
*pdev)
if (IS_ERR(mgn_chip->base))
return PTR_ERR(mgn_chip->base);
 
-   err = mbigen_of_create_domain(pdev, mgn_chip);
-   if (err)
+   if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node)
+   err = mbigen_of_create_domain(pdev, mgn_chip);
+   else if (ACPI_COMPANION(>dev))
+   err = mbigen_acpi_create_domain(pdev, mgn_chip);
+   else
+   err = -EINVAL;
+
+   if (err) {
+   dev_err(>dev, "Failed to create mbi-gen@%p irqdomain", 
mgn_chip->base);
return err;
+   }
 
platform_set_drvdata(pdev, mgn_chip);
return 0;
@@ -302,10 +359,17 @@ static int mbigen_device_probe(struct platform_device 
*pdev)
 };
 MODULE_DEVICE_TABLE(of, mbigen_of_match);
 
+static const struct acpi_device_id mbigen_acpi_match[] = {
+{ "HISI0152", 0 },
+   {}
+};
+MODULE_DEVICE_TABLE(acpi, mbigen_acpi_match);
+
 static struct platform_driver mbigen_platform_driver = {
.driver = {
.name   = "Hisilicon MBIGEN-V2",
.of_match_table = mbigen_of_match,
+   .acpi_match_table = ACPI_PTR(mbigen_acpi_match),
},
.probe  = mbigen_device_probe,
 };
-- 
1.7.12.4



[PATCH] ARM64: zynqmp: Fix i2c node's compatible string

2016-12-21 Thread Moritz Fischer
From: Moritz Fischer 

The Zynq Ultrascale MP uses version 1.4 of the Cadence IP core
which fixes some silicon bugs that needed software workarounds
in Version 1.0 that was used on Zynq systems.

Signed-off-by: Moritz Fischer 
Cc: Michal Simek 
Cc: Sören Brinkmann 
Cc: U-Boot List 
Cc: Rob Herring 
---

Hi Michal,

I think this is a slip up and should be r1p14 for
Ultrascale ZynqMP. drivers/i2c/i2c-cadence.c already uses this.
I Cc'd the u-boot list, because the same change would be required there.

Cheers,

Moritz

---
 arch/arm64/boot/dts/xilinx/zynqmp.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi 
b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
index 68a90833..a5a5f91 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
+++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
@@ -175,7 +175,7 @@
};
 
i2c0: i2c@ff02 {
-   compatible = "cdns,i2c-r1p10";
+   compatible = "cdns,i2c-r1p14";
status = "disabled";
interrupt-parent = <>;
interrupts = <0 17 4>;
@@ -185,7 +185,7 @@
};
 
i2c1: i2c@ff03 {
-   compatible = "cdns,i2c-r1p10";
+   compatible = "cdns,i2c-r1p14";
status = "disabled";
interrupt-parent = <>;
interrupts = <0 18 4>;
-- 
2.4.11



[PATCH] ARM64: zynqmp: Fix i2c node's compatible string

2016-12-21 Thread Moritz Fischer
From: Moritz Fischer 

The Zynq Ultrascale MP uses version 1.4 of the Cadence IP core
which fixes some silicon bugs that needed software workarounds
in Version 1.0 that was used on Zynq systems.

Signed-off-by: Moritz Fischer 
Cc: Michal Simek 
Cc: Sören Brinkmann 
Cc: U-Boot List 
Cc: Rob Herring 
---

Hi Michal,

I think this is a slip up and should be r1p14 for
Ultrascale ZynqMP. drivers/i2c/i2c-cadence.c already uses this.
I Cc'd the u-boot list, because the same change would be required there.

Cheers,

Moritz

---
 arch/arm64/boot/dts/xilinx/zynqmp.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi 
b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
index 68a90833..a5a5f91 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
+++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
@@ -175,7 +175,7 @@
};
 
i2c0: i2c@ff02 {
-   compatible = "cdns,i2c-r1p10";
+   compatible = "cdns,i2c-r1p14";
status = "disabled";
interrupt-parent = <>;
interrupts = <0 17 4>;
@@ -185,7 +185,7 @@
};
 
i2c1: i2c@ff03 {
-   compatible = "cdns,i2c-r1p10";
+   compatible = "cdns,i2c-r1p14";
status = "disabled";
interrupt-parent = <>;
interrupts = <0 18 4>;
-- 
2.4.11



Re: [4.10, panic, regression] iscsi: null pointer deref at iscsi_tcp_segment_done+0x20d/0x2e0

2016-12-21 Thread Linus Torvalds
On Wed, Dec 21, 2016 at 9:13 PM, Dave Chinner  wrote:
>
> There may be deeper issues. I just started running scalability tests
> (e.g. 16-way fsmark create tests) and about a minute in I got a
> directory corruption reported - something I hadn't seen in the dev
> cycle at all.

By "in the dev cycle", do you mean your XFS changes, or have you been
tracking the merge cycle at least for some testing?

> I unmounted the fs, mkfs'd it again, ran the
> workload again and about a minute in this fired:
>
> [628867.607417] [ cut here ]
> [628867.608603] WARNING: CPU: 2 PID: 16925 at mm/workingset.c:461 
> shadow_lru_isolate+0x171/0x220

Well, part of the changes during the merge window were the shadow
entry tracking changes that came in through Andrew's tree. Adding
Johannes Weiner to the participants.

> Now, this workload does not touch the page cache at all - it's
> entirely an XFS metadata workload, so it should not really be
> affecting the working set code.

Well, I suspect that anything that creates memory pressure will end up
triggering the working set code, so ..

That said, obviously memory corruption could be involved and result in
random issues too, but I wouldn't really expect that in this code.

It would probably be really useful to get more data points - is the
problem reliably in this area, or is it going to be random and all
over the place.

That said:

> And worse, on that last error, the /host/ is now going into meltdown
> (running 4.7.5) with 32 CPUs all burning down in ACPI code:

The obvious question here is how much you trust the environment if the
host ends up also showing problems. Maybe you do end up having hw
issues pop up too.

The primary suspect would presumably be the development kernel you're
testing triggering something, but it has to be asked..

 Linus


Re: [4.10, panic, regression] iscsi: null pointer deref at iscsi_tcp_segment_done+0x20d/0x2e0

2016-12-21 Thread Linus Torvalds
On Wed, Dec 21, 2016 at 9:13 PM, Dave Chinner  wrote:
>
> There may be deeper issues. I just started running scalability tests
> (e.g. 16-way fsmark create tests) and about a minute in I got a
> directory corruption reported - something I hadn't seen in the dev
> cycle at all.

By "in the dev cycle", do you mean your XFS changes, or have you been
tracking the merge cycle at least for some testing?

> I unmounted the fs, mkfs'd it again, ran the
> workload again and about a minute in this fired:
>
> [628867.607417] [ cut here ]
> [628867.608603] WARNING: CPU: 2 PID: 16925 at mm/workingset.c:461 
> shadow_lru_isolate+0x171/0x220

Well, part of the changes during the merge window were the shadow
entry tracking changes that came in through Andrew's tree. Adding
Johannes Weiner to the participants.

> Now, this workload does not touch the page cache at all - it's
> entirely an XFS metadata workload, so it should not really be
> affecting the working set code.

Well, I suspect that anything that creates memory pressure will end up
triggering the working set code, so ..

That said, obviously memory corruption could be involved and result in
random issues too, but I wouldn't really expect that in this code.

It would probably be really useful to get more data points - is the
problem reliably in this area, or is it going to be random and all
over the place.

That said:

> And worse, on that last error, the /host/ is now going into meltdown
> (running 4.7.5) with 32 CPUs all burning down in ACPI code:

The obvious question here is how much you trust the environment if the
host ends up also showing problems. Maybe you do end up having hw
issues pop up too.

The primary suspect would presumably be the development kernel you're
testing triggering something, but it has to be asked..

 Linus


[PATCH v5 00/14] ACPI platform MSI support and its example mbigen

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

v4 -> v5:
- Add mbigen support back with tested on with Agustin's patchset,
  and it's a good example of how ACPI platform MSI works
- rebased on top of lastest Linus tree (commit 52bce91 splice: 
reinstate SIGPIPE/EPIPE handling)

v3 -> v4:
- Drop mbi-gen patches to just submit platform msi support because
  will rebase mbi-gen patches on top of Agustin's patchset, and 
discusion
  is going there.
- Add a patch to support device topology such as NC(named componant, 
paltform device)
  ->SMMU->ITS which suggested by Lorenzo;
- rebased on top of Lorenzo's v9 of ACPI IORT ARM SMMU support;
- rebased on top of 4.9-rc7

v2 -> v3:
- Drop RFC tag
- Rebase against v4.9-rc2 and Lorenzo's v6 of ACPI IORT ARM SMMU 
support [1]
- Add 3 cleanup patches (patch 1, 2, 3)
- Drop arch_init call patch from last version
- Introduce a callback for platform device to set msi domain
- Introduce a new API to get paltform device's domain instead of
  reusing the PCI one in previous version
- Add a patch to rework iort_node_get_id()

[1]: http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1251993.html

v1 -> v2:
- Fix the bug of if multi Interrupt() resoures in single _PRS,
  we need to calculate all the irq numbers (I missed it in previous
  version);
- Rebased on Marc's irq/irqchip-4.9 branch and Lorenzo's v5
  SMMU patches (also Robin's SMMu patches)
- Add patch irqchip: mbigen: promote mbigen init.

With platform msi support landed in the kernel, and the introduction
of IORT for GICv3 ITS (PCI MSI) and SMMU, the framework for platform msi
is ready, this patch set add few patches to enable the ACPI platform
msi support.

For platform device connecting to ITS on arm platform, we have IORT
table with the named componant node to describe the mappings of paltform
device and ITS, so we can retrieve the dev id and find its parent
irqdomain (ITS) from IORT table (simlar with the ACPI ITS support).

The fisrt 3 patches are cleanups;

Patch 4,5 are refactoring its_pmsi_prepare() for both DT and ACPI
then retrieve the dev id from iort;

Patch 6,7 to create platform msi domain to ACPI case which scanned
the MADT table;

Patch 8,9,10,11 to setup the msi domain for platform device based
on IORT table.

Patch 12,13,14 convert dt based mbigen driver to support ACPI.

Teasted on Hisilicon D03/D05.

Happy holidays!

Thanks
Hanjun

Hanjun Guo (12):
  ACPI: ARM64: IORT: minor cleanup for iort_match_node_callback()
  irqchip: gic-v3-its: keep the head file include in alphabetic order
  ACPI: ARM64: IORT: add missing comment for iort_dev_find_its_id()
  irqchip: gicv3-its: platform-msi: refactor its_pmsi_prepare()
  ACPI: platform-msi: retrieve dev id from IORT
  irqchip: gicv3-its: platform-msi: refactor its_pmsi_init() to prepare
for ACPI
  irqchip: gicv3-its: platform-msi: scan MADT to create platform msi
domain
  ACPI: ARM64: IORT: rework iort_node_get_id()
  ACPI: platform: setup MSI domain for ACPI based platform device
  ACPI: ARM64: IORT: rework iort_node_get_id() for NC->SMMU->ITS case
  msi: platform: make platform_msi_create_device_domain() ACPI aware
  irqchip: mbigen: Add ACPI support

Kefeng Wang (2):
  irqchip: mbigen: drop module owner
  irqchip: mbigen: introduce mbigen_of_create_domain()

 drivers/acpi/acpi_platform.c  |  11 ++
 drivers/acpi/arm64/iort.c | 138 --
 drivers/base/platform-msi.c   |   3 +-
 drivers/base/platform.c   |   3 +
 drivers/irqchip/irq-gic-v3-its-platform-msi.c | 106 +++-
 drivers/irqchip/irq-gic-v3-its.c  |   3 +-
 drivers/irqchip/irq-mbigen.c  | 109 
 include/linux/acpi_iort.h |  11 ++
 include/linux/platform_device.h   |   3 +
 9 files changed, 309 insertions(+), 78 deletions(-)

-- 
1.7.12.4



[PATCH v5 00/14] ACPI platform MSI support and its example mbigen

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

v4 -> v5:
- Add mbigen support back with tested on with Agustin's patchset,
  and it's a good example of how ACPI platform MSI works
- rebased on top of lastest Linus tree (commit 52bce91 splice: 
reinstate SIGPIPE/EPIPE handling)

v3 -> v4:
- Drop mbi-gen patches to just submit platform msi support because
  will rebase mbi-gen patches on top of Agustin's patchset, and 
discusion
  is going there.
- Add a patch to support device topology such as NC(named componant, 
paltform device)
  ->SMMU->ITS which suggested by Lorenzo;
- rebased on top of Lorenzo's v9 of ACPI IORT ARM SMMU support;
- rebased on top of 4.9-rc7

v2 -> v3:
- Drop RFC tag
- Rebase against v4.9-rc2 and Lorenzo's v6 of ACPI IORT ARM SMMU 
support [1]
- Add 3 cleanup patches (patch 1, 2, 3)
- Drop arch_init call patch from last version
- Introduce a callback for platform device to set msi domain
- Introduce a new API to get paltform device's domain instead of
  reusing the PCI one in previous version
- Add a patch to rework iort_node_get_id()

[1]: http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1251993.html

v1 -> v2:
- Fix the bug of if multi Interrupt() resoures in single _PRS,
  we need to calculate all the irq numbers (I missed it in previous
  version);
- Rebased on Marc's irq/irqchip-4.9 branch and Lorenzo's v5
  SMMU patches (also Robin's SMMu patches)
- Add patch irqchip: mbigen: promote mbigen init.

With platform msi support landed in the kernel, and the introduction
of IORT for GICv3 ITS (PCI MSI) and SMMU, the framework for platform msi
is ready, this patch set add few patches to enable the ACPI platform
msi support.

For platform device connecting to ITS on arm platform, we have IORT
table with the named componant node to describe the mappings of paltform
device and ITS, so we can retrieve the dev id and find its parent
irqdomain (ITS) from IORT table (simlar with the ACPI ITS support).

The fisrt 3 patches are cleanups;

Patch 4,5 are refactoring its_pmsi_prepare() for both DT and ACPI
then retrieve the dev id from iort;

Patch 6,7 to create platform msi domain to ACPI case which scanned
the MADT table;

Patch 8,9,10,11 to setup the msi domain for platform device based
on IORT table.

Patch 12,13,14 convert dt based mbigen driver to support ACPI.

Teasted on Hisilicon D03/D05.

Happy holidays!

Thanks
Hanjun

Hanjun Guo (12):
  ACPI: ARM64: IORT: minor cleanup for iort_match_node_callback()
  irqchip: gic-v3-its: keep the head file include in alphabetic order
  ACPI: ARM64: IORT: add missing comment for iort_dev_find_its_id()
  irqchip: gicv3-its: platform-msi: refactor its_pmsi_prepare()
  ACPI: platform-msi: retrieve dev id from IORT
  irqchip: gicv3-its: platform-msi: refactor its_pmsi_init() to prepare
for ACPI
  irqchip: gicv3-its: platform-msi: scan MADT to create platform msi
domain
  ACPI: ARM64: IORT: rework iort_node_get_id()
  ACPI: platform: setup MSI domain for ACPI based platform device
  ACPI: ARM64: IORT: rework iort_node_get_id() for NC->SMMU->ITS case
  msi: platform: make platform_msi_create_device_domain() ACPI aware
  irqchip: mbigen: Add ACPI support

Kefeng Wang (2):
  irqchip: mbigen: drop module owner
  irqchip: mbigen: introduce mbigen_of_create_domain()

 drivers/acpi/acpi_platform.c  |  11 ++
 drivers/acpi/arm64/iort.c | 138 --
 drivers/base/platform-msi.c   |   3 +-
 drivers/base/platform.c   |   3 +
 drivers/irqchip/irq-gic-v3-its-platform-msi.c | 106 +++-
 drivers/irqchip/irq-gic-v3-its.c  |   3 +-
 drivers/irqchip/irq-mbigen.c  | 109 
 include/linux/acpi_iort.h |  11 ++
 include/linux/platform_device.h   |   3 +
 9 files changed, 309 insertions(+), 78 deletions(-)

-- 
1.7.12.4



[PATCH v5 06/14] irqchip: gicv3-its: platform-msi: refactor its_pmsi_init() to prepare for ACPI

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

Introduce its_pmsi_init_one() to refactor the code to isolate
ACPI common code to prepare for ACPI later.

Signed-off-by: Hanjun Guo 
Tested-by: Sinan Kaya 
Cc: Marc Zyngier 
Cc: Tomasz Nowicki 
Cc: Thomas Gleixner 
---
 drivers/irqchip/irq-gic-v3-its-platform-msi.c | 45 ---
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c 
b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
index 16587a9..ff72704 100644
--- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
@@ -84,34 +84,43 @@ static int its_pmsi_prepare(struct irq_domain *domain, 
struct device *dev,
{},
 };
 
-static int __init its_pmsi_init(void)
+static int __init its_pmsi_init_one(struct fwnode_handle *fwnode,
+   const char *name)
 {
-   struct device_node *np;
struct irq_domain *parent;
 
+   parent = irq_find_matching_fwnode(fwnode, DOMAIN_BUS_NEXUS);
+   if (!parent || !msi_get_domain_info(parent)) {
+   pr_err("%s: unable to locate ITS domain\n", name);
+   return -ENXIO;
+   }
+
+   if (!platform_msi_create_irq_domain(fwnode, _pmsi_domain_info,
+   parent)) {
+   pr_err("%s: unable to create platform domain\n", name);
+   return -ENXIO;
+   }
+
+   pr_info("Platform MSI: %s domain created\n", name);
+   return 0;
+}
+
+static void __init its_pmsi_of_init(void)
+{
+   struct device_node *np;
+
for (np = of_find_matching_node(NULL, its_device_id); np;
 np = of_find_matching_node(np, its_device_id)) {
if (!of_property_read_bool(np, "msi-controller"))
continue;
 
-   parent = irq_find_matching_host(np, DOMAIN_BUS_NEXUS);
-   if (!parent || !msi_get_domain_info(parent)) {
-   pr_err("%s: unable to locate ITS domain\n",
-  np->full_name);
-   continue;
-   }
-
-   if (!platform_msi_create_irq_domain(of_node_to_fwnode(np),
-   _pmsi_domain_info,
-   parent)) {
-   pr_err("%s: unable to create platform domain\n",
-  np->full_name);
-   continue;
-   }
-
-   pr_info("Platform MSI: %s domain created\n", np->full_name);
+   its_pmsi_init_one(of_node_to_fwnode(np), np->full_name);
}
+}
 
+static int __init its_pmsi_init(void)
+{
+   its_pmsi_of_init();
return 0;
 }
 early_initcall(its_pmsi_init);
-- 
1.7.12.4



[PATCH v5 09/14] ACPI: platform: setup MSI domain for ACPI based platform device

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

With the platform msi domain created, we can set up the msi domain
for a platform device when it's probed.

In order to do that, we need to get the domain that the platform
device connecting to, so the iort_get_platform_device_domain() is
introduced to retrieve the domain from iort.

After the domain is retrieved, we need a proper way to set the
domain to paltform device, as some platform devices such as an
irqchip needs the msi irqdomain to be the interrupt parent domain,
we need to get irqdomain before platform device is probed but after
the platform device is allocated, so introduce a callback (pre_add_cb)
in pdevinfo to prepare firmware related information which is needed
for device probe, then set the msi domain in that callback.

Signed-off-by: Hanjun Guo 
Cc: Marc Zyngier 
Cc: Rafael J. Wysocki 
Cc: Greg KH 
Cc: Lorenzo Pieralisi 
Cc: Thomas Gleixner 
---
 drivers/acpi/acpi_platform.c| 11 +++
 drivers/acpi/arm64/iort.c   | 43 +
 drivers/base/platform.c |  3 +++
 include/linux/acpi_iort.h   |  3 +++
 include/linux/platform_device.h |  3 +++
 5 files changed, 63 insertions(+)

diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index b4c1a6a..5d8d61b4 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -12,6 +12,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -48,6 +49,15 @@ static void acpi_platform_fill_resource(struct acpi_device 
*adev,
 }
 
 /**
+ * acpi_platform_pre_add_cb - callback before platform device is added, to
+ * prepare firmware related information which is needed for device probe
+ */
+static void acpi_platform_pre_add_cb(struct device *dev)
+{
+   acpi_configure_pmsi_domain(dev);
+}
+
+/**
  * acpi_create_platform_device - Create platform device for ACPI device node
  * @adev: ACPI device node to create a platform device for.
  * @properties: Optional collection of build-in properties.
@@ -109,6 +119,7 @@ struct platform_device *acpi_create_platform_device(struct 
acpi_device *adev,
pdevinfo.num_res = count;
pdevinfo.fwnode = acpi_fwnode_handle(adev);
pdevinfo.properties = properties;
+   pdevinfo.pre_add_cb = acpi_platform_pre_add_cb;
 
if (acpi_dma_supported(adev))
pdevinfo.dma_mask = DMA_BIT_MASK(32);
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index bc68d93..6b72fcb 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -527,6 +527,49 @@ struct irq_domain *iort_get_device_domain(struct device 
*dev, u32 req_id)
return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI);
 }
 
+/**
+ * iort_get_platform_device_domain() - Find MSI domain related to a
+ * platform device
+ * @dev: the dev pointer associated with the platform device
+ *
+ * Returns: the MSI domain for this device, NULL otherwise
+ */
+static struct irq_domain *iort_get_platform_device_domain(struct device *dev)
+{
+   struct acpi_iort_node *node, *msi_parent;
+   struct fwnode_handle *iort_fwnode;
+   struct acpi_iort_its_group *its;
+
+   /* find its associated iort node */
+   node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
+ iort_match_node_callback, dev);
+   if (!node)
+   return NULL;
+
+   /* then find its msi parent node */
+   msi_parent = iort_node_get_id(node, NULL, IORT_MSI_TYPE, 0);
+   if (!msi_parent)
+   return NULL;
+
+   /* Move to ITS specific data */
+   its = (struct acpi_iort_its_group *)msi_parent->node_data;
+
+   iort_fwnode = iort_find_domain_token(its->identifiers[0]);
+   if (!iort_fwnode)
+   return NULL;
+
+   return irq_find_matching_fwnode(iort_fwnode, DOMAIN_BUS_PLATFORM_MSI);
+}
+
+void acpi_configure_pmsi_domain(struct device *dev)
+{
+   struct irq_domain *msi_domain;
+
+   msi_domain = iort_get_platform_device_domain(dev);
+   if (msi_domain)
+   dev_set_msi_domain(dev, msi_domain);
+}
+
 static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
 {
u32 *rid = data;
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index c4af003..3e68f31 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -537,6 +537,9 @@ struct platform_device *platform_device_register_full(
goto err;
}
 
+   if (pdevinfo->pre_add_cb)
+   pdevinfo->pre_add_cb(>dev);
+
ret = platform_device_add(pdev);
if (ret) {
 err:
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index ef99fd52..33f5ac3 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -38,6 +38,7 @@
 /* IOMMU interface */
 void 

[PATCH v5 09/14] ACPI: platform: setup MSI domain for ACPI based platform device

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

With the platform msi domain created, we can set up the msi domain
for a platform device when it's probed.

In order to do that, we need to get the domain that the platform
device connecting to, so the iort_get_platform_device_domain() is
introduced to retrieve the domain from iort.

After the domain is retrieved, we need a proper way to set the
domain to paltform device, as some platform devices such as an
irqchip needs the msi irqdomain to be the interrupt parent domain,
we need to get irqdomain before platform device is probed but after
the platform device is allocated, so introduce a callback (pre_add_cb)
in pdevinfo to prepare firmware related information which is needed
for device probe, then set the msi domain in that callback.

Signed-off-by: Hanjun Guo 
Cc: Marc Zyngier 
Cc: Rafael J. Wysocki 
Cc: Greg KH 
Cc: Lorenzo Pieralisi 
Cc: Thomas Gleixner 
---
 drivers/acpi/acpi_platform.c| 11 +++
 drivers/acpi/arm64/iort.c   | 43 +
 drivers/base/platform.c |  3 +++
 include/linux/acpi_iort.h   |  3 +++
 include/linux/platform_device.h |  3 +++
 5 files changed, 63 insertions(+)

diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index b4c1a6a..5d8d61b4 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -12,6 +12,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -48,6 +49,15 @@ static void acpi_platform_fill_resource(struct acpi_device 
*adev,
 }
 
 /**
+ * acpi_platform_pre_add_cb - callback before platform device is added, to
+ * prepare firmware related information which is needed for device probe
+ */
+static void acpi_platform_pre_add_cb(struct device *dev)
+{
+   acpi_configure_pmsi_domain(dev);
+}
+
+/**
  * acpi_create_platform_device - Create platform device for ACPI device node
  * @adev: ACPI device node to create a platform device for.
  * @properties: Optional collection of build-in properties.
@@ -109,6 +119,7 @@ struct platform_device *acpi_create_platform_device(struct 
acpi_device *adev,
pdevinfo.num_res = count;
pdevinfo.fwnode = acpi_fwnode_handle(adev);
pdevinfo.properties = properties;
+   pdevinfo.pre_add_cb = acpi_platform_pre_add_cb;
 
if (acpi_dma_supported(adev))
pdevinfo.dma_mask = DMA_BIT_MASK(32);
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index bc68d93..6b72fcb 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -527,6 +527,49 @@ struct irq_domain *iort_get_device_domain(struct device 
*dev, u32 req_id)
return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI);
 }
 
+/**
+ * iort_get_platform_device_domain() - Find MSI domain related to a
+ * platform device
+ * @dev: the dev pointer associated with the platform device
+ *
+ * Returns: the MSI domain for this device, NULL otherwise
+ */
+static struct irq_domain *iort_get_platform_device_domain(struct device *dev)
+{
+   struct acpi_iort_node *node, *msi_parent;
+   struct fwnode_handle *iort_fwnode;
+   struct acpi_iort_its_group *its;
+
+   /* find its associated iort node */
+   node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
+ iort_match_node_callback, dev);
+   if (!node)
+   return NULL;
+
+   /* then find its msi parent node */
+   msi_parent = iort_node_get_id(node, NULL, IORT_MSI_TYPE, 0);
+   if (!msi_parent)
+   return NULL;
+
+   /* Move to ITS specific data */
+   its = (struct acpi_iort_its_group *)msi_parent->node_data;
+
+   iort_fwnode = iort_find_domain_token(its->identifiers[0]);
+   if (!iort_fwnode)
+   return NULL;
+
+   return irq_find_matching_fwnode(iort_fwnode, DOMAIN_BUS_PLATFORM_MSI);
+}
+
+void acpi_configure_pmsi_domain(struct device *dev)
+{
+   struct irq_domain *msi_domain;
+
+   msi_domain = iort_get_platform_device_domain(dev);
+   if (msi_domain)
+   dev_set_msi_domain(dev, msi_domain);
+}
+
 static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
 {
u32 *rid = data;
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index c4af003..3e68f31 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -537,6 +537,9 @@ struct platform_device *platform_device_register_full(
goto err;
}
 
+   if (pdevinfo->pre_add_cb)
+   pdevinfo->pre_add_cb(>dev);
+
ret = platform_device_add(pdev);
if (ret) {
 err:
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index ef99fd52..33f5ac3 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -38,6 +38,7 @@
 /* IOMMU interface */
 void iort_set_dma_mask(struct device *dev);
 const struct iommu_ops *iort_iommu_configure(struct device *dev);
+void acpi_configure_pmsi_domain(struct device *dev);
 #else
 static 

[PATCH v5 06/14] irqchip: gicv3-its: platform-msi: refactor its_pmsi_init() to prepare for ACPI

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

Introduce its_pmsi_init_one() to refactor the code to isolate
ACPI common code to prepare for ACPI later.

Signed-off-by: Hanjun Guo 
Tested-by: Sinan Kaya 
Cc: Marc Zyngier 
Cc: Tomasz Nowicki 
Cc: Thomas Gleixner 
---
 drivers/irqchip/irq-gic-v3-its-platform-msi.c | 45 ---
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c 
b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
index 16587a9..ff72704 100644
--- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
@@ -84,34 +84,43 @@ static int its_pmsi_prepare(struct irq_domain *domain, 
struct device *dev,
{},
 };
 
-static int __init its_pmsi_init(void)
+static int __init its_pmsi_init_one(struct fwnode_handle *fwnode,
+   const char *name)
 {
-   struct device_node *np;
struct irq_domain *parent;
 
+   parent = irq_find_matching_fwnode(fwnode, DOMAIN_BUS_NEXUS);
+   if (!parent || !msi_get_domain_info(parent)) {
+   pr_err("%s: unable to locate ITS domain\n", name);
+   return -ENXIO;
+   }
+
+   if (!platform_msi_create_irq_domain(fwnode, _pmsi_domain_info,
+   parent)) {
+   pr_err("%s: unable to create platform domain\n", name);
+   return -ENXIO;
+   }
+
+   pr_info("Platform MSI: %s domain created\n", name);
+   return 0;
+}
+
+static void __init its_pmsi_of_init(void)
+{
+   struct device_node *np;
+
for (np = of_find_matching_node(NULL, its_device_id); np;
 np = of_find_matching_node(np, its_device_id)) {
if (!of_property_read_bool(np, "msi-controller"))
continue;
 
-   parent = irq_find_matching_host(np, DOMAIN_BUS_NEXUS);
-   if (!parent || !msi_get_domain_info(parent)) {
-   pr_err("%s: unable to locate ITS domain\n",
-  np->full_name);
-   continue;
-   }
-
-   if (!platform_msi_create_irq_domain(of_node_to_fwnode(np),
-   _pmsi_domain_info,
-   parent)) {
-   pr_err("%s: unable to create platform domain\n",
-  np->full_name);
-   continue;
-   }
-
-   pr_info("Platform MSI: %s domain created\n", np->full_name);
+   its_pmsi_init_one(of_node_to_fwnode(np), np->full_name);
}
+}
 
+static int __init its_pmsi_init(void)
+{
+   its_pmsi_of_init();
return 0;
 }
 early_initcall(its_pmsi_init);
-- 
1.7.12.4



[PATCH v5 11/14] msi: platform: make platform_msi_create_device_domain() ACPI aware

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

With the platform msi domain created for ITS, irqchip such as
mbi-gen connecting ITS, which needs ctreate its own irqdomain.

Fortunately with the platform msi support upstreamed by Marc,
we just need to add minor code to make it run properly.

platform_msi_create_device_domain() is almost ready for ACPI use
except of_node_to_fwnode() is for dt only, make it ACPI aware then
things will work in both DTS and ACPI.

Signed-off-by: Hanjun Guo 
Cc: Marc Zyngier 
Cc: Greg KH 
Cc: Thomas Gleixner 
Cc: Greg KH 
---
 drivers/base/platform-msi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index be6a599..035ca3b 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -345,8 +345,7 @@ struct irq_domain *
 
data->host_data = host_data;
domain = irq_domain_create_hierarchy(dev->msi_domain, 0, nvec,
-of_node_to_fwnode(dev->of_node),
-ops, data);
+dev->fwnode, ops, data);
if (!domain)
goto free_priv;
 
-- 
1.7.12.4



[PATCH v5 11/14] msi: platform: make platform_msi_create_device_domain() ACPI aware

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

With the platform msi domain created for ITS, irqchip such as
mbi-gen connecting ITS, which needs ctreate its own irqdomain.

Fortunately with the platform msi support upstreamed by Marc,
we just need to add minor code to make it run properly.

platform_msi_create_device_domain() is almost ready for ACPI use
except of_node_to_fwnode() is for dt only, make it ACPI aware then
things will work in both DTS and ACPI.

Signed-off-by: Hanjun Guo 
Cc: Marc Zyngier 
Cc: Greg KH 
Cc: Thomas Gleixner 
Cc: Greg KH 
---
 drivers/base/platform-msi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index be6a599..035ca3b 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -345,8 +345,7 @@ struct irq_domain *
 
data->host_data = host_data;
domain = irq_domain_create_hierarchy(dev->msi_domain, 0, nvec,
-of_node_to_fwnode(dev->of_node),
-ops, data);
+dev->fwnode, ops, data);
if (!domain)
goto free_priv;
 
-- 
1.7.12.4



[PATCH v5 12/14] irqchip: mbigen: drop module owner

2016-12-21 Thread Hanjun Guo
From: Kefeng Wang 

Module owner will be set by driver core, so drop it.

Signed-off-by: Kefeng Wang 
Signed-off-by: Hanjun Guo 
Cc: Marc Zyngier 
Cc: Thomas Gleixner 
Cc: Ma Jun 
---
 drivers/irqchip/irq-mbigen.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index 03b79b0..c01ab41 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -293,7 +293,6 @@ static int mbigen_device_probe(struct platform_device *pdev)
 static struct platform_driver mbigen_platform_driver = {
.driver = {
.name   = "Hisilicon MBIGEN-V2",
-   .owner  = THIS_MODULE,
.of_match_table = mbigen_of_match,
},
.probe  = mbigen_device_probe,
-- 
1.7.12.4



[PATCH v5 10/14] ACPI: ARM64: IORT: rework iort_node_get_id() for NC->SMMU->ITS case

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

iort_node_get_id() for now only support NC(named componant)->SMMU
or NC->ITS cases, we also have other device topology such NC->
SMMU->ITS, so rework iort_node_get_id() for those cases.

Signed-off-by: Hanjun Guo 
Cc: Lorenzo Pieralisi 
---
 drivers/acpi/arm64/iort.c | 59 ++-
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 6b72fcb..9b3f268 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -292,22 +292,28 @@ static acpi_status iort_match_node_callback(struct 
acpi_iort_node *node,
return status;
 }
 
-static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
-  u32 *rid_out)
+static int iort_id_single_map(struct acpi_iort_id_mapping *map, u8 type,
+ u32 *rid_out)
 {
/* Single mapping does not care for input id */
if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) {
if (type == ACPI_IORT_NODE_NAMED_COMPONENT ||
type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
-   *rid_out = map->output_base;
+   if (rid_out)
+   *rid_out = map->output_base;
return 0;
}
 
pr_warn(FW_BUG "[map %p] SINGLE MAPPING flag not allowed for 
node type %d, skipping ID map\n",
map, type);
-   return -ENXIO;
}
 
+   return -ENXIO;
+}
+
+static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
+  u32 *rid_out)
+{
if (rid_in < map->input_base ||
(rid_in >= map->input_base + map->id_count))
return -ENXIO;
@@ -324,33 +330,34 @@ struct acpi_iort_node *iort_node_get_id(struct 
acpi_iort_node *node,
struct acpi_iort_node *parent;
struct acpi_iort_id_mapping *map;
 
-   if (!node->mapping_offset || !node->mapping_count ||
-index >= node->mapping_count)
-   return NULL;
-
-   map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
-  node->mapping_offset);
+   while (node) {
+   if (!node->mapping_offset || !node->mapping_count ||
+index >= node->mapping_count)
+   return NULL;
 
-   /* Firmware bug! */
-   if (!map->output_reference) {
-   pr_err(FW_BUG "[node %p type %d] ID map has NULL parent 
reference\n",
-  node, node->type);
-   return NULL;
-   }
+   map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
+  node->mapping_offset);
 
-   parent = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
-  map->output_reference);
+   /* Firmware bug! */
+   if (!map->output_reference) {
+   pr_err(FW_BUG "[node %p type %d] ID map has NULL parent 
reference\n",
+  node, node->type);
+   return NULL;
+   }
 
-   if (!(IORT_TYPE_MASK(parent->type) & type_mask))
-   return NULL;
+   parent = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
+ map->output_reference);
 
-   if (map[index].flags & ACPI_IORT_ID_SINGLE_MAPPING) {
-   if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT ||
-   node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
-   if (id_out)
-   *id_out = map[index].output_base;
-   return parent;
+   /* go upstream to find its parent */
+   if (!(IORT_TYPE_MASK(parent->type) & type_mask)) {
+   node = parent;
+   continue;
}
+
+   if (iort_id_single_map([index], node->type, id_out))
+   break;
+
+   return parent;
}
 
return NULL;
-- 
1.7.12.4



Re: George's crazy full state idea (Re: HalfSipHash Acceptable Usage)

2016-12-21 Thread Andy Lutomirski
On Wed, Dec 21, 2016 at 9:01 PM, George Spelvin
 wrote:
> Andy Lutomirski wrote:
>> I don't even think it needs that.  This is just adding a
>> non-destructive final operation, right?
>
> It is, but the problem is that SipHash is intended for *small* inputs,
> so the standard implementations aren't broken into init/update/final
> functions.
>
> There's just one big function that keeps the state variables in
> registers and never stores them anywhere.
>
> If we *had* init/update/final functions, then it would be trivial.
>
>> Just to clarify, if we replace SipHash with a black box, I think this
>> effectively means, where "entropy" is random_get_entropy() || jiffies
>> || current->pid:
>
>> The first call returns H(random seed || entropy_0 || secret).  The
>> second call returns H(random seed || entropy_0 || secret || entropy_1
>> || secret).  Etc.
>
> Basically, yes.  I was skipping the padding byte and keying the
> finalization rounds on the grounds of "can't hurt and might help",
> but we could do it a more standard way.
>
>> If not, then I have a fairly strong preference to keep whatever
>> construction we come up with consistent with something that could
>> actually happen with invocations of unmodified SipHash -- then all the
>> security analysis on SipHash goes through.
>
> Okay.  I don't think it makes a difference, but it's not a *big* waste
> of time.  If we have finalization rounds, we can reduce the secret
> to 128 bits.
>
> If we include the padding byte, we can do one of two things:
> 1) Make the secret 184 bits, to fill up the final partial word as
>much as possible, or
> 2) Make the entropy 1 byte smaller and conceptually misalign the
>secret.  What we'd actually do is remove the last byte of
>the secret and include it in the entropy words, but that's
>just a rotation of the secret between storage and hashing.
>
> Also, I assume you'd like SipHash-2-4, since you want to rely
> on a security analysis.

I haven't looked, but I assume that the analysis at least thought
about reduced rounds, so maybe other variants are okay.

>> The one thing I don't like is
>> that I don't see how to prove that you can't run it backwards if you
>> manage to acquire a memory dump.  In fact, I that that there exist, at
>> least in theory, hash functions that are secure in the random oracle
>> model but that *can* be run backwards given the full state.  From
>> memory, SHA-3 has exactly that property, and it would be a bit sad for
>> a CSPRNG to be reversible.
>
> Er...  get_random_int() is specifically *not* designed to be resistant
> to state capture, and I didn't try.  Remember, what it's used for
> is ASLR, what we're worried about is somene learning the layouts
> of still-running processes, and and if you get a memory dump, you have
> the memory layout!

True, but it's called get_random_int(), and it seems like making it
stronger, especially if the performance cost is low to zero, is a good
thing.

>
> If you want anti-backtracking, though, it's easy to add.  What we
> hash is:
>
> entropy_0 || secret || output_0 || entropy_1 || secret || output_1 || ...
>
> You mix the output word right back in to the (unfinalized) state after
> generating it.  This is still equivalent to unmodified back-box SipHash,
> you're just using a (conceptually independent) SipHash invocation to
> produce some of its input.

Ah, cute.  This could probably be sped up by doing something like:

entropy_0 || secret || output_0 ^ entropy_1 || secret || ...

It's a little weak because the output is only 64 bits, so you could
plausibly backtrack it on a GPU or FPGA cluster or on an ASIC if the
old entropy is guessable.  I suspect there are sneaky ways around it
like using output_n-1 ^ output_n-2 or similar.  I'll sleep on it.

>
> The only remaining issues are:
> 1) How many rounds, and
> 2) May we use HalfSipHash?

I haven't looked closely enough to have a real opinion here.  I don't
know what the security margin is believed to be.

>
> I'd *like* to persuade you that skipping the padding byte wouldn't
> invalidate any security proofs, because it's true and would simplify
> the code.  But if you want 100% stock, I'm willing to cater to that.

I lean toward stock in the absence of a particularly good reason.  At
the very least I'd want to read that paper carefully.

>
> Ted, what do you think?



-- 
Andy Lutomirski
AMA Capital Management, LLC


[PATCH v5 08/14] ACPI: ARM64: IORT: rework iort_node_get_id()

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

iort_node_get_id() has two output, one is the mapped ids,
the other is the referenced parent node which is returned
from the function.

For now we need a API just return its parent node for
single mapping, so just update this function slightly then
reuse it later.

Signed-off-by: Hanjun Guo 
Cc: Lorenzo Pieralisi 
Cc: Marc Zyngier 
---
 drivers/acpi/arm64/iort.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index ab7bae7..bc68d93 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -347,7 +347,8 @@ struct acpi_iort_node *iort_node_get_id(struct 
acpi_iort_node *node,
if (map[index].flags & ACPI_IORT_ID_SINGLE_MAPPING) {
if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT ||
node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
-   *id_out = map[index].output_base;
+   if (id_out)
+   *id_out = map[index].output_base;
return parent;
}
}
-- 
1.7.12.4



[PATCH v5 12/14] irqchip: mbigen: drop module owner

2016-12-21 Thread Hanjun Guo
From: Kefeng Wang 

Module owner will be set by driver core, so drop it.

Signed-off-by: Kefeng Wang 
Signed-off-by: Hanjun Guo 
Cc: Marc Zyngier 
Cc: Thomas Gleixner 
Cc: Ma Jun 
---
 drivers/irqchip/irq-mbigen.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index 03b79b0..c01ab41 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -293,7 +293,6 @@ static int mbigen_device_probe(struct platform_device *pdev)
 static struct platform_driver mbigen_platform_driver = {
.driver = {
.name   = "Hisilicon MBIGEN-V2",
-   .owner  = THIS_MODULE,
.of_match_table = mbigen_of_match,
},
.probe  = mbigen_device_probe,
-- 
1.7.12.4



[PATCH v5 10/14] ACPI: ARM64: IORT: rework iort_node_get_id() for NC->SMMU->ITS case

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

iort_node_get_id() for now only support NC(named componant)->SMMU
or NC->ITS cases, we also have other device topology such NC->
SMMU->ITS, so rework iort_node_get_id() for those cases.

Signed-off-by: Hanjun Guo 
Cc: Lorenzo Pieralisi 
---
 drivers/acpi/arm64/iort.c | 59 ++-
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 6b72fcb..9b3f268 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -292,22 +292,28 @@ static acpi_status iort_match_node_callback(struct 
acpi_iort_node *node,
return status;
 }
 
-static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
-  u32 *rid_out)
+static int iort_id_single_map(struct acpi_iort_id_mapping *map, u8 type,
+ u32 *rid_out)
 {
/* Single mapping does not care for input id */
if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) {
if (type == ACPI_IORT_NODE_NAMED_COMPONENT ||
type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
-   *rid_out = map->output_base;
+   if (rid_out)
+   *rid_out = map->output_base;
return 0;
}
 
pr_warn(FW_BUG "[map %p] SINGLE MAPPING flag not allowed for 
node type %d, skipping ID map\n",
map, type);
-   return -ENXIO;
}
 
+   return -ENXIO;
+}
+
+static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
+  u32 *rid_out)
+{
if (rid_in < map->input_base ||
(rid_in >= map->input_base + map->id_count))
return -ENXIO;
@@ -324,33 +330,34 @@ struct acpi_iort_node *iort_node_get_id(struct 
acpi_iort_node *node,
struct acpi_iort_node *parent;
struct acpi_iort_id_mapping *map;
 
-   if (!node->mapping_offset || !node->mapping_count ||
-index >= node->mapping_count)
-   return NULL;
-
-   map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
-  node->mapping_offset);
+   while (node) {
+   if (!node->mapping_offset || !node->mapping_count ||
+index >= node->mapping_count)
+   return NULL;
 
-   /* Firmware bug! */
-   if (!map->output_reference) {
-   pr_err(FW_BUG "[node %p type %d] ID map has NULL parent 
reference\n",
-  node, node->type);
-   return NULL;
-   }
+   map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
+  node->mapping_offset);
 
-   parent = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
-  map->output_reference);
+   /* Firmware bug! */
+   if (!map->output_reference) {
+   pr_err(FW_BUG "[node %p type %d] ID map has NULL parent 
reference\n",
+  node, node->type);
+   return NULL;
+   }
 
-   if (!(IORT_TYPE_MASK(parent->type) & type_mask))
-   return NULL;
+   parent = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
+ map->output_reference);
 
-   if (map[index].flags & ACPI_IORT_ID_SINGLE_MAPPING) {
-   if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT ||
-   node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
-   if (id_out)
-   *id_out = map[index].output_base;
-   return parent;
+   /* go upstream to find its parent */
+   if (!(IORT_TYPE_MASK(parent->type) & type_mask)) {
+   node = parent;
+   continue;
}
+
+   if (iort_id_single_map([index], node->type, id_out))
+   break;
+
+   return parent;
}
 
return NULL;
-- 
1.7.12.4



Re: George's crazy full state idea (Re: HalfSipHash Acceptable Usage)

2016-12-21 Thread Andy Lutomirski
On Wed, Dec 21, 2016 at 9:01 PM, George Spelvin
 wrote:
> Andy Lutomirski wrote:
>> I don't even think it needs that.  This is just adding a
>> non-destructive final operation, right?
>
> It is, but the problem is that SipHash is intended for *small* inputs,
> so the standard implementations aren't broken into init/update/final
> functions.
>
> There's just one big function that keeps the state variables in
> registers and never stores them anywhere.
>
> If we *had* init/update/final functions, then it would be trivial.
>
>> Just to clarify, if we replace SipHash with a black box, I think this
>> effectively means, where "entropy" is random_get_entropy() || jiffies
>> || current->pid:
>
>> The first call returns H(random seed || entropy_0 || secret).  The
>> second call returns H(random seed || entropy_0 || secret || entropy_1
>> || secret).  Etc.
>
> Basically, yes.  I was skipping the padding byte and keying the
> finalization rounds on the grounds of "can't hurt and might help",
> but we could do it a more standard way.
>
>> If not, then I have a fairly strong preference to keep whatever
>> construction we come up with consistent with something that could
>> actually happen with invocations of unmodified SipHash -- then all the
>> security analysis on SipHash goes through.
>
> Okay.  I don't think it makes a difference, but it's not a *big* waste
> of time.  If we have finalization rounds, we can reduce the secret
> to 128 bits.
>
> If we include the padding byte, we can do one of two things:
> 1) Make the secret 184 bits, to fill up the final partial word as
>much as possible, or
> 2) Make the entropy 1 byte smaller and conceptually misalign the
>secret.  What we'd actually do is remove the last byte of
>the secret and include it in the entropy words, but that's
>just a rotation of the secret between storage and hashing.
>
> Also, I assume you'd like SipHash-2-4, since you want to rely
> on a security analysis.

I haven't looked, but I assume that the analysis at least thought
about reduced rounds, so maybe other variants are okay.

>> The one thing I don't like is
>> that I don't see how to prove that you can't run it backwards if you
>> manage to acquire a memory dump.  In fact, I that that there exist, at
>> least in theory, hash functions that are secure in the random oracle
>> model but that *can* be run backwards given the full state.  From
>> memory, SHA-3 has exactly that property, and it would be a bit sad for
>> a CSPRNG to be reversible.
>
> Er...  get_random_int() is specifically *not* designed to be resistant
> to state capture, and I didn't try.  Remember, what it's used for
> is ASLR, what we're worried about is somene learning the layouts
> of still-running processes, and and if you get a memory dump, you have
> the memory layout!

True, but it's called get_random_int(), and it seems like making it
stronger, especially if the performance cost is low to zero, is a good
thing.

>
> If you want anti-backtracking, though, it's easy to add.  What we
> hash is:
>
> entropy_0 || secret || output_0 || entropy_1 || secret || output_1 || ...
>
> You mix the output word right back in to the (unfinalized) state after
> generating it.  This is still equivalent to unmodified back-box SipHash,
> you're just using a (conceptually independent) SipHash invocation to
> produce some of its input.

Ah, cute.  This could probably be sped up by doing something like:

entropy_0 || secret || output_0 ^ entropy_1 || secret || ...

It's a little weak because the output is only 64 bits, so you could
plausibly backtrack it on a GPU or FPGA cluster or on an ASIC if the
old entropy is guessable.  I suspect there are sneaky ways around it
like using output_n-1 ^ output_n-2 or similar.  I'll sleep on it.

>
> The only remaining issues are:
> 1) How many rounds, and
> 2) May we use HalfSipHash?

I haven't looked closely enough to have a real opinion here.  I don't
know what the security margin is believed to be.

>
> I'd *like* to persuade you that skipping the padding byte wouldn't
> invalidate any security proofs, because it's true and would simplify
> the code.  But if you want 100% stock, I'm willing to cater to that.

I lean toward stock in the absence of a particularly good reason.  At
the very least I'd want to read that paper carefully.

>
> Ted, what do you think?



-- 
Andy Lutomirski
AMA Capital Management, LLC


[PATCH v5 08/14] ACPI: ARM64: IORT: rework iort_node_get_id()

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

iort_node_get_id() has two output, one is the mapped ids,
the other is the referenced parent node which is returned
from the function.

For now we need a API just return its parent node for
single mapping, so just update this function slightly then
reuse it later.

Signed-off-by: Hanjun Guo 
Cc: Lorenzo Pieralisi 
Cc: Marc Zyngier 
---
 drivers/acpi/arm64/iort.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index ab7bae7..bc68d93 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -347,7 +347,8 @@ struct acpi_iort_node *iort_node_get_id(struct 
acpi_iort_node *node,
if (map[index].flags & ACPI_IORT_ID_SINGLE_MAPPING) {
if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT ||
node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
-   *id_out = map[index].output_base;
+   if (id_out)
+   *id_out = map[index].output_base;
return parent;
}
}
-- 
1.7.12.4



[PATCH v5 02/14] irqchip: gic-v3-its: keep the head file include in alphabetic order

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

The head file is strictly in alphabetic order now, so let's
be the rule breaker. As acpi_iort.h includes acpi.h so remove
the duplidate acpi.h inclusion as well.

Signed-off-by: Hanjun Guo 
Cc: Marc Zyngier 
Cc: Tomasz Nowicki 
---
 drivers/irqchip/irq-gic-v3-its.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 69b040f..f471939 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -15,14 +15,13 @@
  * along with this program.  If not, see .
  */
 
-#include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
-- 
1.7.12.4



[PATCH v5 03/14] ACPI: ARM64: IORT: add missing comment for iort_dev_find_its_id()

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

We are missing req_id's comment for iort_dev_find_its_id(),
add it back.

Signed-off-by: Hanjun Guo 
Cc: Lorenzo Pieralisi 
Cc: Tomasz Nowicki 
---
 drivers/acpi/arm64/iort.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 46e2d82..174e983 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -446,6 +446,7 @@ u32 iort_msi_map_rid(struct device *dev, u32 req_id)
 /**
  * iort_dev_find_its_id() - Find the ITS identifier for a device
  * @dev: The device.
+ * @req_id: Device's Requster ID
  * @idx: Index of the ITS identifier list.
  * @its_id: ITS identifier.
  *
-- 
1.7.12.4



[PATCH v5 05/14] ACPI: platform-msi: retrieve dev id from IORT

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

For devices connecting to ITS, it needs dev id to identify
itself, and this dev id is represented in the IORT table in
named componant node [1] for platform devices, so in this
patch we will scan the IORT to retrieve device's dev id.

Introduce iort_pmsi_get_dev_id() with pointer dev passed
in for that purpose.

[1]: https://static.docs.arm.com/den0049/b/DEN0049B_IO_Remapping_Table.pdf

Signed-off-by: Hanjun Guo 
Tested-by: Sinan Kaya 
Cc: Marc Zyngier 
Cc: Lorenzo Pieralisi 
Cc: Tomasz Nowicki 
Cc: Thomas Gleixner 
---
 drivers/acpi/arm64/iort.c | 26 ++
 drivers/irqchip/irq-gic-v3-its-platform-msi.c |  4 +++-
 include/linux/acpi_iort.h |  8 
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 174e983..ab7bae7 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -444,6 +444,32 @@ u32 iort_msi_map_rid(struct device *dev, u32 req_id)
 }
 
 /**
+ * iort_pmsi_get_dev_id() - Get the device id for a device
+ * @dev: The device for which the mapping is to be done.
+ * @dev_id: The device ID found.
+ *
+ * Returns: 0 for successful find a dev id, errors otherwise
+ */
+int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
+{
+   struct acpi_iort_node *node;
+
+   if (!iort_table)
+   return -ENODEV;
+
+   node = iort_find_dev_node(dev);
+   if (!node) {
+   dev_err(dev, "can't find related IORT node\n");
+   return -ENODEV;
+   }
+
+   if(!iort_node_get_id(node, dev_id, IORT_MSI_TYPE, 0))
+   return -ENODEV;
+
+   return 0;
+}
+
+/**
  * iort_dev_find_its_id() - Find the ITS identifier for a device
  * @dev: The device.
  * @req_id: Device's Requster ID
diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c 
b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
index 3c94278..16587a9 100644
--- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
@@ -15,6 +15,7 @@
  * along with this program.  If not, see .
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -56,7 +57,8 @@ static int its_pmsi_prepare(struct irq_domain *domain, struct 
device *dev,
 
msi_info = msi_get_domain_info(domain->parent);
 
-   ret = of_pmsi_get_dev_id(domain, dev, _id);
+   ret = dev->of_node ? of_pmsi_get_dev_id(domain, dev, _id) :
+   iort_pmsi_get_dev_id(dev, _id);
if (ret)
return ret;
 
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 77e0809..ef99fd52 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -33,6 +33,7 @@
 void acpi_iort_init(void);
 bool iort_node_match(u8 type);
 u32 iort_msi_map_rid(struct device *dev, u32 req_id);
+int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
 struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
 /* IOMMU interface */
 void iort_set_dma_mask(struct device *dev);
@@ -42,9 +43,16 @@ static inline void acpi_iort_init(void) { }
 static inline bool iort_node_match(u8 type) { return false; }
 static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
 { return req_id; }
+
 static inline struct irq_domain *iort_get_device_domain(struct device *dev,
u32 req_id)
 { return NULL; }
+
+static inline int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
+{
+   return -ENODEV;
+}
+
 /* IOMMU interface */
 static inline void iort_set_dma_mask(struct device *dev) { }
 static inline
-- 
1.7.12.4



[PATCH v5 02/14] irqchip: gic-v3-its: keep the head file include in alphabetic order

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

The head file is strictly in alphabetic order now, so let's
be the rule breaker. As acpi_iort.h includes acpi.h so remove
the duplidate acpi.h inclusion as well.

Signed-off-by: Hanjun Guo 
Cc: Marc Zyngier 
Cc: Tomasz Nowicki 
---
 drivers/irqchip/irq-gic-v3-its.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 69b040f..f471939 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -15,14 +15,13 @@
  * along with this program.  If not, see .
  */
 
-#include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
-- 
1.7.12.4



[PATCH v5 03/14] ACPI: ARM64: IORT: add missing comment for iort_dev_find_its_id()

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

We are missing req_id's comment for iort_dev_find_its_id(),
add it back.

Signed-off-by: Hanjun Guo 
Cc: Lorenzo Pieralisi 
Cc: Tomasz Nowicki 
---
 drivers/acpi/arm64/iort.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 46e2d82..174e983 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -446,6 +446,7 @@ u32 iort_msi_map_rid(struct device *dev, u32 req_id)
 /**
  * iort_dev_find_its_id() - Find the ITS identifier for a device
  * @dev: The device.
+ * @req_id: Device's Requster ID
  * @idx: Index of the ITS identifier list.
  * @its_id: ITS identifier.
  *
-- 
1.7.12.4



[PATCH v5 05/14] ACPI: platform-msi: retrieve dev id from IORT

2016-12-21 Thread Hanjun Guo
From: Hanjun Guo 

For devices connecting to ITS, it needs dev id to identify
itself, and this dev id is represented in the IORT table in
named componant node [1] for platform devices, so in this
patch we will scan the IORT to retrieve device's dev id.

Introduce iort_pmsi_get_dev_id() with pointer dev passed
in for that purpose.

[1]: https://static.docs.arm.com/den0049/b/DEN0049B_IO_Remapping_Table.pdf

Signed-off-by: Hanjun Guo 
Tested-by: Sinan Kaya 
Cc: Marc Zyngier 
Cc: Lorenzo Pieralisi 
Cc: Tomasz Nowicki 
Cc: Thomas Gleixner 
---
 drivers/acpi/arm64/iort.c | 26 ++
 drivers/irqchip/irq-gic-v3-its-platform-msi.c |  4 +++-
 include/linux/acpi_iort.h |  8 
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 174e983..ab7bae7 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -444,6 +444,32 @@ u32 iort_msi_map_rid(struct device *dev, u32 req_id)
 }
 
 /**
+ * iort_pmsi_get_dev_id() - Get the device id for a device
+ * @dev: The device for which the mapping is to be done.
+ * @dev_id: The device ID found.
+ *
+ * Returns: 0 for successful find a dev id, errors otherwise
+ */
+int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
+{
+   struct acpi_iort_node *node;
+
+   if (!iort_table)
+   return -ENODEV;
+
+   node = iort_find_dev_node(dev);
+   if (!node) {
+   dev_err(dev, "can't find related IORT node\n");
+   return -ENODEV;
+   }
+
+   if(!iort_node_get_id(node, dev_id, IORT_MSI_TYPE, 0))
+   return -ENODEV;
+
+   return 0;
+}
+
+/**
  * iort_dev_find_its_id() - Find the ITS identifier for a device
  * @dev: The device.
  * @req_id: Device's Requster ID
diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c 
b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
index 3c94278..16587a9 100644
--- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
@@ -15,6 +15,7 @@
  * along with this program.  If not, see .
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -56,7 +57,8 @@ static int its_pmsi_prepare(struct irq_domain *domain, struct 
device *dev,
 
msi_info = msi_get_domain_info(domain->parent);
 
-   ret = of_pmsi_get_dev_id(domain, dev, _id);
+   ret = dev->of_node ? of_pmsi_get_dev_id(domain, dev, _id) :
+   iort_pmsi_get_dev_id(dev, _id);
if (ret)
return ret;
 
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 77e0809..ef99fd52 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -33,6 +33,7 @@
 void acpi_iort_init(void);
 bool iort_node_match(u8 type);
 u32 iort_msi_map_rid(struct device *dev, u32 req_id);
+int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
 struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
 /* IOMMU interface */
 void iort_set_dma_mask(struct device *dev);
@@ -42,9 +43,16 @@ static inline void acpi_iort_init(void) { }
 static inline bool iort_node_match(u8 type) { return false; }
 static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
 { return req_id; }
+
 static inline struct irq_domain *iort_get_device_domain(struct device *dev,
u32 req_id)
 { return NULL; }
+
+static inline int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
+{
+   return -ENODEV;
+}
+
 /* IOMMU interface */
 static inline void iort_set_dma_mask(struct device *dev) { }
 static inline
-- 
1.7.12.4



Re: [kernel-hardening] Re: [PATCH v7 3/6] random: use SipHash in place of MD5

2016-12-21 Thread Theodore Ts'o
On Thu, Dec 22, 2016 at 03:49:39AM +0100, Jason A. Donenfeld wrote:
> 
> Funny -- while you guys were sending this back & forth, I was writing
> my reply to Andy which essentially arrives at the same conclusion.
> Given that we're all arriving to the same thing, and that Ted shot in
> this direction long before we all did, I'm leaning toward abandoning
> SipHash for the de-MD5-ification of get_random_int/long, and working
> on polishing Ted's idea into something shiny for this patchset.

here are my numbers comparing siphash (using the first three patches
of the v7 siphash patches) with my batched chacha20 implementation.
The results are taken by running get_random_* 1 times, and then
dividing the numbers by 1 to get the average number of cycles for
the call.  I compiled 32-bit and 64-bit kernels, and ran the results
using kvm:

   siphashbatched chacha20
 get_random_int  get_random_long   get_random_int   get_random_long   

32-bit270  278 114146
64-bit 75   75 106186

> I did have two objections to this. The first was that my SipHash
> construction is faster.

Well, it's faster on everything except 32-bit x86.  :-P

> The second, and the more
> important one, was that batching entropy up like this means that 32
> calls will be really fast, and then the 33rd will be slow, since it
> has to do a whole ChaCha round, because get_random_bytes must be
> called to refill the batch.

... and this will take 2121 cycles on 64-bit x86, and 2315 cycles on a
32-bit x86.  Which on a 2.3 GHz processor, is just under a
microsecond.  As far as being inconsistent on process startup, I very
much doubt a microsecond is really going to be visible to the user.  :-)

The bottom line is that I think we're really "pixel peeping" at this
point --- which is what obsessed digital photographers will do when
debating the quality of a Canon vs Nikon DSLR by blowing up a photo by
a thousand times, and then trying to claim that this is visible to the
human eye.  Or people who obsessing over the frequency response curves
of TH-X00 headphones with Mahogony vs Purpleheart wood, when it's
likely that in a blind head-to-head comparison, most people wouldn't
be able to tell the difference

I think the main argument for using the batched getrandom approach is
that it, I would argue, simpler than introducing siphash into the
picture.  On 64-bit platforms it is faster and more consistent, so
it's basically that versus complexity of having to adding siphash to
the things that people have to analyze when considering random number
security on Linux.   But it's a close call either way, I think.

  - Ted

P.S.  My benchmarking code

diff --git a/drivers/char/random.c b/drivers/char/random.c
index a51f0ff43f00..41860864b775 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1682,6 +1682,55 @@ static int rand_initialize(void)
 }
 early_initcall(rand_initialize);
 
+static unsigned int get_random_int_new(void);
+static unsigned long get_random_long_new(void);
+
+#define NUM_CYCLES 1
+#define AVG(finish, start) ((unsigned int)(finish - start + NUM_CYCLES/2) / 
NUM_CYCLES)
+
+static int rand_benchmark(void)
+{
+   cycles_t start,finish;
+   int i, out;
+
+   pr_crit("random benchmark!!\n");
+   start = get_cycles();
+   for (i = 0; i < NUM_CYCLES; i++) {
+   get_random_int();}
+   finish = get_cycles();
+   pr_err("get_random_int # cycles: %u\n", AVG(finish, start));
+
+   start = get_cycles();
+   for (i = 0; i < NUM_CYCLES; i++) {
+   get_random_int_new();
+   }
+   finish = get_cycles();
+   pr_err("get_random_int_new (batched chacha20) # cycles: %u\n", 
AVG(finish, start));
+
+   start = get_cycles();
+   for (i = 0; i < NUM_CYCLES; i++) {
+   get_random_long();
+   }
+   finish = get_cycles();
+   pr_err("get_random_long # cycles: %u\n", AVG(finish, start));
+
+   start = get_cycles();
+   for (i = 0; i < NUM_CYCLES; i++) {
+   get_random_long_new();
+   }
+   finish = get_cycles();
+   pr_err("get_random_long_new (batched chacha20) # cycles: %u\n", 
AVG(finish, start));
+
+   start = get_cycles();
+   for (i = 0; i < NUM_CYCLES; i++) {
+   get_random_bytes(, sizeof(out));
+   }
+   finish = get_cycles();
+   pr_err("get_random_bytes # cycles: %u\n", AVG(finish, start));
+   return 0;
+}
+device_initcall(rand_benchmark);
+
 #ifdef CONFIG_BLOCK
 void rand_initialize_disk(struct gendisk *disk)
 {
@@ -2064,8 +2113,10 @@ unsigned int get_random_int(void)
unsigned int ret;
u64 *chaining;
 
+#if 0  // force slow path
if (arch_get_random_int())
return ret;
+#endif
 
chaining = _cpu_var(get_random_int_chaining);
ret = *chaining = 

Re: [kernel-hardening] Re: [PATCH v7 3/6] random: use SipHash in place of MD5

2016-12-21 Thread Theodore Ts'o
On Thu, Dec 22, 2016 at 03:49:39AM +0100, Jason A. Donenfeld wrote:
> 
> Funny -- while you guys were sending this back & forth, I was writing
> my reply to Andy which essentially arrives at the same conclusion.
> Given that we're all arriving to the same thing, and that Ted shot in
> this direction long before we all did, I'm leaning toward abandoning
> SipHash for the de-MD5-ification of get_random_int/long, and working
> on polishing Ted's idea into something shiny for this patchset.

here are my numbers comparing siphash (using the first three patches
of the v7 siphash patches) with my batched chacha20 implementation.
The results are taken by running get_random_* 1 times, and then
dividing the numbers by 1 to get the average number of cycles for
the call.  I compiled 32-bit and 64-bit kernels, and ran the results
using kvm:

   siphashbatched chacha20
 get_random_int  get_random_long   get_random_int   get_random_long   

32-bit270  278 114146
64-bit 75   75 106186

> I did have two objections to this. The first was that my SipHash
> construction is faster.

Well, it's faster on everything except 32-bit x86.  :-P

> The second, and the more
> important one, was that batching entropy up like this means that 32
> calls will be really fast, and then the 33rd will be slow, since it
> has to do a whole ChaCha round, because get_random_bytes must be
> called to refill the batch.

... and this will take 2121 cycles on 64-bit x86, and 2315 cycles on a
32-bit x86.  Which on a 2.3 GHz processor, is just under a
microsecond.  As far as being inconsistent on process startup, I very
much doubt a microsecond is really going to be visible to the user.  :-)

The bottom line is that I think we're really "pixel peeping" at this
point --- which is what obsessed digital photographers will do when
debating the quality of a Canon vs Nikon DSLR by blowing up a photo by
a thousand times, and then trying to claim that this is visible to the
human eye.  Or people who obsessing over the frequency response curves
of TH-X00 headphones with Mahogony vs Purpleheart wood, when it's
likely that in a blind head-to-head comparison, most people wouldn't
be able to tell the difference

I think the main argument for using the batched getrandom approach is
that it, I would argue, simpler than introducing siphash into the
picture.  On 64-bit platforms it is faster and more consistent, so
it's basically that versus complexity of having to adding siphash to
the things that people have to analyze when considering random number
security on Linux.   But it's a close call either way, I think.

  - Ted

P.S.  My benchmarking code

diff --git a/drivers/char/random.c b/drivers/char/random.c
index a51f0ff43f00..41860864b775 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1682,6 +1682,55 @@ static int rand_initialize(void)
 }
 early_initcall(rand_initialize);
 
+static unsigned int get_random_int_new(void);
+static unsigned long get_random_long_new(void);
+
+#define NUM_CYCLES 1
+#define AVG(finish, start) ((unsigned int)(finish - start + NUM_CYCLES/2) / 
NUM_CYCLES)
+
+static int rand_benchmark(void)
+{
+   cycles_t start,finish;
+   int i, out;
+
+   pr_crit("random benchmark!!\n");
+   start = get_cycles();
+   for (i = 0; i < NUM_CYCLES; i++) {
+   get_random_int();}
+   finish = get_cycles();
+   pr_err("get_random_int # cycles: %u\n", AVG(finish, start));
+
+   start = get_cycles();
+   for (i = 0; i < NUM_CYCLES; i++) {
+   get_random_int_new();
+   }
+   finish = get_cycles();
+   pr_err("get_random_int_new (batched chacha20) # cycles: %u\n", 
AVG(finish, start));
+
+   start = get_cycles();
+   for (i = 0; i < NUM_CYCLES; i++) {
+   get_random_long();
+   }
+   finish = get_cycles();
+   pr_err("get_random_long # cycles: %u\n", AVG(finish, start));
+
+   start = get_cycles();
+   for (i = 0; i < NUM_CYCLES; i++) {
+   get_random_long_new();
+   }
+   finish = get_cycles();
+   pr_err("get_random_long_new (batched chacha20) # cycles: %u\n", 
AVG(finish, start));
+
+   start = get_cycles();
+   for (i = 0; i < NUM_CYCLES; i++) {
+   get_random_bytes(, sizeof(out));
+   }
+   finish = get_cycles();
+   pr_err("get_random_bytes # cycles: %u\n", AVG(finish, start));
+   return 0;
+}
+device_initcall(rand_benchmark);
+
 #ifdef CONFIG_BLOCK
 void rand_initialize_disk(struct gendisk *disk)
 {
@@ -2064,8 +2113,10 @@ unsigned int get_random_int(void)
unsigned int ret;
u64 *chaining;
 
+#if 0  // force slow path
if (arch_get_random_int())
return ret;
+#endif
 
chaining = _cpu_var(get_random_int_chaining);
ret = *chaining = 

  1   2   3   4   5   6   7   8   9   10   >